Push the rest

2026-05-11 10:58:46 +02:00
parent adb5c1a439
commit 0031caf16c
94 changed files with 11777 additions and 3474 deletions

@@ -1 +0,0 @@
"""Backend application package."""

backend/app/core/audit.py Normal file

@@ -0,0 +1,174 @@
from __future__ import annotations
import logging
from datetime import datetime, timezone
from uuid import uuid4
from opentelemetry import trace
from sqlalchemy import DateTime, Integer, String, Text, JSON
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, sessionmaker, Session
LOGGER = logging.getLogger(__name__)
def _utcnow() -> datetime:
return datetime.now(timezone.utc)
def current_span_context() -> tuple[str | None, str | None]:
ctx = trace.get_current_span().get_span_context()
if not ctx.is_valid:
return None, None
return f"{ctx.trace_id:032x}", f"{ctx.span_id:016x}"
class SharedBase(DeclarativeBase):
pass
class AuditLog(SharedBase):
__tablename__ = "audit_log"
id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid4()))
occurred_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow, index=True)
action: Mapped[str] = mapped_column(String(100), index=True)
status: Mapped[str] = mapped_column(String(20), default="success")
actor_type: Mapped[str] = mapped_column(String(20), index=True)
actor_id: Mapped[str | None] = mapped_column(String(200), nullable=True)
domain: Mapped[str] = mapped_column(String(50), index=True)
service: Mapped[str] = mapped_column(String(50), index=True)
entity_type: Mapped[str | None] = mapped_column(String(100), nullable=True, index=True)
trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
span_id: Mapped[str | None] = mapped_column(String(16), nullable=True)
payload: Mapped[dict] = mapped_column(JSON, default=dict)
class JobExecution(SharedBase):
__tablename__ = "job_executions"
id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid4()))
started_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow, index=True)
completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True), nullable=True)
job_name: Mapped[str] = mapped_column(String(100), index=True)
domain: Mapped[str] = mapped_column(String(50), index=True)
status: Mapped[str] = mapped_column(String(20), index=True)
records_processed: Mapped[int | None] = mapped_column(Integer, nullable=True)
duration_ms: Mapped[int | None] = mapped_column(Integer, nullable=True)
error_message: Mapped[str | None] = mapped_column(Text, nullable=True)
trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
span_id: Mapped[str | None] = mapped_column(String(16), nullable=True)
class ExportRecord(SharedBase):
__tablename__ = "export_records"
id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid4()))
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow, index=True)
domain: Mapped[str] = mapped_column(String(50), index=True)
service: Mapped[str] = mapped_column(String(50))
source_view: Mapped[str] = mapped_column(String(100), index=True)
format: Mapped[str] = mapped_column(String(10))
filters_applied: Mapped[dict] = mapped_column(JSON, default=dict)
row_count: Mapped[int] = mapped_column(Integer)
file_size_bytes: Mapped[int] = mapped_column(Integer)
actor_id: Mapped[str | None] = mapped_column(String(200), nullable=True)
trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
span_id: Mapped[str | None] = mapped_column(String(16), nullable=True)
def append_audit(
factory: sessionmaker[Session],
*,
action: str,
actor_type: str,
domain: str,
service: str,
entity_type: str | None = None,
actor_id: str | None = None,
status: str = "success",
payload: dict | None = None,
) -> None:
trace_id, span_id = current_span_context()
try:
with factory() as session:
session.add(AuditLog(
action=action,
actor_type=actor_type,
actor_id=actor_id,
domain=domain,
service=service,
entity_type=entity_type,
trace_id=trace_id,
span_id=span_id,
status=status,
payload=payload or {},
))
session.commit()
except Exception as exc: # noqa: BLE001
LOGGER.warning("Failed to write audit record (action=%s): %s", action, exc)
def record_job_start(
factory: sessionmaker[Session],
job_name: str,
domain: str,
trace_id: str | None,
span_id: str | None,
) -> str:
job_id = str(uuid4())
try:
with factory() as session:
session.add(JobExecution(
id=job_id,
job_name=job_name,
domain=domain,
status="running",
trace_id=trace_id,
span_id=span_id,
))
session.commit()
except Exception as exc: # noqa: BLE001
LOGGER.warning("Failed to record job start (job=%s): %s", job_name, exc)
return job_id
def record_job_complete(
factory: sessionmaker[Session],
job_id: str,
started_at: datetime,
records_processed: int,
) -> None:
now = datetime.now(timezone.utc)
duration_ms = int((now - started_at).total_seconds() * 1000)
try:
with factory() as session:
session.query(JobExecution).filter_by(id=job_id).update({
"status": "success",
"completed_at": now,
"records_processed": records_processed,
"duration_ms": duration_ms,
})
session.commit()
except Exception as exc: # noqa: BLE001
LOGGER.warning("Failed to record job completion (id=%s): %s", job_id, exc)
def record_job_failure(
factory: sessionmaker[Session],
job_id: str,
started_at: datetime,
error_message: str,
) -> None:
now = datetime.now(timezone.utc)
duration_ms = int((now - started_at).total_seconds() * 1000)
try:
with factory() as session:
session.query(JobExecution).filter_by(id=job_id).update({
"status": "failure",
"completed_at": now,
"duration_ms": duration_ms,
"error_message": error_message[:2000],
})
session.commit()
except Exception as exc: # noqa: BLE001
LOGGER.warning("Failed to record job failure (id=%s): %s", job_id, exc)
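
A usage sketch (not part of the commit), wiring the audit helper to the PostgreSQL session factory from backend/app/core/db.py added later in this commit; the action name and payload are illustrative:

from app.core.audit import SharedBase, append_audit
from app.core.db import create_postgres_engine, create_session_factory

engine = create_postgres_engine()
SharedBase.metadata.create_all(bind=engine)  # assumes schema creation happens at startup
factory = create_session_factory(engine)

append_audit(
    factory,
    action="report.generated",  # illustrative action name
    actor_type="api",
    domain="aw",
    service="otel-bi-backend",
    payload={"format": "pdf"},
)

Note that append_audit deliberately swallows write failures and only logs a warning, so callers never fail just because auditing failed.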

@@ -1,7 +1,6 @@
from __future__ import annotations
from functools import lru_cache
from urllib.parse import quote_plus
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
@@ -22,30 +21,20 @@ class Settings(BaseSettings):
api_port: int = 8000
cors_origins: str = "http://localhost:5173"
request_timeout_seconds: float = 20.0
mssql_host: str = "localhost"
mssql_port: int = 1433
mssql_username: str = "sa"
mssql_password: str = "Password!123"
mssql_driver: str = "ODBC Driver 18 for SQL Server"
mssql_trust_server_certificate: bool = False
# Go analytics service
analytics_service_url: str = "http://localhost:8080"
wwi_database: str = "WorldWideImporters"
aw_database: str = "AdventureWorks2022DWH"
wwi_connection_string: str | None = None
aw_connection_string: str | None = None
# PostgreSQL — write store for derived data
postgres_host: str = "localhost"
postgres_port: int = 5432
postgres_database: str = "otel_bi_app"
postgres_username: str = "otel_bi_app"
postgres_password: str = "otel_bi_app"
postgres_sslmode: str = "require"
postgres_database: str = "otel_bi"
postgres_username: str = "otel_bi"
postgres_password: str = "otel_bi"
postgres_sslmode: str = "prefer"
postgres_connection_string: str | None = None
postgres_required: bool = True
query_service_url: str = "http://localhost:8101"
analytics_service_url: str = "http://localhost:8102"
persistence_service_url: str = "http://localhost:8103"
# Frontend OIDC JWT validation
require_frontend_auth: bool = True
frontend_jwt_issuer_url: str = ""
frontend_jwt_audience: str = ""
@@ -53,18 +42,21 @@ class Settings(BaseSettings):
frontend_jwt_algorithm: str = "RS256"
frontend_required_scopes: str = ""
frontend_clock_skew_seconds: int = Field(default=30, ge=0, le=300)
internal_service_auth_enabled: bool = True
internal_service_shared_secret: str = "change-me"
internal_service_token_ttl_seconds: int = Field(default=120, ge=30, le=900)
internal_service_token_audience: str = "bi-internal"
internal_service_allowed_issuers: str = "api-gateway"
internal_token_clock_skew_seconds: int = Field(default=15, ge=0, le=120)
# Frontend OIDC client config (served via /api/config)
frontend_oidc_client_id: str = ""
frontend_oidc_scope: str = "openid profile email"
# OpenTelemetry
otel_service_name: str = "otel-bi-backend"
otel_service_namespace: str = "final-thesis"
otel_collector_endpoint: str = "http://localhost:4318"
otel_export_timeout_ms: int = 10000
# Report output — points at the K8s CSI / SMB mountpoint in production
report_output_dir: str = "/tmp/otel-bi-reports"
# Analytics defaults (forwarded to Go service as query params)
forecast_horizon_days: int = Field(default=30, ge=7, le=180)
default_history_days: int = Field(default=365, ge=30, le=1460)
ranking_default_top_n: int = Field(default=10, ge=3, le=100)
@@ -72,58 +64,22 @@ class Settings(BaseSettings):
@property
def cors_origins_list(self) -> list[str]:
return [
origin.strip() for origin in self.cors_origins.split(",") if origin.strip()
]
return [o.strip() for o in self.cors_origins.split(",") if o.strip()]
@property
def frontend_required_scopes_list(self) -> list[str]:
return [
scope.strip()
for scope in self.frontend_required_scopes.split(" ")
if scope.strip()
]
@property
def internal_service_allowed_issuers_list(self) -> list[str]:
return [
issuer.strip()
for issuer in self.internal_service_allowed_issuers.split(",")
if issuer.strip()
]
def _build_mssql_connection_url(self, database: str) -> str:
driver = quote_plus(self.mssql_driver)
user = quote_plus(self.mssql_username)
password = quote_plus(self.mssql_password)
trust_cert = "yes" if self.mssql_trust_server_certificate else "no"
return (
f"mssql+pyodbc://{user}:{password}@{self.mssql_host}:{self.mssql_port}/{database}"
f"?driver={driver}&TrustServerCertificate={trust_cert}&ApplicationIntent=ReadOnly"
)
@property
def wwi_connection_url(self) -> str:
return self.wwi_connection_string or self._build_mssql_connection_url(
self.wwi_database
)
@property
def aw_connection_url(self) -> str:
return self.aw_connection_string or self._build_mssql_connection_url(
self.aw_database
)
return [s.strip() for s in self.frontend_required_scopes.split(" ") if s.strip()]
@property
def postgres_connection_url(self) -> str:
if self.postgres_connection_string:
return self.postgres_connection_string
from urllib.parse import quote_plus
user = quote_plus(self.postgres_username)
password = quote_plus(self.postgres_password)
return (
f"postgresql+psycopg://{user}:{password}@{self.postgres_host}:{self.postgres_port}/"
f"{self.postgres_database}?sslmode={self.postgres_sslmode}"
f"postgresql+psycopg://{user}:{password}@{self.postgres_host}:{self.postgres_port}"
f"/{self.postgres_database}?sslmode={self.postgres_sslmode}"
)
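
For illustration (assuming the otel_bi/prefer values above are the new defaults), the assembled DSN looks like this:

from app.core.config import settings

# With the defaults above this prints:
# postgresql+psycopg://otel_bi:otel_bi@localhost:5432/otel_bi?sslmode=prefer
print(settings.postgres_connection_url)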

backend/app/core/db.py Normal file

@@ -0,0 +1,27 @@
from __future__ import annotations
from sqlalchemy import create_engine
from sqlalchemy.engine import Engine
from sqlalchemy.orm import sessionmaker, Session
from app.core.config import settings
def create_postgres_engine() -> Engine:
return create_engine(
settings.postgres_connection_url,
pool_pre_ping=True,
pool_recycle=1800,
pool_size=5,
max_overflow=10,
future=True,
)
def create_session_factory(engine: Engine) -> sessionmaker[Session]:
return sessionmaker(
bind=engine,
autoflush=False,
autocommit=False,
expire_on_commit=False,
)
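
A minimal sketch of consuming the factory; expire_on_commit=False means ORM objects stay readable after the context commits:

from sqlalchemy import text

from app.core.db import create_postgres_engine, create_session_factory

engine = create_postgres_engine()
factory = create_session_factory(engine)

with factory() as session:
    # pool_pre_ping revalidates pooled connections before use, so a restarted
    # PostgreSQL does not surface as stale-connection errors on the next request
    session.execute(text("SELECT 1"))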

@@ -0,0 +1,27 @@
from __future__ import annotations
import os
from concurrent.futures import ThreadPoolExecutor
# Shared executor for CPU-bound analytics (pandas/sklearn) and sync MSSQL I/O
# (pyodbc is inherently synchronous and blocks the event loop if called directly).
#
# Workers are capped at 8 to avoid overwhelming the MSSQL connection pools.
# In K8s: set ANALYTICS_WORKERS to match the pod's CPU limit.
_WORKERS = min(8, int(os.environ.get("ANALYTICS_WORKERS", "0")) or (os.cpu_count() or 2) * 2)
_executor: ThreadPoolExecutor | None = None
def get_executor() -> ThreadPoolExecutor:
global _executor
if _executor is None:
_executor = ThreadPoolExecutor(max_workers=_WORKERS, thread_name_prefix="analytics")
return _executor
def shutdown_executor() -> None:
global _executor
if _executor is not None:
_executor.shutdown(wait=False)
_executor = None
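
A sketch of how a route handler might offload a blocking call onto this executor, per the comment above (the query function is hypothetical):

import asyncio

from app.core.executor import get_executor

def _blocking_query() -> list[dict]:
    ...  # pyodbc / pandas work that must not run on the event loop

async def fetch() -> list[dict]:
    loop = asyncio.get_running_loop()
    # run_in_executor keeps the event loop free while the worker thread blocks
    return await loop.run_in_executor(get_executor(), _blocking_query)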

@@ -0,0 +1,82 @@
from __future__ import annotations
import io
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4, landscape
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import cm
from reportlab.platypus import (
Paragraph,
SimpleDocTemplate,
Spacer,
Table,
TableStyle,
)
_PAGE_W, _ = landscape(A4)
_MARGIN = 1.5 * cm
_HEADER_BG = colors.HexColor("#1a56db")
_ROW_BG = colors.HexColor("#eef2ff")
def _pdf_table(rows: list[dict]) -> Table:
if not rows:
table_data: list[list] = [["No data available"]]
n_cols = 1
else:
headers = list(rows[0].keys())
n_cols = len(headers)
table_data = [headers] + [
[str(row.get(h, "")) for h in headers] for row in rows
]
col_w = (_PAGE_W - 2 * _MARGIN) / n_cols
t = Table(table_data, colWidths=[col_w] * n_cols, repeatRows=1)
style: list = [
("BACKGROUND", (0, 0), (-1, 0), _HEADER_BG),
("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
("FONTSIZE", (0, 0), (-1, 0), 8),
("FONTNAME", (0, 1), (-1, -1), "Helvetica"),
("FONTSIZE", (0, 1), (-1, -1), 7),
("ALIGN", (0, 0), (-1, -1), "LEFT"),
("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
("GRID", (0, 0), (-1, -1), 0.25, colors.HexColor("#d1d5db")),
("TOPPADDING", (0, 0), (-1, -1), 3),
("BOTTOMPADDING", (0, 0), (-1, -1), 3),
("LEFTPADDING", (0, 0), (-1, -1), 5),
("RIGHTPADDING", (0, 0), (-1, -1), 5),
]
for i in range(1, len(table_data)):
bg = _ROW_BG if i % 2 == 1 else colors.white
style.append(("BACKGROUND", (0, i), (-1, i), bg))
t.setStyle(TableStyle(style))
return t
def to_pdf_bytes(rows: list[dict], title: str, subtitle: str = "") -> bytes:
"""Serialise *rows* to a single-sheet PDF and return the raw bytes."""
buf = io.BytesIO()
styles = getSampleStyleSheet()
story = []
story.append(Paragraph(title, styles["Title"]))
if subtitle:
story.append(Spacer(1, 0.2 * cm))
story.append(Paragraph(subtitle, styles["Normal"]))
story.append(Spacer(1, 0.5 * cm))
story.append(_pdf_table(rows))
doc = SimpleDocTemplate(
buf,
pagesize=landscape(A4),
leftMargin=_MARGIN,
rightMargin=_MARGIN,
topMargin=_MARGIN,
bottomMargin=_MARGIN,
)
doc.build(story)
return buf.getvalue()
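
A usage sketch with illustrative rows; the module path is assumed, since the hunk does not show the filename:

from app.core.pdf import to_pdf_bytes  # module path assumed

rows = [
    {"product": "Road Bike", "revenue": "1234.50"},  # illustrative rows
    {"product": "Helmet", "revenue": "99.00"},
]
pdf_bytes = to_pdf_bytes(rows, title="Top Products", subtitle="Demo data")
with open("/tmp/top_products.pdf", "wb") as fh:
    fh.write(pdf_bytes)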

@@ -7,24 +7,27 @@ from typing import Any
from fastapi import FastAPI
from opentelemetry import metrics, trace
from opentelemetry.baggage.propagation import W3CBaggagePropagator
from opentelemetry.exporter.otlp.proto.http._log_exporter import OTLPLogExporter
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
from opentelemetry.instrumentation.logging import LoggingInstrumentor
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
from opentelemetry.propagate import set_global_textmap
from opentelemetry.propagators.composite import CompositePropagator
from opentelemetry.sdk._logs import LoggerProvider
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
from opentelemetry._logs import set_logger_provider
try:
from opentelemetry.instrumentation.system_metrics import SystemMetricsInstrumentor
except ImportError: # pragma: no cover - defensive fallback for minimal envs
except ImportError:
SystemMetricsInstrumentor = None # type: ignore[assignment]
from app.core.config import Settings
@@ -36,12 +39,14 @@ LOGGER = logging.getLogger(__name__)
class TelemetryProviders:
tracer_provider: TracerProvider
meter_provider: MeterProvider
logger_provider: LoggerProvider
def configure_otel(settings: Settings) -> TelemetryProviders:
set_global_textmap(
CompositePropagator([TraceContextTextMapPropagator(), W3CBaggagePropagator()])
)
resource = Resource.create(
{
"service.name": settings.otel_service_name,
@@ -50,34 +55,54 @@ def configure_otel(settings: Settings) -> TelemetryProviders:
}
)
trace_exporter = OTLPSpanExporter(
endpoint=f"{settings.otel_collector_endpoint}/v1/traces",
timeout=settings.otel_export_timeout_ms / 1000,
)
tracer_provider = TracerProvider(resource=resource)
tracer_provider.add_span_processor(BatchSpanProcessor(trace_exporter))
tracer_provider.add_span_processor(
BatchSpanProcessor(
OTLPSpanExporter(
endpoint=f"{settings.otel_collector_endpoint}/v1/traces",
timeout=settings.otel_export_timeout_ms / 1000,
)
)
)
trace.set_tracer_provider(tracer_provider)
metric_reader = PeriodicExportingMetricReader(
exporter=OTLPMetricExporter(
endpoint=f"{settings.otel_collector_endpoint}/v1/metrics",
timeout=settings.otel_export_timeout_ms / 1000,
),
export_interval_millis=10000,
meter_provider = MeterProvider(
resource=resource,
metric_readers=[
PeriodicExportingMetricReader(
exporter=OTLPMetricExporter(
endpoint=f"{settings.otel_collector_endpoint}/v1/metrics",
timeout=settings.otel_export_timeout_ms / 1000,
),
export_interval_millis=10_000,
)
],
)
meter_provider = MeterProvider(resource=resource, metric_readers=[metric_reader])
metrics.set_meter_provider(meter_provider)
logger_provider = LoggerProvider(resource=resource)
logger_provider.add_log_record_processor(
BatchLogRecordProcessor(
OTLPLogExporter(
endpoint=f"{settings.otel_collector_endpoint}/v1/logs",
timeout=settings.otel_export_timeout_ms / 1000,
)
)
)
set_logger_provider(logger_provider)
LoggingInstrumentor().instrument(set_logging_format=True)
if SystemMetricsInstrumentor is not None:
SystemMetricsInstrumentor().instrument()
else:
LOGGER.warning(
"System metrics instrumentor not available, runtime host metrics disabled."
)
LOGGER.info("OpenTelemetry providers configured")
LOGGER.warning("SystemMetricsInstrumentor not available — skipping.")
LOGGER.info("OTel providers configured", extra={"service.name": settings.otel_service_name})
return TelemetryProviders(
tracer_provider=tracer_provider, meter_provider=meter_provider
tracer_provider=tracer_provider,
meter_provider=meter_provider,
logger_provider=logger_provider,
)
@@ -85,19 +110,15 @@ def instrument_fastapi(app: FastAPI) -> None:
FastAPIInstrumentor.instrument_app(app)
def instrument_sqlalchemy_engines(engines: dict[str, Any]) -> None:
def instrument_sqlalchemy(engines: dict[str, Any]) -> None:
for engine in engines.values():
SQLAlchemyInstrumentor().instrument(engine=engine)
def instrument_httpx_clients() -> None:
HTTPXClientInstrumentor().instrument()
def shutdown_otel(providers: TelemetryProviders) -> None:
HTTPXClientInstrumentor().uninstrument()
if SystemMetricsInstrumentor is not None:
SystemMetricsInstrumentor().uninstrument()
LoggingInstrumentor().uninstrument()
providers.meter_provider.shutdown()
providers.tracer_provider.shutdown()
providers.logger_provider.shutdown()
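
A sketch of the intended lifecycle, assuming this module is importable as app.core.otel (the filename is not shown in this hunk): configure providers at startup, instrument, and shut down on exit so batch processors flush.

from contextlib import asynccontextmanager

from fastapi import FastAPI

from app.core.config import settings
from app.core.otel import (  # module path assumed
    configure_otel,
    instrument_fastapi,
    instrument_httpx_clients,
    shutdown_otel,
)

@asynccontextmanager
async def lifespan(app: FastAPI):
    providers = configure_otel(settings)
    instrument_fastapi(app)
    instrument_httpx_clients()
    try:
        yield
    finally:
        # flushes the batch span/metric/log processors before the process exits
        shutdown_otel(providers)

app = FastAPI(lifespan=lifespan)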

backend/app/core/reports.py Normal file

@@ -0,0 +1,187 @@
from __future__ import annotations
import uuid
from datetime import datetime, timezone
from pathlib import Path
import openpyxl
from reportlab.lib import colors
from reportlab.lib.pagesizes import A4, landscape
from reportlab.lib.styles import getSampleStyleSheet
from reportlab.lib.units import cm
from reportlab.platypus import (
PageBreak,
Paragraph,
SimpleDocTemplate,
Spacer,
Table,
TableStyle,
)
_PAGE_W, _ = landscape(A4)
_MARGIN = 1.5 * cm
_HEADER_BG = colors.HexColor("#1a56db")
_ROW_BG = colors.HexColor("#eef2ff")
def _normalise(rows: list[dict] | dict) -> list[dict]:
if isinstance(rows, dict):
return [rows]
return rows or []
# ---------------------------------------------------------------------------
# XLSX
# ---------------------------------------------------------------------------
def _save_xlsx(data: dict, path: str, report_id: str, generated_at: str) -> None:
wb = openpyxl.Workbook()
ws = wb.active
ws.title = "Metadata"
ws.append(["Field", "Value"])
ws.append(["Generated At (UTC)", generated_at])
ws.append(["Report ID", report_id])
sheets = [
("AW Sales KPIs", _normalise(data.get("aw_sales_kpis", {}))),
("AW Sales History", _normalise(data.get("aw_sales_history", []))),
("AW Sales Forecast", _normalise(data.get("aw_sales_forecast", []))),
("AW Rep Scores", _normalise(data.get("aw_rep_scores", []))),
("AW Product Demand", _normalise(data.get("aw_product_demand", []))),
("WWI Sales KPIs", _normalise(data.get("wwi_sales_kpis", {}))),
("WWI Stock Recs", _normalise(data.get("wwi_stock_recommendations", []))),
("WWI Supplier Scores", _normalise(data.get("wwi_supplier_scores", []))),
("WWI Business Events", _normalise(data.get("wwi_business_events", []))),
]
for sheet_name, rows in sheets:
ws = wb.create_sheet(title=sheet_name)
if rows:
ws.append(list(rows[0].keys()))
for row in rows:
ws.append([str(v) if v is not None else "" for v in row.values()])
else:
ws.append(["No data"])
wb.save(path)
# ---------------------------------------------------------------------------
# PDF
# ---------------------------------------------------------------------------
def _pdf_table(rows: list[dict] | dict) -> Table:
data = _normalise(rows)
if not data:
table_data: list[list] = [["No data available"]]
n_cols = 1
else:
headers = list(data[0].keys())
n_cols = len(headers)
table_data = [headers] + [
[str(row.get(h, "")) for h in headers] for row in data
]
col_w = (_PAGE_W - 2 * _MARGIN) / n_cols
t = Table(table_data, colWidths=[col_w] * n_cols, repeatRows=1)
style: list = [
("BACKGROUND", (0, 0), (-1, 0), _HEADER_BG),
("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
("FONTNAME", (0, 0), (-1, 0), "Helvetica-Bold"),
("FONTSIZE", (0, 0), (-1, 0), 8),
("FONTNAME", (0, 1), (-1, -1), "Helvetica"),
("FONTSIZE", (0, 1), (-1, -1), 7),
("ALIGN", (0, 0), (-1, -1), "LEFT"),
("VALIGN", (0, 0), (-1, -1), "MIDDLE"),
("GRID", (0, 0), (-1, -1), 0.25, colors.HexColor("#d1d5db")),
("TOPPADDING", (0, 0), (-1, -1), 3),
("BOTTOMPADDING", (0, 0), (-1, -1), 3),
("LEFTPADDING", (0, 0), (-1, -1), 5),
("RIGHTPADDING", (0, 0), (-1, -1), 5),
]
for i in range(1, len(table_data)):
bg = _ROW_BG if i % 2 == 1 else colors.white
style.append(("BACKGROUND", (0, i), (-1, i), bg))
t.setStyle(TableStyle(style))
return t
def _section(story: list, title: str, rows: list[dict] | dict, styles) -> None:
story.append(Paragraph(title, styles["Heading2"]))
story.append(Spacer(1, 0.25 * cm))
story.append(_pdf_table(rows))
story.append(Spacer(1, 0.5 * cm))
def _save_pdf(data: dict, path: str, report_id: str, generated_at: str) -> None:
styles = getSampleStyleSheet()
story: list = []
story.append(Paragraph("OTel BI Platform — Generated Report", styles["Title"]))
story.append(Spacer(1, 0.2 * cm))
story.append(Paragraph(
f"Report ID: {report_id}   |   Generated: {generated_at}",
styles["Normal"],
))
story.append(Spacer(1, 0.6 * cm))
story.append(Paragraph("AdventureWorks DW", styles["Heading1"]))
story.append(Spacer(1, 0.3 * cm))
_section(story, "Sales KPIs", data.get("aw_sales_kpis", {}), styles)
_section(story, "Sales History", data.get("aw_sales_history", []), styles)
story.append(PageBreak())
_section(story, "Sales Forecast", data.get("aw_sales_forecast", []), styles)
_section(story, "Rep Scores", data.get("aw_rep_scores", []), styles)
_section(story, "Product Demand", data.get("aw_product_demand", []), styles)
story.append(PageBreak())
story.append(Paragraph("WideWorldImporters DW", styles["Heading1"]))
story.append(Spacer(1, 0.3 * cm))
_section(story, "Sales KPIs", data.get("wwi_sales_kpis", {}), styles)
_section(story, "Stock Recommendations", data.get("wwi_stock_recommendations", []), styles)
story.append(PageBreak())
_section(story, "Supplier Scores", data.get("wwi_supplier_scores", []), styles)
_section(story, "Business Events", data.get("wwi_business_events", []), styles)
doc = SimpleDocTemplate(
path,
pagesize=landscape(A4),
leftMargin=_MARGIN,
rightMargin=_MARGIN,
topMargin=_MARGIN,
bottomMargin=_MARGIN,
)
doc.build(story)
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def save_report(data: dict, output_dir: str) -> dict:
"""Generate XLSX and PDF reports from aggregated BI data and write both to *output_dir*."""
now = datetime.now(timezone.utc)
ts = now.strftime("%Y%m%d_%H%M%S")
uid = uuid.uuid4().hex[:6]
report_id = f"{ts}_{uid}"
generated_at = now.isoformat()
out = Path(output_dir)
out.mkdir(parents=True, exist_ok=True)
base = f"otel_bi_report_{report_id}"
xlsx_path = str(out / f"{base}.xlsx")
pdf_path = str(out / f"{base}.pdf")
_save_xlsx(data, xlsx_path, report_id, generated_at)
_save_pdf(data, pdf_path, report_id, generated_at)
return {
"report_id": report_id,
"generated_at": generated_at,
"xlsx": {"filename": f"{base}.xlsx", "path": xlsx_path},
"pdf": {"filename": f"{base}.pdf", "path": pdf_path},
}
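
A usage sketch with an illustrative payload; keys absent from the dict simply render as "No data" sheets and tables rather than raising:

from app.core.config import settings
from app.core.reports import save_report

data = {
    "aw_sales_kpis": {"revenue": 1_000_000.0},  # illustrative payload
    "aw_sales_history": [],
}
meta = save_report(data, settings.report_output_dir)
# meta["xlsx"]["path"] / meta["pdf"]["path"] point at the files just written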

@@ -2,11 +2,9 @@ from __future__ import annotations
from dataclasses import dataclass
from functools import lru_cache
from time import time
from uuid import uuid4
import jwt
from fastapi import Depends, Header, HTTPException, status
from fastapi import Depends, HTTPException, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from jwt import InvalidTokenError, PyJWKClient
@@ -23,14 +21,6 @@ class FrontendPrincipal:
token: str
@dataclass
class InternalPrincipal:
subject: str
scopes: list[str]
claims: dict
token: str
class FrontendJWTVerifier:
@property
def jwks_url(self) -> str:
@@ -66,7 +56,6 @@ class FrontendJWTVerifier:
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="FRONTEND_JWT_AUDIENCE is not configured.",
)
try:
signing_key = self._jwks_client().get_signing_key_from_jwt(token).key
claims = jwt.decode(
@@ -92,103 +81,13 @@ class FrontendJWTVerifier:
scopes = self._extract_scopes(claims)
required = settings.frontend_required_scopes_list
missing = [scope for scope in required if scope not in scopes]
missing = [s for s in required if s not in scopes]
if missing:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail=f"Missing required scope(s): {', '.join(missing)}",
)
return FrontendPrincipal(
subject=subject, scopes=scopes, claims=claims, token=token
)
class InternalTokenManager:
token_type = "internal-service"
@staticmethod
def _assert_secret() -> str:
secret = settings.internal_service_shared_secret
if not secret or secret == "change-me":
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="INTERNAL_SERVICE_SHARED_SECRET must be configured.",
)
if len(secret.encode("utf-8")) < 32:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=(
"INTERNAL_SERVICE_SHARED_SECRET must be at least 32 bytes for "
"HS256 token signing."
),
)
return secret
def mint(
self,
*,
subject: str,
scopes: list[str],
source_service: str,
) -> str:
now = int(time())
payload = {
"sub": subject,
"scope": " ".join(scopes),
"iss": source_service,
"aud": settings.internal_service_token_audience,
"typ": self.token_type,
"iat": now,
"nbf": now,
"exp": now + settings.internal_service_token_ttl_seconds,
"jti": str(uuid4()),
}
return jwt.encode(payload, self._assert_secret(), algorithm="HS256")
def verify(self, token: str) -> InternalPrincipal:
try:
claims = jwt.decode(
token,
self._assert_secret(),
algorithms=["HS256"],
audience=settings.internal_service_token_audience,
options={
"require": ["sub", "iss", "aud", "exp", "iat", "nbf", "jti", "typ"]
},
leeway=settings.internal_token_clock_skew_seconds,
)
except InvalidTokenError as exc:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid internal service token.",
) from exc
subject = str(claims.get("sub") or "")
if not subject:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Internal token missing subject.",
)
issuer = str(claims.get("iss") or "")
if issuer not in settings.internal_service_allowed_issuers_list:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Internal token issuer is not allowed.",
)
token_type = str(claims.get("typ") or "")
if token_type != self.token_type:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Internal token type is invalid.",
)
scope = claims.get("scope")
scopes = [item for item in str(scope).split(" ") if item] if scope else []
return InternalPrincipal(
subject=subject, scopes=scopes, claims=claims, token=token
)
return FrontendPrincipal(subject=subject, scopes=scopes, claims=claims, token=token)
@lru_cache(maxsize=1)
@@ -196,36 +95,14 @@ def get_frontend_verifier() -> FrontendJWTVerifier:
return FrontendJWTVerifier()
@lru_cache(maxsize=1)
def get_internal_token_manager() -> InternalTokenManager:
return InternalTokenManager()
def require_frontend_principal(
credentials: HTTPAuthorizationCredentials | None = Depends(BEARER_SCHEME),
) -> FrontendPrincipal:
if not settings.require_frontend_auth:
return FrontendPrincipal(subject="anonymous", scopes=[], claims={}, token="")
if credentials is None or credentials.scheme.lower() != "bearer":
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Missing bearer token.",
)
return get_frontend_verifier().verify(credentials.credentials)
def require_internal_principal(
internal_token: str | None = Header(default=None, alias="x-internal-service-token"),
) -> InternalPrincipal:
if not settings.internal_service_auth_enabled:
return InternalPrincipal(
subject="internal-unauth", scopes=[], claims={}, token=""
)
if not internal_token:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Missing x-internal-service-token header.",
)
return get_internal_token_manager().verify(internal_token)
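
A sketch of protecting a route with the frontend principal dependency; the module path is assumed, since the hunk does not show the filename:

from fastapi import APIRouter, Depends

from app.core.security import (  # module path assumed
    FrontendPrincipal,
    require_frontend_principal,
)

router = APIRouter()

@router.get("/api/aw/forecasts")
def list_aw_forecasts(
    principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> dict:
    # subject/scopes come from the validated OIDC access token; with
    # require_frontend_auth=False an anonymous principal is injected instead
    return {"requested_by": principal.subject}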

@@ -1 +0,0 @@
"""Database helpers for warehouse connections."""

@@ -1,34 +0,0 @@
from __future__ import annotations
from sqlalchemy import create_engine, event
from sqlalchemy.engine import Engine
from app.core.config import settings
def _create_read_only_engine(connection_url: str) -> Engine:
engine = create_engine(
connection_url, pool_pre_ping=True, pool_recycle=3600, future=True
)
@event.listens_for(engine, "connect")
def _on_connect(dbapi_connection, _connection_record) -> None:
cursor = dbapi_connection.cursor()
try:
cursor.execute("SET TRANSACTION ISOLATION LEVEL READ COMMITTED;")
finally:
cursor.close()
return engine
def create_warehouse_engines() -> dict[str, Engine]:
return {
"wwi": _create_read_only_engine(settings.wwi_connection_url),
"aw": _create_read_only_engine(settings.aw_connection_url),
}
def dispose_engines(engines: dict[str, Engine]) -> None:
for engine in engines.values():
engine.dispose()

@@ -1,27 +0,0 @@
from __future__ import annotations
from sqlalchemy import create_engine
from sqlalchemy.engine import Engine
from sqlalchemy.orm import Session, sessionmaker
from app.core.config import settings
from app.db.postgres_models import Base
def create_postgres_engine() -> Engine:
return create_engine(
settings.postgres_connection_url,
pool_pre_ping=True,
pool_recycle=3600,
future=True,
)
def initialize_postgres_schema(engine: Engine) -> None:
Base.metadata.create_all(bind=engine)
def create_postgres_session_factory(engine: Engine) -> sessionmaker[Session]:
return sessionmaker(
bind=engine, autoflush=False, autocommit=False, expire_on_commit=False
)

@@ -1,86 +0,0 @@
from __future__ import annotations
from datetime import datetime, timezone
from uuid import uuid4
from sqlalchemy import JSON, DateTime, Float, Integer, String, Text
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
def _utcnow() -> datetime:
return datetime.now(timezone.utc)
class Base(DeclarativeBase):
pass
class AuditLog(Base):
__tablename__ = "audit_logs"
id: Mapped[str] = mapped_column(
String(36), primary_key=True, default=lambda: str(uuid4())
)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=_utcnow, index=True
)
method: Mapped[str] = mapped_column(String(12), index=True)
path: Mapped[str] = mapped_column(String(300), index=True)
query_string: Mapped[str] = mapped_column(String(1000), default="")
status_code: Mapped[int] = mapped_column(Integer, index=True)
duration_ms: Mapped[float] = mapped_column(Float)
trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
span_id: Mapped[str | None] = mapped_column(String(16), nullable=True, index=True)
client_ip: Mapped[str | None] = mapped_column(String(120), nullable=True)
user_agent: Mapped[str | None] = mapped_column(Text, nullable=True)
details: Mapped[dict] = mapped_column(JSON, default=dict)
class ForecastRun(Base):
__tablename__ = "forecast_runs"
id: Mapped[str] = mapped_column(
String(36), primary_key=True, default=lambda: str(uuid4())
)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=_utcnow, index=True
)
horizon_days: Mapped[int] = mapped_column(Integer)
point_count: Mapped[int] = mapped_column(Integer)
trigger_source: Mapped[str] = mapped_column(String(64), index=True)
trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
span_id: Mapped[str | None] = mapped_column(String(16), nullable=True, index=True)
payload: Mapped[list[dict]] = mapped_column(JSON, default=list)
class RankingRun(Base):
__tablename__ = "ranking_runs"
id: Mapped[str] = mapped_column(
String(36), primary_key=True, default=lambda: str(uuid4())
)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=_utcnow, index=True
)
top_n: Mapped[int] = mapped_column(Integer)
item_count: Mapped[int] = mapped_column(Integer)
trigger_source: Mapped[str] = mapped_column(String(64), index=True)
trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
span_id: Mapped[str | None] = mapped_column(String(16), nullable=True, index=True)
payload: Mapped[list[dict]] = mapped_column(JSON, default=list)
class RecommendationRun(Base):
__tablename__ = "recommendation_runs"
id: Mapped[str] = mapped_column(
String(36), primary_key=True, default=lambda: str(uuid4())
)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=_utcnow, index=True
)
item_count: Mapped[int] = mapped_column(Integer)
trigger_source: Mapped[str] = mapped_column(String(64), index=True)
trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
span_id: Mapped[str | None] = mapped_column(String(16), nullable=True, index=True)
payload: Mapped[list[dict]] = mapped_column(JSON, default=list)

@@ -1,167 +0,0 @@
from __future__ import annotations
AW_DAILY_SALES_QUERIES = [
"""
SELECT
CAST(d.FullDateAlternateKey AS date) AS sale_date,
SUM(f.SalesAmount) AS revenue,
SUM(f.TotalProductCost) AS cost,
SUM(f.OrderQuantity) AS quantity,
COUNT_BIG(*) AS orders
FROM dbo.FactInternetSales AS f
INNER JOIN dbo.DimDate AS d ON d.DateKey = f.OrderDateKey
GROUP BY CAST(d.FullDateAlternateKey AS date)
ORDER BY sale_date;
""",
"""
SELECT
CAST(OrderDate AS date) AS sale_date,
SUM(SalesAmount) AS revenue,
SUM(TotalProductCost) AS cost,
SUM(OrderQuantity) AS quantity,
COUNT_BIG(*) AS orders
FROM dbo.FactInternetSales
GROUP BY CAST(OrderDate AS date)
ORDER BY sale_date;
""",
]
WWI_DAILY_SALES_QUERIES = [
"""
SELECT
CAST(i.InvoiceDate AS date) AS sale_date,
SUM(il.ExtendedPrice) AS revenue,
SUM(il.TaxAmount) AS cost,
SUM(il.Quantity) AS quantity,
COUNT_BIG(DISTINCT i.InvoiceID) AS orders
FROM Sales.Invoices AS i
INNER JOIN Sales.InvoiceLines AS il ON il.InvoiceID = i.InvoiceID
GROUP BY CAST(i.InvoiceDate AS date)
ORDER BY sale_date;
""",
"""
SELECT
CAST(i.InvoiceDate AS date) AS sale_date,
SUM(il.UnitPrice * il.Quantity) AS revenue,
CAST(0 AS float) AS cost,
SUM(il.Quantity) AS quantity,
COUNT_BIG(DISTINCT i.InvoiceID) AS orders
FROM Sales.Invoices AS i
INNER JOIN Sales.InvoiceLines AS il ON il.InvoiceID = i.InvoiceID
GROUP BY CAST(i.InvoiceDate AS date)
ORDER BY sale_date;
""",
]
AW_PRODUCT_PERFORMANCE_QUERIES = [
"""
SELECT
p.ProductAlternateKey AS product_id,
p.EnglishProductName AS product_name,
COALESCE(sc.EnglishProductSubcategoryName, 'Unknown') AS category_name,
SUM(f.SalesAmount) AS revenue,
SUM(f.TotalProductCost) AS cost,
SUM(f.OrderQuantity) AS quantity,
COUNT_BIG(*) AS orders
FROM dbo.FactInternetSales AS f
INNER JOIN dbo.DimProduct AS p ON p.ProductKey = f.ProductKey
LEFT JOIN dbo.DimProductSubcategory AS sc ON sc.ProductSubcategoryKey = p.ProductSubcategoryKey
GROUP BY p.ProductAlternateKey, p.EnglishProductName, sc.EnglishProductSubcategoryName
ORDER BY revenue DESC;
""",
"""
SELECT
CAST(ProductKey AS nvarchar(100)) AS product_id,
CAST(ProductKey AS nvarchar(100)) AS product_name,
'Unknown' AS category_name,
SUM(SalesAmount) AS revenue,
SUM(TotalProductCost) AS cost,
SUM(OrderQuantity) AS quantity,
COUNT_BIG(*) AS orders
FROM dbo.FactInternetSales
GROUP BY ProductKey
ORDER BY revenue DESC;
""",
]
WWI_PRODUCT_PERFORMANCE_QUERIES = [
"""
SELECT
CAST(s.StockItemID AS nvarchar(100)) AS product_id,
s.StockItemName AS product_name,
COALESCE(cg.StockGroupName, 'Unknown') AS category_name,
SUM(il.ExtendedPrice) AS revenue,
SUM(il.TaxAmount) AS cost,
SUM(il.Quantity) AS quantity,
COUNT_BIG(*) AS orders
FROM Sales.InvoiceLines AS il
INNER JOIN Warehouse.StockItems AS s ON s.StockItemID = il.StockItemID
LEFT JOIN Warehouse.StockItemStockGroups AS sig ON sig.StockItemID = s.StockItemID
LEFT JOIN Warehouse.StockGroups AS cg ON cg.StockGroupID = sig.StockGroupID
GROUP BY s.StockItemID, s.StockItemName, cg.StockGroupName
ORDER BY revenue DESC;
""",
"""
SELECT
CAST(il.StockItemID AS nvarchar(100)) AS product_id,
CAST(il.StockItemID AS nvarchar(100)) AS product_name,
'Unknown' AS category_name,
SUM(il.UnitPrice * il.Quantity) AS revenue,
CAST(0 AS float) AS cost,
SUM(il.Quantity) AS quantity,
COUNT_BIG(*) AS orders
FROM Sales.InvoiceLines AS il
GROUP BY il.StockItemID
ORDER BY revenue DESC;
""",
]
AW_CUSTOMER_QUERIES = [
"""
SELECT
CAST(c.CustomerAlternateKey AS nvarchar(100)) AS customer_id,
c.FirstName + ' ' + c.LastName AS customer_name,
SUM(f.SalesAmount) AS revenue,
COUNT_BIG(*) AS orders
FROM dbo.FactInternetSales AS f
INNER JOIN dbo.DimCustomer AS c ON c.CustomerKey = f.CustomerKey
GROUP BY c.CustomerAlternateKey, c.FirstName, c.LastName
ORDER BY revenue DESC;
""",
"""
SELECT
CAST(CustomerKey AS nvarchar(100)) AS customer_id,
CAST(CustomerKey AS nvarchar(100)) AS customer_name,
SUM(SalesAmount) AS revenue,
COUNT_BIG(*) AS orders
FROM dbo.FactInternetSales
GROUP BY CustomerKey
ORDER BY revenue DESC;
""",
]
WWI_CUSTOMER_QUERIES = [
"""
SELECT
CAST(c.CustomerID AS nvarchar(100)) AS customer_id,
c.CustomerName AS customer_name,
SUM(il.ExtendedPrice) AS revenue,
COUNT_BIG(DISTINCT i.InvoiceID) AS orders
FROM Sales.Invoices AS i
INNER JOIN Sales.InvoiceLines AS il ON il.InvoiceID = i.InvoiceID
INNER JOIN Sales.Customers AS c ON c.CustomerID = i.CustomerID
GROUP BY c.CustomerID, c.CustomerName
ORDER BY revenue DESC;
""",
"""
SELECT
CAST(i.CustomerID AS nvarchar(100)) AS customer_id,
CAST(i.CustomerID AS nvarchar(100)) AS customer_name,
SUM(il.UnitPrice * il.Quantity) AS revenue,
COUNT_BIG(DISTINCT i.InvoiceID) AS orders
FROM Sales.Invoices AS i
INNER JOIN Sales.InvoiceLines AS il ON il.InvoiceID = i.InvoiceID
GROUP BY i.CustomerID
ORDER BY revenue DESC;
""",
]

@@ -0,0 +1,258 @@
from __future__ import annotations
import logging
from opentelemetry import metrics, trace
from sqlalchemy.orm import sessionmaker, Session
from app.core.audit import append_audit
from app.domain.aw.models import AWSalesForecast, AWRepScore, AWProductDemand, AWAnomalyRun
LOGGER = logging.getLogger(__name__)
tracer = trace.get_tracer("otel-bi.domain.aw")
meter = metrics.get_meter("otel-bi.domain.aw")
_persist_counter = meter.create_counter(
"aw_persist_writes_total",
description="Number of AW PostgreSQL write operations",
)
def _current_span_context() -> tuple[str | None, str | None]:
ctx = trace.get_current_span().get_span_context()
if not ctx.is_valid:
return None, None
return f"{ctx.trace_id:032x}", f"{ctx.span_id:016x}"
def _actor_type(trigger_source: str) -> str:
return "scheduler" if trigger_source.startswith("scheduler") else "api"
# ---------------------------------------------------------------------------
# Persist functions — called after Go service returns data
# ---------------------------------------------------------------------------
def persist_forecast(
factory: sessionmaker[Session],
data: list[dict],
horizon_days: int,
trigger_source: str,
) -> None:
trace_id, span_id = _current_span_context()
try:
with factory() as session:
session.add(AWSalesForecast(
horizon_days=horizon_days,
point_count=len(data),
trigger_source=trigger_source,
trace_id=trace_id,
span_id=span_id,
payload=data,
))
session.commit()
_persist_counter.add(1, {"entity": "sales_forecast"})
except Exception as exc: # noqa: BLE001
LOGGER.warning("Failed to persist AW forecast: %s", exc)
append_audit(
factory,
action="forecast.generated",
actor_type=_actor_type(trigger_source),
actor_id=trigger_source,
domain="aw",
service="otel-bi-backend",
entity_type="sales_forecast",
payload={"horizon_days": horizon_days, "point_count": len(data)},
)
def persist_rep_scores(
factory: sessionmaker[Session],
data: list[dict],
top_n: int,
trigger_source: str,
) -> None:
trace_id, span_id = _current_span_context()
try:
with factory() as session:
session.add(AWRepScore(
rep_count=len(data),
trigger_source=trigger_source,
trace_id=trace_id,
span_id=span_id,
payload=data,
))
session.commit()
_persist_counter.add(1, {"entity": "rep_scores"})
except Exception as exc: # noqa: BLE001
LOGGER.warning("Failed to persist AW rep scores: %s", exc)
append_audit(
factory,
action="scores.generated",
actor_type=_actor_type(trigger_source),
actor_id=trigger_source,
domain="aw",
service="otel-bi-backend",
entity_type="rep_scores",
payload={"rep_count": len(data), "top_n": top_n},
)
def persist_product_demand(
factory: sessionmaker[Session],
data: list[dict],
top_n: int,
trigger_source: str,
) -> None:
trace_id, span_id = _current_span_context()
try:
with factory() as session:
session.add(AWProductDemand(
product_count=len(data),
top_n=top_n,
trigger_source=trigger_source,
trace_id=trace_id,
span_id=span_id,
payload=data,
))
session.commit()
_persist_counter.add(1, {"entity": "product_demand"})
except Exception as exc: # noqa: BLE001
LOGGER.warning("Failed to persist AW product demand: %s", exc)
append_audit(
factory,
action="scores.generated",
actor_type=_actor_type(trigger_source),
actor_id=trigger_source,
domain="aw",
service="otel-bi-backend",
entity_type="product_demand",
payload={"product_count": len(data), "top_n": top_n},
)
def persist_anomaly_run(
factory: sessionmaker[Session],
data: list[dict],
trigger_source: str,
) -> None:
anomaly_count = sum(1 for p in data if p.get("is_anomaly"))
trace_id, span_id = _current_span_context()
try:
with factory() as session:
session.add(AWAnomalyRun(
anomaly_count=anomaly_count,
series_days=365,
window_days=30,
threshold_sigma=2.0,
trigger_source=trigger_source,
trace_id=trace_id,
span_id=span_id,
payload=data,
))
session.commit()
_persist_counter.add(1, {"entity": "anomaly_run"})
except Exception as exc: # noqa: BLE001
LOGGER.warning("Failed to persist AW anomaly run: %s", exc)
append_audit(
factory,
action="anomaly_detection.ran",
actor_type=_actor_type(trigger_source),
actor_id=trigger_source,
domain="aw",
service="otel-bi-backend",
entity_type="anomaly_detection",
payload={"series_days": 365, "window_days": 30, "anomaly_count": anomaly_count},
)
# ---------------------------------------------------------------------------
# Read functions — query PostgreSQL for stored results
# ---------------------------------------------------------------------------
def list_forecasts(factory: sessionmaker[Session], limit: int = 50) -> list[dict]:
with factory() as session:
rows = (
session.query(AWSalesForecast)
.order_by(AWSalesForecast.created_at.desc())
.limit(limit)
.all()
)
return [
{
"id": r.id,
"created_at": r.created_at.isoformat(),
"horizon_days": r.horizon_days,
"point_count": r.point_count,
"trigger_source": r.trigger_source,
"trace_id": r.trace_id,
}
for r in rows
]
def list_rep_scores(factory: sessionmaker[Session], limit: int = 50) -> list[dict]:
with factory() as session:
rows = (
session.query(AWRepScore)
.order_by(AWRepScore.computed_at.desc())
.limit(limit)
.all()
)
return [
{
"id": r.id,
"computed_at": r.computed_at.isoformat(),
"rep_count": r.rep_count,
"trigger_source": r.trigger_source,
"trace_id": r.trace_id,
"payload": r.payload,
}
for r in rows
]
def list_product_demand(factory: sessionmaker[Session], limit: int = 50) -> list[dict]:
with factory() as session:
rows = (
session.query(AWProductDemand)
.order_by(AWProductDemand.computed_at.desc())
.limit(limit)
.all()
)
return [
{
"id": r.id,
"computed_at": r.computed_at.isoformat(),
"product_count": r.product_count,
"top_n": r.top_n,
"trigger_source": r.trigger_source,
"trace_id": r.trace_id,
"payload": r.payload,
}
for r in rows
]
def list_anomaly_runs(factory: sessionmaker[Session], limit: int = 20) -> list[dict]:
with factory() as session:
rows = (
session.query(AWAnomalyRun)
.order_by(AWAnomalyRun.detected_at.desc())
.limit(limit)
.all()
)
return [
{
"id": r.id,
"detected_at": r.detected_at.isoformat(),
"anomaly_count": r.anomaly_count,
"series_days": r.series_days,
"window_days": r.window_days,
"threshold_sigma": r.threshold_sigma,
"trigger_source": r.trigger_source,
"trace_id": r.trace_id,
}
for r in rows
]
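
A hypothetical persist-then-read flow; "points" stands in for the forecast payload returned by the Go analytics service, whose exact shape is not shown in this hunk:

from app.core.db import create_postgres_engine, create_session_factory

factory = create_session_factory(create_postgres_engine())

points = [{"date": "2026-01-01", "revenue": 1000.0}]  # illustrative payload
persist_forecast(factory, points, horizon_days=30, trigger_source="api:manual")
recent = list_forecasts(factory, limit=10)  # newest-first run metadata for the UI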

@@ -0,0 +1,77 @@
from __future__ import annotations
from datetime import datetime, timezone
from uuid import uuid4
from sqlalchemy import JSON, DateTime, Integer, String
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
def _utcnow() -> datetime:
return datetime.now(timezone.utc)
class AWBase(DeclarativeBase):
pass
class AWSalesForecast(AWBase):
"""Persisted AW sales forecast runs."""
__tablename__ = "aw_sales_forecasts"
id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid4()))
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow, index=True)
horizon_days: Mapped[int] = mapped_column(Integer)
point_count: Mapped[int] = mapped_column(Integer)
trigger_source: Mapped[str] = mapped_column(String(64), index=True)
trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
span_id: Mapped[str | None] = mapped_column(String(16), nullable=True)
payload: Mapped[list[dict]] = mapped_column(JSON, default=list)
class AWRepScore(AWBase):
"""Persisted AW sales rep performance scoring runs."""
__tablename__ = "aw_rep_scores"
id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid4()))
computed_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow, index=True)
rep_count: Mapped[int] = mapped_column(Integer)
trigger_source: Mapped[str] = mapped_column(String(64), index=True)
trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
span_id: Mapped[str | None] = mapped_column(String(16), nullable=True)
payload: Mapped[list[dict]] = mapped_column(JSON, default=list)
class AWProductDemand(AWBase):
"""Persisted AW product demand scoring runs."""
__tablename__ = "aw_product_demand"
id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid4()))
computed_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow, index=True)
product_count: Mapped[int] = mapped_column(Integer)
top_n: Mapped[int] = mapped_column(Integer)
trigger_source: Mapped[str] = mapped_column(String(64), index=True)
trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
span_id: Mapped[str | None] = mapped_column(String(16), nullable=True)
payload: Mapped[list[dict]] = mapped_column(JSON, default=list)
class AWAnomalyRun(AWBase):
"""Persisted AW revenue anomaly detection runs."""
__tablename__ = "aw_anomaly_runs"
id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid4()))
detected_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow, index=True)
anomaly_count: Mapped[int] = mapped_column(Integer)
series_days: Mapped[int] = mapped_column(Integer)
window_days: Mapped[int] = mapped_column(Integer)
threshold_sigma: Mapped[float] = mapped_column(default=2.0)
trigger_source: Mapped[str] = mapped_column(String(64), index=True)
trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
span_id: Mapped[str | None] = mapped_column(String(16), nullable=True)
# Full annotated series (date, revenue, rolling_mean, lower_band, upper_band, is_anomaly, z_score)
payload: Mapped[list[dict]] = mapped_column(JSON, default=list)

@@ -0,0 +1,131 @@
from __future__ import annotations
# ---------------------------------------------------------------------------
# AdventureWorksDW2022 — read-only MSSQL queries
# Each list contains fallback variants tried in order.
# ---------------------------------------------------------------------------
# Daily sales combining FactInternetSales + FactResellerSales
AW_DAILY_SALES: list[str] = [
"""
SELECT
CAST(d.FullDateAlternateKey AS date) AS sale_date,
SUM(f.SalesAmount) AS revenue,
SUM(f.TotalProductCost) AS cost,
SUM(f.OrderQuantity) AS quantity,
COUNT_BIG(*) AS orders
FROM dbo.FactInternetSales AS f
INNER JOIN dbo.DimDate AS d ON d.DateKey = f.OrderDateKey
GROUP BY CAST(d.FullDateAlternateKey AS date)
UNION ALL
SELECT
CAST(d.FullDateAlternateKey AS date) AS sale_date,
SUM(r.SalesAmount) AS revenue,
SUM(r.TotalProductCost) AS cost,
SUM(r.OrderQuantity) AS quantity,
COUNT_BIG(*) AS orders
FROM dbo.FactResellerSales AS r
INNER JOIN dbo.DimDate AS d ON d.DateKey = r.OrderDateKey
GROUP BY CAST(d.FullDateAlternateKey AS date)
ORDER BY sale_date;
""",
# Fallback: internet sales only using OrderDate column directly
"""
SELECT
CAST(OrderDate AS date) AS sale_date,
SUM(SalesAmount) AS revenue,
SUM(TotalProductCost) AS cost,
SUM(OrderQuantity) AS quantity,
COUNT_BIG(*) AS orders
FROM dbo.FactInternetSales
GROUP BY CAST(OrderDate AS date)
ORDER BY sale_date;
""",
]
# Sales rep performance — reseller sales attributed to employees
AW_REP_PERFORMANCE: list[str] = [
"""
SELECT
e.EmployeeKey AS employee_key,
e.FirstName + ' ' + e.LastName AS rep_name,
COALESCE(e.Title, 'Sales Rep') AS rep_title,
COALESCE(st.SalesTerritoryRegion, 'Unknown') AS territory,
SUM(r.SalesAmount) AS revenue,
SUM(r.TotalProductCost) AS cost,
COUNT_BIG(*) AS orders,
AVG(r.SalesAmount) AS avg_deal_size
FROM dbo.FactResellerSales AS r
INNER JOIN dbo.DimEmployee AS e
ON e.EmployeeKey = r.EmployeeKey
INNER JOIN dbo.DimSalesTerritory AS st
ON st.SalesTerritoryKey = r.SalesTerritoryKey
WHERE e.SalesPersonFlag = 1
GROUP BY
e.EmployeeKey,
e.FirstName, e.LastName,
e.Title,
st.SalesTerritoryRegion
ORDER BY revenue DESC;
""",
# Fallback without SalesPersonFlag filter
"""
SELECT
e.EmployeeKey AS employee_key,
e.FirstName + ' ' + e.LastName AS rep_name,
COALESCE(e.Title, 'Employee') AS rep_title,
'Unknown' AS territory,
SUM(r.SalesAmount) AS revenue,
SUM(r.TotalProductCost) AS cost,
COUNT_BIG(*) AS orders,
AVG(r.SalesAmount) AS avg_deal_size
FROM dbo.FactResellerSales AS r
INNER JOIN dbo.DimEmployee AS e ON e.EmployeeKey = r.EmployeeKey
GROUP BY e.EmployeeKey, e.FirstName, e.LastName, e.Title
ORDER BY revenue DESC;
""",
]
# Product demand — internet sales with full category hierarchy
AW_PRODUCT_DEMAND: list[str] = [
"""
SELECT
p.ProductAlternateKey AS product_id,
p.EnglishProductName AS product_name,
COALESCE(pc.EnglishProductCategoryName, 'Unknown') AS category,
SUM(f.SalesAmount) AS revenue,
SUM(f.TotalProductCost) AS cost,
SUM(f.OrderQuantity) AS quantity,
COUNT_BIG(*) AS orders
FROM dbo.FactInternetSales AS f
INNER JOIN dbo.DimProduct AS p
ON p.ProductKey = f.ProductKey
LEFT JOIN dbo.DimProductSubcategory AS sc
ON sc.ProductSubcategoryKey = p.ProductSubcategoryKey
LEFT JOIN dbo.DimProductCategory AS pc
ON pc.ProductCategoryKey = sc.ProductCategoryKey
GROUP BY
p.ProductAlternateKey,
p.EnglishProductName,
pc.EnglishProductCategoryName
ORDER BY revenue DESC;
""",
# Fallback: no category join
"""
SELECT
CAST(f.ProductKey AS nvarchar(50)) AS product_id,
COALESCE(p.EnglishProductName, CAST(f.ProductKey AS nvarchar(50))) AS product_name,
'Unknown' AS category,
SUM(f.SalesAmount) AS revenue,
SUM(f.TotalProductCost) AS cost,
SUM(f.OrderQuantity) AS quantity,
COUNT_BIG(*) AS orders
FROM dbo.FactInternetSales AS f
LEFT JOIN dbo.DimProduct AS p ON p.ProductKey = f.ProductKey
GROUP BY f.ProductKey, p.EnglishProductName
ORDER BY revenue DESC;
""",
]
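
The hunk does not show the executor for these lists; a minimal sketch of the try-in-order semantics the header comment describes:

from sqlalchemy import text
from sqlalchemy.engine import Engine

def run_with_fallback(engine: Engine, variants: list[str]) -> list[dict]:
    """Run each SQL variant in order; return rows from the first that succeeds."""
    last_error: Exception | None = None
    for sql in variants:
        try:
            with engine.connect() as conn:
                return [dict(row) for row in conn.execute(text(sql)).mappings()]
        except Exception as exc:  # e.g. a schema variant lacking a table or column
            last_error = exc
    raise RuntimeError("all query variants failed") from last_error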

@@ -0,0 +1,297 @@
from __future__ import annotations
import logging
from datetime import datetime, timedelta, timezone
from opentelemetry import metrics, trace
from sqlalchemy.orm import sessionmaker, Session
from app.core.audit import append_audit
from app.domain.wwi.models import (
WWIReorderRecommendation,
WWISupplierScore,
WWIWhatIfScenario,
WWIBusinessEvent,
)
LOGGER = logging.getLogger(__name__)
tracer = trace.get_tracer("otel-bi.domain.wwi")
meter = metrics.get_meter("otel-bi.domain.wwi")
_persist_counter = meter.create_counter(
"wwi_persist_writes_total",
description="Number of WWI PostgreSQL write operations",
)
_event_counter = meter.create_counter(
"wwi_business_events_generated_total",
description="Business events automatically generated",
)
def _current_span_context() -> tuple[str | None, str | None]:
ctx = trace.get_current_span().get_span_context()
if not ctx.is_valid:
return None, None
return f"{ctx.trace_id:032x}", f"{ctx.span_id:016x}"
def _actor_type(trigger_source: str) -> str:
return "scheduler" if trigger_source.startswith("scheduler") else "api"
# ---------------------------------------------------------------------------
# Persist functions — called after Go service returns data
# ---------------------------------------------------------------------------
def persist_reorder_recommendations(
factory: sessionmaker[Session],
data: list[dict],
trigger_source: str,
) -> None:
trace_id, span_id = _current_span_context()
try:
with factory() as session:
session.add(WWIReorderRecommendation(
item_count=len(data),
trigger_source=trigger_source,
trace_id=trace_id,
span_id=span_id,
payload=data,
))
session.commit()
_persist_counter.add(1, {"entity": "reorder_recommendations"})
except Exception as exc: # noqa: BLE001
LOGGER.warning("Failed to persist WWI reorder recommendations: %s", exc)
append_audit(
factory,
action="recommendations.generated",
actor_type=_actor_type(trigger_source),
actor_id=trigger_source,
domain="wwi",
service="otel-bi-backend",
entity_type="reorder_recommendations",
payload={"item_count": len(data)},
)
def persist_supplier_scores(
factory: sessionmaker[Session],
data: list[dict],
top_n: int,
trigger_source: str,
) -> None:
trace_id, span_id = _current_span_context()
try:
with factory() as session:
session.add(WWISupplierScore(
supplier_count=len(data),
top_n=top_n,
trigger_source=trigger_source,
trace_id=trace_id,
span_id=span_id,
payload=data,
))
session.commit()
_persist_counter.add(1, {"entity": "supplier_scores"})
except Exception as exc: # noqa: BLE001
LOGGER.warning("Failed to persist WWI supplier scores: %s", exc)
append_audit(
factory,
action="scores.generated",
actor_type=_actor_type(trigger_source),
actor_id=trigger_source,
domain="wwi",
service="otel-bi-backend",
entity_type="supplier_scores",
payload={"supplier_count": len(data), "top_n": top_n},
)
def persist_whatif_scenario(
factory: sessionmaker[Session],
result: dict,
) -> None:
trace_id, span_id = _current_span_context()
try:
with factory() as session:
session.add(WWIWhatIfScenario(
stock_item_key=result["stock_item_key"],
stock_item_name=result["stock_item_name"],
demand_multiplier=result["demand_multiplier"],
current_stock=result["current_stock"],
avg_daily_demand=result["adjusted_daily_demand"],
projected_days_until_stockout=result.get("projected_days_until_stockout"),
recommended_order_qty=float(result["recommended_order_qty"]),
trace_id=trace_id,
span_id=span_id,
result=result,
))
session.commit()
_persist_counter.add(1, {"entity": "whatif_scenario"})
except Exception as exc: # noqa: BLE001
LOGGER.warning("Failed to persist WWI what-if scenario: %s", exc)
append_audit(
factory,
action="scenario.submitted",
actor_type="user",
domain="wwi",
service="otel-bi-backend",
entity_type="whatif_scenario",
payload={
"stock_item_key": result["stock_item_key"],
"demand_multiplier": result["demand_multiplier"],
"projected_days_until_stockout": result.get("projected_days_until_stockout"),
},
)
# ---------------------------------------------------------------------------
# Business events — generated from reorder data in Python (PostgreSQL writes)
# ---------------------------------------------------------------------------
def generate_stock_events(
factory: sessionmaker[Session],
recommendations: list[dict],
) -> None:
"""Write LOW_STOCK events for HIGH-urgency items, deduplicating within 24h."""
trace_id, span_id = _current_span_context()
cutoff = datetime.now(timezone.utc) - timedelta(hours=24)
try:
with factory() as session:
for item in recommendations:
if item.get("urgency") != "HIGH":
continue
entity_key = str(item["stock_item_key"])
existing = (
session.query(WWIBusinessEvent)
.filter(
WWIBusinessEvent.event_type == "LOW_STOCK",
WWIBusinessEvent.entity_key == entity_key,
WWIBusinessEvent.occurred_at >= cutoff,
)
.first()
)
if existing:
continue
days_str = (
f"in {item['days_until_stockout']:.1f} days"
if item.get("days_until_stockout") is not None
else "immediately"
)
session.add(WWIBusinessEvent(
event_type="LOW_STOCK",
severity="HIGH",
entity_key=entity_key,
entity_name=item["stock_item_name"],
message=(
f"Stock for '{item['stock_item_name']}' will be exhausted in {days_str}. "
f"Current stock: {item['current_stock']:.0f} units, "
f"daily demand: {item['avg_daily_demand']:.1f} units."
),
trace_id=trace_id,
span_id=span_id,
details={
"current_stock": item["current_stock"],
"avg_daily_demand": item["avg_daily_demand"],
"recommended_reorder_qty": item["recommended_reorder_qty"],
},
))
_event_counter.add(1, {"event_type": "LOW_STOCK"})
session.commit()
except Exception as exc: # noqa: BLE001
LOGGER.warning("Failed to persist WWI business events: %s", exc)
# ---------------------------------------------------------------------------
# Read functions — query PostgreSQL for stored results
# ---------------------------------------------------------------------------
def get_business_events(factory: sessionmaker[Session], limit: int = 100) -> list[dict]:
with tracer.start_as_current_span("wwi.analytics.business_events"):
with factory() as session:
rows = (
session.query(WWIBusinessEvent)
.order_by(WWIBusinessEvent.occurred_at.desc())
.limit(limit)
.all()
)
return [
{
"id": r.id,
"occurred_at": r.occurred_at.isoformat(),
"event_type": r.event_type,
"severity": r.severity,
"entity_key": r.entity_key,
"entity_name": r.entity_name,
"message": r.message,
"trace_id": r.trace_id,
"details": r.details,
}
for r in rows
]
def list_reorder_recommendations(factory: sessionmaker[Session], limit: int = 50) -> list[dict]:
with factory() as session:
rows = (
session.query(WWIReorderRecommendation)
.order_by(WWIReorderRecommendation.created_at.desc())
.limit(limit)
.all()
)
return [
{
"id": r.id,
"created_at": r.created_at.isoformat(),
"item_count": r.item_count,
"trigger_source": r.trigger_source,
"trace_id": r.trace_id,
}
for r in rows
]
def list_supplier_scores(factory: sessionmaker[Session], limit: int = 50) -> list[dict]:
with factory() as session:
rows = (
session.query(WWISupplierScore)
.order_by(WWISupplierScore.computed_at.desc())
.limit(limit)
.all()
)
return [
{
"id": r.id,
"computed_at": r.computed_at.isoformat(),
"supplier_count": r.supplier_count,
"top_n": r.top_n,
"trigger_source": r.trigger_source,
"trace_id": r.trace_id,
"payload": r.payload,
}
for r in rows
]
def list_whatif_scenarios(factory: sessionmaker[Session], limit: int = 50) -> list[dict]:
with factory() as session:
rows = (
session.query(WWIWhatIfScenario)
.order_by(WWIWhatIfScenario.created_at.desc())
.limit(limit)
.all()
)
return [
{
"id": r.id,
"created_at": r.created_at.isoformat(),
"stock_item_key": r.stock_item_key,
"stock_item_name": r.stock_item_name,
"demand_multiplier": r.demand_multiplier,
"projected_days_until_stockout": r.projected_days_until_stockout,
"recommended_order_qty": r.recommended_order_qty,
"result": r.result,
}
for r in rows
]

80
backend/app/domain/wwi/models.py Normal file

@@ -0,0 +1,80 @@
from __future__ import annotations
from datetime import datetime, timezone
from uuid import uuid4
from sqlalchemy import JSON, DateTime, Float, Integer, String, Text
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
def _utcnow() -> datetime:
return datetime.now(timezone.utc)
class WWIBase(DeclarativeBase):
pass
class WWIReorderRecommendation(WWIBase):
"""Persisted WWI stock reorder recommendation runs."""
__tablename__ = "wwi_reorder_recommendations"
id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid4()))
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow, index=True)
item_count: Mapped[int] = mapped_column(Integer)
trigger_source: Mapped[str] = mapped_column(String(64), index=True)
trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
span_id: Mapped[str | None] = mapped_column(String(16), nullable=True)
payload: Mapped[list[dict]] = mapped_column(JSON, default=list)
class WWISupplierScore(WWIBase):
"""Persisted WWI supplier reliability scoring runs."""
__tablename__ = "wwi_supplier_scores"
id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid4()))
computed_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow, index=True)
supplier_count: Mapped[int] = mapped_column(Integer)
top_n: Mapped[int] = mapped_column(Integer)
trigger_source: Mapped[str] = mapped_column(String(64), index=True)
trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
span_id: Mapped[str | None] = mapped_column(String(16), nullable=True)
payload: Mapped[list[dict]] = mapped_column(JSON, default=list)
class WWIWhatIfScenario(WWIBase):
"""User-submitted what-if simulation results."""
__tablename__ = "wwi_whatif_scenarios"
id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid4()))
created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow, index=True)
stock_item_key: Mapped[int] = mapped_column(Integer, index=True)
stock_item_name: Mapped[str] = mapped_column(String(200))
demand_multiplier: Mapped[float] = mapped_column(Float)
current_stock: Mapped[float] = mapped_column(Float)
avg_daily_demand: Mapped[float] = mapped_column(Float)
projected_days_until_stockout: Mapped[float | None] = mapped_column(Float, nullable=True)
recommended_order_qty: Mapped[float] = mapped_column(Float)
trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
span_id: Mapped[str | None] = mapped_column(String(16), nullable=True)
result: Mapped[dict] = mapped_column(JSON, default=dict)
class WWIBusinessEvent(WWIBase):
"""Automatically generated business alert events."""
__tablename__ = "wwi_business_events"
id: Mapped[str] = mapped_column(String(36), primary_key=True, default=lambda: str(uuid4()))
occurred_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=_utcnow, index=True)
event_type: Mapped[str] = mapped_column(String(50), index=True) # LOW_STOCK, ORDER_DROP, SUPPLIER_RISK
severity: Mapped[str] = mapped_column(String(20), index=True) # HIGH, MEDIUM, LOW
entity_key: Mapped[str] = mapped_column(String(100), index=True)
entity_name: Mapped[str] = mapped_column(String(200))
message: Mapped[str] = mapped_column(Text)
trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
span_id: Mapped[str | None] = mapped_column(String(16), nullable=True)
details: Mapped[dict] = mapped_column(JSON, default=dict)


@@ -0,0 +1,171 @@
from __future__ import annotations
# ---------------------------------------------------------------------------
# WideWorldImportersDW — read-only MSSQL queries
#
# Column names in this DW use spaces and require bracket notation.
# Each list contains fallback variants tried in order; a sketch of such a
# fallback runner appears at the end of this module.
# ---------------------------------------------------------------------------
# Daily sales from Fact.Sale joined to Dimension.Date
WWI_DAILY_SALES: list[str] = [
"""
SELECT
d.[Date] AS sale_date,
SUM(s.[Total Excluding Tax]) AS revenue,
SUM(s.[Total Excluding Tax] - s.[Profit]) AS cost,
SUM(CAST(s.[Quantity] AS FLOAT)) AS quantity,
COUNT_BIG(*) AS orders
FROM [Fact].[Sale] AS s
INNER JOIN [Dimension].[Date] AS d
ON d.[Date Key] = s.[Delivery Date Key]
GROUP BY d.[Date]
ORDER BY d.[Date];
""",
# Fallback: use Invoice Date Key if Delivery Date Key is missing
"""
SELECT
d.[Date] AS sale_date,
SUM(s.[Total Excluding Tax]) AS revenue,
SUM(s.[Total Excluding Tax] - s.[Profit]) AS cost,
SUM(CAST(s.[Quantity] AS FLOAT)) AS quantity,
COUNT_BIG(*) AS orders
FROM [Fact].[Sale] AS s
INNER JOIN [Dimension].[Date] AS d
ON d.[Date Key] = s.[Invoice Date Key]
GROUP BY d.[Date]
ORDER BY d.[Date];
""",
]
# Current stock levels per stock item (net movement quantity)
WWI_STOCK_LEVELS: list[str] = [
"""
SELECT
si.[Stock Item Key] AS stock_item_key,
si.[Stock Item] AS stock_item_name,
si.[Unit Price] AS unit_price,
si.[Lead Time Days] AS lead_time_days,
SUM(CAST(m.[Quantity] AS FLOAT)) AS current_stock
FROM [Dimension].[Stock Item] AS si
LEFT JOIN [Fact].[Movement] AS m
ON m.[Stock Item Key] = si.[Stock Item Key]
WHERE si.[Stock Item Key] <> 0
GROUP BY
si.[Stock Item Key],
si.[Stock Item],
si.[Unit Price],
si.[Lead Time Days];
""",
# Fallback: without movement (returns 0 stock)
"""
SELECT
si.[Stock Item Key] AS stock_item_key,
si.[Stock Item] AS stock_item_name,
si.[Unit Price] AS unit_price,
si.[Lead Time Days] AS lead_time_days,
CAST(0 AS FLOAT) AS current_stock
FROM [Dimension].[Stock Item] AS si
WHERE si.[Stock Item Key] <> 0;
""",
]
# 90-day demand velocity per stock item from Fact.Sale
WWI_DEMAND_VELOCITY: list[str] = [
"""
SELECT
s.[Stock Item Key] AS stock_item_key,
SUM(CAST(s.[Quantity] AS FLOAT)) AS qty_sold_90d,
COUNT_BIG(DISTINCT s.[WWI Invoice ID]) AS invoice_count_90d
FROM [Fact].[Sale] AS s
INNER JOIN [Dimension].[Date] AS d
ON d.[Date Key] = s.[Delivery Date Key]
WHERE d.[Date] >= DATEADD(day, -90, GETDATE())
AND s.[Stock Item Key] <> 0
GROUP BY s.[Stock Item Key];
""",
"""
SELECT
s.[Stock Item Key] AS stock_item_key,
SUM(CAST(s.[Quantity] AS FLOAT)) AS qty_sold_90d,
COUNT_BIG(DISTINCT s.[WWI Invoice ID]) AS invoice_count_90d
FROM [Fact].[Sale] AS s
INNER JOIN [Dimension].[Date] AS d
ON d.[Date Key] = s.[Invoice Date Key]
WHERE d.[Date] >= DATEADD(day, -90, GETDATE())
AND s.[Stock Item Key] <> 0
GROUP BY s.[Stock Item Key];
""",
]
# Supplier reliability data from Fact.Purchase
WWI_SUPPLIER_PERFORMANCE: list[str] = [
"""
SELECT
sup.[Supplier Key] AS supplier_key,
sup.[Supplier] AS supplier_name,
sup.[Category] AS category,
COUNT_BIG(*) AS total_orders,
SUM(CAST(p.[Ordered Outers] AS FLOAT)) AS total_ordered_outers,
SUM(CAST(p.[Received Outers] AS FLOAT)) AS total_received_outers,
SUM(CASE WHEN p.[Is Order Finalized] = 1 THEN 1 ELSE 0 END) AS finalized_orders
FROM [Dimension].[Supplier] AS sup
INNER JOIN [Fact].[Purchase] AS p
ON p.[Supplier Key] = sup.[Supplier Key]
WHERE sup.[Supplier Key] <> 0
GROUP BY
sup.[Supplier Key],
sup.[Supplier],
sup.[Category]
ORDER BY total_orders DESC;
""",
# Fallback: without Is Order Finalized (every order counted as finalized)
"""
SELECT
sup.[Supplier Key] AS supplier_key,
sup.[Supplier] AS supplier_name,
sup.[Category] AS category,
COUNT_BIG(*) AS total_orders,
SUM(CAST(p.[Ordered Outers] AS FLOAT)) AS total_ordered_outers,
SUM(CAST(p.[Received Outers] AS FLOAT)) AS total_received_outers,
COUNT_BIG(*) AS finalized_orders
FROM [Dimension].[Supplier] AS sup
INNER JOIN [Fact].[Purchase] AS p
ON p.[Supplier Key] = sup.[Supplier Key]
WHERE sup.[Supplier Key] <> 0
GROUP BY
sup.[Supplier Key],
sup.[Supplier],
sup.[Category]
ORDER BY total_orders DESC;
""",
]
# Single stock item detail for what-if scenario computation
WWI_STOCK_ITEM_DETAIL = """
SELECT
si.[Stock Item Key] AS stock_item_key,
si.[Stock Item] AS stock_item_name,
si.[Unit Price] AS unit_price,
si.[Lead Time Days] AS lead_time_days,
COALESCE(SUM(CAST(m.[Quantity] AS FLOAT)), 0) AS current_stock
FROM [Dimension].[Stock Item] AS si
LEFT JOIN [Fact].[Movement] AS m
ON m.[Stock Item Key] = si.[Stock Item Key]
WHERE si.[Stock Item Key] = :stock_item_key
GROUP BY
si.[Stock Item Key],
si.[Stock Item],
si.[Unit Price],
si.[Lead Time Days];
"""
WWI_STOCK_ITEM_DEMAND = """
SELECT
SUM(CAST(s.[Quantity] AS FLOAT)) / NULLIF(90.0, 0) AS avg_daily_demand
FROM [Fact].[Sale] AS s
INNER JOIN [Dimension].[Date] AS d
ON d.[Date Key] = s.[Delivery Date Key]
WHERE s.[Stock Item Key] = :stock_item_key
AND d.[Date] >= DATEADD(day, -90, GETDATE());
"""

100
backend/app/main.py Normal file

@@ -0,0 +1,100 @@
from __future__ import annotations
import logging
from contextlib import asynccontextmanager
import httpx
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.base import BaseHTTPMiddleware
from starlette.requests import Request as StarletteRequest
from starlette.responses import Response as StarletteResponse
from app.core.audit import SharedBase
from app.core.config import settings
from app.core.db import create_postgres_engine, create_session_factory
from app.core.executor import get_executor, shutdown_executor
from app.core.otel import configure_otel, instrument_fastapi, instrument_sqlalchemy, shutdown_otel
from app.domain.aw.models import AWBase
from app.domain.wwi.models import WWIBase
from app.routers import aw, platform, wwi
LOGGER = logging.getLogger(__name__)
class SecurityHeadersMiddleware(BaseHTTPMiddleware):
async def dispatch(self, request: StarletteRequest, call_next) -> StarletteResponse:
response = await call_next(request)
response.headers["X-Content-Type-Options"] = "nosniff"
response.headers["X-Frame-Options"] = "DENY"
response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
return response
@asynccontextmanager
async def lifespan(app: FastAPI):
# --- startup ---
providers = configure_otel(settings)
LOGGER.info("OTel configured for %s", settings.otel_service_name)
pg_engine = create_postgres_engine()
instrument_sqlalchemy({"pg": pg_engine})
SharedBase.metadata.create_all(pg_engine)
AWBase.metadata.create_all(pg_engine)
WWIBase.metadata.create_all(pg_engine)
pg_factory = create_session_factory(pg_engine)
analytics_client = httpx.AsyncClient(
base_url=settings.analytics_service_url,
timeout=httpx.Timeout(60.0),
)
executor = get_executor()
app.state.pg_engine = pg_engine
app.state.pg_factory = pg_factory
app.state.analytics_client = analytics_client
LOGGER.info("Ready: analytics_service=%s thread_pool_workers=%d",
settings.analytics_service_url, executor._max_workers) # noqa: SLF001
instrument_fastapi(app)
yield
# --- shutdown ---
LOGGER.info("Shutting down")
await analytics_client.aclose()
shutdown_executor()
pg_engine.dispose()
shutdown_otel(providers)
def create_app() -> FastAPI:
app = FastAPI(
title="otel-bi-backend",
version="1.0.0",
lifespan=lifespan,
docs_url="/docs" if settings.app_env != "prod" else None,
redoc_url=None,
)
app.add_middleware(SecurityHeadersMiddleware)
app.add_middleware(
CORSMiddleware,
allow_origins=settings.cors_origins_list,
allow_credentials=True,
allow_methods=["GET", "POST", "DELETE"],
allow_headers=["Authorization", "Content-Type"],
)
app.include_router(platform.router)
app.include_router(aw.router)
app.include_router(wwi.router)
return app
app = create_app()
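# Local run sketch (assumes uvicorn as the ASGI server, which this diff does not pin):
#   uvicorn app.main:app --host 0.0.0.0 --port 8000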


464
backend/app/routers/aw.py Normal file

@@ -0,0 +1,464 @@
from __future__ import annotations
import asyncio
import logging
from datetime import datetime, timezone
from typing import Any, Literal
import httpx
from fastapi import APIRouter, Depends, HTTPException, Query, Request, Response
from opentelemetry import propagate, trace
from sqlalchemy.orm import sessionmaker, Session
from app.core.audit import ExportRecord, append_audit, current_span_context
from app.core.config import settings
from app.core.executor import get_executor
from app.core.export import to_pdf_bytes
from app.core.security import FrontendPrincipal, require_frontend_principal
from app.domain.aw import analytics
LOGGER = logging.getLogger(__name__)
tracer = trace.get_tracer("otel-bi.routers.aw")
router = APIRouter(prefix="/api/aw", tags=["aw"])
_XLSX_MEDIA = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
_PDF_MEDIA = "application/pdf"
def _trace_headers() -> dict[str, str]:
ctx = trace.get_current_span().get_span_context()
if not ctx.is_valid:
return {}
return {"x-trace-id": f"{ctx.trace_id:032x}", "x-span-id": f"{ctx.span_id:016x}"}
def _propagation_headers() -> dict[str, str]:
headers: dict[str, str] = {}
propagate.inject(headers)
return headers
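# _trace_headers() surfaces the current ids as custom response headers
# (x-trace-id / x-span-id); _propagation_headers() injects the standard W3C
# trace-context header for the outbound hop, e.g.
#   {"traceparent": "00-<32-hex trace id>-<16-hex span id>-01"}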
async def _get(client: httpx.AsyncClient, path: str, params: dict | None = None) -> Any:
try:
r = await client.get(path, params=params, headers=_propagation_headers())
r.raise_for_status()
return r.json()
except httpx.HTTPStatusError as exc:
raise HTTPException(status_code=502, detail=f"Analytics service error: {exc.response.status_code}") from exc
except httpx.RequestError as exc:
raise HTTPException(status_code=503, detail=f"Analytics service unavailable: {exc}") from exc
async def _post(client: httpx.AsyncClient, path: str, json: dict) -> Any:
try:
r = await client.post(path, json=json, headers=_propagation_headers())
r.raise_for_status()
return r.json()
except httpx.HTTPStatusError as exc:
raise HTTPException(
status_code=502 if exc.response.status_code != 404 else 404,
detail=f"Analytics service error: {exc.response.status_code}",
) from exc
except httpx.RequestError as exc:
raise HTTPException(status_code=503, detail=f"Analytics service unavailable: {exc}") from exc
def _record_export(
pg_factory: sessionmaker[Session],
domain: str,
source_view: str,
fmt: str,
filters: dict,
row_count: int,
file_size_bytes: int,
actor_id: str,
trace_id: str | None,
span_id: str | None,
) -> None:
try:
with pg_factory() as session:
session.add(ExportRecord(
domain=domain, service="otel-bi-backend", source_view=source_view,
format=fmt, filters_applied=filters, row_count=row_count,
file_size_bytes=file_size_bytes, actor_id=actor_id,
trace_id=trace_id, span_id=span_id,
))
session.commit()
except Exception as exc: # noqa: BLE001
LOGGER.warning("Failed to record export metadata: %s", exc)
append_audit(
pg_factory,
action="export.created", actor_type="user", actor_id=actor_id,
domain=domain, service="otel-bi-backend", entity_type=source_view,
payload={"format": fmt, "row_count": row_count, "file_size_bytes": file_size_bytes, **filters},
)
async def _proxy_xlsx(
client: httpx.AsyncClient,
go_path: str,
params: dict,
filename_stem: str,
domain: str,
source_view: str,
filters: dict,
actor_id: str,
pg_factory: sessionmaker[Session],
) -> Response:
"""Fetch XLSX bytes from Go, write ExportRecord, return response."""
try:
r = await client.get(go_path, params=params, headers=_propagation_headers())
r.raise_for_status()
except httpx.HTTPStatusError as exc:
raise HTTPException(status_code=502, detail=f"Analytics service error: {exc.response.status_code}") from exc
except httpx.RequestError as exc:
raise HTTPException(status_code=503, detail=f"Analytics service unavailable: {exc}") from exc
content = r.content
row_count = int(r.headers.get("X-Row-Count", "0"))
today = datetime.now(timezone.utc).strftime("%Y%m%d")
filename = f"{filename_stem}_{today}.xlsx"
trace_id, span_id = current_span_context()
await asyncio.get_running_loop().run_in_executor(
get_executor(),
lambda: _record_export(pg_factory, domain, source_view, "xlsx", filters,
row_count, len(content), actor_id, trace_id, span_id),
)
return Response(
content=content, media_type=_XLSX_MEDIA,
headers={"Content-Disposition": f'attachment; filename="{filename}"'},
)
def _make_pdf(
data: list[dict],
filename_stem: str,
pdf_title: str,
domain: str,
source_view: str,
filters: dict,
actor_id: str,
pg_factory: sessionmaker[Session],
) -> Response:
with tracer.start_as_current_span(f"export.{domain}.{source_view}") as span:
span.set_attribute("export.format", "pdf")
span.set_attribute("export.row_count", len(data))
content = to_pdf_bytes(data, title=pdf_title)
span.set_attribute("export.file_size_bytes", len(content))
today = datetime.now(timezone.utc).strftime("%Y%m%d")
filename = f"{filename_stem}_{today}.pdf"
trace_id, span_id = current_span_context()
_record_export(pg_factory, domain, source_view, "pdf", filters,
len(data), len(content), actor_id, trace_id, span_id)
return Response(
content=content, media_type=_PDF_MEDIA,
headers={"Content-Disposition": f'attachment; filename="{filename}"'},
)
# ---------------------------------------------------------------------------
# Sales
# ---------------------------------------------------------------------------
@router.get("/sales/kpis")
async def aw_sales_kpis(
response: Response, request: Request,
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> dict:
response.headers.update(_trace_headers())
return await _get(request.app.state.analytics_client, "/aw/sales/kpis")
@router.get("/sales/history")
async def aw_sales_history(
response: Response, request: Request,
days_back: int = Query(default=settings.default_history_days, ge=30, le=1460),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
return await _get(request.app.state.analytics_client, "/aw/sales/history", {"days_back": days_back})
@router.get("/sales/forecast")
async def aw_sales_forecast(
response: Response, request: Request,
horizon_days: int = Query(default=settings.forecast_horizon_days, ge=7, le=180),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
client = request.app.state.analytics_client
pg_factory = request.app.state.pg_factory
data = await _get(client, "/aw/sales/forecast", {"horizon_days": horizon_days})
loop = asyncio.get_running_loop()
await loop.run_in_executor(
get_executor(),
lambda: analytics.persist_forecast(pg_factory, data, horizon_days, "api.sales.forecast"),
)
return data
# ---------------------------------------------------------------------------
# Rep scores & product demand
# ---------------------------------------------------------------------------
@router.get("/reps/scores")
async def aw_rep_scores(
response: Response, request: Request,
top_n: int = Query(default=settings.ranking_default_top_n, ge=3, le=100),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
client = request.app.state.analytics_client
pg_factory = request.app.state.pg_factory
data = await _get(client, "/aw/reps/scores", {"top_n": top_n})
loop = asyncio.get_running_loop()
await loop.run_in_executor(
get_executor(),
lambda: analytics.persist_rep_scores(pg_factory, data, top_n, "api.reps.scores"),
)
return data
@router.get("/products/demand")
async def aw_product_demand(
response: Response, request: Request,
top_n: int = Query(default=settings.ranking_default_top_n, ge=3, le=100),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
client = request.app.state.analytics_client
pg_factory = request.app.state.pg_factory
data = await _get(client, "/aw/products/demand", {"top_n": top_n})
loop = asyncio.get_running_loop()
await loop.run_in_executor(
get_executor(),
lambda: analytics.persist_product_demand(pg_factory, data, top_n, "api.products.demand"),
)
return data
# ---------------------------------------------------------------------------
# Anomaly detection
# ---------------------------------------------------------------------------
@router.get("/anomalies")
async def aw_anomalies(
response: Response, request: Request,
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
client = request.app.state.analytics_client
pg_factory = request.app.state.pg_factory
data = await _get(client, "/aw/anomalies")
loop = asyncio.get_running_loop()
await loop.run_in_executor(
get_executor(),
lambda: analytics.persist_anomaly_run(pg_factory, data, "api.aw.anomalies"),
)
return data
# ---------------------------------------------------------------------------
# Stored records
# ---------------------------------------------------------------------------
@router.get("/records/forecasts")
async def aw_records_forecasts(
response: Response, request: Request,
limit: int = Query(default=settings.storage_default_limit, ge=1, le=500),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
pg_factory = request.app.state.pg_factory
return await asyncio.get_running_loop().run_in_executor(
get_executor(), lambda: analytics.list_forecasts(pg_factory, limit=limit)
)
@router.get("/records/rep-scores")
async def aw_records_rep_scores(
response: Response, request: Request,
limit: int = Query(default=settings.storage_default_limit, ge=1, le=500),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
pg_factory = request.app.state.pg_factory
return await asyncio.get_running_loop().run_in_executor(
get_executor(), lambda: analytics.list_rep_scores(pg_factory, limit=limit)
)
@router.get("/records/product-demand")
async def aw_records_product_demand(
response: Response, request: Request,
limit: int = Query(default=settings.storage_default_limit, ge=1, le=500),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
pg_factory = request.app.state.pg_factory
return await asyncio.get_running_loop().run_in_executor(
get_executor(), lambda: analytics.list_product_demand(pg_factory, limit=limit)
)
# ---------------------------------------------------------------------------
# Exports
# ---------------------------------------------------------------------------
@router.get("/export/sales-history")
async def export_aw_sales_history(
request: Request,
format: Literal["xlsx", "pdf"] = Query(default="xlsx"),
days_back: int = Query(default=settings.default_history_days, ge=30, le=1460),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> Response:
client = request.app.state.analytics_client
pg_factory = request.app.state.pg_factory
actor_id = principal.subject
filters = {"days_back": days_back}
if format == "xlsx":
return await _proxy_xlsx(client, "/aw/export/sales-history", filters,
"aw_sales_history", "aw", "sales-history", filters, actor_id, pg_factory)
data = await _get(client, "/aw/sales/history", filters)
return await asyncio.get_running_loop().run_in_executor(
get_executor(),
lambda: _make_pdf(data, "aw_sales_history", "AdventureWorks — Sales History",
"aw", "sales-history", filters, actor_id, pg_factory),
)
@router.get("/export/sales-forecast")
async def export_aw_sales_forecast(
request: Request,
format: Literal["xlsx", "pdf"] = Query(default="xlsx"),
horizon_days: int = Query(default=settings.forecast_horizon_days, ge=7, le=180),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> Response:
client = request.app.state.analytics_client
pg_factory = request.app.state.pg_factory
actor_id = principal.subject
filters = {"horizon_days": horizon_days}
if format == "xlsx":
return await _proxy_xlsx(client, "/aw/export/sales-forecast", filters,
"aw_sales_forecast", "aw", "sales-forecast", filters, actor_id, pg_factory)
data = await _get(client, "/aw/sales/forecast", filters)
return await asyncio.get_running_loop().run_in_executor(
get_executor(),
lambda: _make_pdf(data, "aw_sales_forecast", "AdventureWorks — Sales Forecast",
"aw", "sales-forecast", filters, actor_id, pg_factory),
)
@router.get("/export/rep-scores")
async def export_aw_rep_scores(
request: Request,
format: Literal["xlsx", "pdf"] = Query(default="xlsx"),
top_n: int = Query(default=settings.ranking_default_top_n, ge=3, le=100),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> Response:
client = request.app.state.analytics_client
pg_factory = request.app.state.pg_factory
actor_id = principal.subject
filters = {"top_n": top_n}
if format == "xlsx":
return await _proxy_xlsx(client, "/aw/export/rep-scores", filters,
"aw_rep_scores", "aw", "rep-scores", filters, actor_id, pg_factory)
data = await _get(client, "/aw/reps/scores", filters)
return await asyncio.get_running_loop().run_in_executor(
get_executor(),
lambda: _make_pdf(data, "aw_rep_scores", "AdventureWorks — Sales Rep Performance",
"aw", "rep-scores", filters, actor_id, pg_factory),
)
@router.get("/export/product-demand")
async def export_aw_product_demand(
request: Request,
format: Literal["xlsx", "pdf"] = Query(default="xlsx"),
top_n: int = Query(default=settings.ranking_default_top_n, ge=3, le=100),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> Response:
client = request.app.state.analytics_client
pg_factory = request.app.state.pg_factory
actor_id = principal.subject
filters = {"top_n": top_n}
if format == "xlsx":
return await _proxy_xlsx(client, "/aw/export/product-demand", filters,
"aw_product_demand", "aw", "product-demand", filters, actor_id, pg_factory)
data = await _get(client, "/aw/products/demand", filters)
return await asyncio.get_running_loop().run_in_executor(
get_executor(),
lambda: _make_pdf(data, "aw_product_demand", "AdventureWorks — Product Demand Scores",
"aw", "product-demand", filters, actor_id, pg_factory),
)
# ---------------------------------------------------------------------------
# Job triggers
# ---------------------------------------------------------------------------
@router.post("/jobs/{job_name}/trigger")
async def trigger_aw_job(
job_name: str, response: Response, request: Request,
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> dict:
response.headers.update(_trace_headers())
return await _post(request.app.state.analytics_client, f"/scheduler/aw/{job_name}/trigger", {})
@router.get("/jobs")
async def aw_job_history(
response: Response, request: Request,
limit: int = Query(default=50, ge=1, le=200),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
pg_factory = request.app.state.pg_factory
return await asyncio.get_running_loop().run_in_executor(
get_executor(), lambda: _list_jobs(pg_factory, "aw", limit)
)
def _list_jobs(pg_factory, domain: str, limit: int) -> list[dict]:
from app.core.audit import JobExecution
with pg_factory() as session:
rows = (
session.query(JobExecution)
.filter_by(domain=domain)
.order_by(JobExecution.started_at.desc())
.limit(limit)
.all()
)
return [
{
"id": r.id,
"job_name": r.job_name,
"domain": r.domain,
"status": r.status,
"started_at": r.started_at.isoformat(),
"completed_at": r.completed_at.isoformat() if r.completed_at else None,
"duration_ms": r.duration_ms,
"records_processed": r.records_processed,
"error_message": r.error_message,
"trace_id": r.trace_id,
}
for r in rows
]

254
backend/app/routers/platform.py Normal file

@@ -0,0 +1,254 @@
from __future__ import annotations
import asyncio
import logging
import httpx
from fastapi import APIRouter, Depends, Query, Request, Response
from opentelemetry import propagate, trace
from app.core.audit import AuditLog, ExportRecord, append_audit
from app.core.config import settings
from app.core.executor import get_executor
from app.core.reports import save_report
from app.core.security import FrontendPrincipal, require_frontend_principal
from app.domain.wwi import analytics as wwi_analytics
LOGGER = logging.getLogger(__name__)
router = APIRouter(tags=["platform"])
def _trace_headers() -> dict[str, str]:
ctx = trace.get_current_span().get_span_context()
if not ctx.is_valid:
return {}
return {"x-trace-id": f"{ctx.trace_id:032x}", "x-span-id": f"{ctx.span_id:016x}"}
# ---------------------------------------------------------------------------
# System
# ---------------------------------------------------------------------------
@router.get("/api/config")
def frontend_config() -> dict:
return {
"oidc_enabled": settings.require_frontend_auth,
"oidc_authority": settings.frontend_jwt_issuer_url,
"oidc_client_id": settings.frontend_oidc_client_id,
"oidc_scope": settings.frontend_oidc_scope,
}
@router.get("/api/health")
def health(response: Response) -> dict:
response.headers.update(_trace_headers())
return {"status": "ok", "service": "otel-bi-backend"}
@router.get("/api/telemetry/status")
def telemetry_status(
response: Response,
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> dict:
response.headers.update(_trace_headers())
return {
"status": "instrumented",
"service": "otel-bi-backend",
"collector_endpoint": settings.otel_collector_endpoint,
"subject": principal.subject,
**_trace_headers(),
}
# ---------------------------------------------------------------------------
# Cross-domain report generation
# ---------------------------------------------------------------------------
def _propagation_headers() -> dict[str, str]:
headers: dict[str, str] = {}
propagate.inject(headers)
return headers
@router.post("/api/reports/generate")
async def generate_report(
request: Request,
response: Response,
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> dict:
response.headers.update(_trace_headers())
client = request.app.state.analytics_client
pg_factory = request.app.state.pg_factory
actor_id = principal.subject
loop = asyncio.get_running_loop()
executor = get_executor()
async def _fetch(path: str, params: dict | None = None):
try:
r = await client.get(path, params=params, headers=_propagation_headers())
r.raise_for_status()
return r.json()
except (httpx.HTTPStatusError, httpx.RequestError):
return {}
(
aw_kpis, aw_history, aw_forecast,
aw_reps, aw_products,
wwi_kpis, wwi_stock, wwi_suppliers,
) = await asyncio.gather(
_fetch("/aw/sales/kpis"),
_fetch("/aw/sales/history", {"days_back": settings.default_history_days}),
_fetch("/aw/sales/forecast", {"horizon_days": settings.forecast_horizon_days}),
_fetch("/aw/reps/scores", {"top_n": settings.ranking_default_top_n}),
_fetch("/aw/products/demand", {"top_n": settings.ranking_default_top_n}),
_fetch("/wwi/sales/kpis"),
_fetch("/wwi/stock/recommendations"),
_fetch("/wwi/suppliers/scores", {"top_n": settings.ranking_default_top_n}),
)
wwi_events = await loop.run_in_executor(
executor, lambda: wwi_analytics.get_business_events(pg_factory, 200)
)
data = {
"aw_sales_kpis": aw_kpis,
"aw_sales_history": aw_history,
"aw_sales_forecast": aw_forecast,
"aw_rep_scores": aw_reps,
"aw_product_demand": aw_products,
"wwi_sales_kpis": wwi_kpis,
"wwi_stock_recommendations": wwi_stock,
"wwi_supplier_scores": wwi_suppliers,
"wwi_business_events": wwi_events,
}
report = await loop.run_in_executor(
executor, lambda: save_report(data, settings.report_output_dir)
)
append_audit(
pg_factory,
action="report.generated", actor_type="user", actor_id=actor_id,
domain="platform", service="otel-bi-backend", entity_type="full_report",
payload={
"report_id": report["report_id"],
"xlsx": report["xlsx"]["filename"],
"pdf": report["pdf"]["filename"],
},
)
return {**report, "output_dir": settings.report_output_dir, **_trace_headers()}
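# Response sketch (top-level keys grounded in the code above; nested fields
# beyond "filename" come from app.core.reports and are assumptions here):
#   {"report_id": "...", "xlsx": {"filename": "..."}, "pdf": {"filename": "..."},
#    "output_dir": "<settings.report_output_dir>", "x-trace-id": "...", "x-span-id": "..."}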
# ---------------------------------------------------------------------------
# Audit log
# ---------------------------------------------------------------------------
@router.get("/api/audit")
async def audit_log(
response: Response, request: Request,
limit: int = Query(default=100, ge=1, le=500),
domain: str | None = Query(default=None),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
pg_factory = request.app.state.pg_factory
def _query():
with pg_factory() as session:
q = session.query(AuditLog).order_by(AuditLog.occurred_at.desc())
if domain:
q = q.filter_by(domain=domain)
rows = q.limit(limit).all()
return [
{
"id": r.id,
"occurred_at": r.occurred_at.isoformat(),
"action": r.action,
"status": r.status,
"actor_type": r.actor_type,
"actor_id": r.actor_id,
"domain": r.domain,
"service": r.service,
"entity_type": r.entity_type,
"trace_id": r.trace_id,
"payload": r.payload,
}
for r in rows
]
return await asyncio.get_running_loop().run_in_executor(get_executor(), _query)
# ---------------------------------------------------------------------------
# Export history
# ---------------------------------------------------------------------------
@router.get("/api/exports")
async def export_history(
response: Response, request: Request,
limit: int = Query(default=100, ge=1, le=500),
domain: str | None = Query(default=None),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
pg_factory = request.app.state.pg_factory
def _query():
with pg_factory() as session:
q = session.query(ExportRecord).order_by(ExportRecord.created_at.desc())
if domain:
q = q.filter_by(domain=domain)
rows = q.limit(limit).all()
return [
{
"id": r.id,
"exported_at": r.created_at.isoformat(),
"domain": r.domain,
"service": r.service,
"source_view": r.source_view,
"format": r.format,
"filters_applied": r.filters_applied,
"row_count": r.row_count,
"file_size_bytes": r.file_size_bytes,
"actor_id": r.actor_id,
"trace_id": r.trace_id,
}
for r in rows
]
return await asyncio.get_running_loop().run_in_executor(get_executor(), _query)
# ---------------------------------------------------------------------------
# Job history (platform-level — both domains in one response)
# ---------------------------------------------------------------------------
@router.get("/api/jobs/aw")
async def jobs_aw(
response: Response, request: Request,
limit: int = Query(default=50, ge=1, le=200),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
from app.routers.aw import _list_jobs
pg_factory = request.app.state.pg_factory
return await asyncio.get_running_loop().run_in_executor(
get_executor(), lambda: _list_jobs(pg_factory, "aw", limit)
)
@router.get("/api/jobs/wwi")
async def jobs_wwi(
response: Response, request: Request,
limit: int = Query(default=50, ge=1, le=200),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
from app.routers.wwi import _list_jobs
pg_factory = request.app.state.pg_factory
return await asyncio.get_running_loop().run_in_executor(
get_executor(), lambda: _list_jobs(pg_factory, "wwi", limit)
)

440
backend/app/routers/wwi.py Normal file

@@ -0,0 +1,440 @@
from __future__ import annotations
import asyncio
import logging
from datetime import datetime, timezone
from typing import Any, Literal
import httpx
from fastapi import APIRouter, Depends, HTTPException, Query, Request, Response
from opentelemetry import propagate, trace
from pydantic import BaseModel, Field
from sqlalchemy.orm import sessionmaker, Session
from app.core.audit import ExportRecord, append_audit, current_span_context
from app.core.config import settings
from app.core.executor import get_executor
from app.core.export import to_pdf_bytes
from app.core.security import FrontendPrincipal, require_frontend_principal
from app.domain.wwi import analytics
LOGGER = logging.getLogger(__name__)
tracer = trace.get_tracer("otel-bi.routers.wwi")
router = APIRouter(prefix="/api/wwi", tags=["wwi"])
_XLSX_MEDIA = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
_PDF_MEDIA = "application/pdf"
class WhatIfRequest(BaseModel):
stock_item_key: int = Field(..., ge=1)
demand_multiplier: float = Field(default=1.0, ge=0.1, le=5.0)
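# Example request (the stock_item_key value is illustrative):
#   POST /api/wwi/scenarios
#   {"stock_item_key": 42, "demand_multiplier": 1.5}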
def _trace_headers() -> dict[str, str]:
ctx = trace.get_current_span().get_span_context()
if not ctx.is_valid:
return {}
return {"x-trace-id": f"{ctx.trace_id:032x}", "x-span-id": f"{ctx.span_id:016x}"}
def _propagation_headers() -> dict[str, str]:
headers: dict[str, str] = {}
propagate.inject(headers)
return headers
async def _get(client: httpx.AsyncClient, path: str, params: dict | None = None) -> Any:
try:
r = await client.get(path, params=params, headers=_propagation_headers())
r.raise_for_status()
return r.json()
except httpx.HTTPStatusError as exc:
raise HTTPException(status_code=502, detail=f"Analytics service error: {exc.response.status_code}") from exc
except httpx.RequestError as exc:
raise HTTPException(status_code=503, detail=f"Analytics service unavailable: {exc}") from exc
async def _post(client: httpx.AsyncClient, path: str, json: dict) -> Any:
try:
r = await client.post(path, json=json, headers=_propagation_headers())
r.raise_for_status()
return r.json()
except httpx.HTTPStatusError as exc:
raise HTTPException(status_code=502 if exc.response.status_code != 404 else 404,
detail=f"Analytics service error: {exc.response.status_code}") from exc
except httpx.RequestError as exc:
raise HTTPException(status_code=503, detail=f"Analytics service unavailable: {exc}") from exc
def _record_export(
pg_factory: sessionmaker[Session],
domain: str,
source_view: str,
fmt: str,
filters: dict,
row_count: int,
file_size_bytes: int,
actor_id: str,
trace_id: str | None,
span_id: str | None,
) -> None:
try:
with pg_factory() as session:
session.add(ExportRecord(
domain=domain, service="otel-bi-backend", source_view=source_view,
format=fmt, filters_applied=filters, row_count=row_count,
file_size_bytes=file_size_bytes, actor_id=actor_id,
trace_id=trace_id, span_id=span_id,
))
session.commit()
except Exception as exc: # noqa: BLE001
LOGGER.warning("Failed to record export metadata: %s", exc)
append_audit(
pg_factory,
action="export.created", actor_type="user", actor_id=actor_id,
domain=domain, service="otel-bi-backend", entity_type=source_view,
payload={"format": fmt, "row_count": row_count, "file_size_bytes": file_size_bytes, **filters},
)
async def _proxy_xlsx(
client: httpx.AsyncClient,
go_path: str,
params: dict,
filename_stem: str,
domain: str,
source_view: str,
filters: dict,
actor_id: str,
pg_factory: sessionmaker[Session],
) -> Response:
"""Fetch XLSX bytes from Go, write ExportRecord, return response."""
try:
r = await client.get(go_path, params=params, headers=_propagation_headers())
r.raise_for_status()
except httpx.HTTPStatusError as exc:
raise HTTPException(status_code=502, detail=f"Analytics service error: {exc.response.status_code}") from exc
except httpx.RequestError as exc:
raise HTTPException(status_code=503, detail=f"Analytics service unavailable: {exc}") from exc
content = r.content
row_count = int(r.headers.get("X-Row-Count", "0"))
today = datetime.now(timezone.utc).strftime("%Y%m%d")
filename = f"{filename_stem}_{today}.xlsx"
trace_id, span_id = current_span_context()
await asyncio.get_running_loop().run_in_executor(
get_executor(),
lambda: _record_export(pg_factory, domain, source_view, "xlsx", filters,
row_count, len(content), actor_id, trace_id, span_id),
)
return Response(
content=content, media_type=_XLSX_MEDIA,
headers={"Content-Disposition": f'attachment; filename="{filename}"'},
)
def _make_pdf(
data: list[dict],
filename_stem: str,
pdf_title: str,
domain: str,
source_view: str,
filters: dict,
actor_id: str,
pg_factory: sessionmaker[Session],
) -> Response:
with tracer.start_as_current_span(f"export.{domain}.{source_view}") as span:
span.set_attribute("export.format", "pdf")
span.set_attribute("export.row_count", len(data))
content = to_pdf_bytes(data, title=pdf_title)
span.set_attribute("export.file_size_bytes", len(content))
today = datetime.now(timezone.utc).strftime("%Y%m%d")
filename = f"{filename_stem}_{today}.pdf"
trace_id, span_id = current_span_context()
_record_export(pg_factory, domain, source_view, "pdf", filters,
len(data), len(content), actor_id, trace_id, span_id)
return Response(
content=content, media_type=_PDF_MEDIA,
headers={"Content-Disposition": f'attachment; filename="{filename}"'},
)
# ---------------------------------------------------------------------------
# KPIs
# ---------------------------------------------------------------------------
@router.get("/sales/kpis")
async def wwi_sales_kpis(
response: Response, request: Request,
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> dict:
response.headers.update(_trace_headers())
return await _get(request.app.state.analytics_client, "/wwi/sales/kpis")
# ---------------------------------------------------------------------------
# Stock & reorder
# ---------------------------------------------------------------------------
@router.get("/stock/recommendations")
async def wwi_reorder_recommendations(
response: Response, request: Request,
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
client = request.app.state.analytics_client
pg_factory = request.app.state.pg_factory
data = await _get(client, "/wwi/stock/recommendations")
loop = asyncio.get_running_loop()
def _persist_and_alert() -> None:
analytics.generate_stock_events(pg_factory, data)
analytics.persist_reorder_recommendations(pg_factory, data, "api.stock.recommendations")
await loop.run_in_executor(get_executor(), _persist_and_alert)
return data
# ---------------------------------------------------------------------------
# Supplier scores
# ---------------------------------------------------------------------------
@router.get("/suppliers/scores")
async def wwi_supplier_scores(
response: Response, request: Request,
top_n: int = Query(default=settings.ranking_default_top_n, ge=3, le=100),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
client = request.app.state.analytics_client
pg_factory = request.app.state.pg_factory
data = await _get(client, "/wwi/suppliers/scores", {"top_n": top_n})
loop = asyncio.get_running_loop()
await loop.run_in_executor(
get_executor(),
lambda: analytics.persist_supplier_scores(pg_factory, data, top_n, "api.suppliers.scores"),
)
return data
# ---------------------------------------------------------------------------
# Business events
# ---------------------------------------------------------------------------
@router.get("/events")
async def wwi_business_events(
response: Response, request: Request,
limit: int = Query(default=100, ge=1, le=500),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
pg_factory = request.app.state.pg_factory
return await asyncio.get_running_loop().run_in_executor(
get_executor(), lambda: analytics.get_business_events(pg_factory, limit=limit)
)
# ---------------------------------------------------------------------------
# What-if scenarios
# ---------------------------------------------------------------------------
@router.post("/scenarios")
async def wwi_create_scenario(
body: WhatIfRequest, response: Response, request: Request,
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> dict:
response.headers.update(_trace_headers())
client = request.app.state.analytics_client
pg_factory = request.app.state.pg_factory
result = await _post(client, "/wwi/scenarios", {
"stock_item_key": body.stock_item_key,
"demand_multiplier": body.demand_multiplier,
})
loop = asyncio.get_running_loop()
await loop.run_in_executor(
get_executor(),
lambda: analytics.persist_whatif_scenario(pg_factory, result),
)
return result
@router.get("/scenarios")
async def wwi_list_scenarios(
response: Response, request: Request,
limit: int = Query(default=settings.storage_default_limit, ge=1, le=500),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
pg_factory = request.app.state.pg_factory
return await asyncio.get_running_loop().run_in_executor(
get_executor(), lambda: analytics.list_whatif_scenarios(pg_factory, limit=limit)
)
# ---------------------------------------------------------------------------
# Stored records
# ---------------------------------------------------------------------------
@router.get("/records/reorder-recommendations")
async def wwi_records_reorder(
response: Response, request: Request,
limit: int = Query(default=settings.storage_default_limit, ge=1, le=500),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
pg_factory = request.app.state.pg_factory
return await asyncio.get_running_loop().run_in_executor(
get_executor(), lambda: analytics.list_reorder_recommendations(pg_factory, limit=limit)
)
@router.get("/records/supplier-scores")
async def wwi_records_supplier_scores(
response: Response, request: Request,
limit: int = Query(default=settings.storage_default_limit, ge=1, le=500),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
pg_factory = request.app.state.pg_factory
return await asyncio.get_running_loop().run_in_executor(
get_executor(), lambda: analytics.list_supplier_scores(pg_factory, limit=limit)
)
# ---------------------------------------------------------------------------
# Exports
# ---------------------------------------------------------------------------
@router.get("/export/stock-recommendations")
async def export_wwi_stock_recommendations(
request: Request,
format: Literal["xlsx", "pdf"] = Query(default="xlsx"),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> Response:
client = request.app.state.analytics_client
pg_factory = request.app.state.pg_factory
actor_id = principal.subject
if format == "xlsx":
return await _proxy_xlsx(client, "/wwi/export/stock-recommendations", {},
"wwi_stock_recommendations", "wwi", "stock-recommendations",
{}, actor_id, pg_factory)
data = await _get(client, "/wwi/stock/recommendations")
return await asyncio.get_running_loop().run_in_executor(
get_executor(),
lambda: _make_pdf(data, "wwi_stock_recommendations",
"WideWorldImporters — Stock Reorder Recommendations",
"wwi", "stock-recommendations", {}, actor_id, pg_factory),
)
@router.get("/export/supplier-scores")
async def export_wwi_supplier_scores(
request: Request,
format: Literal["xlsx", "pdf"] = Query(default="xlsx"),
top_n: int = Query(default=settings.ranking_default_top_n, ge=3, le=100),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> Response:
client = request.app.state.analytics_client
pg_factory = request.app.state.pg_factory
actor_id = principal.subject
filters = {"top_n": top_n}
if format == "xlsx":
return await _proxy_xlsx(client, "/wwi/export/supplier-scores", filters,
"wwi_supplier_scores", "wwi", "supplier-scores",
filters, actor_id, pg_factory)
data = await _get(client, "/wwi/suppliers/scores", filters)
return await asyncio.get_running_loop().run_in_executor(
get_executor(),
lambda: _make_pdf(data, "wwi_supplier_scores",
"WideWorldImporters — Supplier Reliability Scores",
"wwi", "supplier-scores", filters, actor_id, pg_factory),
)
@router.get("/export/business-events")
async def export_wwi_business_events(
request: Request,
format: Literal["xlsx", "pdf"] = Query(default="xlsx"),
limit: int = Query(default=100, ge=1, le=500),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> Response:
pg_factory = request.app.state.pg_factory
actor_id = principal.subject
filters = {"limit": limit}
data = await asyncio.get_running_loop().run_in_executor(
get_executor(), lambda: analytics.get_business_events(pg_factory, limit=limit)
)
return await asyncio.get_running_loop().run_in_executor(
get_executor(),
lambda: _make_pdf(data, "wwi_business_events",
"WideWorldImporters — Business Events",
"wwi", "business-events", filters, actor_id, pg_factory),
)
# ---------------------------------------------------------------------------
# Job triggers
# ---------------------------------------------------------------------------
@router.post("/jobs/{job_name}/trigger")
async def trigger_wwi_job(
job_name: str, response: Response, request: Request,
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> dict:
response.headers.update(_trace_headers())
return await _post(request.app.state.analytics_client, f"/scheduler/wwi/{job_name}/trigger", {})
@router.get("/jobs")
async def wwi_job_history(
response: Response, request: Request,
limit: int = Query(default=50, ge=1, le=200),
principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> list[dict]:
response.headers.update(_trace_headers())
pg_factory = request.app.state.pg_factory
return await asyncio.get_running_loop().run_in_executor(
get_executor(), lambda: _list_jobs(pg_factory, "wwi", limit)
)
def _list_jobs(pg_factory, domain: str, limit: int) -> list[dict]:
from app.core.audit import JobExecution
with pg_factory() as session:
rows = (
session.query(JobExecution)
.filter_by(domain=domain)
.order_by(JobExecution.started_at.desc())
.limit(limit)
.all()
)
return [
{
"id": r.id,
"job_name": r.job_name,
"domain": r.domain,
"status": r.status,
"started_at": r.started_at.isoformat(),
"completed_at": r.completed_at.isoformat() if r.completed_at else None,
"duration_ms": r.duration_ms,
"records_processed": r.records_processed,
"error_message": r.error_message,
"trace_id": r.trace_id,
}
for r in rows
]


@@ -1 +0,0 @@
"""Business logic services."""


@@ -1,373 +0,0 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import date, timedelta
from math import sqrt
import numpy as np
import pandas as pd
from opentelemetry import trace
from sklearn.linear_model import LinearRegression
from app.core.config import settings
from app.services.persistence_service import PersistenceService
from app.services.warehouse_service import ReadOnlyWarehouseClient
@dataclass
class DashboardSnapshot:
kpis: dict
history: list[dict]
forecasts: list[dict]
rankings: list[dict]
recommendations: list[dict]
class AnalyticsService:
def __init__(
self,
warehouse_client: ReadOnlyWarehouseClient,
persistence_service: PersistenceService | None = None,
) -> None:
self.warehouse_client = warehouse_client
self.persistence_service = persistence_service
self.tracer = trace.get_tracer(__name__)
@staticmethod
def _normalize_frame(df: pd.DataFrame, date_col: str = "sale_date") -> pd.DataFrame:
normalized = df.copy()
normalized[date_col] = pd.to_datetime(normalized[date_col], errors="coerce")
for numeric in ("revenue", "cost", "quantity", "orders"):
if numeric in normalized.columns:
normalized[numeric] = pd.to_numeric(
normalized[numeric], errors="coerce"
).fillna(0.0)
return normalized.dropna(subset=[date_col])
def load_sales_history(self, days_back: int | None = None) -> pd.DataFrame:
with self.tracer.start_as_current_span("analytics.load_sales_history"):
daily_sales = self._normalize_frame(
self.warehouse_client.fetch_daily_sales()
)
days = days_back or settings.default_history_days
min_date = pd.Timestamp(date.today() - timedelta(days=days))
filtered = daily_sales[daily_sales["sale_date"] >= min_date]
return (
filtered.groupby("sale_date", as_index=False)[
["revenue", "cost", "quantity", "orders"]
]
.sum()
.sort_values("sale_date")
)
def get_kpis(self) -> dict:
with self.tracer.start_as_current_span("analytics.kpis"):
sales = self.load_sales_history(days_back=180)
if sales.empty:
return {
"total_revenue": 0.0,
"gross_margin_pct": 0.0,
"total_quantity": 0.0,
"avg_order_value": 0.0,
"records_in_window": 0,
}
total_revenue = float(sales["revenue"].sum())
total_cost = float(sales["cost"].sum())
total_orders = max(float(sales["orders"].sum()), 1.0)
margin_pct = (
((total_revenue - total_cost) / total_revenue * 100)
if total_revenue
else 0.0
)
return {
"total_revenue": round(total_revenue, 2),
"gross_margin_pct": round(margin_pct, 2),
"total_quantity": round(float(sales["quantity"].sum()), 2),
"avg_order_value": round(total_revenue / total_orders, 2),
"records_in_window": int(sales.shape[0]),
}
def get_history_points(self, days_back: int | None = None) -> list[dict]:
with self.tracer.start_as_current_span("analytics.history_points"):
sales = self.load_sales_history(days_back=days_back)
if sales.empty:
return []
return [
{
"date": pd.Timestamp(row["sale_date"]).date().isoformat(),
"revenue": round(float(row["revenue"]), 2),
"cost": round(float(row["cost"]), 2),
"quantity": round(float(row["quantity"]), 2),
}
for _, row in sales.iterrows()
]
def get_forecast(
self,
horizon_days: int | None = None,
*,
trigger_source: str = "api.forecasts",
persist: bool = True,
) -> list[dict]:
with self.tracer.start_as_current_span("analytics.forecast"):
horizon = horizon_days or settings.forecast_horizon_days
sales = self.load_sales_history(days_back=720)
if sales.empty:
return []
series = (
sales.set_index("sale_date")["revenue"]
.sort_index()
.resample("D")
.sum()
.fillna(0.0)
)
y = series.values
x = np.arange(len(y), dtype=float).reshape(-1, 1)
model = LinearRegression()
model.fit(x, y)
baseline = model.predict(x)
residual = y - baseline
sigma = float(np.std(residual)) if len(residual) > 1 else 0.0
weekday_baseline = series.groupby(series.index.weekday).mean()
overall_mean = float(series.mean()) if len(series) else 0.0
weekday_factor = (
weekday_baseline / overall_mean
if overall_mean > 0
else pd.Series([1.0] * 7, index=range(7))
)
weekday_factor = weekday_factor.replace([np.inf, -np.inf], 1.0).fillna(1.0)
future_x = np.arange(len(y), len(y) + horizon, dtype=float).reshape(-1, 1)
raw_forecast = model.predict(future_x)
predictions: list[dict] = []
start_date = series.index.max().date()
for idx, point in enumerate(raw_forecast, start=1):
day = start_date + timedelta(days=idx)
factor = (
float(weekday_factor.loc[day.weekday()])
if day.weekday() in weekday_factor.index
else 1.0
)
yhat = max(float(point) * factor, 0.0)
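                # ~95% band that widens with the horizon: residual sigma
                # scaled by sqrt(1 + steps_ahead / history_length).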
ci = 1.96 * sigma * sqrt(1 + idx / max(len(y), 1))
predictions.append(
{
"date": day.isoformat(),
"predicted_revenue": round(yhat, 2),
"lower_bound": round(max(yhat - ci, 0.0), 2),
"upper_bound": round(yhat + ci, 2),
}
)
if persist and self.persistence_service is not None:
span_context = trace.get_current_span().get_span_context()
trace_id = (
f"{span_context.trace_id:032x}" if span_context.is_valid else None
)
span_id = (
f"{span_context.span_id:016x}" if span_context.is_valid else None
)
self.persistence_service.record_forecast_run(
horizon_days=horizon,
payload=predictions,
trigger_source=trigger_source,
trace_id=trace_id,
span_id=span_id,
)
return predictions
def get_rankings(
self,
top_n: int | None = None,
*,
trigger_source: str = "api.rankings",
persist: bool = True,
) -> list[dict]:
with self.tracer.start_as_current_span("analytics.rankings"):
n = top_n or settings.ranking_default_top_n
products = self.warehouse_client.fetch_product_performance().copy()
if products.empty:
return []
products["revenue"] = pd.to_numeric(
products["revenue"], errors="coerce"
).fillna(0.0)
products["cost"] = pd.to_numeric(products["cost"], errors="coerce").fillna(
0.0
)
products["quantity"] = pd.to_numeric(
products["quantity"], errors="coerce"
).fillna(0.0)
products["orders"] = pd.to_numeric(
products["orders"], errors="coerce"
).fillna(0.0)
grouped = (
products.groupby(
["product_id", "product_name", "category_name"], as_index=False
)[["revenue", "cost", "quantity", "orders"]]
.sum()
.sort_values("revenue", ascending=False)
)
grouped["margin_pct"] = np.where(
grouped["revenue"] > 0,
((grouped["revenue"] - grouped["cost"]) / grouped["revenue"]) * 100,
0.0,
)
revenue_norm = grouped["revenue"] / max(
float(grouped["revenue"].max()), 1.0
)
margin_norm = (grouped["margin_pct"] + 100) / 200
velocity_norm = grouped["quantity"] / max(
float(grouped["quantity"].max()), 1.0
)
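            # Composite score: 55% revenue share, 30% margin (rescaled from
            # [-100, 100] into [0, 1] and clipped), 15% unit velocity.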
grouped["score"] = (
(0.55 * revenue_norm)
+ (0.30 * margin_norm.clip(0, 1))
+ (0.15 * velocity_norm)
)
ranked = (
grouped.sort_values("score", ascending=False)
.head(n)
.reset_index(drop=True)
)
result = [
{
"rank": int(idx + 1),
"product_id": str(row["product_id"]),
"product_name": str(row["product_name"]),
"category": str(row["category_name"]),
"revenue": round(float(row["revenue"]), 2),
"margin_pct": round(float(row["margin_pct"]), 2),
"score": round(float(row["score"]) * 100, 2),
}
for idx, row in ranked.iterrows()
]
if persist and self.persistence_service is not None:
span_context = trace.get_current_span().get_span_context()
trace_id = (
f"{span_context.trace_id:032x}" if span_context.is_valid else None
)
span_id = (
f"{span_context.span_id:016x}" if span_context.is_valid else None
)
self.persistence_service.record_ranking_run(
top_n=n,
payload=result,
trigger_source=trigger_source,
trace_id=trace_id,
span_id=span_id,
)
return result
def get_recommendations(
self,
rankings: list[dict] | None = None,
*,
trigger_source: str = "api.recommendations",
persist: bool = True,
) -> list[dict]:
with self.tracer.start_as_current_span("analytics.recommendations"):
ranking_rows = (
rankings
if rankings is not None
else self.get_rankings(
top_n=20, trigger_source=trigger_source, persist=persist
)
)
customers = self.warehouse_client.fetch_customer_performance().copy()
if customers.empty:
customers = pd.DataFrame(columns=["customer_name", "revenue", "orders"])
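            # Heuristic playbook: champion SKU, low-margin bestseller, and
            # top-revenue customer, capped at five recommendations.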
recommendations: list[dict] = []
if ranking_rows:
champion = ranking_rows[0]
recommendations.append(
{
"title": "Double down on champion SKU",
"priority": "high",
"summary": (
f"Promote '{champion['product_name']}' with score {champion['score']:.2f} "
f"and margin {champion['margin_pct']:.2f}%."
),
}
)
low_margin = next(
(row for row in ranking_rows if row["margin_pct"] < 10), None
)
if low_margin:
recommendations.append(
{
"title": "Review pricing for low-margin bestseller",
"priority": "medium",
"summary": (
f"'{low_margin['product_name']}' has strong rank but only "
f"{low_margin['margin_pct']:.2f}% margin."
),
}
)
if not customers.empty:
customers["revenue"] = pd.to_numeric(
customers["revenue"], errors="coerce"
).fillna(0.0)
customers["orders"] = pd.to_numeric(
customers["orders"], errors="coerce"
).fillna(0.0)
customer = customers.sort_values("revenue", ascending=False).iloc[0]
recommendations.append(
{
"title": "Protect top customer relationship",
"priority": "high",
"summary": (
f"Prioritize retention for '{customer['customer_name']}' with "
f"{float(customer['orders']):.0f} orders and {float(customer['revenue']):.2f} revenue."
),
}
)
result = recommendations[:5]
if persist and self.persistence_service is not None:
span_context = trace.get_current_span().get_span_context()
trace_id = (
f"{span_context.trace_id:032x}" if span_context.is_valid else None
)
span_id = (
f"{span_context.span_id:016x}" if span_context.is_valid else None
)
self.persistence_service.record_recommendation_run(
payload=result,
trigger_source=trigger_source,
trace_id=trace_id,
span_id=span_id,
)
return result
def get_dashboard(self) -> DashboardSnapshot:
with self.tracer.start_as_current_span("analytics.dashboard"):
rankings = self.get_rankings(trigger_source="api.dashboard", persist=True)
return DashboardSnapshot(
kpis=self.get_kpis(),
history=self.get_history_points(),
forecasts=self.get_forecast(
trigger_source="api.dashboard", persist=True
),
rankings=rankings,
recommendations=self.get_recommendations(
rankings=rankings,
trigger_source="api.dashboard",
persist=True,
),
)
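
# A minimal wiring sketch (hypothetical URL names; the real composition lives
# in the application bootstrap, not in this module):
#
#   engines = {"aw": create_engine(AW_DWH_URL), "wwi": create_engine(WWI_DWH_URL)}
#   service = AnalyticsService(ReadOnlyWarehouseClient(engines))
#   snapshot = service.get_dashboard()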

View File

@@ -1,281 +0,0 @@
from __future__ import annotations

import logging
from time import perf_counter

from opentelemetry import metrics, trace
from sqlalchemy import desc, select
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session, sessionmaker

from app.db.postgres_models import AuditLog, ForecastRun, RankingRun, RecommendationRun

LOGGER = logging.getLogger(__name__)


class PersistenceService:
def __init__(self, session_factory: sessionmaker[Session]) -> None:
self.session_factory = session_factory
self.tracer = trace.get_tracer(__name__)
self.meter = metrics.get_meter(__name__)
self.write_counter = self.meter.create_counter(
name="postgres_persist_writes_total",
description="Total writes to app persistence PostgreSQL",
)
self.write_latency = self.meter.create_histogram(
name="postgres_persist_write_latency_ms",
unit="ms",
description="Latency of app persistence write operations",
)
@staticmethod
def _to_audit_dict(row: AuditLog) -> dict:
return {
"id": row.id,
"created_at": row.created_at.isoformat(),
"method": row.method,
"path": row.path,
"query_string": row.query_string,
"status_code": row.status_code,
"duration_ms": row.duration_ms,
"trace_id": row.trace_id,
"span_id": row.span_id,
"client_ip": row.client_ip,
"user_agent": row.user_agent,
"details": row.details,
}
@staticmethod
def _to_forecast_dict(row: ForecastRun) -> dict:
return {
"id": row.id,
"created_at": row.created_at.isoformat(),
"horizon_days": row.horizon_days,
"point_count": row.point_count,
"trigger_source": row.trigger_source,
"trace_id": row.trace_id,
"span_id": row.span_id,
"payload": row.payload,
}
@staticmethod
def _to_ranking_dict(row: RankingRun) -> dict:
return {
"id": row.id,
"created_at": row.created_at.isoformat(),
"top_n": row.top_n,
"item_count": row.item_count,
"trigger_source": row.trigger_source,
"trace_id": row.trace_id,
"span_id": row.span_id,
"payload": row.payload,
}
@staticmethod
def _to_recommendation_dict(row: RecommendationRun) -> dict:
return {
"id": row.id,
"created_at": row.created_at.isoformat(),
"item_count": row.item_count,
"trigger_source": row.trigger_source,
"trace_id": row.trace_id,
"span_id": row.span_id,
"payload": row.payload,
}
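
    # The record_* helpers are fire-and-forget: SQLAlchemy errors are logged
    # and counted but never re-raised, so a persistence failure cannot break
    # the request path that triggered it.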
def record_audit_log(
self,
*,
method: str,
path: str,
query_string: str,
status_code: int,
duration_ms: float,
trace_id: str | None,
span_id: str | None,
client_ip: str | None,
user_agent: str | None,
details: dict | None = None,
) -> None:
started = perf_counter()
with self.tracer.start_as_current_span("persist.audit_log"):
try:
with self.session_factory() as session:
session.add(
AuditLog(
method=method,
path=path,
query_string=query_string[:1000],
status_code=status_code,
duration_ms=duration_ms,
trace_id=trace_id,
span_id=span_id,
client_ip=client_ip,
user_agent=user_agent,
details=details or {},
)
)
session.commit()
self.write_counter.add(
1, attributes={"entity": "audit", "status": "ok"}
)
except SQLAlchemyError as exc:
LOGGER.exception("Failed to persist audit log: %s", exc)
self.write_counter.add(
1, attributes={"entity": "audit", "status": "error"}
)
finally:
self.write_latency.record(
(perf_counter() - started) * 1000,
attributes={"entity": "audit"},
)
def record_forecast_run(
self,
*,
horizon_days: int,
payload: list[dict],
trigger_source: str,
trace_id: str | None,
span_id: str | None,
) -> None:
started = perf_counter()
with self.tracer.start_as_current_span("persist.forecast_run"):
try:
with self.session_factory() as session:
session.add(
ForecastRun(
horizon_days=horizon_days,
point_count=len(payload),
trigger_source=trigger_source,
trace_id=trace_id,
span_id=span_id,
payload=payload,
)
)
session.commit()
self.write_counter.add(
1, attributes={"entity": "forecast", "status": "ok"}
)
except SQLAlchemyError as exc:
LOGGER.exception("Failed to persist forecast run: %s", exc)
self.write_counter.add(
1, attributes={"entity": "forecast", "status": "error"}
)
finally:
self.write_latency.record(
(perf_counter() - started) * 1000,
attributes={"entity": "forecast"},
)
def record_ranking_run(
self,
*,
top_n: int,
payload: list[dict],
trigger_source: str,
trace_id: str | None,
span_id: str | None,
) -> None:
started = perf_counter()
with self.tracer.start_as_current_span("persist.ranking_run"):
try:
with self.session_factory() as session:
session.add(
RankingRun(
top_n=top_n,
item_count=len(payload),
trigger_source=trigger_source,
trace_id=trace_id,
span_id=span_id,
payload=payload,
)
)
session.commit()
self.write_counter.add(
1, attributes={"entity": "ranking", "status": "ok"}
)
except SQLAlchemyError as exc:
LOGGER.exception("Failed to persist ranking run: %s", exc)
self.write_counter.add(
1, attributes={"entity": "ranking", "status": "error"}
)
finally:
self.write_latency.record(
(perf_counter() - started) * 1000,
attributes={"entity": "ranking"},
)
def record_recommendation_run(
self,
*,
payload: list[dict],
trigger_source: str,
trace_id: str | None,
span_id: str | None,
) -> None:
started = perf_counter()
with self.tracer.start_as_current_span("persist.recommendation_run"):
try:
with self.session_factory() as session:
session.add(
RecommendationRun(
item_count=len(payload),
trigger_source=trigger_source,
trace_id=trace_id,
span_id=span_id,
payload=payload,
)
)
session.commit()
self.write_counter.add(
1, attributes={"entity": "recommendation", "status": "ok"}
)
except SQLAlchemyError as exc:
LOGGER.exception("Failed to persist recommendation run: %s", exc)
self.write_counter.add(
1, attributes={"entity": "recommendation", "status": "error"}
)
finally:
self.write_latency.record(
(perf_counter() - started) * 1000,
attributes={"entity": "recommendation"},
)
def list_audit_logs(self, limit: int) -> list[dict]:
with self.tracer.start_as_current_span("persist.list_audit_logs"):
with self.session_factory() as session:
rows = session.execute(
select(AuditLog).order_by(desc(AuditLog.created_at)).limit(limit)
).scalars()
return [self._to_audit_dict(row) for row in rows]
def list_forecast_runs(self, limit: int) -> list[dict]:
with self.tracer.start_as_current_span("persist.list_forecast_runs"):
with self.session_factory() as session:
rows = session.execute(
select(ForecastRun)
.order_by(desc(ForecastRun.created_at))
.limit(limit)
).scalars()
return [self._to_forecast_dict(row) for row in rows]
def list_ranking_runs(self, limit: int) -> list[dict]:
with self.tracer.start_as_current_span("persist.list_ranking_runs"):
with self.session_factory() as session:
rows = session.execute(
select(RankingRun)
.order_by(desc(RankingRun.created_at))
.limit(limit)
).scalars()
return [self._to_ranking_dict(row) for row in rows]
def list_recommendation_runs(self, limit: int) -> list[dict]:
with self.tracer.start_as_current_span("persist.list_recommendation_runs"):
with self.session_factory() as session:
rows = session.execute(
select(RecommendationRun)
.order_by(desc(RecommendationRun.created_at))
.limit(limit)
).scalars()
return [self._to_recommendation_dict(row) for row in rows]

View File

@@ -1,101 +0,0 @@
from __future__ import annotations

import hashlib
import logging
from collections.abc import Sequence
from time import perf_counter

import pandas as pd
from opentelemetry import metrics, trace
from sqlalchemy import text
from sqlalchemy.engine import Engine
from sqlalchemy.exc import SQLAlchemyError

from app.db import queries

LOGGER = logging.getLogger(__name__)


class ReadOnlyWarehouseClient:
def __init__(self, engines: dict[str, Engine]) -> None:
self.engines = engines
self.tracer = trace.get_tracer(__name__)
self.meter = metrics.get_meter(__name__)
self.query_counter = self.meter.create_counter(
name="warehouse_queries_total",
description="Total warehouse query executions",
)
self.query_latency = self.meter.create_histogram(
name="warehouse_query_latency_ms",
unit="ms",
description="Warehouse query latency",
)
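
    # Allow-list guard, not a SQL parser: anything that does not start with
    # SELECT or a CTE is rejected before it reaches the warehouse.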
    def _validate_read_only_query(self, sql: str) -> None:
        normalized = sql.strip().lower()
        if not normalized.startswith(("select", "with")):
            raise ValueError("Only read-only SELECT/CTE SQL statements are allowed.")
def _run_query_list(
self, source: str, sql_candidates: Sequence[str]
) -> pd.DataFrame:
engine = self.engines[source]
last_error: Exception | None = None
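        # Try the candidates in order and return the first one that executes;
        # failures are logged and the next variant is attempted.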
for candidate in sql_candidates:
self._validate_read_only_query(candidate)
query_hash = hashlib.sha256(candidate.encode("utf-8")).hexdigest()[:12]
with self.tracer.start_as_current_span("warehouse.query") as span:
span.set_attribute("db.system", "mssql")
span.set_attribute("db.source", source)
span.set_attribute("db.query.hash", query_hash)
started = perf_counter()
try:
with engine.connect() as conn:
with self.tracer.start_as_current_span(
"warehouse.query.execute"
):
df = pd.read_sql_query(sql=text(candidate), con=conn)
elapsed_ms = (perf_counter() - started) * 1000
self.query_latency.record(elapsed_ms, attributes={"source": source})
self.query_counter.add(
1, attributes={"source": source, "status": "ok"}
)
return df
except SQLAlchemyError as exc:
last_error = exc
elapsed_ms = (perf_counter() - started) * 1000
self.query_latency.record(elapsed_ms, attributes={"source": source})
self.query_counter.add(
1, attributes={"source": source, "status": "error"}
)
LOGGER.warning(
"Query failed for %s with hash %s: %s", source, query_hash, exc
)
if last_error is not None:
raise RuntimeError(
f"All query candidates failed for source '{source}'."
) from last_error
return pd.DataFrame()
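
    # Each fetch_* method unions the AdventureWorks and WWI marts and tags
    # every row with its source system for downstream filtering.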
def fetch_daily_sales(self) -> pd.DataFrame:
aw = self._run_query_list("aw", queries.AW_DAILY_SALES_QUERIES)
aw["source"] = "AdventureWorks2022DWH"
wwi = self._run_query_list("wwi", queries.WWI_DAILY_SALES_QUERIES)
wwi["source"] = "WorldWideImporters"
return pd.concat([aw, wwi], ignore_index=True)
def fetch_product_performance(self) -> pd.DataFrame:
aw = self._run_query_list("aw", queries.AW_PRODUCT_PERFORMANCE_QUERIES)
aw["source"] = "AdventureWorks2022DWH"
wwi = self._run_query_list("wwi", queries.WWI_PRODUCT_PERFORMANCE_QUERIES)
wwi["source"] = "WorldWideImporters"
return pd.concat([aw, wwi], ignore_index=True)
def fetch_customer_performance(self) -> pd.DataFrame:
aw = self._run_query_list("aw", queries.AW_CUSTOMER_QUERIES)
aw["source"] = "AdventureWorks2022DWH"
wwi = self._run_query_list("wwi", queries.WWI_CUSTOMER_QUERIES)
wwi["source"] = "WorldWideImporters"
return pd.concat([aw, wwi], ignore_index=True)