Add initial work from Codex

commit adb5c1a439 (parent 19771ddd37), 2026-03-20 15:13:33 +01:00
48 changed files with 7054 additions and 16 deletions

backend/app/__init__.py Normal file

@@ -0,0 +1 @@
"""Backend application package."""

backend/app/core/config.py Normal file

@@ -0,0 +1,135 @@
from __future__ import annotations
from functools import lru_cache
from urllib.parse import quote_plus
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
model_config = SettingsConfigDict(
env_file=".env",
env_file_encoding="utf-8",
extra="ignore",
)
app_name: str = "otel-bi-backend"
app_env: str = "dev"
log_level: str = "INFO"
api_host: str = "0.0.0.0"
api_port: int = 8000
cors_origins: str = "http://localhost:5173"
request_timeout_seconds: float = 20.0
mssql_host: str = "localhost"
mssql_port: int = 1433
mssql_username: str = "sa"
mssql_password: str = "Password!123"
mssql_driver: str = "ODBC Driver 18 for SQL Server"
mssql_trust_server_certificate: bool = False
wwi_database: str = "WorldWideImporters"
aw_database: str = "AdventureWorks2022DWH"
wwi_connection_string: str | None = None
aw_connection_string: str | None = None
postgres_host: str = "localhost"
postgres_port: int = 5432
postgres_database: str = "otel_bi_app"
postgres_username: str = "otel_bi_app"
postgres_password: str = "otel_bi_app"
postgres_sslmode: str = "require"
postgres_connection_string: str | None = None
postgres_required: bool = True
query_service_url: str = "http://localhost:8101"
analytics_service_url: str = "http://localhost:8102"
persistence_service_url: str = "http://localhost:8103"
require_frontend_auth: bool = True
frontend_jwt_issuer_url: str = ""
frontend_jwt_audience: str = ""
frontend_jwt_jwks_url: str | None = None
frontend_jwt_algorithm: str = "RS256"
frontend_required_scopes: str = ""
frontend_clock_skew_seconds: int = Field(default=30, ge=0, le=300)
internal_service_auth_enabled: bool = True
internal_service_shared_secret: str = "change-me"
internal_service_token_ttl_seconds: int = Field(default=120, ge=30, le=900)
internal_service_token_audience: str = "bi-internal"
internal_service_allowed_issuers: str = "api-gateway"
internal_token_clock_skew_seconds: int = Field(default=15, ge=0, le=120)
otel_service_name: str = "otel-bi-backend"
otel_service_namespace: str = "final-thesis"
otel_collector_endpoint: str = "http://localhost:4318"
otel_export_timeout_ms: int = 10000
forecast_horizon_days: int = Field(default=30, ge=7, le=180)
default_history_days: int = Field(default=365, ge=30, le=1460)
ranking_default_top_n: int = Field(default=10, ge=3, le=100)
storage_default_limit: int = Field(default=50, ge=10, le=500)
@property
def cors_origins_list(self) -> list[str]:
return [
origin.strip() for origin in self.cors_origins.split(",") if origin.strip()
]
@property
def frontend_required_scopes_list(self) -> list[str]:
return [
scope.strip()
for scope in self.frontend_required_scopes.split(" ")
if scope.strip()
]
@property
def internal_service_allowed_issuers_list(self) -> list[str]:
return [
issuer.strip()
for issuer in self.internal_service_allowed_issuers.split(",")
if issuer.strip()
]
def _build_mssql_connection_url(self, database: str) -> str:
driver = quote_plus(self.mssql_driver)
user = quote_plus(self.mssql_username)
password = quote_plus(self.mssql_password)
trust_cert = "yes" if self.mssql_trust_server_certificate else "no"
return (
f"mssql+pyodbc://{user}:{password}@{self.mssql_host}:{self.mssql_port}/{database}"
f"?driver={driver}&TrustServerCertificate={trust_cert}&ApplicationIntent=ReadOnly"
)
@property
def wwi_connection_url(self) -> str:
return self.wwi_connection_string or self._build_mssql_connection_url(
self.wwi_database
)
@property
def aw_connection_url(self) -> str:
return self.aw_connection_string or self._build_mssql_connection_url(
self.aw_database
)
@property
def postgres_connection_url(self) -> str:
if self.postgres_connection_string:
return self.postgres_connection_string
user = quote_plus(self.postgres_username)
password = quote_plus(self.postgres_password)
return (
f"postgresql+psycopg://{user}:{password}@{self.postgres_host}:{self.postgres_port}/"
f"{self.postgres_database}?sslmode={self.postgres_sslmode}"
)
@lru_cache
def get_settings() -> Settings:
return Settings()
settings = get_settings()
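
For reference, a minimal sketch of how these settings resolve (not part of this commit; the env values are illustrative and rely on pydantic-settings' default field-name-to-env-var mapping):

# Assuming MSSQL_HOST=sql.internal and CORS_ORIGINS="http://localhost:5173,https://bi.example.test"
# are set in the environment or .env, they override the defaults above.
from app.core.config import get_settings

settings = get_settings()           # process-wide singleton via lru_cache
print(settings.cors_origins_list)   # ['http://localhost:5173', 'https://bi.example.test']
print(settings.wwi_connection_url)  # mssql+pyodbc://sa:...@sql.internal:1433/WorldWideImporters?...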

backend/app/core/otel.py Normal file

@@ -0,0 +1,103 @@
from __future__ import annotations
import logging
from dataclasses import dataclass
from typing import Any
from fastapi import FastAPI
from opentelemetry import metrics, trace
from opentelemetry.baggage.propagation import W3CBaggagePropagator
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
from opentelemetry.instrumentation.logging import LoggingInstrumentor
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
from opentelemetry.propagate import set_global_textmap
from opentelemetry.propagators.composite import CompositePropagator
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
try:
from opentelemetry.instrumentation.system_metrics import SystemMetricsInstrumentor
except ImportError: # pragma: no cover - defensive fallback for minimal envs
SystemMetricsInstrumentor = None # type: ignore[assignment]
from app.core.config import Settings
LOGGER = logging.getLogger(__name__)
@dataclass
class TelemetryProviders:
tracer_provider: TracerProvider
meter_provider: MeterProvider
def configure_otel(settings: Settings) -> TelemetryProviders:
set_global_textmap(
CompositePropagator([TraceContextTextMapPropagator(), W3CBaggagePropagator()])
)
resource = Resource.create(
{
"service.name": settings.otel_service_name,
"service.namespace": settings.otel_service_namespace,
"deployment.environment": settings.app_env,
}
)
trace_exporter = OTLPSpanExporter(
endpoint=f"{settings.otel_collector_endpoint}/v1/traces",
timeout=settings.otel_export_timeout_ms / 1000,
)
tracer_provider = TracerProvider(resource=resource)
tracer_provider.add_span_processor(BatchSpanProcessor(trace_exporter))
trace.set_tracer_provider(tracer_provider)
metric_reader = PeriodicExportingMetricReader(
exporter=OTLPMetricExporter(
endpoint=f"{settings.otel_collector_endpoint}/v1/metrics",
timeout=settings.otel_export_timeout_ms / 1000,
),
export_interval_millis=10000,
)
meter_provider = MeterProvider(resource=resource, metric_readers=[metric_reader])
metrics.set_meter_provider(meter_provider)
LoggingInstrumentor().instrument(set_logging_format=True)
if SystemMetricsInstrumentor is not None:
SystemMetricsInstrumentor().instrument()
else:
LOGGER.warning(
"System metrics instrumentor not available, runtime host metrics disabled."
)
LOGGER.info("OpenTelemetry providers configured")
return TelemetryProviders(
tracer_provider=tracer_provider, meter_provider=meter_provider
)
def instrument_fastapi(app: FastAPI) -> None:
FastAPIInstrumentor.instrument_app(app)
def instrument_sqlalchemy_engines(engines: dict[str, Any]) -> None:
    # SQLAlchemyInstrumentor is a singleton: calling instrument() once per
    # engine would make every call after the first a no-op, so all engines
    # are passed in a single call.
    SQLAlchemyInstrumentor().instrument(engines=list(engines.values()))
def instrument_httpx_clients() -> None:
HTTPXClientInstrumentor().instrument()
def shutdown_otel(providers: TelemetryProviders) -> None:
HTTPXClientInstrumentor().uninstrument()
if SystemMetricsInstrumentor is not None:
SystemMetricsInstrumentor().uninstrument()
LoggingInstrumentor().uninstrument()
providers.meter_provider.shutdown()
providers.tracer_provider.shutdown()
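
A sketch of how these helpers might be wired together (the commit's application entry point is not shown, so the lifespan wiring below is an assumption):

from contextlib import asynccontextmanager
from fastapi import FastAPI
from app.core.config import settings
from app.core.otel import (
    configure_otel,
    instrument_fastapi,
    instrument_httpx_clients,
    instrument_sqlalchemy_engines,
    shutdown_otel,
)
from app.db.engine import create_warehouse_engines, dispose_engines

@asynccontextmanager
async def lifespan(app: FastAPI):
    providers = configure_otel(settings)   # traces + metrics -> OTLP collector
    engines = create_warehouse_engines()
    instrument_sqlalchemy_engines(engines)
    instrument_httpx_clients()
    app.state.engines = engines
    yield
    dispose_engines(engines)
    shutdown_otel(providers)               # uninstrument and flush exporters

app = FastAPI(lifespan=lifespan)
instrument_fastapi(app)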


@@ -0,0 +1,231 @@
from __future__ import annotations
from dataclasses import dataclass
from functools import lru_cache
from time import time
from uuid import uuid4
import jwt
from fastapi import Depends, Header, HTTPException, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from jwt import InvalidTokenError, PyJWKClient
from app.core.config import settings
BEARER_SCHEME = HTTPBearer(auto_error=False)
@dataclass
class FrontendPrincipal:
subject: str
scopes: list[str]
claims: dict
token: str
@dataclass
class InternalPrincipal:
subject: str
scopes: list[str]
claims: dict
token: str
class FrontendJWTVerifier:
@property
def jwks_url(self) -> str:
if not settings.frontend_jwt_jwks_url:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="FRONTEND_JWT_JWKS_URL is not configured.",
)
return settings.frontend_jwt_jwks_url
@lru_cache(maxsize=1)
def _jwks_client(self) -> PyJWKClient:
return PyJWKClient(self.jwks_url)
@staticmethod
def _extract_scopes(claims: dict) -> list[str]:
scope = claims.get("scope")
if isinstance(scope, str):
return [item for item in scope.split(" ") if item]
scp = claims.get("scp")
if isinstance(scp, list):
return [str(item) for item in scp]
return []
def verify(self, token: str) -> FrontendPrincipal:
if not settings.frontend_jwt_issuer_url:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="FRONTEND_JWT_ISSUER_URL is not configured.",
)
if not settings.frontend_jwt_audience:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="FRONTEND_JWT_AUDIENCE is not configured.",
)
try:
signing_key = self._jwks_client().get_signing_key_from_jwt(token).key
claims = jwt.decode(
token,
key=signing_key,
algorithms=[settings.frontend_jwt_algorithm],
audience=settings.frontend_jwt_audience,
issuer=settings.frontend_jwt_issuer_url,
leeway=settings.frontend_clock_skew_seconds,
)
except InvalidTokenError as exc:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid frontend access token.",
) from exc
subject = str(claims.get("sub") or "")
if not subject:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Frontend token missing subject.",
)
scopes = self._extract_scopes(claims)
required = settings.frontend_required_scopes_list
missing = [scope for scope in required if scope not in scopes]
if missing:
raise HTTPException(
status_code=status.HTTP_403_FORBIDDEN,
detail=f"Missing required scope(s): {', '.join(missing)}",
)
return FrontendPrincipal(
subject=subject, scopes=scopes, claims=claims, token=token
)
class InternalTokenManager:
token_type = "internal-service"
@staticmethod
def _assert_secret() -> str:
secret = settings.internal_service_shared_secret
if not secret or secret == "change-me":
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail="INTERNAL_SERVICE_SHARED_SECRET must be configured.",
)
if len(secret.encode("utf-8")) < 32:
raise HTTPException(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=(
"INTERNAL_SERVICE_SHARED_SECRET must be at least 32 bytes for "
"HS256 token signing."
),
)
return secret
def mint(
self,
*,
subject: str,
scopes: list[str],
source_service: str,
) -> str:
now = int(time())
payload = {
"sub": subject,
"scope": " ".join(scopes),
"iss": source_service,
"aud": settings.internal_service_token_audience,
"typ": self.token_type,
"iat": now,
"nbf": now,
"exp": now + settings.internal_service_token_ttl_seconds,
"jti": str(uuid4()),
}
return jwt.encode(payload, self._assert_secret(), algorithm="HS256")
def verify(self, token: str) -> InternalPrincipal:
try:
claims = jwt.decode(
token,
self._assert_secret(),
algorithms=["HS256"],
audience=settings.internal_service_token_audience,
options={
"require": ["sub", "iss", "aud", "exp", "iat", "nbf", "jti", "typ"]
},
leeway=settings.internal_token_clock_skew_seconds,
)
except InvalidTokenError as exc:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Invalid internal service token.",
) from exc
subject = str(claims.get("sub") or "")
if not subject:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Internal token missing subject.",
)
issuer = str(claims.get("iss") or "")
if issuer not in settings.internal_service_allowed_issuers_list:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Internal token issuer is not allowed.",
)
token_type = str(claims.get("typ") or "")
if token_type != self.token_type:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Internal token type is invalid.",
)
scope = claims.get("scope")
scopes = [item for item in str(scope).split(" ") if item] if scope else []
return InternalPrincipal(
subject=subject, scopes=scopes, claims=claims, token=token
)
@lru_cache(maxsize=1)
def get_frontend_verifier() -> FrontendJWTVerifier:
return FrontendJWTVerifier()
@lru_cache(maxsize=1)
def get_internal_token_manager() -> InternalTokenManager:
return InternalTokenManager()
def require_frontend_principal(
credentials: HTTPAuthorizationCredentials | None = Depends(BEARER_SCHEME),
) -> FrontendPrincipal:
if not settings.require_frontend_auth:
return FrontendPrincipal(subject="anonymous", scopes=[], claims={}, token="")
if credentials is None or credentials.scheme.lower() != "bearer":
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Missing bearer token.",
)
return get_frontend_verifier().verify(credentials.credentials)
def require_internal_principal(
internal_token: str | None = Header(default=None, alias="x-internal-service-token"),
) -> InternalPrincipal:
if not settings.internal_service_auth_enabled:
return InternalPrincipal(
subject="internal-unauth", scopes=[], claims={}, token=""
)
if not internal_token:
raise HTTPException(
status_code=status.HTTP_401_UNAUTHORIZED,
detail="Missing x-internal-service-token header.",
)
return get_internal_token_manager().verify(internal_token)
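
A minimal sketch of the internal-token round trip (illustrative only; this file's path is not shown in the diff, so the app.core.auth module path is an assumption):

from app.core.auth import get_internal_token_manager  # hypothetical module path

manager = get_internal_token_manager()
token = manager.mint(
    subject="user-123",
    scopes=["analytics:read"],
    source_service="api-gateway",  # must appear in INTERNAL_SERVICE_ALLOWED_ISSUERS
)
principal = manager.verify(token)  # raises HTTPException(401) on expiry or tampering
print(principal.subject, principal.scopes)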

backend/app/db/__init__.py Normal file

@@ -0,0 +1 @@
"""Database helpers for warehouse connections."""

backend/app/db/engine.py Normal file

@@ -0,0 +1,34 @@
from __future__ import annotations
from sqlalchemy import create_engine, event
from sqlalchemy.engine import Engine
from app.core.config import settings
def _create_read_only_engine(connection_url: str) -> Engine:
engine = create_engine(
connection_url, pool_pre_ping=True, pool_recycle=3600, future=True
)
    @event.listens_for(engine, "connect")
    def _on_connect(dbapi_connection, _connection_record) -> None:
        # Pin a predictable isolation level for analytics reads; the URL also
        # sets ApplicationIntent=ReadOnly, and ReadOnlyWarehouseClient rejects
        # non-SELECT statements before execution.
        cursor = dbapi_connection.cursor()
        try:
            cursor.execute("SET TRANSACTION ISOLATION LEVEL READ COMMITTED;")
        finally:
            cursor.close()
return engine
def create_warehouse_engines() -> dict[str, Engine]:
return {
"wwi": _create_read_only_engine(settings.wwi_connection_url),
"aw": _create_read_only_engine(settings.aw_connection_url),
}
def dispose_engines(engines: dict[str, Engine]) -> None:
for engine in engines.values():
engine.dispose()
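
Usage sketch (the smoke-test query is illustrative, not part of this commit):

from sqlalchemy import text
from app.db.engine import create_warehouse_engines, dispose_engines

engines = create_warehouse_engines()  # {"wwi": Engine, "aw": Engine}
try:
    with engines["aw"].connect() as conn:
        print(conn.execute(text("SELECT 1")).scalar())  # connectivity check
finally:
    dispose_engines(engines)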


@@ -0,0 +1,27 @@
from __future__ import annotations
from sqlalchemy import create_engine
from sqlalchemy.engine import Engine
from sqlalchemy.orm import Session, sessionmaker
from app.core.config import settings
from app.db.postgres_models import Base
def create_postgres_engine() -> Engine:
return create_engine(
settings.postgres_connection_url,
pool_pre_ping=True,
pool_recycle=3600,
future=True,
)
def initialize_postgres_schema(engine: Engine) -> None:
Base.metadata.create_all(bind=engine)
def create_postgres_session_factory(engine: Engine) -> sessionmaker[Session]:
return sessionmaker(
bind=engine, autoflush=False, autocommit=False, expire_on_commit=False
)
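
Usage sketch (this file's path is not shown in the diff, so app.db.postgres below is an assumption):

from app.db.postgres import (  # hypothetical module path
    create_postgres_engine,
    create_postgres_session_factory,
    initialize_postgres_schema,
)

engine = create_postgres_engine()
initialize_postgres_schema(engine)  # create_all() is a no-op for existing tables
SessionLocal = create_postgres_session_factory(engine)
with SessionLocal() as session:
    ...  # read/write the app persistence tables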

backend/app/db/postgres_models.py Normal file

@@ -0,0 +1,86 @@
from __future__ import annotations
from datetime import datetime, timezone
from uuid import uuid4
from sqlalchemy import JSON, DateTime, Float, Integer, String, Text
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
def _utcnow() -> datetime:
return datetime.now(timezone.utc)
class Base(DeclarativeBase):
pass
class AuditLog(Base):
__tablename__ = "audit_logs"
id: Mapped[str] = mapped_column(
String(36), primary_key=True, default=lambda: str(uuid4())
)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=_utcnow, index=True
)
method: Mapped[str] = mapped_column(String(12), index=True)
path: Mapped[str] = mapped_column(String(300), index=True)
query_string: Mapped[str] = mapped_column(String(1000), default="")
status_code: Mapped[int] = mapped_column(Integer, index=True)
duration_ms: Mapped[float] = mapped_column(Float)
trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
span_id: Mapped[str | None] = mapped_column(String(16), nullable=True, index=True)
client_ip: Mapped[str | None] = mapped_column(String(120), nullable=True)
user_agent: Mapped[str | None] = mapped_column(Text, nullable=True)
details: Mapped[dict] = mapped_column(JSON, default=dict)
class ForecastRun(Base):
__tablename__ = "forecast_runs"
id: Mapped[str] = mapped_column(
String(36), primary_key=True, default=lambda: str(uuid4())
)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=_utcnow, index=True
)
horizon_days: Mapped[int] = mapped_column(Integer)
point_count: Mapped[int] = mapped_column(Integer)
trigger_source: Mapped[str] = mapped_column(String(64), index=True)
trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
span_id: Mapped[str | None] = mapped_column(String(16), nullable=True, index=True)
payload: Mapped[list[dict]] = mapped_column(JSON, default=list)
class RankingRun(Base):
__tablename__ = "ranking_runs"
id: Mapped[str] = mapped_column(
String(36), primary_key=True, default=lambda: str(uuid4())
)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=_utcnow, index=True
)
top_n: Mapped[int] = mapped_column(Integer)
item_count: Mapped[int] = mapped_column(Integer)
trigger_source: Mapped[str] = mapped_column(String(64), index=True)
trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
span_id: Mapped[str | None] = mapped_column(String(16), nullable=True, index=True)
payload: Mapped[list[dict]] = mapped_column(JSON, default=list)
class RecommendationRun(Base):
__tablename__ = "recommendation_runs"
id: Mapped[str] = mapped_column(
String(36), primary_key=True, default=lambda: str(uuid4())
)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=_utcnow, index=True
)
item_count: Mapped[int] = mapped_column(Integer)
trigger_source: Mapped[str] = mapped_column(String(64), index=True)
trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
span_id: Mapped[str | None] = mapped_column(String(16), nullable=True, index=True)
payload: Mapped[list[dict]] = mapped_column(JSON, default=list)

backend/app/db/queries.py Normal file

@@ -0,0 +1,167 @@
from __future__ import annotations
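# Each constant below is a list of SQL candidates in priority order: the first
# statement targets the expected schema, and the later ones are simplified
# fallbacks that ReadOnlyWarehouseClient tries when an earlier candidate fails.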
AW_DAILY_SALES_QUERIES = [
"""
SELECT
CAST(d.FullDateAlternateKey AS date) AS sale_date,
SUM(f.SalesAmount) AS revenue,
SUM(f.TotalProductCost) AS cost,
SUM(f.OrderQuantity) AS quantity,
COUNT_BIG(*) AS orders
FROM dbo.FactInternetSales AS f
INNER JOIN dbo.DimDate AS d ON d.DateKey = f.OrderDateKey
GROUP BY CAST(d.FullDateAlternateKey AS date)
ORDER BY sale_date;
""",
"""
SELECT
CAST(OrderDate AS date) AS sale_date,
SUM(SalesAmount) AS revenue,
SUM(TotalProductCost) AS cost,
SUM(OrderQuantity) AS quantity,
COUNT_BIG(*) AS orders
FROM dbo.FactInternetSales
GROUP BY CAST(OrderDate AS date)
ORDER BY sale_date;
""",
]
WWI_DAILY_SALES_QUERIES = [
"""
SELECT
CAST(i.InvoiceDate AS date) AS sale_date,
SUM(il.ExtendedPrice) AS revenue,
SUM(il.TaxAmount) AS cost,
SUM(il.Quantity) AS quantity,
COUNT_BIG(DISTINCT i.InvoiceID) AS orders
FROM Sales.Invoices AS i
INNER JOIN Sales.InvoiceLines AS il ON il.InvoiceID = i.InvoiceID
GROUP BY CAST(i.InvoiceDate AS date)
ORDER BY sale_date;
""",
"""
SELECT
CAST(i.InvoiceDate AS date) AS sale_date,
SUM(il.UnitPrice * il.Quantity) AS revenue,
CAST(0 AS float) AS cost,
SUM(il.Quantity) AS quantity,
COUNT_BIG(DISTINCT i.InvoiceID) AS orders
FROM Sales.Invoices AS i
INNER JOIN Sales.InvoiceLines AS il ON il.InvoiceID = i.InvoiceID
GROUP BY CAST(i.InvoiceDate AS date)
ORDER BY sale_date;
""",
]
AW_PRODUCT_PERFORMANCE_QUERIES = [
"""
SELECT
p.ProductAlternateKey AS product_id,
p.EnglishProductName AS product_name,
COALESCE(sc.EnglishProductSubcategoryName, 'Unknown') AS category_name,
SUM(f.SalesAmount) AS revenue,
SUM(f.TotalProductCost) AS cost,
SUM(f.OrderQuantity) AS quantity,
COUNT_BIG(*) AS orders
FROM dbo.FactInternetSales AS f
INNER JOIN dbo.DimProduct AS p ON p.ProductKey = f.ProductKey
LEFT JOIN dbo.DimProductSubcategory AS sc ON sc.ProductSubcategoryKey = p.ProductSubcategoryKey
GROUP BY p.ProductAlternateKey, p.EnglishProductName, sc.EnglishProductSubcategoryName
ORDER BY revenue DESC;
""",
"""
SELECT
CAST(ProductKey AS nvarchar(100)) AS product_id,
CAST(ProductKey AS nvarchar(100)) AS product_name,
'Unknown' AS category_name,
SUM(SalesAmount) AS revenue,
SUM(TotalProductCost) AS cost,
SUM(OrderQuantity) AS quantity,
COUNT_BIG(*) AS orders
FROM dbo.FactInternetSales
GROUP BY ProductKey
ORDER BY revenue DESC;
""",
]
WWI_PRODUCT_PERFORMANCE_QUERIES = [
"""
SELECT
CAST(s.StockItemID AS nvarchar(100)) AS product_id,
s.StockItemName AS product_name,
COALESCE(cg.StockGroupName, 'Unknown') AS category_name,
SUM(il.ExtendedPrice) AS revenue,
SUM(il.TaxAmount) AS cost,
SUM(il.Quantity) AS quantity,
COUNT_BIG(*) AS orders
FROM Sales.InvoiceLines AS il
INNER JOIN Warehouse.StockItems AS s ON s.StockItemID = il.StockItemID
LEFT JOIN Warehouse.StockItemStockGroups AS sig ON sig.StockItemID = s.StockItemID
LEFT JOIN Warehouse.StockGroups AS cg ON cg.StockGroupID = sig.StockGroupID
GROUP BY s.StockItemID, s.StockItemName, cg.StockGroupName
ORDER BY revenue DESC;
""",
"""
SELECT
CAST(il.StockItemID AS nvarchar(100)) AS product_id,
CAST(il.StockItemID AS nvarchar(100)) AS product_name,
'Unknown' AS category_name,
SUM(il.UnitPrice * il.Quantity) AS revenue,
CAST(0 AS float) AS cost,
SUM(il.Quantity) AS quantity,
COUNT_BIG(*) AS orders
FROM Sales.InvoiceLines AS il
GROUP BY il.StockItemID
ORDER BY revenue DESC;
""",
]
AW_CUSTOMER_QUERIES = [
"""
SELECT
CAST(c.CustomerAlternateKey AS nvarchar(100)) AS customer_id,
c.FirstName + ' ' + c.LastName AS customer_name,
SUM(f.SalesAmount) AS revenue,
COUNT_BIG(*) AS orders
FROM dbo.FactInternetSales AS f
INNER JOIN dbo.DimCustomer AS c ON c.CustomerKey = f.CustomerKey
GROUP BY c.CustomerAlternateKey, c.FirstName, c.LastName
ORDER BY revenue DESC;
""",
"""
SELECT
CAST(CustomerKey AS nvarchar(100)) AS customer_id,
CAST(CustomerKey AS nvarchar(100)) AS customer_name,
SUM(SalesAmount) AS revenue,
COUNT_BIG(*) AS orders
FROM dbo.FactInternetSales
GROUP BY CustomerKey
ORDER BY revenue DESC;
""",
]
WWI_CUSTOMER_QUERIES = [
"""
SELECT
CAST(c.CustomerID AS nvarchar(100)) AS customer_id,
c.CustomerName AS customer_name,
SUM(il.ExtendedPrice) AS revenue,
COUNT_BIG(DISTINCT i.InvoiceID) AS orders
FROM Sales.Invoices AS i
INNER JOIN Sales.InvoiceLines AS il ON il.InvoiceID = i.InvoiceID
INNER JOIN Sales.Customers AS c ON c.CustomerID = i.CustomerID
GROUP BY c.CustomerID, c.CustomerName
ORDER BY revenue DESC;
""",
"""
SELECT
CAST(i.CustomerID AS nvarchar(100)) AS customer_id,
CAST(i.CustomerID AS nvarchar(100)) AS customer_name,
SUM(il.UnitPrice * il.Quantity) AS revenue,
COUNT_BIG(DISTINCT i.InvoiceID) AS orders
FROM Sales.Invoices AS i
INNER JOIN Sales.InvoiceLines AS il ON il.InvoiceID = i.InvoiceID
GROUP BY i.CustomerID
ORDER BY revenue DESC;
""",
]

backend/app/services/__init__.py Normal file

@@ -0,0 +1 @@
"""Business logic services."""

backend/app/services/analytics_service.py Normal file

@@ -0,0 +1,373 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import date, timedelta
from math import sqrt
import numpy as np
import pandas as pd
from opentelemetry import trace
from sklearn.linear_model import LinearRegression
from app.core.config import settings
from app.services.persistence_service import PersistenceService
from app.services.warehouse_service import ReadOnlyWarehouseClient
@dataclass
class DashboardSnapshot:
kpis: dict
history: list[dict]
forecasts: list[dict]
rankings: list[dict]
recommendations: list[dict]
class AnalyticsService:
def __init__(
self,
warehouse_client: ReadOnlyWarehouseClient,
persistence_service: PersistenceService | None = None,
) -> None:
self.warehouse_client = warehouse_client
self.persistence_service = persistence_service
self.tracer = trace.get_tracer(__name__)
@staticmethod
def _normalize_frame(df: pd.DataFrame, date_col: str = "sale_date") -> pd.DataFrame:
normalized = df.copy()
normalized[date_col] = pd.to_datetime(normalized[date_col], errors="coerce")
for numeric in ("revenue", "cost", "quantity", "orders"):
if numeric in normalized.columns:
normalized[numeric] = pd.to_numeric(
normalized[numeric], errors="coerce"
).fillna(0.0)
return normalized.dropna(subset=[date_col])
def load_sales_history(self, days_back: int | None = None) -> pd.DataFrame:
with self.tracer.start_as_current_span("analytics.load_sales_history"):
daily_sales = self._normalize_frame(
self.warehouse_client.fetch_daily_sales()
)
days = days_back or settings.default_history_days
min_date = pd.Timestamp(date.today() - timedelta(days=days))
filtered = daily_sales[daily_sales["sale_date"] >= min_date]
return (
filtered.groupby("sale_date", as_index=False)[
["revenue", "cost", "quantity", "orders"]
]
.sum()
.sort_values("sale_date")
)
def get_kpis(self) -> dict:
with self.tracer.start_as_current_span("analytics.kpis"):
sales = self.load_sales_history(days_back=180)
if sales.empty:
return {
"total_revenue": 0.0,
"gross_margin_pct": 0.0,
"total_quantity": 0.0,
"avg_order_value": 0.0,
"records_in_window": 0,
}
total_revenue = float(sales["revenue"].sum())
total_cost = float(sales["cost"].sum())
total_orders = max(float(sales["orders"].sum()), 1.0)
margin_pct = (
((total_revenue - total_cost) / total_revenue * 100)
if total_revenue
else 0.0
)
return {
"total_revenue": round(total_revenue, 2),
"gross_margin_pct": round(margin_pct, 2),
"total_quantity": round(float(sales["quantity"].sum()), 2),
"avg_order_value": round(total_revenue / total_orders, 2),
"records_in_window": int(sales.shape[0]),
}
def get_history_points(self, days_back: int | None = None) -> list[dict]:
with self.tracer.start_as_current_span("analytics.history_points"):
sales = self.load_sales_history(days_back=days_back)
if sales.empty:
return []
return [
{
"date": pd.Timestamp(row["sale_date"]).date().isoformat(),
"revenue": round(float(row["revenue"]), 2),
"cost": round(float(row["cost"]), 2),
"quantity": round(float(row["quantity"]), 2),
}
for _, row in sales.iterrows()
]
def get_forecast(
self,
horizon_days: int | None = None,
*,
trigger_source: str = "api.forecasts",
persist: bool = True,
) -> list[dict]:
with self.tracer.start_as_current_span("analytics.forecast"):
horizon = horizon_days or settings.forecast_horizon_days
sales = self.load_sales_history(days_back=720)
if sales.empty:
return []
series = (
sales.set_index("sale_date")["revenue"]
.sort_index()
.resample("D")
.sum()
.fillna(0.0)
)
y = series.values
x = np.arange(len(y), dtype=float).reshape(-1, 1)
model = LinearRegression()
model.fit(x, y)
baseline = model.predict(x)
residual = y - baseline
sigma = float(np.std(residual)) if len(residual) > 1 else 0.0
weekday_baseline = series.groupby(series.index.weekday).mean()
overall_mean = float(series.mean()) if len(series) else 0.0
weekday_factor = (
weekday_baseline / overall_mean
if overall_mean > 0
else pd.Series([1.0] * 7, index=range(7))
)
weekday_factor = weekday_factor.replace([np.inf, -np.inf], 1.0).fillna(1.0)
future_x = np.arange(len(y), len(y) + horizon, dtype=float).reshape(-1, 1)
raw_forecast = model.predict(future_x)
predictions: list[dict] = []
start_date = series.index.max().date()
for idx, point in enumerate(raw_forecast, start=1):
day = start_date + timedelta(days=idx)
factor = (
float(weekday_factor.loc[day.weekday()])
if day.weekday() in weekday_factor.index
else 1.0
)
yhat = max(float(point) * factor, 0.0)
ci = 1.96 * sigma * sqrt(1 + idx / max(len(y), 1))
predictions.append(
{
"date": day.isoformat(),
"predicted_revenue": round(yhat, 2),
"lower_bound": round(max(yhat - ci, 0.0), 2),
"upper_bound": round(yhat + ci, 2),
}
)
if persist and self.persistence_service is not None:
span_context = trace.get_current_span().get_span_context()
trace_id = (
f"{span_context.trace_id:032x}" if span_context.is_valid else None
)
span_id = (
f"{span_context.span_id:016x}" if span_context.is_valid else None
)
self.persistence_service.record_forecast_run(
horizon_days=horizon,
payload=predictions,
trigger_source=trigger_source,
trace_id=trace_id,
span_id=span_id,
)
return predictions
def get_rankings(
self,
top_n: int | None = None,
*,
trigger_source: str = "api.rankings",
persist: bool = True,
) -> list[dict]:
with self.tracer.start_as_current_span("analytics.rankings"):
n = top_n or settings.ranking_default_top_n
products = self.warehouse_client.fetch_product_performance().copy()
if products.empty:
return []
products["revenue"] = pd.to_numeric(
products["revenue"], errors="coerce"
).fillna(0.0)
products["cost"] = pd.to_numeric(products["cost"], errors="coerce").fillna(
0.0
)
products["quantity"] = pd.to_numeric(
products["quantity"], errors="coerce"
).fillna(0.0)
products["orders"] = pd.to_numeric(
products["orders"], errors="coerce"
).fillna(0.0)
grouped = (
products.groupby(
["product_id", "product_name", "category_name"], as_index=False
)[["revenue", "cost", "quantity", "orders"]]
.sum()
.sort_values("revenue", ascending=False)
)
grouped["margin_pct"] = np.where(
grouped["revenue"] > 0,
((grouped["revenue"] - grouped["cost"]) / grouped["revenue"]) * 100,
0.0,
)
revenue_norm = grouped["revenue"] / max(
float(grouped["revenue"].max()), 1.0
)
margin_norm = (grouped["margin_pct"] + 100) / 200
velocity_norm = grouped["quantity"] / max(
float(grouped["quantity"].max()), 1.0
)
grouped["score"] = (
(0.55 * revenue_norm)
+ (0.30 * margin_norm.clip(0, 1))
+ (0.15 * velocity_norm)
)
ranked = (
grouped.sort_values("score", ascending=False)
.head(n)
.reset_index(drop=True)
)
result = [
{
"rank": int(idx + 1),
"product_id": str(row["product_id"]),
"product_name": str(row["product_name"]),
"category": str(row["category_name"]),
"revenue": round(float(row["revenue"]), 2),
"margin_pct": round(float(row["margin_pct"]), 2),
"score": round(float(row["score"]) * 100, 2),
}
for idx, row in ranked.iterrows()
]
if persist and self.persistence_service is not None:
span_context = trace.get_current_span().get_span_context()
trace_id = (
f"{span_context.trace_id:032x}" if span_context.is_valid else None
)
span_id = (
f"{span_context.span_id:016x}" if span_context.is_valid else None
)
self.persistence_service.record_ranking_run(
top_n=n,
payload=result,
trigger_source=trigger_source,
trace_id=trace_id,
span_id=span_id,
)
return result
def get_recommendations(
self,
rankings: list[dict] | None = None,
*,
trigger_source: str = "api.recommendations",
persist: bool = True,
) -> list[dict]:
with self.tracer.start_as_current_span("analytics.recommendations"):
ranking_rows = (
rankings
if rankings is not None
else self.get_rankings(
top_n=20, trigger_source=trigger_source, persist=persist
)
)
customers = self.warehouse_client.fetch_customer_performance().copy()
if customers.empty:
customers = pd.DataFrame(columns=["customer_name", "revenue", "orders"])
recommendations: list[dict] = []
if ranking_rows:
champion = ranking_rows[0]
recommendations.append(
{
"title": "Double down on champion SKU",
"priority": "high",
"summary": (
f"Promote '{champion['product_name']}' with score {champion['score']:.2f} "
f"and margin {champion['margin_pct']:.2f}%."
),
}
)
low_margin = next(
(row for row in ranking_rows if row["margin_pct"] < 10), None
)
if low_margin:
recommendations.append(
{
"title": "Review pricing for low-margin bestseller",
"priority": "medium",
"summary": (
f"'{low_margin['product_name']}' has strong rank but only "
f"{low_margin['margin_pct']:.2f}% margin."
),
}
)
if not customers.empty:
customers["revenue"] = pd.to_numeric(
customers["revenue"], errors="coerce"
).fillna(0.0)
customers["orders"] = pd.to_numeric(
customers["orders"], errors="coerce"
).fillna(0.0)
customer = customers.sort_values("revenue", ascending=False).iloc[0]
recommendations.append(
{
"title": "Protect top customer relationship",
"priority": "high",
"summary": (
f"Prioritize retention for '{customer['customer_name']}' with "
f"{float(customer['orders']):.0f} orders and {float(customer['revenue']):.2f} revenue."
),
}
)
result = recommendations[:5]
if persist and self.persistence_service is not None:
span_context = trace.get_current_span().get_span_context()
trace_id = (
f"{span_context.trace_id:032x}" if span_context.is_valid else None
)
span_id = (
f"{span_context.span_id:016x}" if span_context.is_valid else None
)
self.persistence_service.record_recommendation_run(
payload=result,
trigger_source=trigger_source,
trace_id=trace_id,
span_id=span_id,
)
return result
def get_dashboard(self) -> DashboardSnapshot:
with self.tracer.start_as_current_span("analytics.dashboard"):
rankings = self.get_rankings(trigger_source="api.dashboard", persist=True)
return DashboardSnapshot(
kpis=self.get_kpis(),
history=self.get_history_points(),
forecasts=self.get_forecast(
trigger_source="api.dashboard", persist=True
),
rankings=rankings,
recommendations=self.get_recommendations(
rankings=rankings,
trigger_source="api.dashboard",
persist=True,
),
)
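
A sketch of standalone wiring (outside FastAPI dependency injection; the analytics module path is inferred from the sibling imports, not confirmed by this diff; with persistence_service omitted, runs are computed but never written to PostgreSQL):

from app.db.engine import create_warehouse_engines
from app.services.analytics_service import AnalyticsService
from app.services.warehouse_service import ReadOnlyWarehouseClient

service = AnalyticsService(ReadOnlyWarehouseClient(create_warehouse_engines()))
snapshot = service.get_dashboard()
print(snapshot.kpis["total_revenue"], len(snapshot.forecasts), len(snapshot.rankings))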

backend/app/services/persistence_service.py Normal file

@@ -0,0 +1,281 @@
from __future__ import annotations
import logging
from time import perf_counter
from opentelemetry import metrics, trace
from sqlalchemy import desc, select
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session, sessionmaker
from app.db.postgres_models import AuditLog, ForecastRun, RankingRun, RecommendationRun
LOGGER = logging.getLogger(__name__)
class PersistenceService:
def __init__(self, session_factory: sessionmaker[Session]) -> None:
self.session_factory = session_factory
self.tracer = trace.get_tracer(__name__)
self.meter = metrics.get_meter(__name__)
self.write_counter = self.meter.create_counter(
name="postgres_persist_writes_total",
description="Total writes to app persistence PostgreSQL",
)
self.write_latency = self.meter.create_histogram(
name="postgres_persist_write_latency_ms",
unit="ms",
description="Latency of app persistence write operations",
)
@staticmethod
def _to_audit_dict(row: AuditLog) -> dict:
return {
"id": row.id,
"created_at": row.created_at.isoformat(),
"method": row.method,
"path": row.path,
"query_string": row.query_string,
"status_code": row.status_code,
"duration_ms": row.duration_ms,
"trace_id": row.trace_id,
"span_id": row.span_id,
"client_ip": row.client_ip,
"user_agent": row.user_agent,
"details": row.details,
}
@staticmethod
def _to_forecast_dict(row: ForecastRun) -> dict:
return {
"id": row.id,
"created_at": row.created_at.isoformat(),
"horizon_days": row.horizon_days,
"point_count": row.point_count,
"trigger_source": row.trigger_source,
"trace_id": row.trace_id,
"span_id": row.span_id,
"payload": row.payload,
}
@staticmethod
def _to_ranking_dict(row: RankingRun) -> dict:
return {
"id": row.id,
"created_at": row.created_at.isoformat(),
"top_n": row.top_n,
"item_count": row.item_count,
"trigger_source": row.trigger_source,
"trace_id": row.trace_id,
"span_id": row.span_id,
"payload": row.payload,
}
@staticmethod
def _to_recommendation_dict(row: RecommendationRun) -> dict:
return {
"id": row.id,
"created_at": row.created_at.isoformat(),
"item_count": row.item_count,
"trigger_source": row.trigger_source,
"trace_id": row.trace_id,
"span_id": row.span_id,
"payload": row.payload,
}
def record_audit_log(
self,
*,
method: str,
path: str,
query_string: str,
status_code: int,
duration_ms: float,
trace_id: str | None,
span_id: str | None,
client_ip: str | None,
user_agent: str | None,
details: dict | None = None,
) -> None:
started = perf_counter()
with self.tracer.start_as_current_span("persist.audit_log"):
try:
with self.session_factory() as session:
session.add(
AuditLog(
method=method,
path=path,
query_string=query_string[:1000],
status_code=status_code,
duration_ms=duration_ms,
trace_id=trace_id,
span_id=span_id,
client_ip=client_ip,
user_agent=user_agent,
details=details or {},
)
)
session.commit()
self.write_counter.add(
1, attributes={"entity": "audit", "status": "ok"}
)
except SQLAlchemyError as exc:
LOGGER.exception("Failed to persist audit log: %s", exc)
self.write_counter.add(
1, attributes={"entity": "audit", "status": "error"}
)
finally:
self.write_latency.record(
(perf_counter() - started) * 1000,
attributes={"entity": "audit"},
)
def record_forecast_run(
self,
*,
horizon_days: int,
payload: list[dict],
trigger_source: str,
trace_id: str | None,
span_id: str | None,
) -> None:
started = perf_counter()
with self.tracer.start_as_current_span("persist.forecast_run"):
try:
with self.session_factory() as session:
session.add(
ForecastRun(
horizon_days=horizon_days,
point_count=len(payload),
trigger_source=trigger_source,
trace_id=trace_id,
span_id=span_id,
payload=payload,
)
)
session.commit()
self.write_counter.add(
1, attributes={"entity": "forecast", "status": "ok"}
)
except SQLAlchemyError as exc:
LOGGER.exception("Failed to persist forecast run: %s", exc)
self.write_counter.add(
1, attributes={"entity": "forecast", "status": "error"}
)
finally:
self.write_latency.record(
(perf_counter() - started) * 1000,
attributes={"entity": "forecast"},
)
def record_ranking_run(
self,
*,
top_n: int,
payload: list[dict],
trigger_source: str,
trace_id: str | None,
span_id: str | None,
) -> None:
started = perf_counter()
with self.tracer.start_as_current_span("persist.ranking_run"):
try:
with self.session_factory() as session:
session.add(
RankingRun(
top_n=top_n,
item_count=len(payload),
trigger_source=trigger_source,
trace_id=trace_id,
span_id=span_id,
payload=payload,
)
)
session.commit()
self.write_counter.add(
1, attributes={"entity": "ranking", "status": "ok"}
)
except SQLAlchemyError as exc:
LOGGER.exception("Failed to persist ranking run: %s", exc)
self.write_counter.add(
1, attributes={"entity": "ranking", "status": "error"}
)
finally:
self.write_latency.record(
(perf_counter() - started) * 1000,
attributes={"entity": "ranking"},
)
def record_recommendation_run(
self,
*,
payload: list[dict],
trigger_source: str,
trace_id: str | None,
span_id: str | None,
) -> None:
started = perf_counter()
with self.tracer.start_as_current_span("persist.recommendation_run"):
try:
with self.session_factory() as session:
session.add(
RecommendationRun(
item_count=len(payload),
trigger_source=trigger_source,
trace_id=trace_id,
span_id=span_id,
payload=payload,
)
)
session.commit()
self.write_counter.add(
1, attributes={"entity": "recommendation", "status": "ok"}
)
except SQLAlchemyError as exc:
LOGGER.exception("Failed to persist recommendation run: %s", exc)
self.write_counter.add(
1, attributes={"entity": "recommendation", "status": "error"}
)
finally:
self.write_latency.record(
(perf_counter() - started) * 1000,
attributes={"entity": "recommendation"},
)
def list_audit_logs(self, limit: int) -> list[dict]:
with self.tracer.start_as_current_span("persist.list_audit_logs"):
with self.session_factory() as session:
rows = session.execute(
select(AuditLog).order_by(desc(AuditLog.created_at)).limit(limit)
).scalars()
return [self._to_audit_dict(row) for row in rows]
def list_forecast_runs(self, limit: int) -> list[dict]:
with self.tracer.start_as_current_span("persist.list_forecast_runs"):
with self.session_factory() as session:
rows = session.execute(
select(ForecastRun)
.order_by(desc(ForecastRun.created_at))
.limit(limit)
).scalars()
return [self._to_forecast_dict(row) for row in rows]
def list_ranking_runs(self, limit: int) -> list[dict]:
with self.tracer.start_as_current_span("persist.list_ranking_runs"):
with self.session_factory() as session:
rows = session.execute(
select(RankingRun)
.order_by(desc(RankingRun.created_at))
.limit(limit)
).scalars()
return [self._to_ranking_dict(row) for row in rows]
def list_recommendation_runs(self, limit: int) -> list[dict]:
with self.tracer.start_as_current_span("persist.list_recommendation_runs"):
with self.session_factory() as session:
rows = session.execute(
select(RecommendationRun)
.order_by(desc(RecommendationRun.created_at))
.limit(limit)
).scalars()
return [self._to_recommendation_dict(row) for row in rows]
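
Usage sketch (payload values are illustrative; the app.db.postgres helper path is the same assumption as above):

from app.db.postgres import create_postgres_engine, create_postgres_session_factory  # hypothetical path
from app.services.persistence_service import PersistenceService

persistence = PersistenceService(create_postgres_session_factory(create_postgres_engine()))
persistence.record_forecast_run(
    horizon_days=30,
    payload=[{"date": "2026-03-21", "predicted_revenue": 1234.56}],
    trigger_source="manual.backfill",
    trace_id=None,
    span_id=None,
)
print(persistence.list_forecast_runs(limit=5))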

backend/app/services/warehouse_service.py Normal file

@@ -0,0 +1,101 @@
from __future__ import annotations
import hashlib
import logging
from collections.abc import Sequence
from time import perf_counter
import pandas as pd
from opentelemetry import metrics, trace
from sqlalchemy import text
from sqlalchemy.engine import Engine
from sqlalchemy.exc import SQLAlchemyError
from app.db import queries
LOGGER = logging.getLogger(__name__)
class ReadOnlyWarehouseClient:
def __init__(self, engines: dict[str, Engine]) -> None:
self.engines = engines
self.tracer = trace.get_tracer(__name__)
self.meter = metrics.get_meter(__name__)
self.query_counter = self.meter.create_counter(
name="warehouse_queries_total",
description="Total warehouse query executions",
)
self.query_latency = self.meter.create_histogram(
name="warehouse_query_latency_ms",
unit="ms",
description="Warehouse query latency",
)
def _validate_read_only_query(self, sql: str) -> None:
normalized = sql.strip().lower()
if not (normalized.startswith("select") or normalized.startswith("with")):
raise ValueError("Only read-only SELECT/CTE SQL statements are allowed.")
def _run_query_list(
self, source: str, sql_candidates: Sequence[str]
) -> pd.DataFrame:
engine = self.engines[source]
last_error: Exception | None = None
for candidate in sql_candidates:
self._validate_read_only_query(candidate)
query_hash = hashlib.sha256(candidate.encode("utf-8")).hexdigest()[:12]
with self.tracer.start_as_current_span("warehouse.query") as span:
span.set_attribute("db.system", "mssql")
span.set_attribute("db.source", source)
span.set_attribute("db.query.hash", query_hash)
started = perf_counter()
try:
with engine.connect() as conn:
with self.tracer.start_as_current_span(
"warehouse.query.execute"
):
df = pd.read_sql_query(sql=text(candidate), con=conn)
elapsed_ms = (perf_counter() - started) * 1000
self.query_latency.record(elapsed_ms, attributes={"source": source})
self.query_counter.add(
1, attributes={"source": source, "status": "ok"}
)
return df
except SQLAlchemyError as exc:
last_error = exc
elapsed_ms = (perf_counter() - started) * 1000
self.query_latency.record(elapsed_ms, attributes={"source": source})
self.query_counter.add(
1, attributes={"source": source, "status": "error"}
)
LOGGER.warning(
"Query failed for %s with hash %s: %s", source, query_hash, exc
)
if last_error is not None:
raise RuntimeError(
f"All query candidates failed for source '{source}'."
) from last_error
return pd.DataFrame()
def fetch_daily_sales(self) -> pd.DataFrame:
aw = self._run_query_list("aw", queries.AW_DAILY_SALES_QUERIES)
aw["source"] = "AdventureWorks2022DWH"
wwi = self._run_query_list("wwi", queries.WWI_DAILY_SALES_QUERIES)
wwi["source"] = "WorldWideImporters"
return pd.concat([aw, wwi], ignore_index=True)
def fetch_product_performance(self) -> pd.DataFrame:
aw = self._run_query_list("aw", queries.AW_PRODUCT_PERFORMANCE_QUERIES)
aw["source"] = "AdventureWorks2022DWH"
wwi = self._run_query_list("wwi", queries.WWI_PRODUCT_PERFORMANCE_QUERIES)
wwi["source"] = "WorldWideImporters"
return pd.concat([aw, wwi], ignore_index=True)
def fetch_customer_performance(self) -> pd.DataFrame:
aw = self._run_query_list("aw", queries.AW_CUSTOMER_QUERIES)
aw["source"] = "AdventureWorks2022DWH"
wwi = self._run_query_list("wwi", queries.WWI_CUSTOMER_QUERIES)
wwi["source"] = "WorldWideImporters"
return pd.concat([aw, wwi], ignore_index=True)
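
Usage sketch (not part of this commit; requires reachable MSSQL instances for both sources):

from app.db.engine import create_warehouse_engines, dispose_engines
from app.services.warehouse_service import ReadOnlyWarehouseClient

engines = create_warehouse_engines()
try:
    client = ReadOnlyWarehouseClient(engines)
    daily = client.fetch_daily_sales()  # both warehouses, tagged via a 'source' column
    print(daily.groupby("source")["revenue"].sum())
finally:
    dispose_engines(engines)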