Add initial work from Codex
backend/app/__init__.py  (new file, 1 line)
@@ -0,0 +1 @@
"""Backend application package."""

backend/app/core/config.py  (new file, 135 lines)
@@ -0,0 +1,135 @@
from __future__ import annotations

from functools import lru_cache
from urllib.parse import quote_plus

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    app_name: str = "otel-bi-backend"
    app_env: str = "dev"
    log_level: str = "INFO"

    api_host: str = "0.0.0.0"
    api_port: int = 8000

    cors_origins: str = "http://localhost:5173"
    request_timeout_seconds: float = 20.0

    mssql_host: str = "localhost"
    mssql_port: int = 1433
    mssql_username: str = "sa"
    mssql_password: str = "Password!123"
    mssql_driver: str = "ODBC Driver 18 for SQL Server"
    mssql_trust_server_certificate: bool = False

    wwi_database: str = "WorldWideImporters"
    aw_database: str = "AdventureWorks2022DWH"
    wwi_connection_string: str | None = None
    aw_connection_string: str | None = None
    postgres_host: str = "localhost"
    postgres_port: int = 5432
    postgres_database: str = "otel_bi_app"
    postgres_username: str = "otel_bi_app"
    postgres_password: str = "otel_bi_app"
    postgres_sslmode: str = "require"
    postgres_connection_string: str | None = None
    postgres_required: bool = True
    query_service_url: str = "http://localhost:8101"
    analytics_service_url: str = "http://localhost:8102"
    persistence_service_url: str = "http://localhost:8103"
    require_frontend_auth: bool = True
    frontend_jwt_issuer_url: str = ""
    frontend_jwt_audience: str = ""
    frontend_jwt_jwks_url: str | None = None
    frontend_jwt_algorithm: str = "RS256"
    frontend_required_scopes: str = ""
    frontend_clock_skew_seconds: int = Field(default=30, ge=0, le=300)
    internal_service_auth_enabled: bool = True
    internal_service_shared_secret: str = "change-me"
    internal_service_token_ttl_seconds: int = Field(default=120, ge=30, le=900)
    internal_service_token_audience: str = "bi-internal"
    internal_service_allowed_issuers: str = "api-gateway"
    internal_token_clock_skew_seconds: int = Field(default=15, ge=0, le=120)

    otel_service_name: str = "otel-bi-backend"
    otel_service_namespace: str = "final-thesis"
    otel_collector_endpoint: str = "http://localhost:4318"
    otel_export_timeout_ms: int = 10000

    forecast_horizon_days: int = Field(default=30, ge=7, le=180)
    default_history_days: int = Field(default=365, ge=30, le=1460)
    ranking_default_top_n: int = Field(default=10, ge=3, le=100)
    storage_default_limit: int = Field(default=50, ge=10, le=500)

    @property
    def cors_origins_list(self) -> list[str]:
        return [
            origin.strip() for origin in self.cors_origins.split(",") if origin.strip()
        ]

    @property
    def frontend_required_scopes_list(self) -> list[str]:
        return [
            scope.strip()
            for scope in self.frontend_required_scopes.split(" ")
            if scope.strip()
        ]

    @property
    def internal_service_allowed_issuers_list(self) -> list[str]:
        return [
            issuer.strip()
            for issuer in self.internal_service_allowed_issuers.split(",")
            if issuer.strip()
        ]

    def _build_mssql_connection_url(self, database: str) -> str:
        driver = quote_plus(self.mssql_driver)
        user = quote_plus(self.mssql_username)
        password = quote_plus(self.mssql_password)
        trust_cert = "yes" if self.mssql_trust_server_certificate else "no"
        return (
            f"mssql+pyodbc://{user}:{password}@{self.mssql_host}:{self.mssql_port}/{database}"
            f"?driver={driver}&TrustServerCertificate={trust_cert}&ApplicationIntent=ReadOnly"
        )

    @property
    def wwi_connection_url(self) -> str:
        return self.wwi_connection_string or self._build_mssql_connection_url(
            self.wwi_database
        )

    @property
    def aw_connection_url(self) -> str:
        return self.aw_connection_string or self._build_mssql_connection_url(
            self.aw_database
        )

    @property
    def postgres_connection_url(self) -> str:
        if self.postgres_connection_string:
            return self.postgres_connection_string

        user = quote_plus(self.postgres_username)
        password = quote_plus(self.postgres_password)
        return (
            f"postgresql+psycopg://{user}:{password}@{self.postgres_host}:{self.postgres_port}/"
            f"{self.postgres_database}?sslmode={self.postgres_sslmode}"
        )


@lru_cache
def get_settings() -> Settings:
    return Settings()


settings = get_settings()

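A minimal usage sketch (not part of the commit) of the cached settings object and its derived properties; CORS_ORIGINS follows pydantic-settings' default field-to-env-var mapping, and the second origin is an illustrative value.

# Hypothetical usage sketch, not included in this change.
import os

os.environ["CORS_ORIGINS"] = "http://localhost:5173,https://bi.example.test"

from app.core.config import get_settings

settings = get_settings()
print(settings.cors_origins_list)   # ['http://localhost:5173', 'https://bi.example.test']
print(settings.wwi_connection_url)  # mssql+pyodbc://sa:...@localhost:1433/WorldWideImporters?...
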
backend/app/core/otel.py  (new file, 103 lines)
@@ -0,0 +1,103 @@
from __future__ import annotations

import logging
from dataclasses import dataclass
from typing import Any

from fastapi import FastAPI
from opentelemetry import metrics, trace
from opentelemetry.baggage.propagation import W3CBaggagePropagator
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
from opentelemetry.instrumentation.logging import LoggingInstrumentor
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
from opentelemetry.propagate import set_global_textmap
from opentelemetry.propagators.composite import CompositePropagator
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator

try:
    from opentelemetry.instrumentation.system_metrics import SystemMetricsInstrumentor
except ImportError:  # pragma: no cover - defensive fallback for minimal envs
    SystemMetricsInstrumentor = None  # type: ignore[assignment]

from app.core.config import Settings

LOGGER = logging.getLogger(__name__)


@dataclass
class TelemetryProviders:
    tracer_provider: TracerProvider
    meter_provider: MeterProvider


def configure_otel(settings: Settings) -> TelemetryProviders:
    set_global_textmap(
        CompositePropagator([TraceContextTextMapPropagator(), W3CBaggagePropagator()])
    )
    resource = Resource.create(
        {
            "service.name": settings.otel_service_name,
            "service.namespace": settings.otel_service_namespace,
            "deployment.environment": settings.app_env,
        }
    )

    trace_exporter = OTLPSpanExporter(
        endpoint=f"{settings.otel_collector_endpoint}/v1/traces",
        timeout=settings.otel_export_timeout_ms / 1000,
    )
    tracer_provider = TracerProvider(resource=resource)
    tracer_provider.add_span_processor(BatchSpanProcessor(trace_exporter))
    trace.set_tracer_provider(tracer_provider)

    metric_reader = PeriodicExportingMetricReader(
        exporter=OTLPMetricExporter(
            endpoint=f"{settings.otel_collector_endpoint}/v1/metrics",
            timeout=settings.otel_export_timeout_ms / 1000,
        ),
        export_interval_millis=10000,
    )
    meter_provider = MeterProvider(resource=resource, metric_readers=[metric_reader])
    metrics.set_meter_provider(meter_provider)

    LoggingInstrumentor().instrument(set_logging_format=True)
    if SystemMetricsInstrumentor is not None:
        SystemMetricsInstrumentor().instrument()
    else:
        LOGGER.warning(
            "System metrics instrumentor not available, runtime host metrics disabled."
        )
    LOGGER.info("OpenTelemetry providers configured")
    return TelemetryProviders(
        tracer_provider=tracer_provider, meter_provider=meter_provider
    )


def instrument_fastapi(app: FastAPI) -> None:
    FastAPIInstrumentor.instrument_app(app)


def instrument_sqlalchemy_engines(engines: dict[str, Any]) -> None:
    for engine in engines.values():
        SQLAlchemyInstrumentor().instrument(engine=engine)


def instrument_httpx_clients() -> None:
    HTTPXClientInstrumentor().instrument()


def shutdown_otel(providers: TelemetryProviders) -> None:
    HTTPXClientInstrumentor().uninstrument()
    if SystemMetricsInstrumentor is not None:
        SystemMetricsInstrumentor().uninstrument()
    LoggingInstrumentor().uninstrument()
    providers.meter_provider.shutdown()
    providers.tracer_provider.shutdown()

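No application entry point is part of this commit; the following is a hypothetical wiring sketch showing how configure_otel, instrument_fastapi, and shutdown_otel could be attached to a FastAPI app lifespan. The module layout is an assumption.

# Hypothetical main.py wiring sketch, not included in this change.
from contextlib import asynccontextmanager

from fastapi import FastAPI

from app.core.config import get_settings
from app.core.otel import (
    configure_otel,
    instrument_fastapi,
    instrument_httpx_clients,
    shutdown_otel,
)


@asynccontextmanager
async def lifespan(app: FastAPI):
    # Configure providers once at startup and flush exporters on shutdown.
    providers = configure_otel(get_settings())
    instrument_httpx_clients()
    yield
    shutdown_otel(providers)


app = FastAPI(lifespan=lifespan)
instrument_fastapi(app)
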
backend/app/core/security.py  (new file, 231 lines)
@@ -0,0 +1,231 @@
from __future__ import annotations

from dataclasses import dataclass
from functools import lru_cache
from time import time
from uuid import uuid4

import jwt
from fastapi import Depends, Header, HTTPException, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from jwt import InvalidTokenError, PyJWKClient

from app.core.config import settings

BEARER_SCHEME = HTTPBearer(auto_error=False)


@dataclass
class FrontendPrincipal:
    subject: str
    scopes: list[str]
    claims: dict
    token: str


@dataclass
class InternalPrincipal:
    subject: str
    scopes: list[str]
    claims: dict
    token: str


class FrontendJWTVerifier:
    @property
    def jwks_url(self) -> str:
        if not settings.frontend_jwt_jwks_url:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="FRONTEND_JWT_JWKS_URL is not configured.",
            )
        return settings.frontend_jwt_jwks_url

    @lru_cache(maxsize=1)
    def _jwks_client(self) -> PyJWKClient:
        return PyJWKClient(self.jwks_url)

    @staticmethod
    def _extract_scopes(claims: dict) -> list[str]:
        scope = claims.get("scope")
        if isinstance(scope, str):
            return [item for item in scope.split(" ") if item]
        scp = claims.get("scp")
        if isinstance(scp, list):
            return [str(item) for item in scp]
        return []

    def verify(self, token: str) -> FrontendPrincipal:
        if not settings.frontend_jwt_issuer_url:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="FRONTEND_JWT_ISSUER_URL is not configured.",
            )
        if not settings.frontend_jwt_audience:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="FRONTEND_JWT_AUDIENCE is not configured.",
            )

        try:
            signing_key = self._jwks_client().get_signing_key_from_jwt(token).key
            claims = jwt.decode(
                token,
                key=signing_key,
                algorithms=[settings.frontend_jwt_algorithm],
                audience=settings.frontend_jwt_audience,
                issuer=settings.frontend_jwt_issuer_url,
                leeway=settings.frontend_clock_skew_seconds,
            )
        except InvalidTokenError as exc:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Invalid frontend access token.",
            ) from exc

        subject = str(claims.get("sub") or "")
        if not subject:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Frontend token missing subject.",
            )

        scopes = self._extract_scopes(claims)
        required = settings.frontend_required_scopes_list
        missing = [scope for scope in required if scope not in scopes]
        if missing:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail=f"Missing required scope(s): {', '.join(missing)}",
            )
        return FrontendPrincipal(
            subject=subject, scopes=scopes, claims=claims, token=token
        )


class InternalTokenManager:
    token_type = "internal-service"

    @staticmethod
    def _assert_secret() -> str:
        secret = settings.internal_service_shared_secret
        if not secret or secret == "change-me":
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="INTERNAL_SERVICE_SHARED_SECRET must be configured.",
            )
        if len(secret.encode("utf-8")) < 32:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=(
                    "INTERNAL_SERVICE_SHARED_SECRET must be at least 32 bytes for "
                    "HS256 token signing."
                ),
            )
        return secret

    def mint(
        self,
        *,
        subject: str,
        scopes: list[str],
        source_service: str,
    ) -> str:
        now = int(time())
        payload = {
            "sub": subject,
            "scope": " ".join(scopes),
            "iss": source_service,
            "aud": settings.internal_service_token_audience,
            "typ": self.token_type,
            "iat": now,
            "nbf": now,
            "exp": now + settings.internal_service_token_ttl_seconds,
            "jti": str(uuid4()),
        }
        return jwt.encode(payload, self._assert_secret(), algorithm="HS256")

    def verify(self, token: str) -> InternalPrincipal:
        try:
            claims = jwt.decode(
                token,
                self._assert_secret(),
                algorithms=["HS256"],
                audience=settings.internal_service_token_audience,
                options={
                    "require": ["sub", "iss", "aud", "exp", "iat", "nbf", "jti", "typ"]
                },
                leeway=settings.internal_token_clock_skew_seconds,
            )
        except InvalidTokenError as exc:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Invalid internal service token.",
            ) from exc

        subject = str(claims.get("sub") or "")
        if not subject:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Internal token missing subject.",
            )

        issuer = str(claims.get("iss") or "")
        if issuer not in settings.internal_service_allowed_issuers_list:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Internal token issuer is not allowed.",
            )

        token_type = str(claims.get("typ") or "")
        if token_type != self.token_type:
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail="Internal token type is invalid.",
            )

        scope = claims.get("scope")
        scopes = [item for item in str(scope).split(" ") if item] if scope else []
        return InternalPrincipal(
            subject=subject, scopes=scopes, claims=claims, token=token
        )


@lru_cache(maxsize=1)
def get_frontend_verifier() -> FrontendJWTVerifier:
    return FrontendJWTVerifier()


@lru_cache(maxsize=1)
def get_internal_token_manager() -> InternalTokenManager:
    return InternalTokenManager()


def require_frontend_principal(
    credentials: HTTPAuthorizationCredentials | None = Depends(BEARER_SCHEME),
) -> FrontendPrincipal:
    if not settings.require_frontend_auth:
        return FrontendPrincipal(subject="anonymous", scopes=[], claims={}, token="")

    if credentials is None or credentials.scheme.lower() != "bearer":
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Missing bearer token.",
        )
    return get_frontend_verifier().verify(credentials.credentials)


def require_internal_principal(
    internal_token: str | None = Header(default=None, alias="x-internal-service-token"),
) -> InternalPrincipal:
    if not settings.internal_service_auth_enabled:
        return InternalPrincipal(
            subject="internal-unauth", scopes=[], claims={}, token=""
        )

    if not internal_token:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Missing x-internal-service-token header.",
        )
    return get_internal_token_manager().verify(internal_token)

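A hypothetical route sketch (not part of the commit) showing the two dependencies in use: the frontend principal guards an endpoint, and a short-lived internal token is minted for a downstream call. The route path and scope name are illustrative, and INTERNAL_SERVICE_SHARED_SECRET must be configured for minting to succeed.

# Hypothetical route sketch, not included in this change.
from fastapi import APIRouter, Depends

from app.core.security import (
    FrontendPrincipal,
    get_internal_token_manager,
    require_frontend_principal,
)

router = APIRouter()


@router.get("/reports/kpis")
def read_kpis(
    principal: FrontendPrincipal = Depends(require_frontend_principal),
) -> dict:
    # Mint a short-lived HS256 token; "api-gateway" matches the default allowed issuer.
    internal_token = get_internal_token_manager().mint(
        subject=principal.subject,
        scopes=["reports:read"],
        source_service="api-gateway",
    )
    return {"user": principal.subject, "x-internal-service-token": internal_token}
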
backend/app/db/__init__.py  (new file, 1 line)
@@ -0,0 +1 @@
"""Database helpers for warehouse connections."""

backend/app/db/engine.py  (new file, 34 lines)
@@ -0,0 +1,34 @@
from __future__ import annotations

from sqlalchemy import create_engine, event
from sqlalchemy.engine import Engine

from app.core.config import settings


def _create_read_only_engine(connection_url: str) -> Engine:
    engine = create_engine(
        connection_url, pool_pre_ping=True, pool_recycle=3600, future=True
    )

    @event.listens_for(engine, "connect")
    def _on_connect(dbapi_connection, _connection_record) -> None:
        cursor = dbapi_connection.cursor()
        try:
            cursor.execute("SET TRANSACTION ISOLATION LEVEL READ COMMITTED;")
        finally:
            cursor.close()

    return engine


def create_warehouse_engines() -> dict[str, Engine]:
    return {
        "wwi": _create_read_only_engine(settings.wwi_connection_url),
        "aw": _create_read_only_engine(settings.aw_connection_url),
    }


def dispose_engines(engines: dict[str, Engine]) -> None:
    for engine in engines.values():
        engine.dispose()

backend/app/db/postgres.py  (new file, 27 lines)
@@ -0,0 +1,27 @@
from __future__ import annotations

from sqlalchemy import create_engine
from sqlalchemy.engine import Engine
from sqlalchemy.orm import Session, sessionmaker

from app.core.config import settings
from app.db.postgres_models import Base


def create_postgres_engine() -> Engine:
    return create_engine(
        settings.postgres_connection_url,
        pool_pre_ping=True,
        pool_recycle=3600,
        future=True,
    )


def initialize_postgres_schema(engine: Engine) -> None:
    Base.metadata.create_all(bind=engine)


def create_postgres_session_factory(engine: Engine) -> sessionmaker[Session]:
    return sessionmaker(
        bind=engine, autoflush=False, autocommit=False, expire_on_commit=False
    )

backend/app/db/postgres_models.py  (new file, 86 lines)
@@ -0,0 +1,86 @@
from __future__ import annotations

from datetime import datetime, timezone
from uuid import uuid4

from sqlalchemy import JSON, DateTime, Float, Integer, String, Text
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column


def _utcnow() -> datetime:
    return datetime.now(timezone.utc)


class Base(DeclarativeBase):
    pass


class AuditLog(Base):
    __tablename__ = "audit_logs"

    id: Mapped[str] = mapped_column(
        String(36), primary_key=True, default=lambda: str(uuid4())
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow, index=True
    )
    method: Mapped[str] = mapped_column(String(12), index=True)
    path: Mapped[str] = mapped_column(String(300), index=True)
    query_string: Mapped[str] = mapped_column(String(1000), default="")
    status_code: Mapped[int] = mapped_column(Integer, index=True)
    duration_ms: Mapped[float] = mapped_column(Float)
    trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
    span_id: Mapped[str | None] = mapped_column(String(16), nullable=True, index=True)
    client_ip: Mapped[str | None] = mapped_column(String(120), nullable=True)
    user_agent: Mapped[str | None] = mapped_column(Text, nullable=True)
    details: Mapped[dict] = mapped_column(JSON, default=dict)


class ForecastRun(Base):
    __tablename__ = "forecast_runs"

    id: Mapped[str] = mapped_column(
        String(36), primary_key=True, default=lambda: str(uuid4())
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow, index=True
    )
    horizon_days: Mapped[int] = mapped_column(Integer)
    point_count: Mapped[int] = mapped_column(Integer)
    trigger_source: Mapped[str] = mapped_column(String(64), index=True)
    trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
    span_id: Mapped[str | None] = mapped_column(String(16), nullable=True, index=True)
    payload: Mapped[list[dict]] = mapped_column(JSON, default=list)


class RankingRun(Base):
    __tablename__ = "ranking_runs"

    id: Mapped[str] = mapped_column(
        String(36), primary_key=True, default=lambda: str(uuid4())
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow, index=True
    )
    top_n: Mapped[int] = mapped_column(Integer)
    item_count: Mapped[int] = mapped_column(Integer)
    trigger_source: Mapped[str] = mapped_column(String(64), index=True)
    trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
    span_id: Mapped[str | None] = mapped_column(String(16), nullable=True, index=True)
    payload: Mapped[list[dict]] = mapped_column(JSON, default=list)


class RecommendationRun(Base):
    __tablename__ = "recommendation_runs"

    id: Mapped[str] = mapped_column(
        String(36), primary_key=True, default=lambda: str(uuid4())
    )
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), default=_utcnow, index=True
    )
    item_count: Mapped[int] = mapped_column(Integer)
    trigger_source: Mapped[str] = mapped_column(String(64), index=True)
    trace_id: Mapped[str | None] = mapped_column(String(32), nullable=True, index=True)
    span_id: Mapped[str | None] = mapped_column(String(16), nullable=True, index=True)
    payload: Mapped[list[dict]] = mapped_column(JSON, default=list)

backend/app/db/queries.py  (new file, 167 lines)
@@ -0,0 +1,167 @@
from __future__ import annotations

AW_DAILY_SALES_QUERIES = [
    """
    SELECT
        CAST(d.FullDateAlternateKey AS date) AS sale_date,
        SUM(f.SalesAmount) AS revenue,
        SUM(f.TotalProductCost) AS cost,
        SUM(f.OrderQuantity) AS quantity,
        COUNT_BIG(*) AS orders
    FROM dbo.FactInternetSales AS f
    INNER JOIN dbo.DimDate AS d ON d.DateKey = f.OrderDateKey
    GROUP BY CAST(d.FullDateAlternateKey AS date)
    ORDER BY sale_date;
    """,
    """
    SELECT
        CAST(OrderDate AS date) AS sale_date,
        SUM(SalesAmount) AS revenue,
        SUM(TotalProductCost) AS cost,
        SUM(OrderQuantity) AS quantity,
        COUNT_BIG(*) AS orders
    FROM dbo.FactInternetSales
    GROUP BY CAST(OrderDate AS date)
    ORDER BY sale_date;
    """,
]

WWI_DAILY_SALES_QUERIES = [
    """
    SELECT
        CAST(i.InvoiceDate AS date) AS sale_date,
        SUM(il.ExtendedPrice) AS revenue,
        SUM(il.TaxAmount) AS cost,
        SUM(il.Quantity) AS quantity,
        COUNT_BIG(DISTINCT i.InvoiceID) AS orders
    FROM Sales.Invoices AS i
    INNER JOIN Sales.InvoiceLines AS il ON il.InvoiceID = i.InvoiceID
    GROUP BY CAST(i.InvoiceDate AS date)
    ORDER BY sale_date;
    """,
    """
    SELECT
        CAST(i.InvoiceDate AS date) AS sale_date,
        SUM(il.UnitPrice * il.Quantity) AS revenue,
        CAST(0 AS float) AS cost,
        SUM(il.Quantity) AS quantity,
        COUNT_BIG(DISTINCT i.InvoiceID) AS orders
    FROM Sales.Invoices AS i
    INNER JOIN Sales.InvoiceLines AS il ON il.InvoiceID = i.InvoiceID
    GROUP BY CAST(i.InvoiceDate AS date)
    ORDER BY sale_date;
    """,
]

AW_PRODUCT_PERFORMANCE_QUERIES = [
    """
    SELECT
        p.ProductAlternateKey AS product_id,
        p.EnglishProductName AS product_name,
        COALESCE(sc.EnglishProductSubcategoryName, 'Unknown') AS category_name,
        SUM(f.SalesAmount) AS revenue,
        SUM(f.TotalProductCost) AS cost,
        SUM(f.OrderQuantity) AS quantity,
        COUNT_BIG(*) AS orders
    FROM dbo.FactInternetSales AS f
    INNER JOIN dbo.DimProduct AS p ON p.ProductKey = f.ProductKey
    LEFT JOIN dbo.DimProductSubcategory AS sc ON sc.ProductSubcategoryKey = p.ProductSubcategoryKey
    GROUP BY p.ProductAlternateKey, p.EnglishProductName, sc.EnglishProductSubcategoryName
    ORDER BY revenue DESC;
    """,
    """
    SELECT
        CAST(ProductKey AS nvarchar(100)) AS product_id,
        CAST(ProductKey AS nvarchar(100)) AS product_name,
        'Unknown' AS category_name,
        SUM(SalesAmount) AS revenue,
        SUM(TotalProductCost) AS cost,
        SUM(OrderQuantity) AS quantity,
        COUNT_BIG(*) AS orders
    FROM dbo.FactInternetSales
    GROUP BY ProductKey
    ORDER BY revenue DESC;
    """,
]

WWI_PRODUCT_PERFORMANCE_QUERIES = [
    """
    SELECT
        CAST(s.StockItemID AS nvarchar(100)) AS product_id,
        s.StockItemName AS product_name,
        COALESCE(cg.StockGroupName, 'Unknown') AS category_name,
        SUM(il.ExtendedPrice) AS revenue,
        SUM(il.TaxAmount) AS cost,
        SUM(il.Quantity) AS quantity,
        COUNT_BIG(*) AS orders
    FROM Sales.InvoiceLines AS il
    INNER JOIN Warehouse.StockItems AS s ON s.StockItemID = il.StockItemID
    LEFT JOIN Warehouse.StockItemStockGroups AS sig ON sig.StockItemID = s.StockItemID
    LEFT JOIN Warehouse.StockGroups AS cg ON cg.StockGroupID = sig.StockGroupID
    GROUP BY s.StockItemID, s.StockItemName, cg.StockGroupName
    ORDER BY revenue DESC;
    """,
    """
    SELECT
        CAST(il.StockItemID AS nvarchar(100)) AS product_id,
        CAST(il.StockItemID AS nvarchar(100)) AS product_name,
        'Unknown' AS category_name,
        SUM(il.UnitPrice * il.Quantity) AS revenue,
        CAST(0 AS float) AS cost,
        SUM(il.Quantity) AS quantity,
        COUNT_BIG(*) AS orders
    FROM Sales.InvoiceLines AS il
    GROUP BY il.StockItemID
    ORDER BY revenue DESC;
    """,
]

AW_CUSTOMER_QUERIES = [
    """
    SELECT
        CAST(c.CustomerAlternateKey AS nvarchar(100)) AS customer_id,
        c.FirstName + ' ' + c.LastName AS customer_name,
        SUM(f.SalesAmount) AS revenue,
        COUNT_BIG(*) AS orders
    FROM dbo.FactInternetSales AS f
    INNER JOIN dbo.DimCustomer AS c ON c.CustomerKey = f.CustomerKey
    GROUP BY c.CustomerAlternateKey, c.FirstName, c.LastName
    ORDER BY revenue DESC;
    """,
    """
    SELECT
        CAST(CustomerKey AS nvarchar(100)) AS customer_id,
        CAST(CustomerKey AS nvarchar(100)) AS customer_name,
        SUM(SalesAmount) AS revenue,
        COUNT_BIG(*) AS orders
    FROM dbo.FactInternetSales
    GROUP BY CustomerKey
    ORDER BY revenue DESC;
    """,
]

WWI_CUSTOMER_QUERIES = [
    """
    SELECT
        CAST(c.CustomerID AS nvarchar(100)) AS customer_id,
        c.CustomerName AS customer_name,
        SUM(il.ExtendedPrice) AS revenue,
        COUNT_BIG(DISTINCT i.InvoiceID) AS orders
    FROM Sales.Invoices AS i
    INNER JOIN Sales.InvoiceLines AS il ON il.InvoiceID = i.InvoiceID
    INNER JOIN Sales.Customers AS c ON c.CustomerID = i.CustomerID
    GROUP BY c.CustomerID, c.CustomerName
    ORDER BY revenue DESC;
    """,
    """
    SELECT
        CAST(i.CustomerID AS nvarchar(100)) AS customer_id,
        CAST(i.CustomerID AS nvarchar(100)) AS customer_name,
        SUM(il.UnitPrice * il.Quantity) AS revenue,
        COUNT_BIG(DISTINCT i.InvoiceID) AS orders
    FROM Sales.Invoices AS i
    INNER JOIN Sales.InvoiceLines AS il ON il.InvoiceID = i.InvoiceID
    GROUP BY i.CustomerID
    ORDER BY revenue DESC;
    """,
]

backend/app/services/__init__.py  (new file, 1 line)
@@ -0,0 +1 @@
"""Business logic services."""

backend/app/services/analytics_service.py  (new file, 373 lines)
@@ -0,0 +1,373 @@
from __future__ import annotations

from dataclasses import dataclass
from datetime import date, timedelta
from math import sqrt

import numpy as np
import pandas as pd
from opentelemetry import trace
from sklearn.linear_model import LinearRegression

from app.core.config import settings
from app.services.persistence_service import PersistenceService
from app.services.warehouse_service import ReadOnlyWarehouseClient


@dataclass
class DashboardSnapshot:
    kpis: dict
    history: list[dict]
    forecasts: list[dict]
    rankings: list[dict]
    recommendations: list[dict]


class AnalyticsService:
    def __init__(
        self,
        warehouse_client: ReadOnlyWarehouseClient,
        persistence_service: PersistenceService | None = None,
    ) -> None:
        self.warehouse_client = warehouse_client
        self.persistence_service = persistence_service
        self.tracer = trace.get_tracer(__name__)

    @staticmethod
    def _normalize_frame(df: pd.DataFrame, date_col: str = "sale_date") -> pd.DataFrame:
        normalized = df.copy()
        normalized[date_col] = pd.to_datetime(normalized[date_col], errors="coerce")
        for numeric in ("revenue", "cost", "quantity", "orders"):
            if numeric in normalized.columns:
                normalized[numeric] = pd.to_numeric(
                    normalized[numeric], errors="coerce"
                ).fillna(0.0)
        return normalized.dropna(subset=[date_col])

    def load_sales_history(self, days_back: int | None = None) -> pd.DataFrame:
        with self.tracer.start_as_current_span("analytics.load_sales_history"):
            daily_sales = self._normalize_frame(
                self.warehouse_client.fetch_daily_sales()
            )
            days = days_back or settings.default_history_days
            min_date = pd.Timestamp(date.today() - timedelta(days=days))
            filtered = daily_sales[daily_sales["sale_date"] >= min_date]
            return (
                filtered.groupby("sale_date", as_index=False)[
                    ["revenue", "cost", "quantity", "orders"]
                ]
                .sum()
                .sort_values("sale_date")
            )

    def get_kpis(self) -> dict:
        with self.tracer.start_as_current_span("analytics.kpis"):
            sales = self.load_sales_history(days_back=180)
            if sales.empty:
                return {
                    "total_revenue": 0.0,
                    "gross_margin_pct": 0.0,
                    "total_quantity": 0.0,
                    "avg_order_value": 0.0,
                    "records_in_window": 0,
                }

            total_revenue = float(sales["revenue"].sum())
            total_cost = float(sales["cost"].sum())
            total_orders = max(float(sales["orders"].sum()), 1.0)
            margin_pct = (
                ((total_revenue - total_cost) / total_revenue * 100)
                if total_revenue
                else 0.0
            )
            return {
                "total_revenue": round(total_revenue, 2),
                "gross_margin_pct": round(margin_pct, 2),
                "total_quantity": round(float(sales["quantity"].sum()), 2),
                "avg_order_value": round(total_revenue / total_orders, 2),
                "records_in_window": int(sales.shape[0]),
            }

    def get_history_points(self, days_back: int | None = None) -> list[dict]:
        with self.tracer.start_as_current_span("analytics.history_points"):
            sales = self.load_sales_history(days_back=days_back)
            if sales.empty:
                return []
            return [
                {
                    "date": pd.Timestamp(row["sale_date"]).date().isoformat(),
                    "revenue": round(float(row["revenue"]), 2),
                    "cost": round(float(row["cost"]), 2),
                    "quantity": round(float(row["quantity"]), 2),
                }
                for _, row in sales.iterrows()
            ]

    def get_forecast(
        self,
        horizon_days: int | None = None,
        *,
        trigger_source: str = "api.forecasts",
        persist: bool = True,
    ) -> list[dict]:
        with self.tracer.start_as_current_span("analytics.forecast"):
            horizon = horizon_days or settings.forecast_horizon_days
            sales = self.load_sales_history(days_back=720)
            if sales.empty:
                return []

            series = (
                sales.set_index("sale_date")["revenue"]
                .sort_index()
                .resample("D")
                .sum()
                .fillna(0.0)
            )
            y = series.values
            x = np.arange(len(y), dtype=float).reshape(-1, 1)
            model = LinearRegression()
            model.fit(x, y)
            baseline = model.predict(x)
            residual = y - baseline
            sigma = float(np.std(residual)) if len(residual) > 1 else 0.0

            weekday_baseline = series.groupby(series.index.weekday).mean()
            overall_mean = float(series.mean()) if len(series) else 0.0
            weekday_factor = (
                weekday_baseline / overall_mean
                if overall_mean > 0
                else pd.Series([1.0] * 7, index=range(7))
            )
            weekday_factor = weekday_factor.replace([np.inf, -np.inf], 1.0).fillna(1.0)

            future_x = np.arange(len(y), len(y) + horizon, dtype=float).reshape(-1, 1)
            raw_forecast = model.predict(future_x)

            predictions: list[dict] = []
            start_date = series.index.max().date()
            for idx, point in enumerate(raw_forecast, start=1):
                day = start_date + timedelta(days=idx)
                factor = (
                    float(weekday_factor.loc[day.weekday()])
                    if day.weekday() in weekday_factor.index
                    else 1.0
                )
                yhat = max(float(point) * factor, 0.0)
                ci = 1.96 * sigma * sqrt(1 + idx / max(len(y), 1))
                predictions.append(
                    {
                        "date": day.isoformat(),
                        "predicted_revenue": round(yhat, 2),
                        "lower_bound": round(max(yhat - ci, 0.0), 2),
                        "upper_bound": round(yhat + ci, 2),
                    }
                )

            if persist and self.persistence_service is not None:
                span_context = trace.get_current_span().get_span_context()
                trace_id = (
                    f"{span_context.trace_id:032x}" if span_context.is_valid else None
                )
                span_id = (
                    f"{span_context.span_id:016x}" if span_context.is_valid else None
                )
                self.persistence_service.record_forecast_run(
                    horizon_days=horizon,
                    payload=predictions,
                    trigger_source=trigger_source,
                    trace_id=trace_id,
                    span_id=span_id,
                )

            return predictions

    def get_rankings(
        self,
        top_n: int | None = None,
        *,
        trigger_source: str = "api.rankings",
        persist: bool = True,
    ) -> list[dict]:
        with self.tracer.start_as_current_span("analytics.rankings"):
            n = top_n or settings.ranking_default_top_n
            products = self.warehouse_client.fetch_product_performance().copy()
            if products.empty:
                return []

            products["revenue"] = pd.to_numeric(
                products["revenue"], errors="coerce"
            ).fillna(0.0)
            products["cost"] = pd.to_numeric(products["cost"], errors="coerce").fillna(
                0.0
            )
            products["quantity"] = pd.to_numeric(
                products["quantity"], errors="coerce"
            ).fillna(0.0)
            products["orders"] = pd.to_numeric(
                products["orders"], errors="coerce"
            ).fillna(0.0)

            grouped = (
                products.groupby(
                    ["product_id", "product_name", "category_name"], as_index=False
                )[["revenue", "cost", "quantity", "orders"]]
                .sum()
                .sort_values("revenue", ascending=False)
            )

            grouped["margin_pct"] = np.where(
                grouped["revenue"] > 0,
                ((grouped["revenue"] - grouped["cost"]) / grouped["revenue"]) * 100,
                0.0,
            )

            revenue_norm = grouped["revenue"] / max(
                float(grouped["revenue"].max()), 1.0
            )
            margin_norm = (grouped["margin_pct"] + 100) / 200
            velocity_norm = grouped["quantity"] / max(
                float(grouped["quantity"].max()), 1.0
            )
            grouped["score"] = (
                (0.55 * revenue_norm)
                + (0.30 * margin_norm.clip(0, 1))
                + (0.15 * velocity_norm)
            )
            ranked = (
                grouped.sort_values("score", ascending=False)
                .head(n)
                .reset_index(drop=True)
            )

            result = [
                {
                    "rank": int(idx + 1),
                    "product_id": str(row["product_id"]),
                    "product_name": str(row["product_name"]),
                    "category": str(row["category_name"]),
                    "revenue": round(float(row["revenue"]), 2),
                    "margin_pct": round(float(row["margin_pct"]), 2),
                    "score": round(float(row["score"]) * 100, 2),
                }
                for idx, row in ranked.iterrows()
            ]

            if persist and self.persistence_service is not None:
                span_context = trace.get_current_span().get_span_context()
                trace_id = (
                    f"{span_context.trace_id:032x}" if span_context.is_valid else None
                )
                span_id = (
                    f"{span_context.span_id:016x}" if span_context.is_valid else None
                )
                self.persistence_service.record_ranking_run(
                    top_n=n,
                    payload=result,
                    trigger_source=trigger_source,
                    trace_id=trace_id,
                    span_id=span_id,
                )

            return result

    def get_recommendations(
        self,
        rankings: list[dict] | None = None,
        *,
        trigger_source: str = "api.recommendations",
        persist: bool = True,
    ) -> list[dict]:
        with self.tracer.start_as_current_span("analytics.recommendations"):
            ranking_rows = (
                rankings
                if rankings is not None
                else self.get_rankings(
                    top_n=20, trigger_source=trigger_source, persist=persist
                )
            )
            customers = self.warehouse_client.fetch_customer_performance().copy()
            if customers.empty:
                customers = pd.DataFrame(columns=["customer_name", "revenue", "orders"])

            recommendations: list[dict] = []

            if ranking_rows:
                champion = ranking_rows[0]
                recommendations.append(
                    {
                        "title": "Double down on champion SKU",
                        "priority": "high",
                        "summary": (
                            f"Promote '{champion['product_name']}' with score {champion['score']:.2f} "
                            f"and margin {champion['margin_pct']:.2f}%."
                        ),
                    }
                )

            low_margin = next(
                (row for row in ranking_rows if row["margin_pct"] < 10), None
            )
            if low_margin:
                recommendations.append(
                    {
                        "title": "Review pricing for low-margin bestseller",
                        "priority": "medium",
                        "summary": (
                            f"'{low_margin['product_name']}' has strong rank but only "
                            f"{low_margin['margin_pct']:.2f}% margin."
                        ),
                    }
                )

            if not customers.empty:
                customers["revenue"] = pd.to_numeric(
                    customers["revenue"], errors="coerce"
                ).fillna(0.0)
                customers["orders"] = pd.to_numeric(
                    customers["orders"], errors="coerce"
                ).fillna(0.0)
                customer = customers.sort_values("revenue", ascending=False).iloc[0]
                recommendations.append(
                    {
                        "title": "Protect top customer relationship",
                        "priority": "high",
                        "summary": (
                            f"Prioritize retention for '{customer['customer_name']}' with "
                            f"{float(customer['orders']):.0f} orders and {float(customer['revenue']):.2f} revenue."
                        ),
                    }
                )

            result = recommendations[:5]
            if persist and self.persistence_service is not None:
                span_context = trace.get_current_span().get_span_context()
                trace_id = (
                    f"{span_context.trace_id:032x}" if span_context.is_valid else None
                )
                span_id = (
                    f"{span_context.span_id:016x}" if span_context.is_valid else None
                )
                self.persistence_service.record_recommendation_run(
                    payload=result,
                    trigger_source=trigger_source,
                    trace_id=trace_id,
                    span_id=span_id,
                )
            return result

    def get_dashboard(self) -> DashboardSnapshot:
        with self.tracer.start_as_current_span("analytics.dashboard"):
            rankings = self.get_rankings(trigger_source="api.dashboard", persist=True)
            return DashboardSnapshot(
                kpis=self.get_kpis(),
                history=self.get_history_points(),
                forecasts=self.get_forecast(
                    trigger_source="api.dashboard", persist=True
                ),
                rankings=rankings,
                recommendations=self.get_recommendations(
                    rankings=rankings,
                    trigger_source="api.dashboard",
                    persist=True,
                ),
            )

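For reference, the composite ranking score computed in get_rankings can be written out as: score = 100 * (0.55 * revenue / max_revenue + 0.30 * clip((margin_pct + 100) / 200, 0, 1) + 0.15 * quantity / max_quantity), where max_revenue and max_quantity are floored at 1.0 to avoid division by zero. Revenue carries the largest weight, gross margin is rescaled from the [-100, 100] percent range into [0, 1], and sales velocity contributes the remaining weight.
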
backend/app/services/persistence_service.py  (new file, 281 lines)
@@ -0,0 +1,281 @@
from __future__ import annotations

import logging
from time import perf_counter

from opentelemetry import metrics, trace
from sqlalchemy import desc, select
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session, sessionmaker

from app.db.postgres_models import AuditLog, ForecastRun, RankingRun, RecommendationRun

LOGGER = logging.getLogger(__name__)


class PersistenceService:
    def __init__(self, session_factory: sessionmaker[Session]) -> None:
        self.session_factory = session_factory
        self.tracer = trace.get_tracer(__name__)
        self.meter = metrics.get_meter(__name__)
        self.write_counter = self.meter.create_counter(
            name="postgres_persist_writes_total",
            description="Total writes to app persistence PostgreSQL",
        )
        self.write_latency = self.meter.create_histogram(
            name="postgres_persist_write_latency_ms",
            unit="ms",
            description="Latency of app persistence write operations",
        )

    @staticmethod
    def _to_audit_dict(row: AuditLog) -> dict:
        return {
            "id": row.id,
            "created_at": row.created_at.isoformat(),
            "method": row.method,
            "path": row.path,
            "query_string": row.query_string,
            "status_code": row.status_code,
            "duration_ms": row.duration_ms,
            "trace_id": row.trace_id,
            "span_id": row.span_id,
            "client_ip": row.client_ip,
            "user_agent": row.user_agent,
            "details": row.details,
        }

    @staticmethod
    def _to_forecast_dict(row: ForecastRun) -> dict:
        return {
            "id": row.id,
            "created_at": row.created_at.isoformat(),
            "horizon_days": row.horizon_days,
            "point_count": row.point_count,
            "trigger_source": row.trigger_source,
            "trace_id": row.trace_id,
            "span_id": row.span_id,
            "payload": row.payload,
        }

    @staticmethod
    def _to_ranking_dict(row: RankingRun) -> dict:
        return {
            "id": row.id,
            "created_at": row.created_at.isoformat(),
            "top_n": row.top_n,
            "item_count": row.item_count,
            "trigger_source": row.trigger_source,
            "trace_id": row.trace_id,
            "span_id": row.span_id,
            "payload": row.payload,
        }

    @staticmethod
    def _to_recommendation_dict(row: RecommendationRun) -> dict:
        return {
            "id": row.id,
            "created_at": row.created_at.isoformat(),
            "item_count": row.item_count,
            "trigger_source": row.trigger_source,
            "trace_id": row.trace_id,
            "span_id": row.span_id,
            "payload": row.payload,
        }

    def record_audit_log(
        self,
        *,
        method: str,
        path: str,
        query_string: str,
        status_code: int,
        duration_ms: float,
        trace_id: str | None,
        span_id: str | None,
        client_ip: str | None,
        user_agent: str | None,
        details: dict | None = None,
    ) -> None:
        started = perf_counter()
        with self.tracer.start_as_current_span("persist.audit_log"):
            try:
                with self.session_factory() as session:
                    session.add(
                        AuditLog(
                            method=method,
                            path=path,
                            query_string=query_string[:1000],
                            status_code=status_code,
                            duration_ms=duration_ms,
                            trace_id=trace_id,
                            span_id=span_id,
                            client_ip=client_ip,
                            user_agent=user_agent,
                            details=details or {},
                        )
                    )
                    session.commit()
                self.write_counter.add(
                    1, attributes={"entity": "audit", "status": "ok"}
                )
            except SQLAlchemyError as exc:
                LOGGER.exception("Failed to persist audit log: %s", exc)
                self.write_counter.add(
                    1, attributes={"entity": "audit", "status": "error"}
                )
            finally:
                self.write_latency.record(
                    (perf_counter() - started) * 1000,
                    attributes={"entity": "audit"},
                )

    def record_forecast_run(
        self,
        *,
        horizon_days: int,
        payload: list[dict],
        trigger_source: str,
        trace_id: str | None,
        span_id: str | None,
    ) -> None:
        started = perf_counter()
        with self.tracer.start_as_current_span("persist.forecast_run"):
            try:
                with self.session_factory() as session:
                    session.add(
                        ForecastRun(
                            horizon_days=horizon_days,
                            point_count=len(payload),
                            trigger_source=trigger_source,
                            trace_id=trace_id,
                            span_id=span_id,
                            payload=payload,
                        )
                    )
                    session.commit()
                self.write_counter.add(
                    1, attributes={"entity": "forecast", "status": "ok"}
                )
            except SQLAlchemyError as exc:
                LOGGER.exception("Failed to persist forecast run: %s", exc)
                self.write_counter.add(
                    1, attributes={"entity": "forecast", "status": "error"}
                )
            finally:
                self.write_latency.record(
                    (perf_counter() - started) * 1000,
                    attributes={"entity": "forecast"},
                )

    def record_ranking_run(
        self,
        *,
        top_n: int,
        payload: list[dict],
        trigger_source: str,
        trace_id: str | None,
        span_id: str | None,
    ) -> None:
        started = perf_counter()
        with self.tracer.start_as_current_span("persist.ranking_run"):
            try:
                with self.session_factory() as session:
                    session.add(
                        RankingRun(
                            top_n=top_n,
                            item_count=len(payload),
                            trigger_source=trigger_source,
                            trace_id=trace_id,
                            span_id=span_id,
                            payload=payload,
                        )
                    )
                    session.commit()
                self.write_counter.add(
                    1, attributes={"entity": "ranking", "status": "ok"}
                )
            except SQLAlchemyError as exc:
                LOGGER.exception("Failed to persist ranking run: %s", exc)
                self.write_counter.add(
                    1, attributes={"entity": "ranking", "status": "error"}
                )
            finally:
                self.write_latency.record(
                    (perf_counter() - started) * 1000,
                    attributes={"entity": "ranking"},
                )

    def record_recommendation_run(
        self,
        *,
        payload: list[dict],
        trigger_source: str,
        trace_id: str | None,
        span_id: str | None,
    ) -> None:
        started = perf_counter()
        with self.tracer.start_as_current_span("persist.recommendation_run"):
            try:
                with self.session_factory() as session:
                    session.add(
                        RecommendationRun(
                            item_count=len(payload),
                            trigger_source=trigger_source,
                            trace_id=trace_id,
                            span_id=span_id,
                            payload=payload,
                        )
                    )
                    session.commit()
                self.write_counter.add(
                    1, attributes={"entity": "recommendation", "status": "ok"}
                )
            except SQLAlchemyError as exc:
                LOGGER.exception("Failed to persist recommendation run: %s", exc)
                self.write_counter.add(
                    1, attributes={"entity": "recommendation", "status": "error"}
                )
            finally:
                self.write_latency.record(
                    (perf_counter() - started) * 1000,
                    attributes={"entity": "recommendation"},
                )

    def list_audit_logs(self, limit: int) -> list[dict]:
        with self.tracer.start_as_current_span("persist.list_audit_logs"):
            with self.session_factory() as session:
                rows = session.execute(
                    select(AuditLog).order_by(desc(AuditLog.created_at)).limit(limit)
                ).scalars()
                return [self._to_audit_dict(row) for row in rows]

    def list_forecast_runs(self, limit: int) -> list[dict]:
        with self.tracer.start_as_current_span("persist.list_forecast_runs"):
            with self.session_factory() as session:
                rows = session.execute(
                    select(ForecastRun)
                    .order_by(desc(ForecastRun.created_at))
                    .limit(limit)
                ).scalars()
                return [self._to_forecast_dict(row) for row in rows]

    def list_ranking_runs(self, limit: int) -> list[dict]:
        with self.tracer.start_as_current_span("persist.list_ranking_runs"):
            with self.session_factory() as session:
                rows = session.execute(
                    select(RankingRun)
                    .order_by(desc(RankingRun.created_at))
                    .limit(limit)
                ).scalars()
                return [self._to_ranking_dict(row) for row in rows]

    def list_recommendation_runs(self, limit: int) -> list[dict]:
        with self.tracer.start_as_current_span("persist.list_recommendation_runs"):
            with self.session_factory() as session:
                rows = session.execute(
                    select(RecommendationRun)
                    .order_by(desc(RecommendationRun.created_at))
                    .limit(limit)
                ).scalars()
                return [self._to_recommendation_dict(row) for row in rows]

backend/app/services/warehouse_service.py  (new file, 101 lines)
@@ -0,0 +1,101 @@
from __future__ import annotations

import hashlib
import logging
from collections.abc import Sequence
from time import perf_counter

import pandas as pd
from opentelemetry import metrics, trace
from sqlalchemy import text
from sqlalchemy.engine import Engine
from sqlalchemy.exc import SQLAlchemyError

from app.db import queries

LOGGER = logging.getLogger(__name__)


class ReadOnlyWarehouseClient:
    def __init__(self, engines: dict[str, Engine]) -> None:
        self.engines = engines
        self.tracer = trace.get_tracer(__name__)
        self.meter = metrics.get_meter(__name__)
        self.query_counter = self.meter.create_counter(
            name="warehouse_queries_total",
            description="Total warehouse query executions",
        )
        self.query_latency = self.meter.create_histogram(
            name="warehouse_query_latency_ms",
            unit="ms",
            description="Warehouse query latency",
        )

    def _validate_read_only_query(self, sql: str) -> None:
        normalized = sql.strip().lower()
        if not (normalized.startswith("select") or normalized.startswith("with")):
            raise ValueError("Only read-only SELECT/CTE SQL statements are allowed.")

    def _run_query_list(
        self, source: str, sql_candidates: Sequence[str]
    ) -> pd.DataFrame:
        engine = self.engines[source]
        last_error: Exception | None = None

        for candidate in sql_candidates:
            self._validate_read_only_query(candidate)
            query_hash = hashlib.sha256(candidate.encode("utf-8")).hexdigest()[:12]
            with self.tracer.start_as_current_span("warehouse.query") as span:
                span.set_attribute("db.system", "mssql")
                span.set_attribute("db.source", source)
                span.set_attribute("db.query.hash", query_hash)
                started = perf_counter()
                try:
                    with engine.connect() as conn:
                        with self.tracer.start_as_current_span(
                            "warehouse.query.execute"
                        ):
                            df = pd.read_sql_query(sql=text(candidate), con=conn)
                    elapsed_ms = (perf_counter() - started) * 1000
                    self.query_latency.record(elapsed_ms, attributes={"source": source})
                    self.query_counter.add(
                        1, attributes={"source": source, "status": "ok"}
                    )
                    return df
                except SQLAlchemyError as exc:
                    last_error = exc
                    elapsed_ms = (perf_counter() - started) * 1000
                    self.query_latency.record(elapsed_ms, attributes={"source": source})
                    self.query_counter.add(
                        1, attributes={"source": source, "status": "error"}
                    )
                    LOGGER.warning(
                        "Query failed for %s with hash %s: %s", source, query_hash, exc
                    )

        if last_error is not None:
            raise RuntimeError(
                f"All query candidates failed for source '{source}'."
            ) from last_error
        return pd.DataFrame()

    def fetch_daily_sales(self) -> pd.DataFrame:
        aw = self._run_query_list("aw", queries.AW_DAILY_SALES_QUERIES)
        aw["source"] = "AdventureWorks2022DWH"
        wwi = self._run_query_list("wwi", queries.WWI_DAILY_SALES_QUERIES)
        wwi["source"] = "WorldWideImporters"
        return pd.concat([aw, wwi], ignore_index=True)

    def fetch_product_performance(self) -> pd.DataFrame:
        aw = self._run_query_list("aw", queries.AW_PRODUCT_PERFORMANCE_QUERIES)
        aw["source"] = "AdventureWorks2022DWH"
        wwi = self._run_query_list("wwi", queries.WWI_PRODUCT_PERFORMANCE_QUERIES)
        wwi["source"] = "WorldWideImporters"
        return pd.concat([aw, wwi], ignore_index=True)

    def fetch_customer_performance(self) -> pd.DataFrame:
        aw = self._run_query_list("aw", queries.AW_CUSTOMER_QUERIES)
        aw["source"] = "AdventureWorks2022DWH"
        wwi = self._run_query_list("wwi", queries.WWI_CUSTOMER_QUERIES)
        wwi["source"] = "WorldWideImporters"
        return pd.concat([aw, wwi], ignore_index=True)

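No composition or wiring module is included in this commit; the following is a hypothetical sketch showing how the modules above could be assembled into a working analytics stack.

# Hypothetical composition sketch, not included in this change.
from app.db.engine import create_warehouse_engines
from app.db.postgres import (
    create_postgres_engine,
    create_postgres_session_factory,
    initialize_postgres_schema,
)
from app.services.analytics_service import AnalyticsService
from app.services.persistence_service import PersistenceService
from app.services.warehouse_service import ReadOnlyWarehouseClient

# Read-only MSSQL engines for the two warehouses, plus the app's own Postgres store.
engines = create_warehouse_engines()
pg_engine = create_postgres_engine()
initialize_postgres_schema(pg_engine)

analytics = AnalyticsService(
    warehouse_client=ReadOnlyWarehouseClient(engines),
    persistence_service=PersistenceService(create_postgres_session_factory(pg_engine)),
)
snapshot = analytics.get_dashboard()
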