Add initial work from Codex

This commit is contained in:
2026-03-20 15:13:33 +01:00
parent 19771ddd37
commit adb5c1a439
48 changed files with 7054 additions and 16 deletions

135
backend/app/core/config.py Normal file
View File

@@ -0,0 +1,135 @@
from __future__ import annotations
from functools import lru_cache
from urllib.parse import quote_plus
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
class Settings(BaseSettings):
    """Typed configuration for the backend, loaded through pydantic-settings.

    A UTF-8 ``.env`` file is consulted and keys that are not declared on this
    class are ignored (``extra="ignore"``). Besides the raw fields, the class
    exposes list views of the delimiter-separated string settings and derived
    database connection URLs.
    """

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # Application and API server.
    app_name: str = "otel-bi-backend"
    app_env: str = "dev"
    log_level: str = "INFO"
    api_host: str = "0.0.0.0"
    api_port: int = 8000
    cors_origins: str = "http://localhost:5173"
    request_timeout_seconds: float = 20.0

    # SQL Server connection parts and database names.
    mssql_host: str = "localhost"
    mssql_port: int = 1433
    mssql_username: str = "sa"
    mssql_password: str = "Password!123"
    mssql_driver: str = "ODBC Driver 18 for SQL Server"
    mssql_trust_server_certificate: bool = False
    wwi_database: str = "WorldWideImporters"
    aw_database: str = "AdventureWorks2022DWH"
    # When set, these replace the URL assembled from the parts above.
    wwi_connection_string: str | None = None
    aw_connection_string: str | None = None

    # PostgreSQL connection parts.
    postgres_host: str = "localhost"
    postgres_port: int = 5432
    postgres_database: str = "otel_bi_app"
    postgres_username: str = "otel_bi_app"
    postgres_password: str = "otel_bi_app"
    postgres_sslmode: str = "require"
    # When set, this replaces the URL assembled from the parts above.
    postgres_connection_string: str | None = None
    postgres_required: bool = True

    # Base URLs of the other services.
    query_service_url: str = "http://localhost:8101"
    analytics_service_url: str = "http://localhost:8102"
    persistence_service_url: str = "http://localhost:8103"

    # Frontend JWT settings.
    require_frontend_auth: bool = True
    frontend_jwt_issuer_url: str = ""
    frontend_jwt_audience: str = ""
    frontend_jwt_jwks_url: str | None = None
    frontend_jwt_algorithm: str = "RS256"
    frontend_required_scopes: str = ""
    frontend_clock_skew_seconds: int = Field(default=30, ge=0, le=300)

    # Internal service token settings.
    internal_service_auth_enabled: bool = True
    internal_service_shared_secret: str = "change-me"
    internal_service_token_ttl_seconds: int = Field(default=120, ge=30, le=900)
    internal_service_token_audience: str = "bi-internal"
    internal_service_allowed_issuers: str = "api-gateway"
    internal_token_clock_skew_seconds: int = Field(default=15, ge=0, le=120)

    # OpenTelemetry settings.
    otel_service_name: str = "otel-bi-backend"
    otel_service_namespace: str = "final-thesis"
    otel_collector_endpoint: str = "http://localhost:4318"
    otel_export_timeout_ms: int = 10000

    # Range-validated numeric defaults.
    forecast_horizon_days: int = Field(default=30, ge=7, le=180)
    default_history_days: int = Field(default=365, ge=30, le=1460)
    ranking_default_top_n: int = Field(default=10, ge=3, le=100)
    storage_default_limit: int = Field(default=50, ge=10, le=500)

    @property
    def cors_origins_list(self) -> list[str]:
        """Return ``cors_origins`` split on commas, trimmed, without blanks."""
        trimmed = (piece.strip() for piece in self.cors_origins.split(","))
        return [piece for piece in trimmed if piece]

    @property
    def frontend_required_scopes_list(self) -> list[str]:
        """Return ``frontend_required_scopes`` split on single spaces, trimmed, without blanks."""
        trimmed = (piece.strip() for piece in self.frontend_required_scopes.split(" "))
        return [piece for piece in trimmed if piece]

    @property
    def internal_service_allowed_issuers_list(self) -> list[str]:
        """Return ``internal_service_allowed_issuers`` split on commas, trimmed, without blanks."""
        trimmed = (
            piece.strip() for piece in self.internal_service_allowed_issuers.split(",")
        )
        return [piece for piece in trimmed if piece]

    def _build_mssql_connection_url(self, database: str) -> str:
        """Assemble a ``mssql+pyodbc`` URL for *database* from the MSSQL fields.

        Username, password and driver name are encoded with ``quote_plus``;
        host, port and *database* are inserted as given. The query string
        always carries ``ApplicationIntent=ReadOnly``.
        """
        credentials = ":".join(
            (quote_plus(self.mssql_username), quote_plus(self.mssql_password))
        )
        location = f"{self.mssql_host}:{self.mssql_port}/{database}"
        if self.mssql_trust_server_certificate:
            trust_flag = "yes"
        else:
            trust_flag = "no"
        query = (
            f"driver={quote_plus(self.mssql_driver)}"
            f"&TrustServerCertificate={trust_flag}"
            "&ApplicationIntent=ReadOnly"
        )
        return f"mssql+pyodbc://{credentials}@{location}?{query}"

    @property
    def wwi_connection_url(self) -> str:
        """Return ``wwi_connection_string`` if set, else a URL built for ``wwi_database``."""
        override = self.wwi_connection_string
        if override:
            return override
        return self._build_mssql_connection_url(self.wwi_database)

    @property
    def aw_connection_url(self) -> str:
        """Return ``aw_connection_string`` if set, else a URL built for ``aw_database``."""
        override = self.aw_connection_string
        if override:
            return override
        return self._build_mssql_connection_url(self.aw_database)

    @property
    def postgres_connection_url(self) -> str:
        """Return ``postgres_connection_string`` if set, else a ``postgresql+psycopg`` URL.

        The built URL encodes username and password with ``quote_plus`` and
        appends ``sslmode`` as the only query parameter.
        """
        override = self.postgres_connection_string
        if override:
            return override
        credentials = ":".join(
            (quote_plus(self.postgres_username), quote_plus(self.postgres_password))
        )
        endpoint = f"{self.postgres_host}:{self.postgres_port}"
        return (
            f"postgresql+psycopg://{credentials}@{endpoint}/"
            f"{self.postgres_database}?sslmode={self.postgres_sslmode}"
        )
@lru_cache
def get_settings() -> Settings:
    """Return the ``Settings`` object, building it on the first call only.

    The function takes no arguments, so ``lru_cache`` hands back the same
    instance on every later call.
    """
    loaded = Settings()
    return loaded
# Module-level settings object, obtained from the cached factory at import time.
settings = get_settings()

103
backend/app/core/otel.py Normal file
View File

@@ -0,0 +1,103 @@
from __future__ import annotations
import logging
from dataclasses import dataclass
from typing import Any
from fastapi import FastAPI
from opentelemetry import metrics, trace
from opentelemetry.baggage.propagation import W3CBaggagePropagator
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.instrumentation.httpx import HTTPXClientInstrumentor
from opentelemetry.instrumentation.logging import LoggingInstrumentor
from opentelemetry.instrumentation.sqlalchemy import SQLAlchemyInstrumentor
from opentelemetry.propagate import set_global_textmap
from opentelemetry.propagators.composite import CompositePropagator
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
try:
from opentelemetry.instrumentation.system_metrics import SystemMetricsInstrumentor
except ImportError: # pragma: no cover - defensive fallback for minimal envs
SystemMetricsInstrumentor = None # type: ignore[assignment]
from app.core.config import Settings
# Logger named after this module.
LOGGER = logging.getLogger(__name__)
@dataclass
class TelemetryProviders:
    """Holds the SDK tracer and meter providers as one value."""

    # OpenTelemetry SDK tracer provider.
    tracer_provider: TracerProvider
    # OpenTelemetry SDK meter provider.
    meter_provider: MeterProvider
def configure_otel(settings: Settings) -> TelemetryProviders:
    """Set up global OpenTelemetry propagation, tracing, metrics and instrumentation.

    Steps performed, in order:

    * install a composite propagator (W3C trace context + W3C baggage);
    * register a tracer provider whose batch span processor exports over
      OTLP/HTTP to ``<collector>/v1/traces``;
    * register a meter provider whose periodic reader exports to
      ``<collector>/v1/metrics`` every 10 000 ms;
    * enable logging instrumentation (with the logging format set) and, when
      the optional package imported, system metrics instrumentation.

    Args:
        settings: Source of the service name/namespace, environment, collector
            endpoint and export timeout (milliseconds).

    Returns:
        The tracer and meter providers that were registered globally.
    """
    set_global_textmap(
        CompositePropagator([TraceContextTextMapPropagator(), W3CBaggagePropagator()])
    )

    resource = Resource.create(
        {
            "service.name": settings.otel_service_name,
            "service.namespace": settings.otel_service_namespace,
            "deployment.environment": settings.app_env,
        }
    )

    # Drop trailing slashes so an endpoint configured as "http://host:4318/"
    # does not turn into ".../​/v1/traces" with a doubled slash.
    collector_base = settings.otel_collector_endpoint.rstrip("/")
    # Settings store the timeout in milliseconds; the value passed on is
    # that number divided by 1000.
    export_timeout = settings.otel_export_timeout_ms / 1000

    trace_exporter = OTLPSpanExporter(
        endpoint=f"{collector_base}/v1/traces",
        timeout=export_timeout,
    )
    tracer_provider = TracerProvider(resource=resource)
    tracer_provider.add_span_processor(BatchSpanProcessor(trace_exporter))
    trace.set_tracer_provider(tracer_provider)

    metric_reader = PeriodicExportingMetricReader(
        exporter=OTLPMetricExporter(
            endpoint=f"{collector_base}/v1/metrics",
            timeout=export_timeout,
        ),
        export_interval_millis=10000,
    )
    meter_provider = MeterProvider(resource=resource, metric_readers=[metric_reader])
    metrics.set_meter_provider(meter_provider)

    LoggingInstrumentor().instrument(set_logging_format=True)

    # SystemMetricsInstrumentor is None when its optional import failed.
    if SystemMetricsInstrumentor is not None:
        SystemMetricsInstrumentor().instrument()
    else:
        LOGGER.warning(
            "System metrics instrumentor not available, runtime host metrics disabled."
        )

    LOGGER.info("OpenTelemetry providers configured")
    return TelemetryProviders(
        tracer_provider=tracer_provider, meter_provider=meter_provider
    )
def instrument_fastapi(app: FastAPI) -> None:
    """Apply the OpenTelemetry FastAPI instrumentation to *app*.

    Args:
        app: The FastAPI application to instrument.
    """
    FastAPIInstrumentor.instrument_app(app)
def instrument_sqlalchemy_engines(engines: dict[str, Any]) -> None:
    """Instrument every SQLAlchemy engine in *engines* for tracing.

    All engines are handed over in a single ``instrument(engines=...)`` call.
    Calling ``instrument(engine=...)`` once per engine does not work for more
    than one engine: the instrumentor is a process-wide singleton and ignores
    ``instrument()`` calls made after the first one, so only the first engine
    would be traced.

    Args:
        engines: Mapping whose values are the engines to instrument; the keys
            are not used. An empty mapping is a no-op.
    """
    engine_list = list(engines.values())
    if not engine_list:
        # Nothing to instrument; leave the instrumentor untouched, as before.
        return
    SQLAlchemyInstrumentor().instrument(engines=engine_list)
def instrument_httpx_clients() -> None:
    """Enable the OpenTelemetry HTTPX client instrumentation."""
    httpx_instrumentor = HTTPXClientInstrumentor()
    httpx_instrumentor.instrument()
def shutdown_otel(providers: TelemetryProviders) -> None:
    """Remove instrumentation, then shut down the meter and tracer providers.

    Uninstrumentation order is HTTPX, system metrics (only when that optional
    instrumentor was importable), logging. The meter provider is shut down
    before the tracer provider.

    Args:
        providers: The providers to shut down.
    """
    instrumentor_types = [HTTPXClientInstrumentor]
    if SystemMetricsInstrumentor is not None:
        instrumentor_types.append(SystemMetricsInstrumentor)
    instrumentor_types.append(LoggingInstrumentor)

    for instrumentor_type in instrumentor_types:
        instrumentor_type().uninstrument()

    for provider in (providers.meter_provider, providers.tracer_provider):
        provider.shutdown()

View File

@@ -0,0 +1,231 @@
from __future__ import annotations
from dataclasses import dataclass
from functools import lru_cache
from time import time
from uuid import uuid4
import jwt
from fastapi import Depends, Header, HTTPException, status
from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer
from jwt import InvalidTokenError, PyJWKClient
from app.core.config import settings
# Bearer-credentials dependency; created with auto_error=False.
BEARER_SCHEME = HTTPBearer(auto_error=False)
@dataclass
class FrontendPrincipal:
    """Caller identity produced by the frontend authentication path."""

    # Subject identifier of the caller.
    subject: str
    # Scope names associated with the caller.
    scopes: list[str]
    # Claim mapping the principal was built from.
    claims: dict
    # Token string the principal was built from.
    token: str
@dataclass
class InternalPrincipal:
    """Caller identity produced by the internal service authentication path."""

    # Subject identifier of the caller.
    subject: str
    # Scope names associated with the caller.
    scopes: list[str]
    # Claim mapping the principal was built from.
    claims: dict
    # Token string the principal was built from.
    token: str
class FrontendJWTVerifier:
    """Verifies frontend bearer tokens with keys fetched from a JWKS endpoint.

    Issuer, audience, algorithm, clock-skew leeway and required scopes are all
    read from the module-level ``settings`` object.
    """

    @property
    def jwks_url(self) -> str:
        """Return the configured JWKS URL.

        Raises:
            HTTPException: 500 when ``frontend_jwt_jwks_url`` is unset or empty.
        """
        if not settings.frontend_jwt_jwks_url:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="FRONTEND_JWT_JWKS_URL is not configured.",
            )
        return settings.frontend_jwt_jwks_url

    def _jwks_client(self) -> PyJWKClient:
        """Return this verifier's ``PyJWKClient``, creating it on first use.

        The client is stored on the instance. ``lru_cache`` on a method would
        key a module-level cache on ``self`` and keep the instance alive for
        the lifetime of the cache.

        Raises:
            HTTPException: 500 (from ``jwks_url``) when no JWKS URL is configured.
        """
        client = getattr(self, "_cached_jwks_client", None)
        if client is None:
            client = PyJWKClient(self.jwks_url)
            self._cached_jwks_client = client
        return client

    @staticmethod
    def _unauthorized(detail: str) -> HTTPException:
        """Build a 401 response carrying the ``WWW-Authenticate: Bearer`` challenge."""
        return HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail=detail,
            headers={"WWW-Authenticate": "Bearer"},
        )

    @staticmethod
    def _extract_scopes(claims: dict) -> list[str]:
        """Return the scopes found in *claims*.

        A string ``scope`` claim wins and is split on single spaces; otherwise
        a list-valued ``scp`` claim is used with each item converted to
        ``str``. Any other shape yields an empty list.
        """
        scope = claims.get("scope")
        if isinstance(scope, str):
            return [item for item in scope.split(" ") if item]
        # NOTE(review): a string-valued "scp" claim is ignored here; confirm
        # whether the identity provider in use ever emits one.
        scp = claims.get("scp")
        if isinstance(scp, list):
            return [str(item) for item in scp]
        return []

    def verify(self, token: str) -> FrontendPrincipal:
        """Validate *token* and return the principal it describes.

        Args:
            token: The encoded JWT taken from the bearer credentials.

        Returns:
            A ``FrontendPrincipal`` with the subject, scopes, decoded claims
            and the original token.

        Raises:
            HTTPException: 500 when issuer, audience or JWKS URL is not
                configured; 401 when the signing key cannot be resolved, the
                token fails validation, or it has no ``sub``; 403 when a
                required scope is missing.
        """
        if not settings.frontend_jwt_issuer_url:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="FRONTEND_JWT_ISSUER_URL is not configured.",
            )
        if not settings.frontend_jwt_audience:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="FRONTEND_JWT_AUDIENCE is not configured.",
            )

        try:
            signing_key = self._jwks_client().get_signing_key_from_jwt(token).key
            claims = jwt.decode(
                token,
                key=signing_key,
                algorithms=[settings.frontend_jwt_algorithm],
                audience=settings.frontend_jwt_audience,
                issuer=settings.frontend_jwt_issuer_url,
                leeway=settings.frontend_clock_skew_seconds,
            )
        except jwt.PyJWTError as exc:
            # PyJWTError is the common base of InvalidTokenError and of the
            # JWKS client errors (unknown key id, key fetch failure). Catching
            # only InvalidTokenError let the latter escape as an unhandled
            # error instead of rejecting the request.
            raise self._unauthorized("Invalid frontend access token.") from exc

        subject = str(claims.get("sub") or "")
        if not subject:
            raise self._unauthorized("Frontend token missing subject.")

        scopes = self._extract_scopes(claims)
        required = settings.frontend_required_scopes_list
        missing = [scope for scope in required if scope not in scopes]
        if missing:
            raise HTTPException(
                status_code=status.HTTP_403_FORBIDDEN,
                detail=f"Missing required scope(s): {', '.join(missing)}",
            )

        return FrontendPrincipal(
            subject=subject, scopes=scopes, claims=claims, token=token
        )
class InternalTokenManager:
    """Issues and checks HS256-signed tokens for the internal service header.

    The signing secret, audience, TTL, allowed issuers and clock-skew leeway
    are read from the module-level ``settings`` object.
    """

    # Value written to, and required in, the "typ" claim.
    token_type = "internal-service"

    @staticmethod
    def _reject(detail: str) -> HTTPException:
        """Build the 401 error used for every rejected internal token."""
        return HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail=detail,
        )

    @staticmethod
    def _assert_secret() -> str:
        """Return the shared secret after checking that it is usable.

        Raises:
            HTTPException: 500 when the secret is empty, still the
                ``"change-me"`` placeholder, or shorter than 32 bytes in UTF-8.
        """
        secret = settings.internal_service_shared_secret
        if not secret or secret == "change-me":
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail="INTERNAL_SERVICE_SHARED_SECRET must be configured.",
            )

        byte_length = len(secret.encode("utf-8"))
        if byte_length < 32:
            raise HTTPException(
                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
                detail=(
                    "INTERNAL_SERVICE_SHARED_SECRET must be at least 32 bytes for "
                    "HS256 token signing."
                ),
            )
        return secret

    def mint(
        self,
        *,
        subject: str,
        scopes: list[str],
        source_service: str,
    ) -> str:
        """Create a signed internal token.

        Args:
            subject: Value for the ``sub`` claim.
            scopes: Scope names, stored space-joined in the ``scope`` claim.
            source_service: Value for the ``iss`` claim.

        Returns:
            The encoded HS256 JWT. ``iat`` and ``nbf`` are the current time
            and ``exp`` is that time plus the configured TTL; ``jti`` is a
            fresh UUID4.

        Raises:
            HTTPException: 500 when the shared secret is not usable.
        """
        issued_at = int(time())
        expires_at = issued_at + settings.internal_service_token_ttl_seconds
        # Key order is kept stable because it determines the serialized payload.
        payload = {
            "sub": subject,
            "scope": " ".join(scopes),
            "iss": source_service,
            "aud": settings.internal_service_token_audience,
            "typ": self.token_type,
            "iat": issued_at,
            "nbf": issued_at,
            "exp": expires_at,
            "jti": str(uuid4()),
        }
        signing_secret = self._assert_secret()
        return jwt.encode(payload, signing_secret, algorithm="HS256")

    def verify(self, token: str) -> InternalPrincipal:
        """Validate an internal token and return the principal it describes.

        Checks run in this order: signature, audience and required claims
        (via ``jwt.decode``), non-empty subject, allowed issuer, token type.

        Args:
            token: The encoded JWT.

        Returns:
            An ``InternalPrincipal`` with the subject, scopes, decoded claims
            and the original token.

        Raises:
            HTTPException: 500 when the shared secret is not usable; 401 when
                any of the checks above fails.
        """
        required_claims = ["sub", "iss", "aud", "exp", "iat", "nbf", "jti", "typ"]
        try:
            claims = jwt.decode(
                token,
                self._assert_secret(),
                algorithms=["HS256"],
                audience=settings.internal_service_token_audience,
                options={"require": required_claims},
                leeway=settings.internal_token_clock_skew_seconds,
            )
        except InvalidTokenError as exc:
            raise self._reject("Invalid internal service token.") from exc

        subject = str(claims.get("sub") or "")
        if not subject:
            raise self._reject("Internal token missing subject.")

        issuer = str(claims.get("iss") or "")
        allowed_issuers = settings.internal_service_allowed_issuers_list
        if issuer not in allowed_issuers:
            raise self._reject("Internal token issuer is not allowed.")

        claimed_type = str(claims.get("typ") or "")
        if claimed_type != self.token_type:
            raise self._reject("Internal token type is invalid.")

        raw_scope = claims.get("scope")
        if raw_scope:
            scopes = [part for part in str(raw_scope).split(" ") if part]
        else:
            scopes = []

        return InternalPrincipal(
            subject=subject, scopes=scopes, claims=claims, token=token
        )
@lru_cache(maxsize=1)
def get_frontend_verifier() -> FrontendJWTVerifier:
    """Return the ``FrontendJWTVerifier``, created on the first call and cached."""
    verifier = FrontendJWTVerifier()
    return verifier
@lru_cache(maxsize=1)
def get_internal_token_manager() -> InternalTokenManager:
    """Return the ``InternalTokenManager``, created on the first call and cached."""
    manager = InternalTokenManager()
    return manager
def require_frontend_principal(
    credentials: HTTPAuthorizationCredentials | None = Depends(BEARER_SCHEME),
) -> FrontendPrincipal:
    """FastAPI dependency that resolves the frontend caller.

    Args:
        credentials: Bearer credentials supplied by ``BEARER_SCHEME``, or
            ``None`` when the request carried none.

    Returns:
        An ``"anonymous"`` principal with no scopes when
        ``settings.require_frontend_auth`` is false; otherwise the principal
        returned by the frontend verifier for the presented token.

    Raises:
        HTTPException: 401 when credentials are missing or the scheme is not
            ``bearer``; the verifier may raise further errors.
    """
    if not settings.require_frontend_auth:
        return FrontendPrincipal(subject="anonymous", scopes=[], claims={}, token="")

    if credentials is None or credentials.scheme.lower() != "bearer":
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Missing bearer token.",
            # A 401 response is expected to carry a challenge naming the
            # authentication scheme the client should use.
            headers={"WWW-Authenticate": "Bearer"},
        )

    return get_frontend_verifier().verify(credentials.credentials)
def require_internal_principal(
    internal_token: str | None = Header(default=None, alias="x-internal-service-token"),
) -> InternalPrincipal:
    """FastAPI dependency that resolves the calling internal service.

    Args:
        internal_token: Value of the ``x-internal-service-token`` header, or
            ``None`` when the header is absent.

    Returns:
        An ``"internal-unauth"`` principal with no scopes when
        ``settings.internal_service_auth_enabled`` is false; otherwise the
        principal returned by the internal token manager.

    Raises:
        HTTPException: 401 when auth is enabled and the header is missing or
            empty; the token manager may raise further errors.
    """
    auth_enabled = settings.internal_service_auth_enabled
    if auth_enabled and internal_token:
        return get_internal_token_manager().verify(internal_token)

    if auth_enabled:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Missing x-internal-service-token header.",
        )

    return InternalPrincipal(
        subject="internal-unauth", scopes=[], claims={}, token=""
    )