Add initial work from Codex
backend/app/services/__init__.py (new file, +1)
@@ -0,0 +1 @@
"""Business logic services."""
backend/app/services/analytics_service.py (new file, +373)
@@ -0,0 +1,373 @@
from __future__ import annotations

from dataclasses import dataclass
from datetime import date, timedelta
from math import sqrt

import numpy as np
import pandas as pd
from opentelemetry import trace
from sklearn.linear_model import LinearRegression

from app.core.config import settings
from app.services.persistence_service import PersistenceService
from app.services.warehouse_service import ReadOnlyWarehouseClient


@dataclass
class DashboardSnapshot:
    kpis: dict
    history: list[dict]
    forecasts: list[dict]
    rankings: list[dict]
    recommendations: list[dict]


class AnalyticsService:
    def __init__(
        self,
        warehouse_client: ReadOnlyWarehouseClient,
        persistence_service: PersistenceService | None = None,
    ) -> None:
        self.warehouse_client = warehouse_client
        self.persistence_service = persistence_service
        self.tracer = trace.get_tracer(__name__)

    @staticmethod
    def _normalize_frame(df: pd.DataFrame, date_col: str = "sale_date") -> pd.DataFrame:
        normalized = df.copy()
        normalized[date_col] = pd.to_datetime(normalized[date_col], errors="coerce")
        for numeric in ("revenue", "cost", "quantity", "orders"):
            if numeric in normalized.columns:
                normalized[numeric] = pd.to_numeric(
                    normalized[numeric], errors="coerce"
                ).fillna(0.0)
        return normalized.dropna(subset=[date_col])

    def load_sales_history(self, days_back: int | None = None) -> pd.DataFrame:
        with self.tracer.start_as_current_span("analytics.load_sales_history"):
            daily_sales = self._normalize_frame(
                self.warehouse_client.fetch_daily_sales()
            )
            days = days_back or settings.default_history_days
            min_date = pd.Timestamp(date.today() - timedelta(days=days))
            filtered = daily_sales[daily_sales["sale_date"] >= min_date]
            return (
                filtered.groupby("sale_date", as_index=False)[
                    ["revenue", "cost", "quantity", "orders"]
                ]
                .sum()
                .sort_values("sale_date")
            )

    def get_kpis(self) -> dict:
        with self.tracer.start_as_current_span("analytics.kpis"):
            sales = self.load_sales_history(days_back=180)
            if sales.empty:
                return {
                    "total_revenue": 0.0,
                    "gross_margin_pct": 0.0,
                    "total_quantity": 0.0,
                    "avg_order_value": 0.0,
                    "records_in_window": 0,
                }

            total_revenue = float(sales["revenue"].sum())
            total_cost = float(sales["cost"].sum())
            total_orders = max(float(sales["orders"].sum()), 1.0)
            margin_pct = (
                ((total_revenue - total_cost) / total_revenue * 100)
                if total_revenue
                else 0.0
            )
            return {
                "total_revenue": round(total_revenue, 2),
                "gross_margin_pct": round(margin_pct, 2),
                "total_quantity": round(float(sales["quantity"].sum()), 2),
                "avg_order_value": round(total_revenue / total_orders, 2),
                "records_in_window": int(sales.shape[0]),
            }

    def get_history_points(self, days_back: int | None = None) -> list[dict]:
        with self.tracer.start_as_current_span("analytics.history_points"):
            sales = self.load_sales_history(days_back=days_back)
            if sales.empty:
                return []
            return [
                {
                    "date": pd.Timestamp(row["sale_date"]).date().isoformat(),
                    "revenue": round(float(row["revenue"]), 2),
                    "cost": round(float(row["cost"]), 2),
                    "quantity": round(float(row["quantity"]), 2),
                }
                for _, row in sales.iterrows()
            ]

    def get_forecast(
        self,
        horizon_days: int | None = None,
        *,
        trigger_source: str = "api.forecasts",
        persist: bool = True,
    ) -> list[dict]:
        with self.tracer.start_as_current_span("analytics.forecast"):
            horizon = horizon_days or settings.forecast_horizon_days
            sales = self.load_sales_history(days_back=720)
            if sales.empty:
                return []

            series = (
                sales.set_index("sale_date")["revenue"]
                .sort_index()
                .resample("D")
                .sum()
                .fillna(0.0)
            )
            y = series.values
            x = np.arange(len(y), dtype=float).reshape(-1, 1)
            model = LinearRegression()
            model.fit(x, y)
            baseline = model.predict(x)
            residual = y - baseline
            sigma = float(np.std(residual)) if len(residual) > 1 else 0.0

            weekday_baseline = series.groupby(series.index.weekday).mean()
            overall_mean = float(series.mean()) if len(series) else 0.0
            weekday_factor = (
                weekday_baseline / overall_mean
                if overall_mean > 0
                else pd.Series([1.0] * 7, index=range(7))
            )
            weekday_factor = weekday_factor.replace([np.inf, -np.inf], 1.0).fillna(1.0)

            future_x = np.arange(len(y), len(y) + horizon, dtype=float).reshape(-1, 1)
            raw_forecast = model.predict(future_x)

            predictions: list[dict] = []
            start_date = series.index.max().date()
            for idx, point in enumerate(raw_forecast, start=1):
                day = start_date + timedelta(days=idx)
                factor = (
                    float(weekday_factor.loc[day.weekday()])
                    if day.weekday() in weekday_factor.index
                    else 1.0
                )
                yhat = max(float(point) * factor, 0.0)
                ci = 1.96 * sigma * sqrt(1 + idx / max(len(y), 1))
                predictions.append(
                    {
                        "date": day.isoformat(),
                        "predicted_revenue": round(yhat, 2),
                        "lower_bound": round(max(yhat - ci, 0.0), 2),
                        "upper_bound": round(yhat + ci, 2),
                    }
                )

            if persist and self.persistence_service is not None:
                span_context = trace.get_current_span().get_span_context()
                trace_id = (
                    f"{span_context.trace_id:032x}" if span_context.is_valid else None
                )
                span_id = (
                    f"{span_context.span_id:016x}" if span_context.is_valid else None
                )
                self.persistence_service.record_forecast_run(
                    horizon_days=horizon,
                    payload=predictions,
                    trigger_source=trigger_source,
                    trace_id=trace_id,
                    span_id=span_id,
                )

            return predictions

    def get_rankings(
        self,
        top_n: int | None = None,
        *,
        trigger_source: str = "api.rankings",
        persist: bool = True,
    ) -> list[dict]:
        with self.tracer.start_as_current_span("analytics.rankings"):
            n = top_n or settings.ranking_default_top_n
            products = self.warehouse_client.fetch_product_performance().copy()
            if products.empty:
                return []

            products["revenue"] = pd.to_numeric(
                products["revenue"], errors="coerce"
            ).fillna(0.0)
            products["cost"] = pd.to_numeric(products["cost"], errors="coerce").fillna(
                0.0
            )
            products["quantity"] = pd.to_numeric(
                products["quantity"], errors="coerce"
            ).fillna(0.0)
            products["orders"] = pd.to_numeric(
                products["orders"], errors="coerce"
            ).fillna(0.0)

            grouped = (
                products.groupby(
                    ["product_id", "product_name", "category_name"], as_index=False
                )[["revenue", "cost", "quantity", "orders"]]
                .sum()
                .sort_values("revenue", ascending=False)
            )

            grouped["margin_pct"] = np.where(
                grouped["revenue"] > 0,
                ((grouped["revenue"] - grouped["cost"]) / grouped["revenue"]) * 100,
                0.0,
            )

            revenue_norm = grouped["revenue"] / max(
                float(grouped["revenue"].max()), 1.0
            )
            margin_norm = (grouped["margin_pct"] + 100) / 200
            velocity_norm = grouped["quantity"] / max(
                float(grouped["quantity"].max()), 1.0
            )
            grouped["score"] = (
                (0.55 * revenue_norm)
                + (0.30 * margin_norm.clip(0, 1))
                + (0.15 * velocity_norm)
            )
            ranked = (
                grouped.sort_values("score", ascending=False)
                .head(n)
                .reset_index(drop=True)
            )

            result = [
                {
                    "rank": int(idx + 1),
                    "product_id": str(row["product_id"]),
                    "product_name": str(row["product_name"]),
                    "category": str(row["category_name"]),
                    "revenue": round(float(row["revenue"]), 2),
                    "margin_pct": round(float(row["margin_pct"]), 2),
                    "score": round(float(row["score"]) * 100, 2),
                }
                for idx, row in ranked.iterrows()
            ]

            if persist and self.persistence_service is not None:
                span_context = trace.get_current_span().get_span_context()
                trace_id = (
                    f"{span_context.trace_id:032x}" if span_context.is_valid else None
                )
                span_id = (
                    f"{span_context.span_id:016x}" if span_context.is_valid else None
                )
                self.persistence_service.record_ranking_run(
                    top_n=n,
                    payload=result,
                    trigger_source=trigger_source,
                    trace_id=trace_id,
                    span_id=span_id,
                )

            return result

    def get_recommendations(
        self,
        rankings: list[dict] | None = None,
        *,
        trigger_source: str = "api.recommendations",
        persist: bool = True,
    ) -> list[dict]:
        with self.tracer.start_as_current_span("analytics.recommendations"):
            ranking_rows = (
                rankings
                if rankings is not None
                else self.get_rankings(
                    top_n=20, trigger_source=trigger_source, persist=persist
                )
            )
            customers = self.warehouse_client.fetch_customer_performance().copy()
            if customers.empty:
                customers = pd.DataFrame(columns=["customer_name", "revenue", "orders"])

            recommendations: list[dict] = []

            if ranking_rows:
                champion = ranking_rows[0]
                recommendations.append(
                    {
                        "title": "Double down on champion SKU",
                        "priority": "high",
                        "summary": (
                            f"Promote '{champion['product_name']}' with score {champion['score']:.2f} "
                            f"and margin {champion['margin_pct']:.2f}%."
                        ),
                    }
                )

                low_margin = next(
                    (row for row in ranking_rows if row["margin_pct"] < 10), None
                )
                if low_margin:
                    recommendations.append(
                        {
                            "title": "Review pricing for low-margin bestseller",
                            "priority": "medium",
                            "summary": (
                                f"'{low_margin['product_name']}' has strong rank but only "
                                f"{low_margin['margin_pct']:.2f}% margin."
                            ),
                        }
                    )

            if not customers.empty:
                customers["revenue"] = pd.to_numeric(
                    customers["revenue"], errors="coerce"
                ).fillna(0.0)
                customers["orders"] = pd.to_numeric(
                    customers["orders"], errors="coerce"
                ).fillna(0.0)
                customer = customers.sort_values("revenue", ascending=False).iloc[0]
                recommendations.append(
                    {
                        "title": "Protect top customer relationship",
                        "priority": "high",
                        "summary": (
                            f"Prioritize retention for '{customer['customer_name']}' with "
                            f"{float(customer['orders']):.0f} orders and {float(customer['revenue']):.2f} revenue."
                        ),
                    }
                )

            result = recommendations[:5]
            if persist and self.persistence_service is not None:
                span_context = trace.get_current_span().get_span_context()
                trace_id = (
                    f"{span_context.trace_id:032x}" if span_context.is_valid else None
                )
                span_id = (
                    f"{span_context.span_id:016x}" if span_context.is_valid else None
                )
                self.persistence_service.record_recommendation_run(
                    payload=result,
                    trigger_source=trigger_source,
                    trace_id=trace_id,
                    span_id=span_id,
                )
            return result

    def get_dashboard(self) -> DashboardSnapshot:
        with self.tracer.start_as_current_span("analytics.dashboard"):
            rankings = self.get_rankings(trigger_source="api.dashboard", persist=True)
            return DashboardSnapshot(
                kpis=self.get_kpis(),
                history=self.get_history_points(),
                forecasts=self.get_forecast(
                    trigger_source="api.dashboard", persist=True
                ),
                rankings=rankings,
                recommendations=self.get_recommendations(
                    rankings=rankings,
                    trigger_source="api.dashboard",
                    persist=True,
                ),
            )
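Note: get_forecast fits a linear trend with scikit-learn, rescales each future day by its weekday's historical share of the mean, and widens the 95% band with the horizon (ci = 1.96 * sigma * sqrt(1 + idx / n)). A minimal wiring sketch follows; the connection strings are illustrative placeholders, not part of this commit:

# Hypothetical wiring sketch for AnalyticsService; the DSNs are placeholders.
from sqlalchemy import create_engine

from app.services.analytics_service import AnalyticsService
from app.services.warehouse_service import ReadOnlyWarehouseClient

engines = {
    "aw": create_engine("mssql+pyodbc://..."),   # placeholder AdventureWorks DSN
    "wwi": create_engine("mssql+pyodbc://..."),  # placeholder WorldWideImporters DSN
}
service = AnalyticsService(ReadOnlyWarehouseClient(engines))

snapshot = service.get_dashboard()  # kpis, history, forecasts, rankings, recommendations
forecast = service.get_forecast(horizon_days=14, persist=False)  # no persistence configured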
backend/app/services/persistence_service.py (new file, +281)
@@ -0,0 +1,281 @@
from __future__ import annotations

import logging
from time import perf_counter

from opentelemetry import metrics, trace
from sqlalchemy import desc, select
from sqlalchemy.exc import SQLAlchemyError
from sqlalchemy.orm import Session, sessionmaker

from app.db.postgres_models import AuditLog, ForecastRun, RankingRun, RecommendationRun

LOGGER = logging.getLogger(__name__)


class PersistenceService:
    def __init__(self, session_factory: sessionmaker[Session]) -> None:
        self.session_factory = session_factory
        self.tracer = trace.get_tracer(__name__)
        self.meter = metrics.get_meter(__name__)
        self.write_counter = self.meter.create_counter(
            name="postgres_persist_writes_total",
            description="Total writes to app persistence PostgreSQL",
        )
        self.write_latency = self.meter.create_histogram(
            name="postgres_persist_write_latency_ms",
            unit="ms",
            description="Latency of app persistence write operations",
        )

    @staticmethod
    def _to_audit_dict(row: AuditLog) -> dict:
        return {
            "id": row.id,
            "created_at": row.created_at.isoformat(),
            "method": row.method,
            "path": row.path,
            "query_string": row.query_string,
            "status_code": row.status_code,
            "duration_ms": row.duration_ms,
            "trace_id": row.trace_id,
            "span_id": row.span_id,
            "client_ip": row.client_ip,
            "user_agent": row.user_agent,
            "details": row.details,
        }

    @staticmethod
    def _to_forecast_dict(row: ForecastRun) -> dict:
        return {
            "id": row.id,
            "created_at": row.created_at.isoformat(),
            "horizon_days": row.horizon_days,
            "point_count": row.point_count,
            "trigger_source": row.trigger_source,
            "trace_id": row.trace_id,
            "span_id": row.span_id,
            "payload": row.payload,
        }

    @staticmethod
    def _to_ranking_dict(row: RankingRun) -> dict:
        return {
            "id": row.id,
            "created_at": row.created_at.isoformat(),
            "top_n": row.top_n,
            "item_count": row.item_count,
            "trigger_source": row.trigger_source,
            "trace_id": row.trace_id,
            "span_id": row.span_id,
            "payload": row.payload,
        }

    @staticmethod
    def _to_recommendation_dict(row: RecommendationRun) -> dict:
        return {
            "id": row.id,
            "created_at": row.created_at.isoformat(),
            "item_count": row.item_count,
            "trigger_source": row.trigger_source,
            "trace_id": row.trace_id,
            "span_id": row.span_id,
            "payload": row.payload,
        }

    def record_audit_log(
        self,
        *,
        method: str,
        path: str,
        query_string: str,
        status_code: int,
        duration_ms: float,
        trace_id: str | None,
        span_id: str | None,
        client_ip: str | None,
        user_agent: str | None,
        details: dict | None = None,
    ) -> None:
        started = perf_counter()
        with self.tracer.start_as_current_span("persist.audit_log"):
            try:
                with self.session_factory() as session:
                    session.add(
                        AuditLog(
                            method=method,
                            path=path,
                            query_string=query_string[:1000],
                            status_code=status_code,
                            duration_ms=duration_ms,
                            trace_id=trace_id,
                            span_id=span_id,
                            client_ip=client_ip,
                            user_agent=user_agent,
                            details=details or {},
                        )
                    )
                    session.commit()
                self.write_counter.add(
                    1, attributes={"entity": "audit", "status": "ok"}
                )
            except SQLAlchemyError as exc:
                LOGGER.exception("Failed to persist audit log: %s", exc)
                self.write_counter.add(
                    1, attributes={"entity": "audit", "status": "error"}
                )
            finally:
                self.write_latency.record(
                    (perf_counter() - started) * 1000,
                    attributes={"entity": "audit"},
                )

    def record_forecast_run(
        self,
        *,
        horizon_days: int,
        payload: list[dict],
        trigger_source: str,
        trace_id: str | None,
        span_id: str | None,
    ) -> None:
        started = perf_counter()
        with self.tracer.start_as_current_span("persist.forecast_run"):
            try:
                with self.session_factory() as session:
                    session.add(
                        ForecastRun(
                            horizon_days=horizon_days,
                            point_count=len(payload),
                            trigger_source=trigger_source,
                            trace_id=trace_id,
                            span_id=span_id,
                            payload=payload,
                        )
                    )
                    session.commit()
                self.write_counter.add(
                    1, attributes={"entity": "forecast", "status": "ok"}
                )
            except SQLAlchemyError as exc:
                LOGGER.exception("Failed to persist forecast run: %s", exc)
                self.write_counter.add(
                    1, attributes={"entity": "forecast", "status": "error"}
                )
            finally:
                self.write_latency.record(
                    (perf_counter() - started) * 1000,
                    attributes={"entity": "forecast"},
                )

    def record_ranking_run(
        self,
        *,
        top_n: int,
        payload: list[dict],
        trigger_source: str,
        trace_id: str | None,
        span_id: str | None,
    ) -> None:
        started = perf_counter()
        with self.tracer.start_as_current_span("persist.ranking_run"):
            try:
                with self.session_factory() as session:
                    session.add(
                        RankingRun(
                            top_n=top_n,
                            item_count=len(payload),
                            trigger_source=trigger_source,
                            trace_id=trace_id,
                            span_id=span_id,
                            payload=payload,
                        )
                    )
                    session.commit()
                self.write_counter.add(
                    1, attributes={"entity": "ranking", "status": "ok"}
                )
            except SQLAlchemyError as exc:
                LOGGER.exception("Failed to persist ranking run: %s", exc)
                self.write_counter.add(
                    1, attributes={"entity": "ranking", "status": "error"}
                )
            finally:
                self.write_latency.record(
                    (perf_counter() - started) * 1000,
                    attributes={"entity": "ranking"},
                )

    def record_recommendation_run(
        self,
        *,
        payload: list[dict],
        trigger_source: str,
        trace_id: str | None,
        span_id: str | None,
    ) -> None:
        started = perf_counter()
        with self.tracer.start_as_current_span("persist.recommendation_run"):
            try:
                with self.session_factory() as session:
                    session.add(
                        RecommendationRun(
                            item_count=len(payload),
                            trigger_source=trigger_source,
                            trace_id=trace_id,
                            span_id=span_id,
                            payload=payload,
                        )
                    )
                    session.commit()
                self.write_counter.add(
                    1, attributes={"entity": "recommendation", "status": "ok"}
                )
            except SQLAlchemyError as exc:
                LOGGER.exception("Failed to persist recommendation run: %s", exc)
                self.write_counter.add(
                    1, attributes={"entity": "recommendation", "status": "error"}
                )
            finally:
                self.write_latency.record(
                    (perf_counter() - started) * 1000,
                    attributes={"entity": "recommendation"},
                )

    def list_audit_logs(self, limit: int) -> list[dict]:
        with self.tracer.start_as_current_span("persist.list_audit_logs"):
            with self.session_factory() as session:
                rows = session.execute(
                    select(AuditLog).order_by(desc(AuditLog.created_at)).limit(limit)
                ).scalars()
                return [self._to_audit_dict(row) for row in rows]

    def list_forecast_runs(self, limit: int) -> list[dict]:
        with self.tracer.start_as_current_span("persist.list_forecast_runs"):
            with self.session_factory() as session:
                rows = session.execute(
                    select(ForecastRun)
                    .order_by(desc(ForecastRun.created_at))
                    .limit(limit)
                ).scalars()
                return [self._to_forecast_dict(row) for row in rows]

    def list_ranking_runs(self, limit: int) -> list[dict]:
        with self.tracer.start_as_current_span("persist.list_ranking_runs"):
            with self.session_factory() as session:
                rows = session.execute(
                    select(RankingRun)
                    .order_by(desc(RankingRun.created_at))
                    .limit(limit)
                ).scalars()
                return [self._to_ranking_dict(row) for row in rows]

    def list_recommendation_runs(self, limit: int) -> list[dict]:
        with self.tracer.start_as_current_span("persist.list_recommendation_runs"):
            with self.session_factory() as session:
                rows = session.execute(
                    select(RecommendationRun)
                    .order_by(desc(RecommendationRun.created_at))
                    .limit(limit)
                ).scalars()
                return [self._to_recommendation_dict(row) for row in rows]
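PersistenceService expects a SQLAlchemy 2.x sessionmaker; each write is wrapped in a span, counted by entity and status, and timed into the latency histogram. A minimal setup sketch, assuming a placeholder DSN and an illustrative payload:

# Hypothetical setup sketch; the PostgreSQL DSN and payload are placeholders.
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

from app.services.persistence_service import PersistenceService

engine = create_engine("postgresql+psycopg://app:app@localhost:5432/app")  # placeholder DSN
persistence = PersistenceService(sessionmaker(bind=engine))

persistence.record_forecast_run(
    horizon_days=14,
    payload=[{"date": "2024-01-01", "predicted_revenue": 100.0}],  # illustrative point
    trigger_source="manual.test",
    trace_id=None,
    span_id=None,
)
recent = persistence.list_forecast_runs(limit=10)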
backend/app/services/warehouse_service.py (new file, +101)
@@ -0,0 +1,101 @@
from __future__ import annotations

import hashlib
import logging
from collections.abc import Sequence
from time import perf_counter

import pandas as pd
from opentelemetry import metrics, trace
from sqlalchemy import text
from sqlalchemy.engine import Engine
from sqlalchemy.exc import SQLAlchemyError

from app.db import queries

LOGGER = logging.getLogger(__name__)


class ReadOnlyWarehouseClient:
    def __init__(self, engines: dict[str, Engine]) -> None:
        self.engines = engines
        self.tracer = trace.get_tracer(__name__)
        self.meter = metrics.get_meter(__name__)
        self.query_counter = self.meter.create_counter(
            name="warehouse_queries_total",
            description="Total warehouse query executions",
        )
        self.query_latency = self.meter.create_histogram(
            name="warehouse_query_latency_ms",
            unit="ms",
            description="Warehouse query latency",
        )

    def _validate_read_only_query(self, sql: str) -> None:
        normalized = sql.strip().lower()
        if not (normalized.startswith("select") or normalized.startswith("with")):
            raise ValueError("Only read-only SELECT/CTE SQL statements are allowed.")

    def _run_query_list(
        self, source: str, sql_candidates: Sequence[str]
    ) -> pd.DataFrame:
        engine = self.engines[source]
        last_error: Exception | None = None

        for candidate in sql_candidates:
            self._validate_read_only_query(candidate)
            query_hash = hashlib.sha256(candidate.encode("utf-8")).hexdigest()[:12]
            with self.tracer.start_as_current_span("warehouse.query") as span:
                span.set_attribute("db.system", "mssql")
                span.set_attribute("db.source", source)
                span.set_attribute("db.query.hash", query_hash)
                started = perf_counter()
                try:
                    with engine.connect() as conn:
                        with self.tracer.start_as_current_span(
                            "warehouse.query.execute"
                        ):
                            df = pd.read_sql_query(sql=text(candidate), con=conn)
                    elapsed_ms = (perf_counter() - started) * 1000
                    self.query_latency.record(elapsed_ms, attributes={"source": source})
                    self.query_counter.add(
                        1, attributes={"source": source, "status": "ok"}
                    )
                    return df
                except SQLAlchemyError as exc:
                    last_error = exc
                    elapsed_ms = (perf_counter() - started) * 1000
                    self.query_latency.record(elapsed_ms, attributes={"source": source})
                    self.query_counter.add(
                        1, attributes={"source": source, "status": "error"}
                    )
                    LOGGER.warning(
                        "Query failed for %s with hash %s: %s", source, query_hash, exc
                    )

        if last_error is not None:
            raise RuntimeError(
                f"All query candidates failed for source '{source}'."
            ) from last_error
        return pd.DataFrame()

    def fetch_daily_sales(self) -> pd.DataFrame:
        aw = self._run_query_list("aw", queries.AW_DAILY_SALES_QUERIES)
        aw["source"] = "AdventureWorks2022DWH"
        wwi = self._run_query_list("wwi", queries.WWI_DAILY_SALES_QUERIES)
        wwi["source"] = "WorldWideImporters"
        return pd.concat([aw, wwi], ignore_index=True)

    def fetch_product_performance(self) -> pd.DataFrame:
        aw = self._run_query_list("aw", queries.AW_PRODUCT_PERFORMANCE_QUERIES)
        aw["source"] = "AdventureWorks2022DWH"
        wwi = self._run_query_list("wwi", queries.WWI_PRODUCT_PERFORMANCE_QUERIES)
        wwi["source"] = "WorldWideImporters"
        return pd.concat([aw, wwi], ignore_index=True)

    def fetch_customer_performance(self) -> pd.DataFrame:
        aw = self._run_query_list("aw", queries.AW_CUSTOMER_QUERIES)
        aw["source"] = "AdventureWorks2022DWH"
        wwi = self._run_query_list("wwi", queries.WWI_CUSTOMER_QUERIES)
        wwi["source"] = "WorldWideImporters"
        return pd.concat([aw, wwi], ignore_index=True)
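_run_query_list tries each SQL candidate in order and returns the first result set that executes, so one client can cover schema variants across the two warehouses; note the read-only check is a prefix guard on SELECT/WITH, not a full SQL parser. A usage sketch under the same assumptions (placeholder DSNs):

# Hypothetical usage sketch; the DSNs are placeholders.
from sqlalchemy import create_engine

from app.services.warehouse_service import ReadOnlyWarehouseClient

client = ReadOnlyWarehouseClient(
    {
        "aw": create_engine("mssql+pyodbc://..."),   # placeholder DSN
        "wwi": create_engine("mssql+pyodbc://..."),  # placeholder DSN
    }
)
daily = client.fetch_daily_sales()  # both warehouses, tagged via the "source" column
top_products = client.fetch_product_performance()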