Add initial work from Codex
This commit is contained in:
373
backend/app/services/analytics_service.py
Normal file
373
backend/app/services/analytics_service.py
Normal file
@@ -0,0 +1,373 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import date, timedelta
|
||||
from math import sqrt
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from opentelemetry import trace
|
||||
from sklearn.linear_model import LinearRegression
|
||||
|
||||
from app.core.config import settings
|
||||
from app.services.persistence_service import PersistenceService
|
||||
from app.services.warehouse_service import ReadOnlyWarehouseClient
|
||||
|
||||
|
||||
@dataclass
class DashboardSnapshot:
    """Bundle of every section returned by ``AnalyticsService.get_dashboard``."""

    # Aggregate figures as produced by ``AnalyticsService.get_kpis``.
    kpis: dict
    # One entry per sale date, as produced by ``get_history_points``.
    history: list[dict]
    # One entry per forecast day, as produced by ``get_forecast``.
    forecasts: list[dict]
    # Ranked product rows, as produced by ``get_rankings``.
    rankings: list[dict]
    # Title / priority / summary entries, as produced by ``get_recommendations``.
    recommendations: list[dict]
|
||||
|
||||
|
||||
class AnalyticsService:
    """Compute KPIs, history, forecasts, rankings and recommendations.

    All data is read through ``warehouse_client``. When a
    ``persistence_service`` is supplied, forecast, ranking and recommendation
    runs are recorded through it together with the identifiers of the current
    OpenTelemetry span.
    """

    # Numeric measure columns that are coerced and aggregated throughout.
    _METRIC_COLUMNS = ("revenue", "cost", "quantity", "orders")

    def __init__(
        self,
        warehouse_client: ReadOnlyWarehouseClient,
        persistence_service: PersistenceService | None = None,
    ) -> None:
        """Store the collaborators and obtain a tracer for this module."""
        self.warehouse_client = warehouse_client
        self.persistence_service = persistence_service
        self.tracer = trace.get_tracer(__name__)

    # ------------------------------------------------------------------ #
    # Private helpers
    # ------------------------------------------------------------------ #
    @staticmethod
    def _coerce_numeric(frame: pd.DataFrame, columns: tuple[str, ...]) -> None:
        """Convert *columns* of *frame* to numbers in place.

        Values that cannot be parsed become ``0.0``. Columns that are absent
        from the frame are skipped.
        """
        for column in columns:
            if column in frame.columns:
                frame[column] = pd.to_numeric(
                    frame[column], errors="coerce"
                ).fillna(0.0)

    @staticmethod
    def _span_identifiers() -> tuple[str | None, str | None]:
        """Return ``(trace_id, span_id)`` of the current span as hex strings.

        Both values are ``None`` when the current span context is not valid.
        """
        context = trace.get_current_span().get_span_context()
        if not context.is_valid:
            return None, None
        return f"{context.trace_id:032x}", f"{context.span_id:016x}"

    @staticmethod
    def _weekday_factors(series: pd.Series) -> dict[int, float]:
        """Return per-weekday multipliers (weekday mean / overall mean).

        *series* must be indexed by datetimes. An empty mapping is returned
        when the overall mean is not positive; callers treat a missing
        weekday as a factor of ``1.0``.
        """
        overall_mean = float(series.mean()) if len(series) else 0.0
        if not overall_mean > 0:
            return {}
        factors = series.groupby(series.index.weekday).mean() / overall_mean
        factors = factors.replace([np.inf, -np.inf], 1.0).fillna(1.0)
        return {int(weekday): float(value) for weekday, value in factors.items()}

    @staticmethod
    def _normalize_frame(df: pd.DataFrame, date_col: str = "sale_date") -> pd.DataFrame:
        """Return a cleaned copy of *df*.

        ``date_col`` is parsed to datetimes, the measure columns that are
        present are coerced to numbers (unparseable values become ``0.0``),
        and rows whose date could not be parsed are dropped. The input frame
        is not modified.
        """
        normalized = df.copy()
        normalized[date_col] = pd.to_datetime(normalized[date_col], errors="coerce")
        AnalyticsService._coerce_numeric(normalized, AnalyticsService._METRIC_COLUMNS)
        return normalized.dropna(subset=[date_col])

    # ------------------------------------------------------------------ #
    # Public API
    # ------------------------------------------------------------------ #
    def load_sales_history(self, days_back: int | None = None) -> pd.DataFrame:
        """Return daily totals of the measure columns, oldest first.

        Args:
            days_back: Size of the look-back window in days, counted from
                today. A falsy value (``None`` or ``0``) selects
                ``settings.default_history_days``.

        Returns:
            One row per ``sale_date`` with summed revenue, cost, quantity
            and orders.
        """
        with self.tracer.start_as_current_span("analytics.load_sales_history"):
            daily_sales = self._normalize_frame(
                self.warehouse_client.fetch_daily_sales()
            )
            days = days_back or settings.default_history_days
            cutoff = pd.Timestamp(date.today() - timedelta(days=days))
            recent = daily_sales[daily_sales["sale_date"] >= cutoff]
            return (
                recent.groupby("sale_date", as_index=False)[
                    list(self._METRIC_COLUMNS)
                ]
                .sum()
                .sort_values("sale_date")
            )

    def get_kpis(self) -> dict:
        """Return headline figures for the last 180 days.

        ``records_in_window`` is the number of rows returned by
        ``load_sales_history``, i.e. the number of distinct sale dates.
        """
        with self.tracer.start_as_current_span("analytics.kpis"):
            sales = self.load_sales_history(days_back=180)
            if sales.empty:
                return {
                    "total_revenue": 0.0,
                    "gross_margin_pct": 0.0,
                    "total_quantity": 0.0,
                    "avg_order_value": 0.0,
                    "records_in_window": 0,
                }

            total_revenue = float(sales["revenue"].sum())
            total_cost = float(sales["cost"].sum())
            # Floor at one order so the average below never divides by zero.
            total_orders = max(float(sales["orders"].sum()), 1.0)
            if total_revenue:
                margin_pct = (total_revenue - total_cost) / total_revenue * 100
            else:
                margin_pct = 0.0

            return {
                "total_revenue": round(total_revenue, 2),
                "gross_margin_pct": round(margin_pct, 2),
                "total_quantity": round(float(sales["quantity"].sum()), 2),
                "avg_order_value": round(total_revenue / total_orders, 2),
                "records_in_window": int(sales.shape[0]),
            }

    def get_history_points(self, days_back: int | None = None) -> list[dict]:
        """Return the daily history as JSON-friendly dictionaries.

        Each entry carries an ISO ``date`` plus ``revenue``, ``cost`` and
        ``quantity`` rounded to two decimals. An empty history yields ``[]``.
        """
        with self.tracer.start_as_current_span("analytics.history_points"):
            sales = self.load_sales_history(days_back=days_back)
            return [
                {
                    "date": pd.Timestamp(row.sale_date).date().isoformat(),
                    "revenue": round(float(row.revenue), 2),
                    "cost": round(float(row.cost), 2),
                    "quantity": round(float(row.quantity), 2),
                }
                for row in sales.itertuples(index=False)
            ]

    def get_forecast(
        self,
        horizon_days: int | None = None,
        *,
        trigger_source: str = "api.forecasts",
        persist: bool = True,
    ) -> list[dict]:
        """Forecast daily revenue from a linear trend scaled per weekday.

        A ``LinearRegression`` is fitted on up to 720 days of daily revenue.
        Each future trend value is multiplied by its weekday factor and
        floored at zero. The band around each prediction is
        ``1.96 * sigma * sqrt(1 + step / n)``, where ``sigma`` is the standard
        deviation of the in-sample trend residuals and ``n`` the number of
        history days.

        Args:
            horizon_days: Number of days to predict. A falsy value selects
                ``settings.forecast_horizon_days``. A non-positive horizon
                yields ``[]``.
            trigger_source: Label stored with the persisted run.
            persist: Record the run when a persistence service is configured.

        Returns:
            One dictionary per future day with ``date``,
            ``predicted_revenue``, ``lower_bound`` and ``upper_bound``.
            Empty when there is no history or the horizon is not positive.
        """
        with self.tracer.start_as_current_span("analytics.forecast"):
            horizon = horizon_days or settings.forecast_horizon_days
            # A non-positive horizon would hand the model an empty feature
            # matrix, which it rejects; there is nothing to predict anyway.
            if horizon <= 0:
                return []

            sales = self.load_sales_history(days_back=720)
            if sales.empty:
                return []

            # Re-index to a gap-free daily series; days without sales count as 0.
            series = (
                sales.set_index("sale_date")["revenue"]
                .sort_index()
                .resample("D")
                .sum()
                .fillna(0.0)
            )
            y = series.values
            x = np.arange(len(y), dtype=float).reshape(-1, 1)
            model = LinearRegression()
            model.fit(x, y)
            residual = y - model.predict(x)
            sigma = float(np.std(residual)) if len(residual) > 1 else 0.0

            factors = self._weekday_factors(series)
            future_x = np.arange(len(y), len(y) + horizon, dtype=float).reshape(-1, 1)
            raw_forecast = model.predict(future_x)

            last_day = series.index.max().date()
            history_len = max(len(y), 1)
            predictions: list[dict] = []
            for step, trend_value in enumerate(raw_forecast, start=1):
                day = last_day + timedelta(days=step)
                yhat = max(float(trend_value) * factors.get(day.weekday(), 1.0), 0.0)
                ci = 1.96 * sigma * sqrt(1 + step / history_len)
                predictions.append(
                    {
                        "date": day.isoformat(),
                        "predicted_revenue": round(yhat, 2),
                        "lower_bound": round(max(yhat - ci, 0.0), 2),
                        "upper_bound": round(yhat + ci, 2),
                    }
                )

            if persist and self.persistence_service is not None:
                trace_id, span_id = self._span_identifiers()
                self.persistence_service.record_forecast_run(
                    horizon_days=horizon,
                    payload=predictions,
                    trigger_source=trigger_source,
                    trace_id=trace_id,
                    span_id=span_id,
                )

            return predictions

    def get_rankings(
        self,
        top_n: int | None = None,
        *,
        trigger_source: str = "api.rankings",
        persist: bool = True,
    ) -> list[dict]:
        """Rank products by a blended revenue / margin / volume score.

        The score is ``0.55 * revenue_norm + 0.30 * margin_norm +
        0.15 * velocity_norm`` and is reported multiplied by 100.

        Args:
            top_n: Number of rows to return. A falsy value selects
                ``settings.ranking_default_top_n``. A non-positive value
                yields ``[]``.
            trigger_source: Label stored with the persisted run.
            persist: Record the run when a persistence service is configured.

        Returns:
            Up to ``top_n`` dictionaries with ``rank``, ``product_id``,
            ``product_name``, ``category``, ``revenue``, ``margin_pct`` and
            ``score``, best first. Empty when there is no product data.
        """
        with self.tracer.start_as_current_span("analytics.rankings"):
            n = top_n or settings.ranking_default_top_n
            # DataFrame.head(-k) means "all but the last k rows", which is
            # never what a top-N request intends.
            if n <= 0:
                return []

            products = self.warehouse_client.fetch_product_performance().copy()
            if products.empty:
                return []

            self._coerce_numeric(products, self._METRIC_COLUMNS)

            # NOTE(review): groupby drops rows whose product_id, product_name
            # or category_name is null (pandas default dropna=True) - confirm
            # that excluding such products is intended.
            grouped = (
                products.groupby(
                    ["product_id", "product_name", "category_name"], as_index=False
                )[list(self._METRIC_COLUMNS)]
                .sum()
                .sort_values("revenue", ascending=False)
            )

            grouped["margin_pct"] = np.where(
                grouped["revenue"] > 0,
                ((grouped["revenue"] - grouped["cost"]) / grouped["revenue"]) * 100,
                0.0,
            )

            # Denominators are floored at 1.0 so all-zero data cannot divide by 0.
            revenue_norm = grouped["revenue"] / max(
                float(grouped["revenue"].max()), 1.0
            )
            # Maps a margin of -100%..+100% onto 0..1 before clipping.
            margin_norm = ((grouped["margin_pct"] + 100) / 200).clip(0, 1)
            velocity_norm = grouped["quantity"] / max(
                float(grouped["quantity"].max()), 1.0
            )
            grouped["score"] = (
                (0.55 * revenue_norm) + (0.30 * margin_norm) + (0.15 * velocity_norm)
            )

            ranked = grouped.sort_values("score", ascending=False).head(n)
            result = [
                {
                    "rank": position,
                    "product_id": str(row["product_id"]),
                    "product_name": str(row["product_name"]),
                    "category": str(row["category_name"]),
                    "revenue": round(float(row["revenue"]), 2),
                    "margin_pct": round(float(row["margin_pct"]), 2),
                    "score": round(float(row["score"]) * 100, 2),
                }
                for position, row in enumerate(ranked.to_dict("records"), start=1)
            ]

            if persist and self.persistence_service is not None:
                trace_id, span_id = self._span_identifiers()
                self.persistence_service.record_ranking_run(
                    top_n=n,
                    payload=result,
                    trigger_source=trigger_source,
                    trace_id=trace_id,
                    span_id=span_id,
                )

            return result

    def get_recommendations(
        self,
        rankings: list[dict] | None = None,
        *,
        trigger_source: str = "api.recommendations",
        persist: bool = True,
    ) -> list[dict]:
        """Derive up to five recommendations from rankings and customers.

        Args:
            rankings: Rows shaped like the output of ``get_rankings``. When
                ``None`` the top 20 are computed with the same
                ``trigger_source`` and ``persist`` values.
            trigger_source: Label stored with the persisted run.
            persist: Record the run when a persistence service is configured.

        Returns:
            Dictionaries with ``title``, ``priority`` and ``summary``:
            the top-ranked product, the first ranked product with a margin
            below 10%, and the customer with the highest revenue, each only
            when the corresponding data exists.
        """
        with self.tracer.start_as_current_span("analytics.recommendations"):
            if rankings is not None:
                ranking_rows = rankings
            else:
                ranking_rows = self.get_rankings(
                    top_n=20, trigger_source=trigger_source, persist=persist
                )
            customers = self.warehouse_client.fetch_customer_performance().copy()

            recommendations: list[dict] = []

            if ranking_rows:
                champion = ranking_rows[0]
                recommendations.append(
                    {
                        "title": "Double down on champion SKU",
                        "priority": "high",
                        "summary": (
                            f"Promote '{champion['product_name']}' with score {champion['score']:.2f} "
                            f"and margin {champion['margin_pct']:.2f}%."
                        ),
                    }
                )

            low_margin = next(
                (row for row in ranking_rows if row["margin_pct"] < 10), None
            )
            if low_margin:
                recommendations.append(
                    {
                        "title": "Review pricing for low-margin bestseller",
                        "priority": "medium",
                        "summary": (
                            f"'{low_margin['product_name']}' has strong rank but only "
                            f"{low_margin['margin_pct']:.2f}% margin."
                        ),
                    }
                )

            if not customers.empty:
                self._coerce_numeric(customers, ("revenue", "orders"))
                customer = customers.sort_values("revenue", ascending=False).iloc[0]
                recommendations.append(
                    {
                        "title": "Protect top customer relationship",
                        "priority": "high",
                        "summary": (
                            f"Prioritize retention for '{customer['customer_name']}' with "
                            f"{float(customer['orders']):.0f} orders and {float(customer['revenue']):.2f} revenue."
                        ),
                    }
                )

            result = recommendations[:5]
            if persist and self.persistence_service is not None:
                trace_id, span_id = self._span_identifiers()
                self.persistence_service.record_recommendation_run(
                    payload=result,
                    trigger_source=trigger_source,
                    trace_id=trace_id,
                    span_id=span_id,
                )
            return result

    def get_dashboard(self) -> DashboardSnapshot:
        """Assemble every dashboard section into one snapshot.

        Rankings are computed once and reused for the recommendations.
        Ranking, forecast and recommendation runs are persisted with the
        trigger source ``"api.dashboard"``.
        """
        with self.tracer.start_as_current_span("analytics.dashboard"):
            rankings = self.get_rankings(trigger_source="api.dashboard", persist=True)
            return DashboardSnapshot(
                kpis=self.get_kpis(),
                history=self.get_history_points(),
                forecasts=self.get_forecast(
                    trigger_source="api.dashboard", persist=True
                ),
                rankings=rankings,
                recommendations=self.get_recommendations(
                    rankings=rankings,
                    trigger_source="api.dashboard",
                    persist=True,
                ),
            )
|
||||
Reference in New Issue
Block a user