374 lines
14 KiB
Python
374 lines
14 KiB
Python
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from datetime import date, timedelta
|
|
from math import sqrt
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
from opentelemetry import trace
|
|
from sklearn.linear_model import LinearRegression
|
|
|
|
from app.core.config import settings
|
|
from app.services.persistence_service import PersistenceService
|
|
from app.services.warehouse_service import ReadOnlyWarehouseClient
|
|
|
|
|
|
@dataclass
class DashboardSnapshot:
    """Single response object bundling every dashboard panel.

    Produced by AnalyticsService.get_dashboard so the API layer can
    serialize one payload instead of issuing five separate calls.
    """

    # Headline metrics (total revenue, margin %, AOV, ...).
    kpis: dict
    # Daily aggregated sales points, oldest first.
    history: list[dict]
    # Forward revenue predictions with confidence bounds.
    forecasts: list[dict]
    # Top products ordered by blended score.
    rankings: list[dict]
    # Prioritized action items derived from rankings/customers.
    recommendations: list[dict]
|
|
|
|
|
|
class AnalyticsService:
    """Read-only analytics over the sales warehouse.

    Pulls raw frames from a ReadOnlyWarehouseClient, derives KPIs,
    forecasts, rankings, and recommendations, and (optionally) records
    each derived run through a PersistenceService. Every public method
    opens an OpenTelemetry span.
    """

    def __init__(
        self,
        warehouse_client: ReadOnlyWarehouseClient,
        persistence_service: PersistenceService | None = None,
    ) -> None:
        """Store collaborators.

        persistence_service may be None, in which case persist flags on
        the public methods are silently ignored.
        """
        self.warehouse_client = warehouse_client
        self.persistence_service = persistence_service
        # One tracer per instance; span names are prefixed "analytics.".
        self.tracer = trace.get_tracer(__name__)
|
|
|
|
@staticmethod
|
|
def _normalize_frame(df: pd.DataFrame, date_col: str = "sale_date") -> pd.DataFrame:
|
|
normalized = df.copy()
|
|
normalized[date_col] = pd.to_datetime(normalized[date_col], errors="coerce")
|
|
for numeric in ("revenue", "cost", "quantity", "orders"):
|
|
if numeric in normalized.columns:
|
|
normalized[numeric] = pd.to_numeric(
|
|
normalized[numeric], errors="coerce"
|
|
).fillna(0.0)
|
|
return normalized.dropna(subset=[date_col])
|
|
|
|
def load_sales_history(self, days_back: int | None = None) -> pd.DataFrame:
|
|
with self.tracer.start_as_current_span("analytics.load_sales_history"):
|
|
daily_sales = self._normalize_frame(
|
|
self.warehouse_client.fetch_daily_sales()
|
|
)
|
|
days = days_back or settings.default_history_days
|
|
min_date = pd.Timestamp(date.today() - timedelta(days=days))
|
|
filtered = daily_sales[daily_sales["sale_date"] >= min_date]
|
|
return (
|
|
filtered.groupby("sale_date", as_index=False)[
|
|
["revenue", "cost", "quantity", "orders"]
|
|
]
|
|
.sum()
|
|
.sort_values("sale_date")
|
|
)
|
|
|
|
def get_kpis(self) -> dict:
|
|
with self.tracer.start_as_current_span("analytics.kpis"):
|
|
sales = self.load_sales_history(days_back=180)
|
|
if sales.empty:
|
|
return {
|
|
"total_revenue": 0.0,
|
|
"gross_margin_pct": 0.0,
|
|
"total_quantity": 0.0,
|
|
"avg_order_value": 0.0,
|
|
"records_in_window": 0,
|
|
}
|
|
|
|
total_revenue = float(sales["revenue"].sum())
|
|
total_cost = float(sales["cost"].sum())
|
|
total_orders = max(float(sales["orders"].sum()), 1.0)
|
|
margin_pct = (
|
|
((total_revenue - total_cost) / total_revenue * 100)
|
|
if total_revenue
|
|
else 0.0
|
|
)
|
|
return {
|
|
"total_revenue": round(total_revenue, 2),
|
|
"gross_margin_pct": round(margin_pct, 2),
|
|
"total_quantity": round(float(sales["quantity"].sum()), 2),
|
|
"avg_order_value": round(total_revenue / total_orders, 2),
|
|
"records_in_window": int(sales.shape[0]),
|
|
}
|
|
|
|
def get_history_points(self, days_back: int | None = None) -> list[dict]:
|
|
with self.tracer.start_as_current_span("analytics.history_points"):
|
|
sales = self.load_sales_history(days_back=days_back)
|
|
if sales.empty:
|
|
return []
|
|
return [
|
|
{
|
|
"date": pd.Timestamp(row["sale_date"]).date().isoformat(),
|
|
"revenue": round(float(row["revenue"]), 2),
|
|
"cost": round(float(row["cost"]), 2),
|
|
"quantity": round(float(row["quantity"]), 2),
|
|
}
|
|
for _, row in sales.iterrows()
|
|
]
|
|
|
|
    def get_forecast(
        self,
        horizon_days: int | None = None,
        *,
        trigger_source: str = "api.forecasts",
        persist: bool = True,
    ) -> list[dict]:
        """Forecast daily revenue for the next *horizon_days* days.

        Model: a linear trend fit on up to 720 days of daily revenue,
        multiplied by a weekday seasonality factor, with a 95% interval
        that widens with forecast distance. Returns one dict per future
        day with date, predicted_revenue, lower_bound, upper_bound.
        When persist is true and a persistence service is configured,
        the run is recorded along with the current trace/span ids.
        """
        with self.tracer.start_as_current_span("analytics.forecast"):
            horizon = horizon_days or settings.forecast_horizon_days
            sales = self.load_sales_history(days_back=720)
            if sales.empty:
                return []

            # Daily-resampled revenue series; missing calendar days become 0.0
            # so the trend fit sees a gap-free, evenly spaced series.
            series = (
                sales.set_index("sale_date")["revenue"]
                .sort_index()
                .resample("D")
                .sum()
                .fillna(0.0)
            )
            y = series.values
            x = np.arange(len(y), dtype=float).reshape(-1, 1)
            model = LinearRegression()
            model.fit(x, y)
            baseline = model.predict(x)
            residual = y - baseline
            # In-sample residual spread drives the confidence interval width.
            sigma = float(np.std(residual)) if len(residual) > 1 else 0.0

            # Multiplicative weekday seasonality: mean revenue per weekday
            # divided by the overall mean (1.0 fallback when mean is 0,
            # or where division produced inf/NaN).
            weekday_baseline = series.groupby(series.index.weekday).mean()
            overall_mean = float(series.mean()) if len(series) else 0.0
            weekday_factor = (
                weekday_baseline / overall_mean
                if overall_mean > 0
                else pd.Series([1.0] * 7, index=range(7))
            )
            weekday_factor = weekday_factor.replace([np.inf, -np.inf], 1.0).fillna(1.0)

            future_x = np.arange(len(y), len(y) + horizon, dtype=float).reshape(-1, 1)
            raw_forecast = model.predict(future_x)

            predictions: list[dict] = []
            # Forecast starts the day after the last observed date.
            start_date = series.index.max().date()
            for idx, point in enumerate(raw_forecast, start=1):
                day = start_date + timedelta(days=idx)
                factor = (
                    float(weekday_factor.loc[day.weekday()])
                    if day.weekday() in weekday_factor.index
                    else 1.0
                )
                # Revenue cannot go negative; clamp the seasonal trend at 0.
                yhat = max(float(point) * factor, 0.0)
                # 95% interval (z=1.96) widening with horizon distance idx.
                ci = 1.96 * sigma * sqrt(1 + idx / max(len(y), 1))
                predictions.append(
                    {
                        "date": day.isoformat(),
                        "predicted_revenue": round(yhat, 2),
                        "lower_bound": round(max(yhat - ci, 0.0), 2),
                        "upper_bound": round(yhat + ci, 2),
                    }
                )

            if persist and self.persistence_service is not None:
                # Record trace/span ids (hex-formatted) so persisted runs can
                # be correlated with telemetry; None when no span is active.
                span_context = trace.get_current_span().get_span_context()
                trace_id = (
                    f"{span_context.trace_id:032x}" if span_context.is_valid else None
                )
                span_id = (
                    f"{span_context.span_id:016x}" if span_context.is_valid else None
                )
                self.persistence_service.record_forecast_run(
                    horizon_days=horizon,
                    payload=predictions,
                    trigger_source=trigger_source,
                    trace_id=trace_id,
                    span_id=span_id,
                )

            return predictions
|
|
|
|
def get_rankings(
|
|
self,
|
|
top_n: int | None = None,
|
|
*,
|
|
trigger_source: str = "api.rankings",
|
|
persist: bool = True,
|
|
) -> list[dict]:
|
|
with self.tracer.start_as_current_span("analytics.rankings"):
|
|
n = top_n or settings.ranking_default_top_n
|
|
products = self.warehouse_client.fetch_product_performance().copy()
|
|
if products.empty:
|
|
return []
|
|
|
|
products["revenue"] = pd.to_numeric(
|
|
products["revenue"], errors="coerce"
|
|
).fillna(0.0)
|
|
products["cost"] = pd.to_numeric(products["cost"], errors="coerce").fillna(
|
|
0.0
|
|
)
|
|
products["quantity"] = pd.to_numeric(
|
|
products["quantity"], errors="coerce"
|
|
).fillna(0.0)
|
|
products["orders"] = pd.to_numeric(
|
|
products["orders"], errors="coerce"
|
|
).fillna(0.0)
|
|
|
|
grouped = (
|
|
products.groupby(
|
|
["product_id", "product_name", "category_name"], as_index=False
|
|
)[["revenue", "cost", "quantity", "orders"]]
|
|
.sum()
|
|
.sort_values("revenue", ascending=False)
|
|
)
|
|
|
|
grouped["margin_pct"] = np.where(
|
|
grouped["revenue"] > 0,
|
|
((grouped["revenue"] - grouped["cost"]) / grouped["revenue"]) * 100,
|
|
0.0,
|
|
)
|
|
|
|
revenue_norm = grouped["revenue"] / max(
|
|
float(grouped["revenue"].max()), 1.0
|
|
)
|
|
margin_norm = (grouped["margin_pct"] + 100) / 200
|
|
velocity_norm = grouped["quantity"] / max(
|
|
float(grouped["quantity"].max()), 1.0
|
|
)
|
|
grouped["score"] = (
|
|
(0.55 * revenue_norm)
|
|
+ (0.30 * margin_norm.clip(0, 1))
|
|
+ (0.15 * velocity_norm)
|
|
)
|
|
ranked = (
|
|
grouped.sort_values("score", ascending=False)
|
|
.head(n)
|
|
.reset_index(drop=True)
|
|
)
|
|
|
|
result = [
|
|
{
|
|
"rank": int(idx + 1),
|
|
"product_id": str(row["product_id"]),
|
|
"product_name": str(row["product_name"]),
|
|
"category": str(row["category_name"]),
|
|
"revenue": round(float(row["revenue"]), 2),
|
|
"margin_pct": round(float(row["margin_pct"]), 2),
|
|
"score": round(float(row["score"]) * 100, 2),
|
|
}
|
|
for idx, row in ranked.iterrows()
|
|
]
|
|
|
|
if persist and self.persistence_service is not None:
|
|
span_context = trace.get_current_span().get_span_context()
|
|
trace_id = (
|
|
f"{span_context.trace_id:032x}" if span_context.is_valid else None
|
|
)
|
|
span_id = (
|
|
f"{span_context.span_id:016x}" if span_context.is_valid else None
|
|
)
|
|
self.persistence_service.record_ranking_run(
|
|
top_n=n,
|
|
payload=result,
|
|
trigger_source=trigger_source,
|
|
trace_id=trace_id,
|
|
span_id=span_id,
|
|
)
|
|
|
|
return result
|
|
|
|
def get_recommendations(
|
|
self,
|
|
rankings: list[dict] | None = None,
|
|
*,
|
|
trigger_source: str = "api.recommendations",
|
|
persist: bool = True,
|
|
) -> list[dict]:
|
|
with self.tracer.start_as_current_span("analytics.recommendations"):
|
|
ranking_rows = (
|
|
rankings
|
|
if rankings is not None
|
|
else self.get_rankings(
|
|
top_n=20, trigger_source=trigger_source, persist=persist
|
|
)
|
|
)
|
|
customers = self.warehouse_client.fetch_customer_performance().copy()
|
|
if customers.empty:
|
|
customers = pd.DataFrame(columns=["customer_name", "revenue", "orders"])
|
|
|
|
recommendations: list[dict] = []
|
|
|
|
if ranking_rows:
|
|
champion = ranking_rows[0]
|
|
recommendations.append(
|
|
{
|
|
"title": "Double down on champion SKU",
|
|
"priority": "high",
|
|
"summary": (
|
|
f"Promote '{champion['product_name']}' with score {champion['score']:.2f} "
|
|
f"and margin {champion['margin_pct']:.2f}%."
|
|
),
|
|
}
|
|
)
|
|
|
|
low_margin = next(
|
|
(row for row in ranking_rows if row["margin_pct"] < 10), None
|
|
)
|
|
if low_margin:
|
|
recommendations.append(
|
|
{
|
|
"title": "Review pricing for low-margin bestseller",
|
|
"priority": "medium",
|
|
"summary": (
|
|
f"'{low_margin['product_name']}' has strong rank but only "
|
|
f"{low_margin['margin_pct']:.2f}% margin."
|
|
),
|
|
}
|
|
)
|
|
|
|
if not customers.empty:
|
|
customers["revenue"] = pd.to_numeric(
|
|
customers["revenue"], errors="coerce"
|
|
).fillna(0.0)
|
|
customers["orders"] = pd.to_numeric(
|
|
customers["orders"], errors="coerce"
|
|
).fillna(0.0)
|
|
customer = customers.sort_values("revenue", ascending=False).iloc[0]
|
|
recommendations.append(
|
|
{
|
|
"title": "Protect top customer relationship",
|
|
"priority": "high",
|
|
"summary": (
|
|
f"Prioritize retention for '{customer['customer_name']}' with "
|
|
f"{float(customer['orders']):.0f} orders and {float(customer['revenue']):.2f} revenue."
|
|
),
|
|
}
|
|
)
|
|
|
|
result = recommendations[:5]
|
|
if persist and self.persistence_service is not None:
|
|
span_context = trace.get_current_span().get_span_context()
|
|
trace_id = (
|
|
f"{span_context.trace_id:032x}" if span_context.is_valid else None
|
|
)
|
|
span_id = (
|
|
f"{span_context.span_id:016x}" if span_context.is_valid else None
|
|
)
|
|
self.persistence_service.record_recommendation_run(
|
|
payload=result,
|
|
trigger_source=trigger_source,
|
|
trace_id=trace_id,
|
|
span_id=span_id,
|
|
)
|
|
return result
|
|
|
|
def get_dashboard(self) -> DashboardSnapshot:
|
|
with self.tracer.start_as_current_span("analytics.dashboard"):
|
|
rankings = self.get_rankings(trigger_source="api.dashboard", persist=True)
|
|
return DashboardSnapshot(
|
|
kpis=self.get_kpis(),
|
|
history=self.get_history_points(),
|
|
forecasts=self.get_forecast(
|
|
trigger_source="api.dashboard", persist=True
|
|
),
|
|
rankings=rankings,
|
|
recommendations=self.get_recommendations(
|
|
rankings=rankings,
|
|
trigger_source="api.dashboard",
|
|
persist=True,
|
|
),
|
|
)
|