from __future__ import annotations from dataclasses import dataclass from datetime import date, timedelta from math import sqrt import numpy as np import pandas as pd from opentelemetry import trace from sklearn.linear_model import LinearRegression from app.core.config import settings from app.services.persistence_service import PersistenceService from app.services.warehouse_service import ReadOnlyWarehouseClient @dataclass class DashboardSnapshot: kpis: dict history: list[dict] forecasts: list[dict] rankings: list[dict] recommendations: list[dict] class AnalyticsService: def __init__( self, warehouse_client: ReadOnlyWarehouseClient, persistence_service: PersistenceService | None = None, ) -> None: self.warehouse_client = warehouse_client self.persistence_service = persistence_service self.tracer = trace.get_tracer(__name__) @staticmethod def _normalize_frame(df: pd.DataFrame, date_col: str = "sale_date") -> pd.DataFrame: normalized = df.copy() normalized[date_col] = pd.to_datetime(normalized[date_col], errors="coerce") for numeric in ("revenue", "cost", "quantity", "orders"): if numeric in normalized.columns: normalized[numeric] = pd.to_numeric( normalized[numeric], errors="coerce" ).fillna(0.0) return normalized.dropna(subset=[date_col]) def load_sales_history(self, days_back: int | None = None) -> pd.DataFrame: with self.tracer.start_as_current_span("analytics.load_sales_history"): daily_sales = self._normalize_frame( self.warehouse_client.fetch_daily_sales() ) days = days_back or settings.default_history_days min_date = pd.Timestamp(date.today() - timedelta(days=days)) filtered = daily_sales[daily_sales["sale_date"] >= min_date] return ( filtered.groupby("sale_date", as_index=False)[ ["revenue", "cost", "quantity", "orders"] ] .sum() .sort_values("sale_date") ) def get_kpis(self) -> dict: with self.tracer.start_as_current_span("analytics.kpis"): sales = self.load_sales_history(days_back=180) if sales.empty: return { "total_revenue": 0.0, "gross_margin_pct": 0.0, "total_quantity": 0.0, "avg_order_value": 0.0, "records_in_window": 0, } total_revenue = float(sales["revenue"].sum()) total_cost = float(sales["cost"].sum()) total_orders = max(float(sales["orders"].sum()), 1.0) margin_pct = ( ((total_revenue - total_cost) / total_revenue * 100) if total_revenue else 0.0 ) return { "total_revenue": round(total_revenue, 2), "gross_margin_pct": round(margin_pct, 2), "total_quantity": round(float(sales["quantity"].sum()), 2), "avg_order_value": round(total_revenue / total_orders, 2), "records_in_window": int(sales.shape[0]), } def get_history_points(self, days_back: int | None = None) -> list[dict]: with self.tracer.start_as_current_span("analytics.history_points"): sales = self.load_sales_history(days_back=days_back) if sales.empty: return [] return [ { "date": pd.Timestamp(row["sale_date"]).date().isoformat(), "revenue": round(float(row["revenue"]), 2), "cost": round(float(row["cost"]), 2), "quantity": round(float(row["quantity"]), 2), } for _, row in sales.iterrows() ] def get_forecast( self, horizon_days: int | None = None, *, trigger_source: str = "api.forecasts", persist: bool = True, ) -> list[dict]: with self.tracer.start_as_current_span("analytics.forecast"): horizon = horizon_days or settings.forecast_horizon_days sales = self.load_sales_history(days_back=720) if sales.empty: return [] series = ( sales.set_index("sale_date")["revenue"] .sort_index() .resample("D") .sum() .fillna(0.0) ) y = series.values x = np.arange(len(y), dtype=float).reshape(-1, 1) model = LinearRegression() model.fit(x, y) baseline = model.predict(x) residual = y - baseline sigma = float(np.std(residual)) if len(residual) > 1 else 0.0 weekday_baseline = series.groupby(series.index.weekday).mean() overall_mean = float(series.mean()) if len(series) else 0.0 weekday_factor = ( weekday_baseline / overall_mean if overall_mean > 0 else pd.Series([1.0] * 7, index=range(7)) ) weekday_factor = weekday_factor.replace([np.inf, -np.inf], 1.0).fillna(1.0) future_x = np.arange(len(y), len(y) + horizon, dtype=float).reshape(-1, 1) raw_forecast = model.predict(future_x) predictions: list[dict] = [] start_date = series.index.max().date() for idx, point in enumerate(raw_forecast, start=1): day = start_date + timedelta(days=idx) factor = ( float(weekday_factor.loc[day.weekday()]) if day.weekday() in weekday_factor.index else 1.0 ) yhat = max(float(point) * factor, 0.0) ci = 1.96 * sigma * sqrt(1 + idx / max(len(y), 1)) predictions.append( { "date": day.isoformat(), "predicted_revenue": round(yhat, 2), "lower_bound": round(max(yhat - ci, 0.0), 2), "upper_bound": round(yhat + ci, 2), } ) if persist and self.persistence_service is not None: span_context = trace.get_current_span().get_span_context() trace_id = ( f"{span_context.trace_id:032x}" if span_context.is_valid else None ) span_id = ( f"{span_context.span_id:016x}" if span_context.is_valid else None ) self.persistence_service.record_forecast_run( horizon_days=horizon, payload=predictions, trigger_source=trigger_source, trace_id=trace_id, span_id=span_id, ) return predictions def get_rankings( self, top_n: int | None = None, *, trigger_source: str = "api.rankings", persist: bool = True, ) -> list[dict]: with self.tracer.start_as_current_span("analytics.rankings"): n = top_n or settings.ranking_default_top_n products = self.warehouse_client.fetch_product_performance().copy() if products.empty: return [] products["revenue"] = pd.to_numeric( products["revenue"], errors="coerce" ).fillna(0.0) products["cost"] = pd.to_numeric(products["cost"], errors="coerce").fillna( 0.0 ) products["quantity"] = pd.to_numeric( products["quantity"], errors="coerce" ).fillna(0.0) products["orders"] = pd.to_numeric( products["orders"], errors="coerce" ).fillna(0.0) grouped = ( products.groupby( ["product_id", "product_name", "category_name"], as_index=False )[["revenue", "cost", "quantity", "orders"]] .sum() .sort_values("revenue", ascending=False) ) grouped["margin_pct"] = np.where( grouped["revenue"] > 0, ((grouped["revenue"] - grouped["cost"]) / grouped["revenue"]) * 100, 0.0, ) revenue_norm = grouped["revenue"] / max( float(grouped["revenue"].max()), 1.0 ) margin_norm = (grouped["margin_pct"] + 100) / 200 velocity_norm = grouped["quantity"] / max( float(grouped["quantity"].max()), 1.0 ) grouped["score"] = ( (0.55 * revenue_norm) + (0.30 * margin_norm.clip(0, 1)) + (0.15 * velocity_norm) ) ranked = ( grouped.sort_values("score", ascending=False) .head(n) .reset_index(drop=True) ) result = [ { "rank": int(idx + 1), "product_id": str(row["product_id"]), "product_name": str(row["product_name"]), "category": str(row["category_name"]), "revenue": round(float(row["revenue"]), 2), "margin_pct": round(float(row["margin_pct"]), 2), "score": round(float(row["score"]) * 100, 2), } for idx, row in ranked.iterrows() ] if persist and self.persistence_service is not None: span_context = trace.get_current_span().get_span_context() trace_id = ( f"{span_context.trace_id:032x}" if span_context.is_valid else None ) span_id = ( f"{span_context.span_id:016x}" if span_context.is_valid else None ) self.persistence_service.record_ranking_run( top_n=n, payload=result, trigger_source=trigger_source, trace_id=trace_id, span_id=span_id, ) return result def get_recommendations( self, rankings: list[dict] | None = None, *, trigger_source: str = "api.recommendations", persist: bool = True, ) -> list[dict]: with self.tracer.start_as_current_span("analytics.recommendations"): ranking_rows = ( rankings if rankings is not None else self.get_rankings( top_n=20, trigger_source=trigger_source, persist=persist ) ) customers = self.warehouse_client.fetch_customer_performance().copy() if customers.empty: customers = pd.DataFrame(columns=["customer_name", "revenue", "orders"]) recommendations: list[dict] = [] if ranking_rows: champion = ranking_rows[0] recommendations.append( { "title": "Double down on champion SKU", "priority": "high", "summary": ( f"Promote '{champion['product_name']}' with score {champion['score']:.2f} " f"and margin {champion['margin_pct']:.2f}%." ), } ) low_margin = next( (row for row in ranking_rows if row["margin_pct"] < 10), None ) if low_margin: recommendations.append( { "title": "Review pricing for low-margin bestseller", "priority": "medium", "summary": ( f"'{low_margin['product_name']}' has strong rank but only " f"{low_margin['margin_pct']:.2f}% margin." ), } ) if not customers.empty: customers["revenue"] = pd.to_numeric( customers["revenue"], errors="coerce" ).fillna(0.0) customers["orders"] = pd.to_numeric( customers["orders"], errors="coerce" ).fillna(0.0) customer = customers.sort_values("revenue", ascending=False).iloc[0] recommendations.append( { "title": "Protect top customer relationship", "priority": "high", "summary": ( f"Prioritize retention for '{customer['customer_name']}' with " f"{float(customer['orders']):.0f} orders and {float(customer['revenue']):.2f} revenue." ), } ) result = recommendations[:5] if persist and self.persistence_service is not None: span_context = trace.get_current_span().get_span_context() trace_id = ( f"{span_context.trace_id:032x}" if span_context.is_valid else None ) span_id = ( f"{span_context.span_id:016x}" if span_context.is_valid else None ) self.persistence_service.record_recommendation_run( payload=result, trigger_source=trigger_source, trace_id=trace_id, span_id=span_id, ) return result def get_dashboard(self) -> DashboardSnapshot: with self.tracer.start_as_current_span("analytics.dashboard"): rankings = self.get_rankings(trigger_source="api.dashboard", persist=True) return DashboardSnapshot( kpis=self.get_kpis(), history=self.get_history_points(), forecasts=self.get_forecast( trigger_source="api.dashboard", persist=True ), rankings=rankings, recommendations=self.get_recommendations( rankings=rankings, trigger_source="api.dashboard", persist=True, ), )