374 lines
14 KiB
Python
374 lines
14 KiB
Python
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from datetime import date, timedelta
|
|
from math import sqrt
|
|
|
|
import numpy as np
|
|
import pandas as pd
|
|
from opentelemetry import trace
|
|
from sklearn.linear_model import LinearRegression
|
|
|
|
from app.core.config import settings
|
|
from app.services.persistence_service import PersistenceService
|
|
from app.services.warehouse_service import ReadOnlyWarehouseClient
|
|
|
|
|
|
@dataclass
class DashboardSnapshot:
    """Single response object bundling every dashboard panel.

    Produced by AnalyticsService.get_dashboard so the API layer can
    serialize one payload instead of issuing five separate calls.
    """

    # Headline metrics (total revenue, margin %, AOV, ...).
    kpis: dict
    # Daily aggregated sales points, oldest first.
    history: list[dict]
    # Forward revenue predictions with confidence bounds.
    forecasts: list[dict]
    # Top products ordered by blended score.
    rankings: list[dict]
    # Prioritized action items derived from rankings/customers.
    recommendations: list[dict]
|
|
|
|
|
|
class AnalyticsService:
    """Read-only analytics over the sales warehouse.

    Pulls raw frames from a ReadOnlyWarehouseClient, derives KPIs,
    forecasts, rankings, and recommendations, and (optionally) records
    each derived run through a PersistenceService. Every public method
    opens an OpenTelemetry span.
    """

    def __init__(
        self,
        warehouse_client: ReadOnlyWarehouseClient,
        persistence_service: PersistenceService | None = None,
    ) -> None:
        """Store collaborators.

        persistence_service may be None, in which case persist flags on
        the public methods are silently ignored.
        """
        self.warehouse_client = warehouse_client
        self.persistence_service = persistence_service
        # One tracer per instance; span names are prefixed "analytics.".
        self.tracer = trace.get_tracer(__name__)
|
|
|
|
@staticmethod
|
|
def _normalize_frame(df: pd.DataFrame, date_col: str = "sale_date") -> pd.DataFrame:
|
|
normalized = df.copy()
|
|
normalized[date_col] = pd.to_datetime(normalized[date_col], errors="coerce")
|
|
for numeric in ("revenue", "cost", "quantity", "orders"):
|
|
if numeric in normalized.columns:
|
|
normalized[numeric] = pd.to_numeric(
|
|
normalized[numeric], errors="coerce"
|
|
).fillna(0.0)
|
|
return normalized.dropna(subset=[date_col])
|
|
|
|
def load_sales_history(self, days_back: int | None = None) -> pd.DataFrame:
|
|
with self.tracer.start_as_current_span("analytics.load_sales_history"):
|
|
daily_sales = self._normalize_frame(
|
|
self.warehouse_client.fetch_daily_sales()
|
|
)
|
|
days = days_back or settings.default_history_days
|
|
min_date = pd.Timestamp(date.today() - timedelta(days=days))
|
|
filtered = daily_sales[daily_sales["sale_date"] >= min_date]
|
|
return (
|
|
filtered.groupby("sale_date", as_index=False)[
|
|
["revenue", "cost", "quantity", "orders"]
|
|
]
|
|
.sum()
|
|
.sort_values("sale_date")
|
|
)
|
|
|
|
def get_kpis(self) -> dict:
|
|
with self.tracer.start_as_current_span("analytics.kpis"):
|
|
sales = self.load_sales_history(days_back=180)
|
|
if sales.empty:
|
|
return {
|
|
"total_revenue": 0.0,
|
|
"gross_margin_pct": 0.0,
|
|
"total_quantity": 0.0,
|
|
"avg_order_value": 0.0,
|
|
"records_in_window": 0,
|
|
}
|
|
|
|
total_revenue = float(sales["revenue"].sum())
|
|
total_cost = float(sales["cost"].sum())
|
|
total_orders = max(float(sales["orders"].sum()), 1.0)
|
|
margin_pct = (
|
|
((total_revenue - total_cost) / total_revenue * 100)
|
|
if total_revenue
|
|
else 0.0
|
|
)
|
|
return {
|
|
"total_revenue": round(total_revenue, 2),
|
|
"gross_margin_pct": round(margin_pct, 2),
|
|
"total_quantity": round(float(sales["quantity"].sum()), 2),
|
|
"avg_order_value": round(total_revenue / total_orders, 2),
|
|
"records_in_window": int(sales.shape[0]),
|
|
}
|
|
|
|
def get_history_points(self, days_back: int | None = None) -> list[dict]:
|
|
with self.tracer.start_as_current_span("analytics.history_points"):
|
|
sales = self.load_sales_history(days_back=days_back)
|
|
if sales.empty:
|
|
return []
|
|
return [
|
|
{
|
|
"date": pd.Timestamp(row["sale_date"]).date().isoformat(),
|
|
"revenue": round(float(row["revenue"]), 2),
|
|
"cost": round(float(row["cost"]), 2),
|
|
"quantity": round(float(row["quantity"]), 2),
|
|
}
|
|
for _, row in sales.iterrows()
|
|
]
|
|
|
|
    def get_forecast(
        self,
        horizon_days: int | None = None,
        *,
        trigger_source: str = "api.forecasts",
        persist: bool = True,
    ) -> list[dict]:
        """Forecast daily revenue for the next *horizon_days* days.

        Model: a linear trend fit on up to 720 days of daily revenue,
        multiplied by a weekday seasonality factor, with a 95% interval
        that widens with forecast distance. Returns one dict per future
        day with date, predicted_revenue, lower_bound, upper_bound.
        When persist is true and a persistence service is configured,
        the run is recorded along with the current trace/span ids.
        """
        with self.tracer.start_as_current_span("analytics.forecast"):
            horizon = horizon_days or settings.forecast_horizon_days
            sales = self.load_sales_history(days_back=720)
            if sales.empty:
                return []

            # Daily-resampled revenue series; missing calendar days become 0.0
            # so the trend fit sees a gap-free, evenly spaced series.
            series = (
                sales.set_index("sale_date")["revenue"]
                .sort_index()
                .resample("D")
                .sum()
                .fillna(0.0)
            )
            y = series.values
            x = np.arange(len(y), dtype=float).reshape(-1, 1)
            model = LinearRegression()
            model.fit(x, y)
            baseline = model.predict(x)
            residual = y - baseline
            # In-sample residual spread drives the confidence interval width.
            sigma = float(np.std(residual)) if len(residual) > 1 else 0.0

            # Multiplicative weekday seasonality: mean revenue per weekday
            # divided by the overall mean (1.0 fallback when mean is 0,
            # or where division produced inf/NaN).
            weekday_baseline = series.groupby(series.index.weekday).mean()
            overall_mean = float(series.mean()) if len(series) else 0.0
            weekday_factor = (
                weekday_baseline / overall_mean
                if overall_mean > 0
                else pd.Series([1.0] * 7, index=range(7))
            )
            weekday_factor = weekday_factor.replace([np.inf, -np.inf], 1.0).fillna(1.0)

            future_x = np.arange(len(y), len(y) + horizon, dtype=float).reshape(-1, 1)
            raw_forecast = model.predict(future_x)

            predictions: list[dict] = []
            # Forecast starts the day after the last observed date.
            start_date = series.index.max().date()
            for idx, point in enumerate(raw_forecast, start=1):
                day = start_date + timedelta(days=idx)
                factor = (
                    float(weekday_factor.loc[day.weekday()])
                    if day.weekday() in weekday_factor.index
                    else 1.0
                )
                # Revenue cannot go negative; clamp the seasonal trend at 0.
                yhat = max(float(point) * factor, 0.0)
                # 95% interval (z=1.96) widening with horizon distance idx.
                ci = 1.96 * sigma * sqrt(1 + idx / max(len(y), 1))
                predictions.append(
                    {
                        "date": day.isoformat(),
                        "predicted_revenue": round(yhat, 2),
                        "lower_bound": round(max(yhat - ci, 0.0), 2),
                        "upper_bound": round(yhat + ci, 2),
                    }
                )

            if persist and self.persistence_service is not None:
                # Record trace/span ids (hex-formatted) so persisted runs can
                # be correlated with telemetry; None when no span is active.
                span_context = trace.get_current_span().get_span_context()
                trace_id = (
                    f"{span_context.trace_id:032x}" if span_context.is_valid else None
                )
                span_id = (
                    f"{span_context.span_id:016x}" if span_context.is_valid else None
                )
                self.persistence_service.record_forecast_run(
                    horizon_days=horizon,
                    payload=predictions,
                    trigger_source=trigger_source,
                    trace_id=trace_id,
                    span_id=span_id,
                )

            return predictions
|
|
|
|
def get_rankings(
|
|
self,
|
|
top_n: int | None = None,
|
|
*,
|
|
trigger_source: str = "api.rankings",
|
|
persist: bool = True,
|
|
) -> list[dict]:
|
|
with self.tracer.start_as_current_span("analytics.rankings"):
|
|
n = top_n or settings.ranking_default_top_n
|
|
products = self.warehouse_client.fetch_product_performance().copy()
|
|
if products.empty:
|
|
return []
|
|
|
|
products["revenue"] = pd.to_numeric(
|
|
products["revenue"], errors="coerce"
|
|
).fillna(0.0)
|
|
products["cost"] = pd.to_numeric(products["cost"], errors="coerce").fillna(
|
|
0.0
|
|
)
|
|
products["quantity"] = pd.to_numeric(
|
|
products["quantity"], errors="coerce"
|
|
).fillna(0.0)
|
|
products["orders"] = pd.to_numeric(
|
|
products["orders"], errors="coerce"
|
|
).fillna(0.0)
|
|
|
|
grouped = (
|
|
products.groupby(
|
|
["product_id", "product_name", "category_name"], as_index=False
|
|
)[["revenue", "cost", "quantity", "orders"]]
|
|
.sum()
|
|
.sort_values("revenue", ascending=False)
|
|
)
|
|
|
|
grouped["margin_pct"] = np.where(
|
|
grouped["revenue"] > 0,
|
|
((grouped["revenue"] - grouped["cost"]) / grouped["revenue"]) * 100,
|
|
0.0,
|
|
)
|
|
|
|
revenue_norm = grouped["revenue"] / max(
|
|
float(grouped["revenue"].max()), 1.0
|
|
)
|
|
margin_norm = (grouped["margin_pct"] + 100) / 200
|
|
velocity_norm = grouped["quantity"] / max(
|
|
float(grouped["quantity"].max()), 1.0
|
|
)
|
|
grouped["score"] = (
|
|
(0.55 * revenue_norm)
|
|
+ (0.30 * margin_norm.clip(0, 1))
|
|
+ (0.15 * velocity_norm)
|
|
)
|
|
ranked = (
|
|
grouped.sort_values("score", ascending=False)
|
|
.head(n)
|
|
.reset_index(drop=True)
|
|
)
|
|
|
|
result = [
|
|
{
|
|
"rank": int(idx + 1),
|
|
"product_id": str(row["product_id"]),
|
|
"product_name": str(row["product_name"]),
|
|
"category": str(row["category_name"]),
|
|
"revenue": round(float(row["revenue"]), 2),
|
|
"margin_pct": round(float(row["margin_pct"]), 2),
|
|
"score": round(float(row["score"]) * 100, 2),
|
|
}
|
|
for idx, row in ranked.iterrows()
|
|
]
|
|
|
|
if persist and self.persistence_service is not None:
|
|
span_context = trace.get_current_span().get_span_context()
|
|
trace_id = (
|
|
f"{span_context.trace_id:032x}" if span_context.is_valid else None
|
|
)
|
|
span_id = (
|
|
f"{span_context.span_id:016x}" if span_context.is_valid else None
|
|
)
|
|
self.persistence_service.record_ranking_run(
|
|
top_n=n,
|
|
payload=result,
|
|
trigger_source=trigger_source,
|
|
trace_id=trace_id,
|
|
span_id=span_id,
|
|
)
|
|
|
|
return result
|
|
|
|
def get_recommendations(
|
|
self,
|
|
rankings: list[dict] | None = None,
|
|
*,
|
|
trigger_source: str = "api.recommendations",
|
|
persist: bool = True,
|
|
) -> list[dict]:
|
|
with self.tracer.start_as_current_span("analytics.recommendations"):
|
|
ranking_rows = (
|
|
rankings
|
|
if rankings is not None
|
|
else self.get_rankings(
|
|
top_n=20, trigger_source=trigger_source, persist=persist
|
|
)
|
|
)
|
|
customers = self.warehouse_client.fetch_customer_performance().copy()
|
|
if customers.empty:
|
|
customers = pd.DataFrame(columns=["customer_name", "revenue", "orders"])
|
|
|
|
recommendations: list[dict] = []
|
|
|
|
if ranking_rows:
|
|
champion = ranking_rows[0]
|
|
recommendations.append(
|
|
{
|
|
"title": "Double down on champion SKU",
|
|
"priority": "high",
|
|
"summary": (
|
|
f"Promote '{champion['product_name']}' with score {champion['score']:.2f} "
|
|
f"and margin {champion['margin_pct']:.2f}%."
|
|
),
|
|
}
|
|
)
|
|
|
|
low_margin = next(
|
|
(row for row in ranking_rows if row["margin_pct"] < 10), None
|
|
)
|
|
if low_margin:
|
|
recommendations.append(
|
|
{
|
|
"title": "Review pricing for low-margin bestseller",
|
|
"priority": "medium",
|
|
"summary": (
|
|
f"'{low_margin['product_name']}' has strong rank but only "
|
|
f"{low_margin['margin_pct']:.2f}% margin."
|
|
),
|
|
}
|
|
)
|
|
|
|
if not customers.empty:
|
|
customers["revenue"] = pd.to_numeric(
|
|
customers["revenue"], errors="coerce"
|
|
).fillna(0.0)
|
|
customers["orders"] = pd.to_numeric(
|
|
customers["orders"], errors="coerce"
|
|
).fillna(0.0)
|
|
customer = customers.sort_values("revenue", ascending=False).iloc[0]
|
|
recommendations.append(
|
|
{
|
|
"title": "Protect top customer relationship",
|
|
"priority": "high",
|
|
"summary": (
|
|
f"Prioritize retention for '{customer['customer_name']}' with "
|
|
f"{float(customer['orders']):.0f} orders and {float(customer['revenue']):.2f} revenue."
|
|
),
|
|
}
|
|
)
|
|
|
|
result = recommendations[:5]
|
|
if persist and self.persistence_service is not None:
|
|
span_context = trace.get_current_span().get_span_context()
|
|
trace_id = (
|
|
f"{span_context.trace_id:032x}" if span_context.is_valid else None
|
|
)
|
|
span_id = (
|
|
f"{span_context.span_id:016x}" if span_context.is_valid else None
|
|
)
|
|
self.persistence_service.record_recommendation_run(
|
|
payload=result,
|
|
trigger_source=trigger_source,
|
|
trace_id=trace_id,
|
|
span_id=span_id,
|
|
)
|
|
return result
|
|
|
|
def get_dashboard(self) -> DashboardSnapshot:
|
|
with self.tracer.start_as_current_span("analytics.dashboard"):
|
|
rankings = self.get_rankings(trigger_source="api.dashboard", persist=True)
|
|
return DashboardSnapshot(
|
|
kpis=self.get_kpis(),
|
|
history=self.get_history_points(),
|
|
forecasts=self.get_forecast(
|
|
trigger_source="api.dashboard", persist=True
|
|
),
|
|
rankings=rankings,
|
|
recommendations=self.get_recommendations(
|
|
rankings=rankings,
|
|
trigger_source="api.dashboard",
|
|
persist=True,
|
|
),
|
|
)
|