Files
zavrsni-rad-otel-app/backend/app/services/analytics_service.py

374 lines
14 KiB
Python

from __future__ import annotations
from dataclasses import dataclass
from datetime import date, timedelta
from math import sqrt
import numpy as np
import pandas as pd
from opentelemetry import trace
from sklearn.linear_model import LinearRegression
from app.core.config import settings
from app.services.persistence_service import PersistenceService
from app.services.warehouse_service import ReadOnlyWarehouseClient
@dataclass
class DashboardSnapshot:
    """Bundle of the result sets that make up one dashboard response.

    ``AnalyticsService.get_dashboard`` fills each field from the service
    method of the matching name.
    """

    # Aggregate figures as produced by ``AnalyticsService.get_kpis``.
    kpis: dict
    # One entry per day, as produced by ``AnalyticsService.get_history_points``.
    history: list[dict]
    # One entry per forecast day, as produced by ``AnalyticsService.get_forecast``.
    forecasts: list[dict]
    # Ranked products, as produced by ``AnalyticsService.get_rankings``.
    rankings: list[dict]
    # Recommendation entries, as produced by ``AnalyticsService.get_recommendations``.
    recommendations: list[dict]
class AnalyticsService:
    """Derive KPIs, history, forecasts, rankings and recommendations.

    Data is read through ``warehouse_client``. When a
    ``persistence_service`` is supplied, forecast, ranking and
    recommendation runs are recorded through it together with the ids of
    the active OpenTelemetry span.
    """

    # Numeric columns that are coerced and aggregated throughout the service.
    _SALES_METRICS = ("revenue", "cost", "quantity", "orders")

    def __init__(
        self,
        warehouse_client: ReadOnlyWarehouseClient,
        persistence_service: PersistenceService | None = None,
    ) -> None:
        """Store the collaborators and create a tracer for this module."""
        self.warehouse_client = warehouse_client
        self.persistence_service = persistence_service
        self.tracer = trace.get_tracer(__name__)

    @staticmethod
    def _current_trace_ids() -> tuple[str | None, str | None]:
        """Return ``(trace_id, span_id)`` of the current span as hex strings.

        Both values are ``None`` when the current span context is not valid.
        """
        span_context = trace.get_current_span().get_span_context()
        if not span_context.is_valid:
            return None, None
        return f"{span_context.trace_id:032x}", f"{span_context.span_id:016x}"

    @staticmethod
    def _normalize_frame(df: pd.DataFrame, date_col: str = "sale_date") -> pd.DataFrame:
        """Return a cleaned copy of ``df``.

        ``date_col`` is parsed to datetimes and rows whose date cannot be
        parsed are dropped. Any of the revenue/cost/quantity/orders columns
        that are present are converted to numbers, with unparseable values
        replaced by ``0.0``. The input frame is not modified.

        Raises:
            KeyError: if ``date_col`` is not a column of ``df``.
        """
        normalized = df.copy()
        normalized[date_col] = pd.to_datetime(normalized[date_col], errors="coerce")
        for numeric in AnalyticsService._SALES_METRICS:
            if numeric in normalized.columns:
                normalized[numeric] = pd.to_numeric(
                    normalized[numeric], errors="coerce"
                ).fillna(0.0)
        return normalized.dropna(subset=[date_col])

    def load_sales_history(self, days_back: int | None = None) -> pd.DataFrame:
        """Return per-day sales totals for the last ``days_back`` days.

        Args:
            days_back: size of the look-back window in days; a falsy value
                (``None`` or ``0``) selects ``settings.default_history_days``.

        Returns:
            A frame with one row per ``sale_date`` (ascending) and summed
            ``revenue``, ``cost``, ``quantity`` and ``orders`` columns. The
            frame is empty, with those columns, when the warehouse returns
            no rows.
        """
        with self.tracer.start_as_current_span("analytics.load_sales_history"):
            raw_sales = self.warehouse_client.fetch_daily_sales()
            if raw_sales.empty:
                # An empty result may carry no columns at all; normalizing it
                # would raise KeyError, while every caller expects to be able
                # to test ``.empty`` on the result.
                return pd.DataFrame(columns=["sale_date", *self._SALES_METRICS])

            daily_sales = self._normalize_frame(raw_sales)
            days = days_back or settings.default_history_days
            # NOTE(review): the window is anchored on the local calendar date
            # and compared against naive timestamps — confirm that the
            # warehouse delivers timezone-naive dates.
            min_date = pd.Timestamp(date.today() - timedelta(days=days))
            filtered = daily_sales[daily_sales["sale_date"] >= min_date]
            return (
                filtered.groupby("sale_date", as_index=False)[
                    list(self._SALES_METRICS)
                ]
                .sum()
                .sort_values("sale_date")
            )

    def get_kpis(self) -> dict:
        """Return headline figures computed over the last 180 days.

        Returns:
            A dict with ``total_revenue``, ``gross_margin_pct``,
            ``total_quantity``, ``avg_order_value`` (all rounded to two
            decimals) and ``records_in_window`` (number of daily rows).
            All values are zero when there is no sales history.
        """
        with self.tracer.start_as_current_span("analytics.kpis"):
            sales = self.load_sales_history(days_back=180)
            if sales.empty:
                return {
                    "total_revenue": 0.0,
                    "gross_margin_pct": 0.0,
                    "total_quantity": 0.0,
                    "avg_order_value": 0.0,
                    "records_in_window": 0,
                }

            total_revenue = float(sales["revenue"].sum())
            total_cost = float(sales["cost"].sum())
            # Clamp to 1 so the average order value never divides by zero.
            total_orders = max(float(sales["orders"].sum()), 1.0)
            margin_pct = (
                ((total_revenue - total_cost) / total_revenue * 100)
                if total_revenue
                else 0.0
            )
            return {
                "total_revenue": round(total_revenue, 2),
                "gross_margin_pct": round(margin_pct, 2),
                "total_quantity": round(float(sales["quantity"].sum()), 2),
                "avg_order_value": round(total_revenue / total_orders, 2),
                "records_in_window": int(sales.shape[0]),
            }

    def get_history_points(self, days_back: int | None = None) -> list[dict]:
        """Return the daily history as JSON-friendly dicts.

        Args:
            days_back: look-back window, forwarded to ``load_sales_history``.

        Returns:
            One dict per day with an ISO ``date`` and ``revenue``, ``cost``
            and ``quantity`` rounded to two decimals; ``[]`` when there is no
            history.
        """
        with self.tracer.start_as_current_span("analytics.history_points"):
            sales = self.load_sales_history(days_back=days_back)
            if sales.empty:
                return []
            # itertuples keeps each column's dtype, unlike iterrows.
            return [
                {
                    "date": pd.Timestamp(row.sale_date).date().isoformat(),
                    "revenue": round(float(row.revenue), 2),
                    "cost": round(float(row.cost), 2),
                    "quantity": round(float(row.quantity), 2),
                }
                for row in sales.itertuples(index=False)
            ]

    def get_forecast(
        self,
        horizon_days: int | None = None,
        *,
        trigger_source: str = "api.forecasts",
        persist: bool = True,
    ) -> list[dict]:
        """Forecast daily revenue for the days after the last observed day.

        A linear trend is fitted to the daily revenue of the last 720 days
        (days without sales count as zero). Each trend value is scaled by a
        weekday factor (mean revenue of that weekday divided by the overall
        mean) and floored at zero. The interval half-width is
        ``1.96 * sigma * sqrt(1 + step / n)``, where ``sigma`` is the
        standard deviation of the trend residuals and ``n`` the number of
        observed days.

        Args:
            horizon_days: number of days to forecast; a falsy value selects
                ``settings.forecast_horizon_days``.
            trigger_source: label stored with the persisted run.
            persist: record the run when a persistence service is configured.

        Returns:
            One dict per forecast day with ``date``, ``predicted_revenue``,
            ``lower_bound`` and ``upper_bound``; ``[]`` (and nothing is
            persisted) when there is no sales history.
        """
        with self.tracer.start_as_current_span("analytics.forecast"):
            horizon = horizon_days or settings.forecast_horizon_days
            sales = self.load_sales_history(days_back=720)
            if sales.empty:
                return []

            series = (
                sales.set_index("sale_date")["revenue"]
                .sort_index()
                .resample("D")
                .sum()
                .fillna(0.0)
            )
            y = series.values
            observed = len(y)
            x = np.arange(observed, dtype=float).reshape(-1, 1)

            model = LinearRegression()
            model.fit(x, y)
            residual = y - model.predict(x)
            sigma = float(np.std(residual)) if len(residual) > 1 else 0.0

            weekday_baseline = series.groupby(series.index.weekday).mean()
            overall_mean = float(series.mean()) if len(series) else 0.0
            weekday_factor = (
                weekday_baseline / overall_mean
                if overall_mean > 0
                else pd.Series([1.0] * 7, index=range(7))
            )
            weekday_factor = weekday_factor.replace([np.inf, -np.inf], 1.0).fillna(1.0)

            future_x = np.arange(observed, observed + horizon, dtype=float).reshape(
                -1, 1
            )
            raw_forecast = model.predict(future_x)

            predictions: list[dict] = []
            start_date = series.index.max().date()
            for idx, point in enumerate(raw_forecast, start=1):
                day = start_date + timedelta(days=idx)
                weekday = day.weekday()
                # Weekdays never seen in the history keep the plain trend value.
                factor = (
                    float(weekday_factor.loc[weekday])
                    if weekday in weekday_factor.index
                    else 1.0
                )
                yhat = max(float(point) * factor, 0.0)
                # 1.96 is approximately the z-value of a 95% normal interval;
                # the band widens with the distance from the observed data.
                ci = 1.96 * sigma * sqrt(1 + idx / max(observed, 1))
                predictions.append(
                    {
                        "date": day.isoformat(),
                        "predicted_revenue": round(yhat, 2),
                        "lower_bound": round(max(yhat - ci, 0.0), 2),
                        "upper_bound": round(yhat + ci, 2),
                    }
                )

            if persist and self.persistence_service is not None:
                trace_id, span_id = self._current_trace_ids()
                self.persistence_service.record_forecast_run(
                    horizon_days=horizon,
                    payload=predictions,
                    trigger_source=trigger_source,
                    trace_id=trace_id,
                    span_id=span_id,
                )
            return predictions

    def get_rankings(
        self,
        top_n: int | None = None,
        *,
        trigger_source: str = "api.rankings",
        persist: bool = True,
    ) -> list[dict]:
        """Rank products by a weighted revenue / margin / volume score.

        The score is ``0.55 * revenue_norm + 0.30 * margin_norm +
        0.15 * velocity_norm``. Revenue and quantity are divided by their
        maximum (at least 1), and the margin percentage is mapped from
        ``[-100, 100]`` to ``[0, 1]`` and clipped.

        Args:
            top_n: number of products to return; a falsy value selects
                ``settings.ranking_default_top_n``.
            trigger_source: label stored with the persisted run.
            persist: record the run when a persistence service is configured.

        Returns:
            Dicts with ``rank`` (1-based), ``product_id``, ``product_name``,
            ``category``, ``revenue``, ``margin_pct`` and ``score`` (0-100),
            best first; ``[]`` (and nothing is persisted) when the warehouse
            returns no products.
        """
        with self.tracer.start_as_current_span("analytics.rankings"):
            n = top_n or settings.ranking_default_top_n
            products = self.warehouse_client.fetch_product_performance().copy()
            if products.empty:
                return []

            for column in self._SALES_METRICS:
                products[column] = pd.to_numeric(
                    products[column], errors="coerce"
                ).fillna(0.0)

            # NOTE(review): groupby drops rows whose key columns are null by
            # default, so products without a name or category are left out of
            # the ranking — confirm that this is intended.
            grouped = (
                products.groupby(
                    ["product_id", "product_name", "category_name"], as_index=False
                )[list(self._SALES_METRICS)]
                .sum()
                .sort_values("revenue", ascending=False)
            )
            grouped["margin_pct"] = np.where(
                grouped["revenue"] > 0,
                ((grouped["revenue"] - grouped["cost"]) / grouped["revenue"]) * 100,
                0.0,
            )

            revenue_norm = grouped["revenue"] / max(
                float(grouped["revenue"].max()), 1.0
            )
            margin_norm = (grouped["margin_pct"] + 100) / 200
            velocity_norm = grouped["quantity"] / max(
                float(grouped["quantity"].max()), 1.0
            )
            grouped["score"] = (
                (0.55 * revenue_norm)
                + (0.30 * margin_norm.clip(0, 1))
                + (0.15 * velocity_norm)
            )

            ranked = (
                grouped.sort_values("score", ascending=False)
                .head(n)
                .reset_index(drop=True)
            )
            # itertuples keeps each column's dtype, unlike iterrows.
            result = [
                {
                    "rank": position,
                    "product_id": str(row.product_id),
                    "product_name": str(row.product_name),
                    "category": str(row.category_name),
                    "revenue": round(float(row.revenue), 2),
                    "margin_pct": round(float(row.margin_pct), 2),
                    "score": round(float(row.score) * 100, 2),
                }
                for position, row in enumerate(ranked.itertuples(index=False), start=1)
            ]

            if persist and self.persistence_service is not None:
                trace_id, span_id = self._current_trace_ids()
                self.persistence_service.record_ranking_run(
                    top_n=n,
                    payload=result,
                    trigger_source=trigger_source,
                    trace_id=trace_id,
                    span_id=span_id,
                )
            return result

    def get_recommendations(
        self,
        rankings: list[dict] | None = None,
        *,
        trigger_source: str = "api.recommendations",
        persist: bool = True,
    ) -> list[dict]:
        """Build up to five textual recommendations.

        Possible entries, in order: promote the top-ranked product, review
        pricing of the first ranked product with a margin below 10%, and
        protect the customer with the highest revenue.

        Args:
            rankings: precomputed ranking rows; when ``None`` the top 20 are
                computed via ``get_rankings`` with the same
                ``trigger_source`` and ``persist`` values.
            trigger_source: label stored with the persisted run.
            persist: record the run when a persistence service is configured.

        Returns:
            Dicts with ``title``, ``priority`` and ``summary``.
        """
        with self.tracer.start_as_current_span("analytics.recommendations"):
            ranking_rows = (
                rankings
                if rankings is not None
                else self.get_rankings(
                    top_n=20, trigger_source=trigger_source, persist=persist
                )
            )
            customers = self.warehouse_client.fetch_customer_performance().copy()

            recommendations: list[dict] = []
            if ranking_rows:
                champion = ranking_rows[0]
                recommendations.append(
                    {
                        "title": "Double down on champion SKU",
                        "priority": "high",
                        "summary": (
                            f"Promote '{champion['product_name']}' with score {champion['score']:.2f} "
                            f"and margin {champion['margin_pct']:.2f}%."
                        ),
                    }
                )
                low_margin = next(
                    (row for row in ranking_rows if row["margin_pct"] < 10), None
                )
                if low_margin:
                    recommendations.append(
                        {
                            "title": "Review pricing for low-margin bestseller",
                            "priority": "medium",
                            "summary": (
                                f"'{low_margin['product_name']}' has strong rank but only "
                                f"{low_margin['margin_pct']:.2f}% margin."
                            ),
                        }
                    )

            if not customers.empty:
                for column in ("revenue", "orders"):
                    customers[column] = pd.to_numeric(
                        customers[column], errors="coerce"
                    ).fillna(0.0)
                customer = customers.sort_values("revenue", ascending=False).iloc[0]
                recommendations.append(
                    {
                        "title": "Protect top customer relationship",
                        "priority": "high",
                        "summary": (
                            f"Prioritize retention for '{customer['customer_name']}' with "
                            f"{float(customer['orders']):.0f} orders and {float(customer['revenue']):.2f} revenue."
                        ),
                    }
                )

            result = recommendations[:5]
            if persist and self.persistence_service is not None:
                trace_id, span_id = self._current_trace_ids()
                self.persistence_service.record_recommendation_run(
                    payload=result,
                    trigger_source=trigger_source,
                    trace_id=trace_id,
                    span_id=span_id,
                )
            return result

    def get_dashboard(self) -> DashboardSnapshot:
        """Assemble KPIs, history, forecasts, rankings and recommendations.

        Rankings are computed once and reused for the recommendations.
        Forecast, ranking and recommendation runs are requested with
        ``persist=True`` and the trigger source ``"api.dashboard"``.
        """
        with self.tracer.start_as_current_span("analytics.dashboard"):
            rankings = self.get_rankings(trigger_source="api.dashboard", persist=True)
            return DashboardSnapshot(
                kpis=self.get_kpis(),
                history=self.get_history_points(),
                forecasts=self.get_forecast(
                    trigger_source="api.dashboard", persist=True
                ),
                rankings=rankings,
                recommendations=self.get_recommendations(
                    rankings=rankings,
                    trigger_source="api.dashboard",
                    persist=True,
                ),
            )