Add initial work from Codex

2026-03-20 15:13:33 +01:00
parent 19771ddd37
commit adb5c1a439
48 changed files with 7054 additions and 16 deletions
--- a/backend/app/services/init.py
+++ b/backend/app/services/init.py
@@ -0,0 +1 @@
+"""Business logic services."""
--- a/backend/app/services/analytics_service.py
+++ b/backend/app/services/analytics_service.py
@@ -0,0 +1,373 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import date, timedelta
+from math import sqrt
+
+import numpy as np
+import pandas as pd
+from opentelemetry import trace
+from sklearn.linear_model import LinearRegression
+
+from app.core.config import settings
+from app.services.persistence_service import PersistenceService
+from app.services.warehouse_service import ReadOnlyWarehouseClient
+
+
+@dataclass
+class DashboardSnapshot:
+    """Container for the five datasets assembled by ``AnalyticsService.get_dashboard``."""
+
+    # KPI mapping as returned by ``AnalyticsService.get_kpis``.
+    kpis: dict
+    # Daily history points as returned by ``AnalyticsService.get_history_points``.
+    history: list[dict]
+    # Forecast points as returned by ``AnalyticsService.get_forecast``.
+    forecasts: list[dict]
+    # Ranked products as returned by ``AnalyticsService.get_rankings``.
+    rankings: list[dict]
+    # Recommendation entries as returned by ``AnalyticsService.get_recommendations``.
+    recommendations: list[dict]
+
+
+class AnalyticsService:
+    def __init__(
+        self,
+        warehouse_client: ReadOnlyWarehouseClient,
+        persistence_service: PersistenceService | None = None,
+    ) -> None:
+        """Wire the service to its data source and optional result store.
+
+        Args:
+            warehouse_client: Client used to fetch the sales, product and
+                customer frames.
+            persistence_service: Optional sink for run records. When ``None``
+                the computed results are returned without being recorded.
+        """
+        self.tracer = trace.get_tracer(__name__)
+        self.persistence_service = persistence_service
+        self.warehouse_client = warehouse_client
+
+    @staticmethod
+    def _normalize_frame(df: pd.DataFrame, date_col: str = "sale_date") -> pd.DataFrame:
+        """Return a cleaned copy of ``df`` with typed date and metric columns.
+
+        The date column is parsed to datetimes and rows whose date cannot be
+        parsed are dropped. Each of the metric columns that is present is
+        coerced to numeric, with unparseable or missing values set to 0.0.
+        The input frame is not modified.
+        """
+        frame = df.copy()
+        frame[date_col] = pd.to_datetime(frame[date_col], errors="coerce")
+
+        metric_names = ("revenue", "cost", "quantity", "orders")
+        present = [name for name in metric_names if name in frame.columns]
+        for name in present:
+            coerced = pd.to_numeric(frame[name], errors="coerce")
+            frame[name] = coerced.fillna(0.0)
+
+        return frame.dropna(subset=[date_col])
+
+    def load_sales_history(self, days_back: int | None = None) -> pd.DataFrame:
+        """Return per-day sales totals for the trailing window, oldest first.
+
+        Args:
+            days_back: Size of the window in days. A falsy value (``None`` or
+                0) falls back to ``settings.default_history_days``.
+
+        Returns:
+            A frame with one row per ``sale_date`` and summed ``revenue``,
+            ``cost``, ``quantity`` and ``orders`` columns.
+        """
+        metric_columns = ["revenue", "cost", "quantity", "orders"]
+        with self.tracer.start_as_current_span("analytics.load_sales_history"):
+            raw = self.warehouse_client.fetch_daily_sales()
+            daily_sales = self._normalize_frame(raw)
+
+            window_days = days_back or settings.default_history_days
+            cutoff = pd.Timestamp(date.today() - timedelta(days=window_days))
+            recent = daily_sales.loc[daily_sales["sale_date"] >= cutoff]
+
+            # Rows sharing a date are summed into a single row for that day.
+            totals = recent.groupby("sale_date", as_index=False)[metric_columns].sum()
+            return totals.sort_values("sale_date")
+
+    def get_kpis(self) -> dict:
+        """Compute headline KPIs over the trailing 180 days of sales.
+
+        Returns:
+            A dict with ``total_revenue``, ``gross_margin_pct``,
+            ``total_quantity``, ``avg_order_value`` and ``records_in_window``.
+            All values are zero when the window holds no sales rows.
+        """
+        with self.tracer.start_as_current_span("analytics.kpis"):
+            sales = self.load_sales_history(days_back=180)
+            if sales.empty:
+                return {
+                    "total_revenue": 0.0,
+                    "gross_margin_pct": 0.0,
+                    "total_quantity": 0.0,
+                    "avg_order_value": 0.0,
+                    "records_in_window": 0,
+                }
+
+            revenue = float(sales["revenue"].sum())
+            cost = float(sales["cost"].sum())
+            quantity = float(sales["quantity"].sum())
+            # Floor the order count at 1 so the average never divides by zero.
+            orders = max(float(sales["orders"].sum()), 1.0)
+
+            if revenue:
+                margin = (revenue - cost) / revenue * 100
+            else:
+                margin = 0.0
+
+            return {
+                "total_revenue": round(revenue, 2),
+                "gross_margin_pct": round(margin, 2),
+                "total_quantity": round(quantity, 2),
+                "avg_order_value": round(revenue / orders, 2),
+                "records_in_window": int(sales.shape[0]),
+            }
+
+    def get_history_points(self, days_back: int | None = None) -> list[dict]:
+        """Return the daily sales history as a list of plain dicts.
+
+        Args:
+            days_back: Window size in days, forwarded to ``load_sales_history``.
+
+        Returns:
+            One dict per day, oldest first, with an ISO ``date`` and
+            ``revenue``, ``cost`` and ``quantity`` rounded to two decimals.
+            The list is empty when there is no history.
+        """
+        with self.tracer.start_as_current_span("analytics.history_points"):
+            sales = self.load_sales_history(days_back=days_back)
+            points: list[dict] = []
+            if sales.empty:
+                return points
+
+            columns = zip(
+                sales["sale_date"], sales["revenue"], sales["cost"], sales["quantity"]
+            )
+            for stamp, revenue, cost, quantity in columns:
+                points.append(
+                    {
+                        "date": pd.Timestamp(stamp).date().isoformat(),
+                        "revenue": round(float(revenue), 2),
+                        "cost": round(float(cost), 2),
+                        "quantity": round(float(quantity), 2),
+                    }
+                )
+            return points
+
+    def get_forecast(
+        self,
+        horizon_days: int | None = None,
+        *,
+        trigger_source: str = "api.forecasts",
+        persist: bool = True,
+    ) -> list[dict]:
+        """Forecast daily revenue with a linear trend scaled by weekday factors.
+
+        The trailing 720 days of revenue are resampled to a daily series, a
+        linear regression is fitted against the day index, and each projected
+        trend value is multiplied by the ratio of that weekday's mean revenue
+        to the overall mean. The interval half-width is derived from the
+        standard deviation of the in-sample residuals.
+
+        Args:
+            horizon_days: Number of days to project. A falsy value falls back
+                to ``settings.forecast_horizon_days``.
+            trigger_source: Label stored with the persisted run.
+            persist: When true and a persistence service is configured, the
+                forecast is recorded together with the current trace ids.
+
+        Returns:
+            One dict per forecast day with ``date``, ``predicted_revenue``,
+            ``lower_bound`` and ``upper_bound``; empty when there is no history.
+        """
+        with self.tracer.start_as_current_span("analytics.forecast"):
+            horizon = horizon_days or settings.forecast_horizon_days
+            sales = self.load_sales_history(days_back=720)
+            if sales.empty:
+                return []
+
+            daily_revenue = sales.set_index("sale_date")["revenue"].sort_index()
+            # Resampling fills calendar gaps so the regression sees every day.
+            series = daily_revenue.resample("D").sum().fillna(0.0)
+
+            y = series.values
+            n_obs = len(y)
+            x = np.arange(n_obs, dtype=float).reshape(-1, 1)
+            model = LinearRegression()
+            model.fit(x, y)
+            residual = y - model.predict(x)
+            sigma = float(np.std(residual)) if n_obs > 1 else 0.0
+
+            overall_mean = float(series.mean()) if len(series) else 0.0
+            if overall_mean > 0:
+                weekday_factor = (
+                    series.groupby(series.index.weekday).mean() / overall_mean
+                )
+            else:
+                # Without a positive mean, seasonality is undefined: use neutral factors.
+                weekday_factor = pd.Series([1.0] * 7, index=range(7))
+            weekday_factor = weekday_factor.replace([np.inf, -np.inf], 1.0).fillna(1.0)
+
+            future_x = np.arange(n_obs, n_obs + horizon, dtype=float).reshape(-1, 1)
+            trend = model.predict(future_x)
+            last_day = series.index.max().date()
+
+            predictions: list[dict] = []
+            for step, trend_value in enumerate(trend, start=1):
+                day = last_day + timedelta(days=step)
+                weekday = day.weekday()
+                if weekday in weekday_factor.index:
+                    factor = float(weekday_factor.loc[weekday])
+                else:
+                    factor = 1.0
+                yhat = max(float(trend_value) * factor, 0.0)
+                # The interval widens with the distance from the last observation.
+                ci = 1.96 * sigma * sqrt(1 + step / max(n_obs, 1))
+                predictions.append(
+                    {
+                        "date": day.isoformat(),
+                        "predicted_revenue": round(yhat, 2),
+                        "lower_bound": round(max(yhat - ci, 0.0), 2),
+                        "upper_bound": round(yhat + ci, 2),
+                    }
+                )
+
+            if persist and self.persistence_service is not None:
+                ctx = trace.get_current_span().get_span_context()
+                if ctx.is_valid:
+                    trace_id = f"{ctx.trace_id:032x}"
+                    span_id = f"{ctx.span_id:016x}"
+                else:
+                    trace_id = None
+                    span_id = None
+                self.persistence_service.record_forecast_run(
+                    horizon_days=horizon,
+                    payload=predictions,
+                    trigger_source=trigger_source,
+                    trace_id=trace_id,
+                    span_id=span_id,
+                )
+
+            return predictions
+
+    def get_rankings(
+        self,
+        top_n: int | None = None,
+        *,
+        trigger_source: str = "api.rankings",
+        persist: bool = True,
+    ) -> list[dict]:
+        """Rank products by a weighted revenue / margin / volume score.
+
+        Product rows are summed per (id, name, category). The score is
+        55% revenue relative to the top revenue, 30% margin mapped from
+        [-100, 100] percent onto [0, 1], and 15% quantity relative to the
+        top quantity. It is reported multiplied by 100.
+
+        Args:
+            top_n: Number of products to return. A falsy value falls back to
+                ``settings.ranking_default_top_n``.
+            trigger_source: Label stored with the persisted run.
+            persist: When true and a persistence service is configured, the
+                ranking is recorded together with the current trace ids.
+
+        Returns:
+            Ranked product dicts, best first; empty when no product data exists.
+        """
+        metric_columns = ["revenue", "cost", "quantity", "orders"]
+        key_columns = ["product_id", "product_name", "category_name"]
+
+        with self.tracer.start_as_current_span("analytics.rankings"):
+            n = top_n or settings.ranking_default_top_n
+            products = self.warehouse_client.fetch_product_performance().copy()
+            if products.empty:
+                return []
+
+            for column in metric_columns:
+                products[column] = pd.to_numeric(
+                    products[column], errors="coerce"
+                ).fillna(0.0)
+
+            # NOTE(review): groupby drops rows whose key columns are NaN by
+            # default (dropna=True) - confirm that is intended here.
+            grouped = products.groupby(key_columns, as_index=False)[
+                metric_columns
+            ].sum()
+            grouped = grouped.sort_values("revenue", ascending=False)
+
+            grouped["margin_pct"] = np.where(
+                grouped["revenue"] > 0,
+                ((grouped["revenue"] - grouped["cost"]) / grouped["revenue"]) * 100,
+                0.0,
+            )
+
+            # Denominators are floored at 1 so an all-zero column cannot divide by zero.
+            top_revenue = max(float(grouped["revenue"].max()), 1.0)
+            top_quantity = max(float(grouped["quantity"].max()), 1.0)
+            revenue_norm = grouped["revenue"] / top_revenue
+            margin_norm = ((grouped["margin_pct"] + 100) / 200).clip(0, 1)
+            velocity_norm = grouped["quantity"] / top_quantity
+            grouped["score"] = (
+                (0.55 * revenue_norm) + (0.30 * margin_norm) + (0.15 * velocity_norm)
+            )
+
+            by_score = grouped.sort_values("score", ascending=False)
+            ranked = by_score.head(n).reset_index(drop=True)
+
+            result: list[dict] = []
+            for position, (_, row) in enumerate(ranked.iterrows(), start=1):
+                result.append(
+                    {
+                        "rank": int(position),
+                        "product_id": str(row["product_id"]),
+                        "product_name": str(row["product_name"]),
+                        "category": str(row["category_name"]),
+                        "revenue": round(float(row["revenue"]), 2),
+                        "margin_pct": round(float(row["margin_pct"]), 2),
+                        "score": round(float(row["score"]) * 100, 2),
+                    }
+                )
+
+            if persist and self.persistence_service is not None:
+                ctx = trace.get_current_span().get_span_context()
+                if ctx.is_valid:
+                    trace_id = f"{ctx.trace_id:032x}"
+                    span_id = f"{ctx.span_id:016x}"
+                else:
+                    trace_id = None
+                    span_id = None
+                self.persistence_service.record_ranking_run(
+                    top_n=n,
+                    payload=result,
+                    trigger_source=trigger_source,
+                    trace_id=trace_id,
+                    span_id=span_id,
+                )
+
+            return result
+
+    def get_recommendations(
+        self,
+        rankings: list[dict] | None = None,
+        *,
+        trigger_source: str = "api.recommendations",
+        persist: bool = True,
+    ) -> list[dict]:
+        """Derive up to five rule-based recommendations from rankings and customers.
+
+        Rules, in order: promote the top-ranked product, flag the first ranked
+        product with a margin below 10%, and protect the customer row with the
+        highest revenue.
+
+        Args:
+            rankings: Precomputed ranking rows. When ``None`` the top 20 are
+                computed via ``get_rankings`` with the same trigger and
+                persist settings.
+            trigger_source: Label stored with the persisted run.
+            persist: When true and a persistence service is configured, the
+                recommendations are recorded together with the current trace ids.
+
+        Returns:
+            A list of dicts with ``title``, ``priority`` and ``summary``.
+        """
+        with self.tracer.start_as_current_span("analytics.recommendations"):
+            if rankings is None:
+                ranking_rows = self.get_rankings(
+                    top_n=20, trigger_source=trigger_source, persist=persist
+                )
+            else:
+                ranking_rows = rankings
+            customers = self.warehouse_client.fetch_customer_performance().copy()
+
+            recommendations: list[dict] = []
+
+            if ranking_rows:
+                champion = ranking_rows[0]
+                champion_summary = (
+                    f"Promote '{champion['product_name']}' with score {champion['score']:.2f} "
+                    f"and margin {champion['margin_pct']:.2f}%."
+                )
+                recommendations.append(
+                    {
+                        "title": "Double down on champion SKU",
+                        "priority": "high",
+                        "summary": champion_summary,
+                    }
+                )
+
+                # First ranked row (best rank) whose margin is below 10%.
+                low_margin = None
+                for row in ranking_rows:
+                    if row["margin_pct"] < 10:
+                        low_margin = row
+                        break
+                if low_margin:
+                    low_margin_summary = (
+                        f"'{low_margin['product_name']}' has strong rank but only "
+                        f"{low_margin['margin_pct']:.2f}% margin."
+                    )
+                    recommendations.append(
+                        {
+                            "title": "Review pricing for low-margin bestseller",
+                            "priority": "medium",
+                            "summary": low_margin_summary,
+                        }
+                    )
+
+            if not customers.empty:
+                for column in ("revenue", "orders"):
+                    customers[column] = pd.to_numeric(
+                        customers[column], errors="coerce"
+                    ).fillna(0.0)
+                by_revenue = customers.sort_values("revenue", ascending=False)
+                customer = by_revenue.iloc[0]
+                customer_summary = (
+                    f"Prioritize retention for '{customer['customer_name']}' with "
+                    f"{float(customer['orders']):.0f} orders and {float(customer['revenue']):.2f} revenue."
+                )
+                recommendations.append(
+                    {
+                        "title": "Protect top customer relationship",
+                        "priority": "high",
+                        "summary": customer_summary,
+                    }
+                )
+
+            result = recommendations[:5]
+            if persist and self.persistence_service is not None:
+                ctx = trace.get_current_span().get_span_context()
+                if ctx.is_valid:
+                    trace_id = f"{ctx.trace_id:032x}"
+                    span_id = f"{ctx.span_id:016x}"
+                else:
+                    trace_id = None
+                    span_id = None
+                self.persistence_service.record_recommendation_run(
+                    payload=result,
+                    trigger_source=trigger_source,
+                    trace_id=trace_id,
+                    span_id=span_id,
+                )
+            return result
+
+    def get_dashboard(self) -> DashboardSnapshot:
+        """Assemble every dashboard dataset in a single traced call.
+
+        Rankings are computed once and reused for the recommendations.
+        Rankings, forecasts and recommendations are requested with
+        ``persist=True`` and the trigger source ``"api.dashboard"``.
+        """
+        with self.tracer.start_as_current_span("analytics.dashboard"):
+            source = "api.dashboard"
+            rankings = self.get_rankings(trigger_source=source, persist=True)
+            kpis = self.get_kpis()
+            history = self.get_history_points()
+            forecasts = self.get_forecast(trigger_source=source, persist=True)
+            recommendations = self.get_recommendations(
+                rankings=rankings,
+                trigger_source=source,
+                persist=True,
+            )
+            return DashboardSnapshot(
+                kpis=kpis,
+                history=history,
+                forecasts=forecasts,
+                rankings=rankings,
+                recommendations=recommendations,
+            )
--- a/backend/app/services/persistence_service.py
+++ b/backend/app/services/persistence_service.py
@@ -0,0 +1,281 @@
+from __future__ import annotations
+
+import logging
+from time import perf_counter
+
+from opentelemetry import metrics, trace
+from sqlalchemy import desc, select
+from sqlalchemy.exc import SQLAlchemyError
+from sqlalchemy.orm import Session, sessionmaker
+
+from app.db.postgres_models import AuditLog, ForecastRun, RankingRun, RecommendationRun
+
+LOGGER = logging.getLogger(__name__)
+
+
+class PersistenceService:
+    def __init__(self, session_factory: sessionmaker[Session]) -> None:
+        """Store the session factory and create the tracer and write metrics.
+
+        Args:
+            session_factory: Factory whose sessions are used for every read
+                and write performed by this service.
+        """
+        self.session_factory = session_factory
+        self.tracer = trace.get_tracer(__name__)
+
+        meter = metrics.get_meter(__name__)
+        self.meter = meter
+        # Count of write attempts; attributes carry the entity and outcome.
+        self.write_counter = meter.create_counter(
+            name="postgres_persist_writes_total",
+            description="Total writes to app persistence PostgreSQL",
+        )
+        # Wall-clock duration of each write attempt, in milliseconds.
+        self.write_latency = meter.create_histogram(
+            name="postgres_persist_write_latency_ms",
+            unit="ms",
+            description="Latency of app persistence write operations",
+        )
+
+    @staticmethod
+    def _to_audit_dict(row: AuditLog) -> dict:
+        """Convert an audit-log row into a plain dict with an ISO timestamp."""
+        passthrough = (
+            "method",
+            "path",
+            "query_string",
+            "status_code",
+            "duration_ms",
+            "trace_id",
+            "span_id",
+            "client_ip",
+            "user_agent",
+            "details",
+        )
+        record = {"id": row.id, "created_at": row.created_at.isoformat()}
+        for field_name in passthrough:
+            record[field_name] = getattr(row, field_name)
+        return record
+
+    @staticmethod
+    def _to_forecast_dict(row: ForecastRun) -> dict:
+        """Convert a forecast-run row into a plain dict with an ISO timestamp."""
+        passthrough = (
+            "horizon_days",
+            "point_count",
+            "trigger_source",
+            "trace_id",
+            "span_id",
+            "payload",
+        )
+        record = {"id": row.id, "created_at": row.created_at.isoformat()}
+        for field_name in passthrough:
+            record[field_name] = getattr(row, field_name)
+        return record
+
+    @staticmethod
+    def _to_ranking_dict(row: RankingRun) -> dict:
+        """Convert a ranking-run row into a plain dict with an ISO timestamp."""
+        passthrough = (
+            "top_n",
+            "item_count",
+            "trigger_source",
+            "trace_id",
+            "span_id",
+            "payload",
+        )
+        record = {"id": row.id, "created_at": row.created_at.isoformat()}
+        for field_name in passthrough:
+            record[field_name] = getattr(row, field_name)
+        return record
+
+    @staticmethod
+    def _to_recommendation_dict(row: RecommendationRun) -> dict:
+        """Convert a recommendation-run row into a plain dict with an ISO timestamp."""
+        passthrough = (
+            "item_count",
+            "trigger_source",
+            "trace_id",
+            "span_id",
+            "payload",
+        )
+        record = {"id": row.id, "created_at": row.created_at.isoformat()}
+        for field_name in passthrough:
+            record[field_name] = getattr(row, field_name)
+        return record
+
+    def record_audit_log(
+        self,
+        *,
+        method: str,
+        path: str,
+        query_string: str,
+        status_code: int,
+        duration_ms: float,
+        trace_id: str | None,
+        span_id: str | None,
+        client_ip: str | None,
+        user_agent: str | None,
+        details: dict | None = None,
+    ) -> None:
+        """Persist one audit-log row; SQLAlchemy errors are logged, not raised.
+
+        The query string is truncated to 1000 characters and a missing
+        ``details`` value is stored as an empty dict. The write counter is
+        incremented with status ``ok`` or ``error`` and the elapsed time is
+        always recorded in the latency histogram.
+        """
+        t0 = perf_counter()
+        with self.tracer.start_as_current_span("persist.audit_log"):
+            try:
+                with self.session_factory() as session:
+                    entry = AuditLog(
+                        method=method,
+                        path=path,
+                        query_string=query_string[:1000],
+                        status_code=status_code,
+                        duration_ms=duration_ms,
+                        trace_id=trace_id,
+                        span_id=span_id,
+                        client_ip=client_ip,
+                        user_agent=user_agent,
+                        details=details or {},
+                    )
+                    session.add(entry)
+                    session.commit()
+            except SQLAlchemyError as exc:
+                LOGGER.exception("Failed to persist audit log: %s", exc)
+                self.write_counter.add(
+                    1, attributes={"entity": "audit", "status": "error"}
+                )
+            else:
+                self.write_counter.add(
+                    1, attributes={"entity": "audit", "status": "ok"}
+                )
+            finally:
+                elapsed_ms = (perf_counter() - t0) * 1000
+                self.write_latency.record(elapsed_ms, attributes={"entity": "audit"})
+
+    def record_forecast_run(
+        self,
+        *,
+        horizon_days: int,
+        payload: list[dict],
+        trigger_source: str,
+        trace_id: str | None,
+        span_id: str | None,
+    ) -> None:
+        """Persist one forecast run; SQLAlchemy errors are logged, not raised.
+
+        ``point_count`` is stored as ``len(payload)``. The write counter is
+        incremented with status ``ok`` or ``error`` and the elapsed time is
+        always recorded in the latency histogram.
+        """
+        t0 = perf_counter()
+        with self.tracer.start_as_current_span("persist.forecast_run"):
+            try:
+                with self.session_factory() as session:
+                    run = ForecastRun(
+                        horizon_days=horizon_days,
+                        point_count=len(payload),
+                        trigger_source=trigger_source,
+                        trace_id=trace_id,
+                        span_id=span_id,
+                        payload=payload,
+                    )
+                    session.add(run)
+                    session.commit()
+            except SQLAlchemyError as exc:
+                LOGGER.exception("Failed to persist forecast run: %s", exc)
+                self.write_counter.add(
+                    1, attributes={"entity": "forecast", "status": "error"}
+                )
+            else:
+                self.write_counter.add(
+                    1, attributes={"entity": "forecast", "status": "ok"}
+                )
+            finally:
+                elapsed_ms = (perf_counter() - t0) * 1000
+                self.write_latency.record(
+                    elapsed_ms, attributes={"entity": "forecast"}
+                )
+
+    def record_ranking_run(
+        self,
+        *,
+        top_n: int,
+        payload: list[dict],
+        trigger_source: str,
+        trace_id: str | None,
+        span_id: str | None,
+    ) -> None:
+        """Persist one ranking run; SQLAlchemy errors are logged, not raised.
+
+        ``item_count`` is stored as ``len(payload)``. The write counter is
+        incremented with status ``ok`` or ``error`` and the elapsed time is
+        always recorded in the latency histogram.
+        """
+        t0 = perf_counter()
+        with self.tracer.start_as_current_span("persist.ranking_run"):
+            try:
+                with self.session_factory() as session:
+                    run = RankingRun(
+                        top_n=top_n,
+                        item_count=len(payload),
+                        trigger_source=trigger_source,
+                        trace_id=trace_id,
+                        span_id=span_id,
+                        payload=payload,
+                    )
+                    session.add(run)
+                    session.commit()
+            except SQLAlchemyError as exc:
+                LOGGER.exception("Failed to persist ranking run: %s", exc)
+                self.write_counter.add(
+                    1, attributes={"entity": "ranking", "status": "error"}
+                )
+            else:
+                self.write_counter.add(
+                    1, attributes={"entity": "ranking", "status": "ok"}
+                )
+            finally:
+                elapsed_ms = (perf_counter() - t0) * 1000
+                self.write_latency.record(
+                    elapsed_ms, attributes={"entity": "ranking"}
+                )
+
+    def record_recommendation_run(
+        self,
+        *,
+        payload: list[dict],
+        trigger_source: str,
+        trace_id: str | None,
+        span_id: str | None,
+    ) -> None:
+        """Persist one recommendation run; SQLAlchemy errors are logged, not raised.
+
+        ``item_count`` is stored as ``len(payload)``. The write counter is
+        incremented with status ``ok`` or ``error`` and the elapsed time is
+        always recorded in the latency histogram.
+        """
+        t0 = perf_counter()
+        with self.tracer.start_as_current_span("persist.recommendation_run"):
+            try:
+                with self.session_factory() as session:
+                    run = RecommendationRun(
+                        item_count=len(payload),
+                        trigger_source=trigger_source,
+                        trace_id=trace_id,
+                        span_id=span_id,
+                        payload=payload,
+                    )
+                    session.add(run)
+                    session.commit()
+            except SQLAlchemyError as exc:
+                LOGGER.exception("Failed to persist recommendation run: %s", exc)
+                self.write_counter.add(
+                    1, attributes={"entity": "recommendation", "status": "error"}
+                )
+            else:
+                self.write_counter.add(
+                    1, attributes={"entity": "recommendation", "status": "ok"}
+                )
+            finally:
+                elapsed_ms = (perf_counter() - t0) * 1000
+                self.write_latency.record(
+                    elapsed_ms, attributes={"entity": "recommendation"}
+                )
+
+    def list_audit_logs(self, limit: int) -> list[dict]:
+        """Return up to ``limit`` audit-log rows as dicts, newest first."""
+        with self.tracer.start_as_current_span("persist.list_audit_logs"):
+            with self.session_factory() as session:
+                newest_first = (
+                    select(AuditLog).order_by(desc(AuditLog.created_at)).limit(limit)
+                )
+                found = session.execute(newest_first).scalars()
+                # Rows are converted while the session is still open.
+                return [self._to_audit_dict(item) for item in found]
+
+    def list_forecast_runs(self, limit: int) -> list[dict]:
+        """Return up to ``limit`` forecast runs as dicts, newest first."""
+        with self.tracer.start_as_current_span("persist.list_forecast_runs"):
+            with self.session_factory() as session:
+                newest_first = (
+                    select(ForecastRun)
+                    .order_by(desc(ForecastRun.created_at))
+                    .limit(limit)
+                )
+                found = session.execute(newest_first).scalars()
+                # Rows are converted while the session is still open.
+                return [self._to_forecast_dict(item) for item in found]
+
+    def list_ranking_runs(self, limit: int) -> list[dict]:
+        """Return up to ``limit`` ranking runs as dicts, newest first."""
+        with self.tracer.start_as_current_span("persist.list_ranking_runs"):
+            with self.session_factory() as session:
+                newest_first = (
+                    select(RankingRun)
+                    .order_by(desc(RankingRun.created_at))
+                    .limit(limit)
+                )
+                found = session.execute(newest_first).scalars()
+                # Rows are converted while the session is still open.
+                return [self._to_ranking_dict(item) for item in found]
+
+    def list_recommendation_runs(self, limit: int) -> list[dict]:
+        """Return up to ``limit`` recommendation runs as dicts, newest first."""
+        with self.tracer.start_as_current_span("persist.list_recommendation_runs"):
+            with self.session_factory() as session:
+                newest_first = (
+                    select(RecommendationRun)
+                    .order_by(desc(RecommendationRun.created_at))
+                    .limit(limit)
+                )
+                found = session.execute(newest_first).scalars()
+                # Rows are converted while the session is still open.
+                return [self._to_recommendation_dict(item) for item in found]
--- a/backend/app/services/warehouse_service.py
+++ b/backend/app/services/warehouse_service.py
@@ -0,0 +1,101 @@
+from __future__ import annotations
+
+import hashlib
+import logging
+from collections.abc import Sequence
+from time import perf_counter
+
+import pandas as pd
+from opentelemetry import metrics, trace
+from sqlalchemy import text
+from sqlalchemy.engine import Engine
+from sqlalchemy.exc import SQLAlchemyError
+
+from app.db import queries
+
+LOGGER = logging.getLogger(__name__)
+
+
+class ReadOnlyWarehouseClient:
+    def __init__(self, engines: dict[str, Engine]) -> None:
+        """Store the engines by source key and create the tracer and query metrics.
+
+        Args:
+            engines: Mapping from source key to SQLAlchemy engine. The fetch
+                methods of this class look up the keys ``"aw"`` and ``"wwi"``.
+        """
+        self.engines = engines
+        self.tracer = trace.get_tracer(__name__)
+
+        meter = metrics.get_meter(__name__)
+        self.meter = meter
+        # Count of executed query candidates; attributes carry source and status.
+        self.query_counter = meter.create_counter(
+            name="warehouse_queries_total",
+            description="Total warehouse query executions",
+        )
+        # Wall-clock duration of each query attempt, in milliseconds.
+        self.query_latency = meter.create_histogram(
+            name="warehouse_query_latency_ms",
+            unit="ms",
+            description="Warehouse query latency",
+        )
+
+    def _validate_read_only_query(self, sql: str) -> None:
+        """Reject SQL that is not a single read-only SELECT/CTE statement.
+
+        This is a lexical safety net, not a SQL parser. A statement is
+        accepted only when it starts with the keyword ``SELECT`` or ``WITH``,
+        contains no ``;`` other than trailing ones, and contains no
+        data-modifying or DDL keyword as a whole word. A ``;`` or such a
+        keyword inside a string literal is therefore rejected as well.
+
+        Args:
+            sql: The SQL text to check.
+
+        Raises:
+            ValueError: If the statement does not pass the checks above.
+        """
+        import re  # Imported locally; the module header does not import re.
+
+        message = "Only read-only SELECT/CTE SQL statements are allowed."
+        normalized = sql.strip().lower()
+
+        # Require a word boundary so identifiers such as "selectivity" do not pass.
+        if re.match(r"(?:select|with)\b", normalized) is None:
+            raise ValueError(message)
+
+        # A trailing ';' is harmless; one anywhere else chains a second statement.
+        if ";" in normalized.rstrip("; \t\r\n"):
+            raise ValueError(message)
+
+        # A CTE may legally precede INSERT/UPDATE/DELETE/MERGE, so the leading
+        # keyword alone does not prove that the statement only reads.
+        write_keywords = (
+            r"\b(?:insert|update|delete|merge|drop|alter|truncate|create"
+            r"|exec|execute|grant|revoke)\b"
+        )
+        if re.search(write_keywords, normalized) is not None:
+            raise ValueError(message)
+
+    def _run_query_list(
+        self, source: str, sql_candidates: Sequence[str]
+    ) -> pd.DataFrame:
+        """Run the candidates in order and return the first successful result.
+
+        Each candidate is validated as read-only, executed on a fresh
+        connection of the engine registered for ``source``, and traced with
+        a short hash of the SQL text. A candidate that fails with a database
+        error is logged and the next one is tried.
+
+        Args:
+            source: Key into ``self.engines``.
+            sql_candidates: SQL statements to try, in order of preference.
+
+        Returns:
+            The frame of the first candidate that succeeds, or an empty frame
+            when ``sql_candidates`` is empty.
+
+        Raises:
+            KeyError: If ``source`` has no registered engine.
+            ValueError: If a candidate is not a read-only statement.
+            RuntimeError: If every candidate failed; the last database error
+                is attached as the cause.
+        """
+        engine = self.engines[source]
+        last_error: Exception | None = None
+
+        for candidate in sql_candidates:
+            self._validate_read_only_query(candidate)
+            query_hash = hashlib.sha256(candidate.encode("utf-8")).hexdigest()[:12]
+            with self.tracer.start_as_current_span("warehouse.query") as span:
+                span.set_attribute("db.system", "mssql")
+                span.set_attribute("db.source", source)
+                span.set_attribute("db.query.hash", query_hash)
+                started = perf_counter()
+                # Assume failure until the read succeeds, so an unexpected
+                # exception is also counted as an error.
+                status = "error"
+                try:
+                    with engine.connect() as conn:
+                        with self.tracer.start_as_current_span(
+                            "warehouse.query.execute"
+                        ):
+                            df = pd.read_sql_query(sql=text(candidate), con=conn)
+                    status = "ok"
+                    return df
+                # Depending on the pandas version, a failing statement surfaces
+                # as pandas' DatabaseError instead of the SQLAlchemy error, so
+                # both must be caught for the fallback to the next candidate.
+                except (SQLAlchemyError, pd.errors.DatabaseError) as exc:
+                    last_error = exc
+                    LOGGER.warning(
+                        "Query failed for %s with hash %s: %s", source, query_hash, exc
+                    )
+                finally:
+                    elapsed_ms = (perf_counter() - started) * 1000
+                    self.query_latency.record(elapsed_ms, attributes={"source": source})
+                    self.query_counter.add(
+                        1, attributes={"source": source, "status": status}
+                    )
+
+        if last_error is not None:
+            raise RuntimeError(
+                f"All query candidates failed for source '{source}'."
+            ) from last_error
+        return pd.DataFrame()
+
+    def fetch_daily_sales(self) -> pd.DataFrame:
+        """Return daily sales from both warehouses, tagged with a ``source`` column."""
+        adventure_works = self._run_query_list(
+            "aw", queries.AW_DAILY_SALES_QUERIES
+        ).assign(source="AdventureWorks2022DWH")
+        wide_world = self._run_query_list(
+            "wwi", queries.WWI_DAILY_SALES_QUERIES
+        ).assign(source="WorldWideImporters")
+        return pd.concat([adventure_works, wide_world], ignore_index=True)
+
+    def fetch_product_performance(self) -> pd.DataFrame:
+        """Return product performance from both warehouses, tagged with ``source``."""
+        adventure_works = self._run_query_list(
+            "aw", queries.AW_PRODUCT_PERFORMANCE_QUERIES
+        ).assign(source="AdventureWorks2022DWH")
+        wide_world = self._run_query_list(
+            "wwi", queries.WWI_PRODUCT_PERFORMANCE_QUERIES
+        ).assign(source="WorldWideImporters")
+        return pd.concat([adventure_works, wide_world], ignore_index=True)
+
+    def fetch_customer_performance(self) -> pd.DataFrame:
+        """Return customer performance from both warehouses, tagged with ``source``."""
+        adventure_works = self._run_query_list(
+            "aw", queries.AW_CUSTOMER_QUERIES
+        ).assign(source="AdventureWorks2022DWH")
+        wide_world = self._run_query_list(
+            "wwi", queries.WWI_CUSTOMER_QUERIES
+        ).assign(source="WorldWideImporters")
+        return pd.concat([adventure_works, wide_world], ignore_index=True)