feat(metrics): Fix 3 — DB-computed metrics, stateless tracker, resolution tracking

schema.sql
  trades: + close_pnl, resolution (market outcome storage)
  metrics_daily: + unrealized_pnl_est, realized_pnl, open/closed/resolved_count

db.py
  close_paper_position(): accepts resolution; computes close_pnl in SQL
    BUY_YES: (resolution − entry_price) × shares
    BUY_NO: ((1 − resolution) − entry_price) × shares
  save_daily_metrics(): persists new columns
  compute_metrics_from_db(): single DB query for all metrics; no in-memory state

tracker.py — complete rewrite (stateless)
  Removed self._trades, self._daily_returns, compute_metrics(), _compute_sharpe(), check_promotion_thresholds(), _empty_metrics()
  update_daily_summary() now reads compute_metrics_from_db() every cycle
  Safe across pod restarts: always reflects full DB history

paper.py
  close_position(): passes resolution to close_paper_position()

api/main.py /api/summary
  Added unrealized_pnl_est (estimated, open trades) and realized_pnl (exact, closed+resolved) as separate fields alongside total_pnl
  win_rate: null if < 5 resolved trades (was proxy on entry_price < 0.5)
  calibration_score: Brier-based, null if < 10 resolved trades
  resolved_count exposed as field
  Each field annotated with: exact/estimated, source, null conditions

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 17:34:48 +00:00
parent 9b62636a3e
commit 9a5be27532
5 changed files with 268 additions and 160 deletions
@@ -1,21 +1,27 @@
 """
-Metrics Tracker — Computes trading performance metrics.
+Metrics Tracker — computes and persists trading performance metrics from the DB.

-Key metrics tracked:
- P&L (cumulative and daily)
- Sharpe Ratio (annualized)
- Win Rate
- Calibration Score (how accurate our probability estimates are)
- Max Drawdown
- Average Edge realized
+All metrics are derived directly from the `trades` table on every cycle call.
+No in-memory trade state is kept: the tracker is stateless across pod restarts.
+
+Metric definitions
+──────────────────
+unrealized_pnl_est  Estimated PnL for OPEN positions: edge_net × net_cost − fee.
+                    Source: open trades with edge_net. Estimated (model signal).
+realized_pnl        Exact PnL for CLOSED positions: computed from resolution.
+                    Source: closed trades with known resolution. Exact.
+total_pnl           unrealized_pnl_est + realized_pnl.
+win_rate            Fraction of resolved closed trades with close_pnl > 0.
+                    NULL if fewer than 5 resolved trades.
+calibration_score   1 − AVG((final_prob − resolution)²) on resolved trades,
+                    i.e. 1 − Brier score (higher = better calibration). NULL if < 10 resolved.
+sharpe_ratio        0.0 — requires a daily-return time series, not yet tracked.
 """
 import logging
-import math
 from datetime import datetime, UTC
-from typing import Optional

-from bot.executor.paper import Trade
 from bot.data.db import Database
+from bot.executor.paper import Trade

 log = logging.getLogger(__name__)

@@ -23,119 +29,69 @@ log = logging.getLogger(__name__)
 class MetricsTracker:
    def __init__(self, db: Database) -> None:
        self._db = db
-        self._trades: list[Trade] = []
-        self._daily_returns: list[float] = []

    async def record_trade(self, trade: Trade) -> None:
-        self._trades.append(trade)
+        """Persist a trade to the DB. No in-memory accumulation."""
        await self._db.save_trade(trade)
-        log.info("Trade recorded. Total trades: %d", len(self._trades))
+        log.info("Trade recorded: %s", trade)

    async def update_daily_summary(self) -> None:
-        """Compute and store daily metrics snapshot."""
-        if not self._trades:
+        """Compute metrics from DB and write a metrics_daily snapshot.
+
+        Called every cycle by the trading loop. Safe after pod restarts:
+        reads the full trade history from DB, not from in-memory state.
+        """
+        raw = await self._db.compute_metrics_from_db()
+        if not raw["total_trades"]:
            return

-        metrics = self.compute_metrics()
+        open_count    = int(raw["open_count"] or 0)
+        closed_count  = int(raw["closed_count"] or 0)
+        resolved      = int(raw["resolved_count"] or 0)
+        wins          = int(raw["wins_realized"] or 0)
+        unrealized    = float(raw["unrealized_pnl_est"] or 0)
+        realized      = float(raw["realized_pnl"] or 0)
+        total_deployed = float(raw["total_deployed"] or 0)
+        total_fees    = float(raw["total_fees"] or 0)
+        total_pnl     = unrealized + realized
+
+        # win_rate: only over resolved closed trades; null if sample too small
+        win_rate = (wins / resolved) if resolved >= 5 else None
+
+        # calibration: Brier score from DB; null if sample too small
+        calibration = (
+            float(raw["calibration_score"])
+            if raw["calibration_score"] is not None and resolved >= 10
+            else None
+        )
+
+        avg_edge = total_pnl / total_deployed if total_deployed > 0 else 0.0
+
+        metrics = {
+            "timestamp":          datetime.now(UTC),
+            "total_trades":       int(raw["total_trades"]),
+            "open_count":         open_count,
+            "closed_count":       closed_count,
+            "resolved_count":     resolved,
+            "total_deployed":     total_deployed,
+            "total_fees":         total_fees,
+            "unrealized_pnl_est": unrealized,
+            "realized_pnl":       realized,
+            "total_pnl":          total_pnl,
+            "win_rate":           win_rate,
+            "avg_edge":           avg_edge,
+            "sharpe_ratio":       0.0,   # requires daily-return series (not yet tracked)
+            "calibration_score":  calibration,
+            "paper_mode":         True,
+        }
        await self._db.save_daily_metrics(metrics)

        log.info(
-            "Daily metrics | Trades: %d | P&L: $%.2f | Win: %.1f%% | Sharpe: %.2f",
-            metrics["total_trades"],
-            metrics["total_pnl"],
-            metrics["win_rate"] * 100,
-            metrics["sharpe_ratio"],
+            "Daily metrics | trades=%d (open=%d closed=%d resolved=%d) | "
+            "unrealized=$%.2f realized=$%.2f total=$%.2f | "
+            "win_rate=%s calibration=%s",
+            metrics["total_trades"], open_count, closed_count, resolved,
+            unrealized, realized, total_pnl,
+            f"{win_rate:.1%}" if win_rate is not None else "n/a (<5)",
+            f"{calibration:.3f}" if calibration is not None else "n/a (<10)",
        )
-
-    def compute_metrics(self) -> dict:
-        if not self._trades:
-            return self._empty_metrics()
-
-        trades = self._trades
-        n = len(trades)
-
-        # ── Capital: all in-session trades (open + closed this session) ────────
-        # NOTE: self._trades is in-memory; resets on pod restart.
-        # Fix 3 (planned): replace with DB-computed metrics so restarts don't
-        # truncate history. Until then, these numbers reflect the current session.
-        total_deployed = sum(t.net_cost for t in trades)
-        total_fees = sum(t.fee_usdc for t in trades)
-
-        # ── Win rate ─────────────────────────────────────────────────────────
-        # Proxy for open trades: fraction where edge_net > 0.
-        # Not a realized win rate (no market resolutions available yet).
-        wins = sum(1 for t in trades if t.entry_price < 0.5)
-        win_rate = wins / n if n > 0 else 0
-
-        # ── Estimated unrealized P&L (open positions only) ───────────────────
-        # Formula: model_edge × deployed_capital per trade.
-        # Conservative bound: edge_net ∈ [-1, 1] → max PnL = net_cost per trade.
-        # Previous formula (0.5 − entry_price) × shares inflated BUY_NO trades
-        # at low entry prices by 3–10× (e.g. entry=0.158 → 3164 shares → $1072
-        # PnL on $500 deployed, vs $122 with edge_net=0.2589 here).
-        # Trades with NULL edge_net (legacy data) contribute only −fee_usdc.
-        total_pnl = sum(
-            (t.edge_net or 0.0) * t.net_cost - t.fee_usdc
-            for t in trades
-        )
-
-        avg_edge = total_pnl / total_deployed if total_deployed > 0 else 0
-
-        sharpe = self._compute_sharpe()
-
-        # ── Calibration score: not available ─────────────────────────────────
-        # Real calibration (Brier score) requires knowing how each market
-        # resolved (YES=1 or NO=0). Until close_price / resolution is stored
-        # per trade, any formula here is a proxy, not a calibration.
-        # Returns None so the API can surface "unavailable" rather than a
-        # misleading number. Will be computed from closed trades in Fix 3.
-        calibration = None  # type: ignore[assignment]
-
-        return {
-            "timestamp": datetime.now(UTC),
-            "total_trades": n,
-            "total_deployed": total_deployed,
-            "total_fees": total_fees,
-            "total_pnl": total_pnl,           # estimated unrealized (open trades, current session)
-            "win_rate": win_rate,              # proxy: fraction with entry_price < 0.5
-            "avg_edge": avg_edge,
-            "sharpe_ratio": sharpe,
-            "calibration_score": calibration,  # None — requires market resolution data
-            "paper_mode": True,
-        }
-
-    def _compute_sharpe(self) -> float:
-        """Annualized Sharpe ratio from daily returns."""
-        if len(self._daily_returns) < 2:
-            return 0.0
-        mean_r = sum(self._daily_returns) / len(self._daily_returns)
-        variance = sum((r - mean_r) ** 2 for r in self._daily_returns) / len(self._daily_returns)
-        std_r = math.sqrt(variance) if variance > 0 else 1e-9
-        return (mean_r / std_r) * math.sqrt(365)  # Annualize
-
-    def check_promotion_thresholds(self) -> tuple[bool, dict]:
-        """Check if metrics qualify for real money trading."""
-        metrics = self.compute_metrics()
-        cal = metrics["calibration_score"]  # may be None
-        checks = {
-            "sharpe_ratio": (metrics["sharpe_ratio"], 0.5, metrics["sharpe_ratio"] >= 0.5),
-            "win_rate": (metrics["win_rate"], 0.52, metrics["win_rate"] >= 0.52),
-            "calibration_score": (cal, 0.7, cal is not None and cal >= 0.7),
-            "min_trades": (metrics["total_trades"], 50, metrics["total_trades"] >= 50),
-        }
-        all_pass = all(v[2] for v in checks.values())
-        return all_pass, checks
-
-    def _empty_metrics(self) -> dict:
-        return {
-            "timestamp": datetime.now(UTC),
-            "total_trades": 0,
-            "total_deployed": 0,
-            "total_fees": 0,
-            "total_pnl": 0,
-            "win_rate": 0,
-            "avg_edge": 0,
-            "sharpe_ratio": 0,
-            "calibration_score": None,  # requires market resolution data
-            "paper_mode": True,
-        }