# polymarket-bot/bot/metrics/tracker.py

"""
Metrics Tracker — Computes trading performance metrics.

Key metrics tracked:
- P&L (cumulative and daily)
- Sharpe Ratio (annualized)
- Win Rate
- Calibration Score (how accurate our probability estimates are)
- Max Drawdown
- Average Edge realized
"""
import logging
import math
from datetime import datetime, UTC
from typing import Optional

from bot.executor.paper import Trade
from bot.data.db import Database

log = logging.getLogger(__name__)


class MetricsTracker:
    def __init__(self, db: Database) -> None:
        self._db = db
        self._trades: list[Trade] = []
        self._daily_returns: list[float] = []

    async def record_trade(self, trade: Trade) -> None:
        self._trades.append(trade)
        await self._db.save_trade(trade)
        log.info("Trade recorded. Total trades: %d", len(self._trades))

    async def update_daily_summary(self) -> None:
        """Compute and store daily metrics snapshot."""
        if not self._trades:
            return

        metrics = self.compute_metrics()
        await self._db.save_daily_metrics(metrics)

        log.info(
            "Daily metrics | Trades: %d | P&L: $%.2f | Win: %.1f%% | Sharpe: %.2f",
            metrics["total_trades"],
            metrics["total_pnl"],
            metrics["win_rate"] * 100,
            metrics["sharpe_ratio"],
        )

    def compute_metrics(self) -> dict:
        if not self._trades:
            return self._empty_metrics()

        trades = self._trades
        n = len(trades)

        # ── Capital: all in-session trades (open + closed this session) ────────
        # NOTE: self._trades is in-memory; resets on pod restart.
        # Fix 3 (planned): replace with DB-computed metrics so restarts don't
        # truncate history. Until then, these numbers reflect the current session.
        total_deployed = sum(t.net_cost for t in trades)
        total_fees = sum(t.fee_usdc for t in trades)

        # ── Win rate ─────────────────────────────────────────────────────────
        # Proxy for open trades: fraction where edge_net > 0.
        # Not a realized win rate (no market resolutions available yet).
        wins = sum(1 for t in trades if t.entry_price < 0.5)
        win_rate = wins / n if n > 0 else 0

        # ── Estimated unrealized P&L (open positions only) ───────────────────
        # Formula: model_edge × deployed_capital per trade.
        # Conservative bound: edge_net ∈ [-1, 1] → max PnL = net_cost per trade.
        # Previous formula (0.5 − entry_price) × shares inflated BUY_NO trades
        # at low entry prices by 3–10× (e.g. entry=0.158 → 3164 shares → $1072
        # PnL on $500 deployed, vs $122 with edge_net=0.2589 here).
        # Trades with NULL edge_net (legacy data) contribute only −fee_usdc.
        total_pnl = sum(
            (t.edge_net or 0.0) * t.net_cost - t.fee_usdc
            for t in trades
        )

        avg_edge = total_pnl / total_deployed if total_deployed > 0 else 0

        sharpe = self._compute_sharpe()

        # ── Calibration score: not available ─────────────────────────────────
        # Real calibration (Brier score) requires knowing how each market
        # resolved (YES=1 or NO=0). Until close_price / resolution is stored
        # per trade, any formula here is a proxy, not a calibration.
        # Returns None so the API can surface "unavailable" rather than a
        # misleading number. Will be computed from closed trades in Fix 3.
        calibration = None  # type: ignore[assignment]

        return {
            "timestamp": datetime.now(UTC),
            "total_trades": n,
            "total_deployed": total_deployed,
            "total_fees": total_fees,
            "total_pnl": total_pnl,           # estimated unrealized (open trades, current session)
            "win_rate": win_rate,              # proxy: fraction with entry_price < 0.5
            "avg_edge": avg_edge,
            "sharpe_ratio": sharpe,
            "calibration_score": calibration,  # None — requires market resolution data
            "paper_mode": True,
        }

    def _compute_sharpe(self) -> float:
        """Annualized Sharpe ratio from daily returns."""
        if len(self._daily_returns) < 2:
            return 0.0
        mean_r = sum(self._daily_returns) / len(self._daily_returns)
        variance = sum((r - mean_r) ** 2 for r in self._daily_returns) / len(self._daily_returns)
        std_r = math.sqrt(variance) if variance > 0 else 1e-9
        return (mean_r / std_r) * math.sqrt(365)  # Annualize

    def check_promotion_thresholds(self) -> tuple[bool, dict]:
        """Check if metrics qualify for real money trading."""
        metrics = self.compute_metrics()
        cal = metrics["calibration_score"]  # may be None
        checks = {
            "sharpe_ratio": (metrics["sharpe_ratio"], 0.5, metrics["sharpe_ratio"] >= 0.5),
            "win_rate": (metrics["win_rate"], 0.52, metrics["win_rate"] >= 0.52),
            "calibration_score": (cal, 0.7, cal is not None and cal >= 0.7),
            "min_trades": (metrics["total_trades"], 50, metrics["total_trades"] >= 50),
        }
        all_pass = all(v[2] for v in checks.values())
        return all_pass, checks

    def _empty_metrics(self) -> dict:
        return {
            "timestamp": datetime.now(UTC),
            "total_trades": 0,
            "total_deployed": 0,
            "total_fees": 0,
            "total_pnl": 0,
            "win_rate": 0,
            "avg_edge": 0,
            "sharpe_ratio": 0,
            "calibration_score": None,  # requires market resolution data
            "paper_mode": True,
        }