Files
polymarket-bot/bot/metrics/tracker.py
T
chemavx 5a3df975d9
CI/CD / build-and-push (push) Failing after 1m20s
fix(metrics): replace inflated PnL formula; drop fake calibration_score
total_pnl now uses edge_net × net_cost instead of (0.5 - entry_price) × shares.
The old formula overestimated BUY_NO trades at low entry prices by 3–10× because
buying at price 0.158 yields 3164 shares — any exit-at-0.5 assumption produced
$1072 PnL on $500 deployed. edge_net × net_cost is bounded by net_cost per trade
and uses the model's own signal, giving $122 for the same position.

calibration_score is now None (null in API) instead of 1 - 2×|avg_edge|. That
formula was not a real calibration: it requires knowing market resolutions
(YES=1/NO=0) which we do not store yet. Returning null is more honest than
returning 0.0 or a meaningless proxy. Fix 3 will compute it from closed trades.

check_promotion_thresholds updated to handle None calibration (null → not ready).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-21 16:47:05 +00:00

142 lines
5.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Metrics Tracker — Computes trading performance metrics.
Key metrics tracked:
- P&L (cumulative and daily)
- Sharpe Ratio (annualized)
- Win Rate
- Calibration Score (how accurate our probability estimates are; currently
  reported as None until market resolutions are stored)
- Max Drawdown
- Average Edge realized
"""
import logging
import math
from datetime import datetime, UTC
from typing import Optional
from bot.executor.paper import Trade
from bot.data.db import Database
# Module-level logger, named after this module via __name__.
log = logging.getLogger(__name__)
class MetricsTracker:
def __init__(self, db: Database) -> None:
self._db = db
self._trades: list[Trade] = []
self._daily_returns: list[float] = []
async def record_trade(self, trade: Trade) -> None:
self._trades.append(trade)
await self._db.save_trade(trade)
log.info("Trade recorded. Total trades: %d", len(self._trades))
async def update_daily_summary(self) -> None:
"""Compute and store daily metrics snapshot."""
if not self._trades:
return
metrics = self.compute_metrics()
await self._db.save_daily_metrics(metrics)
log.info(
"Daily metrics | Trades: %d | P&L: $%.2f | Win: %.1f%% | Sharpe: %.2f",
metrics["total_trades"],
metrics["total_pnl"],
metrics["win_rate"] * 100,
metrics["sharpe_ratio"],
)
def compute_metrics(self) -> dict:
if not self._trades:
return self._empty_metrics()
trades = self._trades
n = len(trades)
# ── Capital: all in-session trades (open + closed this session) ────────
# NOTE: self._trades is in-memory; resets on pod restart.
# Fix 3 (planned): replace with DB-computed metrics so restarts don't
# truncate history. Until then, these numbers reflect the current session.
total_deployed = sum(t.net_cost for t in trades)
total_fees = sum(t.fee_usdc for t in trades)
# ── Win rate ─────────────────────────────────────────────────────────
# Proxy for open trades: fraction where edge_net > 0.
# Not a realized win rate (no market resolutions available yet).
wins = sum(1 for t in trades if t.entry_price < 0.5)
win_rate = wins / n if n > 0 else 0
# ── Estimated unrealized P&L (open positions only) ───────────────────
# Formula: model_edge × deployed_capital per trade.
# Conservative bound: edge_net ∈ [-1, 1] → max PnL = net_cost per trade.
# Previous formula (0.5 entry_price) × shares inflated BUY_NO trades
# at low entry prices by 310× (e.g. entry=0.158 → 3164 shares → $1072
# PnL on $500 deployed, vs $122 with edge_net=0.2589 here).
# Trades with NULL edge_net (legacy data) contribute only fee_usdc.
total_pnl = sum(
(t.edge_net or 0.0) * t.net_cost - t.fee_usdc
for t in trades
)
avg_edge = total_pnl / total_deployed if total_deployed > 0 else 0
sharpe = self._compute_sharpe()
# ── Calibration score: not available ─────────────────────────────────
# Real calibration (Brier score) requires knowing how each market
# resolved (YES=1 or NO=0). Until close_price / resolution is stored
# per trade, any formula here is a proxy, not a calibration.
# Returns None so the API can surface "unavailable" rather than a
# misleading number. Will be computed from closed trades in Fix 3.
calibration = None # type: ignore[assignment]
return {
"timestamp": datetime.now(UTC),
"total_trades": n,
"total_deployed": total_deployed,
"total_fees": total_fees,
"total_pnl": total_pnl, # estimated unrealized (open trades, current session)
"win_rate": win_rate, # proxy: fraction with entry_price < 0.5
"avg_edge": avg_edge,
"sharpe_ratio": sharpe,
"calibration_score": calibration, # None — requires market resolution data
"paper_mode": True,
}
def _compute_sharpe(self) -> float:
"""Annualized Sharpe ratio from daily returns."""
if len(self._daily_returns) < 2:
return 0.0
mean_r = sum(self._daily_returns) / len(self._daily_returns)
variance = sum((r - mean_r) ** 2 for r in self._daily_returns) / len(self._daily_returns)
std_r = math.sqrt(variance) if variance > 0 else 1e-9
return (mean_r / std_r) * math.sqrt(365) # Annualize
def check_promotion_thresholds(self) -> tuple[bool, dict]:
"""Check if metrics qualify for real money trading."""
metrics = self.compute_metrics()
cal = metrics["calibration_score"] # may be None
checks = {
"sharpe_ratio": (metrics["sharpe_ratio"], 0.5, metrics["sharpe_ratio"] >= 0.5),
"win_rate": (metrics["win_rate"], 0.52, metrics["win_rate"] >= 0.52),
"calibration_score": (cal, 0.7, cal is not None and cal >= 0.7),
"min_trades": (metrics["total_trades"], 50, metrics["total_trades"] >= 50),
}
all_pass = all(v[2] for v in checks.values())
return all_pass, checks
def _empty_metrics(self) -> dict:
return {
"timestamp": datetime.now(UTC),
"total_trades": 0,
"total_deployed": 0,
"total_fees": 0,
"total_pnl": 0,
"win_rate": 0,
"avg_edge": 0,
"sharpe_ratio": 0,
"calibration_score": None, # requires market resolution data
"paper_mode": True,
}