5a3df975d9
CI/CD / build-and-push (push) Failing after 1m20s
total_pnl now uses edge_net × net_cost instead of (0.5 - entry_price) × shares. The old formula overestimated BUY_NO trades at low entry prices by 3–10× because buying at price 0.158 yields 3164 shares — any exit-at-0.5 assumption produced $1072 PnL on $500 deployed. edge_net × net_cost is bounded by net_cost per trade and uses the model's own signal, giving $122 for the same position. calibration_score is now None (null in API) instead of 1 - 2×|avg_edge|. That formula was not a real calibration: it requires knowing market resolutions (YES=1/NO=0) which we do not store yet. Returning null is more honest than returning 0.0 or a meaningless proxy. Fix 3 will compute it from closed trades. check_promotion_thresholds updated to handle None calibration (null → not ready). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
142 lines
5.8 KiB
Python
142 lines
5.8 KiB
Python
"""
Metrics Tracker — Computes trading performance metrics.

Key metrics tracked:
- P&L (cumulative and daily)
- Sharpe Ratio (annualized)
- Win Rate
- Calibration Score (how accurate our probability estimates are)
- Max Drawdown
- Average Edge realized
"""
|
||
import logging
|
||
import math
|
||
from datetime import datetime, UTC
|
||
from typing import Optional
|
||
|
||
from bot.executor.paper import Trade
|
||
from bot.data.db import Database
|
||
|
||
log = logging.getLogger(__name__)
|
||
|
||
|
||
class MetricsTracker:
|
||
def __init__(self, db: Database) -> None:
|
||
self._db = db
|
||
self._trades: list[Trade] = []
|
||
self._daily_returns: list[float] = []
|
||
|
||
async def record_trade(self, trade: Trade) -> None:
|
||
self._trades.append(trade)
|
||
await self._db.save_trade(trade)
|
||
log.info("Trade recorded. Total trades: %d", len(self._trades))
|
||
|
||
async def update_daily_summary(self) -> None:
|
||
"""Compute and store daily metrics snapshot."""
|
||
if not self._trades:
|
||
return
|
||
|
||
metrics = self.compute_metrics()
|
||
await self._db.save_daily_metrics(metrics)
|
||
|
||
log.info(
|
||
"Daily metrics | Trades: %d | P&L: $%.2f | Win: %.1f%% | Sharpe: %.2f",
|
||
metrics["total_trades"],
|
||
metrics["total_pnl"],
|
||
metrics["win_rate"] * 100,
|
||
metrics["sharpe_ratio"],
|
||
)
|
||
|
||
def compute_metrics(self) -> dict:
|
||
if not self._trades:
|
||
return self._empty_metrics()
|
||
|
||
trades = self._trades
|
||
n = len(trades)
|
||
|
||
# ── Capital: all in-session trades (open + closed this session) ────────
|
||
# NOTE: self._trades is in-memory; resets on pod restart.
|
||
# Fix 3 (planned): replace with DB-computed metrics so restarts don't
|
||
# truncate history. Until then, these numbers reflect the current session.
|
||
total_deployed = sum(t.net_cost for t in trades)
|
||
total_fees = sum(t.fee_usdc for t in trades)
|
||
|
||
# ── Win rate ─────────────────────────────────────────────────────────
|
||
# Proxy for open trades: fraction where edge_net > 0.
|
||
# Not a realized win rate (no market resolutions available yet).
|
||
wins = sum(1 for t in trades if t.entry_price < 0.5)
|
||
win_rate = wins / n if n > 0 else 0
|
||
|
||
# ── Estimated unrealized P&L (open positions only) ───────────────────
|
||
# Formula: model_edge × deployed_capital per trade.
|
||
# Conservative bound: edge_net ∈ [-1, 1] → max PnL = net_cost per trade.
|
||
# Previous formula (0.5 − entry_price) × shares inflated BUY_NO trades
|
||
# at low entry prices by 3–10× (e.g. entry=0.158 → 3164 shares → $1072
|
||
# PnL on $500 deployed, vs $122 with edge_net=0.2589 here).
|
||
# Trades with NULL edge_net (legacy data) contribute only −fee_usdc.
|
||
total_pnl = sum(
|
||
(t.edge_net or 0.0) * t.net_cost - t.fee_usdc
|
||
for t in trades
|
||
)
|
||
|
||
avg_edge = total_pnl / total_deployed if total_deployed > 0 else 0
|
||
|
||
sharpe = self._compute_sharpe()
|
||
|
||
# ── Calibration score: not available ─────────────────────────────────
|
||
# Real calibration (Brier score) requires knowing how each market
|
||
# resolved (YES=1 or NO=0). Until close_price / resolution is stored
|
||
# per trade, any formula here is a proxy, not a calibration.
|
||
# Returns None so the API can surface "unavailable" rather than a
|
||
# misleading number. Will be computed from closed trades in Fix 3.
|
||
calibration = None # type: ignore[assignment]
|
||
|
||
return {
|
||
"timestamp": datetime.now(UTC),
|
||
"total_trades": n,
|
||
"total_deployed": total_deployed,
|
||
"total_fees": total_fees,
|
||
"total_pnl": total_pnl, # estimated unrealized (open trades, current session)
|
||
"win_rate": win_rate, # proxy: fraction with entry_price < 0.5
|
||
"avg_edge": avg_edge,
|
||
"sharpe_ratio": sharpe,
|
||
"calibration_score": calibration, # None — requires market resolution data
|
||
"paper_mode": True,
|
||
}
|
||
|
||
def _compute_sharpe(self) -> float:
|
||
"""Annualized Sharpe ratio from daily returns."""
|
||
if len(self._daily_returns) < 2:
|
||
return 0.0
|
||
mean_r = sum(self._daily_returns) / len(self._daily_returns)
|
||
variance = sum((r - mean_r) ** 2 for r in self._daily_returns) / len(self._daily_returns)
|
||
std_r = math.sqrt(variance) if variance > 0 else 1e-9
|
||
return (mean_r / std_r) * math.sqrt(365) # Annualize
|
||
|
||
def check_promotion_thresholds(self) -> tuple[bool, dict]:
|
||
"""Check if metrics qualify for real money trading."""
|
||
metrics = self.compute_metrics()
|
||
cal = metrics["calibration_score"] # may be None
|
||
checks = {
|
||
"sharpe_ratio": (metrics["sharpe_ratio"], 0.5, metrics["sharpe_ratio"] >= 0.5),
|
||
"win_rate": (metrics["win_rate"], 0.52, metrics["win_rate"] >= 0.52),
|
||
"calibration_score": (cal, 0.7, cal is not None and cal >= 0.7),
|
||
"min_trades": (metrics["total_trades"], 50, metrics["total_trades"] >= 50),
|
||
}
|
||
all_pass = all(v[2] for v in checks.values())
|
||
return all_pass, checks
|
||
|
||
def _empty_metrics(self) -> dict:
|
||
return {
|
||
"timestamp": datetime.now(UTC),
|
||
"total_trades": 0,
|
||
"total_deployed": 0,
|
||
"total_fees": 0,
|
||
"total_pnl": 0,
|
||
"win_rate": 0,
|
||
"avg_edge": 0,
|
||
"sharpe_ratio": 0,
|
||
"calibration_score": None, # requires market resolution data
|
||
"paper_mode": True,
|
||
}
|