feat(metrics): Fix 3 — DB-computed metrics, stateless tracker, resolution tracking
CI/CD / build-and-push (push) Successful in 1m47s

schema.sql
  trades:        + close_pnl, resolution (market outcome storage)
  metrics_daily: + unrealized_pnl_est, realized_pnl, open/closed/resolved_count

db.py
  close_paper_position(): accepts resolution; computes close_pnl in SQL
    BUY_YES: (resolution − entry_price) × shares
    BUY_NO:  ((1 − resolution) − entry_price) × shares
  save_daily_metrics(): persists new columns
  compute_metrics_from_db(): single DB query for all metrics; no in-memory state

tracker.py — complete rewrite (stateless)
  Removed self._trades, self._daily_returns, compute_metrics(), _compute_sharpe(),
  check_promotion_thresholds(), _empty_metrics()
  update_daily_summary() now reads compute_metrics_from_db() every cycle
  Safe across pod restarts: always reflects full DB history

paper.py
  close_position(): passes resolution to close_paper_position()

api/main.py  /api/summary
  Added unrealized_pnl_est (estimated, open trades) and realized_pnl (exact,
  closed+resolved) as separate fields alongside total_pnl
  win_rate: null if < 5 resolved trades (was proxy on entry_price < 0.5)
  calibration_score: Brier-based, null if < 10 resolved trades
  resolved_count exposed as field
  Each field annotated with: exact/estimated, source, null conditions

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
chemavx
2026-04-21 17:34:48 +00:00
parent 9b62636a3e
commit 9a5be27532
5 changed files with 268 additions and 160 deletions
+72 -116
View File
@@ -1,21 +1,27 @@
"""
Metrics Tracker — Computes trading performance metrics.
Metrics Tracker — computes and persists trading performance metrics from the DB.
Key metrics tracked:
- P&L (cumulative and daily)
- Sharpe Ratio (annualized)
- Win Rate
- Calibration Score (how accurate our probability estimates are)
- Max Drawdown
- Average Edge realized
All metrics are derived directly from the `trades` table on every cycle call.
No in-memory trade state is kept: the tracker is stateless across pod restarts.
Metric definitions
──────────────────
unrealized_pnl_est Estimated PnL for OPEN positions: edge_net × net_cost − fee.
Source: open trades with edge_net. Estimated (model signal).
realized_pnl Exact PnL for CLOSED positions: computed from resolution.
Source: closed trades with known resolution. Exact.
total_pnl unrealized_pnl_est + realized_pnl.
win_rate Fraction of resolved closed trades with close_pnl > 0.
NULL if fewer than 5 resolved trades.
calibration_score 1 − AVG((final_prob − resolution)²) on resolved trades.
Brier-based score, i.e. 1 − Brier score (higher = better calibration). NULL if < 10 resolved.
sharpe_ratio 0.0 — requires a daily-return time series, not yet tracked.
"""
import logging
import math
from datetime import datetime, UTC
from typing import Optional
from bot.executor.paper import Trade
from bot.data.db import Database
from bot.executor.paper import Trade
log = logging.getLogger(__name__)
@@ -23,119 +29,69 @@ log = logging.getLogger(__name__)
class MetricsTracker:
def __init__(self, db: Database) -> None:
    """Bind the tracker to its database handle and start with empty buffers.

    Args:
        db: Database wrapper the tracker's other methods call for
            persistence (``save_trade``, ``save_daily_metrics``, ...).
    """
    # Session-local buffers: compute_metrics() reads _trades and
    # _compute_sharpe() reads _daily_returns. Both start empty.
    self._daily_returns: list[float] = []
    self._trades: list[Trade] = []
    self._db = db
async def record_trade(self, trade: Trade) -> None:
    """Persist a trade to the DB. No in-memory accumulation.

    The trade is written through ``Database.save_trade`` and nothing is
    buffered on the instance, so the stored history does not depend on how
    long this process has been running.

    Args:
        trade: The executed trade to store.
    """
    await self._db.save_trade(trade)
    log.info("Trade recorded: %s", trade)
async def update_daily_summary(self) -> None:
    """Compute metrics from DB and write a metrics_daily snapshot.

    Called every cycle by the trading loop. Safe after pod restarts:
    reads the full trade history from DB, not from in-memory state.

    Nothing is written when the DB reports zero trades. ``win_rate`` is
    stored as ``None`` below 5 resolved trades, and ``calibration_score``
    as ``None`` below 10 resolved trades or when the DB returns no score.
    """
    raw = await self._db.compute_metrics_from_db()
    if not raw["total_trades"]:
        return

    # ``or 0`` treats missing (None) aggregate values as zero.
    open_count = int(raw["open_count"] or 0)
    closed_count = int(raw["closed_count"] or 0)
    resolved = int(raw["resolved_count"] or 0)
    wins = int(raw["wins_realized"] or 0)
    unrealized = float(raw["unrealized_pnl_est"] or 0)
    realized = float(raw["realized_pnl"] or 0)
    total_deployed = float(raw["total_deployed"] or 0)
    total_fees = float(raw["total_fees"] or 0)
    total_pnl = unrealized + realized

    # win_rate: only over resolved closed trades; null if sample too small
    win_rate = (wins / resolved) if resolved >= 5 else None

    # calibration: Brier-based score from DB; null if sample too small
    calibration = (
        float(raw["calibration_score"])
        if raw["calibration_score"] is not None and resolved >= 10
        else None
    )

    avg_edge = total_pnl / total_deployed if total_deployed > 0 else 0.0

    metrics = {
        "timestamp": datetime.now(UTC),
        "total_trades": int(raw["total_trades"]),
        "open_count": open_count,
        "closed_count": closed_count,
        "resolved_count": resolved,
        "total_deployed": total_deployed,
        "total_fees": total_fees,
        "unrealized_pnl_est": unrealized,
        "realized_pnl": realized,
        "total_pnl": total_pnl,
        "win_rate": win_rate,
        "avg_edge": avg_edge,
        "sharpe_ratio": 0.0,  # requires daily-return series (not yet tracked)
        "calibration_score": calibration,
        "paper_mode": True,
    }
    await self._db.save_daily_metrics(metrics)

    # win_rate / calibration may be None, so they are pre-rendered to text
    # and passed as %s instead of being formatted numerically.
    log.info(
        "Daily metrics | trades=%d (open=%d closed=%d resolved=%d) | "
        "unrealized=$%.2f realized=$%.2f total=$%.2f | "
        "win_rate=%s calibration=%s",
        metrics["total_trades"], open_count, closed_count, resolved,
        unrealized, realized, total_pnl,
        f"{win_rate:.1%}" if win_rate is not None else "n/a (<5)",
        f"{calibration:.3f}" if calibration is not None else "n/a (<10)",
    )
def compute_metrics(self) -> dict:
if not self._trades:
return self._empty_metrics()
trades = self._trades
n = len(trades)
# ── Capital: all in-session trades (open + closed this session) ────────
# NOTE: self._trades is in-memory; resets on pod restart.
# Fix 3 (planned): replace with DB-computed metrics so restarts don't
# truncate history. Until then, these numbers reflect the current session.
total_deployed = sum(t.net_cost for t in trades)
total_fees = sum(t.fee_usdc for t in trades)
# ── Win rate ─────────────────────────────────────────────────────────
# Proxy for open trades: fraction where edge_net > 0.
# Not a realized win rate (no market resolutions available yet).
wins = sum(1 for t in trades if t.entry_price < 0.5)
win_rate = wins / n if n > 0 else 0
# ── Estimated unrealized P&L (open positions only) ───────────────────
# Formula: model_edge × deployed_capital per trade.
# Conservative bound: edge_net ∈ [-1, 1] → max PnL = net_cost per trade.
# Previous formula (0.5 entry_price) × shares inflated BUY_NO trades
# at low entry prices by 310× (e.g. entry=0.158 → 3164 shares → $1072
# PnL on $500 deployed, vs $122 with edge_net=0.2589 here).
# Trades with NULL edge_net (legacy data) contribute only fee_usdc.
total_pnl = sum(
(t.edge_net or 0.0) * t.net_cost - t.fee_usdc
for t in trades
)
avg_edge = total_pnl / total_deployed if total_deployed > 0 else 0
sharpe = self._compute_sharpe()
# ── Calibration score: not available ─────────────────────────────────
# Real calibration (Brier score) requires knowing how each market
# resolved (YES=1 or NO=0). Until close_price / resolution is stored
# per trade, any formula here is a proxy, not a calibration.
# Returns None so the API can surface "unavailable" rather than a
# misleading number. Will be computed from closed trades in Fix 3.
calibration = None # type: ignore[assignment]
return {
"timestamp": datetime.now(UTC),
"total_trades": n,
"total_deployed": total_deployed,
"total_fees": total_fees,
"total_pnl": total_pnl, # estimated unrealized (open trades, current session)
"win_rate": win_rate, # proxy: fraction with entry_price < 0.5
"avg_edge": avg_edge,
"sharpe_ratio": sharpe,
"calibration_score": calibration, # None — requires market resolution data
"paper_mode": True,
}
def _compute_sharpe(self) -> float:
    """Annualized Sharpe ratio from ``self._daily_returns``.

    Uses the population standard deviation of the stored daily returns and
    annualizes with sqrt(365).

    Returns:
        0.0 when fewer than two returns are stored, or when the returns
        are (numerically) constant — the ratio is undefined without
        volatility, and dividing by a tiny stand-in deviation would report
        an astronomically large value. Otherwise mean / std × sqrt(365).
    """
    returns = self._daily_returns
    count = len(returns)
    if count < 2:
        return 0.0

    mean_r = sum(returns) / count
    variance = sum((r - mean_r) ** 2 for r in returns) / count
    std_r = math.sqrt(variance)

    # Below this the spread is zero or float rounding noise, not volatility.
    if std_r < 1e-9:
        return 0.0
    return (mean_r / std_r) * math.sqrt(365)  # Annualize
def check_promotion_thresholds(self) -> tuple[bool, dict]:
    """Check if metrics qualify for real money trading.

    Each metric from ``compute_metrics()`` is compared against a fixed
    minimum. A metric whose value is ``None`` (not available) counts as a
    failed check instead of raising on the comparison.

    Returns:
        ``(all_pass, checks)`` where ``checks`` maps each check name to a
        ``(observed_value, threshold, passed)`` tuple and ``all_pass`` is
        True only when every check passed.
    """
    metrics = self.compute_metrics()

    # (check name, observed value, minimum required)
    requirements = (
        ("sharpe_ratio", metrics["sharpe_ratio"], 0.5),
        ("win_rate", metrics["win_rate"], 0.52),
        ("calibration_score", metrics["calibration_score"], 0.7),
        ("min_trades", metrics["total_trades"], 50),
    )
    checks = {
        name: (value, minimum, value is not None and value >= minimum)
        for name, value, minimum in requirements
    }
    all_pass = all(passed for _, _, passed in checks.values())
    return all_pass, checks
def _empty_metrics(self) -> dict:
return {
"timestamp": datetime.now(UTC),
"total_trades": 0,
"total_deployed": 0,
"total_fees": 0,
"total_pnl": 0,
"win_rate": 0,
"avg_edge": 0,
"sharpe_ratio": 0,
"calibration_score": None, # requires market resolution data
"paper_mode": True,
}