feat(metrics): Fix 3 — DB-computed metrics, stateless tracker, resolution tracking
CI/CD / build-and-push (push) Successful in 1m47s
CI/CD / build-and-push (push) Successful in 1m47s
schema.sql
trades: + close_pnl, resolution (market outcome storage)
metrics_daily: + unrealized_pnl_est, realized_pnl, open_count, closed_count, resolved_count
db.py
close_paper_position(): accepts resolution; computes close_pnl in SQL
BUY_YES: (resolution − entry_price) × shares
BUY_NO: ((1 − resolution) − entry_price) × shares
save_daily_metrics(): persists new columns
compute_metrics_from_db(): single DB query for all metrics; no in-memory state
tracker.py — complete rewrite (stateless)
Removed self._trades, self._daily_returns, compute_metrics(), _compute_sharpe(),
check_promotion_thresholds(), _empty_metrics()
update_daily_summary() now reads compute_metrics_from_db() every cycle
Safe across pod restarts: always reflects full DB history
paper.py
close_position(): passes resolution to close_paper_position()
api/main.py /api/summary
Added unrealized_pnl_est (estimated, open trades) and realized_pnl (exact,
closed+resolved) as separate fields alongside total_pnl
win_rate: null if < 5 resolved trades (was proxy on entry_price < 0.5)
calibration_score: Brier-based, null if < 10 resolved trades
resolved_count exposed as field
Each field annotated with: exact/estimated, source, null conditions
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+72
-116
@@ -1,21 +1,27 @@
|
||||
"""
|
||||
Metrics Tracker — Computes trading performance metrics.
|
||||
Metrics Tracker — computes and persists trading performance metrics from the DB.
|
||||
|
||||
Key metrics tracked:
|
||||
- P&L (cumulative and daily)
|
||||
- Sharpe Ratio (annualized)
|
||||
- Win Rate
|
||||
- Calibration Score (how accurate our probability estimates are)
|
||||
- Max Drawdown
|
||||
- Average Edge realized
|
||||
All metrics are derived directly from the `trades` table on every cycle call.
|
||||
No in-memory trade state is kept: the tracker is stateless across pod restarts.
|
||||
|
||||
Metric definitions
|
||||
──────────────────
|
||||
unrealized_pnl_est Estimated PnL for OPEN positions: edge_net × net_cost − fee.
|
||||
Source: open trades with edge_net. Estimated (model signal).
|
||||
realized_pnl Exact PnL for CLOSED positions: computed from resolution.
|
||||
Source: closed trades with known resolution. Exact.
|
||||
total_pnl unrealized_pnl_est + realized_pnl.
|
||||
win_rate Fraction of resolved closed trades with close_pnl > 0.
|
||||
NULL if fewer than 5 resolved trades.
|
||||
calibration_score 1 − AVG((final_prob − resolution)²) on resolved trades.
|
||||
Complement of the Brier score, i.e. 1 − Brier (higher = better calibration). NULL if < 10 resolved.
|
||||
sharpe_ratio Always 0.0 (placeholder) — requires a daily-return time series, not yet tracked.
|
||||
"""
|
||||
import logging
|
||||
import math
|
||||
from datetime import datetime, UTC
|
||||
from typing import Optional
|
||||
|
||||
from bot.executor.paper import Trade
|
||||
from bot.data.db import Database
|
||||
from bot.executor.paper import Trade
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
@@ -23,119 +29,69 @@ log = logging.getLogger(__name__)
|
||||
class MetricsTracker:
|
||||
def __init__(self, db: Database) -> None:
|
||||
self._db = db
|
||||
self._trades: list[Trade] = []
|
||||
self._daily_returns: list[float] = []
|
||||
|
||||
async def record_trade(self, trade: Trade) -> None:
|
||||
self._trades.append(trade)
|
||||
"""Persist a trade to the DB. No in-memory accumulation."""
|
||||
await self._db.save_trade(trade)
|
||||
log.info("Trade recorded. Total trades: %d", len(self._trades))
|
||||
log.info("Trade recorded: %s", trade)
|
||||
|
||||
async def update_daily_summary(self) -> None:
|
||||
"""Compute and store daily metrics snapshot."""
|
||||
if not self._trades:
|
||||
"""Compute metrics from DB and write a metrics_daily snapshot.
|
||||
|
||||
Called every cycle by the trading loop. Safe after pod restarts:
|
||||
reads the full trade history from DB, not from in-memory state.
|
||||
"""
|
||||
raw = await self._db.compute_metrics_from_db()
|
||||
if not raw["total_trades"]:
|
||||
return
|
||||
|
||||
metrics = self.compute_metrics()
|
||||
open_count = int(raw["open_count"] or 0)
|
||||
closed_count = int(raw["closed_count"] or 0)
|
||||
resolved = int(raw["resolved_count"] or 0)
|
||||
wins = int(raw["wins_realized"] or 0)
|
||||
unrealized = float(raw["unrealized_pnl_est"] or 0)
|
||||
realized = float(raw["realized_pnl"] or 0)
|
||||
total_deployed = float(raw["total_deployed"] or 0)
|
||||
total_fees = float(raw["total_fees"] or 0)
|
||||
total_pnl = unrealized + realized
|
||||
|
||||
# win_rate: only over resolved closed trades; null if sample too small
|
||||
win_rate = (wins / resolved) if resolved >= 5 else None
|
||||
|
||||
# calibration: Brier score from DB; null if sample too small
|
||||
calibration = (
|
||||
float(raw["calibration_score"])
|
||||
if raw["calibration_score"] is not None and resolved >= 10
|
||||
else None
|
||||
)
|
||||
|
||||
avg_edge = total_pnl / total_deployed if total_deployed > 0 else 0.0
|
||||
|
||||
metrics = {
|
||||
"timestamp": datetime.now(UTC),
|
||||
"total_trades": int(raw["total_trades"]),
|
||||
"open_count": open_count,
|
||||
"closed_count": closed_count,
|
||||
"resolved_count": resolved,
|
||||
"total_deployed": total_deployed,
|
||||
"total_fees": total_fees,
|
||||
"unrealized_pnl_est": unrealized,
|
||||
"realized_pnl": realized,
|
||||
"total_pnl": total_pnl,
|
||||
"win_rate": win_rate,
|
||||
"avg_edge": avg_edge,
|
||||
"sharpe_ratio": 0.0, # requires daily-return series (not yet tracked)
|
||||
"calibration_score": calibration,
|
||||
"paper_mode": True,
|
||||
}
|
||||
await self._db.save_daily_metrics(metrics)
|
||||
|
||||
log.info(
|
||||
"Daily metrics | Trades: %d | P&L: $%.2f | Win: %.1f%% | Sharpe: %.2f",
|
||||
metrics["total_trades"],
|
||||
metrics["total_pnl"],
|
||||
metrics["win_rate"] * 100,
|
||||
metrics["sharpe_ratio"],
|
||||
"Daily metrics | trades=%d (open=%d closed=%d resolved=%d) | "
|
||||
"unrealized=$%.2f realized=$%.2f total=$%.2f | "
|
||||
"win_rate=%s calibration=%s",
|
||||
metrics["total_trades"], open_count, closed_count, resolved,
|
||||
unrealized, realized, total_pnl,
|
||||
f"{win_rate:.1%}" if win_rate is not None else "n/a (<5)",
|
||||
f"{calibration:.3f}" if calibration is not None else "n/a (<10)",
|
||||
)
|
||||
|
||||
def compute_metrics(self) -> dict:
|
||||
if not self._trades:
|
||||
return self._empty_metrics()
|
||||
|
||||
trades = self._trades
|
||||
n = len(trades)
|
||||
|
||||
# ── Capital: all in-session trades (open + closed this session) ────────
|
||||
# NOTE: self._trades is in-memory; resets on pod restart.
|
||||
# Fix 3 (planned): replace with DB-computed metrics so restarts don't
|
||||
# truncate history. Until then, these numbers reflect the current session.
|
||||
total_deployed = sum(t.net_cost for t in trades)
|
||||
total_fees = sum(t.fee_usdc for t in trades)
|
||||
|
||||
# ── Win rate ─────────────────────────────────────────────────────────
|
||||
# Proxy for open trades: fraction where entry_price < 0.5.
|
||||
# Not a realized win rate (no market resolutions available yet).
|
||||
wins = sum(1 for t in trades if t.entry_price < 0.5)
|
||||
win_rate = wins / n if n > 0 else 0
|
||||
|
||||
# ── Estimated unrealized P&L (open positions only) ───────────────────
|
||||
# Formula: model_edge × deployed_capital per trade.
|
||||
# Conservative bound: edge_net ∈ [-1, 1] → max PnL = net_cost per trade.
|
||||
# Previous formula (0.5 − entry_price) × shares inflated BUY_NO trades
|
||||
# at low entry prices by 3–10× (e.g. entry=0.158 → 3164 shares → $1072
|
||||
# PnL on $500 deployed, vs $122 with edge_net=0.2589 here).
|
||||
# Trades with NULL edge_net (legacy data) contribute only −fee_usdc.
|
||||
total_pnl = sum(
|
||||
(t.edge_net or 0.0) * t.net_cost - t.fee_usdc
|
||||
for t in trades
|
||||
)
|
||||
|
||||
avg_edge = total_pnl / total_deployed if total_deployed > 0 else 0
|
||||
|
||||
sharpe = self._compute_sharpe()
|
||||
|
||||
# ── Calibration score: not available ─────────────────────────────────
|
||||
# Real calibration (Brier score) requires knowing how each market
|
||||
# resolved (YES=1 or NO=0). Until close_price / resolution is stored
|
||||
# per trade, any formula here is a proxy, not a calibration.
|
||||
# Returns None so the API can surface "unavailable" rather than a
|
||||
# misleading number. Will be computed from closed trades in Fix 3.
|
||||
calibration = None # type: ignore[assignment]
|
||||
|
||||
return {
|
||||
"timestamp": datetime.now(UTC),
|
||||
"total_trades": n,
|
||||
"total_deployed": total_deployed,
|
||||
"total_fees": total_fees,
|
||||
"total_pnl": total_pnl, # estimated unrealized (open trades, current session)
|
||||
"win_rate": win_rate, # proxy: fraction with entry_price < 0.5
|
||||
"avg_edge": avg_edge,
|
||||
"sharpe_ratio": sharpe,
|
||||
"calibration_score": calibration, # None — requires market resolution data
|
||||
"paper_mode": True,
|
||||
}
|
||||
|
||||
def _compute_sharpe(self) -> float:
|
||||
"""Annualized Sharpe ratio from daily returns."""
|
||||
if len(self._daily_returns) < 2:
|
||||
return 0.0
|
||||
mean_r = sum(self._daily_returns) / len(self._daily_returns)
|
||||
variance = sum((r - mean_r) ** 2 for r in self._daily_returns) / len(self._daily_returns)
|
||||
std_r = math.sqrt(variance) if variance > 0 else 1e-9
|
||||
return (mean_r / std_r) * math.sqrt(365) # Annualize
|
||||
|
||||
def check_promotion_thresholds(self) -> tuple[bool, dict]:
|
||||
"""Check if metrics qualify for real money trading."""
|
||||
metrics = self.compute_metrics()
|
||||
cal = metrics["calibration_score"] # may be None
|
||||
checks = {
|
||||
"sharpe_ratio": (metrics["sharpe_ratio"], 0.5, metrics["sharpe_ratio"] >= 0.5),
|
||||
"win_rate": (metrics["win_rate"], 0.52, metrics["win_rate"] >= 0.52),
|
||||
"calibration_score": (cal, 0.7, cal is not None and cal >= 0.7),
|
||||
"min_trades": (metrics["total_trades"], 50, metrics["total_trades"] >= 50),
|
||||
}
|
||||
all_pass = all(v[2] for v in checks.values())
|
||||
return all_pass, checks
|
||||
|
||||
def _empty_metrics(self) -> dict:
|
||||
return {
|
||||
"timestamp": datetime.now(UTC),
|
||||
"total_trades": 0,
|
||||
"total_deployed": 0,
|
||||
"total_fees": 0,
|
||||
"total_pnl": 0,
|
||||
"win_rate": 0,
|
||||
"avg_edge": 0,
|
||||
"sharpe_ratio": 0,
|
||||
"calibration_score": None, # requires market resolution data
|
||||
"paper_mode": True,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user