Files
polymarket-bot/api/main.py
chemavx adf2917cda
CI/CD / build-and-push (push) Successful in 1m52s
feat(attribution): dominant_feature per trade + /api/metrics/attribution
Adds alpha attribution by dominant signal feature — which feat_*_lo had
the largest absolute log-odds value on each trade.

Changes:
- _dominant_feature() helper in api/main.py: picks the winning feature
  from signal_components (threshold 0.0001, same as "triggered" in
  /api/metrics/features)
- _enrich_trade() refactored to single exit point; adds dominant_feature
  field to every open trade in /api/trades
- compute_attribution_from_db() in db.py: VALUES subquery finds dominant
  feature per trade in SQL, then aggregates trade_count/avg_edge_net/
  unrealized_pnl_est/realized_pnl/resolved_count/win_rate per group
- /api/metrics/attribution endpoint: returns attribution dict + total_attributed_trades

No schema changes, no strategy changes. Pure observability.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-22 16:35:24 +00:00

279 lines
12 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
FastAPI Backend — serves metrics and trade data to the React dashboard.
"""
import asyncio
from contextlib import asynccontextmanager
from datetime import datetime, timezone
import os
import re
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from bot.data.db import Database
# Phase 6+ reasoning format: every feature value is already in log-odds space.
# Example reasoning fragment:
#   "fg_lo=+0.1200 mom_lo=+0.0000 news_lo=+0.0000 mfld_lo=-0.7483 btc_dom_lo=+0.0000"
# Capture groups, in order: fg, mom, news, mfld, btc_dom.
_FEAT_RE_LO = re.compile(
    r"fg_lo=([+-]?[\d.]+).*?mom_lo=([+-]?[\d.]+).*?"
    r"news_lo=([+-]?[\d.]+).*?mfld_lo=([+-]?[\d.]+).*?btc_dom_lo=([+-]?[\d.]+)"
)
# Pre-Phase-6 reasoning format: raw probability-delta values (fg/mom need ×2
# to be expressed in log-odds). There is no btc_dom field in this format.
# Example reasoning fragment:
#   "fg=+0.0600 mom=+0.0000 news=+0.0000 mfld=-0.7483"
# Capture groups, in order: fg, mom, news, mfld.
_FEAT_RE_RAW = re.compile(
    r"fg=([+-]?[\d.]+).*?mom=([+-]?[\d.]+).*?news=([+-]?[\d.]+).*?mfld=([+-]?[\d.]+)"
)
def _dominant_feature(sc: dict | None) -> str | None:
"""Return the name of the signal_components key with the largest abs log-odds value.
Returns None if signal_components is absent or all features are below threshold.
Threshold 0.0001 matches the "triggered" definition used in /api/metrics/features.
"""
if not sc:
return None
candidates = {
k: abs(v)
for k, v in sc.items()
if k != "unit" and v is not None and abs(v) > 0.0001
}
if not candidates:
return None
return max(candidates, key=candidates.__getitem__)
def _enrich_trade(trade: dict) -> dict:
    """Annotate a trade dict in place and return it.

    Adds three keys:
      days_open          days since trade["timestamp"], rounded to one
                         decimal place; None when there is no timestamp.
                         A timestamp without tzinfo is treated as UTC.
      signal_components  {"unit": "log_odds", fg, mom, news, mfld, btc_dom},
                         or None when no feature data can be recovered.
      dominant_feature   result of _dominant_feature() on signal_components.

    Feature values come from the feat_*_lo columns when feat_fg_lo is set;
    otherwise they are parsed from the free-text "reasoning" string.
    """
    opened_at = trade.get("timestamp")
    if opened_at is None:
        trade["days_open"] = None
    else:
        current = datetime.now(timezone.utc)
        if getattr(opened_at, "tzinfo", None) is None:
            opened_at = opened_at.replace(tzinfo=timezone.utc)
        elapsed_seconds = (current - opened_at).total_seconds()
        trade["days_open"] = round(elapsed_seconds / 86400, 1)

    components = None
    if trade.get("feat_fg_lo") is not None:
        # Phase 6+ rows carry exact per-feature columns; no parsing needed.
        components = {
            "unit": "log_odds",
            "fg": trade["feat_fg_lo"],
            "mom": trade["feat_mom_lo"],
            "news": trade["feat_news_lo"],
            "mfld": trade["feat_mfld_lo"],
            "btc_dom": trade.get("feat_btc_dom_lo"),
        }
    else:
        # Older rows: recover the values from the reasoning text instead.
        text = trade.get("reasoning") or ""
        lo_match = _FEAT_RE_LO.search(text)
        raw_match = _FEAT_RE_RAW.search(text)
        if lo_match:
            fg, mom, news, mfld, btc_dom = (float(g) for g in lo_match.groups())
            components = {
                "unit": "log_odds",
                "fg": fg,
                "mom": mom,
                "news": news,
                "mfld": mfld,
                "btc_dom": btc_dom,
            }
        elif raw_match:
            fg, mom, news, mfld = (float(g) for g in raw_match.groups())
            # Pre-Phase-6 fg/mom are raw probability deltas → ×2 for log-odds.
            components = {
                "unit": "log_odds",
                "fg": fg * 2,
                "mom": mom * 2,
                "news": news,
                "mfld": mfld,
                "btc_dom": None,
            }

    trade["signal_components"] = components
    trade["dominant_feature"] = _dominant_feature(components)
    return trade
# Module-level Database handle used by the endpoints in this file; it is
# connected and disconnected by lifespan().
db = Database()
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: hold the DB connection while the app runs.

    Connects the module-level ``db`` before yielding control to the
    application and disconnects it afterwards. The disconnect sits in a
    ``finally`` clause so it also runs when an exception (including task
    cancellation) is raised into the generator at the ``yield`` point,
    instead of leaking the connection.
    """
    await db.connect()
    try:
        yield
    finally:
        await db.disconnect()
# ASGI application; lifespan() manages the DB connection around its run time.
app = FastAPI(title="Polymarket Bot API", lifespan=lifespan)
# CORS: only GET is allowed, from any origin and with any request headers.
# NOTE(review): wildcard origins are presumably intentional for a read-only
# dashboard API — confirm before exposing this service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["GET"],
    allow_headers=["*"],
)
@app.get("/health")
async def health():
    """Health probe.

    Returns a fixed "ok" status together with the raw PAPER_MODE environment
    value (the string "true" when the variable is unset).
    """
    paper_mode = os.getenv("PAPER_MODE", "true")
    return {"status": "ok", "paper_mode": paper_mode}
@app.get("/api/metrics")
async def get_metrics():
    """Return up to 42 days of metrics history plus its first entry.

    "latest" is the first element of the history returned by the DB layer
    (presumably newest-first — verify against Database.get_metrics_history).
    With no history the response is {"history": [], "latest": None}.
    """
    rows = await db.get_metrics_history(days=42)
    if rows:
        return {"history": rows, "latest": rows[0]}
    return {"history": [], "latest": None}
@app.get("/api/trades")
async def get_trades(limit: int = 50, status: str = "open"):
    """List recent trades.

    status: "open" (default) | "closed" | "all". Any other value is treated
    as "open".

    Only the "open" listing is enriched: each trade gains days_open,
    signal_components {unit, fg, mom, news, mfld, btc_dom} and
    dominant_feature. "closed" and "all" listings are returned as the DB
    layer provides them.
    """
    effective_status = status if status in ("open", "closed", "all") else "open"
    # "all" means no status filter at the DB layer.
    db_status = effective_status if effective_status != "all" else None
    rows = await db.get_recent_trades(limit=limit, status=db_status)
    if db_status == "open":
        rows = list(map(_enrich_trade, rows))
    return {"trades": rows, "count": len(rows), "status_filter": effective_status}
@app.get("/api/metrics/features")
async def get_feature_metrics():
    """Per-signal-feature performance breakdown, all values in log-odds space.

    The response is {"features": ...}, where the payload comes straight from
    Database.compute_feature_metrics_from_db(). Each feature key contains:

      unit                        "log_odds" (common unit for all features)
      materiality_threshold       |lo| threshold for "material" classification
      triggered_count             trades where |feat_lo| > 0.0001 (signal fired)
      material_count              trades where |feat_lo| >= threshold (moved the model)
      avg_contribution_lo         mean signed contribution (triggered trades)
      avg_abs_contribution_lo     mean absolute contribution (triggered trades)
      avg_edge_net_when_material  mean edge_net for material trades
      unrealized_pnl_est          estimated open-position PnL (triggered trades)
      realized_pnl                sum of close_pnl for resolved triggered trades
      resolved_count              closed triggered trades with known outcome
      win_rate                    null if resolved_count < 5
      net_positive_count          triggered trades where the feature pushed BUY
      net_negative_count          triggered trades where the feature pushed SELL

    NULL values in resolved_count / win_rate are expected early in the paper run.
    """
    return {"features": await db.compute_feature_metrics_from_db()}
@app.get("/api/trades/legacy")
async def get_legacy_trades():
    """List trades with NULL edge_net (pre-Phase-1 data).

    These trades carry no signal-quality information (edge_net, final_prob)
    and are excluded from unrealized_pnl_est in /api/summary. They may also
    lack feat_*_lo columns when their reasoning string predates the Phase 6
    format.

    Returns {"trades": [...], "count": <number of trades>}.
    """
    legacy = await db.get_legacy_incomplete_trades()
    payload = {"trades": legacy}
    payload["count"] = len(legacy)
    return payload
@app.get("/api/metrics/attribution")
async def get_attribution():
    """Alpha attribution by dominant signal feature.

    Phase 6 trades are grouped by their dominant feature — the feat_*_lo with
    the largest absolute log-odds value — to show which signal is actually
    generating opportunities. Each key in "attribution" contains:

      trade_count         trades where this feature was dominant
      avg_edge_net        mean net edge for those trades
      unrealized_pnl_est  open-position PnL estimate (edge_net × net_cost - fee)
      realized_pnl        sum of close_pnl for resolved trades in this group
      resolved_count      closed trades with known outcome
      win_rate            null if resolved_count < 5

    The "none" group holds Phase 6 trades whose feat_*_lo values are all
    below 0.0001 (signals fired but stayed under the trigger threshold).

    Only Phase 6 trades (feat_fg_lo IS NOT NULL) are included; pre-Phase-6
    trades appear in /api/trades/legacy instead.

    "total_attributed_trades" is the sum of trade_count over all groups.
    """
    groups = await db.compute_attribution_from_db()
    attributed = 0
    for stats in groups.values():
        attributed += stats["trade_count"]
    return {"attribution": groups, "total_attributed_trades": attributed}
@app.get("/api/summary")
async def get_summary():
    """Dashboard summary card data.

    Portfolio figures (total_trades, open_trades_count, total_deployed,
    cash_available) are computed from DB queries on every request.

    PnL and performance metrics come from the latest metrics_daily snapshot,
    which is written by the bot every cycle via
    MetricsTracker.update_daily_summary(). After Fix 3, that snapshot is also
    DB-computed — not dependent on pod restarts.

    A missing snapshot is treated as an empty one: numeric PnL fields fall
    back to 0 and win_rate / calibration_score are returned as null.
    """
    latest_metrics, open_trades, all_trades, inverted, legacy_count = await asyncio.gather(
        db.get_metrics_history(days=1),
        db.get_recent_trades(limit=500, status="open"),
        db.get_recent_trades(limit=500),
        db.get_recently_closed_inverted(hours=24),
        db.get_legacy_incomplete_count(),
    )
    latest = latest_metrics[0] if latest_metrics else {}
    paper_bankroll = float(os.getenv("PAPER_BANKROLL", "10000"))
    # `or 0` rather than a .get() default: a row whose net_cost is present but
    # None (NULL) would otherwise make sum() raise TypeError.
    total_deployed = sum((t.get("net_cost") or 0) for t in open_trades)
    # NOTE(review): both trade queries pass limit=500, so the counts below are
    # presumably capped at 500 rows each — confirm against
    # Database.get_recent_trades before relying on them as exact totals.
    return {
        # ── Portfolio state (live from DB) ──────────────────────────────────
        "paper_mode": os.getenv("PAPER_MODE", "true") == "true",
        "paper_bankroll": paper_bankroll,
        "total_trades": len(all_trades),
        "open_trades_count": len(open_trades),
        "closed_trades_count": len(all_trades) - len(open_trades),
        "total_deployed": total_deployed,
        "cash_available": max(0.0, paper_bankroll - total_deployed),
        "legacy_incomplete_count": legacy_count,
        "reentry_guard_blocks_24h": len(inverted),
        # ── P&L (from latest metrics_daily snapshot) ────────────────────────
        # unrealized_pnl_est: open positions, edge_net × net_cost - fee.
        #   Estimated — uses model signal, not live price. Source: open trades.
        # realized_pnl: closed positions with known resolution.
        #   Exact — computed from (resolution - entry_price) × shares.
        # total_pnl: sum of both.
        "unrealized_pnl_est": latest.get("unrealized_pnl_est") or 0,
        "realized_pnl": latest.get("realized_pnl") or 0,
        "total_pnl": latest.get("total_pnl") or 0,
        # ── Performance metrics (from latest metrics_daily snapshot) ─────────
        # win_rate: fraction of resolved closed trades where close_pnl > 0.
        #   null if fewer than 5 resolved trades. Source: closed+resolved trades.
        # sharpe_ratio: 0.0 — requires daily-return time series (not yet tracked).
        # calibration_score: 1 - Brier score on resolved trades (higher = better).
        #   null if fewer than 10 resolved trades. Source: closed+resolved trades.
        "win_rate": latest.get("win_rate"),  # null if < 5 resolved
        "sharpe_ratio": latest.get("sharpe_ratio") or 0,  # 0.0 until tracked
        "calibration_score": latest.get("calibration_score"),  # null if < 10 resolved
        # ── Counters from snapshot ───────────────────────────────────────────
        "resolved_count": latest.get("resolved_count") or 0,
        # ── Promotion gate ───────────────────────────────────────────────────
        # All thresholds must pass; null metrics count as not-ready.
        "promotion_ready": (
            (latest.get("sharpe_ratio") or 0) >= 0.5
            and (latest.get("win_rate") or 0) >= 0.52
            and (latest.get("calibration_score") or 0) >= 0.7
            and len(all_trades) >= 50
        ),
    }