feat(phase6): per-feature signal attribution in log-odds space

Adds feat_fg_lo / feat_mom_lo / feat_news_lo / feat_mfld_lo / feat_btc_dom_lo to every trade, all normalized to log-odds contribution for direct comparability.

- fg / mom / btc_dom: raw probability-delta × 2 → log-odds
- news / mfld: already log-odds (LOGODDS_WEIGHT already applied), no scaling
- btc_dom tracked separately in bayesian.py instead of bundled in total_adj
- reasoning string updated to fg_lo= / mom_lo= notation for self-documentation

Schema: 5 new DOUBLE PRECISION columns + 2 partial indexes
Stack: TradingSignal → Order → Trade → save_trade all carry feat fields
Startup: backfill_feature_columns() recovers fg/mom/news/mfld from old reasoning strings (×2 applied to fg/mom); btc_dom_lo stays NULL for legacy trades
API: /api/metrics/features — triggered/material split per feature with two-level thresholds (0.05 for fg/mom/btc_dom, 0.10 for news/mfld)
API: /api/trades/legacy — exposes pre-Phase-1 trades (edge_net IS NULL)
API: _enrich_trade backward-compat: reads DB columns first, falls back to reasoning regex with unit conversion for pre-Phase-6 trades

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-22 07:04:53 +00:00
parent 9a5be27532
commit 8479a63174
7 changed files with 343 additions and 20 deletions
@@ -12,15 +12,22 @@ from fastapi.middleware.cors import CORSMiddleware

 from bot.data.db import Database

-# Matches the feat_str embedded in reasoning for trades from bayesian.py v2+:
+# Phase 6+ reasoning format: values are already in log-odds space.
+# "fg_lo=+0.1200 mom_lo=+0.0000 news_lo=+0.0000 mfld_lo=-0.7483 btc_dom_lo=+0.0000"
+_FEAT_RE_LO = re.compile(
+    r"fg_lo=([+-]?[\d.]+).*?mom_lo=([+-]?[\d.]+).*?"
+    r"news_lo=([+-]?[\d.]+).*?mfld_lo=([+-]?[\d.]+).*?btc_dom_lo=([+-]?[\d.]+)"
+)
+
+# Pre-Phase-6 format: raw probability-delta values (fg/mom need ×2 for log-odds).
 # "fg=+0.0600 mom=+0.0000 news=+0.0000 mfld=-0.7483"
-_FEAT_RE = re.compile(
+_FEAT_RE_RAW = re.compile(
    r"fg=([+-]?[\d.]+).*?mom=([+-]?[\d.]+).*?news=([+-]?[\d.]+).*?mfld=([+-]?[\d.]+)"
 )


 def _enrich_trade(trade: dict) -> dict:
-    """Add days_open and signal_components to an open trade dict."""
+    """Add days_open and signal_components (all log-odds) to an open trade dict."""
    ts = trade.get("timestamp")
    if ts is not None:
        now = datetime.now(timezone.utc)
@@ -30,13 +37,47 @@ def _enrich_trade(trade: dict) -> dict:
    else:
        trade["days_open"] = None

+    # Prefer DB columns (Phase 6+) — exact, no parsing required.
+    if trade.get("feat_fg_lo") is not None:
+        trade["signal_components"] = {
+            "unit":    "log_odds",
+            "fg":      trade["feat_fg_lo"],
+            "mom":     trade["feat_mom_lo"],
+            "news":    trade["feat_news_lo"],
+            "mfld":    trade["feat_mfld_lo"],
+            "btc_dom": trade.get("feat_btc_dom_lo"),
+        }
+        return trade
+
+    # Fallback: parse the reasoning string (trades recorded before the Phase 6 DB columns existed).
    reasoning = trade.get("reasoning") or ""
-    m = _FEAT_RE.search(reasoning)
-    trade["signal_components"] = (
-        {"fg": float(m.group(1)), "mom": float(m.group(2)),
-         "news": float(m.group(3)), "mfld": float(m.group(4))}
-        if m else None
-    )
+    m_lo = _FEAT_RE_LO.search(reasoning)
+    if m_lo:
+        # Phase 6 reasoning format — values already in log-odds.
+        trade["signal_components"] = {
+            "unit":    "log_odds",
+            "fg":      float(m_lo.group(1)),
+            "mom":     float(m_lo.group(2)),
+            "news":    float(m_lo.group(3)),
+            "mfld":    float(m_lo.group(4)),
+            "btc_dom": float(m_lo.group(5)),
+        }
+        return trade
+
+    m_raw = _FEAT_RE_RAW.search(reasoning)
+    if m_raw:
+        # Pre-Phase-6 reasoning: fg/mom are raw probability-deltas → multiply by 2 to convert to log-odds.
+        trade["signal_components"] = {
+            "unit":    "log_odds",
+            "fg":      float(m_raw.group(1)) * 2,
+            "mom":     float(m_raw.group(2)) * 2,
+            "news":    float(m_raw.group(3)),
+            "mfld":    float(m_raw.group(4)),
+            "btc_dom": None,
+        }
+        return trade
+
+    trade["signal_components"] = None
    return trade

 db = Database()
@@ -87,6 +128,44 @@ async def get_trades(limit: int = 50, status: str = "open"):
    return {"trades": trades, "count": len(trades), "status_filter": status}


+@app.get("/api/metrics/features")
+async def get_feature_metrics():
+    """Per-signal-feature performance breakdown — all values in log-odds space.
+
+    Each feature key contains:
+      unit                     "log_odds" (common unit for all features)
+      materiality_threshold    |lo| threshold for "material" classification
+      triggered_count          trades where |feat_lo| > 0.0001 (signal fired)
+      material_count           trades where |feat_lo| >= threshold (moved the model)
+      avg_contribution_lo      mean signed contribution (triggered trades)
+      avg_abs_contribution_lo  mean absolute contribution (triggered trades)
+      avg_edge_net_when_material  mean edge_net for material trades
+      unrealized_pnl_est       estimated open-position PnL (triggered trades)
+      realized_pnl             sum close_pnl for resolved triggered trades
+      resolved_count           closed triggered trades with known outcome
+      win_rate                 null if resolved_count < 5
+      net_positive_count       triggered trades where feature pushed BUY direction
+      net_negative_count       triggered trades where feature pushed SELL direction
+
+    NULL values in resolved_count / win_rate are expected early in the paper run.
+    """
+    features = await db.compute_feature_metrics_from_db()
+    return {"features": features}
+
+
+@app.get("/api/trades/legacy")
+async def get_legacy_trades():
+    """Trades with NULL edge_net — pre-Phase-1 data, excluded from PnL estimates.
+
+    These trades have no signal quality information (edge_net, final_prob)
+    and are excluded from unrealized_pnl_est in /api/summary.
+    They may also be missing feat_*_lo columns if the reasoning string
+    predates the Phase 6 format.
+    """
+    trades = await db.get_legacy_incomplete_trades()
+    return {"trades": trades, "count": len(trades)}
+
+
@app.get("/api/summary")
 async def get_summary():
    """Dashboard summary card data.