feat(phase6): per-feature signal attribution in log-odds space

Adds feat_fg_lo / feat_mom_lo / feat_news_lo / feat_mfld_lo / feat_btc_dom_lo to every trade, all normalized to log-odds contribution for direct comparability.

- fg / mom / btc_dom: raw probability-delta × 2 → log-odds
- news / mfld: already log-odds (LOGODDS_WEIGHT already applied), no scaling
- btc_dom tracked separately in bayesian.py instead of bundled in total_adj
- reasoning string updated to fg_lo= / mom_lo= notation for self-documentation

Schema: 5 new DOUBLE PRECISION columns + 2 partial indexes
Stack: TradingSignal → Order → Trade → save_trade all carry feat fields
Startup: backfill_feature_columns() recovers fg/mom/news/mfld from old reasoning strings (×2 applied to fg/mom); btc_dom_lo stays NULL for legacy trades
API: /api/metrics/features — triggered/material split per feature with two-level thresholds (0.05 for fg/mom/btc_dom, 0.10 for news/mfld)
API: /api/trades/legacy — exposes pre-Phase-1 trades (edge_net IS NULL)
API: _enrich_trade backward-compat: reads DB columns first, falls back to reasoning regex with unit conversion for pre-Phase-6 trades

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-22 07:04:53 +00:00
parent 9a5be27532
commit 8479a63174
7 changed files with 343 additions and 20 deletions
@@ -35,10 +35,12 @@ class Database:
                    id, market_id, question, direction, size_usdc,
                    entry_price, shares, fee_usdc, net_cost, timestamp, reasoning, paper,
                    edge_gross, edge_net, prior_prob, final_prob,
-                    mid_price, spread_estimate, commission, family_key
+                    mid_price, spread_estimate, commission, family_key,
+                    feat_fg_lo, feat_mom_lo, feat_news_lo, feat_mfld_lo, feat_btc_dom_lo
                ) VALUES (
                    $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,
-                    $13,$14,$15,$16,$17,$18,$19,$20
+                    $13,$14,$15,$16,$17,$18,$19,$20,
+                    $21,$22,$23,$24,$25
                )
                ON CONFLICT (id) DO NOTHING
            """,
@@ -48,6 +50,9 @@ class Database:
                # Phase 1 fields
                trade.edge_gross, trade.edge_net, trade.prior_prob, trade.final_prob,
                trade.mid_price, trade.spread_estimate, trade.commission, trade.family_key,
+                # Phase 6 feature log-odds
+                trade.feat_fg_lo, trade.feat_mom_lo, trade.feat_news_lo,
+                trade.feat_mfld_lo, trade.feat_btc_dom_lo,
            )

    async def save_daily_metrics(self, metrics: dict) -> None:
@@ -264,3 +269,145 @@ class Database:
                "SELECT * FROM metrics_daily ORDER BY timestamp DESC LIMIT $1", days
            )
            return [dict(r) for r in rows]
+
+    async def backfill_feature_columns(self) -> int:
+        """Back-populate feat_*_lo for trades created before Phase 6.
+
+        Parses the legacy reasoning string (format:
+        'fg=+0.0600 mom=... news=... mfld=...') inside a single UPDATE.
+
+        - fg / mom values are multiplied by 2 to convert to log-odds.
+        - news / mfld are stored as parsed (already log-odds, no scaling).
+        - feat_btc_dom_lo cannot be recovered from the old reasoning string
+          and is set to NULL.
+
+        Only a numeric literal following each ``name=`` tag is extracted, so a
+        token with trailing punctuation or non-numeric text cannot make the
+        ``::DOUBLE PRECISION`` cast fail and abort the whole statement. A tag
+        with no parseable number yields NULL for that column. Rows are
+        selected only when ``fg=`` carries a parseable number, so every
+        updated row ends up with a non-NULL feat_fg_lo and is not selected
+        again on the next run.
+
+        Returns:
+            The number of rows updated, parsed from the command status string.
+        """
+        # Signed decimal with optional exponent. "[.]" is used instead of an
+        # escaped dot so the pattern needs no backslashes in the Python string.
+        num = "[+-]?[0-9]*[.]?[0-9]+(?:[eE][+-]?[0-9]+)?"
+        # `num` is a fixed local constant (not external input), so
+        # interpolating it into the statement text is safe.
+        statement = f"""
+                UPDATE trades
+                SET
+                  feat_fg_lo  = ((regexp_match(reasoning, 'fg=({num})'))[1])::DOUBLE PRECISION * 2,
+                  feat_mom_lo = ((regexp_match(reasoning, 'mom=({num})'))[1])::DOUBLE PRECISION * 2,
+                  feat_news_lo = ((regexp_match(reasoning, 'news=({num})'))[1])::DOUBLE PRECISION,
+                  feat_mfld_lo = ((regexp_match(reasoning, 'mfld=({num})'))[1])::DOUBLE PRECISION,
+                  feat_btc_dom_lo = NULL
+                WHERE feat_fg_lo IS NULL
+                  AND reasoning IS NOT NULL
+                  AND reasoning ~ 'fg={num}'
+                  AND reasoning NOT LIKE '%fg_lo=%'
+            """
+        async with self._pool.acquire() as conn:
+            result = await conn.execute(statement)
+        # NOTE(review): presumably a command status such as "UPDATE 3"; the
+        # last whitespace-separated token is taken as the row count.
+        updated = int(result.split()[-1]) if result else 0
+        if updated:
+            log.info("backfill_feature_columns: updated %d trade(s)", updated)
+        return updated
+
+    async def get_legacy_incomplete_trades(self) -> list[dict]:
+        """Fetch every trade whose edge_net is NULL, newest first.
+
+        These are the pre-Phase-1 trades that carry no signal-quality data.
+        Each row is returned as a plain dict with the identifying columns,
+        close information, family_key and the five feat_*_lo columns.
+        """
+        query = """
+                SELECT id, market_id, question, direction, net_cost, entry_price,
+                       timestamp, reasoning, closed_at, close_reason, family_key,
+                       feat_fg_lo, feat_mom_lo, feat_news_lo, feat_mfld_lo, feat_btc_dom_lo
+                FROM trades
+                WHERE edge_net IS NULL
+                ORDER BY timestamp DESC
+            """
+        async with self._pool.acquire() as conn:
+            records = await conn.fetch(query)
+        return list(map(dict, records))
+
+    async def compute_feature_metrics_from_db(self) -> dict:
+        """Aggregate per-feature performance figures from the trades table.
+
+        One query unpivots the five feat_*_lo columns (fg, mom, news, mfld,
+        btc_dom) into (feature, value) rows, skipping NULL values, and groups
+        them by feature. A feature with no non-NULL rows produces no group and
+        is therefore absent from the result.
+
+        A trade is "triggered" for a feature when |value| > 0.0001 and
+        "material" when |value| >= the feature's threshold (0.05 for
+        fg / mom / btc_dom, 0.10 for news / mfld).
+
+        Returns:
+            A dict keyed by feature name. Each value is a dict with:
+              unit                       — always "log_odds"
+              materiality_threshold      — the threshold used for "material"
+              triggered_count            — number of triggered trades
+              material_count             — number of material trades
+              avg_contribution_lo        — mean signed value over triggered trades
+              avg_abs_contribution_lo    — mean absolute value over triggered trades
+              avg_edge_net_when_material — mean edge_net over material trades
+              unrealized_pnl_est         — sum of edge_net*net_cost - fee_usdc over
+                                           triggered trades with closed_at NULL
+              realized_pnl               — sum of close_pnl over triggered trades
+              resolved_count             — triggered trades with non-NULL close_pnl
+              win_rate                   — wins / resolved_count, or None when
+                                           resolved_count < 5
+              net_positive_count         — trades with value >  0.0001
+              net_negative_count         — trades with value < -0.0001
+        """
+        sql = """
+                WITH feature_values AS (
+                  SELECT 'fg'  AS feature,
+                         0.05::DOUBLE PRECISION AS mat_thresh,
+                         feat_fg_lo AS fval,
+                         edge_net, net_cost, fee_usdc, closed_at, close_pnl
+                  FROM trades WHERE feat_fg_lo IS NOT NULL
+                  UNION ALL
+                  SELECT 'mom', 0.05, feat_mom_lo,
+                         edge_net, net_cost, fee_usdc, closed_at, close_pnl
+                  FROM trades WHERE feat_mom_lo IS NOT NULL
+                  UNION ALL
+                  SELECT 'news', 0.10, feat_news_lo,
+                         edge_net, net_cost, fee_usdc, closed_at, close_pnl
+                  FROM trades WHERE feat_news_lo IS NOT NULL
+                  UNION ALL
+                  SELECT 'mfld', 0.10, feat_mfld_lo,
+                         edge_net, net_cost, fee_usdc, closed_at, close_pnl
+                  FROM trades WHERE feat_mfld_lo IS NOT NULL
+                  UNION ALL
+                  SELECT 'btc_dom', 0.05, feat_btc_dom_lo,
+                         edge_net, net_cost, fee_usdc, closed_at, close_pnl
+                  FROM trades WHERE feat_btc_dom_lo IS NOT NULL
+                )
+                SELECT
+                  feature,
+                  mat_thresh                                                              AS materiality_threshold,
+                  COUNT(*)     FILTER (WHERE ABS(fval) > 0.0001)                         AS triggered_count,
+                  COUNT(*)     FILTER (WHERE ABS(fval) >= mat_thresh)                    AS material_count,
+                  AVG(fval)    FILTER (WHERE ABS(fval) > 0.0001)                         AS avg_contribution_lo,
+                  AVG(ABS(fval)) FILTER (WHERE ABS(fval) > 0.0001)                       AS avg_abs_contribution_lo,
+                  AVG(edge_net) FILTER (WHERE ABS(fval) >= mat_thresh
+                                          AND edge_net IS NOT NULL)                      AS avg_edge_net_when_material,
+                  COALESCE(SUM(edge_net * net_cost - fee_usdc)
+                    FILTER (WHERE ABS(fval) > 0.0001
+                              AND closed_at IS NULL
+                              AND edge_net IS NOT NULL), 0)                              AS unrealized_pnl_est,
+                  COALESCE(SUM(close_pnl)
+                    FILTER (WHERE ABS(fval) > 0.0001
+                              AND close_pnl IS NOT NULL), 0)                             AS realized_pnl,
+                  COUNT(*) FILTER (WHERE ABS(fval) > 0.0001
+                                     AND close_pnl IS NOT NULL
+                                     AND close_pnl > 0)                                 AS wins_realized,
+                  COUNT(*) FILTER (WHERE ABS(fval) > 0.0001
+                                     AND close_pnl IS NOT NULL)                         AS resolved_count,
+                  COUNT(*) FILTER (WHERE fval >  0.0001)                                AS net_positive_count,
+                  COUNT(*) FILTER (WHERE fval < -0.0001)                                AS net_negative_count
+                FROM feature_values
+                GROUP BY feature, mat_thresh
+                ORDER BY feature
+            """
+        async with self._pool.acquire() as conn:
+            rows = await conn.fetch(sql)
+
+        metrics: dict[str, dict] = {}
+        for record in rows:
+            row = dict(record)
+            n_resolved = int(row.get("resolved_count") or 0)
+            n_wins = int(row.get("wins_realized") or 0)
+
+            # Build the entry in a fixed key order; groups of keys that share
+            # the same conversion are filled in together.
+            entry: dict = {
+                "unit": "log_odds",
+                "materiality_threshold": float(row["materiality_threshold"]),
+            }
+            for key in ("triggered_count", "material_count"):
+                entry[key] = int(row.get(key) or 0)
+            for key in (
+                "avg_contribution_lo",
+                "avg_abs_contribution_lo",
+                "avg_edge_net_when_material",
+            ):
+                entry[key] = _f(row.get(key))
+            for key in ("unrealized_pnl_est", "realized_pnl"):
+                entry[key] = float(row.get(key) or 0)
+            entry["resolved_count"] = n_resolved
+            # Fewer than 5 resolved trades: no win rate is reported.
+            entry["win_rate"] = None if n_resolved < 5 else n_wins / n_resolved
+            for key in ("net_positive_count", "net_negative_count"):
+                entry[key] = int(row.get(key) or 0)
+
+            metrics[row["feature"]] = entry
+        return metrics
+
+
+def _f(v) -> Optional[float]:
+    """Convert *v* to float, passing None through unchanged."""
+    if v is None:
+        return None
+    return float(v)