feat(phase6): per-feature signal attribution in log-odds space

Adds feat_fg_lo / feat_mom_lo / feat_news_lo / feat_mfld_lo / feat_btc_dom_lo
to every trade, all normalized to log-odds contribution for direct
comparability.

- fg / mom / btc_dom: raw probability-delta × 2 → log-odds
- news / mfld: already log-odds (LOGODDS_WEIGHT already applied), no scaling
- btc_dom tracked separately in bayesian.py instead of bundled in total_adj
- reasoning string updated to fg_lo= / mom_lo= notation for self-documentation

Schema: 5 new DOUBLE PRECISION columns + 2 partial indexes
Stack: TradingSignal → Order → Trade → save_trade all carry feat fields
Startup: backfill_feature_columns() recovers fg/mom/news/mfld from old
  reasoning strings (×2 applied to fg/mom); btc_dom_lo stays NULL for legacy
  trades
API: /api/metrics/features — triggered/material split per feature with
  two-level thresholds (0.05 for fg/mom/btc_dom, 0.10 for news/mfld)
API: /api/trades/legacy — exposes pre-Phase-1 trades (edge_net IS NULL)
API: _enrich_trade backward-compat: reads DB columns first, falls back to
  reasoning regex with unit conversion for pre-Phase-6 trades

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-22 07:04:53 +00:00
parent 9a5be27532
commit 8479a63174
7 changed files with 343 additions and 20 deletions
@@ -35,10 +35,12 @@ class Database:
                    id, market_id, question, direction, size_usdc,
                    entry_price, shares, fee_usdc, net_cost, timestamp, reasoning, paper,
                    edge_gross, edge_net, prior_prob, final_prob,
-                    mid_price, spread_estimate, commission, family_key
+                    mid_price, spread_estimate, commission, family_key,
+                    feat_fg_lo, feat_mom_lo, feat_news_lo, feat_mfld_lo, feat_btc_dom_lo
                ) VALUES (
                    $1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,
-                    $13,$14,$15,$16,$17,$18,$19,$20
+                    $13,$14,$15,$16,$17,$18,$19,$20,
+                    $21,$22,$23,$24,$25
                )
                ON CONFLICT (id) DO NOTHING
            """,
@@ -48,6 +50,9 @@ class Database:
                # Phase 1 fields
                trade.edge_gross, trade.edge_net, trade.prior_prob, trade.final_prob,
                trade.mid_price, trade.spread_estimate, trade.commission, trade.family_key,
+                # Phase 6 feature log-odds
+                trade.feat_fg_lo, trade.feat_mom_lo, trade.feat_news_lo,
+                trade.feat_mfld_lo, trade.feat_btc_dom_lo,
            )

    async def save_daily_metrics(self, metrics: dict) -> None:
@@ -264,3 +269,145 @@ class Database:
                "SELECT * FROM metrics_daily ORDER BY timestamp DESC LIMIT $1", days
            )
            return [dict(r) for r in rows]
+
+    async def backfill_feature_columns(self) -> int:
+        """Back-populate feat_*_lo for trades created before Phase 6.
+
+        Parses the legacy reasoning string
+        (format: 'fg=+0.0600 mom=... news=... mfld=...').
+        fg / mom raw values are multiplied by 2 to convert to log-odds.
+        news / mfld are already in log-odds (no scaling).
+        feat_btc_dom_lo cannot be recovered from the old reasoning string and
+        remains NULL for legacy trades.
+
+        Only a well-formed numeric literal is captured after each key, so a
+        stray token (trailing punctuation, 'n/a', ...) yields NULL for that
+        column instead of making the DOUBLE PRECISION cast raise and abort
+        the whole UPDATE.  Rows are selected only when the fg value itself is
+        parseable, which guarantees feat_fg_lo becomes non-NULL and the row is
+        not selected (and counted) again on the next call.
+
+        Returns the number of rows updated.
+        """
+        # One numeric literal: optional sign, digits with an optional decimal
+        # point, optional exponent.  Written without backslashes so neither
+        # Python nor PostgreSQL string escaping can alter it.  It is a fixed
+        # constant, not user input, so interpolating it into the SQL is safe.
+        num = "[+-]?[0-9]*[.]?[0-9]+(?:[eE][+-]?[0-9]+)?"
+        async with self._pool.acquire() as conn:
+            result = await conn.execute(f"""
+                UPDATE trades
+                SET
+                  feat_fg_lo  = ((regexp_match(reasoning, 'fg=({num})'))[1])::DOUBLE PRECISION * 2,
+                  feat_mom_lo = ((regexp_match(reasoning, 'mom=({num})'))[1])::DOUBLE PRECISION * 2,
+                  feat_news_lo = ((regexp_match(reasoning, 'news=({num})'))[1])::DOUBLE PRECISION,
+                  feat_mfld_lo = ((regexp_match(reasoning, 'mfld=({num})'))[1])::DOUBLE PRECISION,
+                  feat_btc_dom_lo = NULL
+                WHERE feat_fg_lo IS NULL
+                  AND reasoning IS NOT NULL
+                  AND reasoning ~ 'fg={num}'
+                  AND reasoning NOT LIKE '%fg_lo=%'
+            """)
+        # The status string ends with the affected-row count (e.g. "UPDATE 3").
+        updated = int(result.split()[-1]) if result else 0
+        if updated:
+            log.info("backfill_feature_columns: updated %d trade(s)", updated)
+        return updated
+
+    async def get_legacy_incomplete_trades(self) -> list[dict]:
+        """Fetch every trade whose edge_net is NULL, newest first.
+
+        These are the pre-Phase-1 rows that carry no signal quality info.
+        Each row is returned as a plain dict holding the identifying and
+        close-related columns plus the five feat_*_lo columns.
+        """
+        async with self._pool.acquire() as conn:
+            legacy_sql = """
+                SELECT id, market_id, question, direction, net_cost, entry_price,
+                       timestamp, reasoning, closed_at, close_reason, family_key,
+                       feat_fg_lo, feat_mom_lo, feat_news_lo, feat_mfld_lo, feat_btc_dom_lo
+                FROM trades
+                WHERE edge_net IS NULL
+                ORDER BY timestamp DESC
+            """
+            records = await conn.fetch(legacy_sql)
+            return list(map(dict, records))
+
+    async def compute_feature_metrics_from_db(self) -> dict:
+        """Aggregate per-feature attribution statistics (log-odds space).
+
+        The result maps a feature name (fg, mom, news, mfld, btc_dom) to a
+        dict of metrics.  A feature whose column is NULL on every trade
+        produces no row in the query and is therefore absent from the result.
+
+        "Triggered" means |feat_lo| > 0.0001; "material" means
+        |feat_lo| >= materiality_threshold.  Keys per feature:
+          unit                       — always "log_odds"
+          materiality_threshold      — 0.05 for fg/mom/btc_dom, 0.10 for news/mfld
+          triggered_count            — number of triggered trades
+          material_count             — number of material trades
+          avg_contribution_lo        — mean signed value over triggered trades
+          avg_abs_contribution_lo    — mean absolute value over triggered trades
+          avg_edge_net_when_material — mean edge_net over material trades
+          unrealized_pnl_est         — sum of edge_net*net_cost − fee_usdc over
+                                       triggered trades with closed_at NULL
+          realized_pnl               — sum of close_pnl over triggered trades
+                                       that have a close_pnl
+          resolved_count             — triggered trades that have a close_pnl
+          win_rate                   — wins / resolved_count, or None when
+                                       resolved_count < 5
+          net_positive_count         — trades with feat_lo >  0.0001
+          net_negative_count         — trades with feat_lo < -0.0001
+        """
+        async with self._pool.acquire() as conn:
+            metrics_sql = """
+                WITH feature_values AS (
+                  SELECT 'fg'  AS feature,
+                         0.05::DOUBLE PRECISION AS mat_thresh,
+                         feat_fg_lo AS fval,
+                         edge_net, net_cost, fee_usdc, closed_at, close_pnl
+                  FROM trades WHERE feat_fg_lo IS NOT NULL
+                  UNION ALL
+                  SELECT 'mom', 0.05, feat_mom_lo,
+                         edge_net, net_cost, fee_usdc, closed_at, close_pnl
+                  FROM trades WHERE feat_mom_lo IS NOT NULL
+                  UNION ALL
+                  SELECT 'news', 0.10, feat_news_lo,
+                         edge_net, net_cost, fee_usdc, closed_at, close_pnl
+                  FROM trades WHERE feat_news_lo IS NOT NULL
+                  UNION ALL
+                  SELECT 'mfld', 0.10, feat_mfld_lo,
+                         edge_net, net_cost, fee_usdc, closed_at, close_pnl
+                  FROM trades WHERE feat_mfld_lo IS NOT NULL
+                  UNION ALL
+                  SELECT 'btc_dom', 0.05, feat_btc_dom_lo,
+                         edge_net, net_cost, fee_usdc, closed_at, close_pnl
+                  FROM trades WHERE feat_btc_dom_lo IS NOT NULL
+                )
+                SELECT
+                  feature,
+                  mat_thresh                                                              AS materiality_threshold,
+                  COUNT(*)     FILTER (WHERE ABS(fval) > 0.0001)                         AS triggered_count,
+                  COUNT(*)     FILTER (WHERE ABS(fval) >= mat_thresh)                    AS material_count,
+                  AVG(fval)    FILTER (WHERE ABS(fval) > 0.0001)                         AS avg_contribution_lo,
+                  AVG(ABS(fval)) FILTER (WHERE ABS(fval) > 0.0001)                       AS avg_abs_contribution_lo,
+                  AVG(edge_net) FILTER (WHERE ABS(fval) >= mat_thresh
+                                          AND edge_net IS NOT NULL)                      AS avg_edge_net_when_material,
+                  COALESCE(SUM(edge_net * net_cost - fee_usdc)
+                    FILTER (WHERE ABS(fval) > 0.0001
+                              AND closed_at IS NULL
+                              AND edge_net IS NOT NULL), 0)                              AS unrealized_pnl_est,
+                  COALESCE(SUM(close_pnl)
+                    FILTER (WHERE ABS(fval) > 0.0001
+                              AND close_pnl IS NOT NULL), 0)                             AS realized_pnl,
+                  COUNT(*) FILTER (WHERE ABS(fval) > 0.0001
+                                     AND close_pnl IS NOT NULL
+                                     AND close_pnl > 0)                                 AS wins_realized,
+                  COUNT(*) FILTER (WHERE ABS(fval) > 0.0001
+                                     AND close_pnl IS NOT NULL)                         AS resolved_count,
+                  COUNT(*) FILTER (WHERE fval >  0.0001)                                AS net_positive_count,
+                  COUNT(*) FILTER (WHERE fval < -0.0001)                                AS net_negative_count
+                FROM feature_values
+                GROUP BY feature, mat_thresh
+                ORDER BY feature
+            """
+            rows = await conn.fetch(metrics_sql)
+
+        metrics: dict[str, dict] = {}
+        for record in rows:
+            rec = dict(record)
+            n_resolved = int(rec.get("resolved_count") or 0)
+            n_wins = int(rec.get("wins_realized") or 0)
+            # Fewer than 5 resolved trades: report no win rate at all.
+            win_rate = None if n_resolved < 5 else n_wins / n_resolved
+            # Key order below is the order the caller receives.
+            metrics[rec["feature"]] = {
+                "unit": "log_odds",
+                "materiality_threshold": float(rec["materiality_threshold"]),
+                **{
+                    key: int(rec.get(key) or 0)
+                    for key in ("triggered_count", "material_count")
+                },
+                **{
+                    key: _f(rec.get(key))
+                    for key in (
+                        "avg_contribution_lo",
+                        "avg_abs_contribution_lo",
+                        "avg_edge_net_when_material",
+                    )
+                },
+                **{
+                    key: float(rec.get(key) or 0)
+                    for key in ("unrealized_pnl_est", "realized_pnl")
+                },
+                "resolved_count": n_resolved,
+                "win_rate": win_rate,
+                **{
+                    key: int(rec.get(key) or 0)
+                    for key in ("net_positive_count", "net_negative_count")
+                },
+            }
+        return metrics
+
+
+def _f(v) -> Optional[float]:
+    """Convert *v* to float, passing None through unchanged."""
+    if v is None:
+        return None
+    return float(v)
@@ -121,6 +121,53 @@ CREATE INDEX IF NOT EXISTS idx_trades_closed ON trades(closed_at) WHERE closed_a
 ALTER TABLE trades ADD COLUMN IF NOT EXISTS close_pnl  DOUBLE PRECISION;
 ALTER TABLE trades ADD COLUMN IF NOT EXISTS resolution DOUBLE PRECISION;

+-- ─────────────────────────────────────────────────────────────────────────────
+-- Phase 6: per-feature signal attribution — all values in log-odds space
+--
+-- All five features share a common unit (log-odds contribution to
+-- the posterior estimate) so they can be compared directly:
+--
+--   feat_fg_lo      = _fg_contribution × 2
+--                     Fear & Greed direction-adjusted delta, ×2 to log-odds.
+--                     Non-zero for every trade. Range ≈ ±0.12.
+--                     Materiality threshold: |lo| ≥ 0.05.
+--
+--   feat_mom_lo     = _momentum_contribution × 2
+--                     Momentum delta (direction-adjusted), ×2 to log-odds.
+--                     Zero when |btc_change_24h| ≤ 2 %. Range ≈ ±0.15.
+--                     Materiality threshold: |lo| ≥ 0.05.
+--
+--   feat_news_lo    = news_log_adj   (already in log-odds, no scaling)
+--                     GNews sentiment × NEWS_LOGODDS_WEIGHT (1.5).
+--                     Zero for non-politics or when GNews budget exhausted.
+--                     Range ≈ ±1.5.  Materiality threshold: |lo| ≥ 0.10.
+--
+--   feat_mfld_lo    = manifold_log_adj  (already in log-odds, no scaling)
+--                     Manifold divergence × MANIFOLD_LOGODDS_WEIGHT (0.6).
+--                     Zero when Manifold returned no result.
+--                     Range ≈ ±0.6.  Materiality threshold: |lo| ≥ 0.10.
+--
+--   feat_btc_dom_lo = _btc_dom_contribution × 2
+--                     BTC-dominance alt-pressure delta, ×2 to log-odds.
+--                     Only fires for ETH / altcoin / general-crypto markets
+--                     when btc_dominance > 55 % or < 45 %.
+--                     Values: { −0.06, 0.0, +0.06 }.
+--                     Materiality threshold: |lo| ≥ 0.05.
+--
+-- NULL for pre-Phase-6 trades.  Backfilled at startup via
+-- Database.backfill_feature_columns() using reasoning-string regex
+-- (fg_lo/mom_lo multiplied by 2 from raw; news_lo/mfld_lo taken directly;
+-- btc_dom_lo cannot be backfilled and remains NULL for legacy trades).
+-- ─────────────────────────────────────────────────────────────────────────────
+-- Five attribution columns, one per feature.  Each is nullable with no
+-- default; IF NOT EXISTS keeps the statements safe to re-run.
+ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_fg_lo      DOUBLE PRECISION;
+ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_mom_lo     DOUBLE PRECISION;
+ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_news_lo    DOUBLE PRECISION;
+ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_mfld_lo    DOUBLE PRECISION;
+ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_btc_dom_lo DOUBLE PRECISION;
+
+-- Partial indexes: only rows with a non-NULL value are indexed.
+-- Indexes are created for feat_fg_lo and feat_mfld_lo only.
+CREATE INDEX IF NOT EXISTS idx_trades_feat_fg   ON trades(feat_fg_lo)  WHERE feat_fg_lo IS NOT NULL;
+CREATE INDEX IF NOT EXISTS idx_trades_feat_mfld ON trades(feat_mfld_lo) WHERE feat_mfld_lo IS NOT NULL;
+
 -- ─────────────────────────────────────────────────────────────────────────────
 -- Fix 3: extended metrics_daily columns for DB-computed metrics
 --
@@ -49,6 +49,12 @@ class Trade:
    commission: float = 0.0      # = POLYMARKET_FEE * size_usdc
    # ── Phase 2: market family ────────────────────────────────────────────────
    family_key: str = ""
+    # ── Phase 6: per-feature log-odds contributions ───────────────────────────
+    feat_fg_lo:      float = 0.0
+    feat_mom_lo:     float = 0.0
+    feat_news_lo:    float = 0.0
+    feat_mfld_lo:    float = 0.0
+    feat_btc_dom_lo: float = 0.0

    def __str__(self) -> str:
        return (
@@ -148,6 +154,12 @@ class PaperExecutor:
            commission=commission,
            # Phase 2 family
            family_key=order.family_key,
+            # Phase 6 feature log-odds
+            feat_fg_lo=order.feat_fg_lo,
+            feat_mom_lo=order.feat_mom_lo,
+            feat_news_lo=order.feat_news_lo,
+            feat_mfld_lo=order.feat_mfld_lo,
+            feat_btc_dom_lo=order.feat_btc_dom_lo,
        )

        # Update paper portfolio
@@ -369,6 +369,7 @@ async def main() -> None:
    db = Database()
    await db.connect()
    await db.run_migrations()
+    await db.backfill_feature_columns()

    poly = PolymarketClient()
    external = ExternalDataClient()
@@ -56,6 +56,12 @@ class Order:
    family_key: str = ""
    # Phase 4 — regime threshold applied
    regime_min_edge: float = 0.10
+    # Phase 6 — per-feature log-odds contributions (see TradingSignal for semantics)
+    feat_fg_lo:      float = 0.0
+    feat_mom_lo:     float = 0.0
+    feat_news_lo:    float = 0.0
+    feat_mfld_lo:    float = 0.0
+    feat_btc_dom_lo: float = 0.0


 class RiskManager:
@@ -147,4 +153,10 @@ class RiskManager:
            family_key=signal.family_key,
            # Phase 4 — regime
            regime_min_edge=signal.regime_min_edge,
+            # Phase 6 — feature log-odds
+            feat_fg_lo=signal.feat_fg_lo,
+            feat_mom_lo=signal.feat_mom_lo,
+            feat_news_lo=signal.feat_news_lo,
+            feat_mfld_lo=signal.feat_mfld_lo,
+            feat_btc_dom_lo=signal.feat_btc_dom_lo,
        )
@@ -160,6 +160,16 @@ class TradingSignal:
    family_key: str = ""
    # ── Phase 4: regime ──────────────────────────────────────────────────────
    regime_min_edge: float = 0.10
+    # ── Phase 6: per-feature log-odds contributions ───────────────────────────
+    # All values are in log-odds space for direct comparability.
+    # feat_fg_lo / feat_mom_lo: probability-delta × 2 → log-odds.
+    # feat_news_lo / feat_mfld_lo: already log-odds (no scaling).
+    # feat_btc_dom_lo: btc-dominance probability-delta × 2 → log-odds.
+    feat_fg_lo:      float = 0.0
+    feat_mom_lo:     float = 0.0
+    feat_news_lo:    float = 0.0
+    feat_mfld_lo:    float = 0.0
+    feat_btc_dom_lo: float = 0.0


 class BayesianStrategy:
@@ -379,11 +389,14 @@ class BayesianStrategy:
        adjustments.append(_fg_contribution)

        # Signal 3: BTC dominance — hurts altcoins when high
+        _btc_dom_contribution = 0.0
        if (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance > 55:
-            adjustments.append(-0.03 if is_price_above else 0.03)
+            _btc_dom_contribution = -0.03 if is_price_above else 0.03
+            adjustments.append(_btc_dom_contribution)
            sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (high → alt pressure)")
        elif (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance < 45:
-            adjustments.append(0.03 if is_price_above else -0.03)
+            _btc_dom_contribution = 0.03 if is_price_above else -0.03
+            adjustments.append(_btc_dom_contribution)
            sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (low → alt season)")

        # Signal 4: GNews sentiment (politics only, budget-gated)
@@ -448,12 +461,19 @@ class BayesianStrategy:
        if manifold_log_adj != 0.0:
            confidence = min(confidence_cap, confidence + 0.08)

-        # Per-feature contribution string for audit logging
+        # Per-feature log-odds contributions (Phase 6).
+        # fg / mom / btc_dom: probability-delta × 2 → log-odds.
+        # news / mfld: already log-odds (LOGODDS_WEIGHT already applied).
+        feat_fg_lo      = _fg_contribution * 2
+        feat_mom_lo     = _momentum_contribution * 2
+        feat_news_lo    = news_log_adj
+        feat_mfld_lo    = manifold_log_adj
+        feat_btc_dom_lo = _btc_dom_contribution * 2
+
        feat_str = (
-            f"fg={_fg_contribution:+.3f} "
-            f"mom={_momentum_contribution:+.3f} "
-            f"mfld={manifold_log_adj:+.4f} "
-            f"news={news_log_adj:+.4f}"
+            f"fg_lo={feat_fg_lo:+.4f} mom_lo={feat_mom_lo:+.4f} "
+            f"news_lo={feat_news_lo:+.4f} mfld_lo={feat_mfld_lo:+.4f} "
+            f"btc_dom_lo={feat_btc_dom_lo:+.4f}"
        )

        # ── Phase 5: structured audit log ────────────────────────────────────
@@ -496,8 +516,7 @@ class BayesianStrategy:
            f"regime_min={regime_min:.2f} | days={days} | "
            f"family={family} | "
            f"Direction={direction} | "
-            f"fg={_fg_contribution:+.4f} mom={_momentum_contribution:+.4f} "
-            f"news={news_log_adj:+.4f} mfld={manifold_log_adj:+.4f} | "
+            f"{feat_str} | "
            f"Signals: {', '.join(sources[1:])}"
        )

@@ -535,6 +554,12 @@ class BayesianStrategy:
            family_key=family,
            # Phase 4 new fields
            regime_min_edge=regime_min,
+            # Phase 6 new fields — all in log-odds space
+            feat_fg_lo=feat_fg_lo,
+            feat_mom_lo=feat_mom_lo,
+            feat_news_lo=feat_news_lo,
+            feat_mfld_lo=feat_mfld_lo,
+            feat_btc_dom_lo=feat_btc_dom_lo,
        )