feat(phase6): per-feature signal attribution in log-odds space
CI/CD / build-and-push (push) Successful in 1m56s

Adds feat_fg_lo / feat_mom_lo / feat_news_lo / feat_mfld_lo / feat_btc_dom_lo
to every trade, all normalized to log-odds contribution for direct comparability.

- fg / mom / btc_dom: raw probability-delta × 2 → log-odds
- news / mfld: already log-odds (LOGODDS_WEIGHT already applied), no scaling
- btc_dom tracked separately in bayesian.py instead of bundled in total_adj
- reasoning string updated to fg_lo= / mom_lo= notation for self-documentation

Schema: 5 new DOUBLE PRECISION columns + 2 partial indexes
Stack: TradingSignal → Order → Trade → save_trade all carry feat fields
Startup: backfill_feature_columns() recovers fg/mom/news/mfld from old
  reasoning strings (×2 applied to fg/mom); btc_dom_lo stays NULL for legacy
API: /api/metrics/features — triggered/material split per feature with
  two-level thresholds (0.05 for fg/mom/btc_dom, 0.10 for news/mfld)
API: /api/trades/legacy — exposes pre-Phase-1 trades (edge_net IS NULL)
API: _enrich_trade backward-compat: reads DB columns first, falls back to
  reasoning regex with unit conversion for pre-Phase-6 trades

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
chemavx
2026-04-22 07:04:53 +00:00
parent 9a5be27532
commit 8479a63174
7 changed files with 343 additions and 20 deletions
+88 -9
View File
@@ -12,15 +12,22 @@ from fastapi.middleware.cors import CORSMiddleware
from bot.data.db import Database
# Matches the feat_str embedded in reasoning for trades from bayesian.py v2+:
# Phase 6 format (Phase 6+): values already in log-odds space.
# "fg_lo=+0.1200 mom_lo=+0.0000 news_lo=+0.0000 mfld_lo=-0.7483 btc_dom_lo=+0.0000"
_FEAT_RE_LO = re.compile(
r"fg_lo=([+-]?[\d.]+).*?mom_lo=([+-]?[\d.]+).*?"
r"news_lo=([+-]?[\d.]+).*?mfld_lo=([+-]?[\d.]+).*?btc_dom_lo=([+-]?[\d.]+)"
)
# Pre-Phase-6 format: raw probability-delta values (fg/mom need ×2 for log-odds).
# "fg=+0.0600 mom=+0.0000 news=+0.0000 mfld=-0.7483"
_FEAT_RE = re.compile(
_FEAT_RE_RAW = re.compile(
r"fg=([+-]?[\d.]+).*?mom=([+-]?[\d.]+).*?news=([+-]?[\d.]+).*?mfld=([+-]?[\d.]+)"
)
def _enrich_trade(trade: dict) -> dict:
"""Add days_open and signal_components to an open trade dict."""
"""Add days_open and signal_components (all log-odds) to an open trade dict."""
ts = trade.get("timestamp")
if ts is not None:
now = datetime.now(timezone.utc)
@@ -30,13 +37,47 @@ def _enrich_trade(trade: dict) -> dict:
else:
trade["days_open"] = None
# Prefer DB columns (Phase 6+) — exact, no parsing required.
if trade.get("feat_fg_lo") is not None:
trade["signal_components"] = {
"unit": "log_odds",
"fg": trade["feat_fg_lo"],
"mom": trade["feat_mom_lo"],
"news": trade["feat_news_lo"],
"mfld": trade["feat_mfld_lo"],
"btc_dom": trade.get("feat_btc_dom_lo"),
}
return trade
# Fallback: parse reasoning string (trades before Phase 6 DB columns exist).
reasoning = trade.get("reasoning") or ""
m = _FEAT_RE.search(reasoning)
trade["signal_components"] = (
{"fg": float(m.group(1)), "mom": float(m.group(2)),
"news": float(m.group(3)), "mfld": float(m.group(4))}
if m else None
)
m_lo = _FEAT_RE_LO.search(reasoning)
if m_lo:
# Phase 6 reasoning format — values already in log-odds.
trade["signal_components"] = {
"unit": "log_odds",
"fg": float(m_lo.group(1)),
"mom": float(m_lo.group(2)),
"news": float(m_lo.group(3)),
"mfld": float(m_lo.group(4)),
"btc_dom": float(m_lo.group(5)),
}
return trade
m_raw = _FEAT_RE_RAW.search(reasoning)
if m_raw:
# Pre-Phase-6 reasoning: fg/mom are raw probability-deltas → multiply ×2.
trade["signal_components"] = {
"unit": "log_odds",
"fg": float(m_raw.group(1)) * 2,
"mom": float(m_raw.group(2)) * 2,
"news": float(m_raw.group(3)),
"mfld": float(m_raw.group(4)),
"btc_dom": None,
}
return trade
trade["signal_components"] = None
return trade
db = Database()
@@ -87,6 +128,44 @@ async def get_trades(limit: int = 50, status: str = "open"):
return {"trades": trades, "count": len(trades), "status_filter": status}
@app.get("/api/metrics/features")
async def get_feature_metrics():
    """Serve the per-feature signal breakdown, every value in log-odds space.

    The response is ``{"features": {...}}``; the inner mapping is returned
    unchanged from ``db.compute_feature_metrics_from_db()`` and is keyed by
    feature name. Each feature entry carries:

        unit                        always "log_odds"
        materiality_threshold       |lo| cut-off for the "material" class
        triggered_count             trades with |feat_lo| > 0.0001
        material_count              trades with |feat_lo| >= threshold
        avg_contribution_lo         mean signed contribution (triggered)
        avg_abs_contribution_lo     mean absolute contribution (triggered)
        avg_edge_net_when_material  mean edge_net over material trades
        unrealized_pnl_est          estimated PnL of open triggered trades
        realized_pnl                summed close_pnl of triggered trades
        resolved_count              triggered trades with a close_pnl
        win_rate                    null while resolved_count < 5
        net_positive_count          triggered trades with feat_lo > 0
        net_negative_count          triggered trades with feat_lo < 0

    A ``resolved_count`` of 0 and a null ``win_rate`` simply mean that too
    few trades have closed yet.
    """
    return {"features": await db.compute_feature_metrics_from_db()}
@app.get("/api/trades/legacy")
async def get_legacy_trades():
    """List trades whose ``edge_net`` is NULL (pre-Phase-1 data).

    Rows come straight from ``db.get_legacy_incomplete_trades()``. They carry
    no edge_net, and their ``feat_*_lo`` columns may also be empty when the
    stored reasoning string has no parseable feature values.

    Returns ``{"trades": [...], "count": <number of rows>}``.
    """
    legacy_rows = await db.get_legacy_incomplete_trades()
    row_total = len(legacy_rows)
    return {"trades": legacy_rows, "count": row_total}
@app.get("/api/summary")
async def get_summary():
"""Dashboard summary card data.
+149 -2
View File
@@ -35,10 +35,12 @@ class Database:
id, market_id, question, direction, size_usdc,
entry_price, shares, fee_usdc, net_cost, timestamp, reasoning, paper,
edge_gross, edge_net, prior_prob, final_prob,
mid_price, spread_estimate, commission, family_key
mid_price, spread_estimate, commission, family_key,
feat_fg_lo, feat_mom_lo, feat_news_lo, feat_mfld_lo, feat_btc_dom_lo
) VALUES (
$1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,
$13,$14,$15,$16,$17,$18,$19,$20
$13,$14,$15,$16,$17,$18,$19,$20,
$21,$22,$23,$24,$25
)
ON CONFLICT (id) DO NOTHING
""",
@@ -48,6 +50,9 @@ class Database:
# Phase 1 fields
trade.edge_gross, trade.edge_net, trade.prior_prob, trade.final_prob,
trade.mid_price, trade.spread_estimate, trade.commission, trade.family_key,
# Phase 6 feature log-odds
trade.feat_fg_lo, trade.feat_mom_lo, trade.feat_news_lo,
trade.feat_mfld_lo, trade.feat_btc_dom_lo,
)
async def save_daily_metrics(self, metrics: dict) -> None:
@@ -264,3 +269,145 @@ class Database:
"SELECT * FROM metrics_daily ORDER BY timestamp DESC LIMIT $1", days
)
return [dict(r) for r in rows]
async def backfill_feature_columns(self) -> int:
    """Back-populate feat_*_lo for trades created before Phase 6.

    Parses the legacy reasoning string
    (format: 'fg=+0.0600 mom=... news=... mfld=...') inside a single UPDATE:

    * fg / mom raw values are multiplied by 2 to convert to log-odds.
    * news / mfld are stored as-is (already log-odds, no scaling).
    * feat_btc_dom_lo is not present in the old reasoning string and is
      left NULL for legacy trades.

    Only rows whose feat_fg_lo is still NULL, whose reasoning contains a
    numeric ``fg=`` token, and whose reasoning is not already in the
    ``fg_lo=`` format are touched, so repeated calls are harmless.

    Each capture group accepts only a signed decimal number. A looser
    capture would hand arbitrary text to the DOUBLE PRECISION cast, and one
    malformed token would then fail the entire statement.

    Returns:
        The number of rows updated, read from the trailing token of the
        status string returned by ``conn.execute``.
    """
    async with self._pool.acquire() as conn:
        result = await conn.execute("""
            UPDATE trades
            SET
                feat_fg_lo      = ((regexp_match(reasoning, 'fg=([+-]?[0-9]+[.]?[0-9]*)'))[1])::DOUBLE PRECISION * 2,
                feat_mom_lo     = ((regexp_match(reasoning, 'mom=([+-]?[0-9]+[.]?[0-9]*)'))[1])::DOUBLE PRECISION * 2,
                feat_news_lo    = ((regexp_match(reasoning, 'news=([+-]?[0-9]+[.]?[0-9]*)'))[1])::DOUBLE PRECISION,
                feat_mfld_lo    = ((regexp_match(reasoning, 'mfld=([+-]?[0-9]+[.]?[0-9]*)'))[1])::DOUBLE PRECISION,
                feat_btc_dom_lo = NULL
            WHERE feat_fg_lo IS NULL
              AND reasoning IS NOT NULL
              AND reasoning ~ 'fg=[+-]?[0-9]'
              AND reasoning NOT LIKE '%fg_lo=%'
        """)
    # The status string ends with the affected-row count (e.g. "UPDATE 3").
    updated = int(result.split()[-1]) if result else 0
    if updated:
        log.info("backfill_feature_columns: updated %d trade(s)", updated)
    return updated
async def get_legacy_incomplete_trades(self) -> list[dict]:
    """Fetch every trade whose edge_net is NULL, newest first.

    These are the pre-Phase-1 rows that carry no signal-quality data. Each
    record is converted to a plain dict containing the identifying columns,
    the close state, and the five feat_*_lo columns.
    """
    # SQL text is kept exactly as it was; only the Python around it changed.
    query = """
SELECT id, market_id, question, direction, net_cost, entry_price,
timestamp, reasoning, closed_at, close_reason, family_key,
feat_fg_lo, feat_mom_lo, feat_news_lo, feat_mfld_lo, feat_btc_dom_lo
FROM trades
WHERE edge_net IS NULL
ORDER BY timestamp DESC
"""
    async with self._pool.acquire() as conn:
        records = await conn.fetch(query)
    return list(map(dict, records))
async def compute_feature_metrics_from_db(self) -> dict:
    """Aggregate per-feature performance metrics, all in log-odds space.

    One SQL statement unpivots the five feat_*_lo columns (rows where the
    column is NULL are left out) and groups by feature. A feature that has
    no non-NULL rows therefore does not appear in the result at all.

    A trade is "triggered" for a feature when |feat_lo| > 0.0001 and
    "material" when |feat_lo| >= the feature's threshold (0.05 for fg, mom
    and btc_dom; 0.10 for news and mfld).

    Each feature maps to a dict with:
        unit                       — always "log_odds"
        materiality_threshold      — the threshold described above
        triggered_count            — number of triggered trades
        material_count             — number of material trades
        avg_contribution_lo        — mean signed value (triggered trades)
        avg_abs_contribution_lo    — mean absolute value (triggered trades)
        avg_edge_net_when_material — mean edge_net over material trades
        unrealized_pnl_est         — sum(edge_net * net_cost - fee_usdc)
                                     over triggered trades still open
        realized_pnl               — sum(close_pnl) over triggered trades
        resolved_count             — triggered trades with a close_pnl
        win_rate                   — wins / resolved_count, or None while
                                     resolved_count < 5
        net_positive_count         — trades with feat_lo > 0.0001
        net_negative_count         — trades with feat_lo < -0.0001
    """
    # SQL text is kept exactly as it was; only the Python around it changed.
    query = """
WITH feature_values AS (
SELECT 'fg' AS feature,
0.05::DOUBLE PRECISION AS mat_thresh,
feat_fg_lo AS fval,
edge_net, net_cost, fee_usdc, closed_at, close_pnl
FROM trades WHERE feat_fg_lo IS NOT NULL
UNION ALL
SELECT 'mom', 0.05, feat_mom_lo,
edge_net, net_cost, fee_usdc, closed_at, close_pnl
FROM trades WHERE feat_mom_lo IS NOT NULL
UNION ALL
SELECT 'news', 0.10, feat_news_lo,
edge_net, net_cost, fee_usdc, closed_at, close_pnl
FROM trades WHERE feat_news_lo IS NOT NULL
UNION ALL
SELECT 'mfld', 0.10, feat_mfld_lo,
edge_net, net_cost, fee_usdc, closed_at, close_pnl
FROM trades WHERE feat_mfld_lo IS NOT NULL
UNION ALL
SELECT 'btc_dom', 0.05, feat_btc_dom_lo,
edge_net, net_cost, fee_usdc, closed_at, close_pnl
FROM trades WHERE feat_btc_dom_lo IS NOT NULL
)
SELECT
feature,
mat_thresh AS materiality_threshold,
COUNT(*) FILTER (WHERE ABS(fval) > 0.0001) AS triggered_count,
COUNT(*) FILTER (WHERE ABS(fval) >= mat_thresh) AS material_count,
AVG(fval) FILTER (WHERE ABS(fval) > 0.0001) AS avg_contribution_lo,
AVG(ABS(fval)) FILTER (WHERE ABS(fval) > 0.0001) AS avg_abs_contribution_lo,
AVG(edge_net) FILTER (WHERE ABS(fval) >= mat_thresh
AND edge_net IS NOT NULL) AS avg_edge_net_when_material,
COALESCE(SUM(edge_net * net_cost - fee_usdc)
FILTER (WHERE ABS(fval) > 0.0001
AND closed_at IS NULL
AND edge_net IS NOT NULL), 0) AS unrealized_pnl_est,
COALESCE(SUM(close_pnl)
FILTER (WHERE ABS(fval) > 0.0001
AND close_pnl IS NOT NULL), 0) AS realized_pnl,
COUNT(*) FILTER (WHERE ABS(fval) > 0.0001
AND close_pnl IS NOT NULL
AND close_pnl > 0) AS wins_realized,
COUNT(*) FILTER (WHERE ABS(fval) > 0.0001
AND close_pnl IS NOT NULL) AS resolved_count,
COUNT(*) FILTER (WHERE fval > 0.0001) AS net_positive_count,
COUNT(*) FILTER (WHERE fval < -0.0001) AS net_negative_count
FROM feature_values
GROUP BY feature, mat_thresh
ORDER BY feature
"""
    async with self._pool.acquire() as conn:
        records = await conn.fetch(query)

    def as_count(row: dict, key: str) -> int:
        # Missing or NULL aggregates count as zero.
        return int(row.get(key) or 0)

    def as_amount(row: dict, key: str) -> float:
        # Missing or NULL sums are reported as 0.0.
        return float(row.get(key) or 0)

    metrics: dict[str, dict] = {}
    for record in records:
        row = dict(record)
        resolved = as_count(row, "resolved_count")
        wins = as_count(row, "wins_realized")
        metrics[row["feature"]] = {
            "unit": "log_odds",
            "materiality_threshold": float(row["materiality_threshold"]),
            "triggered_count": as_count(row, "triggered_count"),
            "material_count": as_count(row, "material_count"),
            "avg_contribution_lo": _f(row.get("avg_contribution_lo")),
            "avg_abs_contribution_lo": _f(row.get("avg_abs_contribution_lo")),
            "avg_edge_net_when_material": _f(row.get("avg_edge_net_when_material")),
            "unrealized_pnl_est": as_amount(row, "unrealized_pnl_est"),
            "realized_pnl": as_amount(row, "realized_pnl"),
            "resolved_count": resolved,
            # Too few closed trades make the ratio meaningless; report None.
            "win_rate": None if resolved < 5 else wins / resolved,
            "net_positive_count": as_count(row, "net_positive_count"),
            "net_negative_count": as_count(row, "net_negative_count"),
        }
    return metrics
def _f(v) -> Optional[float]:
"""None-safe float cast for asyncpg Decimal/None values."""
return float(v) if v is not None else None
+47
View File
@@ -121,6 +121,53 @@ CREATE INDEX IF NOT EXISTS idx_trades_closed ON trades(closed_at) WHERE closed_a
ALTER TABLE trades ADD COLUMN IF NOT EXISTS close_pnl DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS resolution DOUBLE PRECISION;
-- ─────────────────────────────────────────────────────────────────────────────
-- Phase 6: per-feature signal attribution — all values in log-odds space
--
-- All five features share a common unit (log-odds contribution to
-- the posterior estimate) so they can be compared directly:
--
-- feat_fg_lo = _fg_contribution × 2
-- Fear & Greed direction-adjusted delta, ×2 to log-odds.
-- Non-zero for every trade. Range ≈ ±0.12.
-- Materiality threshold: |lo| ≥ 0.05.
--
-- feat_mom_lo = _momentum_contribution × 2
-- Momentum delta (direction-adjusted), ×2 to log-odds.
-- Zero when |btc_change_24h| ≤ 2 %. Range ≈ ±0.15.
-- Materiality threshold: |lo| ≥ 0.05.
--
-- feat_news_lo = news_log_adj (already in log-odds, no scaling)
-- GNews sentiment × NEWS_LOGODDS_WEIGHT (1.5).
-- Zero for non-politics or when GNews budget exhausted.
-- Range ≈ ±1.5. Materiality threshold: |lo| ≥ 0.10.
--
-- feat_mfld_lo = manifold_log_adj (already in log-odds, no scaling)
-- Manifold divergence × MANIFOLD_LOGODDS_WEIGHT (0.6).
-- Zero when Manifold returned no result.
-- Range ≈ ±0.6. Materiality threshold: |lo| ≥ 0.10.
--
-- feat_btc_dom_lo = _btc_dom_contribution × 2
-- BTC-dominance alt-pressure delta, ×2 to log-odds.
-- Only fires for ETH / altcoin / general-crypto markets
-- when btc_dominance > 55 % or < 45 %.
-- Values: { -0.06, 0.0, +0.06 }.
-- Materiality threshold: |lo| ≥ 0.05.
--
-- NULL for pre-Phase-6 trades. Backfilled at startup via
-- Database.backfill_feature_columns() using reasoning-string regex
-- (fg_lo/mom_lo multiplied by 2 from raw; news_lo/mfld_lo taken directly;
-- btc_dom_lo cannot be backfilled and remains NULL for legacy trades).
-- ─────────────────────────────────────────────────────────────────────────────
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_fg_lo DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_mom_lo DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_news_lo DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_mfld_lo DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_btc_dom_lo DOUBLE PRECISION;
CREATE INDEX IF NOT EXISTS idx_trades_feat_fg ON trades(feat_fg_lo) WHERE feat_fg_lo IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_trades_feat_mfld ON trades(feat_mfld_lo) WHERE feat_mfld_lo IS NOT NULL;
-- ─────────────────────────────────────────────────────────────────────────────
-- Fix 3: extended metrics_daily columns for DB-computed metrics
--
+12
View File
@@ -49,6 +49,12 @@ class Trade:
commission: float = 0.0 # = POLYMARKET_FEE * size_usdc
# ── Phase 2: market family ────────────────────────────────────────────────
family_key: str = ""
# ── Phase 6: per-feature log-odds contributions ───────────────────────────
feat_fg_lo: float = 0.0
feat_mom_lo: float = 0.0
feat_news_lo: float = 0.0
feat_mfld_lo: float = 0.0
feat_btc_dom_lo: float = 0.0
def __str__(self) -> str:
return (
@@ -148,6 +154,12 @@ class PaperExecutor:
commission=commission,
# Phase 2 family
family_key=order.family_key,
# Phase 6 feature log-odds
feat_fg_lo=order.feat_fg_lo,
feat_mom_lo=order.feat_mom_lo,
feat_news_lo=order.feat_news_lo,
feat_mfld_lo=order.feat_mfld_lo,
feat_btc_dom_lo=order.feat_btc_dom_lo,
)
# Update paper portfolio
+1
View File
@@ -369,6 +369,7 @@ async def main() -> None:
db = Database()
await db.connect()
await db.run_migrations()
await db.backfill_feature_columns()
poly = PolymarketClient()
external = ExternalDataClient()
+12
View File
@@ -56,6 +56,12 @@ class Order:
family_key: str = ""
# Phase 4 — regime threshold applied
regime_min_edge: float = 0.10
# Phase 6 — per-feature log-odds contributions (see TradingSignal for semantics)
feat_fg_lo: float = 0.0
feat_mom_lo: float = 0.0
feat_news_lo: float = 0.0
feat_mfld_lo: float = 0.0
feat_btc_dom_lo: float = 0.0
class RiskManager:
@@ -147,4 +153,10 @@ class RiskManager:
family_key=signal.family_key,
# Phase 4 — regime
regime_min_edge=signal.regime_min_edge,
# Phase 6 — feature log-odds
feat_fg_lo=signal.feat_fg_lo,
feat_mom_lo=signal.feat_mom_lo,
feat_news_lo=signal.feat_news_lo,
feat_mfld_lo=signal.feat_mfld_lo,
feat_btc_dom_lo=signal.feat_btc_dom_lo,
)
+34 -9
View File
@@ -160,6 +160,16 @@ class TradingSignal:
family_key: str = ""
# ── Phase 4: regime ──────────────────────────────────────────────────────
regime_min_edge: float = 0.10
# ── Phase 6: per-feature log-odds contributions ───────────────────────────
# All values are in log-odds space for direct comparability.
# feat_fg_lo / feat_mom_lo: probability-delta × 2 → log-odds.
# feat_news_lo / feat_mfld_lo: already log-odds (no scaling).
# feat_btc_dom_lo: btc-dominance probability-delta × 2 → log-odds.
feat_fg_lo: float = 0.0
feat_mom_lo: float = 0.0
feat_news_lo: float = 0.0
feat_mfld_lo: float = 0.0
feat_btc_dom_lo: float = 0.0
class BayesianStrategy:
@@ -379,11 +389,14 @@ class BayesianStrategy:
adjustments.append(_fg_contribution)
# Signal 3: BTC dominance — hurts altcoins when high
_btc_dom_contribution = 0.0
if (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance > 55:
adjustments.append(-0.03 if is_price_above else 0.03)
_btc_dom_contribution = -0.03 if is_price_above else 0.03
adjustments.append(_btc_dom_contribution)
sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (high → alt pressure)")
elif (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance < 45:
adjustments.append(0.03 if is_price_above else -0.03)
_btc_dom_contribution = 0.03 if is_price_above else -0.03
adjustments.append(_btc_dom_contribution)
sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (low → alt season)")
# Signal 4: GNews sentiment (politics only, budget-gated)
@@ -448,12 +461,19 @@ class BayesianStrategy:
if manifold_log_adj != 0.0:
confidence = min(confidence_cap, confidence + 0.08)
# Per-feature contribution string for audit logging
# Per-feature log-odds contributions (Phase 6).
# fg / mom / btc_dom: probability-delta × 2 → log-odds.
# news / mfld: already log-odds (LOGODDS_WEIGHT already applied).
feat_fg_lo = _fg_contribution * 2
feat_mom_lo = _momentum_contribution * 2
feat_news_lo = news_log_adj
feat_mfld_lo = manifold_log_adj
feat_btc_dom_lo = _btc_dom_contribution * 2
feat_str = (
f"fg={_fg_contribution:+.3f} "
f"mom={_momentum_contribution:+.3f} "
f"mfld={manifold_log_adj:+.4f} "
f"news={news_log_adj:+.4f}"
f"fg_lo={feat_fg_lo:+.4f} mom_lo={feat_mom_lo:+.4f} "
f"news_lo={feat_news_lo:+.4f} mfld_lo={feat_mfld_lo:+.4f} "
f"btc_dom_lo={feat_btc_dom_lo:+.4f}"
)
# ── Phase 5: structured audit log ────────────────────────────────────
@@ -496,8 +516,7 @@ class BayesianStrategy:
f"regime_min={regime_min:.2f} | days={days} | "
f"family={family} | "
f"Direction={direction} | "
f"fg={_fg_contribution:+.4f} mom={_momentum_contribution:+.4f} "
f"news={news_log_adj:+.4f} mfld={manifold_log_adj:+.4f} | "
f"{feat_str} | "
f"Signals: {', '.join(sources[1:])}"
)
@@ -535,6 +554,12 @@ class BayesianStrategy:
family_key=family,
# Phase 4 new fields
regime_min_edge=regime_min,
# Phase 6 new fields — all in log-odds space
feat_fg_lo=feat_fg_lo,
feat_mom_lo=feat_mom_lo,
feat_news_lo=feat_news_lo,
feat_mfld_lo=feat_mfld_lo,
feat_btc_dom_lo=feat_btc_dom_lo,
)