feat(phase6): per-feature signal attribution in log-odds space
CI/CD / build-and-push (push) Successful in 1m56s
CI/CD / build-and-push (push) Successful in 1m56s
Adds feat_fg_lo / feat_mom_lo / feat_news_lo / feat_mfld_lo / feat_btc_dom_lo to every trade, all normalized to log-odds contribution for direct comparability.
- fg / mom / btc_dom: raw probability-delta × 2 → log-odds
- news / mfld: already log-odds (LOGODDS_WEIGHT already applied), no scaling
- btc_dom tracked separately in bayesian.py instead of bundled in total_adj
- reasoning string updated to fg_lo= / mom_lo= notation for self-documentation

Schema: 5 new DOUBLE PRECISION columns + 2 partial indexes
Stack: TradingSignal → Order → Trade → save_trade all carry feat fields
Startup: backfill_feature_columns() recovers fg/mom/news/mfld from old reasoning strings (×2 applied to fg/mom); btc_dom_lo stays NULL for legacy
API: /api/metrics/features — triggered/material split per feature with two-level thresholds (0.05 for fg/mom/btc_dom, 0.10 for news/mfld)
API: /api/trades/legacy — exposes pre-Phase-1 trades (edge_net IS NULL)
API: _enrich_trade backward-compat: reads DB columns first, falls back to reasoning regex with unit conversion for pre-Phase-6 trades

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+149
-2
@@ -35,10 +35,12 @@ class Database:
|
||||
id, market_id, question, direction, size_usdc,
|
||||
entry_price, shares, fee_usdc, net_cost, timestamp, reasoning, paper,
|
||||
edge_gross, edge_net, prior_prob, final_prob,
|
||||
mid_price, spread_estimate, commission, family_key
|
||||
mid_price, spread_estimate, commission, family_key,
|
||||
feat_fg_lo, feat_mom_lo, feat_news_lo, feat_mfld_lo, feat_btc_dom_lo
|
||||
) VALUES (
|
||||
$1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,
|
||||
$13,$14,$15,$16,$17,$18,$19,$20
|
||||
$13,$14,$15,$16,$17,$18,$19,$20,
|
||||
$21,$22,$23,$24,$25
|
||||
)
|
||||
ON CONFLICT (id) DO NOTHING
|
||||
""",
|
||||
@@ -48,6 +50,9 @@ class Database:
|
||||
# Phase 1 fields
|
||||
trade.edge_gross, trade.edge_net, trade.prior_prob, trade.final_prob,
|
||||
trade.mid_price, trade.spread_estimate, trade.commission, trade.family_key,
|
||||
# Phase 6 feature log-odds
|
||||
trade.feat_fg_lo, trade.feat_mom_lo, trade.feat_news_lo,
|
||||
trade.feat_mfld_lo, trade.feat_btc_dom_lo,
|
||||
)
|
||||
|
||||
async def save_daily_metrics(self, metrics: dict) -> None:
|
||||
@@ -264,3 +269,145 @@ class Database:
|
||||
"SELECT * FROM metrics_daily ORDER BY timestamp DESC LIMIT $1", days
|
||||
)
|
||||
return [dict(r) for r in rows]
|
||||
|
||||
async def backfill_feature_columns(self) -> int:
    """Back-populate feat_*_lo for trades created before Phase 6.

    Parses the legacy reasoning string (format:
    'fg=+0.0600 mom=... news=... mfld=...') directly in SQL:

    * fg / mom raw values are multiplied by 2 to convert to log-odds.
    * news / mfld are already in log-odds and are copied without scaling.
    * feat_btc_dom_lo cannot be recovered from the old reasoning string, so
      this statement does not write to it at all; it keeps whatever value
      the row already has (NULL for legacy trades).

    Only rows where feat_fg_lo IS NULL and whose reasoning contains 'fg='
    but not the newer 'fg_lo=' notation are updated.

    Returns:
        The number of rows updated, taken from the last word of the status
        string returned by ``conn.execute`` (0 when the status is empty).
    """
    # NOTE(review): the ::DOUBLE PRECISION casts are unguarded, so a
    # reasoning string with a non-numeric token after 'fg=' / 'mom=' /
    # 'news=' / 'mfld=' would presumably make the whole UPDATE fail —
    # confirm legacy data never contains such tokens.
    async with self._pool.acquire() as conn:
        result = await conn.execute("""
            UPDATE trades
            SET
                feat_fg_lo   = ((regexp_match(reasoning, 'fg=([^ |]+)'))[1])::DOUBLE PRECISION * 2,
                feat_mom_lo  = ((regexp_match(reasoning, 'mom=([^ |]+)'))[1])::DOUBLE PRECISION * 2,
                feat_news_lo = ((regexp_match(reasoning, 'news=([^ |]+)'))[1])::DOUBLE PRECISION,
                feat_mfld_lo = ((regexp_match(reasoning, 'mfld=([^ |]+)'))[1])::DOUBLE PRECISION
            WHERE feat_fg_lo IS NULL
              AND reasoning IS NOT NULL
              AND reasoning LIKE '%fg=%'
              AND reasoning NOT LIKE '%fg_lo=%'
        """)

    # The status string looks like 'UPDATE <n>'; the trailing word is the count.
    updated = int(result.split()[-1]) if result else 0
    if updated:
        log.info("backfill_feature_columns: updated %d trade(s)", updated)
    return updated
|
||||
|
||||
async def get_legacy_incomplete_trades(self) -> list[dict]:
    """Fetch every trade whose edge_net is NULL, newest first.

    These are the pre-Phase-1 rows that carry no signal-quality data.
    Each row is returned as a plain dict containing the identifying,
    pricing, closing and feat_*_lo columns selected below.
    """
    query = """
        SELECT id, market_id, question, direction, net_cost, entry_price,
               timestamp, reasoning, closed_at, close_reason, family_key,
               feat_fg_lo, feat_mom_lo, feat_news_lo, feat_mfld_lo, feat_btc_dom_lo
        FROM trades
        WHERE edge_net IS NULL
        ORDER BY timestamp DESC
    """
    async with self._pool.acquire() as conn:
        records = await conn.fetch(query)
    # Convert driver records into ordinary dicts for the caller.
    return list(map(dict, records))
|
||||
|
||||
async def compute_feature_metrics_from_db(self) -> dict:
    """Aggregate per-feature performance metrics in log-odds space.

    The query stacks the five feat_*_lo columns (fg, mom, news, mfld,
    btc_dom) into one (feature, value) relation, skipping NULL values, and
    aggregates per feature. A feature is present in the returned mapping
    only when the query yields a row for it, i.e. when at least one trade
    has a non-NULL value in that column.

    "Triggered" means |feat_lo| > 0.0001; "material" means
    |feat_lo| >= the feature's materiality threshold (0.05 for
    fg / mom / btc_dom, 0.10 for news / mfld).

    Returns:
        A dict keyed by feature name. Each value holds:

        unit                        -- always "log_odds"
        materiality_threshold       -- threshold used for "material"
        triggered_count             -- number of triggered trades
        material_count              -- number of material trades
        avg_contribution_lo         -- mean signed value over triggered trades
        avg_abs_contribution_lo     -- mean absolute value over triggered trades
        avg_edge_net_when_material  -- mean edge_net over material trades
        unrealized_pnl_est          -- sum of edge_net * net_cost - fee_usdc
                                       over triggered trades with closed_at NULL
        realized_pnl                -- sum of close_pnl over triggered trades
                                       with a non-NULL close_pnl
        resolved_count              -- triggered trades with non-NULL close_pnl
        win_rate                    -- wins / resolved_count, or None when
                                       resolved_count < 5
        net_positive_count          -- trades with feat_lo > 0.0001
        net_negative_count          -- trades with feat_lo < -0.0001
    """
    query = """
        WITH feature_values AS (
            SELECT 'fg' AS feature,
                   0.05::DOUBLE PRECISION AS mat_thresh,
                   feat_fg_lo AS fval,
                   edge_net, net_cost, fee_usdc, closed_at, close_pnl
            FROM trades WHERE feat_fg_lo IS NOT NULL
            UNION ALL
            SELECT 'mom', 0.05, feat_mom_lo,
                   edge_net, net_cost, fee_usdc, closed_at, close_pnl
            FROM trades WHERE feat_mom_lo IS NOT NULL
            UNION ALL
            SELECT 'news', 0.10, feat_news_lo,
                   edge_net, net_cost, fee_usdc, closed_at, close_pnl
            FROM trades WHERE feat_news_lo IS NOT NULL
            UNION ALL
            SELECT 'mfld', 0.10, feat_mfld_lo,
                   edge_net, net_cost, fee_usdc, closed_at, close_pnl
            FROM trades WHERE feat_mfld_lo IS NOT NULL
            UNION ALL
            SELECT 'btc_dom', 0.05, feat_btc_dom_lo,
                   edge_net, net_cost, fee_usdc, closed_at, close_pnl
            FROM trades WHERE feat_btc_dom_lo IS NOT NULL
        )
        SELECT
            feature,
            mat_thresh AS materiality_threshold,
            COUNT(*) FILTER (WHERE ABS(fval) > 0.0001) AS triggered_count,
            COUNT(*) FILTER (WHERE ABS(fval) >= mat_thresh) AS material_count,
            AVG(fval) FILTER (WHERE ABS(fval) > 0.0001) AS avg_contribution_lo,
            AVG(ABS(fval)) FILTER (WHERE ABS(fval) > 0.0001) AS avg_abs_contribution_lo,
            AVG(edge_net) FILTER (WHERE ABS(fval) >= mat_thresh
                                  AND edge_net IS NOT NULL) AS avg_edge_net_when_material,
            COALESCE(SUM(edge_net * net_cost - fee_usdc)
                     FILTER (WHERE ABS(fval) > 0.0001
                             AND closed_at IS NULL
                             AND edge_net IS NOT NULL), 0) AS unrealized_pnl_est,
            COALESCE(SUM(close_pnl)
                     FILTER (WHERE ABS(fval) > 0.0001
                             AND close_pnl IS NOT NULL), 0) AS realized_pnl,
            COUNT(*) FILTER (WHERE ABS(fval) > 0.0001
                             AND close_pnl IS NOT NULL
                             AND close_pnl > 0) AS wins_realized,
            COUNT(*) FILTER (WHERE ABS(fval) > 0.0001
                             AND close_pnl IS NOT NULL) AS resolved_count,
            COUNT(*) FILTER (WHERE fval > 0.0001) AS net_positive_count,
            COUNT(*) FILTER (WHERE fval < -0.0001) AS net_negative_count
        FROM feature_values
        GROUP BY feature, mat_thresh
        ORDER BY feature
    """
    async with self._pool.acquire() as conn:
        rows = await conn.fetch(query)

    def _count(record: dict, key: str) -> int:
        # Missing or NULL counters are reported as 0.
        return int(record.get(key) or 0)

    metrics: dict[str, dict] = {}
    for row in rows:
        rec = dict(row)
        resolved_count = _count(rec, "resolved_count")
        wins_realized = _count(rec, "wins_realized")

        # A win rate over fewer than 5 resolved trades is not reported.
        if resolved_count >= 5:
            win_rate = wins_realized / resolved_count
        else:
            win_rate = None

        metrics[rec["feature"]] = {
            "unit": "log_odds",
            "materiality_threshold": float(rec["materiality_threshold"]),
            "triggered_count": _count(rec, "triggered_count"),
            "material_count": _count(rec, "material_count"),
            "avg_contribution_lo": _f(rec.get("avg_contribution_lo")),
            "avg_abs_contribution_lo": _f(rec.get("avg_abs_contribution_lo")),
            "avg_edge_net_when_material": _f(rec.get("avg_edge_net_when_material")),
            "unrealized_pnl_est": float(rec.get("unrealized_pnl_est") or 0),
            "realized_pnl": float(rec.get("realized_pnl") or 0),
            "resolved_count": resolved_count,
            "win_rate": win_rate,
            "net_positive_count": _count(rec, "net_positive_count"),
            "net_negative_count": _count(rec, "net_negative_count"),
        }
    return metrics
|
||||
|
||||
|
||||
def _f(v) -> Optional[float]:
    """Convert *v* to ``float``, passing ``None`` through unchanged.

    Used for nullable numeric values (e.g. Decimal) coming back from the
    database driver.
    """
    if v is None:
        return None
    return float(v)
|
||||
|
||||
Reference in New Issue
Block a user