feat(phase6): per-feature signal attribution in log-odds space
CI/CD / build-and-push (push) Successful in 1m56s

Adds feat_fg_lo / feat_mom_lo / feat_news_lo / feat_mfld_lo / feat_btc_dom_lo
to every trade, all normalized to log-odds contribution for direct comparability.

- fg / mom / btc_dom: raw probability-delta × 2 → log-odds
- news / mfld: already log-odds (LOGODDS_WEIGHT already applied), no scaling
- btc_dom tracked separately in bayesian.py instead of bundled in total_adj
- reasoning string updated to fg_lo= / mom_lo= notation for self-documentation

Schema: 5 new DOUBLE PRECISION columns + 2 partial indexes
Stack: TradingSignal → Order → Trade → save_trade all carry feat fields
Startup: backfill_feature_columns() recovers fg/mom/news/mfld from old
  reasoning strings (×2 applied to fg/mom); btc_dom_lo stays NULL for legacy
API: /api/metrics/features — triggered/material split per feature with
  two-level thresholds (0.05 for fg/mom/btc_dom, 0.10 for news/mfld)
API: /api/trades/legacy — exposes pre-Phase-1 trades (edge_net IS NULL)
API: _enrich_trade backward-compat: reads DB columns first, falls back to
  reasoning regex with unit conversion for pre-Phase-6 trades

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
chemavx
2026-04-22 07:04:53 +00:00
parent 9a5be27532
commit 8479a63174
7 changed files with 343 additions and 20 deletions
+149 -2
View File
@@ -35,10 +35,12 @@ class Database:
id, market_id, question, direction, size_usdc,
entry_price, shares, fee_usdc, net_cost, timestamp, reasoning, paper,
edge_gross, edge_net, prior_prob, final_prob,
mid_price, spread_estimate, commission, family_key
mid_price, spread_estimate, commission, family_key,
feat_fg_lo, feat_mom_lo, feat_news_lo, feat_mfld_lo, feat_btc_dom_lo
) VALUES (
$1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,
$13,$14,$15,$16,$17,$18,$19,$20
$13,$14,$15,$16,$17,$18,$19,$20,
$21,$22,$23,$24,$25
)
ON CONFLICT (id) DO NOTHING
""",
@@ -48,6 +50,9 @@ class Database:
# Phase 1 fields
trade.edge_gross, trade.edge_net, trade.prior_prob, trade.final_prob,
trade.mid_price, trade.spread_estimate, trade.commission, trade.family_key,
# Phase 6 feature log-odds
trade.feat_fg_lo, trade.feat_mom_lo, trade.feat_news_lo,
trade.feat_mfld_lo, trade.feat_btc_dom_lo,
)
async def save_daily_metrics(self, metrics: dict) -> None:
@@ -264,3 +269,145 @@ class Database:
"SELECT * FROM metrics_daily ORDER BY timestamp DESC LIMIT $1", days
)
return [dict(r) for r in rows]
async def backfill_feature_columns(self) -> int:
    """Back-populate feat_*_lo for trades created before Phase 6.

    Parses the legacy reasoning string
    (format: 'fg=+0.0600 mom=... news=... mfld=...') inside PostgreSQL:

    * fg / mom raw values are multiplied by 2 to convert to log-odds.
    * news / mfld are already in log-odds and are stored unscaled.
    * feat_btc_dom_lo cannot be recovered from the old reasoning string
      and remains NULL for legacy trades.

    Only a well-formed numeric literal is captured after each ``key=``.
    A token that is not numeric yields NULL for that column instead of
    making the ``::DOUBLE PRECISION`` cast raise, which would abort the
    whole UPDATE for every row. Rows whose ``fg=`` token is not numeric
    are skipped entirely, so feat_fg_lo (the "already backfilled" marker
    checked in the WHERE clause) is always set on the rows touched and no
    row is rewritten on every startup.

    Returns:
        The number of rows updated, parsed from the command status string
        returned by ``execute`` (e.g. ``"UPDATE 5"``).
    """
    # Signed decimal with optional exponent. `[.]` is used instead of an
    # escaped dot so the pattern needs no backslashes in this non-raw string.
    num = "[+-]?[0-9]*[.]?[0-9]+(?:[eE][+-]?[0-9]+)?"
    sql = f"""
        UPDATE trades
        SET
            feat_fg_lo      = ((regexp_match(reasoning, 'fg=({num})'))[1])::DOUBLE PRECISION * 2,
            feat_mom_lo     = ((regexp_match(reasoning, 'mom=({num})'))[1])::DOUBLE PRECISION * 2,
            feat_news_lo    = ((regexp_match(reasoning, 'news=({num})'))[1])::DOUBLE PRECISION,
            feat_mfld_lo    = ((regexp_match(reasoning, 'mfld=({num})'))[1])::DOUBLE PRECISION,
            feat_btc_dom_lo = NULL
        WHERE feat_fg_lo IS NULL
          AND reasoning IS NOT NULL
          AND reasoning LIKE '%fg=%'
          AND reasoning NOT LIKE '%fg_lo=%'
          AND reasoning ~ 'fg={num}'
    """
    async with self._pool.acquire() as conn:
        result = await conn.execute(sql)

    # The status string ends with the affected-row count.
    updated = int(result.split()[-1]) if result else 0
    if updated:
        log.info("backfill_feature_columns: updated %d trade(s)", updated)
    return updated
async def get_legacy_incomplete_trades(self) -> list[dict]:
    """Fetch all trades whose edge_net is NULL, newest first.

    These are the pre-Phase-1 rows that carry no signal-quality data.
    Each row is returned as a plain dict holding the identifying columns,
    close information and the five feat_*_lo attribution columns.
    """
    query = """
        SELECT id, market_id, question, direction, net_cost, entry_price,
        timestamp, reasoning, closed_at, close_reason, family_key,
        feat_fg_lo, feat_mom_lo, feat_news_lo, feat_mfld_lo, feat_btc_dom_lo
        FROM trades
        WHERE edge_net IS NULL
        ORDER BY timestamp DESC
    """
    async with self._pool.acquire() as connection:
        records = await connection.fetch(query)
    return list(map(dict, records))
async def compute_feature_metrics_from_db(self) -> dict:
    """Aggregate per-feature attribution metrics, all in log-odds space.

    One entry is produced per feature (fg, mom, news, mfld, btc_dom) that
    has at least one trade with a non-NULL feat_*_lo value; a feature with
    no such trades is absent from the result. A trade is "triggered" for a
    feature when |feat_lo| > 0.0001 and "material" when |feat_lo| reaches
    the feature's threshold (0.05 for fg / mom / btc_dom, 0.10 for
    news / mfld).

    Keys of each per-feature dict:
        unit                       -- always "log_odds"
        materiality_threshold      -- |lo| threshold for "material"
        triggered_count            -- number of triggered trades
        material_count             -- number of material trades
        avg_contribution_lo        -- mean signed lo (triggered trades)
        avg_abs_contribution_lo    -- mean absolute lo (triggered trades)
        avg_edge_net_when_material -- mean edge_net over material trades
                                      with a non-NULL edge_net
        unrealized_pnl_est         -- sum of (edge_net * net_cost - fee_usdc)
                                      over triggered trades that are still
                                      open (closed_at IS NULL)
        realized_pnl               -- sum of close_pnl over triggered trades
                                      with a non-NULL close_pnl
        resolved_count             -- triggered trades with a non-NULL
                                      close_pnl
        win_rate                   -- share of resolved trades with
                                      close_pnl > 0; None when
                                      resolved_count < 5
        net_positive_count         -- trades where feat_lo > 0.0001
        net_negative_count         -- trades where feat_lo < -0.0001
    """
    query = """
        WITH feature_values AS (
            SELECT 'fg' AS feature,
                   0.05::DOUBLE PRECISION AS mat_thresh,
                   feat_fg_lo AS fval,
                   edge_net, net_cost, fee_usdc, closed_at, close_pnl
            FROM trades WHERE feat_fg_lo IS NOT NULL
            UNION ALL
            SELECT 'mom', 0.05, feat_mom_lo,
                   edge_net, net_cost, fee_usdc, closed_at, close_pnl
            FROM trades WHERE feat_mom_lo IS NOT NULL
            UNION ALL
            SELECT 'news', 0.10, feat_news_lo,
                   edge_net, net_cost, fee_usdc, closed_at, close_pnl
            FROM trades WHERE feat_news_lo IS NOT NULL
            UNION ALL
            SELECT 'mfld', 0.10, feat_mfld_lo,
                   edge_net, net_cost, fee_usdc, closed_at, close_pnl
            FROM trades WHERE feat_mfld_lo IS NOT NULL
            UNION ALL
            SELECT 'btc_dom', 0.05, feat_btc_dom_lo,
                   edge_net, net_cost, fee_usdc, closed_at, close_pnl
            FROM trades WHERE feat_btc_dom_lo IS NOT NULL
        )
        SELECT
            feature,
            mat_thresh AS materiality_threshold,
            COUNT(*) FILTER (WHERE ABS(fval) > 0.0001) AS triggered_count,
            COUNT(*) FILTER (WHERE ABS(fval) >= mat_thresh) AS material_count,
            AVG(fval) FILTER (WHERE ABS(fval) > 0.0001) AS avg_contribution_lo,
            AVG(ABS(fval)) FILTER (WHERE ABS(fval) > 0.0001) AS avg_abs_contribution_lo,
            AVG(edge_net) FILTER (WHERE ABS(fval) >= mat_thresh
                                  AND edge_net IS NOT NULL) AS avg_edge_net_when_material,
            COALESCE(SUM(edge_net * net_cost - fee_usdc)
                     FILTER (WHERE ABS(fval) > 0.0001
                             AND closed_at IS NULL
                             AND edge_net IS NOT NULL), 0) AS unrealized_pnl_est,
            COALESCE(SUM(close_pnl)
                     FILTER (WHERE ABS(fval) > 0.0001
                             AND close_pnl IS NOT NULL), 0) AS realized_pnl,
            COUNT(*) FILTER (WHERE ABS(fval) > 0.0001
                             AND close_pnl IS NOT NULL
                             AND close_pnl > 0) AS wins_realized,
            COUNT(*) FILTER (WHERE ABS(fval) > 0.0001
                             AND close_pnl IS NOT NULL) AS resolved_count,
            COUNT(*) FILTER (WHERE fval > 0.0001) AS net_positive_count,
            COUNT(*) FILTER (WHERE fval < -0.0001) AS net_negative_count
        FROM feature_values
        GROUP BY feature, mat_thresh
        ORDER BY feature
    """
    async with self._pool.acquire() as connection:
        records = await connection.fetch(query)

    def _count(row: dict, key: str) -> int:
        # Missing or NULL aggregates are reported as zero.
        return int(row.get(key) or 0)

    metrics: dict[str, dict] = {}
    for row in map(dict, records):
        resolved = _count(row, "resolved_count")
        wins = _count(row, "wins_realized")

        # Too few resolved trades make the ratio meaningless; report None.
        win_rate = None
        if resolved >= 5:
            win_rate = wins / resolved

        metrics[row["feature"]] = {
            "unit": "log_odds",
            "materiality_threshold": float(row["materiality_threshold"]),
            "triggered_count": _count(row, "triggered_count"),
            "material_count": _count(row, "material_count"),
            "avg_contribution_lo": _f(row.get("avg_contribution_lo")),
            "avg_abs_contribution_lo": _f(row.get("avg_abs_contribution_lo")),
            "avg_edge_net_when_material": _f(row.get("avg_edge_net_when_material")),
            "unrealized_pnl_est": float(row.get("unrealized_pnl_est") or 0),
            "realized_pnl": float(row.get("realized_pnl") or 0),
            "resolved_count": resolved,
            "win_rate": win_rate,
            "net_positive_count": _count(row, "net_positive_count"),
            "net_negative_count": _count(row, "net_negative_count"),
        }
    return metrics
def _f(v) -> Optional[float]:
"""None-safe float cast for asyncpg Decimal/None values."""
return float(v) if v is not None else None
+47
View File
@@ -121,6 +121,53 @@ CREATE INDEX IF NOT EXISTS idx_trades_closed ON trades(closed_at) WHERE closed_a
ALTER TABLE trades ADD COLUMN IF NOT EXISTS close_pnl DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS resolution DOUBLE PRECISION;
-- ─────────────────────────────────────────────────────────────────────────────
-- Phase 6: per-feature signal attribution — all values in log-odds space
--
-- All five features share a common unit (log-odds contribution to
-- the posterior estimate) so they can be compared directly:
--
-- feat_fg_lo = _fg_contribution × 2
-- Fear & Greed direction-adjusted delta, ×2 to log-odds.
-- Non-zero for every trade. Range ≈ ±0.12.
-- Materiality threshold: |lo| ≥ 0.05.
--
-- feat_mom_lo = _momentum_contribution × 2
-- Momentum delta (direction-adjusted), ×2 to log-odds.
-- Zero when |btc_change_24h| ≤ 2 %. Range ≈ ±0.15.
-- Materiality threshold: |lo| ≥ 0.05.
--
-- feat_news_lo = news_log_adj (already in log-odds, no scaling)
-- GNews sentiment × NEWS_LOGODDS_WEIGHT (1.5).
-- Zero for non-politics or when GNews budget exhausted.
-- Range ≈ ±1.5. Materiality threshold: |lo| ≥ 0.10.
--
-- feat_mfld_lo = manifold_log_adj (already in log-odds, no scaling)
-- Manifold divergence × MANIFOLD_LOGODDS_WEIGHT (0.6).
-- Zero when Manifold returned no result.
-- Range ≈ ±0.6. Materiality threshold: |lo| ≥ 0.10.
--
-- feat_btc_dom_lo = _btc_dom_contribution × 2
-- BTC-dominance alt-pressure delta, ×2 to log-odds.
-- Only fires for ETH / altcoin / general-crypto markets
-- when btc_dominance > 55 % or < 45 %.
-- Values: { -0.06, 0.0, +0.06 }.
-- Materiality threshold: |lo| ≥ 0.05.
--
-- NULL for pre-Phase-6 trades. Backfilled at startup via
-- Database.backfill_feature_columns() using reasoning-string regex
-- (fg_lo/mom_lo multiplied by 2 from raw; news_lo/mfld_lo taken directly;
-- btc_dom_lo cannot be backfilled and remains NULL for legacy trades).
-- ─────────────────────────────────────────────────────────────────────────────
-- Phase 6 attribution columns. ADD COLUMN IF NOT EXISTS keeps these
-- statements safe to re-run against a database that already has them.
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_fg_lo DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_mom_lo DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_news_lo DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_mfld_lo DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_btc_dom_lo DOUBLE PRECISION;
-- Partial indexes: only rows where the feature value is non-NULL are indexed.
-- Only feat_fg_lo and feat_mfld_lo are indexed here; the other three
-- feature columns have no index in this section.
CREATE INDEX IF NOT EXISTS idx_trades_feat_fg ON trades(feat_fg_lo) WHERE feat_fg_lo IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_trades_feat_mfld ON trades(feat_mfld_lo) WHERE feat_mfld_lo IS NOT NULL;
-- ─────────────────────────────────────────────────────────────────────────────
-- Fix 3: extended metrics_daily columns for DB-computed metrics
--