feat(phase6): per-feature signal attribution in log-odds space
CI/CD / build-and-push (push) Successful in 1m56s
CI/CD / build-and-push (push) Successful in 1m56s
Adds feat_fg_lo / feat_mom_lo / feat_news_lo / feat_mfld_lo / feat_btc_dom_lo to every trade, all normalized to log-odds contribution for direct comparability.

- fg / mom / btc_dom: raw probability-delta × 2 → log-odds
- news / mfld: already log-odds (LOGODDS_WEIGHT already applied), no scaling
- btc_dom tracked separately in bayesian.py instead of bundled in total_adj
- reasoning string updated to fg_lo= / mom_lo= notation for self-documentation

Schema: 5 new DOUBLE PRECISION columns + 2 partial indexes
Stack: TradingSignal → Order → Trade → save_trade all carry feat fields
Startup: backfill_feature_columns() recovers fg/mom/news/mfld from old reasoning strings (×2 applied to fg/mom); btc_dom_lo stays NULL for legacy
API: /api/metrics/features — triggered/material split per feature with two-level thresholds (0.05 for fg/mom/btc_dom, 0.10 for news/mfld)
API: /api/trades/legacy — exposes pre-Phase-1 trades (edge_net IS NULL)
API: _enrich_trade backward-compat: reads DB columns first, falls back to reasoning regex with unit conversion for pre-Phase-6 trades

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+149
-2
@@ -35,10 +35,12 @@ class Database:
|
||||
id, market_id, question, direction, size_usdc,
|
||||
entry_price, shares, fee_usdc, net_cost, timestamp, reasoning, paper,
|
||||
edge_gross, edge_net, prior_prob, final_prob,
|
||||
mid_price, spread_estimate, commission, family_key
|
||||
mid_price, spread_estimate, commission, family_key,
|
||||
feat_fg_lo, feat_mom_lo, feat_news_lo, feat_mfld_lo, feat_btc_dom_lo
|
||||
) VALUES (
|
||||
$1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,
|
||||
$13,$14,$15,$16,$17,$18,$19,$20
|
||||
$13,$14,$15,$16,$17,$18,$19,$20,
|
||||
$21,$22,$23,$24,$25
|
||||
)
|
||||
ON CONFLICT (id) DO NOTHING
|
||||
""",
|
||||
@@ -48,6 +50,9 @@ class Database:
|
||||
# Phase 1 fields
|
||||
trade.edge_gross, trade.edge_net, trade.prior_prob, trade.final_prob,
|
||||
trade.mid_price, trade.spread_estimate, trade.commission, trade.family_key,
|
||||
# Phase 6 feature log-odds
|
||||
trade.feat_fg_lo, trade.feat_mom_lo, trade.feat_news_lo,
|
||||
trade.feat_mfld_lo, trade.feat_btc_dom_lo,
|
||||
)
|
||||
|
||||
async def save_daily_metrics(self, metrics: dict) -> None:
|
||||
@@ -264,3 +269,145 @@ class Database:
|
||||
"SELECT * FROM metrics_daily ORDER BY timestamp DESC LIMIT $1", days
|
||||
)
|
||||
return [dict(r) for r in rows]
|
||||
|
||||
async def backfill_feature_columns(self) -> int:
    """Back-populate feat_*_lo for trades created before Phase 6.

    Parses the legacy reasoning string
    (format: 'fg=+0.0600 mom=... news=... mfld=...').

    * fg / mom raw values are multiplied by 2 to convert to log-odds.
    * news / mfld are already in log-odds (no scaling).
    * feat_btc_dom_lo cannot be recovered from the old reasoning string
      and remains NULL for legacy trades.

    Only text that looks like a numeric literal is captured after each
    ``name=`` marker. A malformed token therefore yields NULL for that
    column instead of making the ``::DOUBLE PRECISION`` cast raise, which
    would abort the whole UPDATE (this method is awaited during startup).
    Rows whose ``fg=`` token is not numeric are left untouched.

    Returns:
        The number of rows updated.
    """
    # Signed decimal with optional fraction / exponent, e.g. "+0.0600".
    number = r"[+-]?(?:[0-9]+[.]?[0-9]*|[.][0-9]+)(?:[eE][+-]?[0-9]+)?"
    patterns = tuple(
        f"{name}=({number})" for name in ("fg", "mom", "news", "mfld")
    )

    async with self._pool.acquire() as conn:
        # Patterns are passed as bind parameters ($1..$4); $1 is reused in
        # the WHERE clause so a row is only updated when its fg value can
        # actually be extracted (keeps the returned count meaningful).
        result = await conn.execute(
            """
            UPDATE trades
            SET
                feat_fg_lo      = ((regexp_match(reasoning, $1))[1])::DOUBLE PRECISION * 2,
                feat_mom_lo     = ((regexp_match(reasoning, $2))[1])::DOUBLE PRECISION * 2,
                feat_news_lo    = ((regexp_match(reasoning, $3))[1])::DOUBLE PRECISION,
                feat_mfld_lo    = ((regexp_match(reasoning, $4))[1])::DOUBLE PRECISION,
                feat_btc_dom_lo = NULL
            WHERE feat_fg_lo IS NULL
              AND reasoning IS NOT NULL
              AND reasoning LIKE '%fg=%'
              AND reasoning NOT LIKE '%fg_lo=%'
              AND reasoning ~ $1
            """,
            *patterns,
        )

    # The command status tag looks like "UPDATE <n>"; fall back to 0 if the
    # trailing token is missing or not a plain integer.
    tail = result.split()[-1] if result else ""
    updated = int(tail) if tail.isdigit() else 0
    if updated:
        log.info("backfill_feature_columns: updated %d trade(s)", updated)
    return updated
|
||||
|
||||
async def get_legacy_incomplete_trades(self) -> list[dict]:
    """Fetch every trade whose edge_net is NULL, newest first.

    These are the pre-Phase-1 rows that carry no signal-quality data.
    Each row is returned as a plain dict holding the identifying columns,
    close information, family_key and the five feat_*_lo columns.
    """
    query = """
        SELECT id, market_id, question, direction, net_cost, entry_price,
               timestamp, reasoning, closed_at, close_reason, family_key,
               feat_fg_lo, feat_mom_lo, feat_news_lo, feat_mfld_lo, feat_btc_dom_lo
        FROM trades
        WHERE edge_net IS NULL
        ORDER BY timestamp DESC
    """
    async with self._pool.acquire() as conn:
        records = await conn.fetch(query)
    return list(map(dict, records))
|
||||
|
||||
async def compute_feature_metrics_from_db(self) -> dict:
    """Aggregate per-feature performance figures in log-odds space.

    The query unpivots the five feat_*_lo columns (fg, mom, news, mfld,
    btc_dom) into one row per (trade, feature), skipping NULL values, and
    aggregates per feature. A feature with no non-NULL rows is absent from
    the result.

    Returns:
        A dict keyed by feature name; each value contains:

        unit                       -- always "log_odds"
        materiality_threshold      -- |lo| cut-off for "material"
                                      (0.05 for fg/mom/btc_dom, 0.10 for
                                      news/mfld)
        triggered_count            -- trades with |feat_lo| > 0.0001
        material_count             -- trades with |feat_lo| >= threshold
        avg_contribution_lo        -- mean signed lo over triggered trades
        avg_abs_contribution_lo    -- mean |lo| over triggered trades
        avg_edge_net_when_material -- mean edge_net over material trades
        unrealized_pnl_est         -- sum of edge_net*net_cost - fee_usdc
                                      over triggered trades that are still
                                      open (closed_at IS NULL)
        realized_pnl               -- sum of close_pnl over triggered
                                      trades that have a close_pnl
        resolved_count             -- triggered trades with a close_pnl
        win_rate                   -- wins / resolved_count, or None when
                                      resolved_count < 5
        net_positive_count         -- trades with feat_lo > 0.0001
        net_negative_count         -- trades with feat_lo < -0.0001
    """
    query = """
        WITH feature_values AS (
            SELECT 'fg' AS feature,
                   0.05::DOUBLE PRECISION AS mat_thresh,
                   feat_fg_lo AS fval,
                   edge_net, net_cost, fee_usdc, closed_at, close_pnl
            FROM trades WHERE feat_fg_lo IS NOT NULL
            UNION ALL
            SELECT 'mom', 0.05, feat_mom_lo,
                   edge_net, net_cost, fee_usdc, closed_at, close_pnl
            FROM trades WHERE feat_mom_lo IS NOT NULL
            UNION ALL
            SELECT 'news', 0.10, feat_news_lo,
                   edge_net, net_cost, fee_usdc, closed_at, close_pnl
            FROM trades WHERE feat_news_lo IS NOT NULL
            UNION ALL
            SELECT 'mfld', 0.10, feat_mfld_lo,
                   edge_net, net_cost, fee_usdc, closed_at, close_pnl
            FROM trades WHERE feat_mfld_lo IS NOT NULL
            UNION ALL
            SELECT 'btc_dom', 0.05, feat_btc_dom_lo,
                   edge_net, net_cost, fee_usdc, closed_at, close_pnl
            FROM trades WHERE feat_btc_dom_lo IS NOT NULL
        )
        SELECT
            feature,
            mat_thresh AS materiality_threshold,
            COUNT(*) FILTER (WHERE ABS(fval) > 0.0001) AS triggered_count,
            COUNT(*) FILTER (WHERE ABS(fval) >= mat_thresh) AS material_count,
            AVG(fval) FILTER (WHERE ABS(fval) > 0.0001) AS avg_contribution_lo,
            AVG(ABS(fval)) FILTER (WHERE ABS(fval) > 0.0001) AS avg_abs_contribution_lo,
            AVG(edge_net) FILTER (WHERE ABS(fval) >= mat_thresh
                                  AND edge_net IS NOT NULL) AS avg_edge_net_when_material,
            COALESCE(SUM(edge_net * net_cost - fee_usdc)
                     FILTER (WHERE ABS(fval) > 0.0001
                             AND closed_at IS NULL
                             AND edge_net IS NOT NULL), 0) AS unrealized_pnl_est,
            COALESCE(SUM(close_pnl)
                     FILTER (WHERE ABS(fval) > 0.0001
                             AND close_pnl IS NOT NULL), 0) AS realized_pnl,
            COUNT(*) FILTER (WHERE ABS(fval) > 0.0001
                             AND close_pnl IS NOT NULL
                             AND close_pnl > 0) AS wins_realized,
            COUNT(*) FILTER (WHERE ABS(fval) > 0.0001
                             AND close_pnl IS NOT NULL) AS resolved_count,
            COUNT(*) FILTER (WHERE fval > 0.0001) AS net_positive_count,
            COUNT(*) FILTER (WHERE fval < -0.0001) AS net_negative_count
        FROM feature_values
        GROUP BY feature, mat_thresh
        ORDER BY feature
    """
    async with self._pool.acquire() as conn:
        records = await conn.fetch(query)

    metrics: dict[str, dict] = {}
    for record in records:
        row = dict(record)
        name = row["feature"]
        n_resolved = int(row.get("resolved_count") or 0)
        n_wins = int(row.get("wins_realized") or 0)

        # Too few resolved trades make a win rate meaningless; report None.
        if n_resolved >= 5:
            win_rate = n_wins / n_resolved
        else:
            win_rate = None

        # Assembled step by step so the key order of the payload is kept.
        entry: dict = {
            "unit": "log_odds",
            "materiality_threshold": float(row["materiality_threshold"]),
        }
        for key in ("triggered_count", "material_count"):
            entry[key] = int(row.get(key) or 0)
        for key in (
            "avg_contribution_lo",
            "avg_abs_contribution_lo",
            "avg_edge_net_when_material",
        ):
            entry[key] = _f(row.get(key))
        for key in ("unrealized_pnl_est", "realized_pnl"):
            entry[key] = float(row.get(key) or 0)
        entry["resolved_count"] = n_resolved
        entry["win_rate"] = win_rate
        for key in ("net_positive_count", "net_negative_count"):
            entry[key] = int(row.get(key) or 0)

        metrics[name] = entry
    return metrics
|
||||
|
||||
|
||||
def _f(v) -> Optional[float]:
|
||||
"""None-safe float cast for asyncpg Decimal/None values."""
|
||||
return float(v) if v is not None else None
|
||||
|
||||
@@ -121,6 +121,53 @@ CREATE INDEX IF NOT EXISTS idx_trades_closed ON trades(closed_at) WHERE closed_a
|
||||
ALTER TABLE trades ADD COLUMN IF NOT EXISTS close_pnl DOUBLE PRECISION;
|
||||
ALTER TABLE trades ADD COLUMN IF NOT EXISTS resolution DOUBLE PRECISION;
|
||||
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
-- Phase 6: per-feature signal attribution — all values in log-odds space
|
||||
--
|
||||
-- All five features share a common unit (log-odds contribution to
|
||||
-- the posterior estimate) so they can be compared directly:
|
||||
--
|
||||
-- feat_fg_lo = _fg_contribution × 2
|
||||
-- Fear & Greed direction-adjusted delta, ×2 to log-odds.
|
||||
-- Non-zero for every trade. Range ≈ ±0.12.
|
||||
-- Materiality threshold: |lo| ≥ 0.05.
|
||||
--
|
||||
-- feat_mom_lo = _momentum_contribution × 2
|
||||
-- Momentum delta (direction-adjusted), ×2 to log-odds.
|
||||
-- Zero when |btc_change_24h| ≤ 2 %. Range ≈ ±0.15.
|
||||
-- Materiality threshold: |lo| ≥ 0.05.
|
||||
--
|
||||
-- feat_news_lo = news_log_adj (already in log-odds, no scaling)
|
||||
-- GNews sentiment × NEWS_LOGODDS_WEIGHT (1.5).
|
||||
-- Zero for non-politics or when GNews budget exhausted.
|
||||
-- Range ≈ ±1.5. Materiality threshold: |lo| ≥ 0.10.
|
||||
--
|
||||
-- feat_mfld_lo = manifold_log_adj (already in log-odds, no scaling)
|
||||
-- Manifold divergence × MANIFOLD_LOGODDS_WEIGHT (0.6).
|
||||
-- Zero when Manifold returned no result.
|
||||
-- Range ≈ ±0.6. Materiality threshold: |lo| ≥ 0.10.
|
||||
--
|
||||
-- feat_btc_dom_lo = _btc_dom_contribution × 2
|
||||
-- BTC-dominance alt-pressure delta, ×2 to log-odds.
|
||||
-- Only fires for ETH / altcoin / general-crypto markets
|
||||
-- when btc_dominance > 55 % or < 45 %.
|
||||
-- Values: { −0.06, 0.0, +0.06 }.
|
||||
-- Materiality threshold: |lo| ≥ 0.05.
|
||||
--
|
||||
-- NULL for pre-Phase-6 trades. Backfilled at startup via
|
||||
-- Database.backfill_feature_columns() using reasoning-string regex
|
||||
-- (fg_lo/mom_lo multiplied by 2 from raw; news_lo/mfld_lo taken directly;
|
||||
-- btc_dom_lo cannot be backfilled and remains NULL for legacy trades).
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_fg_lo DOUBLE PRECISION;
|
||||
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_mom_lo DOUBLE PRECISION;
|
||||
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_news_lo DOUBLE PRECISION;
|
||||
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_mfld_lo DOUBLE PRECISION;
|
||||
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_btc_dom_lo DOUBLE PRECISION;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_trades_feat_fg ON trades(feat_fg_lo) WHERE feat_fg_lo IS NOT NULL;
|
||||
CREATE INDEX IF NOT EXISTS idx_trades_feat_mfld ON trades(feat_mfld_lo) WHERE feat_mfld_lo IS NOT NULL;
|
||||
|
||||
-- ─────────────────────────────────────────────────────────────────────────────
|
||||
-- Fix 3: extended metrics_daily columns for DB-computed metrics
|
||||
--
|
||||
|
||||
@@ -49,6 +49,12 @@ class Trade:
|
||||
commission: float = 0.0 # = POLYMARKET_FEE * size_usdc
|
||||
# ── Phase 2: market family ────────────────────────────────────────────────
|
||||
family_key: str = ""
|
||||
# ── Phase 6: per-feature log-odds contributions ───────────────────────────
|
||||
feat_fg_lo: float = 0.0
|
||||
feat_mom_lo: float = 0.0
|
||||
feat_news_lo: float = 0.0
|
||||
feat_mfld_lo: float = 0.0
|
||||
feat_btc_dom_lo: float = 0.0
|
||||
|
||||
def __str__(self) -> str:
|
||||
return (
|
||||
@@ -148,6 +154,12 @@ class PaperExecutor:
|
||||
commission=commission,
|
||||
# Phase 2 family
|
||||
family_key=order.family_key,
|
||||
# Phase 6 feature log-odds
|
||||
feat_fg_lo=order.feat_fg_lo,
|
||||
feat_mom_lo=order.feat_mom_lo,
|
||||
feat_news_lo=order.feat_news_lo,
|
||||
feat_mfld_lo=order.feat_mfld_lo,
|
||||
feat_btc_dom_lo=order.feat_btc_dom_lo,
|
||||
)
|
||||
|
||||
# Update paper portfolio
|
||||
|
||||
@@ -369,6 +369,7 @@ async def main() -> None:
|
||||
db = Database()
|
||||
await db.connect()
|
||||
await db.run_migrations()
|
||||
await db.backfill_feature_columns()
|
||||
|
||||
poly = PolymarketClient()
|
||||
external = ExternalDataClient()
|
||||
|
||||
@@ -56,6 +56,12 @@ class Order:
|
||||
family_key: str = ""
|
||||
# Phase 4 — regime threshold applied
|
||||
regime_min_edge: float = 0.10
|
||||
# Phase 6 — per-feature log-odds contributions (see TradingSignal for semantics)
|
||||
feat_fg_lo: float = 0.0
|
||||
feat_mom_lo: float = 0.0
|
||||
feat_news_lo: float = 0.0
|
||||
feat_mfld_lo: float = 0.0
|
||||
feat_btc_dom_lo: float = 0.0
|
||||
|
||||
|
||||
class RiskManager:
|
||||
@@ -147,4 +153,10 @@ class RiskManager:
|
||||
family_key=signal.family_key,
|
||||
# Phase 4 — regime
|
||||
regime_min_edge=signal.regime_min_edge,
|
||||
# Phase 6 — feature log-odds
|
||||
feat_fg_lo=signal.feat_fg_lo,
|
||||
feat_mom_lo=signal.feat_mom_lo,
|
||||
feat_news_lo=signal.feat_news_lo,
|
||||
feat_mfld_lo=signal.feat_mfld_lo,
|
||||
feat_btc_dom_lo=signal.feat_btc_dom_lo,
|
||||
)
|
||||
|
||||
@@ -160,6 +160,16 @@ class TradingSignal:
|
||||
family_key: str = ""
|
||||
# ── Phase 4: regime ──────────────────────────────────────────────────────
|
||||
regime_min_edge: float = 0.10
|
||||
# ── Phase 6: per-feature log-odds contributions ───────────────────────────
|
||||
# All values are in log-odds space for direct comparability.
|
||||
# feat_fg_lo / feat_mom_lo: probability-delta × 2 → log-odds.
|
||||
# feat_news_lo / feat_mfld_lo: already log-odds (no scaling).
|
||||
# feat_btc_dom_lo: btc-dominance probability-delta × 2 → log-odds.
|
||||
feat_fg_lo: float = 0.0
|
||||
feat_mom_lo: float = 0.0
|
||||
feat_news_lo: float = 0.0
|
||||
feat_mfld_lo: float = 0.0
|
||||
feat_btc_dom_lo: float = 0.0
|
||||
|
||||
|
||||
class BayesianStrategy:
|
||||
@@ -379,11 +389,14 @@ class BayesianStrategy:
|
||||
adjustments.append(_fg_contribution)
|
||||
|
||||
# Signal 3: BTC dominance — hurts altcoins when high
|
||||
_btc_dom_contribution = 0.0
|
||||
if (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance > 55:
|
||||
adjustments.append(-0.03 if is_price_above else 0.03)
|
||||
_btc_dom_contribution = -0.03 if is_price_above else 0.03
|
||||
adjustments.append(_btc_dom_contribution)
|
||||
sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (high → alt pressure)")
|
||||
elif (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance < 45:
|
||||
adjustments.append(0.03 if is_price_above else -0.03)
|
||||
_btc_dom_contribution = 0.03 if is_price_above else -0.03
|
||||
adjustments.append(_btc_dom_contribution)
|
||||
sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (low → alt season)")
|
||||
|
||||
# Signal 4: GNews sentiment (politics only, budget-gated)
|
||||
@@ -448,12 +461,19 @@ class BayesianStrategy:
|
||||
if manifold_log_adj != 0.0:
|
||||
confidence = min(confidence_cap, confidence + 0.08)
|
||||
|
||||
# Per-feature contribution string for audit logging
|
||||
# Per-feature log-odds contributions (Phase 6).
|
||||
# fg / mom / btc_dom: probability-delta × 2 → log-odds.
|
||||
# news / mfld: already log-odds (LOGODDS_WEIGHT already applied).
|
||||
feat_fg_lo = _fg_contribution * 2
|
||||
feat_mom_lo = _momentum_contribution * 2
|
||||
feat_news_lo = news_log_adj
|
||||
feat_mfld_lo = manifold_log_adj
|
||||
feat_btc_dom_lo = _btc_dom_contribution * 2
|
||||
|
||||
feat_str = (
|
||||
f"fg={_fg_contribution:+.3f} "
|
||||
f"mom={_momentum_contribution:+.3f} "
|
||||
f"mfld={manifold_log_adj:+.4f} "
|
||||
f"news={news_log_adj:+.4f}"
|
||||
f"fg_lo={feat_fg_lo:+.4f} mom_lo={feat_mom_lo:+.4f} "
|
||||
f"news_lo={feat_news_lo:+.4f} mfld_lo={feat_mfld_lo:+.4f} "
|
||||
f"btc_dom_lo={feat_btc_dom_lo:+.4f}"
|
||||
)
|
||||
|
||||
# ── Phase 5: structured audit log ────────────────────────────────────
|
||||
@@ -496,8 +516,7 @@ class BayesianStrategy:
|
||||
f"regime_min={regime_min:.2f} | days={days} | "
|
||||
f"family={family} | "
|
||||
f"Direction={direction} | "
|
||||
f"fg={_fg_contribution:+.4f} mom={_momentum_contribution:+.4f} "
|
||||
f"news={news_log_adj:+.4f} mfld={manifold_log_adj:+.4f} | "
|
||||
f"{feat_str} | "
|
||||
f"Signals: {', '.join(sources[1:])}"
|
||||
)
|
||||
|
||||
@@ -535,6 +554,12 @@ class BayesianStrategy:
|
||||
family_key=family,
|
||||
# Phase 4 new fields
|
||||
regime_min_edge=regime_min,
|
||||
# Phase 6 new fields — all in log-odds space
|
||||
feat_fg_lo=feat_fg_lo,
|
||||
feat_mom_lo=feat_mom_lo,
|
||||
feat_news_lo=feat_news_lo,
|
||||
feat_mfld_lo=feat_mfld_lo,
|
||||
feat_btc_dom_lo=feat_btc_dom_lo,
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user