diff --git a/bot/data/db.py b/bot/data/db.py index e06d882..886830f 100644 --- a/bot/data/db.py +++ b/bot/data/db.py @@ -226,8 +226,11 @@ class Database: COUNT(*) AS total_trades, COUNT(*) FILTER (WHERE closed_at IS NULL) AS open_count, COUNT(*) FILTER (WHERE closed_at IS NOT NULL) AS closed_count, + -- excluded_from_metrics trades are omitted from resolved_count, + -- realized_pnl, wins_realized, and calibration_score. COUNT(*) FILTER (WHERE resolution IS NOT NULL - AND final_prob IS NOT NULL) AS resolved_count, + AND final_prob IS NOT NULL + AND (excluded_from_metrics IS NOT TRUE)) AS resolved_count, COALESCE(SUM(net_cost) FILTER (WHERE closed_at IS NULL), 0) AS total_deployed, @@ -240,15 +243,17 @@ class Database: FILTER (WHERE closed_at IS NULL AND edge_net IS NOT NULL), 0) AS unrealized_pnl_est, - -- Realized PnL: closed trades with a known resolution. - -- close_pnl is computed at close time from actual resolution. + -- Realized PnL: admin-excluded trades omitted (close_pnl=0 by convention + -- but excluded explicitly so they don't skew the aggregate). COALESCE(SUM(close_pnl) FILTER (WHERE closed_at IS NOT NULL - AND close_pnl IS NOT NULL), 0) AS realized_pnl, + AND close_pnl IS NOT NULL + AND (excluded_from_metrics IS NOT TRUE)), 0) AS realized_pnl, COUNT(*) FILTER (WHERE closed_at IS NOT NULL AND close_pnl IS NOT NULL - AND close_pnl > 0) AS wins_realized, + AND close_pnl > 0 + AND (excluded_from_metrics IS NOT TRUE)) AS wins_realized, -- Calibration (Brier score transformed to higher-is-better): -- 1 − AVG((final_prob − resolution)²) on resolved trades. @@ -256,12 +261,15 @@ class Database: -- resolution is 1.0 (YES won) or 0.0 (NO won). -- Perfect calibration → 1.0 | Random → ~0.75 | Worst → 0.0 -- Returns NULL if fewer than 10 resolved trades with final_prob. + -- Admin-excluded trades omitted from both threshold and average. 
CASE WHEN COUNT(*) FILTER (WHERE resolution IS NOT NULL - AND final_prob IS NOT NULL) >= 10 + AND final_prob IS NOT NULL + AND (excluded_from_metrics IS NOT TRUE)) >= 10 THEN 1.0 - AVG((final_prob - resolution) * (final_prob - resolution)) FILTER (WHERE resolution IS NOT NULL - AND final_prob IS NOT NULL) + AND final_prob IS NOT NULL + AND (excluded_from_metrics IS NOT TRUE)) ELSE NULL END AS calibration_score @@ -368,22 +376,27 @@ class Database: feat_fg_lo AS fval, edge_net, net_cost, fee_usdc, closed_at, close_pnl FROM trades WHERE feat_fg_lo IS NOT NULL + AND (excluded_from_metrics IS NOT TRUE) UNION ALL SELECT 'mom', 0.05, feat_mom_lo, edge_net, net_cost, fee_usdc, closed_at, close_pnl FROM trades WHERE feat_mom_lo IS NOT NULL + AND (excluded_from_metrics IS NOT TRUE) UNION ALL SELECT 'news', 0.10, feat_news_lo, edge_net, net_cost, fee_usdc, closed_at, close_pnl FROM trades WHERE feat_news_lo IS NOT NULL + AND (excluded_from_metrics IS NOT TRUE) UNION ALL SELECT 'mfld', 0.10, feat_mfld_lo, edge_net, net_cost, fee_usdc, closed_at, close_pnl FROM trades WHERE feat_mfld_lo IS NOT NULL + AND (excluded_from_metrics IS NOT TRUE) UNION ALL SELECT 'btc_dom', 0.05, feat_btc_dom_lo, edge_net, net_cost, fee_usdc, closed_at, close_pnl FROM trades WHERE feat_btc_dom_lo IS NOT NULL + AND (excluded_from_metrics IS NOT TRUE) ) SELECT feature, @@ -467,6 +480,7 @@ class Database: ) AS dominant FROM trades WHERE feat_fg_lo IS NOT NULL + AND (excluded_from_metrics IS NOT TRUE) ) SELECT COALESCE(dominant, 'none') AS dominant_feature, diff --git a/bot/data/schema.sql b/bot/data/schema.sql index 142ac39..9bc56e2 100644 --- a/bot/data/schema.sql +++ b/bot/data/schema.sql @@ -214,6 +214,21 @@ CREATE INDEX IF NOT EXISTS idx_mfld_audit_timestamp ON manifold_match_audit(time CREATE INDEX IF NOT EXISTS idx_mfld_audit_status ON manifold_match_audit(match_status); CREATE INDEX IF NOT EXISTS idx_mfld_audit_poly_mkt ON manifold_match_audit(poly_market_id); +-- 
───────────────────────────────────────────────────────────────────────────── +-- Metric exclusion — administrative closure flag +-- +-- excluded_from_metrics: TRUE for trades closed for non-signal reasons +-- (bad matcher, data error, admin close). Rows where the flag is TRUE are excluded from +-- win_rate, calibration_score, realized_pnl, and feature attribution; FALSE and NULL rows still count. +-- exclusion_reason: free-text label for the exclusion cause, +-- e.g. 'invalid_manifold_match_legacy'. +-- ───────────────────────────────────────────────────────────────────────────── +ALTER TABLE trades ADD COLUMN IF NOT EXISTS excluded_from_metrics BOOLEAN DEFAULT FALSE; +ALTER TABLE trades ADD COLUMN IF NOT EXISTS exclusion_reason TEXT; + +CREATE INDEX IF NOT EXISTS idx_trades_excluded ON trades(excluded_from_metrics) + WHERE excluded_from_metrics = TRUE; + -- ───────────────────────────────────────────────────────────────────────────── -- Fix 3: extended metrics_daily columns for DB-computed metrics --