Files
chemavx 34fd1f8719
CI/CD / build-and-push (push) Successful in 7s
feat(manifold): add outcome compatibility guard and conditional market rejection
Reject false-positive matches where Jaccard overlap is high but the outcome is
not equivalent (e.g. Poly nomination vs Manifold "If X is nominee, will he win").

- _is_conditional(): detect conditional Manifold markets (If/Conditional on/
  Assuming/Given that prefixes + mid-sentence " if ...," clauses) -> reject with
  reason "conditional_market".
- _classify_outcome(): classify into nomination|primary_win|general_win|
  conditional|other; reject when poly/mfld types differ or either is conditional
  -> reason "outcome_mismatch: poly=... manifold=...".
- Persist poly_outcome_type/mfld_outcome_type on ManifoldMatchResult, in
  manifold_match_audit (CREATE + idempotent ALTER), save_manifold_audit() and
  the bayesian call site.
- Tests covering classification, conditional detection and the Graham Platner
  regression (now rejected); valid nomination<->nomination still accepted.

Untouched: _MATCH_THRESHOLD (0.40), MANIFOLD_LOGODDS_WEIGHT, edge thresholds,
exposure, trading logic.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-05-31 15:28:26 +00:00

264 lines
17 KiB
SQL
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
-- Polymarket Bot Database Schema
CREATE TABLE IF NOT EXISTS trades (
id TEXT PRIMARY KEY,
market_id TEXT NOT NULL,
question TEXT NOT NULL,
direction TEXT NOT NULL, -- BUY_YES | BUY_NO
size_usdc DOUBLE PRECISION,
entry_price DOUBLE PRECISION,
shares DOUBLE PRECISION,
fee_usdc DOUBLE PRECISION,
net_cost DOUBLE PRECISION,
timestamp TIMESTAMPTZ NOT NULL,
reasoning TEXT,
paper BOOLEAN DEFAULT TRUE
);
CREATE TABLE IF NOT EXISTS metrics_daily (
id SERIAL PRIMARY KEY,
timestamp TIMESTAMPTZ NOT NULL,
total_trades INTEGER,
total_deployed DOUBLE PRECISION,
total_fees DOUBLE PRECISION,
total_pnl DOUBLE PRECISION,
win_rate DOUBLE PRECISION,
avg_edge DOUBLE PRECISION,
sharpe_ratio DOUBLE PRECISION,
calibration_score DOUBLE PRECISION,
paper_mode BOOLEAN DEFAULT TRUE
);
CREATE TABLE IF NOT EXISTS markets (
id TEXT PRIMARY KEY,
condition_id TEXT,
question TEXT NOT NULL,
category TEXT,
end_date TEXT,
active BOOLEAN DEFAULT TRUE,
last_seen TIMESTAMPTZ DEFAULT NOW()
);
CREATE TABLE IF NOT EXISTS signals (
id SERIAL PRIMARY KEY,
market_id TEXT NOT NULL,
timestamp TIMESTAMPTZ NOT NULL,
polymarket_price DOUBLE PRECISION,
estimated_prob DOUBLE PRECISION,
edge DOUBLE PRECISION,
confidence DOUBLE PRECISION,
direction TEXT,
acted_on BOOLEAN DEFAULT FALSE
);
CREATE INDEX IF NOT EXISTS idx_trades_timestamp ON trades(timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_trades_market ON trades(market_id);
CREATE INDEX IF NOT EXISTS idx_metrics_timestamp ON metrics_daily(timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_signals_timestamp ON signals(timestamp DESC);
-- ─────────────────────────────────────────────────────────────────────────────
-- Phase 1 migrations: edge neto real
--
-- spread_estimate and commission are HEURISTICS, not exact Polymarket exchange
-- costs. spread_estimate ≈ estimated half-spread for medium-liquidity markets.
-- commission = COMMISSION_RATE (0.02) * size_usdc — mirrors Polymarket taker fee.
-- edge_net = edge_gross - spread_estimate - commission/size_usdc
-- = edge_gross - 0.02 - 0.02 (always 0.04 deduction at current rates)
--
-- These are stored per-trade so we can audit whether the model's cost assumptions
-- were met in practice once markets resolve.
-- ─────────────────────────────────────────────────────────────────────────────
ALTER TABLE trades ADD COLUMN IF NOT EXISTS edge_gross DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS edge_net DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS prior_prob DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS final_prob DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS mid_price DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS spread_estimate DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS commission DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS family_key TEXT;
-- ─────────────────────────────────────────────────────────────────────────────
-- Phase 2 / Phase 5 migrations: market families + observability
--
-- Signals table extended so each evaluated market carries its audit trail:
-- skip_reason — why the market was not traded ("edge_net", "family",
-- "gnews_priority", "regime", "prior_extreme", etc.)
-- passed_gross — True if edge_gross alone met regime_min_edge
-- passed_net — True if edge_net met regime_min_edge (the actual gate)
-- family_key — market family slug (e.g. "texas-republican-2026")
-- regime_min_edge — threshold that applied to this market/category
-- ─────────────────────────────────────────────────────────────────────────────
ALTER TABLE signals ADD COLUMN IF NOT EXISTS edge_gross DOUBLE PRECISION;
ALTER TABLE signals ADD COLUMN IF NOT EXISTS edge_net DOUBLE PRECISION;
ALTER TABLE signals ADD COLUMN IF NOT EXISTS family_key TEXT;
ALTER TABLE signals ADD COLUMN IF NOT EXISTS regime_min_edge DOUBLE PRECISION;
ALTER TABLE signals ADD COLUMN IF NOT EXISTS skip_reason TEXT;
ALTER TABLE signals ADD COLUMN IF NOT EXISTS passed_gross BOOLEAN;
ALTER TABLE signals ADD COLUMN IF NOT EXISTS passed_net BOOLEAN;
CREATE INDEX IF NOT EXISTS idx_signals_market ON signals(market_id);
CREATE INDEX IF NOT EXISTS idx_trades_family ON trades(family_key);
-- ─────────────────────────────────────────────────────────────────────────────
-- Position lifecycle: legacy scan can close erroneous paper positions.
-- closed_at IS NULL → position is open (all open-position queries filter this).
-- closed_at NOT NULL → position closed; close_reason explains why.
-- ─────────────────────────────────────────────────────────────────────────────
ALTER TABLE trades ADD COLUMN IF NOT EXISTS closed_at TIMESTAMPTZ;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS close_reason TEXT;
CREATE INDEX IF NOT EXISTS idx_trades_closed ON trades(closed_at) WHERE closed_at IS NOT NULL;
-- ─────────────────────────────────────────────────────────────────────────────
-- Fix 3: market resolution and realized P&L per trade
--
-- resolution: 1.0 if YES resolved, 0.0 if NO resolved, NULL if not yet settled.
-- close_pnl: realized P&L in USDC at close time.
-- BUY_YES: (resolution - entry_price) * shares
-- BUY_NO: ((1 - resolution) - entry_price) * shares
-- NULL if closed without a known resolution (legacy closes, inversion fixes).
-- ─────────────────────────────────────────────────────────────────────────────
ALTER TABLE trades ADD COLUMN IF NOT EXISTS close_pnl DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS resolution DOUBLE PRECISION;
-- ─────────────────────────────────────────────────────────────────────────────
-- Phase 6: per-feature signal attribution — all values in log-odds space
--
-- All four primary features share a common unit (log-odds contribution to
-- the posterior estimate) so they can be compared directly:
--
-- feat_fg_lo = _fg_contribution × 2
-- Fear & Greed direction-adjusted delta, ×2 to log-odds.
-- Non-zero for every trade. Range ≈ ±0.12.
-- Materiality threshold: |lo| ≥ 0.05.
--
-- feat_mom_lo = _momentum_contribution × 2
-- Momentum delta (direction-adjusted), ×2 to log-odds.
-- Zero when |btc_change_24h| ≤ 2 %. Range ≈ ±0.15.
-- Materiality threshold: |lo| ≥ 0.05.
--
-- feat_news_lo = news_log_adj (already in log-odds, no scaling)
-- GNews sentiment × NEWS_LOGODDS_WEIGHT (1.5).
-- Zero for non-politics or when GNews budget exhausted.
-- Range ≈ ±1.5. Materiality threshold: |lo| ≥ 0.10.
--
-- feat_mfld_lo = manifold_log_adj (already in log-odds, no scaling)
-- Manifold divergence × MANIFOLD_LOGODDS_WEIGHT (0.6).
-- Zero when Manifold returned no result.
-- Range ≈ ±0.6. Materiality threshold: |lo| ≥ 0.10.
--
-- feat_btc_dom_lo = _btc_dom_contribution × 2
-- BTC-dominance alt-pressure delta, ×2 to log-odds.
-- Only fires for ETH / altcoin / general-crypto markets
-- when btc_dominance > 55 % or < 45 %.
-- Values: { 0.06, 0.0, +0.06 }.
-- Materiality threshold: |lo| ≥ 0.05.
--
-- NULL for pre-Phase-6 trades. Backfilled at startup via
-- Database.backfill_feature_columns() using reasoning-string regex
-- (fg_lo/mom_lo multiplied by 2 from raw; news_lo/mfld_lo taken directly;
-- btc_dom_lo cannot be backfilled and remains NULL for legacy trades).
-- ─────────────────────────────────────────────────────────────────────────────
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_fg_lo DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_mom_lo DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_news_lo DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_mfld_lo DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS feat_btc_dom_lo DOUBLE PRECISION;
CREATE INDEX IF NOT EXISTS idx_trades_feat_fg ON trades(feat_fg_lo) WHERE feat_fg_lo IS NOT NULL;
CREATE INDEX IF NOT EXISTS idx_trades_feat_mfld ON trades(feat_mfld_lo) WHERE feat_mfld_lo IS NOT NULL;
-- ─────────────────────────────────────────────────────────────────────────────
-- Manifold match audit — per-trade columns in trades
--
-- Persisted for every trade where Manifold was queried (status='accepted').
-- mfld_match_status: 'accepted' | 'rejected' | 'no_results'
-- mfld_inverted: TRUE when prob_final = 1 - prob_raw (party complement match)
-- ─────────────────────────────────────────────────────────────────────────────
ALTER TABLE trades ADD COLUMN IF NOT EXISTS mfld_market_id TEXT;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS mfld_market_title TEXT;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS mfld_market_url TEXT;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS mfld_prob_raw DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS mfld_prob_final DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS mfld_inverted BOOLEAN;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS mfld_match_score DOUBLE PRECISION;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS mfld_match_reason TEXT;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS mfld_match_status TEXT;
-- ─────────────────────────────────────────────────────────────────────────────
-- Manifold match audit table — records every Manifold query attempt
--
-- Populated for ALL queries: accepted, rejected, and no_results.
-- used_in_trade=TRUE is set after executor confirms a trade was executed.
-- poly_market_id: Market.id from the Polymarket Market dataclass (never NULL).
-- ─────────────────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS manifold_match_audit (
id TEXT PRIMARY KEY,
timestamp TIMESTAMPTZ DEFAULT NOW(),
poly_market_id TEXT NOT NULL,
poly_question TEXT NOT NULL,
search_query TEXT,
mfld_market_id TEXT,
mfld_market_title TEXT,
mfld_market_url TEXT,
prob_raw DOUBLE PRECISION,
prob_final DOUBLE PRECISION,
inverted BOOLEAN DEFAULT FALSE,
match_score DOUBLE PRECISION,
match_reason TEXT,
match_status TEXT NOT NULL,
used_in_trade BOOLEAN DEFAULT FALSE,
poly_outcome_type TEXT,
mfld_outcome_type TEXT
);
CREATE INDEX IF NOT EXISTS idx_mfld_audit_timestamp ON manifold_match_audit(timestamp DESC);
CREATE INDEX IF NOT EXISTS idx_mfld_audit_status ON manifold_match_audit(match_status);
CREATE INDEX IF NOT EXISTS idx_mfld_audit_poly_mkt ON manifold_match_audit(poly_market_id);
-- Backfill outcome-type columns on pre-existing tables (idempotent).
ALTER TABLE manifold_match_audit ADD COLUMN IF NOT EXISTS poly_outcome_type TEXT;
ALTER TABLE manifold_match_audit ADD COLUMN IF NOT EXISTS mfld_outcome_type TEXT;
-- ─────────────────────────────────────────────────────────────────────────────
-- Metric exclusion — administrative closure flag
--
-- excluded_from_metrics: TRUE for trades closed for non-signal reasons
-- (bad matcher, data error, admin close). These trades are excluded from
-- win_rate, calibration_score, realized_pnl, and feature attribution.
-- exclusion_reason: free-text label for the exclusion cause.
-- e.g. 'invalid_manifold_match_legacy'
-- ─────────────────────────────────────────────────────────────────────────────
ALTER TABLE trades ADD COLUMN IF NOT EXISTS excluded_from_metrics BOOLEAN DEFAULT FALSE;
ALTER TABLE trades ADD COLUMN IF NOT EXISTS exclusion_reason TEXT;
CREATE INDEX IF NOT EXISTS idx_trades_excluded ON trades(excluded_from_metrics)
WHERE excluded_from_metrics = TRUE;
-- ─────────────────────────────────────────────────────────────────────────────
-- Fix 3: extended metrics_daily columns for DB-computed metrics
--
-- unrealized_pnl_est: SUM(edge_net * net_cost - fee) on open trades with edge_net.
-- Estimated — uses model's own edge signal, not live market price.
-- realized_pnl: SUM(close_pnl) on closed trades with a known resolution.
-- Exact — derived from actual market outcome.
-- open_count / closed_count / resolved_count: trade counts at snapshot time.
-- ─────────────────────────────────────────────────────────────────────────────
ALTER TABLE metrics_daily ADD COLUMN IF NOT EXISTS unrealized_pnl_est DOUBLE PRECISION;
ALTER TABLE metrics_daily ADD COLUMN IF NOT EXISTS realized_pnl DOUBLE PRECISION;
ALTER TABLE metrics_daily ADD COLUMN IF NOT EXISTS open_count INTEGER;
ALTER TABLE metrics_daily ADD COLUMN IF NOT EXISTS closed_count INTEGER;
ALTER TABLE metrics_daily ADD COLUMN IF NOT EXISTS resolved_count INTEGER;
-- ─────────────────────────────────────────────────────────────────────────────
-- Checkpoint alerts — one-shot and rate-limited Telegram observation alerts
--
-- fired_at: timestamp of the first fire (immutable for one-shot checkpoints)
-- last_fired_at: updated on every fire (used for rate-limiting repeatable alerts)
-- ─────────────────────────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS checkpoint_alerts (
checkpoint_name TEXT PRIMARY KEY,
fired_at TIMESTAMPTZ NOT NULL,
last_fired_at TIMESTAMPTZ
);