# bot/data/db.py — method of Database
async def get_open_families(self) -> set[str]:
    """Return every distinct, non-empty ``family_key`` stored in ``trades``.

    A connection is borrowed from ``self._pool`` for a single query; NULL
    keys are excluded by the SQL and empty strings are dropped in Python.

    NOTE(review): despite the method name, the query has no open/closed
    filter — it returns every family that has ever been traded. Confirm
    whether that is intended, or whether resolved/closed positions should
    release their family.
    """
    query = "SELECT DISTINCT family_key FROM trades WHERE family_key IS NOT NULL"
    async with self._pool.acquire() as conn:
        records = await conn.fetch(query)

    families: set[str] = set()
    for record in records:
        key = record["family_key"]
        if key:  # skip '' (and any other falsy value)
            families.add(key)
    return families


# bot/data/news.py — method of NewsClient
def get_freshness(self, question: str) -> float:
    """Return a freshness score in [0.10, 1.0] used to rank GNews queries.

    Scores, based on the age of the cache entry for the question's query:
        1.00 — no API key configured, or no cache entry for the query
        0.75 — entry older than 6 hours
        0.40 — entry older than 2 hours (and at most 6 hours)
        0.10 — entry at most 2 hours old

    The cache is looked up with the lower-cased result of
    ``self._build_query(question)``.
    """
    if not self._api_key:
        return 1.0

    cache_key = self._build_query(question).lower()
    entry = self._cache.get(cache_key)
    if entry is None:
        return 1.0

    # entry[0] is compared against time.monotonic(); presumably it is the
    # monotonic timestamp stored when the query was last run — confirm
    # against the code that fills the cache.
    elapsed = time.monotonic() - entry[0]
    for min_age_seconds, score in ((6 * 3600, 0.75), (2 * 3600, 0.40)):
        if elapsed > min_age_seconds:
            return score
    return 0.10
# ─────────────────────────────────────────────────────────────────────────────

_YEAR_RE = re.compile(r"\b(202\d|203\d)\b")
_MONTH_RE = re.compile(
    r"\b(january|february|march|april|may|june|july|august|"
    r"september|october|november|december)\b",
    re.IGNORECASE,
)
_FED_TRIGGER_RE = re.compile(
    r"\b(federal reserve|interest rate|bps|basis point|fed\s+(rate|meeting|decision))",
    re.IGNORECASE,
)
_US_STATE_RE = re.compile(
    r"\b(Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|"
    r"Delaware|Florida|Georgia|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|"
    r"Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|"
    r"Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|"
    r"New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|"
    r"Ohio|Oklahoma|Oregon|Pennsylvania|Rhode\s+Island|South\s+Carolina|"
    r"South\s+Dakota|Tennessee|Texas|Utah|Vermont|Virginia|Washington|"
    r"West\s+Virginia|Wisconsin|Wyoming)\b",
    re.IGNORECASE,
)
_PARTY_RE = re.compile(r"\b(Republican|Democrats?|Democratic|GOP)\b", re.IGNORECASE)
_ELECTION_TYPE_RE = re.compile(
    r"\b(presidential|president|mayoral|mayor|gubernatorial|governor|"
    r"senate|congress(?:ional)?|primary|election)\b",
    re.IGNORECASE,
)

# Maps each election-type variant captured by _ELECTION_TYPE_RE to one
# canonical slug, so that e.g. "mayor" and "mayoral" (or "congress" and
# "congressional") land in the same family.
_ELECTION_TYPE_SYNONYMS: dict[str, str] = {
    "president": "presidential",
    "mayor": "mayoral",
    "governor": "gubernatorial",
    "congress": "congressional",
}

# Ordered list of (pattern, place_slug) for named places that are not US
# states: mostly non-US cities/countries, plus US cities such as Los Angeles.
# Only consulted when the "US state + party" rule did not match; the first
# matching entry wins, so more specific places are listed before broader ones
# (e.g. Seoul before South Korea).
_NAMED_PLACES: list[tuple[re.Pattern, str]] = [
    (re.compile(r"\bColomb", re.IGNORECASE), "colombia"),
    (re.compile(r"\bSeoul\b", re.IGNORECASE), "seoul"),
    (re.compile(r"\bBusan\b", re.IGNORECASE), "busan"),
    (re.compile(r"\bGyeonggi\b", re.IGNORECASE), "gyeonggi"),
    (re.compile(r"\bChungcheong", re.IGNORECASE), "chungcheong"),
    (re.compile(r"\bSouth\s+Korean?\b", re.IGNORECASE), "south-korea"),
    (re.compile(r"\bLos\s+Angeles\b", re.IGNORECASE), "los-angeles"),
    (re.compile(r"\bCuba\b", re.IGNORECASE), "cuba"),
    (re.compile(r"\bLebanon\b", re.IGNORECASE), "lebanon"),
    (re.compile(r"\bIsrael\b", re.IGNORECASE), "israel"),
    (re.compile(r"\bUkraine\b", re.IGNORECASE), "ukraine"),
    (re.compile(r"\bRussia\b", re.IGNORECASE), "russia"),
]

# Ordered list of (pattern, company_slug) for tech/company markets.
_NAMED_COMPANIES: list[tuple[re.Pattern, str]] = [
    (re.compile(r"\bopenai\b", re.IGNORECASE), "openai"),
    (re.compile(r"\banthropic\b", re.IGNORECASE), "anthropic"),
    (re.compile(r"\bnvidia\b", re.IGNORECASE), "nvidia"),
    (re.compile(r"\bapple\b", re.IGNORECASE), "apple"),
    (re.compile(r"\bmicrosoft\b", re.IGNORECASE), "microsoft"),
    (re.compile(r"\bgoogle\b", re.IGNORECASE), "google"),
    (re.compile(r"\btesla\b", re.IGNORECASE), "tesla"),
    # \bmeta\b does NOT match MetaMask (no word boundary mid-compound-word)
    (re.compile(r"\bmeta\b", re.IGNORECASE), "meta"),
]


def _end_month(market: "Market") -> str:
    """Return ``market.end_date`` formatted as ``YYYY-MM``.

    Returns ``''`` when the end date is missing, is not a string, or is not
    a parseable ISO-8601 timestamp. A trailing ``Z`` is accepted as UTC.
    """
    raw = market.end_date
    if not raw:
        return ""
    try:
        dt = datetime.fromisoformat(raw.replace("Z", "+00:00"))
    except (AttributeError, TypeError, ValueError):
        # AttributeError covers non-string values that have no .replace().
        return ""
    return dt.strftime("%Y-%m")


def market_family_key(market: "Market") -> str:
    """Return a stable slug that groups related markets into one family.

    Markets in the same family share an underlying event (same election,
    same Fed meeting decision, same company). Callers use the slug to hold
    at most one position per family.

    Only ``market.question``, ``market.end_date`` and ``market.category``
    are read. The year is taken from the first 202x/203x year in the
    question, else from the end date, else the literal ``"unknown"``.

    Priority order (first match wins):
        1. Fed / interest-rate decision  -> fed-{month}-{year} or fed-{year}
        2. US state + party              -> {state}-{party}-{year}
        3. Named place                   -> {place}-{event_type}-{year}
        4. Named tech company            -> {company}-{year}
        5. Fallback                      -> {category}-{end_YYYY-MM}
                                            ({category}-{year} without an
                                            end date; category defaults to
                                            "misc")

    Examples:
        "Will Ken Paxton win the 2026 Texas Republican Primary"
            -> texas-republican-2026
        "Will the Fed decrease rates by 25 bps after April 2026 meeting"
            -> fed-april-2026
        "Will OpenAI IPO by December 31 2026?"
            -> openai-2026
    """
    q = market.question
    # Parsed once: needed both for the year fallback and for rule 5.
    end_month = _end_month(market)

    # Prefer the year written in the question; fall back to the end date.
    year_m = _YEAR_RE.search(q)
    if year_m:
        year = year_m.group(1)
    else:
        year = end_month[:4] if end_month else "unknown"

    # 1. Fed / interest-rate meeting.
    # NOTE(review): the trigger also fires on generic "interest rate"/"bps"
    # wording, so other central banks' markets land in the fed-* family.
    if _FED_TRIGGER_RE.search(q):
        month_m = _MONTH_RE.search(q)
        if month_m:
            return f"fed-{month_m.group(1).lower()}-{year}"
        return f"fed-{year}"

    # 2. US state + party (primary, senate, governor, etc.)
    state_m = _US_STATE_RE.search(q)
    party_m = _PARTY_RE.search(q)
    if state_m and party_m:
        state = re.sub(r"\s+", "-", state_m.group(1).lower())
        raw_party = party_m.group(1).lower()
        # "democrat" prefix covers "democrat", "democrats", "democratic";
        # everything else the pattern can match (Republican, GOP) is republican.
        party = "democrat" if "democrat" in raw_party else "republican"
        return f"{state}-{party}-{year}"

    # 3. Named place (first entry of _NAMED_PLACES that matches)
    for place_re, place_slug in _NAMED_PLACES:
        if place_re.search(q):
            etype_m = _ELECTION_TYPE_RE.search(q)
            if etype_m:
                raw_etype = etype_m.group(1).lower()
                etype = _ELECTION_TYPE_SYNONYMS.get(raw_etype, raw_etype)
            else:
                etype = "event"
            return f"{place_slug}-{etype}-{year}"

    # 4. Named tech company
    for company_re, company_slug in _NAMED_COMPANIES:
        if company_re.search(q):
            return f"{company_slug}-{year}"

    # 5. Fallback: category + end_date month
    base = market.category or "misc"
    return f"{base}-{end_month}" if end_month else f"{base}-{year}"
+-- ───────────────────────────────────────────────────────────────────────────── +ALTER TABLE trades ADD COLUMN IF NOT EXISTS edge_gross DOUBLE PRECISION; +ALTER TABLE trades ADD COLUMN IF NOT EXISTS edge_net DOUBLE PRECISION; +ALTER TABLE trades ADD COLUMN IF NOT EXISTS prior_prob DOUBLE PRECISION; +ALTER TABLE trades ADD COLUMN IF NOT EXISTS final_prob DOUBLE PRECISION; +ALTER TABLE trades ADD COLUMN IF NOT EXISTS mid_price DOUBLE PRECISION; +ALTER TABLE trades ADD COLUMN IF NOT EXISTS spread_estimate DOUBLE PRECISION; +ALTER TABLE trades ADD COLUMN IF NOT EXISTS commission DOUBLE PRECISION; +ALTER TABLE trades ADD COLUMN IF NOT EXISTS family_key TEXT; + +-- ───────────────────────────────────────────────────────────────────────────── +-- Phase 2 / Phase 5 migrations: market families + observability +-- +-- Signals table extended so each evaluated market carries its audit trail: +-- skip_reason — why the market was not traded ("edge_net", "family", +-- "gnews_priority", "regime", "prior_extreme", etc.) +-- passed_gross — True if edge_gross alone met regime_min_edge +-- passed_net — True if edge_net met regime_min_edge (the actual gate) +-- family_key — market family slug (e.g. 
"texas-republican-2026") +-- regime_min_edge — threshold that applied to this market/category +-- ───────────────────────────────────────────────────────────────────────────── +ALTER TABLE signals ADD COLUMN IF NOT EXISTS edge_gross DOUBLE PRECISION; +ALTER TABLE signals ADD COLUMN IF NOT EXISTS edge_net DOUBLE PRECISION; +ALTER TABLE signals ADD COLUMN IF NOT EXISTS family_key TEXT; +ALTER TABLE signals ADD COLUMN IF NOT EXISTS regime_min_edge DOUBLE PRECISION; +ALTER TABLE signals ADD COLUMN IF NOT EXISTS skip_reason TEXT; +ALTER TABLE signals ADD COLUMN IF NOT EXISTS passed_gross BOOLEAN; +ALTER TABLE signals ADD COLUMN IF NOT EXISTS passed_net BOOLEAN; + +CREATE INDEX IF NOT EXISTS idx_signals_market ON signals(market_id); +CREATE INDEX IF NOT EXISTS idx_trades_family ON trades(family_key); diff --git a/bot/executor/paper.py b/bot/executor/paper.py index 3bd4629..de29c2a 100644 --- a/bot/executor/paper.py +++ b/bot/executor/paper.py @@ -6,7 +6,7 @@ All trades are logged to PostgreSQL for metrics analysis. """ import logging import uuid -from dataclasses import dataclass +from dataclasses import dataclass, field from datetime import datetime, UTC from typing import Optional @@ -15,7 +15,10 @@ from bot.data.db import Database log = logging.getLogger(__name__) -POLYMARKET_FEE = 0.02 # 2% fee on each trade +# Polymarket taker fee used for paper simulation. +# Also stored as commission in each Trade for audit purposes. +# NOTE: this is a heuristic — see COMMISSION_RATE in bayesian.py for context. +POLYMARKET_FEE = 0.02 # 2% @dataclass @@ -32,11 +35,27 @@ class Trade: timestamp: datetime reasoning: str paper: bool = True + # ── Phase 1: edge neto audit fields ────────────────────────────────────── + # edge_gross: raw model edge before any cost deductions + # edge_net: edge_gross - spread_estimate - commission/size_usdc + # Both are heuristic estimates — see schema.sql comment for details. 
+ edge_gross: float = 0.0 + edge_net: float = 0.0 + prior_prob: float = 0.0 # market.yes_price clamped, before Bayesian update + final_prob: float = 0.0 # estimated probability after all signals + # mid_price: order-book midpoint when available; falls back to market.yes_price + mid_price: float = 0.0 + spread_estimate: float = 0.02 + commission: float = 0.0 # = POLYMARKET_FEE * size_usdc + # ── Phase 2: market family ──────────────────────────────────────────────── + family_key: str = "" def __str__(self) -> str: return ( f"[PAPER] {self.direction} {self.shares:.1f} shares @ {self.entry_price:.3f} " - f"= ${self.net_cost:.2f} (fee ${self.fee_usdc:.2f}) | {self.question[:40]}" + f"= ${self.net_cost:.2f} (fee ${self.fee_usdc:.2f}) " + f"edge_net={self.edge_net:+.3f} family={self.family_key} " + f"| {self.question[:40]}" ) @@ -102,6 +121,10 @@ class PaperExecutor: net_cost = order.size_usdc + fee shares = order.size_usdc / entry_price + # commission mirrors the heuristic COMMISSION_RATE applied in bayesian.py + # when computing edge_net. Stored for audit: confirms cost assumption held. 
+ commission = order.size_usdc * POLYMARKET_FEE # = fee_usdc at current rate + trade = Trade( id=str(uuid.uuid4()), market_id=order.market_id, @@ -115,6 +138,16 @@ class PaperExecutor: timestamp=datetime.now(UTC), reasoning=order.reasoning, paper=True, + # Phase 1 audit fields + edge_gross=order.edge_gross, + edge_net=order.edge_net, + prior_prob=order.prior_prob, + final_prob=order.final_prob, + mid_price=order.mid_price, + spread_estimate=order.spread_estimate, + commission=commission, + # Phase 2 family + family_key=order.family_key, ) # Update paper portfolio diff --git a/bot/main.py b/bot/main.py index 52e250d..1868fc6 100644 --- a/bot/main.py +++ b/bot/main.py @@ -1,17 +1,16 @@ """ Polymarket Trading Bot — Main Entry Point -# ci-test: 2026-04-14 +# ci-test: 2026-04-16 """ import asyncio import logging import os -from contextlib import asynccontextmanager from datetime import datetime, timezone -from bot.data.polymarket import PolymarketClient +from bot.data.polymarket import PolymarketClient, market_family_key from bot.data.external import ExternalDataClient from bot.data.news import NewsClient -from bot.strategy.bayesian import BayesianStrategy +from bot.strategy.bayesian import BayesianStrategy, gnews_priority from bot.risk.manager import RiskManager from bot.executor.paper import PaperExecutor from bot.metrics.tracker import MetricsTracker @@ -34,65 +33,100 @@ async def run_trading_loop( risk: RiskManager, executor: PaperExecutor, metrics: MetricsTracker, + db: Database, ) -> None: """Main trading loop — runs every 60 seconds.""" log.info("Trading loop started. PAPER_MODE=%s", PAPER_MODE) while True: try: - # 1. Fetch active crypto/finance markets + # 1. Fetch active markets (90-day window) markets = await poly.get_active_markets() log.info("Found %d active markets", len(markets)) - # Sort: politics markets first (soonest-resolving → highest GNews priority), - # then all others. 
This ensures the 5-query-per-cycle cap hits the most - # time-sensitive political markets before the budget runs out. - def _sort_key(m): - is_pol = m.category == "politics" - try: - dt = datetime.fromisoformat(m.end_date.replace("Z", "+00:00")) - except Exception: - dt = datetime(9999, 12, 31, tzinfo=timezone.utc) - return (0 if is_pol else 1, dt) - - markets = sorted(markets, key=_sort_key) - for _m in markets: - log.info(" [market] %s | ends: %s | yes_price: %.3f", - _m.question, _m.end_date, _m.yes_price) - # 2. Get external signals ext_data = await external.get_all_signals() - # Reset per-cycle GNews counter so the limit applies fresh each cycle + # 3. Build occupied_families from the current open portfolio positions. + # This prevents re-entering a family where we already hold a position. + # We also pull from DB to survive pod restarts. + portfolio = executor.get_portfolio() + occupied_families: set[str] = set() + for market_id in portfolio.positions: + mkt = next((m for m in markets if m.id == market_id), None) + if mkt: + occupied_families.add(market_family_key(mkt)) + # Also seed from DB in case a family was traded in a prior cycle + # that isn't reflected in the current markets list + db_families = await db.get_open_families() + occupied_families |= db_families + if occupied_families: + log.info("Occupied families (from portfolio): %s", sorted(occupied_families)) + + # 4. Sort markets. + # Politics: sort by gnews_priority DESC (highest-value markets get + # GNews budget first — Phase 3). + # Others: sort by end_date ASC (soonest-resolving first). 
+ def _sort_key(m): + try: + dt = datetime.fromisoformat(m.end_date.replace("Z", "+00:00")) + except Exception: + dt = datetime(9999, 12, 31, tzinfo=timezone.utc) + if m.category == "politics": + priority = gnews_priority(m, strategy._news) if strategy._news else 0.0 + # Bucket 0 = politics, sort by priority DESC (negate for asc sort) + return (0, -priority, dt) + return (1, 0.0, dt) + + markets = sorted(markets, key=_sort_key) + + for _m in markets: + log.info( + " [market] %-55s | cat=%-12s | family=%-28s | ends=%s | yes=%.3f", + _m.question[:55], _m.category, market_family_key(_m), + _m.end_date[:10] if _m.end_date else "?", _m.yes_price, + ) + + # Reset per-cycle GNews counter strategy.reset_cycle() + # 5. Evaluate each market + cycle_trades = 0 for market in markets: - # 3. Estimate true probability - signal = await strategy.evaluate(market, ext_data) + # evaluate() returns None for all skips — reasons are logged internally + signal = await strategy.evaluate(market, ext_data, occupied_families) if signal is None: continue log.info( - "Signal: market=%s poly_price=%.3f our_estimate=%.3f confidence=%.2f", + "Signal generated: market=%-50s | edge_gross=%+.3f | edge_net=%+.3f | " + "regime_min=%.2f | family=%s | conf=%.2f", market.question[:50], - signal.polymarket_price, - signal.estimated_prob, + signal.edge_gross, + signal.edge_net, + signal.regime_min_edge, + signal.family_key, signal.confidence, ) - # 4. Risk check + position sizing - order = risk.size_order(signal, executor.get_portfolio()) + # 6. Risk check + position sizing + order = risk.size_order(signal, portfolio) if order is None: log.debug("Risk manager rejected order for %s", market.id) continue - # 5. Execute (paper or real) + # 7. Execute (paper) trade = await executor.execute(order) if trade: await metrics.record_trade(trade) log.info("Trade executed: %s", trade) + # Block this family for the rest of the cycle (Phase 2) + occupied_families.add(signal.family_key) + cycle_trades += 1 - # 6. 
Update daily metrics + log.info("Cycle complete — trades this cycle: %d", cycle_trades) + + # 8. Update daily metrics await metrics.update_daily_summary() except Exception as e: @@ -123,7 +157,6 @@ async def main() -> None: metrics = MetricsTracker(db=db) if executor is None: - # Import real executor only when explicitly needed from bot.executor.real import RealExecutor # noqa executor = RealExecutor(db=db) @@ -131,7 +164,7 @@ async def main() -> None: await executor.initialize() try: - await run_trading_loop(poly, external, strategy, risk, executor, metrics) + await run_trading_loop(poly, external, strategy, risk, executor, metrics, db) finally: await db.disconnect() await news.close() diff --git a/bot/risk/manager.py b/bot/risk/manager.py index 785a41f..90f5e27 100644 --- a/bot/risk/manager.py +++ b/bot/risk/manager.py @@ -45,6 +45,17 @@ class Order: signal_edge: float signal_confidence: float reasoning: str + # Phase 1 — edge neto audit fields (passed through from TradingSignal) + edge_gross: float = 0.0 + edge_net: float = 0.0 + prior_prob: float = 0.0 + final_prob: float = 0.0 + mid_price: float = 0.0 + spread_estimate: float = 0.02 + # Phase 2 — market family + family_key: str = "" + # Phase 4 — regime threshold applied + regime_min_edge: float = 0.10 class RiskManager: @@ -125,4 +136,15 @@ class RiskManager: signal_edge=signal.edge, signal_confidence=signal.confidence, reasoning=signal.reasoning, + # Phase 1 — pass audit fields through to executor + edge_gross=signal.edge_gross, + edge_net=signal.edge_net, + prior_prob=signal.prior_prob, + final_prob=signal.final_prob, + mid_price=signal.mid_price, + spread_estimate=signal.spread_estimate, + # Phase 2 — family + family_key=signal.family_key, + # Phase 4 — regime + regime_min_edge=signal.regime_min_edge, ) diff --git a/bot/strategy/bayesian.py b/bot/strategy/bayesian.py index caa10a2..51b70fe 100644 --- a/bot/strategy/bayesian.py +++ b/bot/strategy/bayesian.py @@ -12,20 +12,39 @@ Polymarket might reflect in a 
slow-moving order book. """ import logging import math -from dataclasses import dataclass -from typing import Optional +from dataclasses import dataclass, field +from datetime import datetime, timezone +from typing import Optional, TYPE_CHECKING -from bot.data.polymarket import Market +from bot.data.polymarket import Market, market_family_key from bot.data.external import ExternalSignals -from bot.data.news import NewsClient + +if TYPE_CHECKING: + from bot.data.news import NewsClient log = logging.getLogger(__name__) -# Minimum edge required to place a trade. -# With an informed prior (poly price), 10% means our signals strongly disagree -# with the market — much higher bar than before, but necessary to avoid noise. -MIN_EDGE = 0.10 # 10% edge minimum -MIN_CONFIDENCE = 0.55 # Minimum confidence in our estimate +# ───────────────────────────────────────────────────────────────────────────── +# Cost constants (Phase 1 — heuristics, not exact Polymarket exchange costs) +# ───────────────────────────────────────────────────────────────────────────── +# spread_estimate: approximate half-spread for medium-liquidity Polymarket +# markets. Real spread varies by market and time; 0.02 is a conservative +# starting estimate. Replace with live order-book data when available. +SPREAD_ESTIMATE: float = 0.02 + +# commission_rate: Polymarket taker fee approximation. Current Polymarket fee +# is 0% on CLOB but was 2% historically; keeping 2% as a conservative buffer +# against future fee changes and exchange rate effects. +COMMISSION_RATE: float = 0.02 + +# Combined cost floor deducted from edge_gross to get edge_net. 
# ─────────────────────────────────────────────────────────────────────────────
# Phase 4 — Regime-based minimum edge (uses edge_NET, not edge_gross)
# ─────────────────────────────────────────────────────────────────────────────

def _regime_min_edge(category: str, days_to_resolution: int) -> float:
    """Return the minimum edge_net required to execute a trade.

    Only the "politics" category is time-dependent; every other category
    (tech, crypto/finance, events, unknown) uses a flat threshold.

        category  | days_to_resolution | min_edge_net
        ──────────┼────────────────────┼─────────────
        politics  | > 60               | 0.12
        politics  | 31–60              | 0.10
        politics  | <= 30              | 0.08
        any other | any                | 0.10
    """
    if category == "politics":
        if days_to_resolution > 60:
            return 0.12
        if days_to_resolution > 30:
            return 0.10
        return 0.08
    return 0.10  # tech, crypto/finance, events, default


def _days_to_resolution(end_date: str, now: Optional[datetime] = None) -> int:
    """Return whole days from ``now`` until ``end_date``, never below 0.

    Args:
        end_date: ISO-8601 timestamp; a trailing ``Z`` is accepted as UTC
            and a timestamp without an offset is treated as UTC.
        now: Reference instant; defaults to the current UTC time. Exposed
            so callers and tests can pin the clock. A naive value is
            treated as UTC.

    Returns:
        The number of complete days remaining (0 for dates in the past), or
        30 when ``end_date`` is empty, not a string, or unparseable — i.e.
        an unknown date is treated as medium-term.
    """
    if not end_date:
        return 30  # conservative: treat as medium-term
    try:
        dt = datetime.fromisoformat(end_date.replace("Z", "+00:00"))
    except (AttributeError, TypeError, ValueError):
        # AttributeError covers non-string values that have no .replace().
        return 30
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)
    if now is None:
        now = datetime.now(timezone.utc)
    elif now.tzinfo is None:
        now = now.replace(tzinfo=timezone.utc)
    return max(0, (dt - now).days)


# ─────────────────────────────────────────────────────────────────────────────
# Phase 3 — GNews priority scoring
# ─────────────────────────────────────────────────────────────────────────────

def gnews_priority(market: "Market", news: "NewsClient") -> float:
    """Score a market for GNews query priority (higher = query it sooner).

    Formula: priority = uncertainty × volume_score × freshness

        uncertainty  = 1 - |prior - 0.5| × 2, with prior = yes_price clamped
                       to [0.05, 0.95]; so 1.0 at 50% and 0.1 at the extremes
        volume_score = volume_24h / 10_000 clamped to [0.0, 1.0]; a missing
                       (None) or negative volume scores 0.0
        freshness    = news.get_freshness(market.question)

    Only the market's own price, volume and question are considered; family
    occupancy is not part of this score.
    """
    prior = max(0.05, min(0.95, market.yes_price))
    uncertainty = 1.0 - abs(prior - 0.5) * 2
    # Guard against None / negative volume so one bad record cannot raise
    # inside a sort key or push a market to the front with a negative score.
    volume = market.volume_24h or 0.0
    volume_score = max(0.0, min(volume / 10_000, 1.0))
    freshness = news.get_freshness(market.question)
    return uncertainty * volume_score * freshness
+ mid_price: float = 0.0 + spread_estimate: float = SPREAD_ESTIMATE + # ── Phase 2: market families ───────────────────────────────────────────── + family_key: str = "" + # ── Phase 4: regime ────────────────────────────────────────────────────── + regime_min_edge: float = 0.10 class BayesianStrategy: @@ -59,34 +164,59 @@ class BayesianStrategy: - BTC/ETH price momentum - Fear & Greed index - Market cap trend / BTC dominance - We only bet when our signals move the estimate far enough from the prior - to justify the fee + slippage cost (MIN_EDGE). + - GNews sentiment (politics only, capped at MAX_NEWS_QUERIES_PER_CYCLE) + + Execution gate (Phase 1 + 4): + - Compute edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE + - Only trade when edge_net > regime_min_edge(category, days_to_resolution) + + Family deduplication (Phase 2): + - At most 1 open position per market family per cycle. + - Caller passes occupied_families; this method skips and logs SKIP_FAMILY. + + GNews prioritisation (Phase 3): + - Caller pre-sorts politics markets by gnews_priority() (desc) so the + highest-value markets consume the GNews budget first. + - Within evaluate(), the per-cycle cap is enforced. """ - def __init__(self, news: Optional[NewsClient] = None) -> None: + def __init__(self, news: Optional["NewsClient"] = None) -> None: self._signal_count = 0 - self._news = news # Optional; degrades gracefully when None or key missing + self._news = news self._news_queries_this_cycle = 0 def reset_cycle(self) -> None: - """Call once at the start of each trading cycle to reset the per-cycle GNews counter.""" + """Call once at the start of each trading cycle to reset per-cycle counters.""" self._news_queries_this_cycle = 0 async def evaluate( self, market: Market, ext: ExternalSignals, + occupied_families: set[str], ) -> Optional[TradingSignal]: """ - Evaluate a market and return a signal if edge exists. - Returns None if no actionable opportunity. 
+ Evaluate a market and return a TradingSignal if actionable. + + Returns None with a structured log line in all skip cases. + Skip reasons (Phase 5 observability): + SKIP_UNSUPPORTED — category not supported + SKIP_NO_SIGNALS — external data unavailable + SKIP_PRIOR_EXTREME — prior < 0.08 or > 0.92 + SKIP_FAMILY — family already has an open/pending position + SKIP_EDGE_NET — edge_net < regime_min_edge + SKIP_CONFIDENCE — confidence < MIN_CONFIDENCE """ question_lower = market.question.lower() - category = market.category # set by PolymarketClient + category = market.category - # Classify what kind of market this is - is_price_above = any(w in question_lower for w in ["above", "over", "exceed", "higher", "atleast", "reach"]) - is_price_below = any(w in question_lower for w in ["below", "under", "less than", "lower", "drop"]) + # ── Classify market type ───────────────────────────────────────────── + is_price_above = any(w in question_lower for w in [ + "above", "over", "exceed", "higher", "atleast", "reach", + ]) + is_price_below = any(w in question_lower for w in [ + "below", "under", "less than", "lower", "drop", + ]) is_btc = "btc" in question_lower or "bitcoin" in question_lower is_eth = "eth" in question_lower or "ethereum" in question_lower @@ -100,7 +230,9 @@ class BayesianStrategy: w in question_lower for w in ["crypto", "market cap", "total market", "altcoin", "defi"] ) is_macro = any( - w in question_lower for w in ["nasdaq", "s&p", "sp500", "inflation", "fed rate", "interest rate", "tariff"] + w in question_lower for w in [ + "nasdaq", "s&p", "sp500", "inflation", "fed rate", "interest rate", "tariff", + ] ) is_politics = category == "politics" is_tech = category == "tech" @@ -112,45 +244,52 @@ class BayesianStrategy: ) if not is_any_supported: log.info( - "SKIP %-50s | reason=unsupported category=%r", + "SKIP_UNSUPPORTED %-50s | cat=%r", market.question[:50], category, ) return None if not ext.valid: log.info( - "SKIP %-50s | reason=no external 
signals", + "SKIP_NO_SIGNALS %-50s | reason=external data unavailable", market.question[:50], ) - return None # Can't reason without external data + return None - # --- Bayesian probability estimation --- - # Prior = Polymarket consensus price, clamped away from extremes. - # The market already aggregates information from many traders; - # our signals update from that informed baseline, not from 0.5. + # ── Phase 1: prior + prior-extreme filter ──────────────────────────── prior = max(0.05, min(0.95, market.yes_price)) - # Skip markets where the crowd has already reached near-certainty. - # Below 0.08 or above 0.92 there is not enough room for our signals - # to generate MIN_EDGE — any trade would be fighting near-certain consensus. if market.yes_price < 0.08: log.info( - "SKIP %-50s | cat=%-12s | prior=%.3f | reason=prior too low, market already certain", + "SKIP_PRIOR_EXTREME %-50s | cat=%-12s | prior=%.3f | reason=prior<0.08", market.question[:50], category, market.yes_price, ) return None if market.yes_price > 0.92: log.info( - "SKIP %-50s | cat=%-12s | prior=%.3f | reason=prior too high, market already certain", + "SKIP_PRIOR_EXTREME %-50s | cat=%-12s | prior=%.3f | reason=prior>0.92", market.question[:50], category, market.yes_price, ) return None + # ── Phase 2: family deduplication ──────────────────────────────────── + family = market_family_key(market) + if family in occupied_families: + log.info( + "SKIP_FAMILY %-50s | cat=%-12s | family=%s", + market.question[:50], category, family, + ) + return None + + # ── Phase 4: regime min-edge ───────────────────────────────────────── + days = _days_to_resolution(market.end_date) + regime_min = _regime_min_edge(category, days) + + # ── Bayesian probability estimation ────────────────────────────────── sources: list[str] = [f"Prior=poly({prior:.3f})"] adjustments: list[float] = [] - # Signal 1: Price momentum (asset-specific or total market cap as proxy) - # For politics/tech/events use BTC as a broad sentiment 
proxy. + # Signal 1: price momentum (asset-specific or BTC as sentiment proxy) if is_btc: momentum = ext.btc_change_24h asset_label = "BTC" @@ -158,17 +297,14 @@ class BayesianStrategy: momentum = ext.eth_change_24h asset_label = "ETH" elif is_politics or is_tech or is_events: - # BTC as risk-sentiment proxy for non-crypto categories momentum = ext.btc_change_24h asset_label = "BTC(sentiment)" else: - # Altcoins and general crypto: use total market cap change as proxy momentum = ext.total_market_cap_change asset_label = "total mktcap" if abs(momentum) > 2: - momentum_adj = math.tanh(momentum / 20) * 0.15 # Max ±15% - # For non-directional markets (politics/events/tech), momentum is weaker signal + momentum_adj = math.tanh(momentum / 20) * 0.15 if is_politics or is_tech or is_events: momentum_adj *= 0.5 adjustments.append(momentum_adj if is_price_above else -momentum_adj) @@ -185,26 +321,19 @@ class BayesianStrategy: else: fg_adj = (fg - 50) / 50 * 0.04 sources.append(f"Fear&Greed: {fg} (neutral)") - adjustments.append(fg_adj if is_price_above else -fg_adj) # Signal 3: BTC dominance — hurts altcoins when high if (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance > 55: - dom_adj = -0.03 if is_price_above else 0.03 - adjustments.append(dom_adj) + adjustments.append(-0.03 if is_price_above else 0.03) sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (high → alt pressure)") elif (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance < 45: - dom_adj = 0.03 if is_price_above else -0.03 - adjustments.append(dom_adj) + adjustments.append(0.03 if is_price_above else -0.03) sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (low → alt season)") - # Signal 4: GNews sentiment — politics markets only. - # BTC/F&G already cover crypto and macro; GNews budget is too tight to - # waste on tech/events. Cap at MAX_NEWS_QUERIES_PER_CYCLE per cycle so - # we prioritise the soonest-resolving markets (caller sorts by end_date). 
- # Applied as a direct log-odds shift — stronger signal than macro proxies. - # Weight NEWS_LOGODDS_WEIGHT=1.5 means a ±1.0 sentiment score shifts - # log-odds by ±1.5 (e.g. 50% prior → ~82% / ~18%). + # Signal 4: GNews sentiment (politics only, budget-gated) + # Phase 3: caller has pre-sorted markets by gnews_priority() so the + # highest-value markets reach this block first. news_log_adj = 0.0 if is_politics and self._news is not None: if self._news_queries_this_cycle < MAX_NEWS_QUERIES_PER_CYCLE: @@ -214,72 +343,81 @@ class BayesianStrategy: news_log_adj = sentiment * NEWS_LOGODDS_WEIGHT sources.append(f"GNews: {sentiment:+.2f}") else: - log.debug( - "GNews cycle limit (%d) reached — skipping news for %r", - MAX_NEWS_QUERIES_PER_CYCLE, market.question[:50], + log.info( + "SKIP_GNEWS_PRIORITY %-50s | reason=cycle budget %d reached", + market.question[:50], MAX_NEWS_QUERIES_PER_CYCLE, ) - # Macro/politics/tech/events: cap confidence lower to reflect weaker signal quality - if is_macro or is_politics or is_tech or is_events: - confidence_cap = 0.65 - else: - confidence_cap = 0.90 + # Confidence cap: macro/politics/tech signals are weaker proxies + confidence_cap = 0.65 if (is_macro or is_politics or is_tech or is_events) else 0.90 - # Compute posterior using log-odds updating. - # total_adj (BTC/F&G/dominance) is amplified ×2 because those are weak proxies. - # news_log_adj is applied at face value — it IS a direct log-odds signal. 
+ # Posterior via log-odds updating log_odds_prior = math.log(prior / (1 - prior)) total_adj = sum(adjustments) estimated_prob = _sigmoid(log_odds_prior + total_adj * 2 + news_log_adj) estimated_prob = max(0.05, min(0.95, estimated_prob)) - # Compute edge - edge = estimated_prob - market.yes_price - direction = "BUY_YES" if edge > 0 else "BUY_NO" - abs_edge = abs(edge) + # ── Phase 1: edge_gross and edge_net ───────────────────────────────── + raw_edge = estimated_prob - market.yes_price + direction = "BUY_YES" if raw_edge > 0 else "BUY_NO" + edge_gross = abs(raw_edge) + # NOTE: commission/size_usdc = COMMISSION_RATE always (constant fraction). + edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE + # mid_price falls back to yes_price; live order-book data is a future enhancement + mid_price = market.yes_price # Confidence based on signal agreement agreement = sum(1 for a in adjustments if (a > 0) == (total_adj > 0)) confidence = min(confidence_cap, 0.4 + (agreement / max(len(adjustments), 1)) * 0.5) - # News signal available → boost confidence by 0.10 (news corroborates macro signals) if news_log_adj != 0.0: confidence = min(confidence_cap, confidence + 0.10) - # Log evaluation result for every market - action = "TRADE" if (abs_edge >= MIN_EDGE and confidence >= MIN_CONFIDENCE) else "SKIP" - skip_reason = "" - if action == "SKIP": - reasons = [] - if abs_edge < MIN_EDGE: - reasons.append(f"edge={abs_edge:.3f}<{MIN_EDGE}") + # ── Phase 5: structured audit log ──────────────────────────────────── + passed_gross = edge_gross >= regime_min + passed_net = edge_net >= regime_min + can_trade = passed_net and confidence >= MIN_CONFIDENCE + + if not can_trade: + skip_parts: list[str] = [] + if not passed_gross: + skip_parts.append(f"edge_gross={edge_gross:.3f}<{regime_min:.2f}(regime)") + elif not passed_net: + skip_parts.append( + f"edge_net={edge_net:.3f}<{regime_min:.2f}(regime) " + f"[gross={edge_gross:.3f} pass]" + ) if confidence < MIN_CONFIDENCE: - 
reasons.append(f"conf={confidence:.2f}<{MIN_CONFIDENCE}") - skip_reason = " | reason=" + ",".join(reasons) - - log.info( - "%-5s %-50s | cat=%-12s | prior=%.3f | est=%.3f | edge=%+.3f | conf=%.2f | dir=%-8s | signals=%s%s", - action, - market.question[:50], - category, - prior, - estimated_prob, - edge, - confidence, - direction, - ", ".join(sources[1:]) or "none", - skip_reason, - ) - - # Filter: only trade if edge and confidence thresholds met - if abs_edge < MIN_EDGE or confidence < MIN_CONFIDENCE: + skip_parts.append(f"conf={confidence:.2f}<{MIN_CONFIDENCE}") + log.info( + "SKIP_EDGE_NET %-50s | cat=%-12s | family=%-28s | " + "prior=%.3f | est=%.3f | gross=%+.3f | net=%+.3f | " + "regime=%.2f | days=%d | conf=%.2f | signals=%s | %s", + market.question[:50], category, family, + prior, estimated_prob, edge_gross, edge_net, + regime_min, days, confidence, + ", ".join(sources[1:]) or "none", + " | ".join(skip_parts), + ) return None reasoning = ( f"Prior=poly({prior:.3f}) → estimate={estimated_prob:.3f} | " f"Poly price={market.yes_price:.3f} | " - f"Edge={edge:+.3f} | " + f"edge_gross={edge_gross:+.3f} | edge_net={edge_net:+.3f} | " + f"regime_min={regime_min:.2f} | days={days} | " + f"family={family} | " f"Direction={direction} | " - f"Signals: {', '.join(sources[1:])}" # skip the prior label already shown + f"Signals: {', '.join(sources[1:])}" + ) + + log.info( + "TRADE %-50s | cat=%-12s | family=%-28s | " + "prior=%.3f | est=%.3f | gross=%+.3f | net=%+.3f | " + "regime=%.2f | days=%d | conf=%.2f | dir=%-8s | signals=%s", + market.question[:50], category, family, + prior, estimated_prob, edge_gross, edge_net, + regime_min, days, confidence, direction, + ", ".join(sources[1:]) or "none", ) self._signal_count += 1 @@ -288,11 +426,22 @@ class BayesianStrategy: question=market.question, polymarket_price=market.yes_price, estimated_prob=estimated_prob, - edge=abs_edge, + edge=edge_gross, # backward compat — same as edge_gross confidence=confidence, 
direction=direction, reasoning=reasoning, sources=sources, + # Phase 1 new fields + edge_gross=edge_gross, + edge_net=edge_net, + prior_prob=prior, + final_prob=estimated_prob, + mid_price=mid_price, + spread_estimate=SPREAD_ESTIMATE, + # Phase 2 new fields + family_key=family, + # Phase 4 new fields + regime_min_edge=regime_min, )