""" Bayesian Market Making Strategy. Core idea: 1. Compute a prior probability for a market outcome using external data 2. Compare with Polymarket's current price 3. If divergence > threshold + confidence is high enough → generate signal For crypto markets: if BTC is up 5% and fear/greed is 75 (greed), a market asking "Will BTC be above $X?" should be priced higher than Polymarket might reflect in a slow-moving order book. """ import logging import math from dataclasses import dataclass, field from datetime import datetime, timezone from typing import Optional, TYPE_CHECKING from bot.data.polymarket import Market, market_family_key from bot.data.external import ExternalSignals if TYPE_CHECKING: from bot.data.news import NewsClient from bot.data.manifold import ManifoldClient log = logging.getLogger(__name__) # ───────────────────────────────────────────────────────────────────────────── # Cost constants (Phase 1 — heuristics, not exact Polymarket exchange costs) # ───────────────────────────────────────────────────────────────────────────── # spread_estimate: approximate half-spread for medium-liquidity Polymarket # markets. Real spread varies by market and time; 0.02 is a conservative # starting estimate. Replace with live order-book data when available. SPREAD_ESTIMATE: float = 0.02 # commission_rate: Polymarket taker fee approximation. Current Polymarket fee # is 0% on CLOB but was 2% historically; keeping 2% as a conservative buffer # against future fee changes and exchange rate effects. COMMISSION_RATE: float = 0.02 # Combined cost floor deducted from edge_gross to get edge_net. # edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE TOTAL_COST_RATE: float = SPREAD_ESTIMATE + COMMISSION_RATE # 0.04 # ───────────────────────────────────────────────────────────────────────────── # Other strategy constants # ───────────────────────────────────────────────────────────────────────────── MIN_CONFIDENCE = 0.55 # Minimum confidence to generate a signal # Log-odds weight applied to the GNews sentiment score (range ±1.0). # A weight of 1.5 means a fully negative/positive signal shifts log-odds by ±1.5, # which moves a 50% prior to ~18%/82% — strong but not overwhelming. NEWS_LOGODDS_WEIGHT = 1.5 # Log-odds weight applied to Manifold cross-market probability signal. # Weight 0.6: a 30 pp divergence (Manifold 0.75 vs Poly 0.45) produces # edge_gross ≈ 0.19, clearing politics far-horizon regime_min=0.12 after costs. # Weaker than NEWS_LOGODDS_WEIGHT because Manifold can have illiquid/stale markets. MANIFOLD_LOGODDS_WEIGHT = 0.6 # GNews free tier: 100 req/day. We limit to 5 queries per trading cycle # (politics markets only) and rely on 6 h cache to stay within budget. MAX_NEWS_QUERIES_PER_CYCLE = 5 # ───────────────────────────────────────────────────────────────────────────── # Phase 4 — Regime-based minimum edge (uses edge_NET, not edge_gross) # ───────────────────────────────────────────────────────────────────────────── def _regime_min_edge(category: str, days_to_resolution: int) -> float: """ Return the minimum edge_net required to execute a trade. Thresholds are higher for far-future politics markets (less signal, more noise) and lower for near-term politics (time pressure makes any edge actionable). Tech/crypto use a flat threshold. category | days_to_resolution | min_edge_net ──────────────────────┼────────────────────┼───────────── politics | > 60 d | 0.12 politics | 30–60 d | 0.10 politics | < 30 d | 0.08 tech / crypto/finance | any | 0.10 other / unknown | any | 0.10 """ if category == "politics": if days_to_resolution > 60: return 0.12 if days_to_resolution > 30: return 0.10 return 0.08 return 0.10 # tech, crypto/finance, events, default def _days_to_resolution(end_date: str) -> int: """Return calendar days until market resolution, or 30 if unknown.""" if not end_date: return 30 # conservative: treat as medium-term try: dt = datetime.fromisoformat(end_date.replace("Z", "+00:00")) if dt.tzinfo is None: dt = dt.replace(tzinfo=timezone.utc) days = (dt - datetime.now(timezone.utc)).days return max(0, days) except (ValueError, TypeError): return 30 # ───────────────────────────────────────────────────────────────────────────── # Phase 3 — GNews priority scoring # ───────────────────────────────────────────────────────────────────────────── def gnews_priority(market: Market, news: "NewsClient") -> float: """ Score a market for GNews query priority (higher = more valuable to query). Formula: priority = uncertainty × volume_score × freshness uncertainty = 1 - |prior - 0.5| × 2 (1.0 at 50%, 0.0 at 0%/100%) volume_score = min(volume_24h / 10_000, 1.0) freshness = NewsClient.get_freshness(question) (1.0 never queried → 0.10 queried <2h ago) Markets with occupied families, or that have already been queried recently, score lower and receive GNews budget only if capacity remains. """ prior = max(0.05, min(0.95, market.yes_price)) uncertainty = 1.0 - abs(prior - 0.5) * 2 volume_score = min(market.volume_24h / 10_000, 1.0) freshness = news.get_freshness(market.question) return uncertainty * volume_score * freshness # ───────────────────────────────────────────────────────────────────────────── # Signal and strategy classes # ───────────────────────────────────────────────────────────────────────────── @dataclass class TradingSignal: market_id: str question: str polymarket_price: float # Current market price for YES (0-1) estimated_prob: float # Our Bayesian estimate (0-1) edge: float # Kept for backward compat — equals edge_gross confidence: float # How confident we are (0-1) direction: str # "BUY_YES" | "BUY_NO" reasoning: str # Human-readable explanation for logging sources: list[str] # Data sources used # ── Phase 1: edge neto ─────────────────────────────────────────────────── edge_gross: float = 0.0 # |estimated_prob - polymarket_price| edge_net: float = 0.0 # edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE prior_prob: float = 0.0 # market.yes_price clamped to [0.05, 0.95] final_prob: float = 0.0 # estimated_prob (explicit alias) # mid_price: (bid+ask)/2 from order book when available; falls back to # market.yes_price. Order-book fetching is a future enhancement — using # yes_price here is conservative (already the ask side). mid_price: float = 0.0 spread_estimate: float = SPREAD_ESTIMATE # ── Phase 2: market families ───────────────────────────────────────────── family_key: str = "" # ── Phase 4: regime ────────────────────────────────────────────────────── regime_min_edge: float = 0.10 # ── Phase 6: per-feature log-odds contributions ─────────────────────────── # All values are in log-odds space for direct comparability. # feat_fg_lo / feat_mom_lo: probability-delta × 2 → log-odds. # feat_news_lo / feat_mfld_lo: already log-odds (no scaling). # feat_btc_dom_lo: btc-dominance probability-delta × 2 → log-odds. feat_fg_lo: float = 0.0 feat_mom_lo: float = 0.0 feat_news_lo: float = 0.0 feat_mfld_lo: float = 0.0 feat_btc_dom_lo: float = 0.0 class BayesianStrategy: """ Estimates true probability using external signals and Bayesian updating. Prior: Polymarket's current YES price (market consensus — not 0.5) Likelihood updates from: - BTC/ETH price momentum - Fear & Greed index - Market cap trend / BTC dominance - GNews sentiment (politics only, capped at MAX_NEWS_QUERIES_PER_CYCLE) Execution gate (Phase 1 + 4): - Compute edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE - Only trade when edge_net > regime_min_edge(category, days_to_resolution) Family deduplication (Phase 2): - At most 1 open position per market family per cycle. - Caller passes occupied_families; this method skips and logs SKIP_FAMILY. GNews prioritisation (Phase 3): - Caller pre-sorts politics markets by gnews_priority() (desc) so the highest-value markets consume the GNews budget first. - Within evaluate(), the per-cycle cap is enforced. """ def __init__( self, news: Optional["NewsClient"] = None, manifold: Optional["ManifoldClient"] = None, ) -> None: self._signal_count = 0 self._news = news self._manifold = manifold self._news_queries_this_cycle = 0 # Per-cycle counters — reset by reset_cycle(), read by get_cycle_stats() self._skip_family: int = 0 self._skip_prior_extreme: int = 0 self._skip_edge_net_nonpositive: int = 0 # edge_net <= 0 self._skip_edge_net_below_regime: int = 0 # 0 < edge_net < regime_min self._manifold_fetched: int = 0 # markets where Manifold prob was retrieved self._manifold_on_trade: int = 0 # subset of above that ended in a trade signal # (edge_gross, edge_net, regime_min) for every market that reached the # edge computation stage (passed prior-extreme, family, unsupported filters) self._evaluated_edges: list[tuple[float, float, float]] = [] def reset_cycle(self) -> None: """Call once at the start of each trading cycle to reset per-cycle counters.""" self._news_queries_this_cycle = 0 self._skip_family = 0 self._skip_prior_extreme = 0 self._skip_edge_net_nonpositive = 0 self._skip_edge_net_below_regime = 0 self._manifold_fetched = 0 self._manifold_on_trade = 0 self._evaluated_edges = [] def get_cycle_stats(self) -> dict: """Return per-cycle counters for the [CYCLE SUMMARY] log block.""" edges = self._evaluated_edges all_gross = [g for g, n, r in edges] all_net = [n for g, n, r in edges] return { "skip_family": self._skip_family, "skip_prior_extreme": self._skip_prior_extreme, "skip_edge_net_nonpositive": self._skip_edge_net_nonpositive, "skip_edge_net_below_regime": self._skip_edge_net_below_regime, "gnews_queries_used": self._news_queries_this_cycle, "max_edge_gross": max(all_gross) if all_gross else 0.0, "max_edge_net": max(all_net) if all_net else 0.0, "evaluated_count": len(edges), "gross_gt_002": sum(1 for g in all_gross if g > 0.02), "gross_gt_004": sum(1 for g in all_gross if g > 0.04), "manifold_matches_accepted": self._manifold_on_trade, "manifold_matches_rejected": self._manifold_fetched - self._manifold_on_trade, } async def evaluate( self, market: Market, ext: ExternalSignals, occupied_families: set[str], ) -> Optional[TradingSignal]: """ Evaluate a market and return a TradingSignal if actionable. Returns None with a structured log line in all skip cases. Skip reasons (Phase 5 observability): SKIP_UNSUPPORTED — category not supported SKIP_NO_SIGNALS — external data unavailable SKIP_PRIOR_EXTREME — prior < 0.08 or > 0.92 SKIP_FAMILY — family already has an open/pending position SKIP_EDGE_NET — edge_net < regime_min_edge SKIP_CONFIDENCE — confidence < MIN_CONFIDENCE """ question_lower = market.question.lower() category = market.category # ── Classify market type ───────────────────────────────────────────── is_price_above = any(w in question_lower for w in [ "above", "over", "exceed", "higher", "atleast", "reach", ]) is_price_below = any(w in question_lower for w in [ "below", "under", "less than", "lower", "drop", ]) is_btc = "btc" in question_lower or "bitcoin" in question_lower is_eth = "eth" in question_lower or "ethereum" in question_lower is_sol = "sol" in question_lower or "solana" in question_lower is_xrp = "xrp" in question_lower or "ripple" in question_lower is_doge = "doge" in question_lower or "dogecoin" in question_lower is_altcoin = is_sol or is_xrp or is_doge or any( w in question_lower for w in ["ltc", "litecoin", "bnb", "ada", "cardano", "avax", "avalanche"] ) is_general_crypto = any( w in question_lower for w in ["crypto", "market cap", "total market", "altcoin", "defi"] ) is_macro = any( w in question_lower for w in [ "nasdaq", "s&p", "sp500", "inflation", "fed rate", "interest rate", "tariff", ] ) is_politics = category == "politics" is_tech = category == "tech" is_events = category == "events" is_any_supported = ( is_btc or is_eth or is_altcoin or is_general_crypto or is_macro or is_politics or is_tech or is_events ) if not is_any_supported: log.info( "SKIP_UNSUPPORTED %-50s | cat=%r", market.question[:50], category, ) return None if not ext.valid: log.info( "SKIP_NO_SIGNALS %-50s | reason=external data unavailable", market.question[:50], ) return None # ── Phase 1: prior + prior-extreme filter ──────────────────────────── prior = max(0.05, min(0.95, market.yes_price)) if market.yes_price < 0.08: self._skip_prior_extreme += 1 log.info( "SKIP_PRIOR_EXTREME %-50s | cat=%-12s | prior=%.3f | reason=prior<0.08", market.question[:50], category, market.yes_price, ) return None if market.yes_price > 0.92: self._skip_prior_extreme += 1 log.info( "SKIP_PRIOR_EXTREME %-50s | cat=%-12s | prior=%.3f | reason=prior>0.92", market.question[:50], category, market.yes_price, ) return None # ── Phase 2: family deduplication ──────────────────────────────────── family = market_family_key(market) if family in occupied_families: self._skip_family += 1 log.info( "SKIP_FAMILY %-50s | cat=%-12s | family=%s", market.question[:50], category, family, ) return None # ── Phase 4: regime min-edge ───────────────────────────────────────── days = _days_to_resolution(market.end_date) regime_min = _regime_min_edge(category, days) # ── Bayesian probability estimation ────────────────────────────────── sources: list[str] = [f"Prior=poly({prior:.3f})"] adjustments: list[float] = [] # Signal 1: price momentum (asset-specific or BTC as sentiment proxy) if is_btc: momentum = ext.btc_change_24h asset_label = "BTC" elif is_eth: momentum = ext.eth_change_24h asset_label = "ETH" elif is_politics or is_tech or is_events: momentum = ext.btc_change_24h asset_label = "BTC(sentiment)" else: momentum = ext.total_market_cap_change asset_label = "total mktcap" _momentum_contribution = 0.0 if abs(momentum) > 2: momentum_adj = math.tanh(momentum / 20) * 0.15 if is_politics or is_tech or is_events: momentum_adj *= 0.5 _momentum_contribution = momentum_adj if is_price_above else -momentum_adj adjustments.append(_momentum_contribution) sources.append(f"{asset_label} 24h: {momentum:+.1f}%") # Signal 2: Fear & Greed fg = ext.fear_greed_index if fg > 70: fg_adj = 0.06 sources.append(f"Fear&Greed: {fg} (greed)") elif fg < 30: fg_adj = -0.06 sources.append(f"Fear&Greed: {fg} (fear)") else: fg_adj = (fg - 50) / 50 * 0.04 sources.append(f"Fear&Greed: {fg} (neutral)") _fg_contribution = fg_adj if is_price_above else -fg_adj adjustments.append(_fg_contribution) # Signal 3: BTC dominance — hurts altcoins when high _btc_dom_contribution = 0.0 if (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance > 55: _btc_dom_contribution = -0.03 if is_price_above else 0.03 adjustments.append(_btc_dom_contribution) sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (high → alt pressure)") elif (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance < 45: _btc_dom_contribution = 0.03 if is_price_above else -0.03 adjustments.append(_btc_dom_contribution) sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (low → alt season)") # Signal 4: GNews sentiment (politics only, budget-gated) # Phase 3: caller has pre-sorted markets by gnews_priority() so the # highest-value markets reach this block first. news_log_adj = 0.0 if is_politics and self._news is not None: if self._news_queries_this_cycle < MAX_NEWS_QUERIES_PER_CYCLE: self._news_queries_this_cycle += 1 sentiment = await self._news.get_sentiment(market.question) if abs(sentiment) > 0.05: news_log_adj = sentiment * NEWS_LOGODDS_WEIGHT sources.append(f"GNews: {sentiment:+.2f}") else: log.info( "SKIP_GNEWS_PRIORITY %-50s | reason=cycle budget %d reached", market.question[:50], MAX_NEWS_QUERIES_PER_CYCLE, ) # Signal 5: Manifold cross-market probability (politics + tech) # Applies a log-odds adjustment proportional to divergence from prior. # No query budget — 30 min cache means network cost is paid once per cycle. manifold_log_adj = 0.0 manifold_used = False if (is_politics or is_tech) and self._manifold is not None: manifold_prob = await self._manifold.get_probability(market.question) if manifold_prob is not None: manifold_used = True self._manifold_fetched += 1 m_clamped = max(0.05, min(0.95, manifold_prob)) m_log = math.log(m_clamped / (1 - m_clamped)) p_log = math.log(prior / (1 - prior)) manifold_log_adj = (m_log - p_log) * MANIFOLD_LOGODDS_WEIGHT sources.append(f"Manifold:{manifold_prob:.2f}") # Confidence cap: macro/politics/tech signals are weaker proxies confidence_cap = 0.65 if (is_macro or is_politics or is_tech or is_events) else 0.90 # Posterior via log-odds updating log_odds_prior = math.log(prior / (1 - prior)) total_adj = sum(adjustments) estimated_prob = _sigmoid(log_odds_prior + total_adj * 2 + news_log_adj + manifold_log_adj) estimated_prob = max(0.05, min(0.95, estimated_prob)) # ── Phase 1: edge_gross and edge_net ───────────────────────────────── raw_edge = estimated_prob - market.yes_price direction = "BUY_YES" if raw_edge > 0 else "BUY_NO" edge_gross = abs(raw_edge) # NOTE: commission/size_usdc = COMMISSION_RATE always (constant fraction). edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE # mid_price falls back to yes_price; live order-book data is a future enhancement mid_price = market.yes_price # Record for cycle summary — every market that reached edge computation self._evaluated_edges.append((edge_gross, edge_net, regime_min)) # Confidence based on signal agreement agreement = sum(1 for a in adjustments if (a > 0) == (total_adj > 0)) confidence = min(confidence_cap, 0.4 + (agreement / max(len(adjustments), 1)) * 0.5) if news_log_adj != 0.0: confidence = min(confidence_cap, confidence + 0.10) if manifold_log_adj != 0.0: confidence = min(confidence_cap, confidence + 0.08) # Per-feature log-odds contributions (Phase 6). # fg / mom / btc_dom: probability-delta × 2 → log-odds. # news / mfld: already log-odds (LOGODDS_WEIGHT already applied). feat_fg_lo = _fg_contribution * 2 feat_mom_lo = _momentum_contribution * 2 feat_news_lo = news_log_adj feat_mfld_lo = manifold_log_adj feat_btc_dom_lo = _btc_dom_contribution * 2 feat_str = ( f"fg_lo={feat_fg_lo:+.4f} mom_lo={feat_mom_lo:+.4f} " f"news_lo={feat_news_lo:+.4f} mfld_lo={feat_mfld_lo:+.4f} " f"btc_dom_lo={feat_btc_dom_lo:+.4f}" ) # ── Phase 5: structured audit log ──────────────────────────────────── passed_gross = edge_gross >= regime_min passed_net = edge_net >= regime_min can_trade = passed_net and confidence >= MIN_CONFIDENCE if not can_trade: # Increment the appropriate edge-net counter if edge_net <= 0: self._skip_edge_net_nonpositive += 1 else: self._skip_edge_net_below_regime += 1 skip_parts: list[str] = [] if not passed_gross: skip_parts.append(f"edge_gross={edge_gross:.3f}<{regime_min:.2f}(regime)") elif not passed_net: skip_parts.append( f"edge_net={edge_net:.3f}<{regime_min:.2f}(regime) " f"[gross={edge_gross:.3f} pass]" ) if confidence < MIN_CONFIDENCE: skip_parts.append(f"conf={confidence:.2f}<{MIN_CONFIDENCE}") log.info( "SKIP_EDGE_NET %-50s | cat=%-12s | family=%-28s | " "prior=%.3f | est=%.3f | gross=%+.3f | net=%+.3f | " "regime=%.2f | days=%d | conf=%.2f | %s | signals=%s | %s", market.question[:50], category, family, prior, estimated_prob, edge_gross, edge_net, regime_min, days, confidence, feat_str, ", ".join(sources[1:]) or "none", " | ".join(skip_parts), ) return None reasoning = ( f"Prior=poly({prior:.3f}) → estimate={estimated_prob:.3f} | " f"Poly price={market.yes_price:.3f} | " f"edge_gross={edge_gross:+.3f} | edge_net={edge_net:+.3f} | " f"regime_min={regime_min:.2f} | days={days} | " f"family={family} | " f"Direction={direction} | " f"{feat_str} | " f"Signals: {', '.join(sources[1:])}" ) log.info( "TRADE %-50s | cat=%-12s | family=%-28s | " "prior=%.3f | est=%.3f | gross=%+.3f | net=%+.3f | " "regime=%.2f | days=%d | conf=%.2f | dir=%-8s | %s | signals=%s", market.question[:50], category, family, prior, estimated_prob, edge_gross, edge_net, regime_min, days, confidence, direction, feat_str, ", ".join(sources[1:]) or "none", ) self._signal_count += 1 if manifold_used: self._manifold_on_trade += 1 return TradingSignal( market_id=market.id, question=market.question, polymarket_price=market.yes_price, estimated_prob=estimated_prob, edge=edge_gross, # backward compat — same as edge_gross confidence=confidence, direction=direction, reasoning=reasoning, sources=sources, # Phase 1 new fields edge_gross=edge_gross, edge_net=edge_net, prior_prob=prior, final_prob=estimated_prob, mid_price=mid_price, spread_estimate=SPREAD_ESTIMATE, # Phase 2 new fields family_key=family, # Phase 4 new fields regime_min_edge=regime_min, # Phase 6 new fields — all in log-odds space feat_fg_lo=feat_fg_lo, feat_mom_lo=feat_mom_lo, feat_news_lo=feat_news_lo, feat_mfld_lo=feat_mfld_lo, feat_btc_dom_lo=feat_btc_dom_lo, ) def _sigmoid(x: float) -> float: return 1 / (1 + math.exp(-x))