Files
polymarket-bot/bot/strategy/bayesian.py
T
chemavx 411d346261
CI/CD / build-and-push (push) Successful in 2m16s
feat(bot): add [CYCLE SUMMARY] diagnostic block at end of each cycle
BayesianStrategy now tracks per-cycle counters (reset each cycle):
  - skip_prior_extreme, skip_family
  - skip_edge_net_nonpositive (edge_net ≤ 0)
  - skip_edge_net_below_regime (0 < edge_net < regime_min)
  - evaluated_edges list for max/pct computations

main.py logs one structured [CYCLE SUMMARY] block per cycle with:
  markets_total, markets_uncertainty_zone, max_edge_gross, max_edge_net,
  pct_edge_gross_gt_002, pct_edge_gross_gt_004, all blocked_by_* counters,
  trades_executed, gnews_queries_used/cap

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 15:55:22 +00:00

492 lines
23 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
Bayesian Market Making Strategy.
Core idea:
1. Compute a prior probability for a market outcome using external data
2. Compare with Polymarket's current price
3. If the divergence exceeds the threshold and confidence is high enough → generate a signal
For crypto markets: if BTC is up 5% and fear/greed is 75 (greed),
a market asking "Will BTC be above $X?" should be priced higher than
Polymarket might reflect in a slow-moving order book.
"""
import logging
import math
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Optional, TYPE_CHECKING
from bot.data.polymarket import Market, market_family_key
from bot.data.external import ExternalSignals
if TYPE_CHECKING:
from bot.data.news import NewsClient
# Module-level logger; every SKIP_* / TRADE audit line in this file goes through it.
log = logging.getLogger(__name__)
# ─────────────────────────────────────────────────────────────────────────────
# Cost constants (Phase 1 — heuristics, not exact Polymarket exchange costs)
# ─────────────────────────────────────────────────────────────────────────────
# spread_estimate: approximate half-spread for medium-liquidity Polymarket
# markets. Real spread varies by market and time; 0.02 is a conservative
# starting estimate. Replace with live order-book data when available.
SPREAD_ESTIMATE: float = 0.02
# commission_rate: Polymarket taker fee approximation. Current Polymarket fee
# is 0% on CLOB but was 2% historically; keeping 2% as a conservative buffer
# against future fee changes and exchange rate effects.
COMMISSION_RATE: float = 0.02
# Combined cost floor deducted from edge_gross to get edge_net:
#   edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
TOTAL_COST_RATE: float = SPREAD_ESTIMATE + COMMISSION_RATE # 0.04
# ─────────────────────────────────────────────────────────────────────────────
# Other strategy constants
# ─────────────────────────────────────────────────────────────────────────────
MIN_CONFIDENCE: float = 0.55 # Minimum confidence to generate a signal
# Log-odds weight applied to the GNews sentiment score (range ±1.0).
# A weight of 1.5 means a fully negative/positive signal shifts log-odds by ±1.5,
# which moves a 50% prior to ~18%/82% — strong but not overwhelming.
NEWS_LOGODDS_WEIGHT: float = 1.5
# GNews free tier: 100 req/day. We limit to 5 queries per trading cycle
# (politics markets only) and rely on 6 h cache to stay within budget.
MAX_NEWS_QUERIES_PER_CYCLE: int = 5
# ─────────────────────────────────────────────────────────────────────────────
# Phase 4 — Regime-based minimum edge (uses edge_NET, not edge_gross)
# ─────────────────────────────────────────────────────────────────────────────
def _regime_min_edge(category: str, days_to_resolution: int) -> float:
"""
Return the minimum edge_net required to execute a trade.
Thresholds are higher for far-future politics markets (less signal, more
noise) and lower for near-term politics (time pressure makes any edge
actionable). Tech/crypto use a flat threshold.
category | days_to_resolution | min_edge_net
──────────────────────┼────────────────────┼─────────────
politics | > 60 d | 0.12
politics | 3060 d | 0.10
politics | < 30 d | 0.08
tech / crypto/finance | any | 0.10
other / unknown | any | 0.10
"""
if category == "politics":
if days_to_resolution > 60:
return 0.12
if days_to_resolution > 30:
return 0.10
return 0.08
return 0.10 # tech, crypto/finance, events, default
def _days_to_resolution(end_date: str) -> int:
"""Return calendar days until market resolution, or 30 if unknown."""
if not end_date:
return 30 # conservative: treat as medium-term
try:
dt = datetime.fromisoformat(end_date.replace("Z", "+00:00"))
if dt.tzinfo is None:
dt = dt.replace(tzinfo=timezone.utc)
days = (dt - datetime.now(timezone.utc)).days
return max(0, days)
except (ValueError, TypeError):
return 30
# ─────────────────────────────────────────────────────────────────────────────
# Phase 3 — GNews priority scoring
# ─────────────────────────────────────────────────────────────────────────────
def gnews_priority(market: Market, news: "NewsClient") -> float:
    """
    Rank a market for GNews budget allocation (higher = query sooner).

    The score is the product of three factors:

        uncertainty  = 1 - |prior - 0.5| * 2, with prior = yes_price clamped
                       to [0.05, 0.95] (so 1.0 at 50% and 0.1 at the clamp)
        volume_score = min(volume_24h / 10_000, 1.0)
        freshness    = news.get_freshness(market.question)

    Args:
        market: Market being scored; reads yes_price, volume_24h, question.
        news: Client that supplies the freshness factor for the question.

    Returns:
        uncertainty * volume_score * freshness.
    """
    clamped_price = min(0.95, market.yes_price)
    clamped_price = max(0.05, clamped_price)
    distance_from_even = abs(clamped_price - 0.5)

    uncertainty_factor = 1.0 - distance_from_even * 2
    volume_factor = min(market.volume_24h / 10_000, 1.0)
    freshness_factor = news.get_freshness(market.question)

    return uncertainty_factor * volume_factor * freshness_factor
# ─────────────────────────────────────────────────────────────────────────────
# Signal and strategy classes
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class TradingSignal:
    """Actionable trade recommendation built by BayesianStrategy.evaluate().

    The first nine fields are required; the phase-tagged fields below them
    carry defaults.
    """
    market_id: str
    question: str
    polymarket_price: float # Current market price for YES (0-1)
    estimated_prob: float # Our Bayesian estimate (0-1)
    edge: float # Kept for backward compat — equals edge_gross
    confidence: float # How confident we are (0-1)
    direction: str # "BUY_YES" | "BUY_NO"
    reasoning: str # Human-readable explanation for logging
    sources: list[str] # Data sources used
    # ── Phase 1: net edge ────────────────────────────────────────────────────
    edge_gross: float = 0.0 # |estimated_prob - polymarket_price|
    edge_net: float = 0.0 # edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
    prior_prob: float = 0.0 # market.yes_price clamped to [0.05, 0.95]
    final_prob: float = 0.0 # estimated_prob (explicit alias)
    # mid_price: intended to be (bid+ask)/2 from the order book; the strategy
    # currently fills it with market.yes_price because order-book fetching is
    # a future enhancement.
    # NOTE(review): the original comment called yes_price "conservative
    # (already the ask side)" — that cannot be confirmed from this file.
    mid_price: float = 0.0
    spread_estimate: float = SPREAD_ESTIMATE
    # ── Phase 2: market families ─────────────────────────────────────────────
    family_key: str = ""
    # ── Phase 4: regime ──────────────────────────────────────────────────────
    regime_min_edge: float = 0.10
class BayesianStrategy:
    """
    Estimates true probability using external signals and Bayesian updating.

    Prior: Polymarket's current YES price (market consensus — not 0.5),
    clamped to [0.05, 0.95].

    Likelihood updates from:
      - BTC/ETH price momentum
      - Fear & Greed index
      - Market cap trend / BTC dominance
      - GNews sentiment (politics only, capped at MAX_NEWS_QUERIES_PER_CYCLE)

    Execution gate (Phase 1 + 4):
      - Compute edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
      - Only trade when edge_net >= regime_min_edge(category, days_to_resolution)
        and confidence >= MIN_CONFIDENCE.

    Family deduplication (Phase 2):
      - Caller passes occupied_families; evaluate() skips any market whose
        family key is in that set and logs SKIP_FAMILY.

    GNews prioritisation (Phase 3):
      - Caller pre-sorts politics markets by gnews_priority() (desc) so the
        highest-value markets consume the GNews budget first.
      - Within evaluate(), the per-cycle cap is enforced.
    """

    def __init__(self, news: Optional["NewsClient"] = None) -> None:
        """
        Args:
            news: Optional sentiment client; when None the GNews signal is
                never queried.
        """
        self._signal_count = 0
        self._news = news
        self._news_queries_this_cycle = 0
        # Per-cycle counters — reset by reset_cycle(), read by get_cycle_stats()
        self._skip_family: int = 0
        self._skip_prior_extreme: int = 0
        self._skip_edge_net_nonpositive: int = 0  # edge_net <= 0
        self._skip_edge_net_below_regime: int = 0  # 0 < edge_net < regime_min
        self._skip_confidence: int = 0  # edge_net passed, confidence too low
        # (edge_gross, edge_net, regime_min) for every market that reached the
        # edge computation stage (passed prior-extreme, family, unsupported filters)
        self._evaluated_edges: list[tuple[float, float, float]] = []

    def reset_cycle(self) -> None:
        """Call once at the start of each trading cycle to reset per-cycle counters."""
        self._news_queries_this_cycle = 0
        self._skip_family = 0
        self._skip_prior_extreme = 0
        self._skip_edge_net_nonpositive = 0
        self._skip_edge_net_below_regime = 0
        self._skip_confidence = 0
        self._evaluated_edges = []

    def get_cycle_stats(self) -> dict:
        """
        Return per-cycle counters for the [CYCLE SUMMARY] log block.

        Returns:
            Dict of skip counters, GNews queries used, max gross/net edge
            (0.0 when nothing was evaluated), the number of evaluated markets
            and how many had edge_gross above 0.02 / 0.04.
        """
        edges = self._evaluated_edges
        all_gross = [g for g, _, _ in edges]
        all_net = [n for _, n, _ in edges]
        return {
            "skip_family": self._skip_family,
            "skip_prior_extreme": self._skip_prior_extreme,
            "skip_edge_net_nonpositive": self._skip_edge_net_nonpositive,
            "skip_edge_net_below_regime": self._skip_edge_net_below_regime,
            "skip_confidence": self._skip_confidence,
            "gnews_queries_used": self._news_queries_this_cycle,
            "max_edge_gross": max(all_gross) if all_gross else 0.0,
            "max_edge_net": max(all_net) if all_net else 0.0,
            "evaluated_count": len(edges),
            "gross_gt_002": sum(1 for g in all_gross if g > 0.02),
            "gross_gt_004": sum(1 for g in all_gross if g > 0.04),
        }

    @staticmethod
    def _question_words(text: str) -> frozenset[str]:
        """Split *text* into its alphanumeric words (punctuation separates)."""
        return frozenset(
            "".join(ch if ch.isalnum() else " " for ch in text).split()
        )

    def _record_gate_skip(self, passed_net: bool, edge_net: float) -> None:
        """Bump the one counter that explains why the execution gate failed."""
        if passed_net:
            # The edge cleared the regime threshold, so only confidence failed.
            self._skip_confidence += 1
        elif edge_net <= 0:
            self._skip_edge_net_nonpositive += 1
        else:
            self._skip_edge_net_below_regime += 1

    async def evaluate(
        self,
        market: Market,
        ext: ExternalSignals,
        occupied_families: set[str],
    ) -> Optional[TradingSignal]:
        """
        Evaluate a market and return a TradingSignal if actionable.

        Args:
            market: Market to evaluate; reads question, category, yes_price,
                end_date and id.
            ext: External signal snapshot; skipped entirely when not valid.
            occupied_families: Family keys that already hold an open/pending
                position.

        Returns:
            A TradingSignal when the execution gate passes, otherwise None
            with a structured log line.

        Skip reasons (Phase 5 observability):
          SKIP_UNSUPPORTED   — category not supported
          SKIP_NO_SIGNALS    — external data unavailable
          SKIP_PRIOR_EXTREME — prior < 0.08 or > 0.92
          SKIP_FAMILY        — family already has an open/pending position
          SKIP_EDGE_NET      — edge_net < regime_min_edge and/or
                               confidence < MIN_CONFIDENCE (the last log
                               field says which)

        SKIP_GNEWS_PRIORITY is informational only: the market is still
        evaluated, just without the news signal.
        """
        question_lower = market.question.lower()
        category = market.category

        # ── Classify market type ─────────────────────────────────────────────
        # Short tokens are matched as whole words: as substrings they hit
        # unrelated words ("eth" in "whether", "sol" in "resolve", "ada" in
        # "canada", "bnb" in "airbnb", "defi" in "deficit", "over" in
        # "government"), which misclassified questions and flipped the
        # adjustment sign.
        words = self._question_words(question_lower)

        # NOTE(review): any question without one of these keywords — "below"
        # style and direction-neutral questions alike — gets the inverted
        # adjustment sign further down. Confirm that is intended for
        # politics/tech/events markets.
        is_price_above = "over" in words or any(
            w in question_lower
            for w in ("above", "exceed", "higher", "atleast", "at least", "reach")
        )
        is_btc = "btc" in question_lower or "bitcoin" in question_lower
        is_eth = "eth" in words or "ethereum" in question_lower
        is_sol = "sol" in words or "solana" in question_lower
        is_xrp = "xrp" in question_lower or "ripple" in question_lower
        is_doge = "doge" in question_lower or "dogecoin" in question_lower
        is_altcoin = (
            is_sol or is_xrp or is_doge
            or "bnb" in words
            or "ada" in words
            or any(
                w in question_lower
                for w in ("ltc", "litecoin", "cardano", "avax", "avalanche")
            )
        )
        is_general_crypto = "defi" in words or any(
            w in question_lower
            for w in ("crypto", "market cap", "total market", "altcoin")
        )
        is_macro = any(
            w in question_lower for w in [
                "nasdaq", "s&p", "sp500", "inflation", "fed rate", "interest rate", "tariff",
            ]
        )
        is_politics = category == "politics"
        is_tech = category == "tech"
        is_events = category == "events"
        is_any_supported = (
            is_btc or is_eth or is_altcoin or is_general_crypto or is_macro
            or is_politics or is_tech or is_events
        )
        if not is_any_supported:
            log.info(
                "SKIP_UNSUPPORTED %-50s | cat=%r",
                market.question[:50], category,
            )
            return None
        if not ext.valid:
            log.info(
                "SKIP_NO_SIGNALS %-50s | reason=external data unavailable",
                market.question[:50],
            )
            return None

        # ── Phase 1: prior + prior-extreme filter ────────────────────────────
        prior = max(0.05, min(0.95, market.yes_price))
        if market.yes_price < 0.08:
            self._skip_prior_extreme += 1
            log.info(
                "SKIP_PRIOR_EXTREME %-50s | cat=%-12s | prior=%.3f | reason=prior<0.08",
                market.question[:50], category, market.yes_price,
            )
            return None
        if market.yes_price > 0.92:
            self._skip_prior_extreme += 1
            log.info(
                "SKIP_PRIOR_EXTREME %-50s | cat=%-12s | prior=%.3f | reason=prior>0.92",
                market.question[:50], category, market.yes_price,
            )
            return None

        # ── Phase 2: family deduplication ────────────────────────────────────
        family = market_family_key(market)
        if family in occupied_families:
            self._skip_family += 1
            log.info(
                "SKIP_FAMILY %-50s | cat=%-12s | family=%s",
                market.question[:50], category, family,
            )
            return None

        # ── Phase 4: regime min-edge ─────────────────────────────────────────
        days = _days_to_resolution(market.end_date)
        regime_min = _regime_min_edge(category, days)

        # ── Bayesian probability estimation ──────────────────────────────────
        sources: list[str] = [f"Prior=poly({prior:.3f})"]
        adjustments: list[float] = []

        # Signal 1: price momentum (asset-specific or BTC as sentiment proxy)
        if is_btc:
            momentum = ext.btc_change_24h
            asset_label = "BTC"
        elif is_eth:
            momentum = ext.eth_change_24h
            asset_label = "ETH"
        elif is_politics or is_tech or is_events:
            momentum = ext.btc_change_24h
            asset_label = "BTC(sentiment)"
        else:
            momentum = ext.total_market_cap_change
            asset_label = "total mktcap"
        if abs(momentum) > 2:
            momentum_adj = math.tanh(momentum / 20) * 0.15
            if is_politics or is_tech or is_events:
                # Crypto momentum is only a weak proxy here: halve its weight.
                momentum_adj *= 0.5
            adjustments.append(momentum_adj if is_price_above else -momentum_adj)
            sources.append(f"{asset_label} 24h: {momentum:+.1f}%")

        # Signal 2: Fear & Greed
        fg = ext.fear_greed_index
        if fg > 70:
            fg_adj = 0.06
            sources.append(f"Fear&Greed: {fg} (greed)")
        elif fg < 30:
            fg_adj = -0.06
            sources.append(f"Fear&Greed: {fg} (fear)")
        else:
            fg_adj = (fg - 50) / 50 * 0.04
            sources.append(f"Fear&Greed: {fg} (neutral)")
        adjustments.append(fg_adj if is_price_above else -fg_adj)

        # Signal 3: BTC dominance — hurts altcoins when high
        if (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance > 55:
            adjustments.append(-0.03 if is_price_above else 0.03)
            sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (high → alt pressure)")
        elif (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance < 45:
            adjustments.append(0.03 if is_price_above else -0.03)
            sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (low → alt season)")

        # Signal 4: GNews sentiment (politics only, budget-gated)
        # Phase 3: caller has pre-sorted markets by gnews_priority() so the
        # highest-value markets reach this block first.
        news_log_adj = 0.0
        if is_politics and self._news is not None:
            if self._news_queries_this_cycle < MAX_NEWS_QUERIES_PER_CYCLE:
                self._news_queries_this_cycle += 1
                sentiment = await self._news.get_sentiment(market.question)
                if abs(sentiment) > 0.05:
                    news_log_adj = sentiment * NEWS_LOGODDS_WEIGHT
                    sources.append(f"GNews: {sentiment:+.2f}")
            else:
                log.info(
                    "SKIP_GNEWS_PRIORITY %-50s | reason=cycle budget %d reached",
                    market.question[:50], MAX_NEWS_QUERIES_PER_CYCLE,
                )

        # Confidence cap: macro/politics/tech signals are weaker proxies
        confidence_cap = 0.65 if (is_macro or is_politics or is_tech or is_events) else 0.90

        # Posterior via log-odds updating
        log_odds_prior = math.log(prior / (1 - prior))
        total_adj = sum(adjustments)
        estimated_prob = _sigmoid(log_odds_prior + total_adj * 2 + news_log_adj)
        estimated_prob = max(0.05, min(0.95, estimated_prob))

        # ── Phase 1: edge_gross and edge_net ─────────────────────────────────
        raw_edge = estimated_prob - market.yes_price
        direction = "BUY_YES" if raw_edge > 0 else "BUY_NO"
        edge_gross = abs(raw_edge)
        # NOTE: commission/size_usdc = COMMISSION_RATE always (constant fraction).
        edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
        # mid_price falls back to yes_price; live order-book data is a future enhancement
        mid_price = market.yes_price
        # Record for cycle summary — every market that reached edge computation
        self._evaluated_edges.append((edge_gross, edge_net, regime_min))

        # Confidence based on signal agreement
        agreement = sum(1 for a in adjustments if (a > 0) == (total_adj > 0))
        confidence = min(confidence_cap, 0.4 + (agreement / max(len(adjustments), 1)) * 0.5)
        if news_log_adj != 0.0:
            confidence = min(confidence_cap, confidence + 0.10)

        # ── Phase 5: structured audit log ────────────────────────────────────
        passed_gross = edge_gross >= regime_min
        passed_net = edge_net >= regime_min
        can_trade = passed_net and confidence >= MIN_CONFIDENCE
        if not can_trade:
            # Edge-net counters are only touched when the edge itself failed;
            # a confidence-only skip is counted separately.
            self._record_gate_skip(passed_net, edge_net)
            skip_parts: list[str] = []
            if not passed_gross:
                skip_parts.append(f"edge_gross={edge_gross:.3f}<{regime_min:.2f}(regime)")
            elif not passed_net:
                skip_parts.append(
                    f"edge_net={edge_net:.3f}<{regime_min:.2f}(regime) "
                    f"[gross={edge_gross:.3f} pass]"
                )
            if confidence < MIN_CONFIDENCE:
                skip_parts.append(f"conf={confidence:.2f}<{MIN_CONFIDENCE}")
            log.info(
                "SKIP_EDGE_NET %-50s | cat=%-12s | family=%-28s | "
                "prior=%.3f | est=%.3f | gross=%+.3f | net=%+.3f | "
                "regime=%.2f | days=%d | conf=%.2f | signals=%s | %s",
                market.question[:50], category, family,
                prior, estimated_prob, edge_gross, edge_net,
                regime_min, days, confidence,
                ", ".join(sources[1:]) or "none",
                " | ".join(skip_parts),
            )
            return None

        reasoning = (
            f"Prior=poly({prior:.3f}) → estimate={estimated_prob:.3f} | "
            f"Poly price={market.yes_price:.3f} | "
            f"edge_gross={edge_gross:+.3f} | edge_net={edge_net:+.3f} | "
            f"regime_min={regime_min:.2f} | days={days} | "
            f"family={family} | "
            f"Direction={direction} | "
            f"Signals: {', '.join(sources[1:])}"
        )
        log.info(
            "TRADE %-50s | cat=%-12s | family=%-28s | "
            "prior=%.3f | est=%.3f | gross=%+.3f | net=%+.3f | "
            "regime=%.2f | days=%d | conf=%.2f | dir=%-8s | signals=%s",
            market.question[:50], category, family,
            prior, estimated_prob, edge_gross, edge_net,
            regime_min, days, confidence, direction,
            ", ".join(sources[1:]) or "none",
        )
        self._signal_count += 1
        return TradingSignal(
            market_id=market.id,
            question=market.question,
            polymarket_price=market.yes_price,
            estimated_prob=estimated_prob,
            edge=edge_gross,  # backward compat — same as edge_gross
            confidence=confidence,
            direction=direction,
            reasoning=reasoning,
            sources=sources,
            # Phase 1 new fields
            edge_gross=edge_gross,
            edge_net=edge_net,
            prior_prob=prior,
            final_prob=estimated_prob,
            mid_price=mid_price,
            spread_estimate=SPREAD_ESTIMATE,
            # Phase 2 new fields
            family_key=family,
            # Phase 4 new fields
            regime_min_edge=regime_min,
        )
def _sigmoid(x: float) -> float:
return 1 / (1 + math.exp(-x))