feat(bot): 5-phase strategy upgrade — edge neto, families, GNews priority, regimes
CI/CD / build-and-push (push) Successful in 2m30s

Phase 1 — Edge neto real (paper.py, bayesian.py, risk/manager.py, db.py):
- Trade records now store edge_gross, edge_net, prior_prob, final_prob,
  mid_price, spread_estimate, commission, family_key
- edge_net = edge_gross - SPREAD_ESTIMATE(0.02) - COMMISSION_RATE(0.02)
  NOTE: both constants are heuristics, not exact Polymarket exchange costs
- Execution gate changed from edge_gross > MIN_EDGE to edge_net > regime_min_edge

Phase 2 — Market families (polymarket.py):
- market_family_key(market) groups related markets:
    texas-republican-2026, fed-april-2026, openai-2026, etc.
- At most 1 trade per family per cycle; occupied_families propagated via main.py
- Family key logged on every TRADE and SKIP line

Phase 3 — GNews priority (news.py, bayesian.py, main.py):
- NewsClient.get_freshness() returns 1.0/0.75/0.40/0.10 by cache age
- gnews_priority(market, news) = uncertainty × volume_score × freshness
- Politics markets sorted by priority DESC before eval so best markets get
  the 5-query/cycle GNews budget first

Phase 4 — Regime min-edge by category/horizon (bayesian.py):
- politics >60d → 0.12, 30-60d → 0.10, <30d → 0.08
- tech / crypto/finance → 0.10
- All thresholds applied to edge_net (not edge_gross)

Phase 5 — Observability (bayesian.py, main.py):
- Structured skip labels: SKIP_UNSUPPORTED, SKIP_NO_SIGNALS,
  SKIP_PRIOR_EXTREME, SKIP_FAMILY, SKIP_GNEWS_PRIORITY, SKIP_EDGE_NET
- TRADE lines now include family_key, edge_gross, edge_net, regime_min, days
- schema.sql: 8 new cols on trades, 7 new cols on signals (via ALTER TABLE … ADD COLUMN IF NOT EXISTS)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
chemavx
2026-04-16 15:34:46 +00:00
parent a0cbdc0256
commit 63d9f637ff
8 changed files with 620 additions and 141 deletions
+252 -103
View File
@@ -12,20 +12,39 @@ Polymarket might reflect in a slow-moving order book.
"""
import logging
import math
from dataclasses import dataclass
from typing import Optional
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Optional, TYPE_CHECKING
from bot.data.polymarket import Market
from bot.data.polymarket import Market, market_family_key
from bot.data.external import ExternalSignals
from bot.data.news import NewsClient
if TYPE_CHECKING:
from bot.data.news import NewsClient
log = logging.getLogger(__name__)
# Minimum edge required to place a trade.
# With an informed prior (poly price), 10% means our signals strongly disagree
# with the market — much higher bar than before, but necessary to avoid noise.
# NOTE(review): the regime-based gate in BayesianStrategy.evaluate() compares
# edge_net against _regime_min_edge(); confirm whether anything still reads
# MIN_EDGE before removing it.
MIN_EDGE = 0.10  # 10% edge minimum

# ─────────────────────────────────────────────────────────────────────────────
# Cost constants (Phase 1 — heuristics, not exact Polymarket exchange costs)
# ─────────────────────────────────────────────────────────────────────────────

# spread_estimate: approximate half-spread for medium-liquidity Polymarket
# markets. Real spread varies by market and time; 0.02 is a conservative
# starting estimate. Replace with live order-book data when available.
SPREAD_ESTIMATE: float = 0.02

# commission_rate: Polymarket taker fee approximation. Current Polymarket fee
# is 0% on CLOB but was 2% historically; keeping 2% as a conservative buffer
# against future fee changes and exchange rate effects.
COMMISSION_RATE: float = 0.02

# Combined cost floor deducted from edge_gross to get edge_net.
# edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
TOTAL_COST_RATE: float = SPREAD_ESTIMATE + COMMISSION_RATE  # 0.04

# ─────────────────────────────────────────────────────────────────────────────
# Other strategy constants
# ─────────────────────────────────────────────────────────────────────────────

# Defined exactly once: a second, identical assignment further up was only
# leftover from the previous revision.
MIN_CONFIDENCE = 0.55  # Minimum confidence to generate a signal
# Log-odds weight applied to the GNews sentiment score (range ±1.0).
# A weight of 1.5 means a fully negative/positive signal shifts log-odds by ±1.5,
@@ -37,17 +56,103 @@ NEWS_LOGODDS_WEIGHT = 1.5
MAX_NEWS_QUERIES_PER_CYCLE = 5
# ─────────────────────────────────────────────────────────────────────────────
# Phase 4 — Regime-based minimum edge (uses edge_NET, not edge_gross)
# ─────────────────────────────────────────────────────────────────────────────
def _regime_min_edge(category: str, days_to_resolution: int) -> float:
    """
    Return the minimum edge_net required to execute a trade.

    Thresholds are higher for far-future politics markets (less signal, more
    noise) and lower for near-term politics (time pressure makes any edge
    actionable). Every other category uses a flat threshold.

        category              | days_to_resolution | min_edge_net
        ──────────────────────┼────────────────────┼─────────────
        politics              | > 60 d             | 0.12
        politics              | 30–60 d            | 0.10
        politics              | < 30 d             | 0.08
        tech / crypto/finance | any                | 0.10
        other / unknown       | any                | 0.10

    Both ends of the 30–60 d band are inclusive. In particular, exactly 30
    days maps to 0.10: _days_to_resolution() returns 30 when the end date is
    unknown, and that fallback is meant to be treated as medium-term rather
    than receive the loosest threshold.

    Args:
        category: Market category label; only "politics" is horizon-dependent.
        days_to_resolution: Whole calendar days until the market resolves.

    Returns:
        The minimum net edge (a fraction, e.g. 0.10 for 10%).
    """
    if category != "politics":
        return 0.10  # tech, crypto/finance, events, default

    if days_to_resolution > 60:
        return 0.12
    if days_to_resolution >= 30:
        return 0.10
    return 0.08
def _days_to_resolution(end_date: str) -> int:
    """
    Return the whole number of days from now (UTC) until *end_date*.

    *end_date* is an ISO-8601 timestamp; a trailing "Z" is accepted and a
    timestamp without an offset is interpreted as UTC. Dates in the past
    yield 0. A missing or unparseable value yields 30 so the market is
    treated as medium-term.
    """
    fallback_days = 30
    if not end_date:
        return fallback_days

    try:
        # fromisoformat() on older Pythons rejects the "Z" suffix, so spell
        # the UTC offset out explicitly.
        resolves_at = datetime.fromisoformat(end_date.replace("Z", "+00:00"))
        if resolves_at.tzinfo is None:
            resolves_at = resolves_at.replace(tzinfo=timezone.utc)
        remaining = resolves_at - datetime.now(timezone.utc)
    except (ValueError, TypeError):
        return fallback_days

    # timedelta.days is negative once the date has passed; clamp to zero.
    return remaining.days if remaining.days > 0 else 0
# ─────────────────────────────────────────────────────────────────────────────
# Phase 3 — GNews priority scoring
# ─────────────────────────────────────────────────────────────────────────────
def gnews_priority(market: Market, news: "NewsClient") -> float:
    """
    Score a market for GNews query priority (higher = more valuable to query).

    priority = uncertainty × volume_score × freshness

        uncertainty  = 1 - |prior - 0.5| × 2, where prior is market.yes_price
                       clamped to [0.05, 0.95]; it peaks at 1.0 for a 50%
                       market.
        volume_score = min(volume_24h / 10_000, 1.0)
        freshness    = news.get_freshness(market.question)
                       (presumably larger for questions not queried
                       recently — confirm against NewsClient)

    Only the market's price, 24h volume and question are consulted here.
    """
    clamped_prior = max(0.05, min(0.95, market.yes_price))
    distance_from_even = abs(clamped_prior - 0.5)
    uncertainty = 1.0 - distance_from_even * 2

    volume_score = min(market.volume_24h / 10_000, 1.0)

    freshness = news.get_freshness(market.question)

    score = uncertainty * volume_score
    return score * freshness
# ─────────────────────────────────────────────────────────────────────────────
# Signal and strategy classes
# ─────────────────────────────────────────────────────────────────────────────
@dataclass
class TradingSignal:
    """
    An actionable trade recommendation produced by BayesianStrategy.evaluate().

    The first nine fields are required. The remaining fields carry defaults
    so existing callers that construct a signal without them keep working.
    """

    market_id: str
    question: str
    polymarket_price: float  # Current market price for YES (0-1)
    estimated_prob: float    # Our Bayesian estimate (0-1)
    edge: float              # Kept for backward compat — equals edge_gross
    confidence: float        # How confident we are (0-1)
    direction: str           # "BUY_YES" | "BUY_NO"
    reasoning: str           # Human-readable explanation for logging
    sources: list[str]       # Data sources used

    # ── Phase 1: edge neto (net edge) ────────────────────────────────────────
    edge_gross: float = 0.0  # |estimated_prob - polymarket_price|
    edge_net: float = 0.0    # edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
    prior_prob: float = 0.0  # market.yes_price clamped to [0.05, 0.95]
    final_prob: float = 0.0  # estimated_prob (explicit alias)
    # mid_price: intended to be (bid+ask)/2 from the order book. Order-book
    # fetching is a future enhancement, so the strategy currently fills this
    # with market.yes_price.
    mid_price: float = 0.0
    spread_estimate: float = SPREAD_ESTIMATE

    # ── Phase 2: market families ─────────────────────────────────────────────
    family_key: str = ""

    # ── Phase 4: regime ──────────────────────────────────────────────────────
    # Minimum edge_net that applied to this market when the signal was made.
    regime_min_edge: float = 0.10
class BayesianStrategy:
@@ -59,34 +164,59 @@ class BayesianStrategy:
- BTC/ETH price momentum
- Fear & Greed index
- Market cap trend / BTC dominance
We only bet when our signals move the estimate far enough from the prior
to justify the fee + slippage cost (MIN_EDGE).
- GNews sentiment (politics only, capped at MAX_NEWS_QUERIES_PER_CYCLE)
Execution gate (Phase 1 + 4):
- Compute edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
- Only trade when edge_net > regime_min_edge(category, days_to_resolution)
Family deduplication (Phase 2):
- At most 1 open position per market family per cycle.
- Caller passes occupied_families; this method skips and logs SKIP_FAMILY.
GNews prioritisation (Phase 3):
- Caller pre-sorts politics markets by gnews_priority() (desc) so the
highest-value markets consume the GNews budget first.
- Within evaluate(), the per-cycle cap is enforced.
"""
def __init__(self, news: Optional[NewsClient] = None) -> None:
def __init__(self, news: Optional["NewsClient"] = None) -> None:
self._signal_count = 0
self._news = news # Optional; degrades gracefully when None or key missing
self._news = news
self._news_queries_this_cycle = 0
def reset_cycle(self) -> None:
"""Call once at the start of each trading cycle to reset the per-cycle GNews counter."""
"""Call once at the start of each trading cycle to reset per-cycle counters."""
self._news_queries_this_cycle = 0
async def evaluate(
self,
market: Market,
ext: ExternalSignals,
occupied_families: set[str],
) -> Optional[TradingSignal]:
"""
Evaluate a market and return a signal if edge exists.
Returns None if no actionable opportunity.
Evaluate a market and return a TradingSignal if actionable.
Returns None with a structured log line in all skip cases.
Skip reasons (Phase 5 observability):
SKIP_UNSUPPORTED — category not supported
SKIP_NO_SIGNALS — external data unavailable
SKIP_PRIOR_EXTREME — prior < 0.08 or > 0.92
SKIP_FAMILY — family already has an open/pending position
SKIP_EDGE_NET — edge_net < regime_min_edge
SKIP_CONFIDENCE — confidence < MIN_CONFIDENCE
"""
question_lower = market.question.lower()
category = market.category # set by PolymarketClient
category = market.category
# Classify what kind of market this is
is_price_above = any(w in question_lower for w in ["above", "over", "exceed", "higher", "atleast", "reach"])
is_price_below = any(w in question_lower for w in ["below", "under", "less than", "lower", "drop"])
# ── Classify market type ─────────────────────────────────────────────
is_price_above = any(w in question_lower for w in [
"above", "over", "exceed", "higher", "atleast", "reach",
])
is_price_below = any(w in question_lower for w in [
"below", "under", "less than", "lower", "drop",
])
is_btc = "btc" in question_lower or "bitcoin" in question_lower
is_eth = "eth" in question_lower or "ethereum" in question_lower
@@ -100,7 +230,9 @@ class BayesianStrategy:
w in question_lower for w in ["crypto", "market cap", "total market", "altcoin", "defi"]
)
is_macro = any(
w in question_lower for w in ["nasdaq", "s&p", "sp500", "inflation", "fed rate", "interest rate", "tariff"]
w in question_lower for w in [
"nasdaq", "s&p", "sp500", "inflation", "fed rate", "interest rate", "tariff",
]
)
is_politics = category == "politics"
is_tech = category == "tech"
@@ -112,45 +244,52 @@ class BayesianStrategy:
)
if not is_any_supported:
log.info(
"SKIP %-50s | reason=unsupported category=%r",
"SKIP_UNSUPPORTED %-50s | cat=%r",
market.question[:50], category,
)
return None
if not ext.valid:
log.info(
"SKIP %-50s | reason=no external signals",
"SKIP_NO_SIGNALS %-50s | reason=external data unavailable",
market.question[:50],
)
return None # Can't reason without external data
return None
# --- Bayesian probability estimation ---
# Prior = Polymarket consensus price, clamped away from extremes.
# The market already aggregates information from many traders;
# our signals update from that informed baseline, not from 0.5.
# ── Phase 1: prior + prior-extreme filter ────────────────────────────
prior = max(0.05, min(0.95, market.yes_price))
# Skip markets where the crowd has already reached near-certainty.
# Below 0.08 or above 0.92 there is not enough room for our signals
# to generate MIN_EDGE — any trade would be fighting near-certain consensus.
if market.yes_price < 0.08:
log.info(
"SKIP %-50s | cat=%-12s | prior=%.3f | reason=prior too low, market already certain",
"SKIP_PRIOR_EXTREME %-50s | cat=%-12s | prior=%.3f | reason=prior<0.08",
market.question[:50], category, market.yes_price,
)
return None
if market.yes_price > 0.92:
log.info(
"SKIP %-50s | cat=%-12s | prior=%.3f | reason=prior too high, market already certain",
"SKIP_PRIOR_EXTREME %-50s | cat=%-12s | prior=%.3f | reason=prior>0.92",
market.question[:50], category, market.yes_price,
)
return None
# ── Phase 2: family deduplication ────────────────────────────────────
family = market_family_key(market)
if family in occupied_families:
log.info(
"SKIP_FAMILY %-50s | cat=%-12s | family=%s",
market.question[:50], category, family,
)
return None
# ── Phase 4: regime min-edge ─────────────────────────────────────────
days = _days_to_resolution(market.end_date)
regime_min = _regime_min_edge(category, days)
# ── Bayesian probability estimation ──────────────────────────────────
sources: list[str] = [f"Prior=poly({prior:.3f})"]
adjustments: list[float] = []
# Signal 1: Price momentum (asset-specific or total market cap as proxy)
# For politics/tech/events use BTC as a broad sentiment proxy.
# Signal 1: price momentum (asset-specific or BTC as sentiment proxy)
if is_btc:
momentum = ext.btc_change_24h
asset_label = "BTC"
@@ -158,17 +297,14 @@ class BayesianStrategy:
momentum = ext.eth_change_24h
asset_label = "ETH"
elif is_politics or is_tech or is_events:
# BTC as risk-sentiment proxy for non-crypto categories
momentum = ext.btc_change_24h
asset_label = "BTC(sentiment)"
else:
# Altcoins and general crypto: use total market cap change as proxy
momentum = ext.total_market_cap_change
asset_label = "total mktcap"
if abs(momentum) > 2:
momentum_adj = math.tanh(momentum / 20) * 0.15 # Max ±15%
# For non-directional markets (politics/events/tech), momentum is weaker signal
momentum_adj = math.tanh(momentum / 20) * 0.15
if is_politics or is_tech or is_events:
momentum_adj *= 0.5
adjustments.append(momentum_adj if is_price_above else -momentum_adj)
@@ -185,26 +321,19 @@ class BayesianStrategy:
else:
fg_adj = (fg - 50) / 50 * 0.04
sources.append(f"Fear&Greed: {fg} (neutral)")
adjustments.append(fg_adj if is_price_above else -fg_adj)
# Signal 3: BTC dominance — hurts altcoins when high
if (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance > 55:
dom_adj = -0.03 if is_price_above else 0.03
adjustments.append(dom_adj)
adjustments.append(-0.03 if is_price_above else 0.03)
sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (high → alt pressure)")
elif (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance < 45:
dom_adj = 0.03 if is_price_above else -0.03
adjustments.append(dom_adj)
adjustments.append(0.03 if is_price_above else -0.03)
sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (low → alt season)")
# Signal 4: GNews sentiment politics markets only.
# BTC/F&G already cover crypto and macro; GNews budget is too tight to
# waste on tech/events. Cap at MAX_NEWS_QUERIES_PER_CYCLE per cycle so
# we prioritise the soonest-resolving markets (caller sorts by end_date).
# Applied as a direct log-odds shift — stronger signal than macro proxies.
# Weight NEWS_LOGODDS_WEIGHT=1.5 means a ±1.0 sentiment score shifts
# log-odds by ±1.5 (e.g. 50% prior → ~82% / ~18%).
# Signal 4: GNews sentiment (politics only, budget-gated)
# Phase 3: caller has pre-sorted markets by gnews_priority() so the
# highest-value markets reach this block first.
news_log_adj = 0.0
if is_politics and self._news is not None:
if self._news_queries_this_cycle < MAX_NEWS_QUERIES_PER_CYCLE:
@@ -214,72 +343,81 @@ class BayesianStrategy:
news_log_adj = sentiment * NEWS_LOGODDS_WEIGHT
sources.append(f"GNews: {sentiment:+.2f}")
else:
log.debug(
"GNews cycle limit (%d) reached — skipping news for %r",
MAX_NEWS_QUERIES_PER_CYCLE, market.question[:50],
log.info(
"SKIP_GNEWS_PRIORITY %-50s | reason=cycle budget %d reached",
market.question[:50], MAX_NEWS_QUERIES_PER_CYCLE,
)
# Macro/politics/tech/events: cap confidence lower to reflect weaker signal quality
if is_macro or is_politics or is_tech or is_events:
confidence_cap = 0.65
else:
confidence_cap = 0.90
# Confidence cap: macro/politics/tech signals are weaker proxies
confidence_cap = 0.65 if (is_macro or is_politics or is_tech or is_events) else 0.90
# Compute posterior using log-odds updating.
# total_adj (BTC/F&G/dominance) is amplified ×2 because those are weak proxies.
# news_log_adj is applied at face value — it IS a direct log-odds signal.
# Posterior via log-odds updating
log_odds_prior = math.log(prior / (1 - prior))
total_adj = sum(adjustments)
estimated_prob = _sigmoid(log_odds_prior + total_adj * 2 + news_log_adj)
estimated_prob = max(0.05, min(0.95, estimated_prob))
# Compute edge
edge = estimated_prob - market.yes_price
direction = "BUY_YES" if edge > 0 else "BUY_NO"
abs_edge = abs(edge)
# ── Phase 1: edge_gross and edge_net ─────────────────────────────────
raw_edge = estimated_prob - market.yes_price
direction = "BUY_YES" if raw_edge > 0 else "BUY_NO"
edge_gross = abs(raw_edge)
# NOTE: commission/size_usdc = COMMISSION_RATE always (constant fraction).
edge_net = edge_gross - SPREAD_ESTIMATE - COMMISSION_RATE
# mid_price falls back to yes_price; live order-book data is a future enhancement
mid_price = market.yes_price
# Confidence based on signal agreement
agreement = sum(1 for a in adjustments if (a > 0) == (total_adj > 0))
confidence = min(confidence_cap, 0.4 + (agreement / max(len(adjustments), 1)) * 0.5)
# News signal available → boost confidence by 0.10 (news corroborates macro signals)
if news_log_adj != 0.0:
confidence = min(confidence_cap, confidence + 0.10)
# Log evaluation result for every market
action = "TRADE" if (abs_edge >= MIN_EDGE and confidence >= MIN_CONFIDENCE) else "SKIP"
skip_reason = ""
if action == "SKIP":
reasons = []
if abs_edge < MIN_EDGE:
reasons.append(f"edge={abs_edge:.3f}<{MIN_EDGE}")
# ── Phase 5: structured audit log ────────────────────────────────────
passed_gross = edge_gross >= regime_min
passed_net = edge_net >= regime_min
can_trade = passed_net and confidence >= MIN_CONFIDENCE
if not can_trade:
skip_parts: list[str] = []
if not passed_gross:
skip_parts.append(f"edge_gross={edge_gross:.3f}<{regime_min:.2f}(regime)")
elif not passed_net:
skip_parts.append(
f"edge_net={edge_net:.3f}<{regime_min:.2f}(regime) "
f"[gross={edge_gross:.3f} pass]"
)
if confidence < MIN_CONFIDENCE:
reasons.append(f"conf={confidence:.2f}<{MIN_CONFIDENCE}")
skip_reason = " | reason=" + ",".join(reasons)
log.info(
"%-5s %-50s | cat=%-12s | prior=%.3f | est=%.3f | edge=%+.3f | conf=%.2f | dir=%-8s | signals=%s%s",
action,
market.question[:50],
category,
prior,
estimated_prob,
edge,
confidence,
direction,
", ".join(sources[1:]) or "none",
skip_reason,
)
# Filter: only trade if edge and confidence thresholds met
if abs_edge < MIN_EDGE or confidence < MIN_CONFIDENCE:
skip_parts.append(f"conf={confidence:.2f}<{MIN_CONFIDENCE}")
log.info(
"SKIP_EDGE_NET %-50s | cat=%-12s | family=%-28s | "
"prior=%.3f | est=%.3f | gross=%+.3f | net=%+.3f | "
"regime=%.2f | days=%d | conf=%.2f | signals=%s | %s",
market.question[:50], category, family,
prior, estimated_prob, edge_gross, edge_net,
regime_min, days, confidence,
", ".join(sources[1:]) or "none",
" | ".join(skip_parts),
)
return None
reasoning = (
f"Prior=poly({prior:.3f}) → estimate={estimated_prob:.3f} | "
f"Poly price={market.yes_price:.3f} | "
f"Edge={edge:+.3f} | "
f"edge_gross={edge_gross:+.3f} | edge_net={edge_net:+.3f} | "
f"regime_min={regime_min:.2f} | days={days} | "
f"family={family} | "
f"Direction={direction} | "
f"Signals: {', '.join(sources[1:])}" # skip the prior label already shown
f"Signals: {', '.join(sources[1:])}"
)
log.info(
"TRADE %-50s | cat=%-12s | family=%-28s | "
"prior=%.3f | est=%.3f | gross=%+.3f | net=%+.3f | "
"regime=%.2f | days=%d | conf=%.2f | dir=%-8s | signals=%s",
market.question[:50], category, family,
prior, estimated_prob, edge_gross, edge_net,
regime_min, days, confidence, direction,
", ".join(sources[1:]) or "none",
)
self._signal_count += 1
@@ -288,11 +426,22 @@ class BayesianStrategy:
question=market.question,
polymarket_price=market.yes_price,
estimated_prob=estimated_prob,
edge=abs_edge,
edge=edge_gross, # backward compat — same as edge_gross
confidence=confidence,
direction=direction,
reasoning=reasoning,
sources=sources,
# Phase 1 new fields
edge_gross=edge_gross,
edge_net=edge_net,
prior_prob=prior,
final_prob=estimated_prob,
mid_price=mid_price,
spread_estimate=SPREAD_ESTIMATE,
# Phase 2 new fields
family_key=family,
# Phase 4 new fields
regime_min_edge=regime_min,
)