From 0cdb0758c4e17e32a7ecbd7babc261144955945e Mon Sep 17 00:00:00 2001
From: chemavx
Date: Fri, 17 Apr 2026 10:07:47 +0000
Subject: [PATCH] feat(strategy): Manifold cross-market signal + per-feature contribution logging
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signal 5: ManifoldClient queries Manifold Markets API for a matching binary
market by keyword overlap (threshold 0.25) and applies a log-odds adjustment
proportional to the divergence from the Polymarket prior.

  manifold_log_adj = (log_odds(manifold_prob) - log_odds(prior)) × 0.6

A 30pp divergence (Manifold 0.75 vs Poly 0.45) produces edge_gross ≈ 0.19,
clearing the politics far-horizon regime_min=0.12 after costs.
Confidence boosted +0.08 when Manifold match found.

A match needs at least two shared keywords (one when a title has a single
keyword), so an unrelated market that merely mentions the same name is not
used. API failures are negative-cached for 2 minutes instead of 30.

Per-feature observability: every SKIP_EDGE_NET and TRADE log line now includes
  fg=±X.XXX mom=±X.XXX mfld=±X.XXXX news=±X.XXXX
so the contribution of each signal to edge is auditable per market.

Files: bot/data/manifold.py (new), bot/strategy/bayesian.py, bot/main.py

Co-Authored-By: Claude Sonnet 4.6
---
 bot/data/manifold.py     | 176 +++++++++++++++++++++++++++++++++++++++
 bot/main.py              |   5 +-
 bot/strategy/bayesian.py |  54 ++++++++++--
 3 files changed, 226 insertions(+), 9 deletions(-)
 create mode 100644 bot/data/manifold.py

diff --git a/bot/data/manifold.py b/bot/data/manifold.py
new file mode 100644
index 0000000..47011cc
--- /dev/null
+++ b/bot/data/manifold.py
@@ -0,0 +1,176 @@
+"""
+Manifold Markets client — cross-platform prediction market probability signals.
+
+For each Polymarket question, searches Manifold for a matching binary market
+by keyword overlap and returns its probability as a calibration signal.
+
+Used for politics and tech markets where Manifold often has independent
+probability estimates that diverge from Polymarket.
+
+Cache TTL: 30 minutes (Manifold markets move slowly vs our 60 s cycle).
+API errors are cached for only 2 minutes so a transient outage does not
+silence the signal for a whole TTL window.
+Match threshold: >= 0.25 keyword overlap ratio between significant tokens,
+and at least two shared tokens unless one of the titles has only one.
+
+Weight choice: MANIFOLD_LOGODDS_WEIGHT = 0.6 in bayesian.py means a 30 pp
+divergence (Manifold 0.75 vs Poly 0.45) produces edge_gross ≈ 0.19, which
+clears the politics far-horizon regime threshold of 0.12 after costs.
+"""
+import logging
+import re
+import time
+from typing import Optional
+
+import httpx
+
+MANIFOLD_API = "https://api.manifold.markets/v0"
+CACHE_TTL_SEC = 1800  # 30 minutes
+ERROR_CACHE_TTL_SEC = 120  # retry soon after a transient API failure
+
+log = logging.getLogger(__name__)
+
+_MATCH_THRESHOLD = 0.25
+# One shared keyword ("Biden", "Bitcoin") does not mean two questions ask the
+# same thing, so a match needs two shared tokens unless a title is that short.
+_MIN_SHARED_TOKENS = 2
+# Expired cache entries are purged once the cache holds this many questions.
+_CACHE_PRUNE_SIZE = 2048
+
+_STOP_WORDS = frozenset([
+    "will", "the", "a", "an", "is", "are", "was", "were", "be", "been",
+    "by", "in", "on", "at", "to", "for", "of", "and", "or", "not",
+    "this", "that", "with", "from", "have", "has", "had", "do", "does",
+    "did", "can", "could", "would", "should", "may", "might", "shall",
+    "win", "lose", "get", "become", "make", "take", "give", "see",
+    "any", "who", "what", "when", "where", "which", "how", "over", "under",
+    "than", "more", "most", "least", "its", "their", "they",
+    "him", "her", "his", "she", "being", "into", "after",
+    "before", "during", "until", "against", "between", "through",
+])
+
+
+def _significant_words(text: str) -> set[str]:
+    """Return the lower-cased alphabetic words of *text* worth matching on."""
+    words = re.findall(r"[a-zA-Z]+", text.lower())
+    return {w for w in words if w not in _STOP_WORDS and len(w) >= 3}
+
+
+def _build_search_query(question: str, max_words: int = 6) -> str:
+    """Return up to *max_words* significant words of *question* as a search term."""
+    words = re.findall(r"[a-zA-Z0-9]+", question)
+    sig = [w for w in words if w.lower() not in _STOP_WORDS and len(w) >= 3]
+    return " ".join(sig[:max_words])
+
+
+def _best_match(poly_question: str, results: list[dict]) -> Optional[dict]:
+    """Return the best-matching binary Manifold market, or None.
+
+    A candidate's score is the share of the shorter title's significant words
+    that also occur in the other title. Candidates are skipped when they are
+    not BINARY, when their probability is missing, malformed or outside
+    (0.02, 0.98), or when they share fewer than _MIN_SHARED_TOKENS significant
+    words with the Polymarket question (fewer are accepted only if a title has
+    fewer significant words than that). The top candidate is returned only if
+    its score reaches _MATCH_THRESHOLD.
+    """
+    poly_words = _significant_words(poly_question)
+    if not poly_words:
+        return None
+
+    best_score = 0.0
+    best: Optional[dict] = None
+
+    for result in results:
+        if not isinstance(result, dict) or result.get("outcomeType") != "BINARY":
+            continue
+        try:
+            prob = float(result["probability"])
+        except (KeyError, TypeError, ValueError):
+            # Missing or malformed probability: skip this candidate only.
+            continue
+        if not 0.02 < prob < 0.98:
+            continue
+        m_words = _significant_words(str(result.get("question") or ""))
+        if not m_words:
+            continue
+        shorter = min(len(poly_words), len(m_words))
+        overlap = len(poly_words & m_words)
+        if overlap < min(_MIN_SHARED_TOKENS, shorter):
+            continue
+        score = overlap / shorter
+        if score > best_score:
+            best_score = score
+            best = result
+
+    if best is not None and best_score >= _MATCH_THRESHOLD:
+        return best
+    return None
+
+
+class ManifoldClient:
+    """Async Manifold Markets client for cross-platform probability signals."""
+
+    def __init__(self) -> None:
+        self._client = httpx.AsyncClient(timeout=15)
+        # question → (expires_at_monotonic, probability_or_None)
+        self._cache: dict[str, tuple[float, Optional[float]]] = {}
+
+    def _store(self, question: str, prob: Optional[float], ttl: float) -> None:
+        """Cache *prob* for *question* for *ttl* seconds."""
+        now = time.monotonic()
+        if len(self._cache) >= _CACHE_PRUNE_SIZE:
+            # Keep a long-running bot from accumulating dead questions.
+            self._cache = {q: e for q, e in self._cache.items() if e[0] > now}
+        self._cache[question] = (now + ttl, prob)
+
+    async def get_probability(self, question: str) -> Optional[float]:
+        """
+        Return Manifold probability for a matching market, or None.
+
+        Searches by keyword overlap. Returns None if no match exceeds
+        _MATCH_THRESHOLD or on any API error (caller degrades gracefully).
+        Results are cached for CACHE_TTL_SEC, failures for ERROR_CACHE_TTL_SEC.
+        """
+        cached = self._cache.get(question)
+        if cached is not None and time.monotonic() < cached[0]:
+            return cached[1]
+
+        query = _build_search_query(question)
+        if not query:
+            self._store(question, None, CACHE_TTL_SEC)
+            return None
+
+        try:
+            resp = await self._client.get(
+                f"{MANIFOLD_API}/search-markets",
+                params={"term": query, "limit": 5, "filter": "open"},
+            )
+            resp.raise_for_status()
+            results = resp.json()
+            if not isinstance(results, list):
+                raise ValueError(f"unexpected payload: {type(results).__name__}")
+            match = _best_match(question, results)
+        except Exception as e:
+            # Best-effort signal: any failure degrades to "no signal", but is
+            # cached only briefly so the next cycles retry.
+            log.warning("Manifold API error for %r: %s", question[:40], e)
+            self._store(question, None, ERROR_CACHE_TTL_SEC)
+            return None
+
+        if match is None:
+            log.debug("Manifold no match for: %s (query=%r)", question[:50], query)
+            self._store(question, None, CACHE_TTL_SEC)
+            return None
+
+        prob = float(match["probability"])
+        self._store(question, prob, CACHE_TTL_SEC)
+        log.info(
+            "Manifold match: %-50s → %.3f | %s",
+            question[:50], prob, str(match.get("question") or "")[:60],
+        )
+        return prob
+
+    async def close(self) -> None:
+        """Close the underlying HTTP client."""
+        await self._client.aclose()
diff --git a/bot/main.py b/bot/main.py
index 03e4928..af05330 100644
---
a/bot/main.py +++ b/bot/main.py @@ -10,6 +10,7 @@ from datetime import datetime, timezone from bot.data.polymarket import PolymarketClient, market_family_key from bot.data.external import ExternalDataClient from bot.data.news import NewsClient +from bot.data.manifold import ManifoldClient from bot.strategy.bayesian import BayesianStrategy, gnews_priority, MAX_NEWS_QUERIES_PER_CYCLE from bot.risk.manager import RiskManager from bot.executor.paper import PaperExecutor @@ -188,7 +189,8 @@ async def main() -> None: poly = PolymarketClient() external = ExternalDataClient() news = NewsClient() - strategy = BayesianStrategy(news=news) + manifold = ManifoldClient() + strategy = BayesianStrategy(news=news, manifold=manifold) risk = RiskManager(max_position_pct=0.05, max_exposure_pct=0.30) executor = PaperExecutor(db=db, bankroll=PAPER_BANKROLL) if PAPER_MODE else None metrics = MetricsTracker(db=db) @@ -205,6 +207,7 @@ async def main() -> None: finally: await db.disconnect() await news.close() + await manifold.close() if __name__ == "__main__": diff --git a/bot/strategy/bayesian.py b/bot/strategy/bayesian.py index c3daa14..f67a82b 100644 --- a/bot/strategy/bayesian.py +++ b/bot/strategy/bayesian.py @@ -21,6 +21,7 @@ from bot.data.external import ExternalSignals if TYPE_CHECKING: from bot.data.news import NewsClient + from bot.data.manifold import ManifoldClient log = logging.getLogger(__name__) @@ -51,6 +52,12 @@ MIN_CONFIDENCE = 0.55 # Minimum confidence to generate a signal # which moves a 50% prior to ~18%/82% — strong but not overwhelming. NEWS_LOGODDS_WEIGHT = 1.5 +# Log-odds weight applied to Manifold cross-market probability signal. +# Weight 0.6: a 30 pp divergence (Manifold 0.75 vs Poly 0.45) produces +# edge_gross ≈ 0.19, clearing politics far-horizon regime_min=0.12 after costs. +# Weaker than NEWS_LOGODDS_WEIGHT because Manifold can have illiquid/stale markets. +MANIFOLD_LOGODDS_WEIGHT = 0.6 + # GNews free tier: 100 req/day. 
We limit to 5 queries per trading cycle # (politics markets only) and rely on 6 h cache to stay within budget. MAX_NEWS_QUERIES_PER_CYCLE = 5 @@ -180,9 +187,14 @@ class BayesianStrategy: - Within evaluate(), the per-cycle cap is enforced. """ - def __init__(self, news: Optional["NewsClient"] = None) -> None: + def __init__( + self, + news: Optional["NewsClient"] = None, + manifold: Optional["ManifoldClient"] = None, + ) -> None: self._signal_count = 0 self._news = news + self._manifold = manifold self._news_queries_this_cycle = 0 # Per-cycle counters — reset by reset_cycle(), read by get_cycle_stats() self._skip_family: int = 0 @@ -337,11 +349,13 @@ class BayesianStrategy: momentum = ext.total_market_cap_change asset_label = "total mktcap" + _momentum_contribution = 0.0 if abs(momentum) > 2: momentum_adj = math.tanh(momentum / 20) * 0.15 if is_politics or is_tech or is_events: momentum_adj *= 0.5 - adjustments.append(momentum_adj if is_price_above else -momentum_adj) + _momentum_contribution = momentum_adj if is_price_above else -momentum_adj + adjustments.append(_momentum_contribution) sources.append(f"{asset_label} 24h: {momentum:+.1f}%") # Signal 2: Fear & Greed @@ -355,7 +369,8 @@ class BayesianStrategy: else: fg_adj = (fg - 50) / 50 * 0.04 sources.append(f"Fear&Greed: {fg} (neutral)") - adjustments.append(fg_adj if is_price_above else -fg_adj) + _fg_contribution = fg_adj if is_price_above else -fg_adj + adjustments.append(_fg_contribution) # Signal 3: BTC dominance — hurts altcoins when high if (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance > 55: @@ -382,13 +397,26 @@ class BayesianStrategy: market.question[:50], MAX_NEWS_QUERIES_PER_CYCLE, ) + # Signal 5: Manifold cross-market probability (politics + tech) + # Applies a log-odds adjustment proportional to divergence from prior. + # No query budget — 30 min cache means network cost is paid once per cycle. 
+ manifold_log_adj = 0.0 + if (is_politics or is_tech) and self._manifold is not None: + manifold_prob = await self._manifold.get_probability(market.question) + if manifold_prob is not None: + m_clamped = max(0.05, min(0.95, manifold_prob)) + m_log = math.log(m_clamped / (1 - m_clamped)) + p_log = math.log(prior / (1 - prior)) + manifold_log_adj = (m_log - p_log) * MANIFOLD_LOGODDS_WEIGHT + sources.append(f"Manifold:{manifold_prob:.2f}") + # Confidence cap: macro/politics/tech signals are weaker proxies confidence_cap = 0.65 if (is_macro or is_politics or is_tech or is_events) else 0.90 # Posterior via log-odds updating log_odds_prior = math.log(prior / (1 - prior)) total_adj = sum(adjustments) - estimated_prob = _sigmoid(log_odds_prior + total_adj * 2 + news_log_adj) + estimated_prob = _sigmoid(log_odds_prior + total_adj * 2 + news_log_adj + manifold_log_adj) estimated_prob = max(0.05, min(0.95, estimated_prob)) # ── Phase 1: edge_gross and edge_net ───────────────────────────────── @@ -408,6 +436,16 @@ class BayesianStrategy: confidence = min(confidence_cap, 0.4 + (agreement / max(len(adjustments), 1)) * 0.5) if news_log_adj != 0.0: confidence = min(confidence_cap, confidence + 0.10) + if manifold_log_adj != 0.0: + confidence = min(confidence_cap, confidence + 0.08) + + # Per-feature contribution string for audit logging + feat_str = ( + f"fg={_fg_contribution:+.3f} " + f"mom={_momentum_contribution:+.3f} " + f"mfld={manifold_log_adj:+.4f} " + f"news={news_log_adj:+.4f}" + ) # ── Phase 5: structured audit log ──────────────────────────────────── passed_gross = edge_gross >= regime_min @@ -433,10 +471,10 @@ class BayesianStrategy: log.info( "SKIP_EDGE_NET %-50s | cat=%-12s | family=%-28s | " "prior=%.3f | est=%.3f | gross=%+.3f | net=%+.3f | " - "regime=%.2f | days=%d | conf=%.2f | signals=%s | %s", + "regime=%.2f | days=%d | conf=%.2f | %s | signals=%s | %s", market.question[:50], category, family, prior, estimated_prob, edge_gross, edge_net, - regime_min, days, 
confidence, + regime_min, days, confidence, feat_str, ", ".join(sources[1:]) or "none", " | ".join(skip_parts), ) @@ -455,10 +493,10 @@ class BayesianStrategy: log.info( "TRADE %-50s | cat=%-12s | family=%-28s | " "prior=%.3f | est=%.3f | gross=%+.3f | net=%+.3f | " - "regime=%.2f | days=%d | conf=%.2f | dir=%-8s | signals=%s", + "regime=%.2f | days=%d | conf=%.2f | dir=%-8s | %s | signals=%s", market.question[:50], category, family, prior, estimated_prob, edge_gross, edge_net, - regime_min, days, confidence, direction, + regime_min, days, confidence, direction, feat_str, ", ".join(sources[1:]) or "none", )