feat(strategy): Manifold cross-market signal + per-feature contribution logging

Signal 5: ManifoldClient queries the Manifold Markets API for a matching binary market by keyword overlap (threshold 0.25) and applies a log-odds adjustment proportional to the divergence from the Polymarket prior: manifold_log_adj = (log_odds(manifold_prob) - log_odds(prior)) × 0.6. A 30pp divergence (Manifold 0.75 vs Poly 0.45) produces edge_gross ≈ 0.19, clearing the politics far-horizon regime_min=0.12 after costs. Confidence is boosted by +0.08 (subject to the category confidence cap) when a Manifold match is found. Per-feature observability: every SKIP_EDGE_NET and TRADE log line now includes fg=±X.XXX mom=±X.XXX mfld=±X.XXXX news=±X.XXXX so the contribution of each signal to edge is auditable per market. Files: bot/data/manifold.py (new), bot/strategy/bayesian.py, bot/main.py. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-17 10:07:47 +00:00
parent 411d346261
commit 0cdb0758c4
3 changed files with 185 additions and 9 deletions
@@ -0,0 +1,135 @@
 """
 Manifold Markets client — cross-platform prediction market probability signals.
 For each Polymarket question, searches Manifold for a matching binary market
 by keyword overlap and returns its probability as a calibration signal.
 Used for politics and tech markets where Manifold often has independent
 probability estimates that diverge from Polymarket.
 Cache TTL: 30 minutes (Manifold markets move slowly vs our 60 s cycle).
 Match threshold: >= 0.25 keyword overlap ratio between significant tokens.
 Weight choice: MANIFOLD_LOGODDS_WEIGHT = 0.6 in bayesian.py means a 30 pp
 divergence (Manifold 0.75 vs Poly 0.45) produces edge_gross ≈ 0.19, which
 clears the politics far-horizon regime threshold of 0.12 after costs.
 """
 import logging
 import re
 import time
 from typing import Optional
 import httpx
 # Module logger, named after this module.
 log = logging.getLogger(__name__)
 # Root URL of the Manifold Markets v0 REST API.
 MANIFOLD_API = "https://api.manifold.markets/v0"
 # How long a per-question lookup result is reused, in seconds.
 CACHE_TTL_SEC = 1800  # 30 minutes
 # Minimum keyword-overlap score a candidate market needs to count as a match.
 _MATCH_THRESHOLD = 0.25
 # Words that carry no matching signal; they are excluded both from the
 # search query and from the keyword-overlap comparison.
 _STOP_WORDS = frozenset({
    # articles, conjunctions, negation
    "the", "a", "an", "and", "or", "not",
    # forms of "to be"
    "is", "are", "was", "were", "be", "been", "being",
    # auxiliaries and modals
    "will", "have", "has", "had", "do", "does", "did", "can", "could",
    "would", "should", "may", "might", "shall",
    # prepositions
    "by", "in", "on", "at", "to", "for", "of", "with", "from", "over",
    "under", "into", "after", "before", "during", "until", "against",
    "between", "through",
    # generic outcome verbs
    "win", "lose", "get", "become", "make", "take", "give", "see",
    # determiners, pronouns, interrogatives
    "this", "that", "any", "who", "what", "when", "where", "which", "how",
    "its", "their", "they", "him", "her", "his", "she",
    # comparatives
    "than", "more", "most", "least",
 })
 def _significant_words(text: str) -> set[str]:
    """Return the distinct tokens of *text* that are used for matching.

    The text is lowercased and split into runs of ASCII letters; tokens
    shorter than three characters or listed in _STOP_WORDS are dropped.
    """
    kept: set[str] = set()
    for token in re.findall(r"[a-zA-Z]+", text.lower()):
        if len(token) < 3 or token in _STOP_WORDS:
            continue
        kept.add(token)
    return kept
 def _build_search_query(question: str, max_words: int = 6) -> str:
    """Build a space-separated search term from *question*.

    Alphanumeric tokens are kept in their original order and casing when
    they are at least three characters long and their lowercase form is
    not in _STOP_WORDS; the resulting list is then sliced to *max_words*.
    Returns an empty string when no token qualifies.
    """
    picked: list[str] = []
    for token in re.findall(r"[a-zA-Z0-9]+", question):
        if len(token) >= 3 and token.lower() not in _STOP_WORDS:
            picked.append(token)
    return " ".join(picked[:max_words])
 def _best_match(poly_question: str, results: list[dict]) -> Optional[dict]:
    """Return the best-matching binary Manifold market, or None if below threshold.

    Each candidate is scored as the number of significant words it shares
    with *poly_question*, divided by the size of the smaller of the two
    word sets.  The highest-scoring candidate is returned when its score is
    at least _MATCH_THRESHOLD; on ties the earliest candidate wins.

    A candidate is ignored when:
      * its ``outcomeType`` is not ``"BINARY"``;
      * its ``probability`` is missing, not numeric, or not strictly
        between 0.02 and 0.98;
      * its ``question`` title is missing, not a string, or has no
        significant words.

    Malformed entries are skipped instead of raising, so a single bad
    record cannot discard the remaining valid candidates.  A payload that
    is not a list/tuple (e.g. an error object) yields None.
    """
    poly_words = _significant_words(poly_question)
    if not poly_words:
        return None
    if not isinstance(results, (list, tuple)):
        # Unexpected payload shape — nothing to match against.
        return None
    best_score = 0.0
    best: Optional[dict] = None
    for result in results:
        if not isinstance(result, dict):
            continue
        if result.get("outcomeType") != "BINARY":
            continue
        try:
            prob = float(result.get("probability"))
        except (TypeError, ValueError):
            # Missing (None) or non-numeric probability.
            continue
        if not (0.02 < prob < 0.98):
            # Also rejects NaN, for which both comparisons are False.
            continue
        title = result.get("question")
        if not isinstance(title, str):
            continue
        m_words = _significant_words(title)
        if not m_words:
            continue
        overlap = len(poly_words & m_words)
        # NOTE(review): dividing by the smaller set means a one-word title
        # that shares its word scores 1.0 — confirm this leniency is intended.
        score = overlap / min(len(poly_words), len(m_words))
        if score > best_score:
            best_score = score
            best = result
    if best_score >= _MATCH_THRESHOLD and best is not None:
        return best
    return None
 class ManifoldClient:
    """Async client that looks up Manifold Markets probabilities for questions.

    Every lookup outcome — a probability, a miss, or a failure — is
    remembered per question and reused for CACHE_TTL_SEC seconds.
    """

    def __init__(self) -> None:
        # Maps question → (time.monotonic() at fetch, probability or None).
        self._cache: dict[str, tuple[float, Optional[float]]] = {}
        self._client = httpx.AsyncClient(timeout=15)

    async def get_probability(self, question: str) -> Optional[float]:
        """
        Look up the Manifold probability of the market best matching *question*.

        A fresh cache entry is returned as-is.  Otherwise a search term is
        built from the question, the ``search-markets`` endpoint is queried
        for up to five open markets, and the best keyword match is chosen.

        Returns the matched market's probability, or None when the question
        yields no search term, no candidate reaches the match threshold, or
        anything raises during the request or matching.  The outcome is
        cached in every one of these cases.
        """
        started = time.monotonic()
        entry = self._cache.get(question)
        if entry:
            fetched_at, cached_prob = entry
            if started - fetched_at < CACHE_TTL_SEC:
                return cached_prob

        search_term = _build_search_query(question)
        if not search_term:
            # Nothing searchable in the question; remember the miss.
            self._cache[question] = (started, None)
            return None

        try:
            response = await self._client.get(
                f"{MANIFOLD_API}/search-markets",
                params={"term": search_term, "limit": 5, "filter": "open"},
            )
            response.raise_for_status()
            hit = _best_match(question, response.json())
            if hit:
                probability = float(hit["probability"])
            else:
                probability = None
            self._cache[question] = (started, probability)
            if probability is None:
                log.debug("Manifold no match for: %s (query=%r)", question[:50], search_term)
            else:
                log.info(
                    "Manifold match: %-50s → %.3f | %s",
                    question[:50], probability, hit.get("question", "")[:60],
                )
            return probability
        except Exception as exc:
            # Best-effort signal: log, cache the failure as a miss, carry on.
            log.warning("Manifold API error for %r: %s", question[:40], exc)
            self._cache[question] = (started, None)
            return None

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self._client.aclose()
@@ -10,6 +10,7 @@ from datetime import datetime, timezone
 from bot.data.polymarket import PolymarketClient, market_family_key
 from bot.data.external import ExternalDataClient
 from bot.data.news import NewsClient
 from bot.data.manifold import ManifoldClient
 from bot.strategy.bayesian import BayesianStrategy, gnews_priority, MAX_NEWS_QUERIES_PER_CYCLE
 from bot.risk.manager import RiskManager
 from bot.executor.paper import PaperExecutor
@@ -188,7 +189,8 @@ async def main() -> None:
    poly = PolymarketClient()
    external = ExternalDataClient()
    news = NewsClient()
-    strategy = BayesianStrategy(news=news)
+    manifold = ManifoldClient()
    strategy = BayesianStrategy(news=news, manifold=manifold)
    risk = RiskManager(max_position_pct=0.05, max_exposure_pct=0.30)
    executor = PaperExecutor(db=db, bankroll=PAPER_BANKROLL) if PAPER_MODE else None
    metrics = MetricsTracker(db=db)
@@ -205,6 +207,7 @@ async def main() -> None:
    finally:
        await db.disconnect()
        await news.close()
        await manifold.close()
 if __name__ == "__main__":
@@ -21,6 +21,7 @@ from bot.data.external import ExternalSignals
 if TYPE_CHECKING:
    from bot.data.news import NewsClient
    from bot.data.manifold import ManifoldClient
 log = logging.getLogger(__name__)
@@ -51,6 +52,12 @@ MIN_CONFIDENCE = 0.55   # Minimum confidence to generate a signal
 # which moves a 50% prior to ~18%/82% — strong but not overwhelming.
 NEWS_LOGODDS_WEIGHT = 1.5
 # Log-odds weight applied to Manifold cross-market probability signal.
 # Weight 0.6: a 30 pp divergence (Manifold 0.75 vs Poly 0.45) produces
 # edge_gross ≈ 0.19, clearing politics far-horizon regime_min=0.12 after costs.
 # Weaker than NEWS_LOGODDS_WEIGHT because Manifold can have illiquid/stale markets.
 MANIFOLD_LOGODDS_WEIGHT = 0.6
 # GNews free tier: 100 req/day.  We limit to 5 queries per trading cycle
 # (politics markets only) and rely on 6 h cache to stay within budget.
 MAX_NEWS_QUERIES_PER_CYCLE = 5
@@ -180,9 +187,14 @@ class BayesianStrategy:
    - Within evaluate(), the per-cycle cap is enforced.
    """
-    def __init__(self, news: Optional["NewsClient"] = None) -> None:
+    def __init__(
        self,
        news: Optional["NewsClient"] = None,
        manifold: Optional["ManifoldClient"] = None,
    ) -> None:
        self._signal_count = 0
        self._news = news
        self._manifold = manifold
        self._news_queries_this_cycle = 0
        # Per-cycle counters — reset by reset_cycle(), read by get_cycle_stats()
        self._skip_family: int = 0
@@ -337,11 +349,13 @@ class BayesianStrategy:
            momentum = ext.total_market_cap_change
            asset_label = "total mktcap"
        _momentum_contribution = 0.0
        if abs(momentum) > 2:
            momentum_adj = math.tanh(momentum / 20) * 0.15
            if is_politics or is_tech or is_events:
                momentum_adj *= 0.5
-            adjustments.append(momentum_adj if is_price_above else -momentum_adj)
+            _momentum_contribution = momentum_adj if is_price_above else -momentum_adj
            adjustments.append(_momentum_contribution)
            sources.append(f"{asset_label} 24h: {momentum:+.1f}%")
        # Signal 2: Fear & Greed
@@ -355,7 +369,8 @@ class BayesianStrategy:
        else:
            fg_adj = (fg - 50) / 50 * 0.04
            sources.append(f"Fear&Greed: {fg} (neutral)")
-        adjustments.append(fg_adj if is_price_above else -fg_adj)
+        _fg_contribution = fg_adj if is_price_above else -fg_adj
        adjustments.append(_fg_contribution)
        # Signal 3: BTC dominance — hurts altcoins when high
        if (is_eth or is_altcoin or is_general_crypto) and ext.btc_dominance > 55:
@@ -382,13 +397,26 @@ class BayesianStrategy:
                    market.question[:50], MAX_NEWS_QUERIES_PER_CYCLE,
                )
        # Signal 5: Manifold cross-market probability (politics + tech)
        # Applies a log-odds adjustment proportional to divergence from prior.
        # No query budget — the client's 30 min cache means each question hits the network at most once per TTL window.
        manifold_log_adj = 0.0
        if (is_politics or is_tech) and self._manifold is not None:
            manifold_prob = await self._manifold.get_probability(market.question)
            if manifold_prob is not None:
                m_clamped = max(0.05, min(0.95, manifold_prob))
                m_log = math.log(m_clamped / (1 - m_clamped))
                p_log = math.log(prior / (1 - prior))
                manifold_log_adj = (m_log - p_log) * MANIFOLD_LOGODDS_WEIGHT
                sources.append(f"Manifold:{manifold_prob:.2f}")
        # Confidence cap: macro/politics/tech signals are weaker proxies
        confidence_cap = 0.65 if (is_macro or is_politics or is_tech or is_events) else 0.90
        # Posterior via log-odds updating
        log_odds_prior = math.log(prior / (1 - prior))
        total_adj = sum(adjustments)
-        estimated_prob = _sigmoid(log_odds_prior + total_adj * 2 + news_log_adj)
+        estimated_prob = _sigmoid(log_odds_prior + total_adj * 2 + news_log_adj + manifold_log_adj)
        estimated_prob = max(0.05, min(0.95, estimated_prob))
        # ── Phase 1: edge_gross and edge_net ─────────────────────────────────
@@ -408,6 +436,16 @@ class BayesianStrategy:
        confidence = min(confidence_cap, 0.4 + (agreement / max(len(adjustments), 1)) * 0.5)
        if news_log_adj != 0.0:
            confidence = min(confidence_cap, confidence + 0.10)
        if manifold_log_adj != 0.0:
            confidence = min(confidence_cap, confidence + 0.08)
        # Per-feature contribution string for audit logging
        feat_str = (
            f"fg={_fg_contribution:+.3f} "
            f"mom={_momentum_contribution:+.3f} "
            f"mfld={manifold_log_adj:+.4f} "
            f"news={news_log_adj:+.4f}"
        )
        # ── Phase 5: structured audit log ────────────────────────────────────
        passed_gross = edge_gross >= regime_min
@@ -433,10 +471,10 @@ class BayesianStrategy:
            log.info(
                "SKIP_EDGE_NET      %-50s | cat=%-12s | family=%-28s | "
                "prior=%.3f | est=%.3f | gross=%+.3f | net=%+.3f | "
-                "regime=%.2f | days=%d | conf=%.2f | signals=%s | %s",
+                "regime=%.2f | days=%d | conf=%.2f | %s | signals=%s | %s",
                market.question[:50], category, family,
                prior, estimated_prob, edge_gross, edge_net,
-                regime_min, days, confidence,
+                regime_min, days, confidence, feat_str,
                ", ".join(sources[1:]) or "none",
                " | ".join(skip_parts),
            )
@@ -455,10 +493,10 @@ class BayesianStrategy:
        log.info(
            "TRADE              %-50s | cat=%-12s | family=%-28s | "
            "prior=%.3f | est=%.3f | gross=%+.3f | net=%+.3f | "
-            "regime=%.2f | days=%d | conf=%.2f | dir=%-8s | signals=%s",
+            "regime=%.2f | days=%d | conf=%.2f | dir=%-8s | %s | signals=%s",
            market.question[:50], category, family,
            prior, estimated_prob, edge_gross, edge_net,
-            regime_min, days, confidence, direction,
+            regime_min, days, confidence, direction, feat_str,
            ", ".join(sources[1:]) or "none",
        )