664ecab174
CI/CD / build-and-push (push) Successful in 9s
Add MANIFOLD_MATCHER_VERSION="v3_outcome_guard" tag persisted to manifold_match_audit.matcher_version so metrics can isolate current-matcher stats from pre-versioning records, whose accepted matches the outcome guard would now reject. - schema: add matcher_version column + index; idempotent startup backfill tagging NULL rows as legacy_pre_outcome_guard (no outcome types) or v2_outcome_guard_no_version (has outcome type, version not persisted) - save_manifold_audit: write matcher_version on every new record - get_manifold_matches: split summary into current_version / all_time / legacy; recent_matches now carry matcher_version Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
351 lines
14 KiB
Python
351 lines
14 KiB
Python
"""
|
|
Manifold Markets client — cross-platform prediction market probability signals.
|
|
|
|
For each Polymarket question, searches Manifold for a matching binary market
|
|
by keyword overlap and returns a ManifoldMatchResult with full audit metadata.
|
|
|
|
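Match threshold: score >= 0.40, where score = shared significant words / size of the smaller word set (an overlap coefficient, reported as 'jaccard' in logs; raised from 0.25 for stricter semantics).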
|
|
|
|
Outcome compatibility guard (conservative):
|
|
- Conditional Manifold markets ("If X, will Y?" / "Conditional on..." / "Assuming..."
|
|
/ "Given that..." / mid-sentence "...if X is nominated, will...") are rejected:
|
|
a premise-gated question is not equivalent to a direct outcome question even when
|
|
token overlap is high. reason='conditional_market'.
|
|
- Each side is classified into an outcome_type (nomination | primary_win |
|
|
general_win | conditional | other). Matches with differing outcome_type — or any
|
|
conditional side — are rejected. reason='outcome_mismatch: poly=... manifold=...'.
|
|
|
|
Inversion guard (conservative):
|
|
- If Polymarket question names a party (democrat/republican) AND the matched
|
|
Manifold market names the OPPOSITE party → invert probability (1 - prob).
|
|
- If Polymarket question names a party AND Manifold market has NO party keyword
|
|
→ reject with reason='ambiguous_inversion' (can't determine if inversion applies).
|
|
- All other cases: no inversion, accept if score >= threshold.
|
|
- When in doubt, reject.
|
|
|
|
Cache TTL: 30 minutes.
|
|
"""
|
|
import logging
|
|
import re
|
|
import time
|
|
from dataclasses import dataclass, field
|
|
from typing import Optional
|
|
|
|
import httpx
|
|
|
|
# Identifier stamped on every audit record this matcher produces and persisted
# to manifold_match_audit.matcher_version, so that metrics can separate
# current-version stats from legacy/pre-versioning records. Treat the value as
# frozen once set: introduce a new string only when matcher semantics change
# materially.
MANIFOLD_MATCHER_VERSION = "v3_outcome_guard"

# Base URL of the Manifold REST API (v0).
MANIFOLD_API = "https://api.manifold.markets/v0"

# Lifetime of a cached match result, in seconds (30 minutes).
CACHE_TTL_SEC = 30 * 60

log = logging.getLogger(__name__)

# Minimum match score a candidate must reach to be considered (raised from 0.25).
_MATCH_THRESHOLD = 0.40

# Words that carry no matching signal; they are dropped before questions are
# compared or turned into a search query.
_STOP_WORDS = frozenset({
    # articles / determiners
    "the", "a", "an", "this", "that", "any", "its", "their",
    # auxiliaries and modals
    "will", "is", "are", "was", "were", "be", "been", "being",
    "have", "has", "had", "do", "does", "did",
    "can", "could", "would", "should", "may", "might", "shall",
    # generic verbs
    "win", "lose", "get", "become", "make", "take", "give", "see",
    # prepositions / conjunctions / negation
    "by", "in", "on", "at", "to", "for", "of", "and", "or", "not",
    "with", "from", "over", "under", "than", "into", "after",
    "before", "during", "until", "against", "between", "through",
    # interrogatives
    "who", "what", "when", "where", "which", "how",
    # quantifiers
    "more", "most", "least",
    # pronouns
    "they", "him", "her", "his", "she",
})

# Vocabulary used to detect which US party a question names.
_REPUBLICAN_WORDS = frozenset({"republican", "republicans", "gop"})
_DEMOCRAT_WORDS = frozenset({"democrat", "democrats", "democratic"})
|
|
|
|
|
|
@dataclass
class ManifoldMatchResult:
    """Result of matching one Polymarket question against Manifold.

    Besides the verdict and (when accepted) the probability, it records the
    candidate market and the matching details so each decision can be audited.
    Only ``status`` is required; every other field defaults to "not set".
    """

    # Verdict: 'accepted' | 'rejected' | 'no_results'
    status: str
    # Probability to use as the signal; 1 - prob_raw when the match is inverted
    prob_final: Optional[float] = None
    # Probability exactly as reported by the Manifold market
    prob_raw: Optional[float] = None
    # Manifold internal market ID
    market_id: Optional[str] = None
    # Question text of the candidate Manifold market
    market_title: Optional[str] = None
    # Public URL of the candidate Manifold market
    market_url: Optional[str] = None
    # 0-1 token-overlap score (reported as 'jaccard' in match_reason)
    match_score: Optional[float] = None
    # Human-readable explanation of the decision
    match_reason: Optional[str] = None
    # True when prob_final was flipped because the two questions name opposite parties
    inverted: bool = False
    # Search term that was sent to Manifold
    search_query: str = ""
    # Outcome class of the Polymarket question:
    # nomination|primary_win|general_win|conditional|other
    poly_outcome_type: Optional[str] = None
    # Outcome class of the Manifold candidate (same vocabulary)
    mfld_outcome_type: Optional[str] = None
|
|
|
|
|
|
def _significant_words(text: str) -> set[str]:
    """Return the distinct lower-cased alphabetic words of *text* worth comparing.

    Words shorter than three letters and words listed in ``_STOP_WORDS`` are
    discarded. Digits and punctuation act as separators.
    """
    kept: set[str] = set()
    for token in re.findall(r"[a-zA-Z]+", text.lower()):
        if len(token) < 3 or token in _STOP_WORDS:
            continue
        kept.add(token)
    return kept
|
|
|
|
|
|
def _build_search_query(question: str, max_words: int = 6) -> str:
    """Build the Manifold search term from a question.

    Keeps alphanumeric tokens of at least three characters that are not stop
    words, preserving their original case and order, and joins the first
    *max_words* of them with single spaces. Returns "" when nothing survives.
    """
    def _keep(token: str) -> bool:
        # Stop-word comparison is case-insensitive; the token itself is left as written.
        return len(token) >= 3 and token.lower() not in _STOP_WORDS

    candidates = re.findall(r"[a-zA-Z0-9]+", question)
    kept = list(filter(_keep, candidates))
    return " ".join(kept[:max_words])
|
|
|
|
|
|
def _detect_party(text: str) -> Optional[str]:
    """Return 'republican', 'democrat', or None if no party detected.

    Matching is on whole lower-cased words. Republican vocabulary is checked
    first, so a text naming both parties yields 'republican'.
    """
    tokens = frozenset(re.findall(r"[a-zA-Z]+", text.lower()))
    parties = (
        ("republican", _REPUBLICAN_WORDS),
        ("democrat", _DEMOCRAT_WORDS),
    )
    for label, vocabulary in parties:
        if not tokens.isdisjoint(vocabulary):
            return label
    return None
|
|
|
|
|
|
# ── Conditional-market detection (Task 1) ──────────────────────────────────────
# "Conditional" means the market resolves on a premise instead of asking about
# the outcome directly (e.g. "If X is the nominee, will he win?"). However high
# the token overlap, such a market is NOT equivalent to the direct question.

# Openers that mark a question as conditional (compared against lower-cased text).
_CONDITIONAL_PREFIXES = ("if ", "conditional on", "assuming ", "given that")

# " if <clause>," — a conditional clause in the middle of a sentence, ended by a comma.
_CONDITIONAL_CLAUSE_RE = re.compile(r"\sif\s[^,]*,")


def _is_conditional(text: str) -> bool:
    """True if the question is phrased conditionally (premise-gated).

    The text is stripped and lower-cased first; ``None`` or empty input is
    treated as not conditional. A question qualifies when it opens with one of
    ``_CONDITIONAL_PREFIXES`` or contains a comma-terminated " if ..." clause.
    """
    normalized = (text or "").strip().lower()
    opens_with_premise = normalized.startswith(_CONDITIONAL_PREFIXES)
    return opens_with_premise or _CONDITIONAL_CLAUSE_RE.search(normalized) is not None
|
|
|
|
|
|
def _classify_outcome(text: str) -> str:
    """
    Coarse classification of what a question is *asking about*, used to reject
    matches whose outcomes are not equivalent even when tokens overlap.

    Returns one of: nomination | primary_win | general_win | conditional | other.
    ``None`` or empty text classifies as "other".

    Order matters: conditional is checked first (premise-gated), then nomination
    (which subsumes "primary nominee"), then primary, then general election.
    Keywords are matched as substrings of the stripped, lower-cased text.
    """
    t = (text or "").strip().lower()
    # Delegate to the shared detector so a mid-sentence premise
    # ("Will X, if nominated, win ...") is classified as conditional exactly
    # like a leading "If ...". Checking only the prefixes let such questions
    # fall through to nomination/general_win and bypass the conditional guard.
    if _is_conditional(t):
        return "conditional"
    if any(k in t for k in ("nominee", "nominated", "nomination")):
        return "nomination"
    # "primary" also covers longer phrases such as "win the primary".
    if any(k in t for k in ("primary", "first round")):
        return "primary_win"
    if any(k in t for k in ("win the election", "win the race",
                            "win the seat", "general election")):
        return "general_win"
    return "other"
|
|
|
|
|
|
def _find_best_candidate(poly_question: str, results: list[dict]) -> tuple[Optional[dict], float]:
    """Pick the Manifold search result whose title best overlaps the question.

    Only results with outcomeType "BINARY" and a probability strictly between
    0.02 and 0.98 are eligible. The score is the number of shared significant
    words divided by the size of the smaller of the two word sets. On ties the
    earliest result wins; a result sharing no words is never selected.

    Returns ``(best_result, best_score)``, or ``(None, 0.0)`` when the question
    has no significant words or nothing eligible overlaps it.

    NOTE(review): the rest of the module labels this score "Jaccard", but
    dividing by the smaller set is an overlap coefficient, which is more
    lenient than true Jaccard — confirm which metric is intended.
    """
    target = _significant_words(poly_question)
    if not target:
        return None, 0.0

    def _score(market: dict) -> Optional[float]:
        # None means "not eligible"; checks run in the same order as the filters above.
        if market.get("outcomeType") != "BINARY":
            return None
        probability = market.get("probability")
        if probability is None:
            return None
        if not (0.02 < float(probability) < 0.98):
            return None
        candidate = _significant_words(market.get("question", ""))
        if not candidate:
            return None
        return len(target & candidate) / min(len(target), len(candidate))

    winner: Optional[dict] = None
    top = 0.0
    for market in results:
        value = _score(market)
        # Strict comparison keeps the first of equally scored candidates.
        if value is not None and value > top:
            top, winner = value, market
    return winner, top
|
|
|
|
|
|
def _market_url(match: dict) -> Optional[str]:
    """Return the public manifold.markets URL for a search result.

    The URL is built from the result's ``creatorUsername`` and ``slug``.
    Returns None when the slug is missing or empty; a missing creator is
    substituted with an empty path segment.
    """
    slug = match.get("slug", "")
    if not slug:
        return None
    owner = match.get("creatorUsername", "")
    return f"https://manifold.markets/{owner}/{slug}"
|
|
|
|
|
|
class ManifoldClient:
    """Async Manifold Markets client for cross-platform probability signals.

    Every outcome of ``get_match`` — accepted, rejected or no_results, including
    failed API calls — is cached per question text for ``CACHE_TTL_SEC`` seconds.
    Call ``close()`` when done to release the underlying HTTP client.
    """

    def __init__(self) -> None:
        self._client = httpx.AsyncClient(timeout=15)
        # question → (fetched_at_monotonic, ManifoldMatchResult)
        self._cache: dict[str, tuple[float, ManifoldMatchResult]] = {}

    def _remember(
        self, question: str, fetched_at: float, result: ManifoldMatchResult
    ) -> ManifoldMatchResult:
        """Cache *result* for *question* (stamped *fetched_at*) and return it."""
        self._cache[question] = (fetched_at, result)
        return result

    async def get_match(self, question: str) -> ManifoldMatchResult:
        """
        Return a ManifoldMatchResult for the given Polymarket question.

        status='accepted' → prob_final is set and ready to use as signal
        status='rejected' → best candidate scored below the threshold (or no
                             eligible binary candidate existed), or it failed
                             the conditional/outcome/inversion guard
        status='no_results' → API returned no usable results or call failed

        API and decoding errors are logged and reported as 'no_results'; they
        are not raised to the caller.
        """
        now = time.monotonic()
        cached = self._cache.get(question)
        if cached and (now - cached[0]) < CACHE_TTL_SEC:
            return cached[1]

        poly_outcome = _classify_outcome(question)

        query = _build_search_query(question)
        if not query:
            # Nothing searchable in the question — skip the API call entirely.
            return self._remember(question, now, ManifoldMatchResult(
                status="no_results", search_query="",
                poly_outcome_type=poly_outcome,
            ))

        try:
            resp = await self._client.get(
                f"{MANIFOLD_API}/search-markets",
                params={"term": query, "limit": 5, "filter": "open"},
            )
            resp.raise_for_status()
            results = resp.json()
        except Exception as exc:
            # Deliberate best-effort boundary: a failed lookup must not break the caller.
            # NOTE(review): the failure is cached for the full TTL like any other
            # result — confirm that this back-off is intended.
            log.warning("Manifold API error for %r: %s", question[:40], exc)
            return self._remember(question, now, ManifoldMatchResult(
                status="no_results", search_query=query,
                poly_outcome_type=poly_outcome,
            ))

        # A successful response is expected to be a JSON array of market objects.
        # Anything else (e.g. an object, or non-object entries) would otherwise
        # crash candidate scoring with AttributeError, so treat it as no results.
        if not isinstance(results, list):
            log.warning(
                "Manifold API returned unexpected payload type %s for %r",
                type(results).__name__, question[:40],
            )
            results = []
        else:
            results = [r for r in results if isinstance(r, dict)]

        if not results:
            return self._remember(question, now, ManifoldMatchResult(
                status="no_results", search_query=query,
                poly_outcome_type=poly_outcome,
            ))

        best, score = _find_best_candidate(question, results)

        # ── Score threshold ───────────────────────────────────────────────────
        if best is None or score < _MATCH_THRESHOLD:
            reason = f"jaccard={score:.2f}<{_MATCH_THRESHOLD:.2f}"
            log.info(
                "Manifold REJECTED %-50s | score=%.2f < threshold=%.2f | query=%r",
                question[:50], score, _MATCH_THRESHOLD, query,
            )
            return self._remember(question, now, ManifoldMatchResult(
                status="rejected",
                market_title=best.get("question") if best else None,
                match_score=score if best else None,
                match_reason=reason,
                search_query=query,
                poly_outcome_type=poly_outcome,
                mfld_outcome_type=_classify_outcome(best.get("question", "")) if best else None,
            ))

        # ── Outcome compatibility + inversion analysis (conservative) ─────────
        mfld_title = best.get("question", "")
        mfld_outcome = _classify_outcome(mfld_title)
        poly_party = _detect_party(question)
        manifold_party = _detect_party(mfld_title)

        # Up to six shared tokens, sorted, recorded for the audit trail only.
        poly_words = _significant_words(question)
        mfld_words = _significant_words(mfld_title)
        matched_tokens = sorted(poly_words & mfld_words)[:6]

        inverted = False
        rejection_reason: Optional[str] = None

        # Task 1 — conditional Manifold market is never equivalent to a direct
        # outcome question, regardless of token overlap.
        if _is_conditional(mfld_title):
            rejection_reason = "conditional_market: manifold question is conditional"
        # Task 2 — outcome types must match; any conditional side is rejected.
        elif (poly_outcome == "conditional" or mfld_outcome == "conditional"
              or poly_outcome != mfld_outcome):
            rejection_reason = (
                f"outcome_mismatch: poly={poly_outcome} manifold={mfld_outcome}"
            )
        elif poly_party is not None:
            if manifold_party is None:
                # Poly specifies a party; Manifold does not → can't verify inversion safety
                rejection_reason = (
                    f"ambiguous_inversion: poly_party={poly_party}, mfld_party=none"
                )
            elif manifold_party != poly_party:
                # Clear opposite parties — apply inversion
                inverted = True
            # manifold_party == poly_party → same party, no inversion needed

        if rejection_reason is not None:
            url = _market_url(best)
            log.info(
                "Manifold REJECTED %-50s | score=%.2f | reason=%s\n"
                " mfld_title: %s",
                question[:50], score, rejection_reason, best.get("question", "")[:70],
            )
            return self._remember(question, now, ManifoldMatchResult(
                status="rejected",
                market_id=str(best.get("id", "")) or None,
                market_title=best.get("question"),
                market_url=url,
                match_score=score,
                match_reason=(
                    f"jaccard={score:.2f}, tokens={matched_tokens}, {rejection_reason}"
                ),
                search_query=query,
                poly_outcome_type=poly_outcome,
                mfld_outcome_type=mfld_outcome,
            ))

        # ── Accepted ──────────────────────────────────────────────────────────
        prob_raw = float(best["probability"])
        # Opposite-party markets ask the complementary question, so flip the probability.
        prob_final = (1.0 - prob_raw) if inverted else prob_raw
        url = _market_url(best)

        match_reason = f"jaccard={score:.2f}, tokens={matched_tokens}"
        if inverted:
            match_reason += f", inverted=party({poly_party}≠{manifold_party})"

        log.info(
            "Manifold %s %-50s\n"
            " poly: %s\n"
            " mfld: %s\n"
            " url: %s\n"
            " score=%.2f | raw=%.3f | inverted=%s | final=%.3f",
            "ACCEPTED_INVERTED" if inverted else "ACCEPTED ",
            question[:50],
            question,
            best.get("question", ""),
            url or "n/a",
            score, prob_raw, inverted, prob_final,
        )

        return self._remember(question, now, ManifoldMatchResult(
            status="accepted",
            prob_final=prob_final,
            prob_raw=prob_raw,
            market_id=str(best.get("id", "")) or None,
            market_title=best.get("question"),
            market_url=url,
            match_score=score,
            match_reason=match_reason,
            inverted=inverted,
            search_query=query,
            poly_outcome_type=poly_outcome,
            mfld_outcome_type=mfld_outcome,
        ))

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self._client.aclose()
|