feat(manifold): audit matching quality with ManifoldMatchResult and manifold_match_audit table
CI/CD / build-and-push (push) Successful in 14s
CI/CD / build-and-push (push) Successful in 14s
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+144
-77
@@ -2,24 +2,24 @@
|
||||
Manifold Markets client — cross-platform prediction market probability signals.
|
||||
|
||||
For each Polymarket question, searches Manifold for a matching binary market
|
||||
by keyword overlap and returns its probability as a calibration signal.
|
||||
by keyword overlap and returns a ManifoldMatchResult with full audit metadata.
|
||||
|
||||
Inversion guard: if the Manifold market's winning side (Republican / Democrat)
|
||||
is the complement of the Polymarket question's winning side, the probability is
|
||||
automatically inverted (1 - prob). This prevents "Democrats win Ohio governor"
|
||||
from consuming the probability of a Manifold market titled "Republicans win Ohio
|
||||
governor" without adjustment.
|
||||
Match threshold: >= 0.40 Jaccard overlap (raised from 0.25 for stricter semantics).
|
||||
|
||||
Rejection guard: if the match score falls below _MATCH_THRESHOLD the market is
|
||||
rejected, even if inversion would otherwise apply. All decisions are logged at
|
||||
INFO so they can be audited per-cycle.
|
||||
Inversion guard (conservative):
|
||||
- If Polymarket question names a party (democrat/republican) AND the matched
|
||||
Manifold market names the OPPOSITE party → invert probability (1 - prob).
|
||||
- If Polymarket question names a party AND Manifold market has NO party keyword
|
||||
→ reject with reason='ambiguous_inversion' (can't determine if inversion applies).
|
||||
- All other cases: no inversion, accept if score >= threshold.
|
||||
- When in doubt, reject.
|
||||
|
||||
Cache TTL: 30 minutes (Manifold markets move slowly vs our 60 s cycle).
|
||||
Match threshold: >= 0.25 keyword overlap ratio between significant tokens.
|
||||
Cache TTL: 30 minutes.
|
||||
"""
|
||||
import logging
|
||||
import re
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
@@ -29,7 +29,7 @@ CACHE_TTL_SEC = 1800 # 30 minutes
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
_MATCH_THRESHOLD = 0.25
|
||||
_MATCH_THRESHOLD = 0.40 # raised from 0.25
|
||||
|
||||
_STOP_WORDS = frozenset([
|
||||
"will", "the", "a", "an", "is", "are", "was", "were", "be", "been",
|
||||
@@ -43,9 +43,22 @@ _STOP_WORDS = frozenset([
|
||||
"before", "during", "until", "against", "between", "through",
|
||||
])
|
||||
|
||||
# Mutually exclusive political parties used for complement detection
|
||||
_REPUBLICAN_WORDS = frozenset(["republican", "republicans", "gop"])
|
||||
_DEMOCRAT_WORDS = frozenset(["democrat", "democrats", "democratic"])
|
||||
_DEMOCRAT_WORDS = frozenset(["democrat", "democrats", "democratic"])
|
||||
|
||||
|
||||
@dataclass
class ManifoldMatchResult:
    """Outcome of matching one Polymarket question against Manifold results.

    ``status`` is one of ``'accepted'``, ``'rejected'`` or ``'no_results'``.
    All remaining fields are optional audit metadata; each keeps its default
    when it does not apply to the outcome being reported.
    """

    status: str  # 'accepted' | 'rejected' | 'no_results'
    prob_final: Optional[float] = None  # probability after any inversion
    prob_raw: Optional[float] = None  # probability before any inversion
    market_id: Optional[str] = None  # Manifold internal market ID
    market_title: Optional[str] = None
    market_url: Optional[str] = None
    match_score: Optional[float] = None  # 0-1 Jaccard
    match_reason: Optional[str] = None  # human-readable explanation
    inverted: bool = False  # True when prob_final is 1 - prob_raw
    search_query: str = ""
|
||||
|
||||
|
||||
def _significant_words(text: str) -> set[str]:
|
||||
@@ -69,27 +82,14 @@ def _detect_party(text: str) -> Optional[str]:
|
||||
return None
|
||||
|
||||
|
||||
def _best_match_with_audit(
|
||||
poly_question: str,
|
||||
results: list[dict],
|
||||
) -> tuple[Optional[dict], float, bool]:
|
||||
"""
|
||||
Find the best-matching open binary Manifold market.
|
||||
|
||||
Returns (match, score, needs_inversion):
|
||||
match — best result dict, or None if below threshold
|
||||
score — keyword overlap score of best candidate (even if rejected)
|
||||
needs_inversion — True when Manifold market favours the OPPOSITE party/side
|
||||
to the Polymarket question (probability should be 1 - prob)
|
||||
"""
|
||||
def _find_best_candidate(poly_question: str, results: list[dict]) -> tuple[Optional[dict], float]:
|
||||
"""Find the highest-scoring open binary Manifold market by Jaccard overlap."""
|
||||
poly_words = _significant_words(poly_question)
|
||||
poly_party = _detect_party(poly_question)
|
||||
if not poly_words:
|
||||
return None, 0.0, False
|
||||
return None, 0.0
|
||||
|
||||
best_score = 0.0
|
||||
best: Optional[dict] = None
|
||||
best_needs_inv = False
|
||||
|
||||
for result in results:
|
||||
if result.get("outcomeType") != "BINARY":
|
||||
@@ -106,18 +106,14 @@ def _best_match_with_audit(
|
||||
if score > best_score:
|
||||
best_score = score
|
||||
best = result
|
||||
manifold_party = _detect_party(title)
|
||||
# Inversion is warranted only when both sides are unambiguously detected
|
||||
# and they are confirmed opposites (republican ≠ democrat).
|
||||
best_needs_inv = (
|
||||
poly_party is not None
|
||||
and manifold_party is not None
|
||||
and poly_party != manifold_party
|
||||
)
|
||||
|
||||
if best_score >= _MATCH_THRESHOLD and best is not None:
|
||||
return best, best_score, best_needs_inv
|
||||
return None, best_score, False
|
||||
return best, best_score
|
||||
|
||||
|
||||
def _market_url(match: dict) -> Optional[str]:
|
||||
slug = match.get("slug", "")
|
||||
creator = match.get("creatorUsername", "")
|
||||
return f"https://manifold.markets/{creator}/{slug}" if slug else None
|
||||
|
||||
|
||||
class ManifoldClient:
|
||||
@@ -125,17 +121,16 @@ class ManifoldClient:
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._client = httpx.AsyncClient(timeout=15)
|
||||
# question → (fetched_at_monotonic, probability_or_None)
|
||||
self._cache: dict[str, tuple[float, Optional[float]]] = {}
|
||||
# question → (fetched_at_monotonic, ManifoldMatchResult)
|
||||
self._cache: dict[str, tuple[float, ManifoldMatchResult]] = {}
|
||||
|
||||
async def get_probability(self, question: str) -> Optional[float]:
|
||||
async def get_match(self, question: str) -> ManifoldMatchResult:
|
||||
"""
|
||||
Return Manifold probability for a matching market, or None.
|
||||
Return a ManifoldMatchResult for the given Polymarket question.
|
||||
|
||||
Probability is already adjusted for party-direction inversion when
|
||||
the matched Manifold market is the complement of our question.
|
||||
|
||||
Full audit log is emitted at INFO for every resolved query.
|
||||
status='accepted' → prob_final is set and ready to use as signal
|
||||
status='rejected' → match found but failed quality/inversion check
|
||||
status='no_results' → API returned no results or call failed
|
||||
"""
|
||||
now = time.monotonic()
|
||||
cached = self._cache.get(question)
|
||||
@@ -144,8 +139,9 @@ class ManifoldClient:
|
||||
|
||||
query = _build_search_query(question)
|
||||
if not query:
|
||||
self._cache[question] = (now, None)
|
||||
return None
|
||||
result = ManifoldMatchResult(status="no_results", search_query="")
|
||||
self._cache[question] = (now, result)
|
||||
return result
|
||||
|
||||
try:
|
||||
resp = await self._client.get(
|
||||
@@ -154,45 +150,116 @@ class ManifoldClient:
|
||||
)
|
||||
resp.raise_for_status()
|
||||
results = resp.json()
|
||||
except Exception as e:
|
||||
log.warning("Manifold API error for %r: %s", question[:40], e)
|
||||
self._cache[question] = (now, None)
|
||||
return None
|
||||
except Exception as exc:
|
||||
log.warning("Manifold API error for %r: %s", question[:40], exc)
|
||||
result = ManifoldMatchResult(status="no_results", search_query=query)
|
||||
self._cache[question] = (now, result)
|
||||
return result
|
||||
|
||||
match, score, needs_inv = _best_match_with_audit(question, results)
|
||||
if not results:
|
||||
result = ManifoldMatchResult(status="no_results", search_query=query)
|
||||
self._cache[question] = (now, result)
|
||||
return result
|
||||
|
||||
if match is None:
|
||||
best, score = _find_best_candidate(question, results)
|
||||
|
||||
# ── Score threshold ───────────────────────────────────────────────────
|
||||
if best is None or score < _MATCH_THRESHOLD:
|
||||
reason = f"jaccard={score:.2f}<{_MATCH_THRESHOLD:.2f}"
|
||||
log.info(
|
||||
"Manifold no_match: %-50s | best_score=%.2f < %.2f | query=%r",
|
||||
"Manifold REJECTED %-50s | score=%.2f < threshold=%.2f | query=%r",
|
||||
question[:50], score, _MATCH_THRESHOLD, query,
|
||||
)
|
||||
self._cache[question] = (now, None)
|
||||
return None
|
||||
result = ManifoldMatchResult(
|
||||
status="rejected",
|
||||
market_title=best.get("question") if best else None,
|
||||
match_score=score if best else None,
|
||||
match_reason=reason,
|
||||
search_query=query,
|
||||
)
|
||||
self._cache[question] = (now, result)
|
||||
return result
|
||||
|
||||
prob_raw = float(match["probability"])
|
||||
prob_final = (1.0 - prob_raw) if needs_inv else prob_raw
|
||||
# ── Inversion analysis (conservative) ────────────────────────────────
|
||||
poly_party = _detect_party(question)
|
||||
manifold_party = _detect_party(best.get("question", ""))
|
||||
|
||||
# Build market URL from slug (best-effort; may be missing)
|
||||
slug = match.get("slug", "")
|
||||
creator = match.get("creatorUsername", "")
|
||||
url = f"https://manifold.markets/{creator}/{slug}" if slug else "n/a"
|
||||
poly_words = _significant_words(question)
|
||||
mfld_words = _significant_words(best.get("question", ""))
|
||||
matched_tokens = sorted(poly_words & mfld_words)[:6]
|
||||
|
||||
inverted = False
|
||||
rejection_reason: Optional[str] = None
|
||||
|
||||
if poly_party is not None:
|
||||
if manifold_party is None:
|
||||
# Poly specifies a party; Manifold does not → can't verify inversion safety
|
||||
rejection_reason = (
|
||||
f"ambiguous_inversion: poly_party={poly_party}, mfld_party=none"
|
||||
)
|
||||
elif manifold_party != poly_party:
|
||||
# Clear opposite parties — apply inversion
|
||||
inverted = True
|
||||
# manifold_party == poly_party → same party, no inversion needed
|
||||
|
||||
if rejection_reason is not None:
|
||||
url = _market_url(best)
|
||||
log.info(
|
||||
"Manifold REJECTED %-50s | score=%.2f | reason=%s\n"
|
||||
" mfld_title: %s",
|
||||
question[:50], score, rejection_reason, best.get("question", "")[:70],
|
||||
)
|
||||
result = ManifoldMatchResult(
|
||||
status="rejected",
|
||||
market_id=str(best.get("id", "")) or None,
|
||||
market_title=best.get("question"),
|
||||
market_url=url,
|
||||
match_score=score,
|
||||
match_reason=(
|
||||
f"jaccard={score:.2f}, tokens={matched_tokens}, {rejection_reason}"
|
||||
),
|
||||
search_query=query,
|
||||
)
|
||||
self._cache[question] = (now, result)
|
||||
return result
|
||||
|
||||
# ── Accepted ──────────────────────────────────────────────────────────
|
||||
prob_raw = float(best["probability"])
|
||||
prob_final = (1.0 - prob_raw) if inverted else prob_raw
|
||||
url = _market_url(best)
|
||||
|
||||
match_reason = f"jaccard={score:.2f}, tokens={matched_tokens}"
|
||||
if inverted:
|
||||
match_reason += f", inverted=party({poly_party}≠{manifold_party})"
|
||||
|
||||
log.info(
|
||||
"Manifold %s: %-50s\n"
|
||||
" poly_question: %s\n"
|
||||
" manifold_title: %s\n"
|
||||
" manifold_url: %s\n"
|
||||
" match_score: %.2f | prob_raw=%.3f | inverted=%s | prob_final=%.3f",
|
||||
"MATCH_INVERTED" if needs_inv else "MATCH",
|
||||
"Manifold %s %-50s\n"
|
||||
" poly: %s\n"
|
||||
" mfld: %s\n"
|
||||
" url: %s\n"
|
||||
" score=%.2f | raw=%.3f | inverted=%s | final=%.3f",
|
||||
"ACCEPTED_INVERTED" if inverted else "ACCEPTED ",
|
||||
question[:50],
|
||||
question,
|
||||
match.get("question", ""),
|
||||
url,
|
||||
score, prob_raw, needs_inv, prob_final,
|
||||
best.get("question", ""),
|
||||
url or "n/a",
|
||||
score, prob_raw, inverted, prob_final,
|
||||
)
|
||||
|
||||
self._cache[question] = (now, prob_final)
|
||||
return prob_final
|
||||
result = ManifoldMatchResult(
|
||||
status="accepted",
|
||||
prob_final=prob_final,
|
||||
prob_raw=prob_raw,
|
||||
market_id=str(best.get("id", "")) or None,
|
||||
market_title=best.get("question"),
|
||||
market_url=url,
|
||||
match_score=score,
|
||||
match_reason=match_reason,
|
||||
inverted=inverted,
|
||||
search_query=query,
|
||||
)
|
||||
self._cache[question] = (now, result)
|
||||
return result
|
||||
|
||||
async def close(self) -> None:
    """Close the HTTP client held in ``self._client``."""
    await self._client.aclose()
|
||||
|
||||
Reference in New Issue
Block a user