diff --git a/bot/data/db.py b/bot/data/db.py
index 3796d49..5cc491d 100644
--- a/bot/data/db.py
+++ b/bot/data/db.py
@@ -89,6 +89,26 @@ class Database:
             )
             return {r["family_key"] for r in rows if r["family_key"]}
 
+    async def get_open_position_details(self) -> list[dict]:
+        """Return one row per open position with family_key and direction.
+
+        Used at startup to detect positions that share a family_key (same
+        underlying event), which indicates a contradictory paper trade entered
+        before the general-election family fix was deployed.
+
+        NOTE(review): the query keeps the most recent paper trade per market_id
+        and has no open/closed filter — confirm every such row is still open.
+        """
+        async with self._pool.acquire() as conn:
+            rows = await conn.fetch("""
+                SELECT DISTINCT ON (market_id)
+                    market_id, question, direction, edge_net, family_key, timestamp
+                FROM trades
+                WHERE paper = TRUE
+                ORDER BY market_id, timestamp DESC
+            """)
+        return [dict(r) for r in rows]
+
     async def get_recent_trades(self, limit: int = 100) -> list[dict]:
         async with self._pool.acquire() as conn:
             rows = await conn.fetch(
diff --git a/bot/data/manifold.py b/bot/data/manifold.py
index 47011cc..d46294f 100644
--- a/bot/data/manifold.py
+++ b/bot/data/manifold.py
@@ -4,15 +4,18 @@ Manifold Markets client — cross-platform prediction market probability signals
 For each Polymarket question, searches Manifold for a matching binary market
 by keyword overlap and returns its probability as a calibration signal.
 
-Used for politics and tech markets where Manifold often has independent
-probability estimates that diverge from Polymarket.
+Inversion guard: if the Manifold market's winning side (Republican / Democrat)
+is the complement of the Polymarket question's winning side, the probability is
+automatically inverted (1 - prob). This prevents "Democrats win Ohio governor"
+from consuming the probability of a Manifold market titled "Republicans win Ohio
+governor" without adjustment.
+
+Rejection guard: if the match score falls below _MATCH_THRESHOLD the market is
+rejected, even if inversion would otherwise apply. All decisions are logged at
+INFO so they can be audited per-cycle.
 
 Cache TTL: 30 minutes (Manifold markets move slowly vs our 60 s cycle).
 Match threshold: >= 0.25 keyword overlap ratio between significant tokens.
-
-Weight choice: MANIFOLD_LOGODDS_WEIGHT = 0.6 in bayesian.py means a 30 pp
-divergence (Manifold 0.75 vs Poly 0.45) produces edge_gross ≈ 0.19, which
-clears the politics far-horizon regime threshold of 0.12 after costs.
 """
 import logging
 import re
@@ -40,6 +43,10 @@ _STOP_WORDS = frozenset([
     "before", "during", "until", "against", "between", "through",
 ])
 
+# Mutually exclusive political parties used for complement detection
+_REPUBLICAN_WORDS = frozenset(["republican", "republicans", "gop"])
+_DEMOCRAT_WORDS = frozenset(["democrat", "democrats", "democratic"])
+
 
 def _significant_words(text: str) -> set[str]:
     words = re.findall(r"[a-zA-Z]+", text.lower())
@@ -52,14 +59,42 @@ def _build_search_query(question: str, max_words: int = 6) -> str:
     return " ".join(sig[:max_words])
 
 
-def _best_match(poly_question: str, results: list[dict]) -> Optional[dict]:
-    """Return best-matching open binary Manifold market, or None if below threshold."""
+def _detect_party(text: str) -> Optional[str]:
+    """Return 'republican', 'democrat', or None if no single party is detected.
+
+    Text that mentions both parties (e.g. "Will Democrats beat Republicans?")
+    is ambiguous, so None is returned and no inversion is inferred from it.
+    """
+    words = set(re.findall(r"[a-zA-Z]+", text.lower()))
+    has_republican = bool(words & _REPUBLICAN_WORDS)
+    has_democrat = bool(words & _DEMOCRAT_WORDS)
+    if has_republican == has_democrat:
+        # Neither party, or both parties, mentioned: not an unambiguous side.
+        return None
+    return "republican" if has_republican else "democrat"
+
+
+def _best_match_with_audit(
+    poly_question: str,
+    results: list[dict],
+) -> tuple[Optional[dict], float, bool]:
+    """
+    Find the best-matching open binary Manifold market.
+
+    Returns (match, score, needs_inversion):
+      match — best result dict, or None if below threshold
+      score — keyword overlap score of best candidate (even if rejected)
+      needs_inversion — True when Manifold market favours the OPPOSITE party/side
+        to the Polymarket question (probability should be 1 - prob)
+    """
     poly_words = _significant_words(poly_question)
+    poly_party = _detect_party(poly_question)
     if not poly_words:
-        return None
+        return None, 0.0, False
 
     best_score = 0.0
     best: Optional[dict] = None
+    best_needs_inv = False
 
     for result in results:
         if result.get("outcomeType") != "BINARY":
@@ -76,10 +111,18 @@ def _best_match(poly_question: str, results: list[dict]) -> Optional[dict]:
         if score > best_score:
             best_score = score
             best = result
+            manifold_party = _detect_party(title)
+            # Inversion is warranted only when both sides are unambiguously detected
+            # and they are confirmed opposites (republican ≠ democrat).
+            best_needs_inv = (
+                poly_party is not None
+                and manifold_party is not None
+                and poly_party != manifold_party
+            )
 
     if best_score >= _MATCH_THRESHOLD and best is not None:
-        return best
-    return None
+        return best, best_score, best_needs_inv
+    return None, best_score, False
 
 
 class ManifoldClient:
@@ -94,8 +137,12 @@ class ManifoldClient:
         """
         Return Manifold probability for a matching market, or None.
 
-        Searches by keyword overlap. Returns None if no match exceeds
-        _MATCH_THRESHOLD or on any API error (caller degrades gracefully).
+        Probability is already adjusted for party-direction inversion when
+        the matched Manifold market is the complement of our question.
+        Returns None when no match reaches _MATCH_THRESHOLD or on any API
+        error.
+
+        Full audit log is emitted at INFO for every resolved query.
         """
         now = time.monotonic()
         cached = self._cache.get(question)
@@ -114,22 +161,46 @@ class ManifoldClient:
             )
             resp.raise_for_status()
             results = resp.json()
-            match = _best_match(question, results)
-            prob = float(match["probability"]) if match else None
-            self._cache[question] = (now, prob)
-            if prob is not None:
-                log.info(
-                    "Manifold match: %-50s → %.3f | %s",
-                    question[:50], prob, match.get("question", "")[:60],
-                )
-            else:
-                log.debug("Manifold no match for: %s (query=%r)", question[:50], query)
-            return prob
-
+            # Match and parse inside the try: an unexpected payload shape is
+            # then handled like any other API error instead of escaping.
+            match, score, needs_inv = _best_match_with_audit(question, results)
+            prob_raw = float(match["probability"]) if match is not None else None
         except Exception as e:
             log.warning("Manifold API error for %r: %s", question[:40], e)
             self._cache[question] = (now, None)
             return None
 
+        if match is None:
+            log.info(
+                "Manifold no_match: %-50s | best_score=%.2f < %.2f | query=%r",
+                question[:50], score, _MATCH_THRESHOLD, query,
+            )
+            self._cache[question] = (now, None)
+            return None
+
+        prob_final = (1.0 - prob_raw) if needs_inv else prob_raw
+
+        # Build market URL from slug (best-effort; may be missing)
+        slug = match.get("slug", "")
+        creator = match.get("creatorUsername", "")
+        url = f"https://manifold.markets/{creator}/{slug}" if slug else "n/a"
+
+        log.info(
+            "Manifold %s: %-50s\n"
+            " poly_question: %s\n"
+            " manifold_title: %s\n"
+            " manifold_url: %s\n"
+            " match_score: %.2f | prob_raw=%.3f | inverted=%s | prob_final=%.3f",
+            "MATCH_INVERTED" if needs_inv else "MATCH",
+            question[:50],
+            question,
+            match.get("question", ""),
+            url,
+            score, prob_raw, needs_inv, prob_final,
+        )
+
+        self._cache[question] = (now, prob_final)
+        return prob_final
+
     async def close(self) -> None:
         await self._client.aclose()
diff --git a/bot/data/polymarket.py b/bot/data/polymarket.py
index ea82da6..8092104 100644
--- a/bot/data/polymarket.py
+++ b/bot/data/polymarket.py
@@ -132,20 +132,46 @@ def market_family_key(market: "Market") -> str:
             return f"fed-{month_m.group(1).lower()}-{year}"
         return f"fed-{year}"
 
-    # 2. US state + party (primary, senate, governor, etc.)
+    # 2. US state + election event
+    # Key design: general elections group by office, not by party, so
+    # "Republicans win Ohio governor" and "Democrats win Ohio governor"
+    # share the same family (ohio-gubernatorial-2026) and the bot can only
+    # hold one position. Primaries keep the party because each party runs
+    # its own primary (e.g. texas-republican-<year> vs texas-democrat-<year>).
     state_m = _US_STATE_RE.search(q)
     party_m = _PARTY_RE.search(q)
-    if state_m and party_m:
+    etype_m = _ELECTION_TYPE_RE.search(q)
+
+    if state_m and (party_m or etype_m):
         state = re.sub(r"\s+", "-", state_m.group(1).lower())
-        raw_party = party_m.group(1).lower()
-        # "democrat" prefix covers "democrat", "democrats", "democratic"
+        is_primary = etype_m is not None and "primary" in etype_m.group(1).lower()
+
+        if party_m and is_primary:
+            # Primary race: party is the disambiguation (each party has its own primary)
+            raw_party = party_m.group(1).lower()
+            party = "democrat" if "democrat" in raw_party else "republican"
+            return f"{state}-{party}-{year}"
+
+        if etype_m:
+            # General election: family = office, not party
+            # "Republicans win Ohio governor" == "Democrats win Ohio governor" → same race
+            raw_etype = etype_m.group(1).lower()
+            etype = {
+                "president": "presidential",
+                "mayor": "mayoral",
+                "governor": "gubernatorial",
+            }.get(raw_etype, raw_etype)
+            return f"{state}-{etype}-{year}"
+
+        # Has party but no election type — preserve old behaviour (e.g. "Texas Republican")
+        raw_party = party_m.group(1).lower()  # type: ignore[union-attr]
         party = "democrat" if "democrat" in raw_party else "republican"
         return f"{state}-{party}-{year}"
 
     # 3. Named non-US city / country
     for place_re, place_slug in _NAMED_PLACES:
         if place_re.search(q):
-            etype_m = _ELECTION_TYPE_RE.search(q)
+            # etype_m already holds _ELECTION_TYPE_RE.search(q) from section 2.
             if etype_m:
                 raw_etype = etype_m.group(1).lower()
                 # Normalise synonyms
diff --git a/bot/main.py b/bot/main.py
index af05330..78e48ae 100644
--- a/bot/main.py
+++ b/bot/main.py
@@ -202,6 +202,30 @@ async def main() -> None:
     if PAPER_MODE:
         await executor.initialize()
 
+    # Contradiction scan: warn if any two open positions share a family_key.
+    # This can happen when the family logic was less strict on a prior deploy.
+    # Bot does NOT auto-close — operator decides which position to keep.
+    positions = await db.get_open_position_details()
+    family_map: dict[str, list[dict]] = {}
+    for pos in positions:
+        fk = pos.get("family_key") or ""
+        if fk:
+            family_map.setdefault(fk, []).append(pos)
+    for fk, members in family_map.items():
+        if len(members) > 1:
+            best = max(members, key=lambda p: p.get("edge_net") or 0.0)
+            log.warning(
+                "CONTRADICTION family=%s has %d open positions — recommend keeping market_id=%s (edge_net=%.3f):",
+                fk, len(members), best["market_id"], best.get("edge_net") or 0.0,
+            )
+            for m in members:
+                marker = "KEEP" if m["market_id"] == best["market_id"] else "REVIEW"
+                log.warning(
+                    " [%s] %s | dir=%s | edge_net=%.3f | %s",
+                    marker, m["market_id"], m["direction"],
+                    m.get("edge_net") or 0.0, (m.get("question") or "")[:60],
+                )
+
     try:
         await run_trading_loop(poly, external, strategy, risk, executor, metrics, db)
     finally: