Files
polymarket-bot/bot/data/polymarket.py
T
chemavx 9add52ab05
CI/CD / build-and-push (push) Successful in 2m24s
fix(polymarket): _PARTY_RE: add Republicans? plural support for symmetry
Republicans (plural) previously didn't match _PARTY_RE because the pattern
was r"\bRepublican\b" (no optional s).  Added Republicans? for symmetry with
Democrats?.  The general-election family fix already handles this case via
etype_m, but the plural match is needed for the party-only fallback branch.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-17 10:35:07 +00:00

479 lines
20 KiB
Python

"""
Polymarket CLOB API client.
Docs: https://docs.polymarket.com
"""
import asyncio
import logging
import os
import re
from dataclasses import dataclass, field
from datetime import datetime, timezone, timedelta
from typing import Optional
import httpx
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Base URLs of the two Polymarket HTTP services used by PolymarketClient:
# the CLOB API serves order books (/book), the Gamma API serves the
# event / market listings (/events).
POLYMARKET_API = "https://clob.polymarket.com"
GAMMA_API = "https://gamma-api.polymarket.com"

# ─────────────────────────────────────────────────────────────────────────────
# Phase 2 — Market family classification helpers
# Used by market_family_key() below.
# ─────────────────────────────────────────────────────────────────────────────

# A four-digit year in the range 2020-2039, as a whole word.
_YEAR_RE = re.compile(r"\b(202\d|203\d)\b")

# A full English month name, case-insensitive.
# NOTE(review): "may" also matches the modal verb ("rates may rise"), so a
# question can yield month "may" without naming the month.
_MONTH_RE = re.compile(
    r"\b(january|february|march|april|may|june|july|august|"
    r"september|october|november|december)\b",
    re.IGNORECASE,
)

# Phrases that mark a question as a Fed / interest-rate market.
# There is no trailing \b, so these are prefix matches: "basis point" also
# matches "basis points", "interest rate" also matches "interest rates".
_FED_TRIGGER_RE = re.compile(
    r"\b(federal reserve|interest rate|bps|basis point|fed\s+(rate|meeting|decision))",
    re.IGNORECASE,
)

# The 50 US state names as whole words (multi-word names tolerate any
# whitespace run). The District of Columbia is not included.
# NOTE(review): "Georgia" and "Washington" are ambiguous with the country and
# the capital city — confirm whether that matters for the markets seen.
_US_STATE_RE = re.compile(
    r"\b(Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|"
    r"Delaware|Florida|Georgia|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|"
    r"Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|"
    r"Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|"
    r"New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|"
    r"Ohio|Oklahoma|Oregon|Pennsylvania|Rhode\s+Island|South\s+Carolina|"
    r"South\s+Dakota|Tennessee|Texas|Utah|Vermont|Virginia|Washington|"
    r"West\s+Virginia|Wisconsin|Wyoming)\b",
    re.IGNORECASE,
)

# US party mentions: singular and plural nouns, "Democratic", and "GOP".
_PARTY_RE = re.compile(r"\b(Republicans?|Democrats?|Democratic|GOP)\b", re.IGNORECASE)

# Election / office words. search() returns the first one in the text, which
# is what market_family_key() uses as the race type.
_ELECTION_TYPE_RE = re.compile(
    r"\b(presidential|president|mayoral|mayor|gubernatorial|governor|"
    r"senate|congress(?:ional)?|primary|election)\b",
    re.IGNORECASE,
)

# Ordered list of (pattern, place_slug) for named places that are not covered
# by _US_STATE_RE: foreign countries and cities, plus Los Angeles.
# market_family_key() consults this list only after the US-state branch, and
# the first matching entry wins.
# "Colomb" and "Chungcheong" have no trailing \b, so they are prefix matches
# (e.g. "Colombia", "Colombian").
_NAMED_PLACES: list[tuple[re.Pattern, str]] = [
    (re.compile(r"\bColomb", re.IGNORECASE), "colombia"),
    (re.compile(r"\bSeoul\b", re.IGNORECASE), "seoul"),
    (re.compile(r"\bBusan\b", re.IGNORECASE), "busan"),
    (re.compile(r"\bGyeonggi\b", re.IGNORECASE), "gyeonggi"),
    (re.compile(r"\bChungcheong", re.IGNORECASE), "chungcheong"),
    (re.compile(r"\bSouth\s+Korean?\b", re.IGNORECASE), "south-korea"),
    (re.compile(r"\bLos\s+Angeles\b", re.IGNORECASE), "los-angeles"),
    (re.compile(r"\bCuba\b", re.IGNORECASE), "cuba"),
    (re.compile(r"\bLebanon\b", re.IGNORECASE), "lebanon"),
    (re.compile(r"\bIsrael\b", re.IGNORECASE), "israel"),
    (re.compile(r"\bUkraine\b", re.IGNORECASE), "ukraine"),
    (re.compile(r"\bRussia\b", re.IGNORECASE), "russia"),
]

# Ordered list of (pattern, company_slug) for tech/company markets.
# The first matching entry wins.
_NAMED_COMPANIES: list[tuple[re.Pattern, str]] = [
    (re.compile(r"\bopenai\b", re.IGNORECASE), "openai"),
    (re.compile(r"\banthropic\b", re.IGNORECASE), "anthropic"),
    (re.compile(r"\bnvidia\b", re.IGNORECASE), "nvidia"),
    (re.compile(r"\bapple\b", re.IGNORECASE), "apple"),
    (re.compile(r"\bmicrosoft\b", re.IGNORECASE), "microsoft"),
    (re.compile(r"\bgoogle\b", re.IGNORECASE), "google"),
    (re.compile(r"\btesla\b", re.IGNORECASE), "tesla"),
    # \bmeta\b does NOT match MetaMask (no word boundary mid-compound-word)
    (re.compile(r"\bmeta\b", re.IGNORECASE), "meta"),
]
def _end_month(market: "Market") -> str:
    """Format ``market.end_date`` as ``YYYY-MM``.

    The end date is read as an ISO-8601 string; a trailing ``Z`` is rewritten
    to ``+00:00`` before parsing. An empty or unparseable value yields ``""``.
    """
    end_date = market.end_date
    if end_date:
        try:
            parsed = datetime.fromisoformat(end_date.replace("Z", "+00:00"))
            return parsed.strftime("%Y-%m")
        except (ValueError, TypeError):
            pass  # fall through to the "unknown" result below
    return ""
def market_family_key(market: "Market") -> str:
    """
    Return a stable slug that groups related markets together.

    Markets in the same family share an underlying event (same election,
    same Fed meeting decision, same company). The bot allows at most one
    open position per family per cycle to avoid correlated exposure.

    The year used in every slug is the first 2020-2039 year found in the
    question text; if there is none, the year of ``market.end_date``; if that
    is unparseable too, the literal ``"unknown"``.

    Priority order (first match wins):
      1. Fed / interest-rate decision  → fed-{month}-{year}
                                         (fed-{year} when no month is named)
      2. US state + election wording:
           party primary               → {state}-{party}-{year}
           any other named race        → {state}-{office}-{year}
           party but no race wording   → {state}-{party}-{year}
      3. Named city / country          → {place}-{event_type}-{year}
                                         (event_type is "event" when no
                                         election word is present)
      4. Named tech company            → {company}-{year}
      5. Fallback                      → {category}-{end_YYYY-MM}
                                         ("misc" for an empty category;
                                         {category}-{year} when the end date
                                         is unparseable)

    Examples:
      "Will Ken Paxton win the 2026 Texas Republican Primary"
        → texas-republican-2026
      "Will Republicans win the Ohio governor race in 2026?"
        → ohio-gubernatorial-2026
      "Will the Fed decrease rates by 25 bps after April 2026 meeting"
        → fed-april-2026
      "Will OpenAI IPO by December 31 2026?"
        → openai-2026

    Args:
        market: object exposing ``question``, ``end_date`` and ``category``.

    Returns:
        The family slug, always a non-empty string.
    """
    q = market.question

    # Prefer year from question text; fall back to end_date year if absent
    year_m = _YEAR_RE.search(q)
    if year_m:
        year = year_m.group(1)
    else:
        end_m = _end_month(market)  # e.g. "2026-06"
        year = end_m[:4] if end_m else "unknown"

    # 1. Fed / interest-rate meeting
    if _FED_TRIGGER_RE.search(q):
        month_m = _MONTH_RE.search(q)
        if month_m:
            return f"fed-{month_m.group(1).lower()}-{year}"
        return f"fed-{year}"

    state_m = _US_STATE_RE.search(q)
    party_m = _PARTY_RE.search(q)
    etype_m = _ELECTION_TYPE_RE.search(q)

    # Race type = first election word in the question, with office nouns
    # normalised to their adjective form so "governor" and "gubernatorial"
    # land in the same family. Computed once and shared by branches 2 and 3.
    etype = None
    if etype_m:
        raw_etype = etype_m.group(1).lower()
        etype = {
            "president": "presidential",
            "mayor": "mayoral",
            "governor": "gubernatorial",
        }.get(raw_etype, raw_etype)

    # 2. US state + election event
    #    Key design: general elections group by office, not by party, so
    #    "Republicans win Ohio governor" and "Democrats win Ohio governor"
    #    share the same family (ohio-gubernatorial-2026) and the bot can only
    #    hold one position. Primaries keep the party because each party runs
    #    its own primary.
    if state_m and (party_m or etype_m):
        state = re.sub(r"\s+", "-", state_m.group(1).lower())

        party = None
        if party_m:
            # "GOP" and both Republican spellings collapse to "republican".
            raw_party = party_m.group(1).lower()
            party = "democrat" if "democrat" in raw_party else "republican"

        # Look at EVERY election word, not just the first one: in
        # "Texas Republican Senate Primary" the first match is "Senate", and
        # checking only that would file a primary under the general race.
        is_primary = any(
            word.lower() == "primary" for word in _ELECTION_TYPE_RE.findall(q)
        )

        if party and is_primary:
            # Primary race: party is the disambiguation
            return f"{state}-{party}-{year}"
        if etype:
            # General election (or a primary with no party named): family = office
            return f"{state}-{etype}-{year}"
        # Has party but no election type (e.g. "Texas Republican"). party is
        # set here because the enclosing condition required party_m or etype_m.
        return f"{state}-{party}-{year}"

    # 3. Named non-US city / country
    for place_re, place_slug in _NAMED_PLACES:
        if place_re.search(q):
            return f"{place_slug}-{etype or 'event'}-{year}"

    # 4. Named tech company
    for company_re, company_slug in _NAMED_COMPANIES:
        if company_re.search(q):
            return f"{company_slug}-{year}"

    # 5. Fallback: category + end_date month
    end_month = _end_month(market)
    base = market.category if market.category else "misc"
    return f"{base}-{end_month}" if end_month else f"{base}-{year}"
@dataclass
class Market:
    """One tradable Polymarket market (a single yes/no question)."""

    # Identifiers
    id: str
    condition_id: str

    # Human-readable question text
    question: str

    # Token ids of the YES and NO outcomes
    yes_token_id: str
    no_token_id: str

    # Outcome prices on a 0-1 scale
    yes_price: float
    no_price: float

    # Trading volume over the last 24 hours
    volume_24h: float

    # Resolution date as an ISO-8601 string ("" when unknown)
    end_date: str

    active: bool

    # Category label assigned by the client; "" when none was detected
    category: str = ""
@dataclass
class OrderBook:
    """Snapshot of one token's order book."""

    market_id: str

    # Each level is a (price, size) pair; every instance gets its own lists.
    yes_bids: list[tuple[float, float]] = field(default_factory=list)
    yes_asks: list[tuple[float, float]] = field(default_factory=list)

    # Midpoint price; 0.5 when no midpoint has been computed.
    mid_price: float = 0.5
class PolymarketClient:
    """
    Async Polymarket client.

    In paper mode, API key is not needed — only public data.
    API key required for placing real orders.

    Market listings are read from the Gamma API and order books from the
    CLOB API, both through one shared ``httpx.AsyncClient``. Call
    :meth:`close` when done to release it.
    """

    def __init__(self) -> None:
        """Read credentials from the environment and open the HTTP client."""
        self.api_key = os.getenv("POLYMARKET_API_KEY", "")
        self.secret = os.getenv("POLYMARKET_SECRET", "")
        self.passphrase = os.getenv("POLYMARKET_PASSPHRASE", "")
        self._client = httpx.AsyncClient(timeout=30)

    # ── Category keyword tables ──────────────────────────────────────────────
    # Matching is plain lowercase substring search against the question padded
    # with one space on each side (see _contains_any).

    # Keywords that identify crypto / finance markets.
    # Short tickers are padded with spaces to avoid false substring matches
    # (e.g. " eth " won't match "Hegseth"; " sol " won't match "solar").
    _CRYPTO_FINANCE_KEYWORDS: list[str] = [
        "bitcoin", "btc", " eth ", "ethereum", " sol ", "solana",
        "xrp", "ripple", "dogecoin", "doge", "litecoin", "ltc",
        "coinbase", "binance", "kraken", "bybit", "okx",
        "usdc", "usdt", "stablecoin",
        "defi", "nft", "blockchain", "crypto",
        " fdv", "airdrop", "token launch", "token listing",
        "microstrategy", "mstr", "saylor",
        "nasdaq", "sp500", "s&p 500", "s&p500",
        "federal reserve", "fed rate", "interest rate",
        "inflation", "tariff", "treasury yield",
        "recession", " gdp ", "unemployment", "trade war", "trade deal",
        " ipo ", "sec ", "cftc",
    ]

    # NOTE(review): these entries are unpadded, so "war" also matches inside
    # "award" / "software" and "vote" inside "devoted". Left unchanged because
    # tightening them changes which markets are traded — confirm intent.
    _POLITICS_KEYWORDS: list[str] = [
        "election", "president", "congress", "senate", "vote", "war",
        "trump", "biden", "ukraine", "russia", "israel", "nato",
    ]

    # NOTE(review): unpadded "meta", "chip" and "elon" also match inside
    # "metamask", "chipotle" and "felony" — confirm intent.
    _TECH_KEYWORDS: list[str] = [
        " ai ", "openai", "apple", "google", "microsoft", "meta",
        "nvidia", "regulation", "antitrust",
        "tesla", "elon", "nuclear", "quantum", "chip",
    ]

    _EVENTS_KEYWORDS: list[str] = [
        "world cup", "oscar", "nobel", "spacex", "nasa",
    ]

    # Sports markets are excluded entirely — BTC/F&G/GNews have no edge there.
    # Checked before any category match so sports don't bleed into politics/events.
    _SPORTS_EXCLUSIONS: list[str] = [
        " nba ", " nfl ", " mlb ", " nhl ",
        "basketball", "football", "baseball", "hockey", "soccer",
        " mvp ", "rookie of the", "championship", "super bowl", "world series",
        "playoffs", "playoff", "tournament",
        "tennis", " golf ", " ufc ", "boxing", "wrestler", "wrestling",
        "slam dunk", "home run", "touchdown",
        # European / international football leagues
        "la liga", "premier league", "bundesliga", "serie a", "ligue 1",
        "champions league", "europa league", "conference league",
        "copa del rey", "fa cup", "dfb pokal",
        "relegation", "golden boot", "top scorer",
        " liga ", "eredivisie", "primeira liga",
    ]

    # ── Category detection ───────────────────────────────────────────────────

    @staticmethod
    def _contains_any(question: str, keywords: "list[str]") -> bool:
        """Return True if any keyword is a substring of the padded, lowercased question."""
        # Pad so space-delimited keywords also match at the very start / end.
        padded = f" {question.lower()} "
        return any(kw in padded for kw in keywords)

    @classmethod
    def _is_sports(cls, question: str) -> bool:
        """Return True if the question matches a sports exclusion keyword."""
        return cls._contains_any(question, cls._SPORTS_EXCLUSIONS)

    @classmethod
    def _is_crypto_finance(cls, question: str) -> bool:
        """Return True if the question matches a crypto / finance keyword."""
        return cls._contains_any(question, cls._CRYPTO_FINANCE_KEYWORDS)

    @classmethod
    def _is_politics(cls, question: str) -> bool:
        """Return True if the question matches a politics keyword."""
        return cls._contains_any(question, cls._POLITICS_KEYWORDS)

    @classmethod
    def _is_tech(cls, question: str) -> bool:
        """Return True if the question matches a tech keyword."""
        return cls._contains_any(question, cls._TECH_KEYWORDS)

    @classmethod
    def _is_events(cls, question: str) -> bool:
        """Return True if the question matches an events keyword."""
        return cls._contains_any(question, cls._EVENTS_KEYWORDS)

    @classmethod
    def _detect_category(cls, question: str) -> str:
        """Return the category label for a market question, or '' if unsupported.

        Order matters: sports → politics → tech → crypto/finance → events.
        """
        if cls._is_sports(question):
            return ""  # exclude sports regardless of other keyword matches
        if cls._is_politics(question):
            return "politics"
        # Tech checked before crypto/finance: company-specific markets (OpenAI IPO,
        # NVIDIA earnings, Apple antitrust) should be "tech" even when they contain
        # generic finance keywords like "ipo" or "sec".
        if cls._is_tech(question):
            return "tech"
        if cls._is_crypto_finance(question):
            return "crypto/finance"
        if cls._is_events(question):
            return "events"
        return ""

    # ── Parsing helpers ──────────────────────────────────────────────────────

    @staticmethod
    def _decode_json_list(value):
        """Return *value* parsed as JSON when it is a string, else unchanged.

        The market fields this is used on may arrive either as a real list or
        as a JSON-encoded string. Raises ValueError on malformed JSON.
        """
        if isinstance(value, str):
            import json  # local import: the module does not import json at top level

            return json.loads(value)
        return value

    @staticmethod
    def _parse_end_date(raw_end) -> Optional[datetime]:
        """Parse an ISO-8601 end date into a timezone-aware datetime.

        A trailing "Z" is accepted and naive values are taken to be UTC.
        Returns None when the value is empty, not a string, or unparseable.
        """
        if not raw_end:
            return None
        try:
            end_dt = datetime.fromisoformat(raw_end.replace("Z", "+00:00"))
        except (ValueError, TypeError, AttributeError):
            return None
        # Make naive datetimes UTC-aware so they can be compared with "now".
        if end_dt.tzinfo is None:
            end_dt = end_dt.replace(tzinfo=timezone.utc)
        return end_dt

    @staticmethod
    def _mid_price(bids, asks) -> float:
        """Return the midpoint of the best bid and best ask, or 0.5 if either side is empty.

        Best bid is the highest bid price and best ask the lowest ask price,
        so the result does not depend on the order the levels arrive in.
        """
        if not bids or not asks:
            return 0.5
        best_bid = max(price for price, _size in bids)
        best_ask = min(price for price, _size in asks)
        return (best_bid + best_ask) / 2

    # ── Public API ───────────────────────────────────────────────────────────

    async def get_active_markets(
        self,
        min_volume: float = 500,
        pages: int = 3,
        page_size: int = 200,
        max_days_to_resolution: int = 90,
    ) -> "list[Market]":
        """Fetch active markets from Gamma API (no auth needed).

        Fetches events without tag filtering (tag= param is unreliable),
        then keeps only markets whose question (or, failing that, event
        title) matches any supported category (crypto/finance, politics,
        tech, events) and that:
          - have NOT already expired (end_dt >= now)
          - resolve within max_days_to_resolution days
          - have a 24h volume of at least min_volume
        Markets whose end date is missing or unparseable are kept.

        Args:
            min_volume: minimum 24h volume a market must have.
            pages: maximum number of result pages to request.
            page_size: number of events requested per page.
            max_days_to_resolution: upper bound on days until resolution.

        Returns:
            De-duplicated list of matching markets. On an HTTP error or a
            non-JSON response, paging stops and the markets collected so far
            are returned.
        """
        seen: set[str] = set()
        markets: list[Market] = []
        now = datetime.now(timezone.utc)
        cutoff = now + timedelta(days=max_days_to_resolution)

        for page in range(pages):
            try:
                resp = await self._client.get(
                    f"{GAMMA_API}/events",
                    params={
                        "active": True,
                        "closed": False,
                        "limit": page_size,
                        "offset": page * page_size,
                    },
                )
                resp.raise_for_status()
                events = resp.json()
            except (httpx.HTTPError, ValueError) as e:
                # ValueError covers a response body that is not valid JSON.
                log.error("Polymarket API error (page=%d): %s", page, e)
                break

            if not events:
                break  # no more pages

            for event in events:
                event_title = event.get("title") or ""
                for m in event.get("markets") or []:
                    try:
                        if not m.get("active") or m.get("closed"):
                            continue
                        question = m.get("question") or ""

                        # Detect category from question or event title
                        category = (
                            self._detect_category(question)
                            or self._detect_category(event_title)
                        )
                        if not category:
                            continue

                        # Filter: skip already-expired and far-future markets
                        # Gamma API may return endDate or end_date (snake_case)
                        raw_end = (
                            m.get("endDate")
                            or m.get("end_date")
                            or m.get("endDateIso")
                            or ""
                        )
                        end_dt = self._parse_end_date(raw_end)
                        if end_dt is not None:
                            if end_dt < now:
                                log.debug("Skipping expired market: %s", question[:60])
                                continue
                            if end_dt > cutoff:
                                continue

                        market_id = str(m["id"])
                        if market_id in seen:
                            continue

                        # "or 0" so an explicit null volume counts as zero.
                        vol = float(m.get("volume24hr") or 0)
                        if vol < min_volume:
                            continue

                        raw_prices = self._decode_json_list(
                            m.get("outcomePrices", ["0.5", "0.5"])
                        )
                        yes_price = float(raw_prices[0])
                        raw_tokens = self._decode_json_list(
                            m.get("clobTokenIds", ["", ""])
                        )

                        seen.add(market_id)
                        markets.append(Market(
                            id=market_id,
                            condition_id=m.get("conditionId", ""),
                            question=question,
                            yes_token_id=raw_tokens[0] if raw_tokens else "",
                            no_token_id=raw_tokens[1] if len(raw_tokens) > 1 else "",
                            yes_price=yes_price,
                            no_price=1 - yes_price,
                            volume_24h=vol,
                            # Store the same value the date filter used, so a
                            # market that only has end_date / endDateIso does
                            # not end up with an empty end date.
                            end_date=raw_end,
                            active=True,
                            category=category,
                        ))
                    except (KeyError, ValueError, IndexError, TypeError, AttributeError) as e:
                        # One bad record must not abort the whole fetch.
                        log.debug("Skipping malformed market: %s", e)

        by_cat: dict[str, int] = {}
        for mkt in markets:
            by_cat[mkt.category] = by_cat.get(mkt.category, 0) + 1
        log.info(
            "Loaded %d markets (min_vol=%.0f, resolving within %dd): %s",
            len(markets), min_volume, max_days_to_resolution,
            ", ".join(f"{k}={v}" for k, v in sorted(by_cat.items())),
        )
        return markets

    async def get_order_book(self, token_id: str) -> "Optional[OrderBook]":
        """Get order book for a specific token.

        Args:
            token_id: CLOB token id to query.

        Returns:
            An OrderBook whose levels are kept in the order the API returned
            them and whose ``mid_price`` is the midpoint of the highest bid
            and lowest ask (0.5 when either side is empty), or None if the
            request or parsing fails for any reason (the failure is logged).
        """
        try:
            resp = await self._client.get(
                f"{POLYMARKET_API}/book",
                params={"token_id": token_id},
            )
            resp.raise_for_status()
            data = resp.json()
            bids = [(float(b["price"]), float(b["size"])) for b in data.get("bids", [])]
            asks = [(float(a["price"]), float(a["size"])) for a in data.get("asks", [])]
            # NOTE(review): index 0 is not assumed to be the best level; the
            # ordering of the /book response should be confirmed against the
            # API docs before any caller relies on list order.
            return OrderBook(
                market_id=token_id,
                yes_bids=bids,
                yes_asks=asks,
                mid_price=self._mid_price(bids, asks),
            )
        except Exception as e:
            # Deliberate best-effort boundary: log and report "no book".
            log.warning("Order book fetch failed for %s: %s", token_id, e)
            return None

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self._client.aclose()