"""
Polymarket CLOB API client.
Docs: https://docs.polymarket.com
"""
import asyncio
import logging
import os
import re
from dataclasses import dataclass, field
from datetime import datetime, timezone, timedelta
from typing import Optional

import httpx

log = logging.getLogger(__name__)

# Base URLs: the CLOB host serves order books, the Gamma host serves the
# event/market listing used for discovery.
POLYMARKET_API = "https://clob.polymarket.com"
GAMMA_API = "https://gamma-api.polymarket.com"

# ─────────────────────────────────────────────────────────────────────────────
# Phase 2 — Market family classification helpers
# Used by market_family_key() below.
# ─────────────────────────────────────────────────────────────────────────────

# Four-digit year in the range 2020-2039.
_YEAR_RE = re.compile(r"\b(202\d|203\d)\b")

# Full English month names (case-insensitive).
_MONTH_RE = re.compile(
    r"\b(january|february|march|april|may|june|july|august|"
    r"september|october|november|december)\b",
    re.IGNORECASE,
)

# Phrases that mark a question as being about a Fed / interest-rate decision.
_FED_TRIGGER_RE = re.compile(
    r"\b(federal reserve|interest rate|bps|basis point|fed\s+(rate|meeting|decision))",
    re.IGNORECASE,
)

# All fifty US state names; multi-word names tolerate any run of whitespace.
_US_STATE_RE = re.compile(
    r"\b(Alabama|Alaska|Arizona|Arkansas|California|Colorado|Connecticut|"
    r"Delaware|Florida|Georgia|Hawaii|Idaho|Illinois|Indiana|Iowa|Kansas|"
    r"Kentucky|Louisiana|Maine|Maryland|Massachusetts|Michigan|Minnesota|"
    r"Mississippi|Missouri|Montana|Nebraska|Nevada|New\s+Hampshire|"
    r"New\s+Jersey|New\s+Mexico|New\s+York|North\s+Carolina|North\s+Dakota|"
    r"Ohio|Oklahoma|Oregon|Pennsylvania|Rhode\s+Island|South\s+Carolina|"
    r"South\s+Dakota|Tennessee|Texas|Utah|Vermont|Virginia|Washington|"
    r"West\s+Virginia|Wisconsin|Wyoming)\b",
    re.IGNORECASE,
)

# US party references, singular or plural, including the "GOP" alias.
_PARTY_RE = re.compile(r"\b(Republicans?|Democrats?|Democratic|GOP)\b", re.IGNORECASE)

# Office / election-type words; the first match in a question is the one used.
_ELECTION_TYPE_RE = re.compile(
    r"\b(presidential|president|mayoral|mayor|gubernatorial|governor|"
    r"senate|congress(?:ional)?|primary|election)\b",
    re.IGNORECASE,
)

# Ordered list of (pattern, place_slug) for named non-US locations.
# Checked after US-state patterns so US city/state names don't shadow these.
_NAMED_PLACES: list[tuple[re.Pattern, str]] = [
    (re.compile(r"\bColomb", re.IGNORECASE), "colombia"),
    (re.compile(r"\bSeoul\b", re.IGNORECASE), "seoul"),
    (re.compile(r"\bBusan\b", re.IGNORECASE), "busan"),
    (re.compile(r"\bGyeonggi\b", re.IGNORECASE), "gyeonggi"),
    (re.compile(r"\bChungcheong", re.IGNORECASE), "chungcheong"),
    (re.compile(r"\bSouth\s+Korean?\b", re.IGNORECASE), "south-korea"),
    (re.compile(r"\bLos\s+Angeles\b", re.IGNORECASE), "los-angeles"),
    (re.compile(r"\bCuba\b", re.IGNORECASE), "cuba"),
    (re.compile(r"\bLebanon\b", re.IGNORECASE), "lebanon"),
    (re.compile(r"\bIsrael\b", re.IGNORECASE), "israel"),
    (re.compile(r"\bUkraine\b", re.IGNORECASE), "ukraine"),
    (re.compile(r"\bRussia\b", re.IGNORECASE), "russia"),
]

# Ordered list of (pattern, company_slug) for tech/company markets.
_NAMED_COMPANIES: list[tuple[re.Pattern, str]] = [
    (re.compile(r"\bopenai\b", re.IGNORECASE), "openai"),
    (re.compile(r"\banthropic\b", re.IGNORECASE), "anthropic"),
    (re.compile(r"\bnvidia\b", re.IGNORECASE), "nvidia"),
    (re.compile(r"\bapple\b", re.IGNORECASE), "apple"),
    (re.compile(r"\bmicrosoft\b", re.IGNORECASE), "microsoft"),
    (re.compile(r"\bgoogle\b", re.IGNORECASE), "google"),
    (re.compile(r"\btesla\b", re.IGNORECASE), "tesla"),
    # \bmeta\b does NOT match MetaMask (no word boundary mid-compound-word)
    (re.compile(r"\bmeta\b", re.IGNORECASE), "meta"),
]

# Office nouns collapsed onto their adjective form so "governor" and
# "gubernatorial" questions about the same race produce the same family slug.
_ETYPE_SYNONYMS: dict[str, str] = {
    "president": "presidential",
    "mayor": "mayoral",
    "governor": "gubernatorial",
}


def _end_month(market: "Market") -> str:
    """Return market end_date formatted as YYYY-MM, or '' if unparseable."""
    raw = market.end_date
    if not raw:
        return ""
    try:
        parsed = datetime.fromisoformat(raw.replace("Z", "+00:00"))
    except (ValueError, TypeError, AttributeError):
        # AttributeError: end_date was not a string (no .replace method).
        return ""
    return parsed.strftime("%Y-%m")


def _canonical_etype(match: "re.Match[str]") -> str:
    """Return the election-type word of *match*, lowercased and synonym-normalised."""
    word = match.group(1).lower()
    return _ETYPE_SYNONYMS.get(word, word)


def _party_slug(match: "re.Match[str]") -> str:
    """Map a party match to 'democrat' or 'republican' (GOP counts as republican)."""
    return "democrat" if "democrat" in match.group(1).lower() else "republican"


def market_family_key(market: "Market") -> str:
    """
    Return a stable slug that groups related markets together.

    Markets in the same family share an underlying event (same election,
    same Fed meeting decision, same company).  The bot allows at most one
    open position per family per cycle to avoid correlated exposure.

    Priority order (first match wins):
      1. Fed / interest-rate decision  → fed-{month}-{year}  (fed-{year} if no month)
      2. US state election             → {state}-{party}-{year} for primaries or
                                         when only a party is named, otherwise
                                         {state}-{office}-{year}
      3. Named non-US city/country     → {place}-{event_type}-{year}
      4. Named tech company            → {company}-{year}
      5. Fallback                      → {category}-{end_YYYY-MM}

    The year is taken from the question text when present, else from the
    market's end_date, else the literal "unknown".

    Examples:
      "Will Ken Paxton win the 2026 Texas Republican Primary"
        → texas-republican-2026
      "Will the Fed decrease rates by 25 bps after April 2026 meeting"
        → fed-april-2026
      "Will OpenAI IPO by December 31 2026?"
        → openai-2026
    """
    q = market.question

    # Prefer year from question text; fall back to end_date year if absent
    year_m = _YEAR_RE.search(q)
    if year_m:
        year = year_m.group(1)
    else:
        end_m = _end_month(market)  # e.g. "2026-06"
        year = end_m[:4] if end_m else "unknown"

    # 1. Fed / interest-rate meeting
    if _FED_TRIGGER_RE.search(q):
        month_m = _MONTH_RE.search(q)
        if month_m:
            return f"fed-{month_m.group(1).lower()}-{year}"
        return f"fed-{year}"

    # 2. US state + election event
    # Key design: general elections group by office, not by party, so
    # "Republicans win Ohio governor" and "Democrats win Ohio governor"
    # share the same family (ohio-gubernatorial-2026) and the bot can only
    # hold one position.  Primaries keep the party because each party runs
    # its own primary.
    state_m = _US_STATE_RE.search(q)
    party_m = _PARTY_RE.search(q)
    etype_m = _ELECTION_TYPE_RE.search(q)
    if state_m and (party_m or etype_m):
        state = re.sub(r"\s+", "-", state_m.group(1).lower())
        # Scan every election-type word, not only the first: in
        # "Texas Senate Republican primary" the first hit is "senate", and
        # looking at that alone would file the primary under the general race.
        is_primary = any(
            m.group(1).lower() == "primary" for m in _ELECTION_TYPE_RE.finditer(q)
        )
        if party_m and is_primary:
            return f"{state}-{_party_slug(party_m)}-{year}"
        if etype_m:
            return f"{state}-{_canonical_etype(etype_m)}-{year}"
        # Party but no election type (e.g. "Texas Republican"); the guard
        # above guarantees party_m is set on this path.
        return f"{state}-{_party_slug(party_m)}-{year}"  # type: ignore[arg-type]

    # 3. Named non-US city / country
    for place_re, place_slug in _NAMED_PLACES:
        if place_re.search(q):
            etype = _canonical_etype(etype_m) if etype_m else "event"
            return f"{place_slug}-{etype}-{year}"

    # 4. Named tech company
    for company_re, company_slug in _NAMED_COMPANIES:
        if company_re.search(q):
            return f"{company_slug}-{year}"

    # 5. Fallback: category + end_date month
    end_month = _end_month(market)
    base = market.category if market.category else "misc"
    return f"{base}-{end_month}" if end_month else f"{base}-{year}"


@dataclass
class Market:
    """One tradable YES/NO market as loaded from the Gamma events listing."""

    id: str
    condition_id: str
    question: str
    yes_token_id: str
    no_token_id: str
    yes_price: float  # 0-1; first entry of the listing's outcomePrices
    no_price: float   # populated as 1 - yes_price by the client
    volume_24h: float
    end_date: str     # raw end-date string from the API ('' when absent)
    active: bool
    category: str = ""


@dataclass
class OrderBook:
    """Order-book snapshot for one token."""

    market_id: str
    yes_bids: list[tuple[float, float]] = field(default_factory=list)  # (price, size)
    yes_asks: list[tuple[float, float]] = field(default_factory=list)
    mid_price: float = 0.5


class PolymarketClient:
    """
    Async Polymarket client.
    In paper mode, API key is not needed — only public data.
    API key required for placing real orders.
    """

    def __init__(self) -> None:
        self.api_key = os.getenv("POLYMARKET_API_KEY", "")
        self.secret = os.getenv("POLYMARKET_SECRET", "")
        self.passphrase = os.getenv("POLYMARKET_PASSPHRASE", "")
        self._client = httpx.AsyncClient(timeout=30)

    # Keyword matching is plain substring search on a normalised question
    # (lowercased, punctuation turned into spaces, padded with one space on
    # each side).  Short or ambiguous keywords are therefore written with
    # surrounding spaces so they only match whole words
    # (e.g. " eth " won't match "Hegseth"; " sol " won't match "solar").

    # Keywords that identify crypto / finance markets.
    _CRYPTO_FINANCE_KEYWORDS: list[str] = [
        "bitcoin", "btc", " eth ", "ethereum", " sol ", "solana",
        "xrp", "ripple", "dogecoin", "doge", "litecoin", "ltc",
        "coinbase", "binance", "kraken", "bybit", "okx",
        "usdc", "usdt", "stablecoin", "defi", "nft", "blockchain", "crypto",
        " fdv", "airdrop", "token launch", "token listing",
        "microstrategy", "mstr", "saylor",
        "nasdaq", "sp500", "s&p 500", "s&p500",
        "federal reserve", "fed rate", "interest rate", "inflation",
        "tariff", "treasury yield", "recession", " gdp ", "unemployment",
        "trade war", "trade deal", " ipo ", "sec ", "cftc",
    ]

    _POLITICS_KEYWORDS: list[str] = [
        "election", "president", "congress", "senate", "vote",
        # padded: bare "war" also matched "award", "software", "toward", ...
        " war ",
        "trump", "biden", "ukraine", "russia", "israel", "nato",
    ]

    _TECH_KEYWORDS: list[str] = [
        " ai ", "openai", "apple", "google", "microsoft",
        # padded: bare "meta" also matched "MetaMask" / "metal"
        " meta ",
        "nvidia", "regulation", "antitrust", "tesla",
        # padded: bare "elon" also matched "Barcelona" / "felony"
        " elon ",
        "nuclear", "quantum", "chip",
    ]

    _EVENTS_KEYWORDS: list[str] = [
        "world cup", "oscar", "nobel", "spacex", "nasa",
    ]

    # Sports markets are excluded entirely — BTC/F&G/GNews have no edge there.
    # Checked before any category match so sports don't bleed into politics/events.
    _SPORTS_EXCLUSIONS: list[str] = [
        " nba ", " nfl ", " mlb ", " nhl ",
        "basketball", "football", "baseball", "hockey", "soccer",
        " mvp ", "rookie of the", "championship", "super bowl",
        "world series", "playoffs", "playoff", "tournament",
        "tennis", " golf ", " ufc ", "boxing", "wrestler", "wrestling",
        "slam dunk", "home run", "touchdown",
        # European / international football leagues
        "la liga", "premier league", "bundesliga", "serie a", "ligue 1",
        "champions league", "europa league", "conference league",
        "copa del rey", "fa cup", "dfb pokal",
        "relegation", "golden boot", "top scorer",
        " liga ", "eredivisie", "primeira liga",
    ]

    # Everything except word characters, '&' (kept for "s&p 500") and
    # whitespace is treated as a word separator during keyword matching.
    _PUNCT_RE = re.compile(r"[^\w&\s]")

    @classmethod
    def _normalise(cls, question: str) -> str:
        """Lowercase *question*, turn punctuation into spaces, collapse
        whitespace and pad both ends with a single space.

        Without this, space-padded keywords missed words that touch
        punctuation (" eth " did not match "... price of ETH?").
        """
        spaced = cls._PUNCT_RE.sub(" ", question.lower())
        return f" {' '.join(spaced.split())} "

    @classmethod
    def _matches_any(cls, question: str, keywords: list[str]) -> bool:
        """Return True if any keyword occurs in the normalised question."""
        text = cls._normalise(question)
        return any(kw in text for kw in keywords)

    @classmethod
    def _is_sports(cls, question: str) -> bool:
        """Return True if the question hits any sports exclusion keyword."""
        return cls._matches_any(question, cls._SPORTS_EXCLUSIONS)

    @classmethod
    def _is_crypto_finance(cls, question: str) -> bool:
        """Return True if the question hits any crypto/finance keyword."""
        return cls._matches_any(question, cls._CRYPTO_FINANCE_KEYWORDS)

    @classmethod
    def _is_politics(cls, question: str) -> bool:
        """Return True if the question hits any politics keyword."""
        return cls._matches_any(question, cls._POLITICS_KEYWORDS)

    @classmethod
    def _is_tech(cls, question: str) -> bool:
        """Return True if the question hits any tech keyword."""
        return cls._matches_any(question, cls._TECH_KEYWORDS)

    @classmethod
    def _is_events(cls, question: str) -> bool:
        """Return True if the question hits any events keyword."""
        return cls._matches_any(question, cls._EVENTS_KEYWORDS)

    @classmethod
    def _detect_category(cls, question: str) -> str:
        """Return the category label for a market question, or '' if unsupported."""
        if cls._is_sports(question):
            return ""  # exclude sports regardless of other keyword matches
        if cls._is_politics(question):
            return "politics"
        # Tech checked before crypto/finance: company-specific markets (OpenAI IPO,
        # NVIDIA earnings, Apple antitrust) should be "tech" even when they contain
        # generic finance keywords like "ipo" or "sec".
        if cls._is_tech(question):
            return "tech"
        if cls._is_crypto_finance(question):
            return "crypto/finance"
        if cls._is_events(question):
            return "events"
        return ""

    @classmethod
    def _parse_market(
        cls,
        raw: dict,
        event_title: str,
        now: datetime,
        cutoff: datetime,
        min_volume: float,
    ) -> Optional[Market]:
        """Convert one raw Gamma market dict into a Market.

        Returns None when the market is filtered out: inactive or closed,
        no supported category, already expired, resolving after *cutoff*,
        or 24h volume below *min_volume*.

        Raises KeyError / ValueError / IndexError / TypeError /
        AttributeError when the payload is malformed; the caller is
        expected to catch these and skip the market.
        """
        import json  # function-scope: only needed for string-encoded lists

        if not raw.get("active") or raw.get("closed"):
            return None

        question = raw.get("question") or ""
        # Detect category from question, falling back to the event title
        category = cls._detect_category(question) or cls._detect_category(event_title)
        if not category:
            return None

        # Gamma API may return endDate, end_date (snake_case) or endDateIso
        raw_end = raw.get("endDate") or raw.get("end_date") or raw.get("endDateIso") or ""
        if raw_end:
            try:
                end_dt = datetime.fromisoformat(raw_end.replace("Z", "+00:00"))
            except (ValueError, TypeError):
                end_dt = None  # keep market if date unparseable
            if end_dt is not None:
                # Make naive datetimes UTC-aware before comparing
                if end_dt.tzinfo is None:
                    end_dt = end_dt.replace(tzinfo=timezone.utc)
                if end_dt < now:
                    log.debug("Skipping expired market: %s", question[:60])
                    return None
                if end_dt > cutoff:
                    return None

        market_id = str(raw["id"])

        # "or 0" treats an explicit null like a missing value.
        volume = float(raw.get("volume24hr") or 0)
        if volume < min_volume:
            return None

        # Both lists are handled as either real lists or JSON-encoded strings.
        prices = raw.get("outcomePrices", ["0.5", "0.5"])
        if isinstance(prices, str):
            prices = json.loads(prices)
        yes_price = float(prices[0])

        tokens = raw.get("clobTokenIds", ["", ""])
        if isinstance(tokens, str):
            tokens = json.loads(tokens)

        return Market(
            id=market_id,
            condition_id=raw.get("conditionId", ""),
            question=question,
            yes_token_id=tokens[0] if tokens else "",
            no_token_id=tokens[1] if len(tokens) > 1 else "",
            yes_price=yes_price,
            no_price=1 - yes_price,
            volume_24h=volume,
            # Store the same value the date filter used, so end_date is not
            # lost when the API used a key other than "endDate".
            end_date=raw_end,
            active=True,
            category=category,
        )

    async def get_active_markets(
        self,
        min_volume: float = 500,
        pages: int = 3,
        page_size: int = 200,
        max_days_to_resolution: int = 90,
    ) -> list[Market]:
        """Fetch active markets from Gamma API (no auth needed).

        Fetches events without tag filtering (tag= param is unreliable),
        then keeps only markets whose question (or event title) matches a
        supported category (crypto/finance, politics, tech, events) and that:
          - have NOT already expired (end_dt >= now)
          - resolve within max_days_to_resolution days
          - have at least min_volume of 24h volume

        Paging stops at the first empty page or the first HTTP / decode
        error; markets collected so far are still returned.  Malformed
        individual markets are skipped with a debug log.
        """
        seen: set[str] = set()
        markets: list[Market] = []
        now = datetime.now(timezone.utc)
        cutoff = now + timedelta(days=max_days_to_resolution)

        for page in range(pages):
            try:
                resp = await self._client.get(
                    f"{GAMMA_API}/events",
                    params={
                        "active": True,
                        "closed": False,
                        "limit": page_size,
                        "offset": page * page_size,
                    },
                )
                resp.raise_for_status()
                events = resp.json()
            except (httpx.HTTPError, ValueError) as e:
                # ValueError covers a response body that is not valid JSON.
                log.error("Polymarket API error (page=%d): %s", page, e)
                break

            if not events:
                break  # no more pages

            for event in events:
                event_title = event.get("title") or ""
                for m in event.get("markets") or []:
                    try:
                        market = self._parse_market(
                            m, event_title, now, cutoff, min_volume
                        )
                    except (
                        KeyError, ValueError, IndexError, TypeError, AttributeError
                    ) as e:
                        # One bad record must not abort the whole fetch.
                        log.debug("Skipping malformed market: %s", e)
                        continue
                    if market is None or market.id in seen:
                        continue
                    seen.add(market.id)
                    markets.append(market)

        by_cat: dict[str, int] = {}
        for mkt in markets:
            by_cat[mkt.category] = by_cat.get(mkt.category, 0) + 1
        log.info(
            "Loaded %d markets (min_vol=%.0f, resolving within %dd): %s",
            len(markets), min_volume, max_days_to_resolution,
            ", ".join(f"{k}={v}" for k, v in sorted(by_cat.items())),
        )
        return markets

    @staticmethod
    def _mid_price(
        bids: list[tuple[float, float]],
        asks: list[tuple[float, float]],
    ) -> float:
        """Return the midpoint of highest bid and lowest ask, or 0.5 if either side is empty.

        Uses max/min rather than the first entries so the result does not
        depend on the order in which the API lists price levels.
        """
        if not bids or not asks:
            return 0.5
        best_bid = max(price for price, _ in bids)
        best_ask = min(price for price, _ in asks)
        return (best_bid + best_ask) / 2

    async def get_order_book(self, token_id: str) -> Optional[OrderBook]:
        """Get order book for a specific token.

        Returns an OrderBook whose bid/ask lists keep the API's level order,
        or None if the request or parsing fails (failure is logged).
        """
        try:
            resp = await self._client.get(
                f"{POLYMARKET_API}/book",
                params={"token_id": token_id},
            )
            resp.raise_for_status()
            data = resp.json()
            bids = [(float(b["price"]), float(b["size"])) for b in data.get("bids", [])]
            asks = [(float(a["price"]), float(a["size"])) for a in data.get("asks", [])]
            return OrderBook(
                market_id=token_id,
                yes_bids=bids,
                yes_asks=asks,
                mid_price=self._mid_price(bids, asks),
            )
        except Exception as e:
            # Deliberately broad: the order book is best-effort and callers
            # handle None.
            log.warning("Order book fetch failed for %s: %s", token_id, e)
            return None

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self._client.aclose()