"""
News sentiment client for GNews API.

Free tier: 100 requests/day.

Budget:
  - Cache TTL: 6 hours — same query is never repeated within 6 h
  - Max 5 queries per trading cycle (politics markets only)
  - 2-second sleep between actual API calls to avoid burst 429s
  - Client errors (400/401/403/429) are cached as neutral for 1 hour

With ≤9 politics markets and 6 h cache → ≤9 requests per 6 h = ≤36/day.

Score returned: -1.0 (very negative headlines) → +1.0 (very positive).
Returns 0.0 on any error or missing API key so the caller degrades gracefully.
"""

import asyncio
import logging
import os
import re
import time
from typing import Optional

import httpx

log = logging.getLogger(__name__)

GNEWS_API = "https://gnews.io/api/v4/search"
CACHE_TTL = 6 * 3600  # seconds — ≤9 politics markets × 4 cycles/day = ≤36 req/day
ERROR_CACHE_TTL = 3600  # seconds — lifetime of the neutral score cached on client errors
_INTER_REQUEST_SLEEP = 2  # seconds between consecutive real API calls

# HTTP statuses for which retrying soon is pointless (bad request, bad/forbidden
# key, rate limit); a neutral score is cached for ERROR_CACHE_TTL instead.
_CACHEABLE_ERROR_STATUSES = frozenset({400, 401, 403, 429})

# ---------------------------------------------------------------------------
# Keyword lists for headline sentiment
# ---------------------------------------------------------------------------

_POSITIVE = {
    "win", "wins", "won", "victory", "success", "successful",
    "agree", "agreed", "agreement", "approve", "approved", "approval",
    "confirm", "confirmed", "sign", "signed", "deal", "advance",
    "progress", "support", "peace", "likely", "probable", "imminent",
    "historic", "breakthrough", "resolve", "resolved", "resume", "resumed",
}

_NEGATIVE = {
    "fail", "fails", "failed", "failure", "reject", "rejected", "rejection",
    "block", "blocked", "refuse", "refused", "deny", "denied",
    "lose", "lost", "collapse", "collapsed", "crisis", "war", "attack",
    "veto", "oppose", "opposed", "unlikely", "impossible", "never",
    "stall", "stalled", "withdraw", "withdrew", "sanction", "sanctions",
    "threat", "threatens", "dead", "halt", "halted", "cancel", "cancelled",
    "breakdown", "escalate", "escalation",
}

# Words stripped when building the search query (too generic to help relevance)
_QUERY_STOPWORDS = {
    "will", "the", "a", "an", "by", "in", "on", "at", "to", "of",
    "and", "or", "is", "be", "are", "was", "were",
    "have", "has", "had", "do", "does", "did",
    "for", "from", "with", "not", "no", "this", "that",
    "it", "its", "their", "they", "he", "she", "we",
    "most", "more", "least", "less", "any", "all", "both", "each",
    "win", "lose", "get", "make", "take",
}

# Regex patterns for dates / noise
_DATE_RE = re.compile(
    r"\b(january|february|march|april|may|june|july|august|"
    r"september|october|november|december)\s+\d{1,2}\b"
    r"|\b20\d{2}\b"
    r"|\bQ[1-4]\b",
    flags=re.IGNORECASE,
)
_PUNCT_RE = re.compile(r"[?!\"'.,;:()\[\]{}]")
# Word tokeniser for headline scoring; compiled once instead of per article.
_WORD_RE = re.compile(r"\b\w+\b")


class NewsClient:
    """
    Async GNews client with in-memory result cache.

    Usage::

        client = NewsClient()
        score = await client.get_sentiment("Will Trump visit China")
        # score ∈ [-1.0, +1.0] — positive means bullish for the YES outcome
        await client.close()
    """

    def __init__(self) -> None:
        self._api_key = os.getenv("GNEWS_API_KEY", "")
        self._client = httpx.AsyncClient(
            timeout=10,
            headers={"User-Agent": "Mozilla/5.0 (compatible; polymarket-bot/1.0)"},
        )
        # {cache_key: (expires_at_monotonic, score)}
        # Entries carry their own deadline so successful results and cached
        # client errors can have different lifetimes.
        self._cache: dict[str, tuple[float, float]] = {}

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def get_sentiment(self, question: str) -> float:
        """
        Return a sentiment score ∈ [-1.0, +1.0] for the market question.

        - Positive: most recent headlines suggest the YES outcome is more likely
        - Negative: headlines suggest the YES outcome is less likely
        - 0.0: neutral, no data, or API unavailable

        Successful results are cached for ``CACHE_TTL`` seconds. A neutral
        score is cached for ``ERROR_CACHE_TTL`` seconds after a 400/401/403/429
        response. Network failures, other HTTP errors, and undecodable or
        malformed payloads return 0.0 without caching.
        """
        if not self._api_key:
            log.debug("GNEWS_API_KEY not set — skipping news signal")
            return 0.0

        query = self._build_query(question)
        if len(query) < 3:
            return 0.0

        cache_key = query.lower()
        now = time.monotonic()

        cached_score = self._cache_get(cache_key, now)
        if cached_score is not None:
            log.debug("News cache hit %r → %.3f", query, cached_score)
            return cached_score

        # Build URL exactly as documented for free tier:
        #   https://gnews.io/api/v4/search?q=...&lang=en&max=10&token=...
        # NOTE: "from"/"to" date filters are paid-tier only — omit them.
        try:
            resp = await self._client.get(
                GNEWS_API,
                params={
                    "q": query,
                    "lang": "en",
                    "max": 10,
                    "token": self._api_key,
                },
            )
        except Exception as exc:
            # Deliberately broad: the caller must degrade to neutral on any
            # transport failure rather than crash the trading cycle.
            log.warning("GNews network error for %r: %s", query, exc)
            return 0.0
        finally:
            # Always sleep after a real network attempt to avoid burst 429s
            await asyncio.sleep(_INTER_REQUEST_SLEEP)

        log.info("GNews HTTP %d for query %r", resp.status_code, query)

        if resp.status_code != 200:
            try:
                body = resp.json()
            except Exception:
                body = resp.text[:200]
            log.warning("GNews error body: %s", body)
            # Cache neutral for 1 h on client errors to avoid hammering the endpoint
            if resp.status_code in _CACHEABLE_ERROR_STATUSES:
                self._cache_put(cache_key, 0.0, ERROR_CACHE_TTL, now)
            return 0.0

        try:
            data = resp.json()
        except Exception as exc:
            log.warning("GNews JSON decode error for %r: %s", query, exc)
            return 0.0

        try:
            articles = self._extract_articles(data)
        except ValueError as exc:
            # Valid JSON but not the expected shape: treat as "no data" and do
            # not cache, so the next cycle can retry.
            log.warning("GNews unexpected payload for %r: %s", query, exc)
            return 0.0

        score = self._score_headlines(articles)
        self._cache_put(cache_key, score, CACHE_TTL, now)

        log.info(
            "GNews %r → %d articles, sentiment=%.3f",
            query, len(articles), score,
        )
        return score

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self._client.aclose()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _cache_get(self, cache_key: str, now: float) -> Optional[float]:
        """Return the cached score for *cache_key* if still valid at *now*, else None.

        Expired entries are removed so the cache does not retain stale keys.
        """
        entry = self._cache.get(cache_key)
        if entry is None:
            return None
        expires_at, score = entry
        if now < expires_at:
            return score
        del self._cache[cache_key]
        return None

    def _cache_put(self, cache_key: str, score: float, ttl: float, now: float) -> None:
        """Store *score* under *cache_key*, valid for *ttl* seconds from *now*."""
        self._cache[cache_key] = (now + ttl, score)

    @staticmethod
    def _extract_articles(data: object) -> list[dict]:
        """Return the article dicts from a decoded GNews response.

        A missing ``"articles"`` key yields an empty list. Non-dict entries
        inside the list are dropped.

        Raises:
            ValueError: if *data* is not a JSON object or ``"articles"`` is
                present but not a list.
        """
        if not isinstance(data, dict):
            raise ValueError(f"expected a JSON object, got {type(data).__name__}")
        articles = data.get("articles", [])
        if not isinstance(articles, list):
            raise ValueError(
                f"expected 'articles' to be a list, got {type(articles).__name__}"
            )
        return [art for art in articles if isinstance(art, dict)]

    @staticmethod
    def _build_query(question: str) -> str:
        """Extract meaningful search terms from a market question.

        Dates (month + day, 20xx years, Q1–Q4) and punctuation are replaced by
        spaces; stopwords and tokens of two characters or fewer are dropped;
        at most the first eight remaining tokens are kept, in original case.
        """
        q = _DATE_RE.sub(" ", question)
        q = _PUNCT_RE.sub(" ", q)
        tokens = [
            w for w in q.split()
            if w.lower() not in _QUERY_STOPWORDS and len(w) > 2
        ]
        return " ".join(tokens[:8])  # GNews handles ~8 keyword queries well

    @staticmethod
    def _score_headlines(articles: list[dict]) -> float:
        """
        Score each article title + description independently, then average.

        Each article vote: (pos_hits - neg_hits) / (pos_hits + neg_hits) ∈ [-1, 1].
        Articles with no sentiment keywords contribute 0 (not excluded).
        Hits are counted over the set of distinct lowercase words, so a keyword
        repeated within one article counts once. Returns 0.0 for an empty list.
        """
        if not articles:
            return 0.0

        votes: list[float] = []
        for art in articles:
            # A null title/description is treated as empty text rather than
            # being interpolated as the string "None".
            title = art.get("title") or ""
            description = art.get("description") or ""
            words = set(_WORD_RE.findall(f"{title} {description}".lower()))
            pos = len(words & _POSITIVE)
            neg = len(words & _NEGATIVE)
            total = pos + neg
            votes.append((pos - neg) / total if total > 0 else 0.0)

        return max(-1.0, min(1.0, sum(votes) / len(votes)))