Files
polymarket-bot/bot/data/news.py
T
chemavx 33ad86f352
CI/CD / build-and-push (push) Successful in 1m32s
feat(news): 6h cache, politics-only, max 5/cycle, 2s sleep between calls
- CACHE_TTL: 4h → 6h (≤36 req/day with ≤9 politics markets)
- GNews only called for is_politics markets (BTC/F&G cover crypto/macro)
- MAX_NEWS_QUERIES_PER_CYCLE=5: BayesianStrategy.reset_cycle() called each
  iteration; counter increments only on actual API call (cache hits free)
- 2s asyncio.sleep in news.py finally block after each real HTTP request
- main.py sorts markets: politics first by end_date ascending, so soonest-
  resolving markets consume the 5-query budget before others

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-14 12:33:26 +00:00

206 lines
7.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
News sentiment client for GNews API.
Free tier: 100 requests/day. Budget:
- Cache TTL: 6 hours — same query is never repeated within 6 h
- Max 5 queries per trading cycle (politics markets only)
- 2-second sleep between actual API calls to avoid burst 429s
With ≤9 politics markets and 6 h cache → ≤9 requests per 6 h = ≤36/day.
Score returned: -1.0 (very negative headlines) → +1.0 (very positive).
Returns 0.0 on any error or missing API key so the caller degrades gracefully.
"""
import asyncio
import logging
import os
import re
import time
import httpx
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# GNews v4 search endpoint.
GNEWS_API = "https://gnews.io/api/v4/search"

# Lifetime of a cached sentiment score, in seconds (6 hours).
# Budget: ≤9 politics markets × 4 refreshes/day = ≤36 requests/day.
CACHE_TTL = 6 * 60 * 60

# Pause, in seconds, after each real API call (guards against burst 429s).
_INTER_REQUEST_SLEEP = 2

# ---------------------------------------------------------------------------
# Headline sentiment vocabulary (matched against lower-cased whole words)
# ---------------------------------------------------------------------------
_POSITIVE = {
    # winning / succeeding
    "win", "wins", "won", "victory", "success", "successful",
    # agreement / approval
    "agree", "agreed", "agreement", "approve", "approved", "approval",
    "confirm", "confirmed", "sign", "signed", "deal",
    # forward momentum
    "advance", "progress", "support", "peace", "breakthrough", "historic",
    "resolve", "resolved", "resume", "resumed",
    # likelihood
    "likely", "probable", "imminent",
}

_NEGATIVE = {
    # failure / loss
    "fail", "fails", "failed", "failure", "lose", "lost",
    "collapse", "collapsed", "breakdown", "dead",
    # refusal / opposition
    "reject", "rejected", "rejection", "block", "blocked",
    "refuse", "refused", "deny", "denied", "veto", "oppose", "opposed",
    # conflict / pressure
    "crisis", "war", "attack", "threat", "threatens",
    "sanction", "sanctions", "escalate", "escalation",
    # stalling / reversal
    "stall", "stalled", "withdraw", "withdrew",
    "halt", "halted", "cancel", "cancelled",
    # unlikelihood
    "unlikely", "impossible", "never",
}

# Tokens dropped from the search query: too generic to improve relevance.
_QUERY_STOPWORDS = {
    # auxiliary verbs
    "will", "is", "be", "are", "was", "were",
    "have", "has", "had", "do", "does", "did",
    # articles, prepositions, conjunctions
    "the", "a", "an", "by", "in", "on", "at", "to", "of",
    "and", "or", "for", "from", "with",
    # negation
    "not", "no",
    # pronouns / determiners
    "this", "that", "it", "its", "their", "they", "he", "she", "we",
    # quantifiers
    "most", "more", "least", "less", "any", "all", "both", "each",
    # generic outcome verbs
    "win", "lose", "get", "make", "take",
}

# Date-like noise removed from questions before building a query.
_DATE_RE = re.compile(
    r"\b(january|february|march|april|may|june|"
    r"july|august|september|october|november|december)"
    r"\s+\d{1,2}\b"   # month name followed by a day number
    r"|\b20\d{2}\b"   # four-digit year 20xx
    r"|\bQ[1-4]\b",   # quarter marker Q1..Q4
    re.IGNORECASE,
)

# Punctuation replaced by whitespace before tokenising a question.
_PUNCT_RE = re.compile(r"[?!\"'.,;:()\[\]{}]")
class NewsClient:
    """
    Async GNews client with an in-memory result cache.

    Every lookup degrades to a neutral ``0.0`` instead of raising: missing
    API key, too-short query, network failure, non-200 response, or a
    payload that cannot be parsed.

    Usage::

        client = NewsClient()
        score = await client.get_sentiment("Will Trump visit China")
        # score ∈ [-1.0, +1.0] — positive means bullish for the YES outcome
        await client.close()
    """

    # Seconds a neutral score stays cached after a 400/401/403/429 response.
    # Deliberately shorter than CACHE_TTL so a transient quota or auth
    # problem does not silence the news signal for a full cache period.
    _ERROR_CACHE_TTL = 3600

    def __init__(self) -> None:
        """Read ``GNEWS_API_KEY`` from the environment and open the HTTP client."""
        self._api_key = os.getenv("GNEWS_API_KEY", "")
        self._client = httpx.AsyncClient(
            timeout=10,
            headers={"User-Agent": "Mozilla/5.0 (compatible; polymarket-bot/1.0)"},
        )
        # {cache_key: (fetched_at_monotonic, score)}
        self._cache: dict[str, tuple[float, float]] = {}

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    async def get_sentiment(self, question: str) -> float:
        """
        Return a sentiment score ∈ [-1.0, +1.0] for the market question.

        - Positive: headlines contain more positive than negative keywords
        - Negative: headlines contain more negative than positive keywords
        - 0.0: neutral, no data, or API unavailable

        Results are cached per lower-cased query for ``CACHE_TTL`` seconds;
        neutral results caused by a 400/401/403/429 response are cached for
        ``_ERROR_CACHE_TTL`` seconds only. Each real HTTP attempt is followed
        by an ``_INTER_REQUEST_SLEEP`` pause. Request and decode errors are
        logged and reported as ``0.0`` rather than raised.
        """
        if not self._api_key:
            log.debug("GNEWS_API_KEY not set — skipping news signal")
            return 0.0

        query = self._build_query(question)
        if len(query) < 3:
            return 0.0

        cache_key = query.lower()
        now = time.monotonic()
        cached = self._cache.get(cache_key)
        if cached is not None:
            fetched_at, score = cached
            if now - fetched_at < CACHE_TTL:
                log.debug("News cache hit %r: %.3f", query, score)
                return score

        # Build URL exactly as documented for free tier:
        #   https://gnews.io/api/v4/search?q=...&lang=en&max=10&token=...
        # NOTE: "from"/"to" date filters are paid-tier only — omit them.
        try:
            resp = await self._client.get(
                GNEWS_API,
                params={
                    "q": query,
                    "lang": "en",
                    "max": 10,
                    "token": self._api_key,
                },
            )
        except Exception as exc:
            # Best-effort boundary: any transport failure means "no signal".
            log.warning("GNews network error for %r: %s", query, exc)
            return 0.0
        finally:
            # Always sleep after a real network attempt to avoid burst 429s
            await asyncio.sleep(_INTER_REQUEST_SLEEP)

        log.info("GNews HTTP %d for query %r", resp.status_code, query)

        if resp.status_code != 200:
            try:
                body = resp.json()
            except ValueError:
                body = resp.text[:200]
            log.warning("GNews error body: %s", body)
            if resp.status_code in (400, 401, 403, 429):
                # Cache neutral for _ERROR_CACHE_TTL to avoid hammering the
                # endpoint. The cache stores fetch times and is checked
                # against CACHE_TTL, so back-date the timestamp to make the
                # entry expire after the shorter error lifetime.
                backdate = max(0, CACHE_TTL - self._ERROR_CACHE_TTL)
                self._cache[cache_key] = (now - backdate, 0.0)
            return 0.0

        try:
            data = resp.json()
        except ValueError as exc:
            log.warning("GNews JSON decode error for %r: %s", query, exc)
            return 0.0

        # Tolerate unexpected payload shapes: a non-object body, a missing or
        # null "articles" field, or non-object list entries all count as
        # "no articles" instead of raising.
        raw_articles = data.get("articles") if isinstance(data, dict) else None
        if not isinstance(raw_articles, list):
            raw_articles = []
        articles = [art for art in raw_articles if isinstance(art, dict)]

        score = self._score_headlines(articles)
        self._cache[cache_key] = (now, score)
        log.info(
            "GNews %r: %d articles, sentiment=%.3f",
            query, len(articles), score,
        )
        return score

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self._client.aclose()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _build_query(question: str) -> str:
        """
        Extract meaningful search terms from a market question.

        Date-like fragments and punctuation are replaced by spaces, then
        stop-words and tokens of two characters or fewer are dropped. At most
        the first eight remaining tokens are joined with single spaces.
        """
        cleaned = _DATE_RE.sub(" ", question)
        cleaned = _PUNCT_RE.sub(" ", cleaned)
        tokens = [
            word for word in cleaned.split()
            if word.lower() not in _QUERY_STOPWORDS and len(word) > 2
        ]
        return " ".join(tokens[:8])  # GNews handles ~8 keyword queries well

    @staticmethod
    def _score_headlines(articles: list[dict]) -> float:
        """
        Score each article title + description independently, then average.

        Each article vote: (pos_hits - neg_hits) / (pos_hits + neg_hits) ∈ [-1, 1],
        where hits are distinct keywords found in the lower-cased text.
        Articles with no sentiment keywords contribute 0 (not excluded).
        Returns 0.0 for an empty article list.
        """
        if not articles:
            return 0.0

        votes: list[float] = []
        for art in articles:
            # `or ""` covers JSON null values as well as missing keys.
            title = art.get("title") or ""
            description = art.get("description") or ""
            words = set(re.findall(r"\b\w+\b", f"{title} {description}".lower()))
            pos = len(words & _POSITIVE)
            neg = len(words & _NEGATIVE)
            total = pos + neg
            votes.append((pos - neg) / total if total > 0 else 0.0)

        # The mean of values in [-1, 1] is already in range; clamp defensively.
        return max(-1.0, min(1.0, sum(votes) / len(votes)))