From 4dadd3c2c4c519db96d134bdcaa354bd17892d84 Mon Sep 17 00:00:00 2001 From: chemavx Date: Tue, 14 Apr 2026 08:24:11 +0000 Subject: [PATCH] feat: add GNews sentiment signal for politics/tech/events markets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bot/data/news.py (new): - NewsClient with in-memory cache (TTL=4h) to stay within 100 req/day limit - _build_query(): strips dates, punctuation and stopwords from market question - _score_headlines(): keyword-based pos/neg vote per article, averaged ∈ [-1, +1] - Degrades to 0.0 on missing key, 403 quota, or network error bot/strategy/bayesian.py: - BayesianStrategy(news=NewsClient) — optional, backwards compatible - Signal 4: GNews sentiment applied as direct log-odds shift (weight=1.5) so a ±1.0 sentiment score moves a 50% prior to 82%/18% - +0.10 confidence boost when news signal is present - NEWS_LOGODDS_WEIGHT constant documented at module level bot/main.py: - Instantiate NewsClient, pass to BayesianStrategy, close in finally block Co-Authored-By: Claude Sonnet 4.6 --- bot/data/news.py | 193 +++++++++++++++++++++++++++++++++++++++ bot/main.py | 5 +- bot/strategy/bayesian.py | 29 +++++- 3 files changed, 223 insertions(+), 4 deletions(-) create mode 100644 bot/data/news.py diff --git a/bot/data/news.py b/bot/data/news.py new file mode 100644 index 0000000..32e5c69 --- /dev/null +++ b/bot/data/news.py @@ -0,0 +1,193 @@ +""" +News sentiment client for GNews API. + +Free tier: 100 requests/day — we stay well within this by caching each +unique query for CACHE_TTL seconds (4 hours). With ~9 political markets +refreshed every 4 h that is 9 × 6 = 54 requests/day. + +Score returned: -1.0 (very negative headlines) → +1.0 (very positive). +Returns 0.0 on any error or missing API key so the caller degrades gracefully. 
+""" +import logging +import os +import re +import time +from datetime import datetime, timezone, timedelta + +import httpx + +log = logging.getLogger(__name__) + +GNEWS_API = "https://gnews.io/api/v4/search" +CACHE_TTL = 4 * 3600 # seconds — fits 100 req/day free tier + +# --------------------------------------------------------------------------- +# Keyword lists for headline sentiment +# --------------------------------------------------------------------------- +_POSITIVE = { + "win", "wins", "won", "victory", "success", "successful", + "agree", "agreed", "agreement", "approve", "approved", "approval", + "confirm", "confirmed", "sign", "signed", "deal", "advance", + "progress", "support", "peace", "likely", "probable", "imminent", + "historic", "breakthrough", "resolve", "resolved", "resume", "resumed", +} +_NEGATIVE = { + "fail", "fails", "failed", "failure", "reject", "rejected", "rejection", + "block", "blocked", "refuse", "refused", "deny", "denied", + "lose", "lost", "collapse", "collapsed", "crisis", "war", "attack", + "veto", "oppose", "opposed", "unlikely", "impossible", "never", + "stall", "stalled", "withdraw", "withdrew", "sanction", "sanctions", + "threat", "threatens", "dead", "halt", "halted", "cancel", "cancelled", + "breakdown", "escalate", "escalation", +} + +# Words stripped when building the search query (too generic to help relevance) +_QUERY_STOPWORDS = { + "will", "the", "a", "an", "by", "in", "on", "at", "to", "of", + "and", "or", "is", "be", "are", "was", "were", "have", "has", + "had", "do", "does", "did", "for", "from", "with", "not", "no", + "this", "that", "it", "its", "their", "they", "he", "she", "we", + "most", "more", "least", "less", "any", "all", "both", "each", + "win", "lose", "get", "make", "take", +} + +# Regex patterns for dates / noise +_DATE_RE = re.compile( + r"\b(january|february|march|april|may|june|july|august|" + r"september|october|november|december)\s+\d{1,2}\b" + r"|\b20\d{2}\b" + r"|\bQ[1-4]\b", + 
flags=re.IGNORECASE, +) +_PUNCT_RE = re.compile(r"[?!\"'.,;:()\[\]{}]") + + +class NewsClient: + """ + Async GNews client with in-memory result cache. + + Usage:: + + client = NewsClient() + score = await client.get_sentiment("Will Trump visit China") + # score ∈ [-1.0, +1.0] — positive means bullish for the YES outcome + await client.close() + """ + + def __init__(self) -> None: + self._api_key = os.getenv("GNEWS_API_KEY", "") + self._client = httpx.AsyncClient(timeout=10) + # {cache_key: (fetched_at_monotonic, score)} + self._cache: dict[str, tuple[float, float]] = {} + + # ------------------------------------------------------------------ + # Public API + # ------------------------------------------------------------------ + + async def get_sentiment(self, question: str, days: int = 7) -> float: + """ + Return a sentiment score ∈ [-1.0, +1.0] for the market question. + + - Positive: most recent headlines suggest the YES outcome is more likely + - Negative: headlines suggest the YES outcome is less likely + - 0.0: neutral, no data, or API unavailable + """ + if not self._api_key: + log.debug("GNEWS_API_KEY not set — skipping news signal") + return 0.0 + + query = self._build_query(question) + if len(query) < 3: + return 0.0 + + cache_key = query.lower() + now = time.monotonic() + cached = self._cache.get(cache_key) + if cached is not None: + fetched_at, score = cached + if now - fetched_at < CACHE_TTL: + log.debug("News cache hit %r → %.3f", query, score) + return score + + try: + resp = await self._client.get( + GNEWS_API, + params={ + "q": query, + "lang": "en", + "max": 10, + "from": _iso_days_ago(days), + "token": self._api_key, + }, + ) + except Exception as exc: + log.warning("GNews network error for %r: %s", query, exc) + return 0.0 + + if resp.status_code == 403: + log.warning("GNews: 403 — invalid key or daily quota exhausted") + # Cache a neutral result for the full CACHE_TTL (4 h) to avoid hammering the endpoint + self._cache[cache_key] = (now, 0.0) + return 0.0 + + try: + 
resp.raise_for_status() + data = resp.json() + except Exception as exc: + log.warning("GNews bad response for %r: %s", query, exc) + return 0.0 + + articles = data.get("articles", []) + score = self._score_headlines(articles) + self._cache[cache_key] = (now, score) + log.info( + "GNews %r → %d articles, sentiment=%.3f", + query, len(articles), score, + ) + return score + + async def close(self) -> None: + await self._client.aclose() + + # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + + @staticmethod + def _build_query(question: str) -> str: + """Extract meaningful search terms from a market question.""" + q = _DATE_RE.sub(" ", question) + q = _PUNCT_RE.sub(" ", q) + tokens = [ + w for w in q.split() + if w.lower() not in _QUERY_STOPWORDS and len(w) > 2 + ] + return " ".join(tokens[:8]) # keep at most 8 keywords; GNews handles queries of ~8 keywords well + + @staticmethod + def _score_headlines(articles: list[dict]) -> float: + """ + Score each article title + description independently, then average. + Each article vote: (pos_hits - neg_hits) / (pos_hits + neg_hits) ∈ [-1, 1]. + Articles with no sentiment keywords contribute 0 (not excluded). 
+ """ + if not articles: + return 0.0 + + votes: list[float] = [] + for art in articles: + text = ( + f"{art.get('title', '')} {art.get('description', '')}" + ).lower() + words = set(re.findall(r"\b\w+\b", text)) + pos = len(words & _POSITIVE) + neg = len(words & _NEGATIVE) + total = pos + neg + votes.append((pos - neg) / total if total > 0 else 0.0) + + return max(-1.0, min(1.0, sum(votes) / len(votes))) + + +def _iso_days_ago(days: int) -> str: + dt = datetime.now(timezone.utc) - timedelta(days=days) + return dt.strftime("%Y-%m-%dT%H:%M:%SZ") diff --git a/bot/main.py b/bot/main.py index 4d984d3..701e7fb 100644 --- a/bot/main.py +++ b/bot/main.py @@ -9,6 +9,7 @@ from contextlib import asynccontextmanager from bot.data.polymarket import PolymarketClient from bot.data.external import ExternalDataClient +from bot.data.news import NewsClient from bot.strategy.bayesian import BayesianStrategy from bot.risk.manager import RiskManager from bot.executor.paper import PaperExecutor @@ -98,7 +99,8 @@ async def main() -> None: poly = PolymarketClient() external = ExternalDataClient() - strategy = BayesianStrategy() + news = NewsClient() + strategy = BayesianStrategy(news=news) risk = RiskManager(max_position_pct=0.05, max_exposure_pct=0.30) executor = PaperExecutor(db=db, bankroll=PAPER_BANKROLL) if PAPER_MODE else None metrics = MetricsTracker(db=db) @@ -115,6 +117,7 @@ async def main() -> None: await run_trading_loop(poly, external, strategy, risk, executor, metrics) finally: await db.disconnect() + await news.close() if __name__ == "__main__": diff --git a/bot/strategy/bayesian.py b/bot/strategy/bayesian.py index 9fea8db..56f9cd6 100644 --- a/bot/strategy/bayesian.py +++ b/bot/strategy/bayesian.py @@ -17,6 +17,7 @@ from typing import Optional from bot.data.polymarket import Market from bot.data.external import ExternalSignals +from bot.data.news import NewsClient log = logging.getLogger(__name__) @@ -26,6 +27,11 @@ log = logging.getLogger(__name__) MIN_EDGE = 0.10 # 10% 
edge minimum MIN_CONFIDENCE = 0.55 # Minimum confidence in our estimate +# Log-odds weight applied to the GNews sentiment score (range ±1.0). +# A weight of 1.5 means a fully negative/positive signal shifts log-odds by ±1.5, +# which moves a 50% prior to ~18%/82% — strong but not overwhelming. +NEWS_LOGODDS_WEIGHT = 1.5 + @dataclass class TradingSignal: @@ -53,8 +59,9 @@ class BayesianStrategy: to justify the fee + slippage cost (MIN_EDGE). """ - def __init__(self) -> None: + def __init__(self, news: Optional[NewsClient] = None) -> None: self._signal_count = 0 + self._news = news # Optional; degrades gracefully when None or key missing async def evaluate( self, @@ -165,16 +172,29 @@ class BayesianStrategy: adjustments.append(dom_adj) sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (low → alt season)") + # Signal 4: GNews sentiment (politics / tech / events only) + # Applied as a direct log-odds shift — stronger signal than macro proxies. + # Weight NEWS_LOGODDS_WEIGHT=1.5 means a ±1.0 sentiment score shifts + # log-odds by ±1.5 (e.g. 50% prior → ~82% / ~18%). + news_log_adj = 0.0 + if (is_politics or is_tech or is_events) and self._news is not None: + sentiment = await self._news.get_sentiment(market.question) + if abs(sentiment) > 0.05: + news_log_adj = sentiment * NEWS_LOGODDS_WEIGHT + sources.append(f"GNews: {sentiment:+.2f}") + # Macro/politics/tech/events: cap confidence lower to reflect weaker signal quality if is_macro or is_politics or is_tech or is_events: confidence_cap = 0.65 else: confidence_cap = 0.90 - # Compute posterior using log-odds updating + # Compute posterior using log-odds updating. + # total_adj (BTC/F&G/dominance) is amplified ×2 because those are weak proxies. + # news_log_adj is applied at face value — it IS a direct log-odds signal. 
log_odds_prior = math.log(prior / (1 - prior)) total_adj = sum(adjustments) - estimated_prob = _sigmoid(log_odds_prior + total_adj * 2) + estimated_prob = _sigmoid(log_odds_prior + total_adj * 2 + news_log_adj) estimated_prob = max(0.05, min(0.95, estimated_prob)) # Compute edge @@ -185,6 +205,9 @@ class BayesianStrategy: # Confidence based on signal agreement agreement = sum(1 for a in adjustments if (a > 0) == (total_adj > 0)) confidence = min(confidence_cap, 0.4 + (agreement / max(len(adjustments), 1)) * 0.5) + # Non-zero news signal → boost confidence by 0.10, still capped at confidence_cap (agreement with macro signals is not checked) + if news_log_adj != 0.0: + confidence = min(confidence_cap, confidence + 0.10) # Log evaluation result for every market action = "TRADE" if (abs_edge >= MIN_EDGE and confidence >= MIN_CONFIDENCE) else "SKIP"