feat: add GNews sentiment signal for politics/tech/events markets
CI/CD / build-and-push (push) Successful in 1m28s
CI/CD / build-and-push (push) Successful in 1m28s
bot/data/news.py (new): - NewsClient with in-memory cache (TTL=4h) to stay within 100 req/day limit - _build_query(): strips dates, punctuation and stopwords from market question - _score_headlines(): keyword-based pos/neg vote per article, averaged ∈ [-1, +1] - Degrades to 0.0 on missing key, 403 quota, or network error bot/strategy/bayesian.py: - BayesianStrategy(news=NewsClient) — optional, backwards compatible - Signal 4: GNews sentiment applied as direct log-odds shift (weight=1.5) so a ±1.0 sentiment score moves a 50% prior to 82%/18% - +0.10 confidence boost when news signal is present - NEWS_LOGODDS_WEIGHT constant documented at module level bot/main.py: - Instantiate NewsClient, pass to BayesianStrategy, close in finally block Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,193 @@
|
|||||||
|
"""
|
||||||
|
News sentiment client for GNews API.
|
||||||
|
|
||||||
|
Free tier: 100 requests/day — we stay well within this by caching each
|
||||||
|
unique query for CACHE_TTL seconds (4 hours). With ~9 political markets
|
||||||
|
refreshed every 4 h that is 9 × 6 = 54 requests/day.
|
||||||
|
|
||||||
|
Score returned: -1.0 (very negative headlines) → +1.0 (very positive).
|
||||||
|
Returns 0.0 on any error or missing API key so the caller degrades gracefully.
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
from datetime import datetime, timezone, timedelta
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
# Module-level logger for the news client.
log = logging.getLogger(__name__)

# GNews v4 search endpoint queried by NewsClient.
GNEWS_API = "https://gnews.io/api/v4/search"
# Lifetime of a cached sentiment score: 4 hours, expressed in seconds.
CACHE_TTL = 4 * 60 * 60  # seconds — fits 100 req/day free tier

# ---------------------------------------------------------------------------
# Keyword lists for headline sentiment
# ---------------------------------------------------------------------------
# Words counted as a "positive" hit when they appear in a headline/description.
_POSITIVE = {
    # winning / succeeding
    "win", "wins", "won", "victory", "success", "successful",
    # agreement / approval
    "agree", "agreed", "agreement",
    "approve", "approved", "approval",
    "confirm", "confirmed",
    "sign", "signed", "deal",
    # momentum
    "advance", "progress", "support", "peace",
    "likely", "probable", "imminent",
    "historic", "breakthrough",
    "resolve", "resolved", "resume", "resumed",
}

# Words counted as a "negative" hit when they appear in a headline/description.
_NEGATIVE = {
    # failure / rejection
    "fail", "fails", "failed", "failure",
    "reject", "rejected", "rejection",
    "block", "blocked",
    "refuse", "refused",
    "deny", "denied",
    # loss / conflict
    "lose", "lost",
    "collapse", "collapsed",
    "crisis", "war", "attack",
    # opposition / improbability
    "veto", "oppose", "opposed",
    "unlikely", "impossible", "never",
    # stalling / withdrawal
    "stall", "stalled",
    "withdraw", "withdrew",
    "sanction", "sanctions",
    "threat", "threatens",
    "dead", "halt", "halted",
    "cancel", "cancelled",
    "breakdown", "escalate", "escalation",
}

# Words stripped when building the search query (too generic to help relevance).
# Compared against the lower-cased token, so entries must be lower case.
_QUERY_STOPWORDS = {
    "will", "the", "a", "an",
    "by", "in", "on", "at", "to", "of",
    "and", "or",
    "is", "be", "are", "was", "were",
    "have", "has", "had",
    "do", "does", "did",
    "for", "from", "with",
    "not", "no",
    "this", "that", "it", "its",
    "their", "they", "he", "she", "we",
    "most", "more", "least", "less",
    "any", "all", "both", "each",
    "win", "lose", "get", "make", "take",
}

# Regex patterns for dates / noise.
# Matches, case-insensitively: "<month name> <1-2 digit day>", a four-digit
# year starting with 20, or a quarter marker Q1..Q4.
_DATE_RE = re.compile(
    r"\b(january|february|march|april|may|june|july|august|"
    r"september|october|november|december)\s+\d{1,2}\b"
    r"|\b20\d{2}\b"
    r"|\bQ[1-4]\b",
    re.IGNORECASE,
)
# Single punctuation characters removed from the question before tokenising.
_PUNCT_RE = re.compile(r"[?!\"'.,;:()\[\]{}]")
|
||||||
|
|
||||||
|
|
||||||
|
class NewsClient:
    """
    Async GNews client with in-memory result cache.

    Every public call degrades to a neutral ``0.0`` score instead of raising:
    missing API key, network failure, quota exhaustion (HTTP 403), non-2xx
    responses and malformed payloads are all logged and swallowed.

    Usage::

        client = NewsClient()
        score = await client.get_sentiment("Will Trump visit China")
        # score ∈ [-1.0, +1.0] — positive means bullish for the YES outcome
        await client.close()
    """

    # How long (seconds) a neutral placeholder is kept after a 403 so the
    # endpoint is not hammered while the key is invalid / quota is exhausted.
    _QUOTA_BACKOFF = 3600

    def __init__(self) -> None:
        """Read the API key from ``GNEWS_API_KEY`` and open the HTTP client."""
        self._api_key = os.getenv("GNEWS_API_KEY", "")
        self._client = httpx.AsyncClient(timeout=10)
        # {cache_key: (expires_at_monotonic, score)}
        self._cache: dict[str, tuple[float, float]] = {}

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    async def get_sentiment(self, question: str, days: int = 7) -> float:
        """
        Return a sentiment score ∈ [-1.0, +1.0] for the market question.

        - Positive: most recent headlines suggest the YES outcome is more likely
        - Negative: headlines suggest the YES outcome is less likely
        - 0.0: neutral, no data, or API unavailable

        Args:
            question: Free-text market question; reduced to search keywords
                by ``_build_query``.
            days: Look-back window passed to the API as the ``from`` date.

        Returns:
            The averaged headline score. Successful results are cached for
            ``CACHE_TTL`` seconds; a 403 caches a neutral 0.0 for
            ``_QUOTA_BACKOFF`` seconds. Network errors and bad responses are
            not cached, so the next call retries.
        """
        if not self._api_key:
            log.debug("GNEWS_API_KEY not set — skipping news signal")
            return 0.0

        query = self._build_query(question)
        if len(query) < 3:
            return 0.0

        # The look-back window is part of the key: the same keywords queried
        # over a different number of days are a different request.
        cache_key = f"{query.lower()}|{days}"
        now = time.monotonic()
        cached = self._cache.get(cache_key)
        if cached is not None:
            expires_at, score = cached
            if now < expires_at:
                log.debug("News cache hit %r → %.3f", query, score)
                return score

        try:
            resp = await self._client.get(
                GNEWS_API,
                params={
                    "q": query,
                    "lang": "en",
                    "max": 10,
                    "from": _iso_days_ago(days),
                    "token": self._api_key,
                },
            )
        except Exception as exc:
            # Deliberate best-effort: any transport failure means "no signal".
            log.warning("GNews network error for %r: %s", query, exc)
            return 0.0

        if resp.status_code == 403:
            log.warning("GNews: 403 — invalid key or daily quota exhausted")
            # Cache a neutral result for 1 h to avoid hammering the endpoint
            # (shorter than CACHE_TTL so the signal recovers soon after the
            # quota resets).
            self._cache[cache_key] = (now + self._QUOTA_BACKOFF, 0.0)
            return 0.0

        try:
            resp.raise_for_status()
            data = resp.json()
        except Exception as exc:
            log.warning("GNews bad response for %r: %s", query, exc)
            return 0.0

        if not isinstance(data, dict):
            # Valid JSON but not an object — treat like any other bad response
            # and do not cache it.
            log.warning(
                "GNews bad response for %r: %s", query, "unexpected JSON payload"
            )
            return 0.0

        # "articles" may be absent or null; normalise to a list so both the
        # scorer and the len() in the log line below are safe.
        articles = data.get("articles")
        if not isinstance(articles, list):
            articles = []

        score = self._score_headlines(articles)
        self._cache[cache_key] = (now + CACHE_TTL, score)
        log.info(
            "GNews %r → %d articles, sentiment=%.3f",
            query, len(articles), score,
        )
        return score

    async def close(self) -> None:
        """Close the underlying HTTP client."""
        await self._client.aclose()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _build_query(question: str) -> str:
        """
        Extract meaningful search terms from a market question.

        Date fragments and punctuation are replaced by spaces, then tokens
        that are stopwords or shorter than three characters are dropped.
        At most the first eight remaining tokens are kept, in original order
        and original casing.
        """
        q = _DATE_RE.sub(" ", question)
        q = _PUNCT_RE.sub(" ", q)
        tokens = [
            w for w in q.split()
            if w.lower() not in _QUERY_STOPWORDS and len(w) > 2
        ]
        return " ".join(tokens[:8])  # GNews handles ~8 keyword queries well

    @staticmethod
    def _score_headlines(articles: list[dict]) -> float:
        """
        Score each article title + description independently, then average.

        Each article vote: (pos_hits - neg_hits) / (pos_hits + neg_hits) ∈ [-1, 1].
        Hits are counted over the set of distinct lower-cased words, so a
        keyword repeated within one article counts once.
        Articles with no sentiment keywords contribute 0 (not excluded);
        entries that are not dicts are treated the same way.

        Returns 0.0 for an empty or missing article list.
        """
        if not articles:
            return 0.0

        votes: list[float] = []
        for art in articles:
            if not isinstance(art, dict):
                # Malformed entry: count it as a neutral article.
                votes.append(0.0)
                continue
            # `or ''` keeps JSON nulls from leaking into the text as "None".
            text = (
                f"{art.get('title') or ''} {art.get('description') or ''}"
            ).lower()
            words = set(re.findall(r"\b\w+\b", text))
            pos = len(words & _POSITIVE)
            neg = len(words & _NEGATIVE)
            total = pos + neg
            votes.append((pos - neg) / total if total > 0 else 0.0)

        # Each vote is already within [-1, 1]; the clamp only guards against
        # floating-point drift in the mean.
        return max(-1.0, min(1.0, sum(votes) / len(votes)))
|
||||||
|
|
||||||
|
|
||||||
|
def _iso_days_ago(days: int) -> str:
    """Return the UTC instant ``days`` days before now as ``YYYY-MM-DDTHH:MM:SSZ``."""
    window = timedelta(days=days)
    cutoff = datetime.now(timezone.utc) - window
    return cutoff.strftime("%Y-%m-%dT%H:%M:%SZ")
|
||||||
+4
-1
@@ -9,6 +9,7 @@ from contextlib import asynccontextmanager
|
|||||||
|
|
||||||
from bot.data.polymarket import PolymarketClient
|
from bot.data.polymarket import PolymarketClient
|
||||||
from bot.data.external import ExternalDataClient
|
from bot.data.external import ExternalDataClient
|
||||||
|
from bot.data.news import NewsClient
|
||||||
from bot.strategy.bayesian import BayesianStrategy
|
from bot.strategy.bayesian import BayesianStrategy
|
||||||
from bot.risk.manager import RiskManager
|
from bot.risk.manager import RiskManager
|
||||||
from bot.executor.paper import PaperExecutor
|
from bot.executor.paper import PaperExecutor
|
||||||
@@ -98,7 +99,8 @@ async def main() -> None:
|
|||||||
|
|
||||||
poly = PolymarketClient()
|
poly = PolymarketClient()
|
||||||
external = ExternalDataClient()
|
external = ExternalDataClient()
|
||||||
strategy = BayesianStrategy()
|
news = NewsClient()
|
||||||
|
strategy = BayesianStrategy(news=news)
|
||||||
risk = RiskManager(max_position_pct=0.05, max_exposure_pct=0.30)
|
risk = RiskManager(max_position_pct=0.05, max_exposure_pct=0.30)
|
||||||
executor = PaperExecutor(db=db, bankroll=PAPER_BANKROLL) if PAPER_MODE else None
|
executor = PaperExecutor(db=db, bankroll=PAPER_BANKROLL) if PAPER_MODE else None
|
||||||
metrics = MetricsTracker(db=db)
|
metrics = MetricsTracker(db=db)
|
||||||
@@ -115,6 +117,7 @@ async def main() -> None:
|
|||||||
await run_trading_loop(poly, external, strategy, risk, executor, metrics)
|
await run_trading_loop(poly, external, strategy, risk, executor, metrics)
|
||||||
finally:
|
finally:
|
||||||
await db.disconnect()
|
await db.disconnect()
|
||||||
|
await news.close()
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -17,6 +17,7 @@ from typing import Optional
|
|||||||
|
|
||||||
from bot.data.polymarket import Market
|
from bot.data.polymarket import Market
|
||||||
from bot.data.external import ExternalSignals
|
from bot.data.external import ExternalSignals
|
||||||
|
from bot.data.news import NewsClient
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -26,6 +27,11 @@ log = logging.getLogger(__name__)
|
|||||||
MIN_EDGE = 0.10 # 10% edge minimum
|
MIN_EDGE = 0.10 # 10% edge minimum
|
||||||
MIN_CONFIDENCE = 0.55 # Minimum confidence in our estimate
|
MIN_CONFIDENCE = 0.55 # Minimum confidence in our estimate
|
||||||
|
|
||||||
|
# Log-odds weight applied to the GNews sentiment score (range ±1.0).
|
||||||
|
# A weight of 1.5 means a fully negative/positive signal shifts log-odds by ±1.5,
|
||||||
|
# which moves a 50% prior to ~18%/82% — strong but not overwhelming.
|
||||||
|
NEWS_LOGODDS_WEIGHT = 1.5
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class TradingSignal:
|
class TradingSignal:
|
||||||
@@ -53,8 +59,9 @@ class BayesianStrategy:
|
|||||||
to justify the fee + slippage cost (MIN_EDGE).
|
to justify the fee + slippage cost (MIN_EDGE).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self) -> None:
|
def __init__(self, news: Optional[NewsClient] = None) -> None:
|
||||||
self._signal_count = 0
|
self._signal_count = 0
|
||||||
|
self._news = news # Optional; degrades gracefully when None or key missing
|
||||||
|
|
||||||
async def evaluate(
|
async def evaluate(
|
||||||
self,
|
self,
|
||||||
@@ -165,16 +172,29 @@ class BayesianStrategy:
|
|||||||
adjustments.append(dom_adj)
|
adjustments.append(dom_adj)
|
||||||
sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (low → alt season)")
|
sources.append(f"BTC dom: {ext.btc_dominance:.1f}% (low → alt season)")
|
||||||
|
|
||||||
|
# Signal 4: GNews sentiment (politics / tech / events only)
|
||||||
|
# Applied as a direct log-odds shift — stronger signal than macro proxies.
|
||||||
|
# Weight NEWS_LOGODDS_WEIGHT=1.5 means a ±1.0 sentiment score shifts
|
||||||
|
# log-odds by ±1.5 (e.g. 50% prior → ~82% / ~18%).
|
||||||
|
news_log_adj = 0.0
|
||||||
|
if (is_politics or is_tech or is_events) and self._news is not None:
|
||||||
|
sentiment = await self._news.get_sentiment(market.question)
|
||||||
|
if abs(sentiment) > 0.05:
|
||||||
|
news_log_adj = sentiment * NEWS_LOGODDS_WEIGHT
|
||||||
|
sources.append(f"GNews: {sentiment:+.2f}")
|
||||||
|
|
||||||
# Macro/politics/tech/events: cap confidence lower to reflect weaker signal quality
|
# Macro/politics/tech/events: cap confidence lower to reflect weaker signal quality
|
||||||
if is_macro or is_politics or is_tech or is_events:
|
if is_macro or is_politics or is_tech or is_events:
|
||||||
confidence_cap = 0.65
|
confidence_cap = 0.65
|
||||||
else:
|
else:
|
||||||
confidence_cap = 0.90
|
confidence_cap = 0.90
|
||||||
|
|
||||||
# Compute posterior using log-odds updating
|
# Compute posterior using log-odds updating.
|
||||||
|
# total_adj (BTC/F&G/dominance) is amplified ×2 because those are weak proxies.
|
||||||
|
# news_log_adj is applied at face value — it IS a direct log-odds signal.
|
||||||
log_odds_prior = math.log(prior / (1 - prior))
|
log_odds_prior = math.log(prior / (1 - prior))
|
||||||
total_adj = sum(adjustments)
|
total_adj = sum(adjustments)
|
||||||
estimated_prob = _sigmoid(log_odds_prior + total_adj * 2)
|
estimated_prob = _sigmoid(log_odds_prior + total_adj * 2 + news_log_adj)
|
||||||
estimated_prob = max(0.05, min(0.95, estimated_prob))
|
estimated_prob = max(0.05, min(0.95, estimated_prob))
|
||||||
|
|
||||||
# Compute edge
|
# Compute edge
|
||||||
@@ -185,6 +205,9 @@ class BayesianStrategy:
|
|||||||
# Confidence based on signal agreement
|
# Confidence based on signal agreement
|
||||||
agreement = sum(1 for a in adjustments if (a > 0) == (total_adj > 0))
|
agreement = sum(1 for a in adjustments if (a > 0) == (total_adj > 0))
|
||||||
confidence = min(confidence_cap, 0.4 + (agreement / max(len(adjustments), 1)) * 0.5)
|
confidence = min(confidence_cap, 0.4 + (agreement / max(len(adjustments), 1)) * 0.5)
|
||||||
|
# News signal present → boost confidence by 0.10 (applied whenever the news adjustment is non-zero; agreement with the macro signals is not checked)
|
||||||
|
if news_log_adj != 0.0:
|
||||||
|
confidence = min(confidence_cap, confidence + 0.10)
|
||||||
|
|
||||||
# Log evaluation result for every market
|
# Log evaluation result for every market
|
||||||
action = "TRADE" if (abs_edge >= MIN_EDGE and confidence >= MIN_CONFIDENCE) else "SKIP"
|
action = "TRADE" if (abs_edge >= MIN_EDGE and confidence >= MIN_CONFIDENCE) else "SKIP"
|
||||||
|
|||||||
Reference in New Issue
Block a user