From d642dbd9cfac7dfe26d0a33437687ee0cbfa3ebb Mon Sep 17 00:00:00 2001 From: chemavx Date: Tue, 14 Apr 2026 08:36:46 +0000 Subject: [PATCH] fix(news): remove paid-tier 'from' param, add User-Agent, log status+body on error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Drop the 'from' date filter — it's a paid GNews feature, causes 403 on free tier - Add User-Agent header to httpx client; urllib default passes, httpx default blocked - Log actual HTTP status code for every request (INFO) and response body on non-200 - Cache neutral result on 400/401/403/429 to avoid hammering the quota - Remove unused _iso_days_ago() helper and 'days' param from get_sentiment() Co-Authored-By: Claude Sonnet 4.6 --- bot/data/news.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/bot/data/news.py b/bot/data/news.py index 32e5c69..bafa39f 100644 --- a/bot/data/news.py +++ b/bot/data/news.py @@ -12,7 +12,6 @@ import logging import os import re import time -from datetime import datetime, timezone, timedelta import httpx @@ -76,7 +75,10 @@ class NewsClient: def __init__(self) -> None: self._api_key = os.getenv("GNEWS_API_KEY", "") - self._client = httpx.AsyncClient(timeout=10) + self._client = httpx.AsyncClient( + timeout=10, + headers={"User-Agent": "Mozilla/5.0 (compatible; polymarket-bot/1.0)"}, + ) # {cache_key: (fetched_at_monotonic, score)} self._cache: dict[str, tuple[float, float]] = {} @@ -84,7 +86,7 @@ class NewsClient: # Public API # ------------------------------------------------------------------ - async def get_sentiment(self, question: str, days: int = 7) -> float: + async def get_sentiment(self, question: str) -> float: """ Return a sentiment score ∈ [-1.0, +1.0] for the market question. @@ -109,6 +111,9 @@ class NewsClient: log.debug("News cache hit %r → %.3f", query, score) return score + # Build URL exactly as documented for free tier: + # https://gnews.io/api/v4/search?q=...&lang=en&max=10&token=... + # NOTE: "from"/"to" date filters are paid-tier only — omit them. try: resp = await self._client.get( GNEWS_API, @@ -116,7 +121,6 @@ class NewsClient: "q": query, "lang": "en", "max": 10, - "from": _iso_days_ago(days), "token": self._api_key, }, ) @@ -124,17 +128,23 @@ class NewsClient: log.warning("GNews network error for %r: %s", query, exc) return 0.0 - if resp.status_code == 403: - log.warning("GNews: 403 — invalid key or daily quota exhausted") - # Cache a neutral result for 1 h to avoid hammering the endpoint - self._cache[cache_key] = (now, 0.0) + log.info("GNews HTTP %d for query %r", resp.status_code, query) + + if resp.status_code != 200: + try: + body = resp.json() + except Exception: + body = resp.text[:200] + log.warning("GNews error body: %s", body) + # Cache neutral for 1 h on client errors to avoid hammering the endpoint + if resp.status_code in (400, 401, 403, 429): + self._cache[cache_key] = (now, 0.0) return 0.0 try: - resp.raise_for_status() data = resp.json() except Exception as exc: - log.warning("GNews bad response for %r: %s", query, exc) + log.warning("GNews JSON decode error for %r: %s", query, exc) return 0.0 articles = data.get("articles", []) @@ -186,8 +196,3 @@ class NewsClient: votes.append((pos - neg) / total if total > 0 else 0.0) return max(-1.0, min(1.0, sum(votes) / len(votes))) - - -def _iso_days_ago(days: int) -> str: - dt = datetime.now(timezone.utc) - timedelta(days=days) - return dt.strftime("%Y-%m-%dT%H:%M:%SZ")