From 4bef9d2d1776c8f0b4a56c64e86e5dacc111995c Mon Sep 17 00:00:00 2001 From: ChemaVX Date: Mon, 4 May 2026 13:24:25 +0000 Subject: [PATCH] feat: queries DDG generadas por Claude en lugar de plantillas hardcodeadas --- src/scraper/exhaustive.py | 43 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/src/scraper/exhaustive.py b/src/scraper/exhaustive.py index 4b676d1..74ff47d 100644 --- a/src/scraper/exhaustive.py +++ b/src/scraper/exhaustive.py @@ -164,9 +164,8 @@ class ExhaustiveScraper: ] await asyncio.gather(*tasks, return_exceptions=True) - async def _seed_duckduckgo(self): - """Multiple DDG queries — fresh DDGS() per query to avoid cascading ratelimits""" - queries = [ + async def _generate_ddg_queries(self) -> list[str]: + fallback = [ self.topic, f"{self.topic} history facts", f"{self.topic} evidence analysis", @@ -176,6 +175,44 @@ class ExhaustiveScraper: f"{self.topic} documentary", f"{self.topic} research study", ] + + if not settings.anthropic_api_key: + return fallback + + try: + import anthropic + client = anthropic.AsyncAnthropic(api_key=settings.anthropic_api_key) + prompt = ( + f'Generate exactly 8 DuckDuckGo search queries to research: "{self.topic}"\n\n' + f'Rules:\n' + f'- Each query must be specific and distinct — no generic templates\n' + f'- Cover different angles: facts, history, official sources, criticism, ' + f'technical details, recent developments, expert opinions, primary sources\n' + f'- Use the most specific terminology for this topic\n' + f'- Include the topic language naturally (if topic is in Spanish, ' + f'mix Spanish and English queries for broader coverage)\n' + f'- Output ONLY the 8 queries, one per line, no numbering, ' + f'no explanations, no markdown\n' + ) + msg = await client.messages.create( + model=settings.claude_model, + max_tokens=300, + messages=[{"role": "user", "content": prompt}] + ) + raw = msg.content[0].text.strip() + queries = [q.strip() for q in raw.split('\n') if q.strip()] + if self.topic not in queries: + queries = [self.topic] + queries[:7] + queries = queries[:8] + logger.info("DDG queries generated by Claude", queries=queries) + return queries + except Exception as e: + logger.warning("Claude query generation failed, using fallback", error=str(e)) + return fallback + + async def _seed_duckduckgo(self): + """Multiple DDG queries — fresh DDGS() per query to avoid cascading ratelimits""" + queries = await self._generate_ddg_queries() for query in queries: if self._stop: break