feat: queries DDG generadas por Claude en lugar de plantillas hardcodeadas
Build & Deploy ResearchOwl / build-and-push (push) Successful in 6s
Build & Deploy ResearchOwl / build-and-push (push) Successful in 6s
This commit is contained in:
@@ -164,9 +164,8 @@ class ExhaustiveScraper:
|
|||||||
]
|
]
|
||||||
await asyncio.gather(*tasks, return_exceptions=True)
|
await asyncio.gather(*tasks, return_exceptions=True)
|
||||||
|
|
||||||
async def _seed_duckduckgo(self):
|
async def _generate_ddg_queries(self) -> list[str]:
|
||||||
"""Multiple DDG queries — fresh DDGS() per query to avoid cascading ratelimits"""
|
fallback = [
|
||||||
queries = [
|
|
||||||
self.topic,
|
self.topic,
|
||||||
f"{self.topic} history facts",
|
f"{self.topic} history facts",
|
||||||
f"{self.topic} evidence analysis",
|
f"{self.topic} evidence analysis",
|
||||||
@@ -176,6 +175,44 @@ class ExhaustiveScraper:
|
|||||||
f"{self.topic} documentary",
|
f"{self.topic} documentary",
|
||||||
f"{self.topic} research study",
|
f"{self.topic} research study",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
if not settings.anthropic_api_key:
|
||||||
|
return fallback
|
||||||
|
|
||||||
|
try:
|
||||||
|
import anthropic
|
||||||
|
client = anthropic.AsyncAnthropic(api_key=settings.anthropic_api_key)
|
||||||
|
prompt = (
|
||||||
|
f'Generate exactly 8 DuckDuckGo search queries to research: "{self.topic}"\n\n'
|
||||||
|
f'Rules:\n'
|
||||||
|
f'- Each query must be specific and distinct — no generic templates\n'
|
||||||
|
f'- Cover different angles: facts, history, official sources, criticism, '
|
||||||
|
f'technical details, recent developments, expert opinions, primary sources\n'
|
||||||
|
f'- Use the most specific terminology for this topic\n'
|
||||||
|
f'- Include the topic language naturally (if topic is in Spanish, '
|
||||||
|
f'mix Spanish and English queries for broader coverage)\n'
|
||||||
|
f'- Output ONLY the 8 queries, one per line, no numbering, '
|
||||||
|
f'no explanations, no markdown\n'
|
||||||
|
)
|
||||||
|
msg = await client.messages.create(
|
||||||
|
model=settings.claude_model,
|
||||||
|
max_tokens=300,
|
||||||
|
messages=[{"role": "user", "content": prompt}]
|
||||||
|
)
|
||||||
|
raw = msg.content[0].text.strip()
|
||||||
|
queries = [q.strip() for q in raw.split('\n') if q.strip()]
|
||||||
|
if self.topic not in queries:
|
||||||
|
queries = [self.topic] + queries[:7]
|
||||||
|
queries = queries[:8]
|
||||||
|
logger.info("DDG queries generated by Claude", queries=queries)
|
||||||
|
return queries
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Claude query generation failed, using fallback", error=str(e))
|
||||||
|
return fallback
|
||||||
|
|
||||||
|
async def _seed_duckduckgo(self):
|
||||||
|
"""Multiple DDG queries — fresh DDGS() per query to avoid cascading ratelimits"""
|
||||||
|
queries = await self._generate_ddg_queries()
|
||||||
for query in queries:
|
for query in queries:
|
||||||
if self._stop:
|
if self._stop:
|
||||||
break
|
break
|
||||||
|
|||||||
Reference in New Issue
Block a user