From 65b173994308cff9a94f60eca735c2ed06d6f8e0 Mon Sep 17 00:00:00 2001
From: ChemaVX <jmivanez@gmail.com>
Date: Wed, 29 Apr 2026 09:06:06 +0000
Subject: [PATCH] feat: Claude Haiku for content generation, Ollama fallback

Use Claude Haiku (via ANTHROPIC_API_KEY) for all output generation.
Fall back to Ollama qwen2.5:3b if no API key is set or the Claude call fails.
Also translate all user-turn prompts and the progress message to Spanish for consistency.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/generator/generator.py | 151 +++++++++++++++++++++----------------
 1 file changed, 88 insertions(+), 63 deletions(-)

diff --git a/src/generator/generator.py b/src/generator/generator.py
index cd0d872..4d8d7ea 100644
--- a/src/generator/generator.py
+++ b/src/generator/generator.py
@@ -1,9 +1,10 @@
 """
 ResearchOwl Generators
-Produces structured outputs from processed research using Ollama
+Produces structured outputs from processed research using Claude or Ollama
 """
 import structlog
 
+from src.config import settings
 from src.processor.processor import OllamaClient, ContentProcessor
 from src.db.database import ResearchDB, OutputType
 
@@ -42,95 +43,95 @@ THREAD_SYSTEM = (
 
 PROMPTS = {
     OutputType.PODCAST: """\
-Write a podcast script about: "{topic}"
+Escribe un guion de podcast sobre: "{topic}"
 
-RULES — follow strictly:
-- Write as SPOKEN WORD: contractions, natural rhythm, as if talking to a friend
-- DO NOT use formal headings like "SEGMENT 1:" — just flow naturally
-- Each paragraph must introduce a NEW fact or angle — never restate something already said
-- If you find yourself repeating, stop and jump to the next new point
-- Aim for 800-1200 words of actual spoken content
+REGLAS — sigue estrictamente:
+- Escribe como PALABRA HABLADA: contracciones, ritmo natural, como si hablaras con un amigo
+- NO uses encabezados formales como "SEGMENTO 1:" — fluye de forma natural
+- Cada párrafo debe introducir un NUEVO hecho o ángulo — nunca repitas algo ya dicho
+- Si te encuentras repitiendo, para y salta al siguiente punto nuevo
+- Objetivo: 800-1200 palabras de contenido hablado real
 
-STRUCTURE (use natural transitions, not headers):
-1. Hook: open with the most surprising or dramatic fact
-2. Background: how did we get here?
-3. The key evidence or events (pick the 3 most interesting)
-4. Controversy or debate around the topic
-5. What does this mean / what happened next
+ESTRUCTURA (usa transiciones naturales, no encabezados):
+1. Gancho: abre con el hecho más sorprendente o dramático
+2. Contexto: ¿cómo llegamos aquí?
+3. Las evidencias o eventos clave (elige los 3 más interesantes)
+4. La controversia o debate sobre el tema
+5. ¿Qué significa esto / qué pasó después?
 
-RESEARCH MATERIAL:
+MATERIAL DE INVESTIGACIÓN:
 {context}
 
-Write the script now (spoken word only, no stage directions except occasional [PAUSE]):""",
+Escribe el guion ahora (solo palabra hablada, sin acotaciones excepto [PAUSA] ocasional):""",
 
     OutputType.BLOG: """\
-Write a blog post about: "{topic}"
+Escribe un artículo de blog sobre: "{topic}"
 
-RULES — follow strictly:
-- Each section under a heading must add NEW information not covered elsewhere
-- Do NOT summarize previous sections at the start of each new section
-- Do NOT repeat facts — if a fact appears once, do not mention it again
-- Use concrete details, numbers, names — avoid vague generalities
-- Target 1000-1500 words
+REGLAS — sigue estrictamente:
+- Cada sección bajo un encabezado debe añadir información NUEVA no cubierta en otro lugar
+- NO resumas secciones anteriores al inicio de cada nueva sección
+- NO repitas hechos — si un hecho aparece una vez, no lo menciones de nuevo
+- Usa detalles concretos, números, nombres — evita generalidades vagas
+- Objetivo: 1000-1500 palabras
 
-STRUCTURE:
-# [Compelling headline]
+ESTRUCTURA:
+# [Titular impactante]
 
-[Hook paragraph — the most surprising fact]
+[Párrafo gancho — el hecho más sorprendente]
 
-## Background
-[Context — what, when, who — only facts not covered elsewhere]
+## Contexto
+[Contexto — qué, cuándo, quién — solo hechos no cubiertos en otro lugar]
 
-## Key Facts
-[The most significant findings — each bullet must be distinct]
+## Hechos Clave
+[Los hallazgos más significativos — cada punto debe ser distinto]
 
-## Analysis / Significance
-[What this means — no repetition of Key Facts section]
+## Análisis / Importancia
+[Qué significa esto — sin repetir la sección de Hechos Clave]
 
-## Conclusion
-[Takeaway — no more than 2 sentences summarizing, then a forward-looking statement]
+## Conclusión
+[Conclusión — no más de 2 oraciones resumiendo, luego una declaración prospectiva]
 
-RESEARCH MATERIAL:
+MATERIAL DE INVESTIGACIÓN:
 {context}
 
-Write the complete blog post in markdown:""",
+Escribe el artículo completo en markdown:""",
 
     OutputType.REPORT: """\
-Write a research report about: "{topic}"
+Escribe un informe de investigación sobre: "{topic}"
 
-RULES — follow strictly:
-- Each numbered finding must be DISTINCT — no overlapping content
-- The Executive Summary must NOT repeat findings verbatim — only the 2-3 most critical points
-- Source quality and contradictions must reference specific claims, not generic statements
-- Be precise and concise — no filler
+REGLAS — sigue estrictamente:
+- Cada hallazgo numerado debe ser DISTINTO — sin contenido que se superponga
+- El Resumen Ejecutivo NO debe repetir los hallazgos literalmente — solo los 2-3 puntos más críticos
+- La calidad de las fuentes y contradicciones deben referenciar afirmaciones específicas, no declaraciones genéricas
+- Sé preciso y conciso — sin relleno
 
-STRUCTURE:
-1. Executive Summary (3-4 sentences, key takeaways only)
-2. Key Findings (5-10 numbered, each completely distinct)
-3. Evidence Analysis (what the sources show, with any contradictions)
-4. Timeline (if applicable — specific dates/events)
-5. Conclusions & Open Questions
+ESTRUCTURA:
+1. Resumen Ejecutivo (3-4 oraciones, solo puntos clave)
+2. Hallazgos Clave (5-10 numerados, cada uno completamente distinto)
+3. Análisis de Evidencia (lo que muestran las fuentes, con cualquier contradicción)
+4. Cronología (si aplica — fechas/eventos específicos)
+5. Conclusiones y Preguntas Abiertas
 
-RESEARCH MATERIAL:
+MATERIAL DE INVESTIGACIÓN:
 {context}
 
-Write the complete report in markdown:""",
+Escribe el informe completo en markdown:""",
 
     OutputType.THREAD: """\
-Write a Twitter/X thread about: "{topic}"
+Escribe un hilo de Twitter/X sobre: "{topic}"
 
-RULES — follow strictly:
-- Each tweet must reveal ONE new fact or idea — never restate a previous tweet
-- Max 280 characters per tweet (count carefully)
-- Number format: 1/ 2/ 3/ ... N/
-- Hook tweet must be the most surprising/provocative fact
-- Build toward a conclusion — do not repeat the hook at the end
-- 12-18 tweets total
+REGLAS — sigue estrictamente:
+- Cada tweet debe revelar UN nuevo hecho o idea — nunca repetir un tweet anterior
+- Máximo 280 caracteres por tweet (cuenta cuidadosamente)
+- Formato de numeración: 1/ 2/ 3/ ... N/
+- El tweet gancho debe ser el hecho más sorprendente/provocador
+- Avanza hacia una conclusión — no repitas el gancho al final
+- 12-18 tweets en total
 
-RESEARCH MATERIAL:
+MATERIAL DE INVESTIGACIÓN:
 {context}
 
-Write the thread (one tweet per line, nothing else):"""
+Escribe el hilo (un tweet por línea, nada más):"""
 }
 
 
@@ -170,15 +171,15 @@ class OutputGenerator:
         if len(context_words) > 6000:
             context = " ".join(context_words[:6000]) + "\n\n[... additional material truncated ...]"
 
+        backend = "Claude Haiku" if settings.anthropic_api_key else "Ollama"
         if progress_callback:
-            await progress_callback(f"✍️ Generating {output_type} with Ollama... (this takes 2-5 min)")
+            await progress_callback(f"✍️ Generando {output_type} con {backend}... (2-5 min)")
 
         # Build prompt
         system = self._get_system(output_type)
         prompt = PROMPTS[output_type].format(topic=topic, context=context)
 
-        # Generate — temperature=0.7 reduces repetition in small models
-        output = await self.ollama.generate(prompt, system=system, timeout=300, temperature=0.7)
+        output = await self._generate(prompt, system, output_type)
 
         # Add metadata header
         stats = await self.db.get_session_stats(session_id)
@@ -191,6 +192,49 @@ class OutputGenerator:
         logger.info("Output generated", type=output_type, length=len(full_output))
         return full_output
 
+    async def _generate(self, prompt: str, system: str, output_type: OutputType) -> str:
+        """Route generation to Claude when an API key is configured, else to Ollama."""
+        if settings.anthropic_api_key:
+            return await self._generate_with_claude(prompt, system, output_type)
+        return await self._generate_with_ollama(prompt, system)
+
+    async def _generate_with_claude(self, prompt: str, system: str, output_type: OutputType) -> str:
+        """Generate text with the Anthropic Messages API, falling back to Ollama.
+
+        Any exception raised while calling Claude, and a reply that contains no
+        text, both result in the same prompt being sent to Ollama instead.
+        """
+        # Imported here so the SDK is only needed when the Claude path is taken.
+        import anthropic
+
+        max_tokens = 4096 if output_type == OutputType.THREAD else 8192
+        try:
+            client = anthropic.AsyncAnthropic(api_key=settings.anthropic_api_key)
+            msg = await client.messages.create(
+                model=settings.claude_model,
+                max_tokens=max_tokens,
+                system=system,
+                messages=[{"role": "user", "content": prompt}],
+            )
+            # Join every text block instead of assuming content[0] is the only one.
+            text = "".join(
+                block.text for block in msg.content if block.type == "text"
+            ).strip()
+            if msg.stop_reason == "max_tokens":
+                logger.warning("Claude output truncated at max_tokens", max_tokens=max_tokens)
+            if text:
+                return text
+            logger.warning("Claude returned no text, falling back to Ollama")
+        except Exception as e:
+            # Deliberately broad: generation is best-effort and Ollama is the safety net.
+            logger.warning("Claude generation failed, falling back to Ollama", error=str(e))
+        return await self._generate_with_ollama(prompt, system)
+
+    async def _generate_with_ollama(self, prompt: str, system: str) -> str:
+        """Generate text with the Ollama client held in ``self.ollama``."""
+        # temperature=0.7 reduces repetition in small models
+        return await self.ollama.generate(prompt, system=system, timeout=300, temperature=0.7)
+
     def _get_rag_query(self, output_type: OutputType, topic: str) -> str:
         queries = {
             OutputType.PODCAST: f"{topic} story narrative facts interesting",