diff --git a/src/bot/bot.py b/src/bot/bot.py index 9576169..ca41a5d 100644 --- a/src/bot/bot.py +++ b/src/bot/bot.py @@ -277,6 +277,7 @@ async def cmd_generate(update: Update, ctx: ContextTypes.DEFAULT_TYPE): chat_id = update.effective_chat.id output_arg = ctx.args[0].lower() if ctx.args else "" + lang = "en" if len(ctx.args) > 1 and ctx.args[1].lower() == "en" else "es" type_map = { "podcast": OutputType.PODCAST, @@ -326,9 +327,11 @@ async def cmd_generate(update: Update, ctx: ContextTypes.DEFAULT_TYPE): session = dict(row) session_id = session["id"] + backend = "Claude Haiku" if settings.anthropic_api_key else f"Ollama ({settings.ollama_model})" + lang_label = " (EN)" if lang == "en" else "" msg = await update.message.reply_text( - f"⚙️ Generating *{output_type}* for: `{session['topic']}`\n" - f"Using Ollama ({settings.ollama_model})...\n" + f"⚙️ Generating *{output_type}{lang_label}* for: `{session['topic']}`\n" + f"Using {backend}...\n" f"This may take 2-5 minutes ☕", parse_mode=ParseMode.MARKDOWN ) @@ -343,7 +346,7 @@ async def cmd_generate(update: Update, ctx: ContextTypes.DEFAULT_TYPE): processor = ContentProcessor(db, ollama) generator = OutputGenerator(db, ollama, processor) - output = await generator.generate(session_id, output_type, gen_progress) + output = await generator.generate(session_id, output_type, gen_progress, lang=lang) # Send as file if very long if len(output) > 8000: diff --git a/src/config.py b/src/config.py index c4af2e0..c0c4e25 100644 --- a/src/config.py +++ b/src/config.py @@ -36,6 +36,8 @@ class Settings(BaseSettings): # Ghost CMS ghost_url: Optional[str] = Field(None, env="GHOST_URL") ghost_api_key: Optional[str] = Field(None, env="GHOST_API_KEY") + ghost_url_en: str = Field("", env="GHOST_URL_EN") + ghost_api_key_en: str = Field("", env="GHOST_API_KEY_EN") # Alerts cost_alert_threshold: float = Field(0.15, env="COST_ALERT_THRESHOLD") diff --git a/src/generator/generator.py b/src/generator/generator.py index 5486557..5729a43 100644 --- a/src/generator/generator.py +++ b/src/generator/generator.py @@ -33,6 +33,45 @@ BLOG_SYSTEM = ( "Cada sección debe añadir información nueva no cubierta en secciones anteriores." ) +BLOG_SYSTEM_EN = ( + "You write ALWAYS in English. " + "You are a journalist writing a blog article. Use clear markdown headers. " + "NEVER repeat the same fact or phrase twice — if you said it, move on. " + "Each section must add new information not covered in other sections." +) + +BLOG_PROMPT_EN = """\ +Write a blog article about: "{topic}" + +RULES — follow strictly: +- Each section under a heading must add NEW information not covered elsewhere +- Do NOT summarize previous sections at the start of each new section +- Do NOT repeat facts — if a fact appears once, do not mention it again +- Use concrete details, numbers, names — avoid vague generalities +- Target: 1000-1500 words + +STRUCTURE: +# [Impactful headline] + +[Hook paragraph — the most surprising fact] + +## Background +[Context — what, when, who — only facts not covered elsewhere] + +## Key Facts +[Most significant findings — each point must be distinct] + +## Analysis / Significance +[What this means — without repeating the Key Facts section] + +## Conclusion +[No more than 2 sentences summarizing, then a forward-looking statement] + +RESEARCH MATERIAL: +{context} + +Write the complete article in markdown:""" + REPORT_SYSTEM = ( "Escribe SIEMPRE en español. " "Eres un analista de investigación. Escribe un informe estructurado y factual. " @@ -239,9 +278,13 @@ def _strip_researchowl_header(content: str) -> str: class GhostPublisher: - def __init__(self): - self.url = (settings.ghost_url or "").rstrip("/") - self.api_key = settings.ghost_api_key or "" + def __init__(self, lang: str = "es"): + if lang == "en": + self.url = (settings.ghost_url_en or "").rstrip("/") + self.api_key = settings.ghost_api_key_en or "" + else: + self.url = (settings.ghost_url or "").rstrip("/") + self.api_key = settings.ghost_api_key or "" def is_configured(self) -> bool: return bool(self.url and self.api_key) @@ -318,12 +361,13 @@ class OutputGenerator: self.processor = processor async def generate(self, session_id: int, output_type: OutputType, - progress_callback=None) -> str: + progress_callback=None, lang: str = "es") -> str: """Generate an output for a research session""" if output_type in (OutputType.REPORT_EXTENDED, OutputType.BLOG_EXTENDED, OutputType.PODCAST_EXTENDED): - return await self.generate_extended(session_id, output_type, progress_callback) + return await self.generate_extended(session_id, output_type, progress_callback, + lang=lang) session = await self.db.get_session(session_id) if not session: @@ -355,6 +399,10 @@ class OutputGenerator: system = self._get_system(output_type) prompt = PROMPTS[output_type].format(topic=topic, context=context) + if lang == "en" and output_type == OutputType.BLOG: + system = BLOG_SYSTEM_EN + prompt = BLOG_PROMPT_EN.format(topic=topic, context=context) + output = await self._generate(prompt, system, output_type, session_id) # Add metadata header @@ -368,7 +416,7 @@ class OutputGenerator: # Auto-publish to Ghost for blog outputs ghost_notice = "" if output_type in (OutputType.BLOG, OutputType.BLOG_EXTENDED): - ghost = GhostPublisher() + ghost = GhostPublisher(lang=lang) if ghost.is_configured(): try: title = _extract_title(full_output) or topic @@ -439,7 +487,7 @@ class OutputGenerator: return systems.get(output_type, "You are a helpful research assistant.") async def generate_extended(self, session_id: int, output_type: OutputType, - progress_callback=None) -> str: + progress_callback=None, lang: str = "es") -> str: """ Generación por secciones para outputs exhaustivos. 1. Recupera muestra de contexto para el outline @@ -496,6 +544,8 @@ class OutputGenerator: # Paso 3: generar cada sección base_output_type = OutputType(base_type) system = self._get_system(base_output_type) + if lang == "en" and output_type == OutputType.BLOG_EXTENDED: + system = BLOG_SYSTEM_EN sections_text = [] for i, section in enumerate(sections, 1): @@ -512,6 +562,7 @@ class OutputGenerator: if not section_context: section_context = context_summary + lang_rule = "- Write in English\n" if lang == "en" else "- Escribe en español\n" section_prompt = ( f"Escribe la sección '{title}' del {base_type} sobre: '{topic}'\n\n" f"REGLAS:\n" @@ -520,8 +571,8 @@ class OutputGenerator: f"- No incluyas encabezados del documento completo, solo el contenido de esta sección\n" f"- Objetivo: aproximadamente {target_words} palabras\n" f"- Usa SOLO información del material siguiente — no inventes datos\n" - f"- Escribe en español\n\n" - f"MATERIAL:\n{section_context}" + f"{lang_rule}" + f"\nMATERIAL:\n{section_context}" ) section_text = await self._generate( @@ -540,7 +591,7 @@ class OutputGenerator: # Auto-publish to Ghost for extended blog outputs ghost_notice = "" if output_type == OutputType.BLOG_EXTENDED: - ghost = GhostPublisher() + ghost = GhostPublisher(lang=lang) if ghost.is_configured(): try: title = _extract_title(full_output) or topic