diff --git a/src/bot/bot.py b/src/bot/bot.py index 6e97016..a4bf5c4 100644 --- a/src/bot/bot.py +++ b/src/bot/bot.py @@ -153,6 +153,7 @@ async def cmd_start(update: Update, ctx: ContextTypes.DEFAULT_TYPE): "`/sources` — List all sources found\n" "`/outputs` — List generated outputs\n" "`/export` — Exportar último output como PDF\n" + "`/compare vs ` — Análisis comparativo\n" "`/costs` — Show API usage costs\n" "`/watch [h]` — Schedule periodic research\n" "`/unwatch ` — Remove a watch\n" @@ -918,6 +919,148 @@ async def cmd_purge(update: Update, ctx: ContextTypes.DEFAULT_TYPE): await db_conn.close() +async def cmd_compare(update: Update, ctx: ContextTypes.DEFAULT_TYPE): + if not is_authorized(update.effective_user.id): + return + + chat_id = update.effective_chat.id + text = " ".join(ctx.args).strip() if ctx.args else "" + + import re + match = re.split(r'\s+vs\.?\s+|\s+versus\s+', text, maxsplit=1, flags=re.IGNORECASE) + if len(match) != 2 or not match[0].strip() or not match[1].strip(): + await update.message.reply_text( + "❌ Uso: `/compare vs `\n" + "Ejemplo: `/compare energía solar vs energía nuclear`", + parse_mode=ParseMode.MARKDOWN + ) + return + + topic_a = match[0].strip() + topic_b = match[1].strip() + + if chat_id in _active_tasks and not _active_tasks[chat_id].done(): + await update.message.reply_text( + "⚠️ Ya hay una investigación en curso. Usa /cancel primero." + ) + return + + msg = await update.message.reply_text( + f"🔍 Comparando `{topic_a}` vs `{topic_b}`…\n" + f"Esto lanzará dos investigaciones en paralelo y tardará varios minutos.", + parse_mode=ParseMode.MARKDOWN + ) + + async def run_compare(): + db_conn_a = await get_db() + db_conn_b = await get_db() + db_a = ResearchDB(db_conn_a) + db_b = ResearchDB(db_conn_b) + + try: + session_id_a = await db_a.create_session(topic_a, chat_id) + session_id_b = await db_b.create_session(topic_b, chat_id) + _active_sessions[chat_id] = session_id_a + + await msg.edit_text( + f"🔍 Investigando en paralelo:\n" + f"• `{topic_a}`\n" + f"• `{topic_b}`\n\n" + f"Esto puede tardar 10-20 minutos…", + parse_mode=ParseMode.MARKDOWN + ) + + async def research_topic(session_id, topic, db): + scraper = ExhaustiveScraper(db, session_id, topic) + await scraper.run() + await db.update_session(session_id, status=ResearchStatus.SATURATED) + ollama = OllamaClient() + if await ollama.is_available(): + processor = ContentProcessor(db, ollama) + await processor.process_session(session_id, topic) + + await msg.edit_text( + f"🔍 Scraping en paralelo:\n• `{topic_a}`\n• `{topic_b}`…", + parse_mode=ParseMode.MARKDOWN + ) + + await asyncio.gather( + research_topic(session_id_a, topic_a, db_a), + research_topic(session_id_b, topic_b, db_b), + ) + + await msg.edit_text( + "✍️ Generando análisis comparativo…", + parse_mode=ParseMode.MARKDOWN + ) + + ollama = OllamaClient() + processor_a = ContentProcessor(db_a, ollama) + processor_b = ContentProcessor(db_b, ollama) + + context_a = await processor_a.rag_query(session_id_a, topic_a, top_k=40) + context_b = await processor_b.rag_query(session_id_b, topic_b, top_k=40) + + if not context_a: + chunks = await db_a.get_top_chunks(session_id_a, limit=20) + context_a = "\n\n---\n\n".join(c["content"] for c in chunks) + if not context_b: + chunks = await db_b.get_top_chunks(session_id_b, limit=20) + context_b = "\n\n---\n\n".join(c["content"] for c in chunks) + + from src.generator.generator import generate_comparison + comparison = await generate_comparison( + topic_a, topic_b, context_a, context_b, session_id_a, db_a + ) + + from datetime import datetime, timezone + now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC") + header = ( + f"---\n" + f"ResearchOwl | COMPARISON\n" + f"Topic A: {topic_a}\n" + f"Topic B: {topic_b}\n" + f"Generated: {now}\n" + f"---\n\n" + ) + full_output = header + comparison + + await db_a.save_output(session_id_a, OutputType.REPORT, full_output) + + if len(full_output) > 8000: + import io + filename = ( + f"compare_{topic_a[:20]}_{topic_b[:20]}.md" + .replace(" ", "_") + ) + await update.message.reply_document( + document=io.BytesIO(full_output.encode()), + filename=filename, + caption=f"📊 Comparación: {topic_a} vs {topic_b}" + ) + try: + await msg.delete() + except Exception: + pass + else: + await msg.edit_text(full_output, parse_mode=ParseMode.MARKDOWN) + + except asyncio.CancelledError: + await msg.edit_text("🛑 Comparación cancelada.") + except Exception as e: + logger.error("Compare task failed", error=str(e)) + try: + await msg.edit_text(f"❌ Error: {str(e)[:200]}") + except Exception: + pass + finally: + await db_conn_a.close() + await db_conn_b.close() + + task = asyncio.create_task(run_compare()) + _active_tasks[chat_id] = task + + def create_bot() -> Application: app = ( Application.builder() @@ -942,6 +1085,7 @@ def create_bot() -> Application: app.add_handler(CommandHandler("process", cmd_process)) app.add_handler(CommandHandler("cancel", cmd_cancel)) app.add_handler(CommandHandler("purge", cmd_purge)) + app.add_handler(CommandHandler("compare", cmd_compare)) return app diff --git a/src/generator/generator.py b/src/generator/generator.py index 5ddeb5f..d759b0a 100644 --- a/src/generator/generator.py +++ b/src/generator/generator.py @@ -653,3 +653,62 @@ async def generate_diff_summary( f"• {len(added_urls)} fuentes nuevas\n" f"Usa /generate report para ver el análisis completo." ) + + +async def generate_comparison( + topic_a: str, + topic_b: str, + context_a: str, + context_b: str, + session_id_a: int, + db, +) -> str: + from src.config import settings + import structlog + cmp_logger = structlog.get_logger() + + if not settings.anthropic_api_key: + raise ValueError("Claude API key required for comparison") + + prompt = ( + f'Eres un analista experto. Compara en profundidad estos dos temas:\n' + f'TEMA A: "{topic_a}"\n' + f'TEMA B: "{topic_b}"\n\n' + f'Escribe el análisis en español con esta estructura:\n\n' + f'## Resumen comparativo\n' + f'(2-3 párrafos con las diferencias y similitudes más importantes)\n\n' + f'## {topic_a}\n' + f'(Puntos clave únicos de este tema)\n\n' + f'## {topic_b}\n' + f'(Puntos clave únicos de este tema)\n\n' + f'## Similitudes\n' + f'(Qué tienen en común)\n\n' + f'## Diferencias clave\n' + f'(Tabla markdown o lista de las diferencias más relevantes)\n\n' + f'## Conclusión\n' + f'(Cuál es mejor/más relevante según el contexto, o qué conclusión se extrae)\n\n' + f'---\n' + f'MATERIAL DE INVESTIGACIÓN — {topic_a}:\n{context_a}\n\n' + f'---\n' + f'MATERIAL DE INVESTIGACIÓN — {topic_b}:\n{context_b}\n' + ) + + try: + import anthropic + client = anthropic.AsyncAnthropic(api_key=settings.anthropic_api_key) + msg = await client.messages.create( + model=settings.claude_model, + max_tokens=8192, + messages=[{"role": "user", "content": prompt}] + ) + try: + await db.log_api_call( + session_id_a, "comparison", settings.claude_model, + msg.usage.input_tokens, msg.usage.output_tokens + ) + except Exception: + pass + return msg.content[0].text.strip() + except Exception as e: + cmp_logger.error("Comparison generation failed", error=str(e)) + raise