feat: /compare — análisis comparativo de dos temas en paralelo
Build & Deploy ResearchOwl / build-and-push (push) Successful in 34s

This commit is contained in:
ChemaVX
2026-05-06 06:40:31 +00:00
parent c2bb301103
commit e8034f3f37
2 changed files with 203 additions and 0 deletions
+144
View File
@@ -153,6 +153,7 @@ async def cmd_start(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
"`/sources` — List all sources found\n"
"`/outputs` — List generated outputs\n"
"`/export` — Exportar último output como PDF\n"
"`/compare <tema1> vs <tema2>` — Análisis comparativo\n"
"`/costs` — Show API usage costs\n"
"`/watch <topic> [h]` — Schedule periodic research\n"
"`/unwatch <topic>` — Remove a watch\n"
@@ -918,6 +919,148 @@ async def cmd_purge(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
await db_conn.close()
# Telegram rejects message text longer than 4096 characters; stay a bit below
# that so the result is sent as a document before the API would refuse it.
_COMPARE_INLINE_LIMIT = 4000
# Telegram caps media captions at 1024 characters.
_COMPARE_CAPTION_LIMIT = 1024


def _split_compare_topics(text):
    """Split ``"<a> vs <b>"`` (also ``vs.`` / ``versus``) into two topics.

    Returns a ``(topic_a, topic_b)`` tuple of stripped strings, or ``None``
    when the separator is missing or either side is empty.
    """
    import re

    parts = re.split(
        r"\s+vs\.?\s+|\s+versus\s+", text, maxsplit=1, flags=re.IGNORECASE
    )
    if len(parts) != 2:
        return None
    topic_a, topic_b = (part.strip() for part in parts)
    if not topic_a or not topic_b:
        return None
    return topic_a, topic_b


def _compare_code_span(topic):
    """Wrap *topic* in a Markdown code span that cannot be broken by its text."""
    # Legacy Markdown has no way to escape a backtick inside a code span, so a
    # backtick in the user's topic would make Telegram reject the message.
    return "`" + topic.replace("`", "'") + "`"


def _compare_filename(topic_a, topic_b):
    """Build a filesystem-safe ``.md`` filename from the two topics."""
    import re

    def slug(topic):
        # Keep word characters and dashes; collapse everything else (spaces,
        # slashes, quotes, ...) into single underscores.
        return re.sub(r"[^\w\-]+", "_", topic[:20]).strip("_") or "topic"

    return f"compare_{slug(topic_a)}_{slug(topic_b)}.md"


async def cmd_compare(update: Update, ctx: ContextTypes.DEFAULT_TYPE) -> None:
    """Handle ``/compare <topic1> vs <topic2>``.

    Validates the arguments, refuses to start while another task for this chat
    is still running, and then schedules a background task that:

    1. creates one research session per topic,
    2. scrapes (and, when Ollama is available, processes) both topics
       concurrently,
    3. collects context for each topic and calls ``generate_comparison``,
    4. stores the result on the first topic's session and sends it back,
       inline when short enough or as a ``.md`` document otherwise.

    The task is registered in ``_active_tasks`` under the chat id.
    """
    if not is_authorized(update.effective_user.id):
        return

    chat_id = update.effective_chat.id
    text = " ".join(ctx.args).strip() if ctx.args else ""

    topics = _split_compare_topics(text)
    if topics is None:
        await update.message.reply_text(
            "❌ Uso: `/compare <tema1> vs <tema2>`\n"
            "Ejemplo: `/compare energía solar vs energía nuclear`",
            parse_mode=ParseMode.MARKDOWN
        )
        return
    topic_a, topic_b = topics

    if chat_id in _active_tasks and not _active_tasks[chat_id].done():
        await update.message.reply_text(
            "⚠️ Ya hay una investigación en curso. Usa /cancel primero."
        )
        return

    shown_a = _compare_code_span(topic_a)
    shown_b = _compare_code_span(topic_b)

    msg = await update.message.reply_text(
        f"🔍 Comparando {shown_a} vs {shown_b}…\n"
        f"Esto lanzará dos investigaciones en paralelo y tardará varios minutos.",
        parse_mode=ParseMode.MARKDOWN
    )

    async def run_compare():
        # Connections are acquired inside the try block so that a failure on
        # the second one still closes the first in ``finally``.
        db_conn_a = None
        db_conn_b = None
        try:
            db_conn_a = await get_db()
            db_conn_b = await get_db()
            db_a = ResearchDB(db_conn_a)
            db_b = ResearchDB(db_conn_b)

            session_id_a = await db_a.create_session(topic_a, chat_id)
            session_id_b = await db_b.create_session(topic_b, chat_id)
            # Only the first topic's session is recorded as the chat's active one.
            _active_sessions[chat_id] = session_id_a

            await msg.edit_text(
                f"🔍 Investigando en paralelo:\n"
                f"• {shown_a}\n"
                f"• {shown_b}\n\n"
                f"Esto puede tardar 10-20 minutos…",
                parse_mode=ParseMode.MARKDOWN
            )

            async def research_topic(session_id, topic, db):
                """Scrape one topic and, if Ollama is reachable, process it."""
                scraper = ExhaustiveScraper(db, session_id, topic)
                await scraper.run()
                await db.update_session(
                    session_id, status=ResearchStatus.SATURATED
                )
                ollama = OllamaClient()
                if await ollama.is_available():
                    processor = ContentProcessor(db, ollama)
                    await processor.process_session(session_id, topic)

            await msg.edit_text(
                f"🔍 Scraping en paralelo:\n• {shown_a}\n• {shown_b}…",
                parse_mode=ParseMode.MARKDOWN
            )

            jobs = [
                asyncio.create_task(research_topic(session_id_a, topic_a, db_a)),
                asyncio.create_task(research_topic(session_id_b, topic_b, db_b)),
            ]
            try:
                await asyncio.gather(*jobs)
            except BaseException:
                # gather() does not stop the sibling when one job fails; cancel
                # and drain both so nothing keeps using the DB connections
                # after ``finally`` closes them.
                for job in jobs:
                    job.cancel()
                await asyncio.gather(*jobs, return_exceptions=True)
                raise

            await msg.edit_text(
                "✍️ Generando análisis comparativo…",
                parse_mode=ParseMode.MARKDOWN
            )

            ollama = OllamaClient()
            processor_a = ContentProcessor(db_a, ollama)
            processor_b = ContentProcessor(db_b, ollama)

            context_a = await processor_a.rag_query(session_id_a, topic_a, top_k=40)
            context_b = await processor_b.rag_query(session_id_b, topic_b, top_k=40)

            # Fall back to the stored top chunks when the RAG query is empty.
            if not context_a:
                chunks = await db_a.get_top_chunks(session_id_a, limit=20)
                context_a = "\n\n---\n\n".join(c["content"] for c in chunks)
            if not context_b:
                chunks = await db_b.get_top_chunks(session_id_b, limit=20)
                context_b = "\n\n---\n\n".join(c["content"] for c in chunks)

            from src.generator.generator import generate_comparison
            comparison = await generate_comparison(
                topic_a, topic_b, context_a, context_b, session_id_a, db_a
            )

            from datetime import datetime, timezone
            now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
            header = (
                f"---\n"
                f"ResearchOwl | COMPARISON\n"
                f"Topic A: {topic_a}\n"
                f"Topic B: {topic_b}\n"
                f"Generated: {now}\n"
                f"---\n\n"
            )
            full_output = header + comparison
            await db_a.save_output(session_id_a, OutputType.REPORT, full_output)

            if len(full_output) > _COMPARE_INLINE_LIMIT:
                import io
                caption = f"📊 Comparación: {topic_a} vs {topic_b}"
                await update.message.reply_document(
                    document=io.BytesIO(full_output.encode("utf-8")),
                    filename=_compare_filename(topic_a, topic_b),
                    caption=caption[:_COMPARE_CAPTION_LIMIT]
                )
                # Best effort: the status message is only clutter at this point.
                try:
                    await msg.delete()
                except Exception:
                    pass
            else:
                try:
                    await msg.edit_text(full_output, parse_mode=ParseMode.MARKDOWN)
                except Exception as e:
                    # Generated text is not guaranteed to be valid Telegram
                    # Markdown; deliver it unformatted rather than failing.
                    logger.warning(
                        "Compare output rejected as Markdown, sending plain text",
                        error=str(e),
                    )
                    await msg.edit_text(full_output)

        except asyncio.CancelledError:
            # Cancellation is reported to the user and intentionally not
            # re-raised, so the task finishes normally.
            try:
                await msg.edit_text("🛑 Comparación cancelada.")
            except Exception:
                pass
        except Exception as e:
            logger.error("Compare task failed", error=str(e))
            try:
                await msg.edit_text(f"❌ Error: {str(e)[:200]}")
            except Exception:
                pass
        finally:
            # Close each connection independently so one failing close does
            # not leak the other.
            for conn in (db_conn_a, db_conn_b):
                if conn is None:
                    continue
                try:
                    await conn.close()
                except Exception as e:
                    logger.warning(
                        "Failed to close compare DB connection", error=str(e)
                    )

    task = asyncio.create_task(run_compare())
    _active_tasks[chat_id] = task
def create_bot() -> Application:
app = (
Application.builder()
@@ -942,6 +1085,7 @@ def create_bot() -> Application:
app.add_handler(CommandHandler("process", cmd_process))
app.add_handler(CommandHandler("cancel", cmd_cancel))
app.add_handler(CommandHandler("purge", cmd_purge))
app.add_handler(CommandHandler("compare", cmd_compare))
return app