feat: /compare — análisis comparativo de dos temas en paralelo
Build & Deploy ResearchOwl / build-and-push (push) Successful in 34s

This commit is contained in:
ChemaVX
2026-05-06 06:40:31 +00:00
parent c2bb301103
commit e8034f3f37
2 changed files with 203 additions and 0 deletions
+144
View File
@@ -153,6 +153,7 @@ async def cmd_start(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
"`/sources` — List all sources found\n" "`/sources` — List all sources found\n"
"`/outputs` — List generated outputs\n" "`/outputs` — List generated outputs\n"
"`/export` — Exportar último output como PDF\n" "`/export` — Exportar último output como PDF\n"
"`/compare <tema1> vs <tema2>` — Análisis comparativo\n"
"`/costs` — Show API usage costs\n" "`/costs` — Show API usage costs\n"
"`/watch <topic> [h]` — Schedule periodic research\n" "`/watch <topic> [h]` — Schedule periodic research\n"
"`/unwatch <topic>` — Remove a watch\n" "`/unwatch <topic>` — Remove a watch\n"
@@ -918,6 +919,148 @@ async def cmd_purge(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
await db_conn.close() await db_conn.close()
async def cmd_compare(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
if not is_authorized(update.effective_user.id):
return
chat_id = update.effective_chat.id
text = " ".join(ctx.args).strip() if ctx.args else ""
import re
match = re.split(r'\s+vs\.?\s+|\s+versus\s+', text, maxsplit=1, flags=re.IGNORECASE)
if len(match) != 2 or not match[0].strip() or not match[1].strip():
await update.message.reply_text(
"❌ Uso: `/compare <tema1> vs <tema2>`\n"
"Ejemplo: `/compare energía solar vs energía nuclear`",
parse_mode=ParseMode.MARKDOWN
)
return
topic_a = match[0].strip()
topic_b = match[1].strip()
if chat_id in _active_tasks and not _active_tasks[chat_id].done():
await update.message.reply_text(
"⚠️ Ya hay una investigación en curso. Usa /cancel primero."
)
return
msg = await update.message.reply_text(
f"🔍 Comparando `{topic_a}` vs `{topic_b}`…\n"
f"Esto lanzará dos investigaciones en paralelo y tardará varios minutos.",
parse_mode=ParseMode.MARKDOWN
)
async def run_compare():
db_conn_a = await get_db()
db_conn_b = await get_db()
db_a = ResearchDB(db_conn_a)
db_b = ResearchDB(db_conn_b)
try:
session_id_a = await db_a.create_session(topic_a, chat_id)
session_id_b = await db_b.create_session(topic_b, chat_id)
_active_sessions[chat_id] = session_id_a
await msg.edit_text(
f"🔍 Investigando en paralelo:\n"
f"• `{topic_a}`\n"
f"• `{topic_b}`\n\n"
f"Esto puede tardar 10-20 minutos…",
parse_mode=ParseMode.MARKDOWN
)
async def research_topic(session_id, topic, db):
scraper = ExhaustiveScraper(db, session_id, topic)
await scraper.run()
await db.update_session(session_id, status=ResearchStatus.SATURATED)
ollama = OllamaClient()
if await ollama.is_available():
processor = ContentProcessor(db, ollama)
await processor.process_session(session_id, topic)
await msg.edit_text(
f"🔍 Scraping en paralelo:\n• `{topic_a}`\n• `{topic_b}`…",
parse_mode=ParseMode.MARKDOWN
)
await asyncio.gather(
research_topic(session_id_a, topic_a, db_a),
research_topic(session_id_b, topic_b, db_b),
)
await msg.edit_text(
"✍️ Generando análisis comparativo…",
parse_mode=ParseMode.MARKDOWN
)
ollama = OllamaClient()
processor_a = ContentProcessor(db_a, ollama)
processor_b = ContentProcessor(db_b, ollama)
context_a = await processor_a.rag_query(session_id_a, topic_a, top_k=40)
context_b = await processor_b.rag_query(session_id_b, topic_b, top_k=40)
if not context_a:
chunks = await db_a.get_top_chunks(session_id_a, limit=20)
context_a = "\n\n---\n\n".join(c["content"] for c in chunks)
if not context_b:
chunks = await db_b.get_top_chunks(session_id_b, limit=20)
context_b = "\n\n---\n\n".join(c["content"] for c in chunks)
from src.generator.generator import generate_comparison
comparison = await generate_comparison(
topic_a, topic_b, context_a, context_b, session_id_a, db_a
)
from datetime import datetime, timezone
now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
header = (
f"---\n"
f"ResearchOwl | COMPARISON\n"
f"Topic A: {topic_a}\n"
f"Topic B: {topic_b}\n"
f"Generated: {now}\n"
f"---\n\n"
)
full_output = header + comparison
await db_a.save_output(session_id_a, OutputType.REPORT, full_output)
if len(full_output) > 8000:
import io
filename = (
f"compare_{topic_a[:20]}_{topic_b[:20]}.md"
.replace(" ", "_")
)
await update.message.reply_document(
document=io.BytesIO(full_output.encode()),
filename=filename,
caption=f"📊 Comparación: {topic_a} vs {topic_b}"
)
try:
await msg.delete()
except Exception:
pass
else:
await msg.edit_text(full_output, parse_mode=ParseMode.MARKDOWN)
except asyncio.CancelledError:
await msg.edit_text("🛑 Comparación cancelada.")
except Exception as e:
logger.error("Compare task failed", error=str(e))
try:
await msg.edit_text(f"❌ Error: {str(e)[:200]}")
except Exception:
pass
finally:
await db_conn_a.close()
await db_conn_b.close()
task = asyncio.create_task(run_compare())
_active_tasks[chat_id] = task
def create_bot() -> Application: def create_bot() -> Application:
app = ( app = (
Application.builder() Application.builder()
@@ -942,6 +1085,7 @@ def create_bot() -> Application:
app.add_handler(CommandHandler("process", cmd_process)) app.add_handler(CommandHandler("process", cmd_process))
app.add_handler(CommandHandler("cancel", cmd_cancel)) app.add_handler(CommandHandler("cancel", cmd_cancel))
app.add_handler(CommandHandler("purge", cmd_purge)) app.add_handler(CommandHandler("purge", cmd_purge))
app.add_handler(CommandHandler("compare", cmd_compare))
return app return app
+59
View File
@@ -653,3 +653,62 @@ async def generate_diff_summary(
f"{len(added_urls)} fuentes nuevas\n" f"{len(added_urls)} fuentes nuevas\n"
f"Usa /generate report para ver el análisis completo." f"Usa /generate report para ver el análisis completo."
) )
async def generate_comparison(
topic_a: str,
topic_b: str,
context_a: str,
context_b: str,
session_id_a: int,
db,
) -> str:
from src.config import settings
import structlog
cmp_logger = structlog.get_logger()
if not settings.anthropic_api_key:
raise ValueError("Claude API key required for comparison")
prompt = (
f'Eres un analista experto. Compara en profundidad estos dos temas:\n'
f'TEMA A: "{topic_a}"\n'
f'TEMA B: "{topic_b}"\n\n'
f'Escribe el análisis en español con esta estructura:\n\n'
f'## Resumen comparativo\n'
f'(2-3 párrafos con las diferencias y similitudes más importantes)\n\n'
f'## {topic_a}\n'
f'(Puntos clave únicos de este tema)\n\n'
f'## {topic_b}\n'
f'(Puntos clave únicos de este tema)\n\n'
f'## Similitudes\n'
f'(Qué tienen en común)\n\n'
f'## Diferencias clave\n'
f'(Tabla markdown o lista de las diferencias más relevantes)\n\n'
f'## Conclusión\n'
f'(Cuál es mejor/más relevante según el contexto, o qué conclusión se extrae)\n\n'
f'---\n'
f'MATERIAL DE INVESTIGACIÓN — {topic_a}:\n{context_a}\n\n'
f'---\n'
f'MATERIAL DE INVESTIGACIÓN — {topic_b}:\n{context_b}\n'
)
try:
import anthropic
client = anthropic.AsyncAnthropic(api_key=settings.anthropic_api_key)
msg = await client.messages.create(
model=settings.claude_model,
max_tokens=8192,
messages=[{"role": "user", "content": prompt}]
)
try:
await db.log_api_call(
session_id_a, "comparison", settings.claude_model,
msg.usage.input_tokens, msg.usage.output_tokens
)
except Exception:
pass
return msg.content[0].text.strip()
except Exception as e:
cmp_logger.error("Comparison generation failed", error=str(e))
raise