feat: modo diff para /watch — notifica solo si hay novedades reales
Build & Deploy ResearchOwl / build-and-push (push) Successful in 7s
Build & Deploy ResearchOwl / build-and-push (push) Successful in 7s
This commit is contained in:
@@ -574,3 +574,82 @@ def generate_pdf(content: str, title: str = "ResearchOwl Output") -> bytes:
|
||||
|
||||
doc.build(story)
|
||||
return buf.getvalue()
|
||||
|
||||
|
||||
async def generate_diff_summary(
    topic: str,
    new_urls: set,
    old_urls: set,
    new_chunks: list,
    session_id: int,
    db,
) -> str | None:
    """Build a short "what's new" message for a topic, or ``None`` if nothing is new.

    The function compares the URLs of the current run against the previous
    one and only produces a message when enough new sources appeared. When an
    Anthropic API key is configured, the new chunks are summarized by the
    configured Claude model; otherwise a plain count-based message is returned.

    Args:
        topic: Topic name; interpolated into the prompt and the messages.
        new_urls: URLs found in the current run.
        old_urls: URLs known from the previous run.
        new_chunks: Chunk mappings from the current run. Each one is read via
            ``source_type``, ``title`` and ``content`` keys; missing or
            ``None`` values are tolerated.
        session_id: Identifier passed through to ``db.log_api_call``.
        db: Object exposing an awaitable ``log_api_call(session_id, kind,
            model, input_tokens, output_tokens)``.

    Returns:
        The notification text, or ``None`` when there are too few new
        sources, no chunks, or the model reports no genuinely new information.
        If the model call fails, a count-based fallback message is returned
        instead of raising.
    """
    from src.config import settings
    import structlog

    diff_logger = structlog.get_logger()

    # A run is "significant" if at least 20% of its URLs are new OR it
    # brought at least 5 new URLs in absolute terms.
    min_new_ratio = 0.20
    min_new_count = 5
    # Bounds on how much material is sent to the model.
    max_chunks = 20
    max_chunk_chars = 400
    # Sentinel the prompt asks the model to answer with when nothing is new.
    no_news_marker = "SIN_NOVEDADES"

    added_urls = new_urls - old_urls
    # max(..., 1) avoids ZeroDivisionError when the current run found no URLs.
    pct_new = len(added_urls) / max(len(new_urls), 1)

    diff_logger.info(
        "Diff analysis",
        topic=topic,
        new_urls=len(new_urls),
        old_urls=len(old_urls),
        added=len(added_urls),
        pct_new=round(pct_new, 2),
    )

    if pct_new < min_new_ratio and len(added_urls) < min_new_count:
        diff_logger.info("Diff: no significant new sources", topic=topic)
        return None

    if not new_chunks:
        return None

    if not settings.anthropic_api_key:
        # No LLM available: report raw counts only.
        return (
            f"📊 *Novedades detectadas sobre {topic}*\n\n"
            f"• {len(added_urls)} fuentes nuevas encontradas\n"
            f"• {len(new_chunks)} chunks de contenido procesados\n\n"
            f"Usa /generate report para ver el análisis completo."
        )

    # Built only when it will actually be sent. `or` fallbacks cover both a
    # missing key and an explicit None value, so one malformed chunk cannot
    # abort the whole notification.
    context = "\n\n---\n\n".join(
        f"[{(c.get('source_type') or 'web').upper()}] {c.get('title') or ''}\n"
        f"{(c.get('content') or '')[:max_chunk_chars]}"
        for c in new_chunks[:max_chunks]
    )

    try:
        import anthropic

        client = anthropic.AsyncAnthropic(api_key=settings.anthropic_api_key)
        prompt = (
            f'Analiza el siguiente material de investigación sobre "{topic}" '
            f'y genera un resumen BREVE (máximo 300 palabras) de las novedades '
            f'más importantes encontradas. Escribe en español.\n\n'
            f'Si el contenido es muy similar a investigaciones anteriores o no '
            f'contiene información genuinamente nueva, responde SOLO con: '
            f'"SIN_NOVEDADES"\n\n'
            f'Material nuevo:\n{context}'
        )
        msg = await client.messages.create(
            model=settings.claude_model,
            max_tokens=500,
            messages=[{"role": "user", "content": prompt}],
        )

        # Record usage right after the call: tokens are consumed even when
        # the model answers with the no-news sentinel. Logging is best-effort
        # and must never block the notification, but failures are surfaced.
        try:
            await db.log_api_call(
                session_id,
                "diff",
                settings.claude_model,
                msg.usage.input_tokens,
                msg.usage.output_tokens,
            )
        except Exception as log_err:
            diff_logger.warning(
                "Diff: failed to log API usage", topic=topic, error=str(log_err)
            )

        summary = msg.content[0].text.strip()

        # The prompt shows the sentinel in quotes, so the model may echo it
        # quoted, punctuated or emphasized; normalize before comparing. An
        # empty answer is treated as "no news" as well.
        normalized = summary.strip(" \t\r\n\"'`*.").upper()
        if not normalized or normalized == no_news_marker:
            diff_logger.info("Diff: Claude found no new information", topic=topic)
            return None

        return f"🔔 *Novedades — {topic}*\n\n{summary}\n\nUsa /generate para report completo."

    except Exception as e:
        # Top-level boundary: any SDK/network/response-shape failure degrades
        # to a count-based message instead of propagating to the caller.
        diff_logger.warning("Diff summary generation failed", error=str(e))
        return (
            f"📊 *Actualización — {topic}*\n\n"
            f"• {len(added_urls)} fuentes nuevas\n"
            f"Usa /generate report para ver el análisis completo."
        )
|
||||
|
||||
Reference in New Issue
Block a user