feat: modo diff para /watch — notifica solo si hay novedades reales
Build & Deploy ResearchOwl / build-and-push (push) Successful in 7s
Build & Deploy ResearchOwl / build-and-push (push) Successful in 7s
This commit is contained in:
+39
-3
@@ -73,7 +73,8 @@ async def send_chunked(message: Message, text: str, parse_mode=None):
|
|||||||
|
|
||||||
async def run_scheduled_research(bot, chat_id: int, topic: str,
|
async def run_scheduled_research(bot, chat_id: int, topic: str,
|
||||||
session_id: int, db: ResearchDB,
|
session_id: int, db: ResearchDB,
|
||||||
progress_message=None):
|
progress_message=None,
|
||||||
|
silent_completion: bool = False):
|
||||||
if progress_message is not None:
|
if progress_message is not None:
|
||||||
reporter = ProgressReporter(progress_message)
|
reporter = ProgressReporter(progress_message)
|
||||||
else:
|
else:
|
||||||
@@ -106,6 +107,11 @@ async def run_scheduled_research(bot, chat_id: int, topic: str,
|
|||||||
|
|
||||||
await processor.process_session(session_id, topic, proc_progress)
|
await processor.process_session(session_id, topic, proc_progress)
|
||||||
chunk_count = await db.get_chunks_count(session_id)
|
chunk_count = await db.get_chunks_count(session_id)
|
||||||
|
if silent_completion:
|
||||||
|
await reporter.done(
|
||||||
|
f"🔍 Investigación completada — analizando novedades…"
|
||||||
|
)
|
||||||
|
else:
|
||||||
await reporter.done(
|
await reporter.done(
|
||||||
f"✅ Listo — `{scraped}` fuentes · `{chunk_count}` chunks · usa /generate <tipo>"
|
f"✅ Listo — `{scraped}` fuentes · `{chunk_count}` chunks · usa /generate <tipo>"
|
||||||
)
|
)
|
||||||
@@ -726,11 +732,41 @@ async def _scheduler_loop(app: Application):
|
|||||||
_active_sessions[chat_id] = session_id
|
_active_sessions[chat_id] = session_id
|
||||||
await db.update_watch_run(watch["id"])
|
await db.update_watch_run(watch["id"])
|
||||||
|
|
||||||
async def _task(c=chat_id, t=topic, s=session_id):
|
async def _task(c=chat_id, t=topic, s=session_id, w_id=watch["id"]):
|
||||||
inner_db_conn = await get_db()
|
inner_db_conn = await get_db()
|
||||||
inner_db = ResearchDB(inner_db_conn)
|
inner_db = ResearchDB(inner_db_conn)
|
||||||
try:
|
try:
|
||||||
await run_scheduled_research(app.bot, c, t, s, inner_db)
|
await run_scheduled_research(app.bot, c, t, s, inner_db,
|
||||||
|
silent_completion=True)
|
||||||
|
|
||||||
|
prev_session = await inner_db.get_previous_session(c, t, s)
|
||||||
|
new_urls = await inner_db.get_session_urls(s)
|
||||||
|
old_urls = await inner_db.get_session_urls(prev_session["id"]) \
|
||||||
|
if prev_session else set()
|
||||||
|
new_chunks = await inner_db.get_top_chunks(s, limit=30)
|
||||||
|
|
||||||
|
try:
|
||||||
|
from src.generator.generator import generate_diff_summary
|
||||||
|
summary = await generate_diff_summary(
|
||||||
|
t, new_urls, old_urls, new_chunks, s, inner_db
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning("Diff summary failed", error=str(e))
|
||||||
|
summary = (
|
||||||
|
f"📊 *Actualización disponible — {t}*\n\n"
|
||||||
|
f"Usa /generate report para ver el análisis completo."
|
||||||
|
)
|
||||||
|
|
||||||
|
if summary:
|
||||||
|
await app.bot.send_message(
|
||||||
|
c, summary, parse_mode=ParseMode.MARKDOWN
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
await app.bot.send_message(
|
||||||
|
c,
|
||||||
|
f"🔄 *{t}* — sin novedades significativas esta vez.",
|
||||||
|
parse_mode=ParseMode.MARKDOWN
|
||||||
|
)
|
||||||
finally:
|
finally:
|
||||||
await inner_db_conn.close()
|
await inner_db_conn.close()
|
||||||
|
|
||||||
|
|||||||
@@ -159,6 +159,27 @@ class ResearchDB:
|
|||||||
row = await cursor.fetchone()
|
row = await cursor.fetchone()
|
||||||
return dict(row) if row else None
|
return dict(row) if row else None
|
||||||
|
|
||||||
|
async def get_session_urls(self, session_id: int) -> set:
|
||||||
|
async with self.db.execute(
|
||||||
|
"SELECT url FROM sources WHERE session_id = ?", (session_id,)
|
||||||
|
) as cur:
|
||||||
|
rows = await cur.fetchall()
|
||||||
|
return {r[0] for r in rows}
|
||||||
|
|
||||||
|
async def get_previous_session(self, chat_id: int, topic: str,
|
||||||
|
exclude_session_id: int) -> Optional[dict]:
|
||||||
|
async with self.db.execute(
|
||||||
|
"""SELECT id, topic, status, created_at FROM research_sessions
|
||||||
|
WHERE telegram_chat_id = ? AND topic = ? AND id != ?
|
||||||
|
ORDER BY created_at DESC LIMIT 1""",
|
||||||
|
(chat_id, topic, exclude_session_id)
|
||||||
|
) as cur:
|
||||||
|
row = await cur.fetchone()
|
||||||
|
if not row:
|
||||||
|
return None
|
||||||
|
return {"id": row[0], "topic": row[1],
|
||||||
|
"status": row[2], "created_at": row[3]}
|
||||||
|
|
||||||
async def get_active_session(self, chat_id: int) -> Optional[dict]:
|
async def get_active_session(self, chat_id: int) -> Optional[dict]:
|
||||||
cursor = await self.db.execute(
|
cursor = await self.db.execute(
|
||||||
"""SELECT * FROM research_sessions
|
"""SELECT * FROM research_sessions
|
||||||
|
|||||||
@@ -574,3 +574,82 @@ def generate_pdf(content: str, title: str = "ResearchOwl Output") -> bytes:
|
|||||||
|
|
||||||
doc.build(story)
|
doc.build(story)
|
||||||
return buf.getvalue()
|
return buf.getvalue()
|
||||||
|
|
||||||
|
|
||||||
|
async def generate_diff_summary(
|
||||||
|
topic: str,
|
||||||
|
new_urls: set,
|
||||||
|
old_urls: set,
|
||||||
|
new_chunks: list,
|
||||||
|
session_id: int,
|
||||||
|
db,
|
||||||
|
) -> str | None:
|
||||||
|
from src.config import settings
|
||||||
|
import structlog
|
||||||
|
diff_logger = structlog.get_logger()
|
||||||
|
|
||||||
|
added_urls = new_urls - old_urls
|
||||||
|
pct_new = len(added_urls) / max(len(new_urls), 1)
|
||||||
|
|
||||||
|
diff_logger.info("Diff analysis", topic=topic,
|
||||||
|
new_urls=len(new_urls), old_urls=len(old_urls),
|
||||||
|
added=len(added_urls), pct_new=round(pct_new, 2))
|
||||||
|
|
||||||
|
if pct_new < 0.20 and len(added_urls) < 5:
|
||||||
|
diff_logger.info("Diff: no significant new sources", topic=topic)
|
||||||
|
return None
|
||||||
|
|
||||||
|
if not new_chunks:
|
||||||
|
return None
|
||||||
|
|
||||||
|
context = "\n\n---\n\n".join(
|
||||||
|
f"[{c.get('source_type', 'web').upper()}] {c.get('title', '')}\n{c['content'][:400]}"
|
||||||
|
for c in new_chunks[:20]
|
||||||
|
)
|
||||||
|
|
||||||
|
if not settings.anthropic_api_key:
|
||||||
|
return (
|
||||||
|
f"📊 *Novedades detectadas sobre {topic}*\n\n"
|
||||||
|
f"• {len(added_urls)} fuentes nuevas encontradas\n"
|
||||||
|
f"• {len(new_chunks)} chunks de contenido procesados\n\n"
|
||||||
|
f"Usa /generate report para ver el análisis completo."
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
import anthropic
|
||||||
|
client = anthropic.AsyncAnthropic(api_key=settings.anthropic_api_key)
|
||||||
|
prompt = (
|
||||||
|
f'Analiza el siguiente material de investigación sobre "{topic}" '
|
||||||
|
f'y genera un resumen BREVE (máximo 300 palabras) de las novedades '
|
||||||
|
f'más importantes encontradas. Escribe en español.\n\n'
|
||||||
|
f'Si el contenido es muy similar a investigaciones anteriores o no '
|
||||||
|
f'contiene información genuinamente nueva, responde SOLO con: '
|
||||||
|
f'"SIN_NOVEDADES"\n\n'
|
||||||
|
f'Material nuevo:\n{context}'
|
||||||
|
)
|
||||||
|
msg = await client.messages.create(
|
||||||
|
model=settings.claude_model,
|
||||||
|
max_tokens=500,
|
||||||
|
messages=[{"role": "user", "content": prompt}]
|
||||||
|
)
|
||||||
|
summary = msg.content[0].text.strip()
|
||||||
|
|
||||||
|
if summary == "SIN_NOVEDADES":
|
||||||
|
diff_logger.info("Diff: Claude found no new information", topic=topic)
|
||||||
|
return None
|
||||||
|
|
||||||
|
try:
|
||||||
|
await db.log_api_call(session_id, "diff", settings.claude_model,
|
||||||
|
msg.usage.input_tokens, msg.usage.output_tokens)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return f"🔔 *Novedades — {topic}*\n\n{summary}\n\nUsa /generate para report completo."
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
diff_logger.warning("Diff summary generation failed", error=str(e))
|
||||||
|
return (
|
||||||
|
f"📊 *Actualización — {topic}*\n\n"
|
||||||
|
f"• {len(added_urls)} fuentes nuevas\n"
|
||||||
|
f"Usa /generate report para ver el análisis completo."
|
||||||
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user