fix: add /process command, log quality filtering, improve Reddit headers

- bot.py: add cmd_process handler to manually trigger chunk processing on the last session; register CommandHandler("process")
- processor.py: log exceptions from asyncio.gather instead of silently dropping them; add per-chunk quality score debug logging; warn when all chunks are filtered by the quality threshold, with an actionable hint; raise fallback score to 0.6 so Ollama failures don't filter chunks
- exhaustive.py: replace bot User-Agent with full browser UA + headers for REDDIT_HEADERS; downgrade Reddit 403 from warning to info since server IPs are routinely blocked; use content_type=None on json() to avoid aiohttp content-type mismatch errors

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-27 20:37:39 +00:00
parent bb8171359d
commit 0c7176dd0b
3 changed files with 119 additions and 19 deletions
@@ -66,6 +66,7 @@ async def cmd_start(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
        "`/research <topic>` — Start exhaustive research\n"
        "`/status` — Check current research progress\n"
        "`/finish` — Stop research and proceed to generation\n"
+        "`/process` — Manually trigger chunk processing\n"
        "`/generate <type>` — Generate output (podcast|blog|report|thread)\n"
        "`/sources` — List all sources found\n"
        "`/outputs` — List generated outputs\n"
@@ -426,6 +427,64 @@ async def cmd_outputs(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
        await db_conn.close()


+async def cmd_process(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
+    if not is_authorized(update.effective_user.id):
+        return
+
+    chat_id = update.effective_chat.id
+    db_conn = await get_db()
+    db = ResearchDB(db_conn)
+
+    try:
+        cursor = await db_conn.execute(
+            "SELECT * FROM research_sessions WHERE telegram_chat_id = ? ORDER BY created_at DESC LIMIT 1",
+            (chat_id,)
+        )
+        row = await cursor.fetchone()
+        if not row:
+            await update.message.reply_text("No research sessions found. Start with /research <topic>")
+            return
+
+        session = dict(row)
+        session_id = session["id"]
+        topic = session["topic"]
+
+        msg = await update.message.reply_text(
+            f"🧠 Processing session #{session_id}: `{topic}`\n"
+            f"Chunking & scoring with Ollama ({settings.ollama_model})...\n"
+            f"This may take a few minutes.",
+            parse_mode=ParseMode.MARKDOWN
+        )
+
+        ollama = OllamaClient()
+        if not await ollama.is_available():
+            await msg.edit_text("❌ Ollama not reachable. Check OLLAMA_URL setting.")
+            return
+
+        processor = ContentProcessor(db, ollama)
+
+        async def proc_progress(total_chunks, total_words):
+            try:
+                await msg.edit_text(
+                    f"🧠 *Processing complete!*\n"
+                    f"• Chunks stored: `{total_chunks}`\n"
+                    f"• Words researched: `{total_words:,}`\n\n"
+                    f"Ready! Use `/generate podcast|blog|report|thread`\n"
+                    f"_If 0 chunks: set `QUALITY_THRESHOLD=0.3` or `0` and retry_",
+                    parse_mode=ParseMode.MARKDOWN
+                )
+            except Exception:
+                pass
+
+        await processor.process_session(session_id, topic, proc_progress)
+
+    except Exception as e:
+        logger.error("Process command failed", error=str(e))
+        await update.message.reply_text(f"❌ Processing failed: {str(e)[:200]}")
+    finally:
+        await db_conn.close()
+
+
 async def cmd_cancel(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    if not is_authorized(update.effective_user.id):
        return
@@ -456,6 +515,7 @@ def create_bot() -> Application:
    app.add_handler(CommandHandler("generate", cmd_generate))
    app.add_handler(CommandHandler("sources", cmd_sources))
    app.add_handler(CommandHandler("outputs", cmd_outputs))
+    app.add_handler(CommandHandler("process", cmd_process))
    app.add_handler(CommandHandler("cancel", cmd_cancel))

    return app