fix: add /process command, log quality filtering, improve Reddit headers
Build & Deploy ResearchOwl / build-and-push (push) Successful in 5s

- bot.py: add cmd_process handler to manually trigger chunk processing
  on the last session; register CommandHandler("process")
- processor.py: log exceptions from asyncio.gather instead of silently
  dropping them; add per-chunk quality score debug logging; warn, with
  an actionable hint, when all chunks are filtered out by the quality
  threshold; raise fallback score to 0.6 so Ollama failures don't
  filter chunks
- exhaustive.py: replace bot User-Agent with full browser UA + headers
  for REDDIT_HEADERS; downgrade Reddit 403 from warning to info since
  server IPs are routinely blocked; use content_type=None on json()
  to avoid aiohttp content-type mismatch errors

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
ChemaVX
2026-04-27 20:37:39 +00:00
parent bb8171359d
commit 0c7176dd0b
3 changed files with 119 additions and 19 deletions
+60
View File
@@ -66,6 +66,7 @@ async def cmd_start(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
"`/research <topic>` — Start exhaustive research\n"
"`/status` — Check current research progress\n"
"`/finish` — Stop research and proceed to generation\n"
"`/process` — Manually trigger chunk processing\n"
"`/generate <type>` — Generate output (podcast|blog|report|thread)\n"
"`/sources` — List all sources found\n"
"`/outputs` — List generated outputs\n"
@@ -426,6 +427,64 @@ async def cmd_outputs(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
await db_conn.close()
async def cmd_process(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
    """Manually run chunk processing on the chat's most recent research session.

    Looks up the newest ``research_sessions`` row for the calling chat, checks
    that Ollama is reachable, then hands the session to
    ``ContentProcessor.process_session``. The callback passed to the processor
    edits the status message with the chunk and word totals it receives.

    Unauthorized users are ignored silently. Any failure is logged and
    reported back to the chat; the database connection is always closed.
    """
    if not is_authorized(update.effective_user.id):
        return

    chat_id = update.effective_chat.id
    db_conn = await get_db()
    db = ResearchDB(db_conn)
    try:
        cursor = await db_conn.execute(
            "SELECT * FROM research_sessions WHERE telegram_chat_id = ? ORDER BY created_at DESC LIMIT 1",
            (chat_id,)
        )
        row = await cursor.fetchone()
        if not row:
            await update.message.reply_text("No research sessions found. Start with /research <topic>")
            return

        session = dict(row)
        session_id = session["id"]
        topic = session["topic"]
        # The topic is user-supplied and rendered inside a Markdown code span;
        # a stray backtick would close the span early and make Telegram reject
        # the whole message, so neutralise backticks for display only.
        display_topic = str(topic).replace("`", "'")

        msg = await update.message.reply_text(
            f"🧠 Processing session #{session_id}: `{display_topic}`\n"
            f"Chunking & scoring with Ollama ({settings.ollama_model})...\n"
            f"This may take a few minutes.",
            parse_mode=ParseMode.MARKDOWN
        )

        ollama = OllamaClient()
        if not await ollama.is_available():
            await msg.edit_text("❌ Ollama not reachable. Check OLLAMA_URL setting.")
            return

        processor = ContentProcessor(db, ollama)

        async def proc_progress(total_chunks, total_words):
            """Replace the status message with the processing totals (best effort)."""
            try:
                await msg.edit_text(
                    f"🧠 *Processing complete!*\n"
                    f"• Chunks stored: `{total_chunks}`\n"
                    f"• Words researched: `{total_words:,}`\n\n"
                    f"Ready! Use `/generate podcast|blog|report|thread`\n"
                    # No italic wrapper here: legacy Markdown cannot nest
                    # entities, and the underscore in QUALITY_THRESHOLD would
                    # end the italic early, leaving the backticks unbalanced.
                    f"If 0 chunks: set `QUALITY_THRESHOLD=0.3` or `0` and retry",
                    parse_mode=ParseMode.MARKDOWN
                )
            except Exception as exc:
                # A failed status edit must not abort processing, but it
                # should leave a trace instead of vanishing silently.
                logger.warning("Progress message update failed", error=str(exc))

        # Pass the original topic, not the display-sanitised one.
        await processor.process_session(session_id, topic, proc_progress)

    except Exception as e:
        logger.error("Process command failed", error=str(e))
        await update.message.reply_text(f"❌ Processing failed: {str(e)[:200]}")
    finally:
        await db_conn.close()
async def cmd_cancel(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
if not is_authorized(update.effective_user.id):
return
@@ -456,6 +515,7 @@ def create_bot() -> Application:
app.add_handler(CommandHandler("generate", cmd_generate))
app.add_handler(CommandHandler("sources", cmd_sources))
app.add_handler(CommandHandler("outputs", cmd_outputs))
app.add_handler(CommandHandler("process", cmd_process))
app.add_handler(CommandHandler("cancel", cmd_cancel))
return app