fix: WAL mode for concurrent reads, skipped stats, anti-repetition prompts

database.py: enable PRAGMA journal_mode=WAL + synchronous=NORMAL so /status reads from concurrent connections see committed data without blocking behind the scraper's writes; add 'skipped' to get_session_stats.

bot.py: show skipped count in fmt_progress and cmd_status; use 'or 0' to guard against NULL from SUM(); label active research in /status.

processor.py: raise generate() temperature default to 0.7 + add repeat_penalty=1.15/repeat_last_n=128 to Ollama options to stop qwen2.5:3b from looping; scoring prompt keeps temperature=0.1.

generator.py: rewrite all prompts with explicit "NEVER repeat" constraints and distinct-content rules per section; podcast prompt now asks for spoken-word style (no formal headers); reduce thread to 12-18 tweets (was 15-25) to fit model context; pass temperature=0.7.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-28 10:15:30 +00:00
parent f7d62345b8
commit c4fb33fbf5
4 changed files with 115 additions and 73 deletions
@@ -34,14 +34,15 @@ def is_authorized(user_id: int) -> bool:


 def fmt_progress(iteration: int, total: int, new: int, stats: dict) -> str:
-    scraped = stats.get("scraped", 0)
-    failed = stats.get("failed", 0)
-    pending = stats.get("pending", 0)
+    scraped = stats.get("scraped") or 0
+    failed  = stats.get("failed")  or 0
+    pending = stats.get("pending") or 0
+    skipped = stats.get("skipped") or 0
    return (
        f"🔄 *Iteration {iteration}*\n"
        f"📚 Sources found: `{total}`\n"
-        f"✅ Scraped: `{scraped}` | ❌ Failed: `{failed}` | ⏳ Pending: `{pending}`\n"
-        f"🆕 New this round: `{new}`"
+        f"✅ Scraped: `{scraped}` | ⏭️ Skipped: `{skipped}` | ❌ Failed: `{failed}` | ⏳ Pending: `{pending}`\n"
+        f"🆕 New URLs this round: `{new}`"
    )


@@ -213,13 +214,14 @@ async def cmd_status(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
            f"📝 Topic: `{session['topic']}`\n"
            f"🔁 Status: `{session['status']}`\n"
            f"🔢 Iterations: `{session.get('iterations', 0)}`\n"
-            f"📚 Total sources: `{stats.get('total', 0)}`\n"
-            f"✅ Scraped: `{stats.get('scraped', 0)}`\n"
-            f"❌ Failed: `{stats.get('failed', 0)}`\n"
-            f"⏳ Pending: `{stats.get('pending', 0)}`\n"
+            f"📚 Total sources: `{stats.get('total') or 0}`\n"
+            f"✅ Scraped: `{stats.get('scraped') or 0}`\n"
+            f"⏭️ Skipped: `{stats.get('skipped') or 0}`\n"
+            f"❌ Failed: `{stats.get('failed') or 0}`\n"
+            f"⏳ Pending: `{stats.get('pending') or 0}`\n"
            f"💬 Chunks: `{session.get('total_chunks', 0)}`\n"
            f"📖 Words: `{session.get('total_words', 0):,}`\n"
-            f"{'🟢 Active' if is_active else '⚫ Idle'}",
+            f"{'🟢 Active — stats update each iteration' if is_active else '⚫ Idle'}",
            parse_mode=ParseMode.MARKDOWN
        )
    finally: