fix: WAL mode for concurrent reads, skipped stats, anti-repetition prompts
Build & Deploy ResearchOwl / build-and-push (push) Successful in 5s
Build & Deploy ResearchOwl / build-and-push (push) Successful in 5s
database.py: enable PRAGMA journal_mode=WAL + synchronous=NORMAL so /status reads from concurrent connections see committed data without blocking behind the scraper's writes; add 'skipped' to get_session_stats.
bot.py: show skipped count in fmt_progress and cmd_status; use 'or 0' to guard against NULL from SUM(); label active research in /status.
processor.py: raise generate() temperature default to 0.7 + add repeat_penalty=1.15/repeat_last_n=128 to Ollama options to stop qwen2.5:3b from looping; scoring prompt keeps temperature=0.1.
generator.py: rewrite all prompts with explicit "NEVER repeat" constraints and distinct-content rules per section; podcast prompt now asks for spoken-word style (no formal headers); reduce thread to 12-18 tweets (was 15-25) to fit model context; pass temperature=0.7.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+12
-10
@@ -34,14 +34,15 @@ def is_authorized(user_id: int) -> bool:
|
||||
|
||||
|
||||
def fmt_progress(iteration: int, total: int, new: int, stats: dict) -> str:
|
||||
scraped = stats.get("scraped", 0)
|
||||
failed = stats.get("failed", 0)
|
||||
pending = stats.get("pending", 0)
|
||||
scraped = stats.get("scraped") or 0
|
||||
failed = stats.get("failed") or 0
|
||||
pending = stats.get("pending") or 0
|
||||
skipped = stats.get("skipped") or 0
|
||||
return (
|
||||
f"🔄 *Iteration {iteration}*\n"
|
||||
f"📚 Sources found: `{total}`\n"
|
||||
f"✅ Scraped: `{scraped}` | ❌ Failed: `{failed}` | ⏳ Pending: `{pending}`\n"
|
||||
f"🆕 New this round: `{new}`"
|
||||
f"✅ Scraped: `{scraped}` | ⏭️ Skipped: `{skipped}` | ❌ Failed: `{failed}` | ⏳ Pending: `{pending}`\n"
|
||||
f"🆕 New URLs this round: `{new}`"
|
||||
)
|
||||
|
||||
|
||||
@@ -213,13 +214,14 @@ async def cmd_status(update: Update, ctx: ContextTypes.DEFAULT_TYPE):
|
||||
f"📝 Topic: `{session['topic']}`\n"
|
||||
f"🔁 Status: `{session['status']}`\n"
|
||||
f"🔢 Iterations: `{session.get('iterations', 0)}`\n"
|
||||
f"📚 Total sources: `{stats.get('total', 0)}`\n"
|
||||
f"✅ Scraped: `{stats.get('scraped', 0)}`\n"
|
||||
f"❌ Failed: `{stats.get('failed', 0)}`\n"
|
||||
f"⏳ Pending: `{stats.get('pending', 0)}`\n"
|
||||
f"📚 Total sources: `{stats.get('total') or 0}`\n"
|
||||
f"✅ Scraped: `{stats.get('scraped') or 0}`\n"
|
||||
f"⏭️ Skipped: `{stats.get('skipped') or 0}`\n"
|
||||
f"❌ Failed: `{stats.get('failed') or 0}`\n"
|
||||
f"⏳ Pending: `{stats.get('pending') or 0}`\n"
|
||||
f"💬 Chunks: `{session.get('total_chunks', 0)}`\n"
|
||||
f"📖 Words: `{session.get('total_words', 0):,}`\n"
|
||||
f"{'🟢 Active' if is_active else '⚫ Idle'}",
|
||||
f"{'🟢 Active — stats update each iteration' if is_active else '⚫ Idle'}",
|
||||
parse_mode=ParseMode.MARKDOWN
|
||||
)
|
||||
finally:
|
||||
|
||||
Reference in New Issue
Block a user