From e66d728d68f661230ef437dcc3999aadca7f63bb Mon Sep 17 00:00:00 2001 From: ChemaVX Date: Wed, 29 Apr 2026 12:59:40 +0000 Subject: [PATCH] fix: wrap YouTubeTranscriptApi in run_in_executor with 30s timeout The synchronous get_transcript() call was blocking the asyncio event loop indefinitely, freezing the entire bot (including Telegram polling). Co-Authored-By: Claude Sonnet 4.6 --- src/scraper/exhaustive.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/scraper/exhaustive.py b/src/scraper/exhaustive.py index c5b6744..7995165 100644 --- a/src/scraper/exhaustive.py +++ b/src/scraper/exhaustive.py @@ -469,12 +469,23 @@ class ExhaustiveScraper: return None, None video_id = match.group(1) - try: - transcript_list = YouTubeTranscriptApi.get_transcript( + loop = asyncio.get_event_loop() + + def _fetch(): + return YouTubeTranscriptApi.get_transcript( video_id, languages=["en", "es", "en-US", "en-GB"] ) + + try: + transcript_list = await asyncio.wait_for( + loop.run_in_executor(None, _fetch), + timeout=30.0 + ) text = " ".join(t["text"] for t in transcript_list) return text, f"YouTube: {video_id}" + except asyncio.TimeoutError: + logger.warning("YouTube transcript timed out", video_id=video_id) + return None, None except NoTranscriptFound: return None, None except Exception as e: