fix: wrap YouTubeTranscriptApi in run_in_executor with 30s timeout
Build & Deploy ResearchOwl / build-and-push (push) Successful in 5s

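The synchronous get_transcript() call ran directly on the asyncio event
loop, so a stalled request could block the loop indefinitely and freeze
the entire bot (including Telegram polling). The call now runs in the
default executor and is awaited with a 30-second timeout; note that the
timeout only abandons the await, it does not interrupt the worker thread.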

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
ChemaVX
2026-04-29 12:59:40 +00:00
parent 65b1739943
commit e66d728d68
+13 -2
View File
@@ -469,12 +469,23 @@ class ExhaustiveScraper:
return None, None return None, None
video_id = match.group(1) video_id = match.group(1)
try: loop = asyncio.get_event_loop()
transcript_list = YouTubeTranscriptApi.get_transcript(
def _fetch():
return YouTubeTranscriptApi.get_transcript(
video_id, languages=["en", "es", "en-US", "en-GB"] video_id, languages=["en", "es", "en-US", "en-GB"]
) )
try:
transcript_list = await asyncio.wait_for(
loop.run_in_executor(None, _fetch),
timeout=30.0
)
text = " ".join(t["text"] for t in transcript_list) text = " ".join(t["text"] for t in transcript_list)
return text, f"YouTube: {video_id}" return text, f"YouTube: {video_id}"
except asyncio.TimeoutError:
logger.warning("YouTube transcript timed out", video_id=video_id)
return None, None
except NoTranscriptFound: except NoTranscriptFound:
return None, None return None, None
except Exception as e: except Exception as e: