fix: wrap YouTubeTranscriptApi in run_in_executor with 30s timeout
Build & Deploy ResearchOwl / build-and-push (push) Successful in 5s
Build & Deploy ResearchOwl / build-and-push (push) Successful in 5s
The synchronous get_transcript() call was blocking the asyncio event loop indefinitely, freezing the entire bot (including Telegram polling).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -469,12 +469,23 @@ class ExhaustiveScraper:
|
||||
return None, None
|
||||
|
||||
video_id = match.group(1)
|
||||
try:
|
||||
transcript_list = YouTubeTranscriptApi.get_transcript(
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
def _fetch():
|
||||
return YouTubeTranscriptApi.get_transcript(
|
||||
video_id, languages=["en", "es", "en-US", "en-GB"]
|
||||
)
|
||||
|
||||
try:
|
||||
transcript_list = await asyncio.wait_for(
|
||||
loop.run_in_executor(None, _fetch),
|
||||
timeout=30.0
|
||||
)
|
||||
text = " ".join(t["text"] for t in transcript_list)
|
||||
return text, f"YouTube: {video_id}"
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning("YouTube transcript timed out", video_id=video_id)
|
||||
return None, None
|
||||
except NoTranscriptFound:
|
||||
return None, None
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user