fix: wrap YouTubeTranscriptApi in run_in_executor with 30s timeout
Build & Deploy ResearchOwl / build-and-push (push) Successful in 5s
Build & Deploy ResearchOwl / build-and-push (push) Successful in 5s
The synchronous get_transcript() call was blocking the asyncio event loop indefinitely, freezing the entire bot (including Telegram polling).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -469,12 +469,23 @@ class ExhaustiveScraper:
|
|||||||
return None, None
|
return None, None
|
||||||
|
|
||||||
video_id = match.group(1)
|
video_id = match.group(1)
|
||||||
try:
|
loop = asyncio.get_event_loop()
|
||||||
transcript_list = YouTubeTranscriptApi.get_transcript(
|
|
||||||
|
def _fetch():
|
||||||
|
return YouTubeTranscriptApi.get_transcript(
|
||||||
video_id, languages=["en", "es", "en-US", "en-GB"]
|
video_id, languages=["en", "es", "en-US", "en-GB"]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
transcript_list = await asyncio.wait_for(
|
||||||
|
loop.run_in_executor(None, _fetch),
|
||||||
|
timeout=30.0
|
||||||
|
)
|
||||||
text = " ".join(t["text"] for t in transcript_list)
|
text = " ".join(t["text"] for t in transcript_list)
|
||||||
return text, f"YouTube: {video_id}"
|
return text, f"YouTube: {video_id}"
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
logger.warning("YouTube transcript timed out", video_id=video_id)
|
||||||
|
return None, None
|
||||||
except NoTranscriptFound:
|
except NoTranscriptFound:
|
||||||
return None, None
|
return None, None
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
Reference in New Issue
Block a user