diff --git a/src/scraper/exhaustive.py b/src/scraper/exhaustive.py index c5b6744..7995165 100644 --- a/src/scraper/exhaustive.py +++ b/src/scraper/exhaustive.py @@ -469,12 +469,23 @@ class ExhaustiveScraper: return None, None video_id = match.group(1) - try: - transcript_list = YouTubeTranscriptApi.get_transcript( + loop = asyncio.get_event_loop() + + def _fetch(): + return YouTubeTranscriptApi.get_transcript( video_id, languages=["en", "es", "en-US", "en-GB"] ) + + try: + transcript_list = await asyncio.wait_for( + loop.run_in_executor(None, _fetch), + timeout=30.0 + ) text = " ".join(t["text"] for t in transcript_list) return text, f"YouTube: {video_id}" + except asyncio.TimeoutError: + logger.warning("YouTube transcript timed out", video_id=video_id) + return None, None except NoTranscriptFound: return None, None except Exception as e: