From d73c0a13f4deefad688200f39c493c341ad97fd0 Mon Sep 17 00:00:00 2001 From: SurprisedDuck Date: Tue, 2 Jun 2026 13:44:24 +0200 Subject: [PATCH] YouTube: enforce comment fetch timeout while waiting asyncio.wait_for wrapped create_subprocess_exec, which returns as soon as the child is spawned, so the timeout never bounded the actual work. yt-dlp could hang indefinitely on proc.communicate() and the except asyncio.TimeoutError branch was unreachable. Bind the wait to communicate() and kill/reap the child if it overruns. --- src/youtube_handler.py | 25 +++++++++----- tests/test_youtube_comments_timeout.py | 47 ++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 8 deletions(-) create mode 100644 tests/test_youtube_comments_timeout.py diff --git a/src/youtube_handler.py b/src/youtube_handler.py index c775bec..7d91194 100644 --- a/src/youtube_handler.py +++ b/src/youtube_handler.py @@ -198,15 +198,24 @@ async def fetch_youtube_comments( f"https://www.youtube.com/watch?v={video_id}", ] - proc = await asyncio.wait_for( - asyncio.create_subprocess_exec( - *cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ), - timeout=timeout, + proc = await asyncio.create_subprocess_exec( + *cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, ) - stdout, stderr = await proc.communicate() + # Bound the wait on the process actually finishing, not on spawning it. + # create_subprocess_exec returns as soon as the child starts, so wrapping + # it in wait_for never enforces the timeout — proc.communicate() is the + # blocking step. Kill and reap the child if it overruns so it does not + # linger after we return. + try: + stdout, stderr = await asyncio.wait_for( + proc.communicate(), timeout=timeout + ) + except asyncio.TimeoutError: + proc.kill() + await proc.wait() + raise if proc.returncode != 0: return {"success": False, "error": f"yt-dlp failed: {stderr.decode()[:200]}", "comments": []} diff --git a/tests/test_youtube_comments_timeout.py b/tests/test_youtube_comments_timeout.py new file mode 100644 index 0000000..6eac7d4 --- /dev/null +++ b/tests/test_youtube_comments_timeout.py @@ -0,0 +1,47 @@ +"""Regression: fetch_youtube_comments must actually honour its timeout. + +The timeout previously wrapped ``create_subprocess_exec`` (which returns as soon +as the child is spawned) instead of ``proc.communicate()`` (the step that waits +for yt-dlp to finish). A hung yt-dlp would therefore block forever and the +``except asyncio.TimeoutError`` handler was unreachable. The wait must be bound +to communicate(), and the child killed when it overruns. +""" +import asyncio + +from src import youtube_handler + + +def test_comment_fetch_honours_timeout(monkeypatch): + monkeypatch.setattr(youtube_handler, "_find_ytdlp", lambda: "yt-dlp") + + killed = {"value": False} + + class HangingProc: + returncode = None + + async def communicate(self): + await asyncio.sleep(30) # far longer than the test timeout + return (b"", b"") + + def kill(self): + killed["value"] = True + + async def wait(self): + return 0 + + async def fake_create_subprocess_exec(*args, **kwargs): + return HangingProc() + + monkeypatch.setattr( + asyncio, "create_subprocess_exec", fake_create_subprocess_exec + ) + + result = asyncio.run( + youtube_handler.fetch_youtube_comments("vid123", timeout=0.1) + ) + + assert result["success"] is False + assert "timed out" in result["error"].lower() + assert result["comments"] == [] + # The overrunning child must be killed, not left running. + assert killed["value"] is True