YouTube: enforce comment fetch timeout while waiting

asyncio.wait_for wrapped create_subprocess_exec, which returns as soon
as the child is spawned, so the timeout never bounded the actual work.
yt-dlp could hang indefinitely on proc.communicate() and the
except asyncio.TimeoutError branch was unreachable. Bind the wait to
communicate() and kill/reap the child if it overruns.
This commit is contained in:
SurprisedDuck
2026-06-02 13:44:24 +02:00
committed by GitHub
parent e084dc993e
commit d73c0a13f4
2 changed files with 64 additions and 8 deletions

View File

@@ -198,15 +198,24 @@ async def fetch_youtube_comments(
f"https://www.youtube.com/watch?v={video_id}", f"https://www.youtube.com/watch?v={video_id}",
] ]
proc = await asyncio.wait_for( proc = await asyncio.create_subprocess_exec(
asyncio.create_subprocess_exec( *cmd,
*cmd, stdout=asyncio.subprocess.PIPE,
stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
),
timeout=timeout,
) )
stdout, stderr = await proc.communicate() # Bound the wait on the process actually finishing, not on spawning it.
# create_subprocess_exec returns as soon as the child starts, so wrapping
# it in wait_for never enforces the timeout — proc.communicate() is the
# blocking step. Kill and reap the child if it overruns so it does not
# linger after we return.
try:
stdout, stderr = await asyncio.wait_for(
proc.communicate(), timeout=timeout
)
except asyncio.TimeoutError:
proc.kill()
await proc.wait()
raise
if proc.returncode != 0: if proc.returncode != 0:
return {"success": False, "error": f"yt-dlp failed: {stderr.decode()[:200]}", "comments": []} return {"success": False, "error": f"yt-dlp failed: {stderr.decode()[:200]}", "comments": []}

View File

@@ -0,0 +1,47 @@
"""Regression: fetch_youtube_comments must actually honour its timeout.
The timeout previously wrapped ``create_subprocess_exec`` (which returns as soon
as the child is spawned) instead of ``proc.communicate()`` (the step that waits
for yt-dlp to finish). A hung yt-dlp would therefore block forever and the
``except asyncio.TimeoutError`` handler was unreachable. The wait must be bound
to communicate(), and the child killed when it overruns.
"""
import asyncio
from src import youtube_handler
def test_comment_fetch_honours_timeout(monkeypatch):
monkeypatch.setattr(youtube_handler, "_find_ytdlp", lambda: "yt-dlp")
killed = {"value": False}
class HangingProc:
returncode = None
async def communicate(self):
await asyncio.sleep(30) # far longer than the test timeout
return (b"", b"")
def kill(self):
killed["value"] = True
async def wait(self):
return 0
async def fake_create_subprocess_exec(*args, **kwargs):
return HangingProc()
monkeypatch.setattr(
asyncio, "create_subprocess_exec", fake_create_subprocess_exec
)
result = asyncio.run(
youtube_handler.fetch_youtube_comments("vid123", timeout=0.1)
)
assert result["success"] is False
assert "timed out" in result["error"].lower()
assert result["comments"] == []
# The overrunning child must be killed, not left running.
assert killed["value"] is True