YouTube: enforce comment fetch timeout while waiting
asyncio.wait_for wrapped create_subprocess_exec, which returns as soon as the child is spawned, so the timeout never bounded the actual work. yt-dlp could hang indefinitely on proc.communicate() and the except asyncio.TimeoutError branch was unreachable. Bind the wait to communicate() and kill/reap the child if it overruns.
This commit is contained in:
@@ -198,15 +198,24 @@ async def fetch_youtube_comments(
|
||||
f"https://www.youtube.com/watch?v={video_id}",
|
||||
]
|
||||
|
||||
proc = await asyncio.wait_for(
|
||||
asyncio.create_subprocess_exec(
|
||||
proc = await asyncio.create_subprocess_exec(
|
||||
*cmd,
|
||||
stdout=asyncio.subprocess.PIPE,
|
||||
stderr=asyncio.subprocess.PIPE,
|
||||
),
|
||||
timeout=timeout,
|
||||
)
|
||||
stdout, stderr = await proc.communicate()
|
||||
# Bound the wait on the process actually finishing, not on spawning it.
|
||||
# create_subprocess_exec returns as soon as the child starts, so wrapping
|
||||
# it in wait_for never enforces the timeout — proc.communicate() is the
|
||||
# blocking step. Kill and reap the child if it overruns so it does not
|
||||
# linger after we return.
|
||||
try:
|
||||
stdout, stderr = await asyncio.wait_for(
|
||||
proc.communicate(), timeout=timeout
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
proc.kill()
|
||||
await proc.wait()
|
||||
raise
|
||||
|
||||
if proc.returncode != 0:
|
||||
return {"success": False, "error": f"yt-dlp failed: {stderr.decode()[:200]}", "comments": []}
|
||||
|
||||
47
tests/test_youtube_comments_timeout.py
Normal file
47
tests/test_youtube_comments_timeout.py
Normal file
@@ -0,0 +1,47 @@
|
||||
"""Regression: fetch_youtube_comments must actually honour its timeout.
|
||||
|
||||
The timeout previously wrapped ``create_subprocess_exec`` (which returns as soon
|
||||
as the child is spawned) instead of ``proc.communicate()`` (the step that waits
|
||||
for yt-dlp to finish). A hung yt-dlp would therefore block forever and the
|
||||
``except asyncio.TimeoutError`` handler was unreachable. The wait must be bound
|
||||
to communicate(), and the child killed when it overruns.
|
||||
"""
|
||||
import asyncio
|
||||
|
||||
from src import youtube_handler
|
||||
|
||||
|
||||
def test_comment_fetch_honours_timeout(monkeypatch):
|
||||
monkeypatch.setattr(youtube_handler, "_find_ytdlp", lambda: "yt-dlp")
|
||||
|
||||
killed = {"value": False}
|
||||
|
||||
class HangingProc:
|
||||
returncode = None
|
||||
|
||||
async def communicate(self):
|
||||
await asyncio.sleep(30) # far longer than the test timeout
|
||||
return (b"", b"")
|
||||
|
||||
def kill(self):
|
||||
killed["value"] = True
|
||||
|
||||
async def wait(self):
|
||||
return 0
|
||||
|
||||
async def fake_create_subprocess_exec(*args, **kwargs):
|
||||
return HangingProc()
|
||||
|
||||
monkeypatch.setattr(
|
||||
asyncio, "create_subprocess_exec", fake_create_subprocess_exec
|
||||
)
|
||||
|
||||
result = asyncio.run(
|
||||
youtube_handler.fetch_youtube_comments("vid123", timeout=0.1)
|
||||
)
|
||||
|
||||
assert result["success"] is False
|
||||
assert "timed out" in result["error"].lower()
|
||||
assert result["comments"] == []
|
||||
# The overrunning child must be killed, not left running.
|
||||
assert killed["value"] is True
|
||||
Reference in New Issue
Block a user