YouTube: enforce comment fetch timeout while waiting
asyncio.wait_for wrapped create_subprocess_exec, which returns as soon as the child is spawned, so the timeout never bounded the actual work. yt-dlp could hang indefinitely on proc.communicate() and the except asyncio.TimeoutError branch was unreachable. Bind the wait to communicate() and kill/reap the child if it overruns.
This commit is contained in:
@@ -198,15 +198,24 @@ async def fetch_youtube_comments(
|
|||||||
f"https://www.youtube.com/watch?v={video_id}",
|
f"https://www.youtube.com/watch?v={video_id}",
|
||||||
]
|
]
|
||||||
|
|
||||||
proc = await asyncio.wait_for(
|
proc = await asyncio.create_subprocess_exec(
|
||||||
asyncio.create_subprocess_exec(
|
*cmd,
|
||||||
*cmd,
|
stdout=asyncio.subprocess.PIPE,
|
||||||
stdout=asyncio.subprocess.PIPE,
|
stderr=asyncio.subprocess.PIPE,
|
||||||
stderr=asyncio.subprocess.PIPE,
|
|
||||||
),
|
|
||||||
timeout=timeout,
|
|
||||||
)
|
)
|
||||||
stdout, stderr = await proc.communicate()
|
# Bound the wait on the process actually finishing, not on spawning it.
|
||||||
|
# create_subprocess_exec returns as soon as the child starts, so wrapping
|
||||||
|
# it in wait_for never enforces the timeout — proc.communicate() is the
|
||||||
|
# blocking step. Kill and reap the child if it overruns so it does not
|
||||||
|
# linger after we return.
|
||||||
|
try:
|
||||||
|
stdout, stderr = await asyncio.wait_for(
|
||||||
|
proc.communicate(), timeout=timeout
|
||||||
|
)
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
proc.kill()
|
||||||
|
await proc.wait()
|
||||||
|
raise
|
||||||
|
|
||||||
if proc.returncode != 0:
|
if proc.returncode != 0:
|
||||||
return {"success": False, "error": f"yt-dlp failed: {stderr.decode()[:200]}", "comments": []}
|
return {"success": False, "error": f"yt-dlp failed: {stderr.decode()[:200]}", "comments": []}
|
||||||
|
|||||||
47
tests/test_youtube_comments_timeout.py
Normal file
47
tests/test_youtube_comments_timeout.py
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
"""Regression: fetch_youtube_comments must actually honour its timeout.
|
||||||
|
|
||||||
|
The timeout previously wrapped ``create_subprocess_exec`` (which returns as soon
|
||||||
|
as the child is spawned) instead of ``proc.communicate()`` (the step that waits
|
||||||
|
for yt-dlp to finish). A hung yt-dlp would therefore block forever and the
|
||||||
|
``except asyncio.TimeoutError`` handler was unreachable. The wait must be bound
|
||||||
|
to communicate(), and the child killed when it overruns.
|
||||||
|
"""
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
from src import youtube_handler
|
||||||
|
|
||||||
|
|
||||||
|
def test_comment_fetch_honours_timeout(monkeypatch):
|
||||||
|
monkeypatch.setattr(youtube_handler, "_find_ytdlp", lambda: "yt-dlp")
|
||||||
|
|
||||||
|
killed = {"value": False}
|
||||||
|
|
||||||
|
class HangingProc:
|
||||||
|
returncode = None
|
||||||
|
|
||||||
|
async def communicate(self):
|
||||||
|
await asyncio.sleep(30) # far longer than the test timeout
|
||||||
|
return (b"", b"")
|
||||||
|
|
||||||
|
def kill(self):
|
||||||
|
killed["value"] = True
|
||||||
|
|
||||||
|
async def wait(self):
|
||||||
|
return 0
|
||||||
|
|
||||||
|
async def fake_create_subprocess_exec(*args, **kwargs):
|
||||||
|
return HangingProc()
|
||||||
|
|
||||||
|
monkeypatch.setattr(
|
||||||
|
asyncio, "create_subprocess_exec", fake_create_subprocess_exec
|
||||||
|
)
|
||||||
|
|
||||||
|
result = asyncio.run(
|
||||||
|
youtube_handler.fetch_youtube_comments("vid123", timeout=0.1)
|
||||||
|
)
|
||||||
|
|
||||||
|
assert result["success"] is False
|
||||||
|
assert "timed out" in result["error"].lower()
|
||||||
|
assert result["comments"] == []
|
||||||
|
# The overrunning child must be killed, not left running.
|
||||||
|
assert killed["value"] is True
|
||||||
Reference in New Issue
Block a user