From 2d7d7b2412d7d2df7358cdfcc79686b39e9e17f4 Mon Sep 17 00:00:00 2001 From: tanmayraut45 Date: Tue, 2 Jun 2026 03:02:30 +0530 Subject: [PATCH] Fix TOCTOU race in chat stream status endpoint The /api/chat/stream_status handler did a membership test against _active_streams followed by an indexed read of the same key. Between those two ops, a sibling stream's finally block (or a stop / cleanup path) can pop the entry, turning the indexed read into a KeyError that bubbles up as a 500. The race is the exact one _stream_set was already written to avoid; the comment on the helper at the top of the module spells out why a single .get() is the right pattern here too. Collapse the two-step into a single .get() call so the lookup either returns the live record or None, and report 'detached' / 404 based on that single read. No behavior change on the happy path; the failure mode under concurrent stream cleanup is now handled deterministically. Closes #658. --- routes/chat_routes.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/routes/chat_routes.py b/routes/chat_routes.py index 3cdcb85..d0da480 100644 --- a/routes/chat_routes.py +++ b/routes/chat_routes.py @@ -920,11 +920,15 @@ def setup_chat_routes( _verify_session_owner(request, session_id) # A detached run can still be going even if _active_streams was popped; # report it as active so the client knows to reconnect via /resume. - if session_id not in _active_streams: + # Read once via .get() to avoid a KeyError race between the membership + # check and the indexed read if a sibling stream's finally pops the + # entry in between (same pattern _stream_set already uses). + rec = _active_streams.get(session_id) + if rec is None: if agent_runs.is_active(session_id): return {"status": "streaming", "detached": True} raise HTTPException(404, "No active stream for this session") - return _active_streams[session_id] + return rec # ------------------------------------------------------------------ # # POST /api/inject_context