fix: SSE stream parser crashes with NoneType on providers sending null choice/usage/tc entries (#2389)
* fix: SSE parser crashes with NoneType on MiniMax-M3 (and any provider sending null choice/usage/tc)
Three guards added in stream_llm:
1. choices[0] null check — MiniMax (and some other providers) send a
choices entry as None. `_choices[0].get("delta")` raised
AttributeError. Now checks `_choices[0] is not None` before calling
.get().
2. usage null guard — j["usage"] can arrive as None (not a dict) on
some providers. Added `or {}` so subsequent .get() calls don't crash.
3. tool_calls null entry skip — individual entries in the tool_calls
array can be None. Added `if tc is None: continue` before
tc.get("function").
All three match the `or {}` / null-guard pattern used elsewhere in the
same block. Safe for all OpenAI-compatible providers.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* fix: guard null choice in elif-choices SSE branch
The usage-chunk path already guarded _choices[0] is not None, but the
elif "choices" branch that processes content/tool-call deltas did not.
A chunk like {"choices": [null]} or {"choices": [null], "usage": null}
reaches j["choices"][0].get("delta") and crashes with:
'NoneType' object has no attribute 'get'
Fix: extract choices[0] into _c0 and continue to the next chunk when
it is None, matching the guard already applied in the usage path.
Adds three focused regressions covering the paths the maintainer flagged:
- {"choices": [null]}
- {"choices": [null], "usage": null}
- tool_calls array containing a null entry alongside a valid call
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---------
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1398,7 +1398,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
|
|||||||
j = json.loads(data)
|
j = json.loads(data)
|
||||||
# Usage chunk (from stream_options)
|
# Usage chunk (from stream_options)
|
||||||
_choices = j.get("choices") or []
|
_choices = j.get("choices") or []
|
||||||
_delta0 = _choices[0].get("delta") if _choices else None
|
_delta0 = _choices[0].get("delta") if (_choices and _choices[0] is not None) else None
|
||||||
# Capture usage whenever the chunk carries it and
|
# Capture usage whenever the chunk carries it and
|
||||||
# the delta has no actual output. Some gateways /
|
# the delta has no actual output. Some gateways /
|
||||||
# local servers attach usage to the FINAL delta,
|
# local servers attach usage to the FINAL delta,
|
||||||
@@ -1412,7 +1412,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
|
|||||||
or _delta0.get("tool_calls")
|
or _delta0.get("tool_calls")
|
||||||
)
|
)
|
||||||
if "usage" in j and not _delta_has_output:
|
if "usage" in j and not _delta_has_output:
|
||||||
u = j["usage"]
|
u = j["usage"] or {}
|
||||||
_usage_data = {"input_tokens": u.get("prompt_tokens", 0), "output_tokens": u.get("completion_tokens", 0)}
|
_usage_data = {"input_tokens": u.get("prompt_tokens", 0), "output_tokens": u.get("completion_tokens", 0)}
|
||||||
# llama.cpp puts a `timings` block alongside `usage` with the
|
# llama.cpp puts a `timings` block alongside `usage` with the
|
||||||
# TRUE generation speed (predicted_per_second) — pure decode,
|
# TRUE generation speed (predicted_per_second) — pure decode,
|
||||||
@@ -1427,7 +1427,10 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
|
|||||||
_usage_data["prefill_tps"] = round(_tm["prompt_per_second"], 2)
|
_usage_data["prefill_tps"] = round(_tm["prompt_per_second"], 2)
|
||||||
yield f'data: {json.dumps({"type": "usage", "data": _usage_data})}\n\n'
|
yield f'data: {json.dumps({"type": "usage", "data": _usage_data})}\n\n'
|
||||||
elif "choices" in j:
|
elif "choices" in j:
|
||||||
delta = j["choices"][0].get("delta") or {}
|
_c0 = (j["choices"] or [None])[0]
|
||||||
|
if _c0 is None:
|
||||||
|
continue
|
||||||
|
delta = _c0.get("delta") or {}
|
||||||
if isinstance(delta, dict):
|
if isinstance(delta, dict):
|
||||||
# Text content
|
# Text content
|
||||||
# Reasoning tokens (VLLM --reasoning-parser, e.g. Qwen3/DeepSeek-R1, Nemotron). vLLM 0.20.2 / NIM emit the field as `reasoning`; older builds use `reasoning_content`. Accept either.
|
# Reasoning tokens (VLLM --reasoning-parser, e.g. Qwen3/DeepSeek-R1, Nemotron). vLLM 0.20.2 / NIM emit the field as `reasoning`; older builds use `reasoning_content`. Accept either.
|
||||||
@@ -1446,6 +1449,8 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
|
|||||||
yield f'data: {json.dumps({"delta": content})}\n\n'
|
yield f'data: {json.dumps({"delta": content})}\n\n'
|
||||||
# Native tool calls — accumulate across chunks
|
# Native tool calls — accumulate across chunks
|
||||||
for tc in delta.get("tool_calls") or []:
|
for tc in delta.get("tool_calls") or []:
|
||||||
|
if tc is None:
|
||||||
|
continue
|
||||||
func = tc.get("function") or {}
|
func = tc.get("function") or {}
|
||||||
raw_idx = tc.get("index")
|
raw_idx = tc.get("index")
|
||||||
if raw_idx is None:
|
if raw_idx is None:
|
||||||
|
|||||||
@@ -101,3 +101,56 @@ def test_usage_on_empty_choices_chunk_still_captured(monkeypatch):
|
|||||||
]
|
]
|
||||||
usage = _usage_events(_drive(monkeypatch, lines))
|
usage = _usage_events(_drive(monkeypatch, lines))
|
||||||
assert usage and usage[-1] == {"input_tokens": 4, "output_tokens": 2}
|
assert usage and usage[-1] == {"input_tokens": 4, "output_tokens": 2}
|
||||||
|
|
||||||
|
|
||||||
|
def test_null_choice_chunk_does_not_crash(monkeypatch):
|
||||||
|
# Some providers emit {"choices": [null]} as a heartbeat/keepalive chunk.
|
||||||
|
# The parser must silently skip it rather than crashing on None.get("delta").
|
||||||
|
lines = [
|
||||||
|
'data: ' + json.dumps({"choices": [{"delta": {"content": "Hello"}}]}),
|
||||||
|
'data: ' + json.dumps({"choices": [None]}),
|
||||||
|
'data: [DONE]',
|
||||||
|
]
|
||||||
|
result = _drive(monkeypatch, lines)
|
||||||
|
assert "Hello" in result
|
||||||
|
|
||||||
|
|
||||||
|
def test_null_choice_with_null_usage_does_not_crash(monkeypatch):
|
||||||
|
# Chunk with both choices:[null] and usage:null — neither field should panic.
|
||||||
|
lines = [
|
||||||
|
'data: ' + json.dumps({"choices": [{"delta": {"content": "Hi"}}]}),
|
||||||
|
'data: ' + json.dumps({"choices": [None], "usage": None}),
|
||||||
|
'data: [DONE]',
|
||||||
|
]
|
||||||
|
result = _drive(monkeypatch, lines)
|
||||||
|
assert "Hi" in result
|
||||||
|
|
||||||
|
|
||||||
|
def test_null_tool_call_in_delta_is_skipped(monkeypatch):
|
||||||
|
# Some providers include null entries in the tool_calls array alongside
|
||||||
|
# valid calls. The null entry must be skipped; the valid call must survive.
|
||||||
|
lines = [
|
||||||
|
'data: ' + json.dumps({
|
||||||
|
"choices": [{
|
||||||
|
"delta": {
|
||||||
|
"tool_calls": [
|
||||||
|
None,
|
||||||
|
{"index": 0, "function": {"name": "get_weather", "arguments": '{"city":'}},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
}),
|
||||||
|
'data: ' + json.dumps({
|
||||||
|
"choices": [{
|
||||||
|
"delta": {
|
||||||
|
"tool_calls": [
|
||||||
|
{"index": 0, "function": {"name": "", "arguments": '"London"}'}},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}]
|
||||||
|
}),
|
||||||
|
'data: [DONE]',
|
||||||
|
]
|
||||||
|
result = _drive(monkeypatch, lines)
|
||||||
|
# The stream completes without error; the valid tool call was accumulated.
|
||||||
|
assert result is not None
|
||||||
|
|||||||
Reference in New Issue
Block a user