diff --git a/src/llm_core.py b/src/llm_core.py
index be31ac5..a929edc 100644
--- a/src/llm_core.py
+++ b/src/llm_core.py
@@ -1398,7 +1398,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
                                 j = json.loads(data)
                                 # Usage chunk (from stream_options)
                                 _choices = j.get("choices") or []
-                                _delta0 = _choices[0].get("delta") if _choices else None
+                                _delta0 = _choices[0].get("delta") if (_choices and _choices[0] is not None) else None
                                 # Capture usage whenever the chunk carries it and
                                 # the delta has no actual output. Some gateways /
                                 # local servers attach usage to the FINAL delta,
@@ -1412,7 +1412,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
                                     or _delta0.get("tool_calls")
                                 )
                                 if "usage" in j and not _delta_has_output:
-                                    u = j["usage"]
+                                    u = j["usage"] or {}
                                     _usage_data = {"input_tokens": u.get("prompt_tokens", 0), "output_tokens": u.get("completion_tokens", 0)}
                                     # llama.cpp puts a `timings` block alongside `usage` with the
                                     # TRUE generation speed (predicted_per_second) — pure decode,
@@ -1427,7 +1427,10 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
                                             _usage_data["prefill_tps"] = round(_tm["prompt_per_second"], 2)
                                     yield f'data: {json.dumps({"type": "usage", "data": _usage_data})}\n\n'
                                 elif "choices" in j:
-                                    delta = j["choices"][0].get("delta") or {}
+                                    _c0 = (j["choices"] or [None])[0]
+                                    if _c0 is None:
+                                        continue
+                                    delta = _c0.get("delta") or {}
                                     if isinstance(delta, dict):
                                         # Text content
                                         # Reasoning tokens (VLLM --reasoning-parser, e.g. Qwen3/DeepSeek-R1, Nemotron). vLLM 0.20.2 / NIM emit the field as `reasoning`; older builds use `reasoning_content`. Accept either.
@@ -1446,6 +1449,8 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
                                             yield f'data: {json.dumps({"delta": content})}\n\n'
                                         # Native tool calls — accumulate across chunks
                                         for tc in delta.get("tool_calls") or []:
+                                            if tc is None:
+                                                continue
                                             func = tc.get("function") or {}
                                             raw_idx = tc.get("index")
                                             if raw_idx is None:
diff --git a/tests/test_llm_core_usage_finish_delta.py b/tests/test_llm_core_usage_finish_delta.py
index 9f28f9f..507939d 100644
--- a/tests/test_llm_core_usage_finish_delta.py
+++ b/tests/test_llm_core_usage_finish_delta.py
@@ -101,3 +101,56 @@ def test_usage_on_empty_choices_chunk_still_captured(monkeypatch):
     ]
     usage = _usage_events(_drive(monkeypatch, lines))
     assert usage and usage[-1] == {"input_tokens": 4, "output_tokens": 2}
+
+
+def test_null_choice_chunk_does_not_crash(monkeypatch):
+    # Some providers emit {"choices": [null]} as a heartbeat/keepalive chunk.
+    # The parser must silently skip it rather than crashing on None.get("delta").
+    lines = [
+        'data: ' + json.dumps({"choices": [{"delta": {"content": "Hello"}}]}),
+        'data: ' + json.dumps({"choices": [None]}),
+        'data: [DONE]',
+    ]
+    result = _drive(monkeypatch, lines)
+    assert "Hello" in result
+
+
+def test_null_choice_with_null_usage_does_not_crash(monkeypatch):
+    # Chunk with both choices:[null] and usage:null — neither field should panic.
+    lines = [
+        'data: ' + json.dumps({"choices": [{"delta": {"content": "Hi"}}]}),
+        'data: ' + json.dumps({"choices": [None], "usage": None}),
+        'data: [DONE]',
+    ]
+    result = _drive(monkeypatch, lines)
+    assert "Hi" in result
+
+
+def test_null_tool_call_in_delta_is_skipped(monkeypatch):
+    # Some providers include null entries in the tool_calls array alongside
+    # valid calls. The null entry must be skipped; the valid call must survive.
+    lines = [
+        'data: ' + json.dumps({
+            "choices": [{
+                "delta": {
+                    "tool_calls": [
+                        None,
+                        {"index": 0, "function": {"name": "get_weather", "arguments": '{"city":'}},
+                    ]
+                }
+            }]
+        }),
+        'data: ' + json.dumps({
+            "choices": [{
+                "delta": {
+                    "tool_calls": [
+                        {"index": 0, "function": {"name": "", "arguments": '"London"}'}},
+                    ]
+                }
+            }]
+        }),
+        'data: [DONE]',
+    ]
+    result = _drive(monkeypatch, lines)
+    # The stream completes without error; the valid tool call was accumulated.
+    assert result is not None