diff --git a/src/agent_loop.py b/src/agent_loop.py
index 2a945ea..1b62f82 100644
--- a/src/agent_loop.py
+++ b/src/agent_loop.py
@@ -1101,8 +1101,20 @@ def _append_tool_results(
     `round_reasoning` (DeepSeek / vLLM reasoning-parser deltas) is echoed
     back via `reasoning_content` on the assistant message — DeepSeek's API
     rejects follow-up requests in thinking mode that don't include the
-    prior reasoning. Other vendors ignore the extra field.
+    prior reasoning.
+
+    NOTE: the field is NOT universally ignored by other vendors. Nemotron's chat
+    template re-injects EVERY prior `reasoning_content` as a <think> block, and
+    the message history is trimmed only once (before this agent loop), so across
+    rounds the reasoning piles up unbounded — bloating context and feeding the
+    model its own prior reasoning, which reinforces repetition/looping. So keep
+    reasoning_content on the MOST RECENT assistant turn only: enough for
+    DeepSeek continuity, without the per-round accumulation.
     """
+    # Strip reasoning_content (in place) from every assistant turn already in `messages`; only the newest turn, built below, may carry it.
+    for _m in messages:
+        if _m.get("role") == "assistant":
+            _m.pop("reasoning_content", None)
     if used_native and native_tool_calls:
         assistant_msg = {"role": "assistant"}
         # When the model emitted ONLY tool calls (no prose), content must be
diff --git a/src/llm_core.py b/src/llm_core.py
index ac8de73..00cff41 100644
--- a/src/llm_core.py
+++ b/src/llm_core.py
@@ -1127,8 +1127,8 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
                                     delta = j["choices"][0].get("delta") or {}
                                     if isinstance(delta, dict):
                                         # Text content
-                                        # Reasoning tokens (VLLM --reasoning-parser, e.g. Qwen3/DeepSeek-R1)
-                                        reasoning = delta.get("reasoning_content") or ""
+                                        # Reasoning tokens (vLLM --reasoning-parser, e.g. Qwen3, DeepSeek-R1, Nemotron). vLLM 0.20.2 / NIM emit the field as `reasoning`; older builds use `reasoning_content`. Accept either; `reasoning_content` takes precedence when both are non-empty.
+                                        reasoning = delta.get("reasoning_content") or delta.get("reasoning") or ""
                                         if reasoning:
                                             yield f'data: {json.dumps({"delta": reasoning, "thinking": True})}\n\n'
                                         content = delta.get("content") or ""
diff --git a/static/js/chat.js b/static/js/chat.js
index 4a7632c..969cf7e 100644
--- a/static/js/chat.js
+++ b/static/js/chat.js
@@ -512,6 +512,10 @@ import createResearchSynapse from './researchSynapse.js';
 
     // Declare accumulated outside try block so it's accessible in catch
     let accumulated = '';
+    // Are we currently inside an unclosed <think> block? Toggled per think/answer
+    // cycle so a multi-round agent response (one reasoning phase PER round) wraps each
+    // round's reasoning in its own <think>…</think> instead of leaking rounds 2+ as text.
+    let _thinkOpen = false;
     let holder = null;
     let finalMeta = null;
     let finalModelName = null;
@@ -1357,12 +1361,15 @@ import createResearchSynapse from './researchSynapse.js';
                 if (_threadAbove && _threadAbove.classList.contains('agent-thread') && !_threadAbove.classList.contains('has-bottom')) {
                   _threadAbove.classList.add('has-bottom');
                 }
-                // VLLM reasoning tokens: wrap in <think> tags for the thinking UI
+                // VLLM reasoning tokens: wrap in <think> tags for the thinking UI.
+                // Stateful open/close (not a whole-message substring check) so each round
+                // of a multi-round agent response gets its own <think>…</think> — otherwise
+                // only round 1 is wrapped and rounds 2+ reasoning leaks into the answer.
                 let _delta = json.delta;
                 if (json.thinking) {
-                  if (!accumulated.includes('<think>')) _delta = '<think>' + _delta;
-                } else if (accumulated.includes('<think>') && !accumulated.includes('</think>')) {
-                  _delta = '</think>' + _delta;
+                  if (!_thinkOpen) { _delta = '<think>' + _delta; _thinkOpen = true; }
+                } else if (_thinkOpen) {
+                  _delta = '</think>' + _delta; _thinkOpen = false;
                 }
                 const wasEmpty = !accumulated;
                 accumulated += _delta;
diff --git a/tests/test_llm_core_reasoning.py b/tests/test_llm_core_reasoning.py
new file mode 100644
index 0000000..35dafcc
--- /dev/null
+++ b/tests/test_llm_core_reasoning.py
@@ -0,0 +1,98 @@
+"""Regression: a streamed `reasoning` delta (vLLM 0.20.2 / NIM / Ollama) must surface
+as a thinking chunk, while a `content` delta still streams as normal content. Also
+covers the older `reasoning_content` field name for backward compatibility.
+"""
+import asyncio
+import json
+
+from src import llm_core
+
+
+class _FakeResp:  # stand-in for the streamed upstream response; replays canned SSE lines
+    status_code = 200  # always reports success
+
+    def __init__(self, lines):
+        self._lines = lines  # raw SSE lines to replay, in order
+
+    async def aiter_lines(self):
+        for ln in self._lines:
+            yield ln
+
+    async def aread(self):  # only used on non-200; present for safety
+        return b""
+
+
+class _FakeStreamCtx:  # async context manager handed out by _FakeClient.stream()
+    def __init__(self, lines):
+        self._lines = lines
+
+    async def __aenter__(self):
+        return _FakeResp(self._lines)  # entering the context yields the fake response
+
+    async def __aexit__(self, *exc):
+        return False  # falsy: never suppress an exception raised inside the block
+
+
+class _FakeClient:  # patched in for whatever llm_core._get_http_client() returns
+    def __init__(self, lines):
+        self._lines = lines
+
+    def stream(self, *args, **kwargs):  # request arguments are accepted but ignored
+        return _FakeStreamCtx(self._lines)
+
+
+def _run_stream(model, lines, monkeypatch):
+    """Drive stream_llm against a faked upstream and return parsed SSE payloads."""
+    monkeypatch.setattr(llm_core, "_get_http_client", lambda: _FakeClient(lines))  # no real HTTP
+
+    async def _go():  # collect every chunk the async generator yields
+        out = []
+        async for chunk in llm_core.stream_llm(
+            "http://nim-nano:8000/v1/chat/completions",
+            model,
+            [{"role": "user", "content": "hi"}],
+        ):
+            out.append(chunk)
+        return out
+
+    parsed = []
+    for chunk in asyncio.run(_go()):
+        for raw in chunk.splitlines():  # one chunk may hold several SSE lines
+            raw = raw.strip()
+            if raw.startswith("data:"):
+                payload = raw[5:].strip()  # drop the 5-character "data:" prefix
+                if payload.startswith("{"):  # skip anything that is not a JSON object
+                    try:
+                        parsed.append(json.loads(payload))
+                    except json.JSONDecodeError:
+                        pass  # malformed payloads are deliberately ignored here
+    return [p for p in parsed if "delta" in p]  # keep only payloads that carry a "delta" key
+
+
+def test_reasoning_field_emits_thinking_chunk(monkeypatch):  # covers the `reasoning` field name
+    deltas = _run_stream(
+        "nvidia/nemotron-3-nano",
+        [
+            'data: {"choices":[{"delta":{"reasoning":"weighing options"}}]}',  # reasoning delta
+            'data: {"choices":[{"delta":{"content":"Hello"}}]}',  # ordinary content delta
+            "data: [DONE]",
+        ],
+        monkeypatch,
+    )
+    assert any(d.get("thinking") and "weighing options" in d["delta"] for d in deltas), deltas  # reasoning must be flagged as thinking
+    assert any((not d.get("thinking")) and d["delta"] == "Hello" for d in deltas), deltas  # content must arrive unflagged
+
+
+def test_reasoning_content_field_still_supported(monkeypatch):  # covers the `reasoning_content` field name
+    # Older builds emit `reasoning_content`; it must still surface as thinking.
+    deltas = _run_stream(
+        "some-thinking-model",
+        [
+            'data: {"choices":[{"delta":{"reasoning_content":"older field"}}]}',  # reasoning delta
+            'data: {"choices":[{"delta":{"content":"Answer"}}]}',  # ordinary content delta
+            "data: [DONE]",
+        ],
+        monkeypatch,
+    )
+    assert any(d.get("thinking") and "older field" in d["delta"] for d in deltas), deltas  # reasoning must be flagged as thinking
+    assert any((not d.get("thinking")) and d["delta"] == "Answer" for d in deltas), deltas  # content must arrive unflagged