diff --git a/src/llm_core.py b/src/llm_core.py
index 1995982..1baf184 100644
--- a/src/llm_core.py
+++ b/src/llm_core.py
@@ -1363,6 +1363,8 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
     # can detect thinking-in-progress (some models output </think> but no <think>)
     _thinking_model = _supports_thinking(model)
     _first_content_sent = False
+    _in_think_tag = False        # True while consuming <think>…</think> content
+    _think_open_stripped = False  # opening <think> tag already removed
 
     def _emit_tool_calls():
         """Build the tool_calls event string if any were accumulated."""
@@ -1444,14 +1446,53 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
                                             yield f'data: {json.dumps({"delta": reasoning, "thinking": True})}\n\n'
                                         content = delta.get("content") or ""
                                         if content:
-                                            # Some thinking backends start normal content with a
-                                            # stray closing tag. Repair only that shape; do not
-                                            # wrap every first token for model families like
-                                            # MiniMax, which often stream ordinary answers.
-                                            if _thinking_model and not _first_content_sent and content.lstrip().lower().startswith("</think"):
-                                                content = "<think>" + content
-                                            _first_content_sent = True
-                                            yield f'data: {json.dumps({"delta": content})}\n\n'
+                                            stripped = content.lstrip()
+                                            # Auto-detect <think>…</think> in content stream.
+                                            # Covers Qwen3-derived models (Qwopus, QwQ forks) whose
+                                            # names don't match _THINKING_MODEL_PATTERNS but still
+                                            # emit literal <think> markup via llama.cpp --jinja.
+                                            if not _first_content_sent and not _thinking_model and not _in_think_tag and stripped.lower().startswith("<think"):
+                                                _thinking_model = True
+                                                _in_think_tag = True
+                                            if _in_think_tag:
+                                                close_idx = content.lower().find("</think>")
+                                                if close_idx != -1:
+                                                    # Split: up-to-</think> → thinking, remainder → content
+                                                    think_part = content[:close_idx]
+                                                    if not _think_open_stripped:
+                                                        # Strip the opening <think[...] > from the first chunk.
+                                                        # Use a dedicated flag — _first_content_sent stays False
+                                                        # throughout the think block, so it must not be reused.
+                                                        tag_end = think_part.lower().find(">")
+                                                        if tag_end != -1:
+                                                            think_part = think_part[tag_end + 1:]
+                                                        _think_open_stripped = True
+                                                    regular_part = content[close_idx + len("</think>"):]
+                                                    _in_think_tag = False
+                                                    if think_part:
+                                                        yield f'data: {json.dumps({"delta": think_part, "thinking": True})}\n\n'
+                                                    if regular_part:
+                                                        _first_content_sent = True
+                                                        yield f'data: {json.dumps({"delta": regular_part})}\n\n'
+                                                else:
+                                                    # Still inside <think>: route to thinking channel
+                                                    if not _think_open_stripped:
+                                                        # Strip the opening <think[...] > tag (first chunk only)
+                                                        tag_end = stripped.lower().find(">")
+                                                        if tag_end != -1:
+                                                            content = stripped[tag_end + 1:]
+                                                        _think_open_stripped = True
+                                                    if content:
+                                                        yield f'data: {json.dumps({"delta": content, "thinking": True})}\n\n'
+                                            else:
+                                                # Some thinking backends start normal content with a
+                                                # stray closing tag. Repair only that shape; do not
+                                                # wrap every first token for model families like
+                                                # MiniMax, which often stream ordinary answers.
+                                                if _thinking_model and not _first_content_sent and stripped.lower().startswith("</think"):
+                                                    content = "<think>" + content
+                                                _first_content_sent = True
+                                                yield f'data: {json.dumps({"delta": content})}\n\n'
                                         # Native tool calls — accumulate across chunks
                                         for tc in delta.get("tool_calls") or []:
                                             if tc is None:
diff --git a/tests/test_llm_core_reasoning.py b/tests/test_llm_core_reasoning.py
index 35dafcc..03ce194 100644
--- a/tests/test_llm_core_reasoning.py
+++ b/tests/test_llm_core_reasoning.py
@@ -96,3 +96,79 @@ def test_reasoning_content_field_still_supported(monkeypatch):
     )
     assert any(d.get("thinking") and "older field" in d["delta"] for d in deltas), deltas
     assert any((not d.get("thinking")) and d["delta"] == "Answer" for d in deltas), deltas
+
+
+def test_think_tag_in_content_stream_routes_to_thinking_channel(monkeypatch):
+    # Regression: a model whose name is not registered as a thinking model
+    # (Qwopus-style) streams <think>…</think> inside the content field, split
+    # over three chunks. Reasoning must be flagged thinking; the answer must not.
+    deltas = _run_stream(
+        "Qwopus3-9B-custom",  # name not in _THINKING_MODEL_PATTERNS
+        [
+            'data: {"choices":[{"delta":{"content":"<think>step one "}}]}',
+            'data: {"choices":[{"delta":{"content":"step two"}}]}',
+            'data: {"choices":[{"delta":{"content":"</think>Final answer"}}]}',
+            "data: [DONE]",
+        ],
+        monkeypatch,
+    )
+    thinking = [d for d in deltas if d.get("thinking")]  # deltas flagged as reasoning
+    regular = [d for d in deltas if not d.get("thinking")]  # ordinary answer deltas
+    assert thinking, f"expected thinking deltas, got: {deltas}"
+    assert all("Final answer" not in d["delta"] for d in thinking), thinking
+    assert regular, f"expected regular delta after </think>, got: {deltas}"
+    assert any("Final answer" in d["delta"] for d in regular), regular
+
+
+def test_think_tag_and_close_in_same_chunk(monkeypatch):
+    # Open tag, reasoning, close tag and answer all arrive in ONE content chunk.
+    deltas = _run_stream(
+        "Qwopus3-9B-custom",
+        [
+            'data: {"choices":[{"delta":{"content":"<think>my reasoning</think>my answer"}}]}',
+            "data: [DONE]",
+        ],
+        monkeypatch,
+    )
+    thinking = [d for d in deltas if d.get("thinking")]  # deltas flagged as reasoning
+    regular = [d for d in deltas if not d.get("thinking")]  # ordinary answer deltas
+    assert thinking and "my reasoning" in thinking[0]["delta"], thinking
+    assert regular and "my answer" in regular[0]["delta"], regular
+
+
+def test_think_tag_gt_in_mid_reasoning_not_truncated(monkeypatch):
+    # Regression for reusing _first_content_sent as the "open tag stripped" flag:
+    # that flag stays False for the whole think block, so the open-tag strip
+    # ran on every chunk and find(">") dropped reasoning text up to the first
+    # ">" in later chunks. Repro: three chunks with a literal ">" in chunk 2.
+    deltas = _run_stream(
+        "Qwopus3-9B-custom",
+        [
+            'data: {"choices":[{"delta":{"content":"<think>reasoning a "}}]}',
+            'data: {"choices":[{"delta":{"content":"more c > d "}}]}',
+            'data: {"choices":[{"delta":{"content":"</think>answer"}}]}',
+            "data: [DONE]",
+        ],
+        monkeypatch,
+    )
+    thinking = [d for d in deltas if d.get("thinking")]
+    regular = [d for d in deltas if not d.get("thinking")]
+    # Chunk 2 text on both sides of the ">" must survive in a thinking delta.
+    assert any("more c > d" in d["delta"] for d in thinking), thinking
+    assert any("answer" in d["delta"] for d in regular), regular
+
+
+def test_registered_thinking_model_stray_close_tag_repair_unchanged(monkeypatch):
+    # Guard the pre-existing repair path: when a registered thinking model's
+    # first content chunk starts with a stray </think>, "<think>" is prepended.
+    deltas = _run_stream(
+        "qwq-32b",  # registered in _THINKING_MODEL_PATTERNS
+        [
+            'data: {"choices":[{"delta":{"content":"</think>Here is my answer"}}]}',
+            "data: [DONE]",
+        ],
+        monkeypatch,
+    )
+    assert deltas, deltas
+    first = deltas[0]["delta"]  # first emitted delta should carry the repair prefix
+    assert first.startswith("<think>"), f"expected repair prefix, got: {first!r}"