diff --git a/routes/chat_helpers.py b/routes/chat_helpers.py index cc20036..0929b69 100644 --- a/routes/chat_helpers.py +++ b/routes/chat_helpers.py @@ -589,6 +589,8 @@ def _normalize_thinking(text: str) -> str: import re if not text: return text + from src.text_helpers import normalize_thinking_markup + text = normalize_thinking_markup(text) reasoning_prefix_re = re.compile( r'^\s*(?:thinking(?:\s+process)?\s*:|the user |i need |i should |i will |they are |the question |i can )', re.IGNORECASE, @@ -699,6 +701,10 @@ def _extract_thinking_meta(text: str) -> dict | None: import re if not text: return None + from src.text_helpers import normalize_thinking_markup + original_text = text + text = normalize_thinking_markup(text) + normalized_changed = text != original_text # Check for tags (native or injected) time_match = re.search(r' dict | None: if thinking and reply: return {"thinking": thinking, "reply": reply, "time": think_time} + if normalized_changed and text.strip() and text.strip() != original_text.strip(): + return {"thinking": "", "reply": text.strip(), "time": think_time} + return None @@ -737,7 +746,8 @@ def clean_thinking_for_save(content: str, metadata: dict | None = None) -> tuple md = dict(metadata) if metadata else {} info = _extract_thinking_meta(content) if info: - md["thinking"] = info["thinking"] + if info.get("thinking"): + md["thinking"] = info["thinking"] if info.get("time"): md["thinking_time"] = info["time"] return info["reply"], md @@ -781,8 +791,10 @@ def save_assistant_response( # Extract thinking into metadata (don't pollute message content with tags) _think_info = _extract_thinking_meta(full_response) if _think_info: - md["thinking"] = _think_info["thinking"] - md["thinking_time"] = _think_info.get("time") + if _think_info.get("thinking"): + md["thinking"] = _think_info["thinking"] + if _think_info.get("time"): + md["thinking_time"] = _think_info.get("time") _content = _think_info["reply"] else: _content = full_response diff --git a/src/text_helpers.py b/src/text_helpers.py index 90d66a9..733ced0 100644 --- a/src/text_helpers.py +++ b/src/text_helpers.py @@ -15,18 +15,33 @@ from __future__ import annotations import re +_THINK_TAG_NAME = r"(?:think(?:ing)?|thought)" + # Closed reasoning blocks. Multi-pass loop in `strip_think` handles nested # `...` patterns some models emit. -_THINK_CLOSED_RE = re.compile(r"[\s\S]*?\s*", re.IGNORECASE) +_THINK_CLOSED_RE = re.compile(rf"<{_THINK_TAG_NAME}(?:\s+[^>]*)?>[\s\S]*?\s*", re.IGNORECASE) # Orphan opening or closing tags that survive after the closed-pass. -_THINK_TAG_RE = re.compile(r"]*>\s*", re.IGNORECASE) +_THINK_TAG_RE = re.compile(rf"]*>\s*", re.IGNORECASE) # Dangling opener anywhere in the response with no closer — strip everything # from `` to the end of string. -_THINK_OPEN_RE = re.compile(r"[\s\S]*$", re.IGNORECASE) +_THINK_OPEN_RE = re.compile(rf"<{_THINK_TAG_NAME}(?:\s+[^>]*)?>[\s\S]*$", re.IGNORECASE) # Streaming models occasionally emit ``-style attributes. # Normalize to a plain `` so the regexes above catch them. -_THINK_ATTR_RE = re.compile(r"]*>", re.IGNORECASE) -_THINK_ATTR_CLOSE_RE = re.compile(r"]*>", re.IGNORECASE) +_THINK_ATTR_RE = re.compile(rf"<{_THINK_TAG_NAME}\s+[^>]*>", re.IGNORECASE) +_THINK_ATTR_CLOSE_RE = re.compile(rf"]*>", re.IGNORECASE) +_GEMMA_THOUGHT_OPEN_RE = re.compile(r"<\|channel>thought\s*\n?[\s\S]*$", re.IGNORECASE) +_GEMMA_RESPONSE_CHANNEL_RE = re.compile( + r"<\|channel>response\s*\n?([\s\S]*?)", + re.IGNORECASE, +) +_GEMMA_RESPONSE_OPEN_RE = re.compile(r"<\|channel>response\s*\n?", re.IGNORECASE) +_GEMMA_CHANNEL_CLOSE_RE = re.compile(r"", re.IGNORECASE) +_THOUGHT_TAG_OPEN_RE = re.compile(r"]*)?>", re.IGNORECASE) +_THOUGHT_TAG_CLOSE_RE = re.compile(r"", re.IGNORECASE) +_GEMMA_THOUGHT_CHANNEL_CAPTURE_RE = re.compile( + r"<\|channel>thought\s*\n?([\s\S]*?)\s*", + re.IGNORECASE, +) # Qwen and a few other models prefix the response with a "Thinking Process:" # block before the real answer. _QWEN_THINKING_RE = re.compile( @@ -78,6 +93,30 @@ def _strip_reasoning_prose(text: str) -> str: return "\n\n".join(keep).strip() if keep else text +def normalize_thinking_markup(text: str) -> str: + """Canonicalize supported thinking wrappers to `` markup. + + The chat UI and persistence layer already understand `...`. + Gemma 4 may instead emit `<|channel>thought\n...`, and some + gateways/models emit `...`. Normalize those shapes into + the existing representation and strip empty thought channels. + """ + if not text: + return text + out = _THOUGHT_TAG_OPEN_RE.sub(lambda m: "", text) + out = _THOUGHT_TAG_CLOSE_RE.sub("", out) + + def _replace_gemma_thought(match: re.Match) -> str: + thought = match.group(1).strip() + return f"{thought}\n" if thought else "" + + out = _GEMMA_THOUGHT_CHANNEL_CAPTURE_RE.sub(_replace_gemma_thought, out) + out = _GEMMA_RESPONSE_CHANNEL_RE.sub(lambda m: m.group(1), out) + out = _GEMMA_RESPONSE_OPEN_RE.sub("", out) + out = _GEMMA_CHANNEL_CLOSE_RE.sub("", out) + return out + + def strip_think(text: str, *, prose: bool = False, prompt_echo: bool = True) -> str: """Strip `` blocks from model output. @@ -92,13 +131,21 @@ def strip_think(text: str, *, prose: bool = False, prompt_echo: bool = True) -> "The user asks:" / "We need to" leaked prompt echoes. Robust to: - * closed `...` (any depth, both `` and ``) - * dangling unclosed `...` + * closed `...` (any depth, plus ``/``) + * dangling unclosed `...` / `...` * stray opener/closer tags * ``-style attributes + * Gemma 4 `<|channel>thought...` wrappers """ if not text: return "" + # Gemma 4 thinking-capable models use channel control tokens rather than + # XML tags when the runtime does not split reasoning into a separate field. + # The thought channel can be empty in non-thinking mode; either way it is + # not user-facing content. A response channel, when present, is only a + # wrapper around the final answer. + text = normalize_thinking_markup(text) + text = _GEMMA_THOUGHT_OPEN_RE.sub("", text) # Normalize attributes so the closed/open regexes can catch them. text = _THINK_ATTR_RE.sub("", text) text = _THINK_ATTR_CLOSE_RE.sub("", text) diff --git a/static/js/chat.js b/static/js/chat.js index 2415f40..4ba6f11 100644 --- a/static/js/chat.js +++ b/static/js/chat.js @@ -1120,7 +1120,7 @@ import createResearchSynapse from './researchSynapse.js'; let _measureDiv = null; function _replyAfterClosedThinking(text) { - const closeRe = /<\/think(?:ing)?>/gi; + const closeRe = /<\/(?:think(?:ing)?|thought)>|/gi; let match = null; let last = null; while ((match = closeRe.exec(text || '')) !== null) last = match; @@ -1147,7 +1147,7 @@ import createResearchSynapse from './researchSynapse.js'; replyTrimmed = (replyText || '').trim(); } else { // Non-tag: check for garbled (reasoning\nreply) - const _gm = dt.match(/^[\s\S]+?\s*([\s\S]*?)(?:<\/think(?:ing)?>)?\s*$/i); + const _gm = dt.match(/^[\s\S]+?<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>\s*([\s\S]*?)(?:<\/(?:think(?:ing)?|thought)>)?\s*$/i); if (_gm && _gm[1].trim()) { replyTrimmed = _gm[1].trim(); } else { @@ -1188,8 +1188,11 @@ import createResearchSynapse from './researchSynapse.js'; const prevLen = contentEl._prevTextLen || 0; // If thinking is still streaming (unclosed ), show indicator instead of raw text if (markdownModule.hasUnclosedThinkTag && markdownModule.hasUnclosedThinkTag(dt)) { - const thinkStart = dt.search(//i); - const thinkContent = dt.substring(thinkStart).replace(//i, '').trim(); + const thinkStart = dt.search(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>|<\|channel>thought/i); + const thinkContent = dt.substring(Math.max(thinkStart, 0)) + .replace(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>|<\|channel>thought\s*\n?/i, '') + .replace(//gi, '') + .trim(); const lines = thinkContent.split('\n').length; // Don't show beforeThink text during streaming — it'll appear in the final render // This prevents the "split into two" duplication @@ -1449,7 +1452,7 @@ import createResearchSynapse from './researchSynapse.js'; // Detect non-tag thinking patterns: "Thinking:", "Thinking Process:", Gemma-style reasoning // These patterns don't use tags, so we simulate unclosed thinking during streaming const _replyPrefixes = ['Hey', 'Hi ', 'Hi!', 'Hello', 'Sure', 'Yes', 'No ', 'No,', 'Yo', 'OK', 'Here', 'Absolutely', 'Of course', 'Great', 'Alright', 'Thanks', 'Welcome', 'Good ', "I'm happy", "I'd be"]; - if (!hasUnclosedThink && !roundText.includes(']*)?>|<\|channel>thought/i.test(roundText)) { const _trimmedRT = roundText.trimStart(); const _isReasoning = markdownModule.startsWithReasoningPrefix(_trimmedRT); if (_isReasoning) { @@ -1475,10 +1478,10 @@ import createResearchSynapse from './researchSynapse.js'; } } } - if (!hasUnclosedThink && /^\s*<\/think(?:ing)?>/i.test(roundText)) { + if (!hasUnclosedThink && /^<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>\s*<\/(?:think(?:ing)?|thought)>/i.test(roundText)) { // Empty — the model likely put thinking outside the tags - const afterEmpty = roundText.replace(/^\s*<\/think(?:ing)?>/i, '').trim(); - const closeTags = (afterEmpty.match(/<\/think(?:ing)?>/gi) || []).length; + const afterEmpty = roundText.replace(/^<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>\s*<\/(?:think(?:ing)?|thought)>/i, '').trim(); + const closeTags = (afterEmpty.match(/<\/(?:think(?:ing)?|thought)>/gi) || []).length; if (closeTags === 0 && afterEmpty.length > 0) { hasUnclosedThink = true; // still waiting for real closing tag } @@ -1487,13 +1490,13 @@ import createResearchSynapse from './researchSynapse.js'; // Only applies when there's a second later (model leaked thinking outside tags) // Do NOT trigger if the text after contains tool calls (that's real content) if (!hasUnclosedThink && isThinking) { - const _thinkMatch = roundText.match(/([\s\S]*?)<\/think(?:ing)?>/i); + const _thinkMatch = roundText.match(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>([\s\S]*?)<\/(?:think(?:ing)?|thought)>/i); const _thinkLen = _thinkMatch ? _thinkMatch[1].trim().length : 0; if (_thinkLen < 20) { - const _afterClose = roundText.replace(/([\s\S]*?)<\/think(?:ing)?>/i, '').trim(); + const _afterClose = roundText.replace(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>([\s\S]*?)<\/(?:think(?:ing)?|thought)>/i, '').trim(); // Only keep waiting if there's trailing text that looks like thinking (not tool calls) const _hasToolCall = /```(?:bash|python|web_search|read_file|write_file|create_document|edit_document|manage_|generate_image)/i.test(_afterClose); - const _hasOrphanClose = /<\/think(?:ing)?>/i.test(_afterClose); + const _hasOrphanClose = /<\/(?:think(?:ing)?|thought)>/i.test(_afterClose); if (!_hasToolCall && (_hasOrphanClose || (Date.now() - thinkingStartTime) < 500)) { hasUnclosedThink = true; // keep waiting for real } @@ -1550,8 +1553,12 @@ import createResearchSynapse from './researchSynapse.js'; } } else if (hasUnclosedThink && isThinking) { if (_liveThinkInner) { - // Extract raw thinking text (strip all / open/close tags and prefixes) - var thinkText = roundText.replace(/<\/?think(?:ing)?>/gi, ''); + // Extract raw thinking text (strip known thinking wrappers and prefixes) + var thinkText = roundText + .replace(/<\/?(?:think(?:ing)?|thought)(?:\s+[^>]*)?>/gi, '') + .replace(/<\|channel>thought\s*\n?/gi, '') + .replace(/<\|channel>response\s*\n?/gi, '') + .replace(//gi, ''); thinkText = thinkText.replace(/^\s*Thinking(?:\s+Process)?:\s*/i, ''); _liveThinkInner.innerHTML = markdownModule.mdToHtml(thinkText); // Keep thinking box scrolled to bottom @@ -2402,8 +2409,8 @@ import createResearchSynapse from './researchSynapse.js'; _finalReply = (_extracted.content || '').trim(); } else { // Non-tag thinking: extract reply from raw text - // Handle garbled tag: "Thinking: reasoning\nreply" - const _garbledMatch = finalDisplay.match(/^[\s\S]+?\s*([\s\S]*?)(?:<\/think(?:ing)?>)?\s*$/i); + // Handle garbled thinking tag: "Thinking: reasoning\nreply" + const _garbledMatch = finalDisplay.match(/^[\s\S]+?<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>\s*([\s\S]*?)(?:<\/(?:think(?:ing)?|thought)>)?\s*$/i); if (_garbledMatch && _garbledMatch[1].trim()) { _finalReply = _garbledMatch[1].trim(); } else { @@ -2452,8 +2459,8 @@ import createResearchSynapse from './researchSynapse.js'; _body4b.innerHTML = _sourcesData ? _buildSourcesBox(_sourcesData, _sourcesType, _wasExpanded2) : _sourcesHtml; } else if (roundHolder !== holder) { // Check if there's thinking content worth showing - const _thinkMatch = roundText.match(/([\s\S]*?)<\/think(?:ing)?>/i); - if (_thinkMatch && _thinkMatch[1].trim()) { + const _thinkingOnly = markdownModule.extractThinkingBlocks(roundText); + if (_thinkingOnly.thinkingBlocks?.length && !_thinkingOnly.content) { // Show thinking in a collapsed section even if no visible reply text const _body4c = roundHolder.querySelector('.body'); if (_body4c) _body4c.innerHTML = markdownModule.processWithThinking(roundText); @@ -4534,9 +4541,10 @@ import createResearchSynapse from './researchSynapse.js'; // never closes (so it would otherwise hide the whole answer). Peel all of // those off so what's left is just the rewritten text. const _stripThink = (t) => { - t = t.replace(/[\s\S]*?<\/think>/gi, ''); // complete blocks - if (/<\/think>/i.test(t)) t = t.replace(/^[\s\S]*?<\/think>/i, ''); // reasoning w/o opener - return t.replace(/<\/?think>/gi, '').trim(); // any orphan tag + t = markdownModule.normalizeThinkingMarkup(t || ''); + t = t.replace(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>[\s\S]*?<\/(?:think(?:ing)?|thought)>/gi, ''); // complete blocks + if (/<\/(?:think(?:ing)?|thought)>/i.test(t)) t = t.replace(/^[\s\S]*?<\/(?:think(?:ing)?|thought)>/i, ''); // reasoning w/o opener + return t.replace(/<\/?(?:think(?:ing)?|thought)(?:\s+[^>]*)?>/gi, '').trim(); // any orphan tag }; newText = _stripThink(newText); diff --git a/static/js/markdown.js b/static/js/markdown.js index b158220..bdbaff4 100644 --- a/static/js/markdown.js +++ b/static/js/markdown.js @@ -116,8 +116,13 @@ function sanitizeAllowedHtml(html) { * Check if text has unclosed think tag */ export function hasUnclosedThinkTag(text) { - const openCount = (text.match(//gi) || []).length; - const closeCount = (text.match(/<\/think(?:ing)?>/gi) || []).length; + text = text || ''; + const openCount = + (text.match(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>/gi) || []).length + + (text.match(/<\|channel>thought/gi) || []).length; + const closeCount = + (text.match(/<\/(?:think(?:ing)?|thought)>/gi) || []).length + + (text.match(//gi) || []).length; return openCount > closeCount; } @@ -125,8 +130,25 @@ export function startsWithReasoningPrefix(text) { return /^\s*(?:thinking(?:\s+process)?\s*:|the user |i need |i should |i will |they are |the question |i can )/i.test(text || ''); } +export function normalizeThinkingMarkup(text) { + if (!text) return text; + let normalized = text; + normalized = normalized.replace(/]*)?>/gi, (_m, attrs = '') => ``); + normalized = normalized.replace(/<\/thought>/gi, ''); + normalized = normalized.replace(/<\|channel>thought\s*\n?([\s\S]*?)\s*/gi, (_m, content = '') => { + const thought = String(content || '').trim(); + return thought ? `${thought}\n` : ''; + }); + normalized = normalized.replace(/<\|channel>response\s*\n?([\s\S]*?)/gi, (_m, content = '') => content || ''); + normalized = normalized.replace(/<\|channel>response\s*\n?/gi, ''); + normalized = normalized.replace(//gi, ''); + return normalized; +} + function normalizePlainThinking(text) { - if (!text || /]*)?>([\s\S]*)$/i); - if (strayOpener) { - cleanContent = strayOpener[1]; + const gemmaThoughtStart = cleanContent.search(/<\|channel>thought/i); + if (gemmaThoughtStart >= 0) { + const leakedThought = cleanContent + .slice(gemmaThoughtStart) + .replace(/^<\|channel>thought\s*\n?/i, '') + .trim(); + if (gemmaThoughtStart === 0 && leakedThought) thinkingBlocks.push(leakedThought); + cleanContent = cleanContent.slice(0, gemmaThoughtStart); } else { - cleanContent = cleanContent.replace(/]*)?>[\s\S]*$/gi, ''); + const strayOpener = cleanContent.match(/^\s*]*)?>([\s\S]*)$/i); + if (strayOpener) { + cleanContent = strayOpener[1]; + } else { + cleanContent = cleanContent.replace(/]*)?>[\s\S]*$/gi, ''); + } } } @@ -686,6 +718,7 @@ const markdownModule = { createCollapsible, hasUnclosedThinkTag, extractThinkingBlocks, + normalizeThinkingMarkup, startsWithReasoningPrefix, renderMermaid }; diff --git a/tests/test_chat_helpers.py b/tests/test_chat_helpers.py index f86ff26..7a7ed28 100644 --- a/tests/test_chat_helpers.py +++ b/tests/test_chat_helpers.py @@ -1,5 +1,5 @@ import pytest -from routes.chat_helpers import needs_auto_name +from routes.chat_helpers import clean_thinking_for_save, needs_auto_name @pytest.mark.parametrize("name,expected", [ @@ -27,3 +27,44 @@ from routes.chat_helpers import needs_auto_name ]) def test_needs_auto_name(name, expected): assert needs_auto_name(name) == expected, f"needs_auto_name({name!r}) should be {expected}" + + +def test_clean_thinking_for_save_extracts_gemma4_thought_channel(): + content, metadata = clean_thinking_for_save( + "<|channel>thought\ninternal reasoningFinal answer.", + {"model": "google/gemma-4-31B-it"}, + ) + + assert content == "Final answer." + assert metadata["thinking"] == "internal reasoning" + assert metadata["model"] == "google/gemma-4-31B-it" + + +def test_clean_thinking_for_save_strips_empty_gemma4_thought_channel(): + content, metadata = clean_thinking_for_save( + "<|channel>thought\nFinal answer.", + {"model": "google/gemma-4-31B-it"}, + ) + + assert content == "Final answer." + assert "thinking" not in metadata + + +def test_clean_thinking_for_save_unwraps_gemma4_response_channel(): + content, metadata = clean_thinking_for_save( + "<|channel>thought\ninternal reasoning<|channel>response\nFinal answer.", + {"model": "google/gemma-4-31B-it"}, + ) + + assert content == "Final answer." + assert metadata["thinking"] == "internal reasoning" + + +def test_clean_thinking_for_save_extracts_thought_tag(): + content, metadata = clean_thinking_for_save( + "internal reasoningFinal answer.", + {}, + ) + + assert content == "Final answer." + assert metadata["thinking"] == "internal reasoning" diff --git a/tests/test_markdown_rendering_js.py b/tests/test_markdown_rendering_js.py index 75af810..4f36528 100644 --- a/tests/test_markdown_rendering_js.py +++ b/tests/test_markdown_rendering_js.py @@ -18,7 +18,7 @@ def node_available(): pytest.skip("node binary not on PATH") -def _run_markdown_case(markdown: str) -> str: +def _run_markdown_case(markdown: str, render_expr: str = "mod.mdToHtml(input)"): script = textwrap.dedent( r""" import fs from 'node:fs'; @@ -54,9 +54,9 @@ def _run_markdown_case(markdown: str) -> str: const moduleUrl = 'data:text/javascript;base64,' + Buffer.from(source).toString('base64'); const mod = await import(moduleUrl); const input = JSON.parse(process.argv[1]); - console.log(JSON.stringify({ html: mod.mdToHtml(input) })); + console.log(JSON.stringify({ html: __RENDER_EXPR__ })); """ - ) + ).replace("__RENDER_EXPR__", render_expr) result = subprocess.run( ["node", "--input-type=module", "-e", script, json.dumps(markdown)], cwd=_REPO, @@ -99,3 +99,51 @@ def test_table_separator_row_not_rendered_as_data(node_available): assert "thought\ninternal reasoningFinal answer.", + "mod.processWithThinking(input)", + ) + + assert "thinking-section" in html + assert "internal reasoning" in html + assert "Final answer." in html + assert "<|channel>" not in html + assert "<|channel>" not in html + + +def test_process_with_thinking_strips_empty_gemma4_thought_channel(node_available): + html = _run_markdown_case( + "<|channel>thought\nFinal answer.", + "mod.processWithThinking(input)", + ) + + assert "thinking-section" not in html + assert "Final answer." in html + assert "<|channel>" not in html + assert "<|channel>" not in html + + +def test_process_with_thinking_unwraps_gemma4_response_channel(node_available): + html = _run_markdown_case( + "<|channel>thought\ninternal reasoning<|channel>response\nFinal answer.", + "mod.processWithThinking(input)", + ) + + assert "thinking-section" in html + assert "internal reasoning" in html + assert "Final answer." in html + assert "<|channel>" not in html + assert "<|channel>" not in html + + +def test_extract_thinking_blocks_handles_thought_tag(node_available): + result = _run_markdown_case( + "internal reasoningFinal answer.", + "mod.extractThinkingBlocks(input)", + ) + + assert result["thinkingBlocks"] == ["internal reasoning"] + assert result["content"] == "Final answer." diff --git a/tests/test_strip_think.py b/tests/test_strip_think.py index 5e36ef1..f2affe4 100644 --- a/tests/test_strip_think.py +++ b/tests/test_strip_think.py @@ -23,3 +23,22 @@ def test_strip_think_cases(): # 6. Multiple blocks (closed + unclosed) assert strip_think("Hello! closed Here is the answer. unclosed") == "Hello! Here is the answer." + + +def test_strip_think_handles_thought_tags(): + assert strip_think("internal reasoningFinal answer.") == "Final answer." + + +def test_strip_think_handles_gemma4_thought_channel(): + text = "<|channel>thought\ninternal reasoningFinal answer." + assert strip_think(text) == "Final answer." + + +def test_strip_think_handles_empty_gemma4_thought_channel(): + text = "<|channel>thought\nFinal answer." + assert strip_think(text) == "Final answer." + + +def test_strip_think_unwraps_gemma4_response_channel(): + text = "<|channel>thought\ninternal reasoning<|channel>response\nFinal answer." + assert strip_think(text) == "Final answer."