fix: normalize Gemma 4 thought-channel output (#2224)
This commit is contained in:
@@ -589,6 +589,8 @@ def _normalize_thinking(text: str) -> str:
|
|||||||
import re
|
import re
|
||||||
if not text:
|
if not text:
|
||||||
return text
|
return text
|
||||||
|
from src.text_helpers import normalize_thinking_markup
|
||||||
|
text = normalize_thinking_markup(text)
|
||||||
reasoning_prefix_re = re.compile(
|
reasoning_prefix_re = re.compile(
|
||||||
r'^\s*(?:thinking(?:\s+process)?\s*:|the user |i need |i should |i will |they are |the question |i can )',
|
r'^\s*(?:thinking(?:\s+process)?\s*:|the user |i need |i should |i will |they are |the question |i can )',
|
||||||
re.IGNORECASE,
|
re.IGNORECASE,
|
||||||
@@ -699,6 +701,10 @@ def _extract_thinking_meta(text: str) -> dict | None:
|
|||||||
import re
|
import re
|
||||||
if not text:
|
if not text:
|
||||||
return None
|
return None
|
||||||
|
from src.text_helpers import normalize_thinking_markup
|
||||||
|
original_text = text
|
||||||
|
text = normalize_thinking_markup(text)
|
||||||
|
normalized_changed = text != original_text
|
||||||
|
|
||||||
# Check for <think> tags (native or injected)
|
# Check for <think> tags (native or injected)
|
||||||
time_match = re.search(r'<think(?:ing)?\s+time="([\d.]+)"', text)
|
time_match = re.search(r'<think(?:ing)?\s+time="([\d.]+)"', text)
|
||||||
@@ -729,6 +735,9 @@ def _extract_thinking_meta(text: str) -> dict | None:
|
|||||||
if thinking and reply:
|
if thinking and reply:
|
||||||
return {"thinking": thinking, "reply": reply, "time": think_time}
|
return {"thinking": thinking, "reply": reply, "time": think_time}
|
||||||
|
|
||||||
|
if normalized_changed and text.strip() and text.strip() != original_text.strip():
|
||||||
|
return {"thinking": "", "reply": text.strip(), "time": think_time}
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
@@ -737,7 +746,8 @@ def clean_thinking_for_save(content: str, metadata: dict | None = None) -> tuple
|
|||||||
md = dict(metadata) if metadata else {}
|
md = dict(metadata) if metadata else {}
|
||||||
info = _extract_thinking_meta(content)
|
info = _extract_thinking_meta(content)
|
||||||
if info:
|
if info:
|
||||||
md["thinking"] = info["thinking"]
|
if info.get("thinking"):
|
||||||
|
md["thinking"] = info["thinking"]
|
||||||
if info.get("time"):
|
if info.get("time"):
|
||||||
md["thinking_time"] = info["time"]
|
md["thinking_time"] = info["time"]
|
||||||
return info["reply"], md
|
return info["reply"], md
|
||||||
@@ -781,8 +791,10 @@ def save_assistant_response(
|
|||||||
# Extract thinking into metadata (don't pollute message content with <think> tags)
|
# Extract thinking into metadata (don't pollute message content with <think> tags)
|
||||||
_think_info = _extract_thinking_meta(full_response)
|
_think_info = _extract_thinking_meta(full_response)
|
||||||
if _think_info:
|
if _think_info:
|
||||||
md["thinking"] = _think_info["thinking"]
|
if _think_info.get("thinking"):
|
||||||
md["thinking_time"] = _think_info.get("time")
|
md["thinking"] = _think_info["thinking"]
|
||||||
|
if _think_info.get("time"):
|
||||||
|
md["thinking_time"] = _think_info.get("time")
|
||||||
_content = _think_info["reply"]
|
_content = _think_info["reply"]
|
||||||
else:
|
else:
|
||||||
_content = full_response
|
_content = full_response
|
||||||
|
|||||||
@@ -15,18 +15,33 @@ from __future__ import annotations
|
|||||||
|
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
_THINK_TAG_NAME = r"(?:think(?:ing)?|thought)"
|
||||||
|
|
||||||
# Closed reasoning blocks. Multi-pass loop in `strip_think` handles nested
|
# Closed reasoning blocks. Multi-pass loop in `strip_think` handles nested
|
||||||
# `<think><think>...</think></think>` patterns some models emit.
|
# `<think><think>...</think></think>` patterns some models emit.
|
||||||
_THINK_CLOSED_RE = re.compile(r"<think(?:ing)?>[\s\S]*?</think(?:ing)?>\s*", re.IGNORECASE)
|
_THINK_CLOSED_RE = re.compile(rf"<{_THINK_TAG_NAME}(?:\s+[^>]*)?>[\s\S]*?</{_THINK_TAG_NAME}>\s*", re.IGNORECASE)
|
||||||
# Orphan opening or closing tags that survive after the closed-pass.
|
# Orphan opening or closing tags that survive after the closed-pass.
|
||||||
_THINK_TAG_RE = re.compile(r"</?think(?:ing)?[^>]*>\s*", re.IGNORECASE)
|
_THINK_TAG_RE = re.compile(rf"</?{_THINK_TAG_NAME}[^>]*>\s*", re.IGNORECASE)
|
||||||
# Dangling opener anywhere in the response with no closer — strip everything
|
# Dangling opener anywhere in the response with no closer — strip everything
|
||||||
# from `<think>` to the end of string.
|
# from `<think>` to the end of string.
|
||||||
_THINK_OPEN_RE = re.compile(r"<think(?:ing)?>[\s\S]*$", re.IGNORECASE)
|
_THINK_OPEN_RE = re.compile(rf"<{_THINK_TAG_NAME}(?:\s+[^>]*)?>[\s\S]*$", re.IGNORECASE)
|
||||||
# Streaming models occasionally emit `<thinking time="0.42">`-style attributes.
|
# Streaming models occasionally emit `<thinking time="0.42">`-style attributes.
|
||||||
# Normalize to a plain `<think>` so the regexes above catch them.
|
# Normalize to a plain `<think>` so the regexes above catch them.
|
||||||
_THINK_ATTR_RE = re.compile(r"<think(?:ing)?\s+[^>]*>", re.IGNORECASE)
|
_THINK_ATTR_RE = re.compile(rf"<{_THINK_TAG_NAME}\s+[^>]*>", re.IGNORECASE)
|
||||||
_THINK_ATTR_CLOSE_RE = re.compile(r"</think(?:ing)?\s+[^>]*>", re.IGNORECASE)
|
_THINK_ATTR_CLOSE_RE = re.compile(rf"</{_THINK_TAG_NAME}\s+[^>]*>", re.IGNORECASE)
|
||||||
|
_GEMMA_THOUGHT_OPEN_RE = re.compile(r"<\|channel>thought\s*\n?[\s\S]*$", re.IGNORECASE)
|
||||||
|
_GEMMA_RESPONSE_CHANNEL_RE = re.compile(
|
||||||
|
r"<\|channel>response\s*\n?([\s\S]*?)<channel\|>",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
|
_GEMMA_RESPONSE_OPEN_RE = re.compile(r"<\|channel>response\s*\n?", re.IGNORECASE)
|
||||||
|
_GEMMA_CHANNEL_CLOSE_RE = re.compile(r"<channel\|>", re.IGNORECASE)
|
||||||
|
_THOUGHT_TAG_OPEN_RE = re.compile(r"<thought(\s+[^>]*)?>", re.IGNORECASE)
|
||||||
|
_THOUGHT_TAG_CLOSE_RE = re.compile(r"</thought>", re.IGNORECASE)
|
||||||
|
_GEMMA_THOUGHT_CHANNEL_CAPTURE_RE = re.compile(
|
||||||
|
r"<\|channel>thought\s*\n?([\s\S]*?)<channel\|>\s*",
|
||||||
|
re.IGNORECASE,
|
||||||
|
)
|
||||||
# Qwen and a few other models prefix the response with a "Thinking Process:"
|
# Qwen and a few other models prefix the response with a "Thinking Process:"
|
||||||
# block before the real answer.
|
# block before the real answer.
|
||||||
_QWEN_THINKING_RE = re.compile(
|
_QWEN_THINKING_RE = re.compile(
|
||||||
@@ -78,6 +93,30 @@ def _strip_reasoning_prose(text: str) -> str:
|
|||||||
return "\n\n".join(keep).strip() if keep else text
|
return "\n\n".join(keep).strip() if keep else text
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_thinking_markup(text: str) -> str:
    """Rewrite alternative reasoning wrappers as canonical `<think>` markup.

    Two shapes besides the native `<think>...</think>` are handled:

    * `<thought ...>...</thought>` tags are renamed to
      `<think ...>...</think>`; attributes on the opening tag are kept.
    * Gemma 4 channel tokens: a closed `<|channel>thought ... <channel|>`
      span becomes a `<think>` block (or is dropped entirely when it holds
      only whitespace), a closed `<|channel>response ... <channel|>` span is
      replaced by its inner text, and any leftover response opener or
      channel closer is removed.

    Falsy input (empty string, ``None``) is returned unchanged.
    """
    if not text:
        return text

    def _retag_opener(match: re.Match) -> str:
        # Group 1 is the optional attribute run, including its leading space.
        attrs = match.group(1) or ""
        return "<think" + attrs + ">"

    def _thought_channel_to_think(match: re.Match) -> str:
        body = match.group(1).strip()
        if not body:
            # Empty thought channel (non-thinking mode): emit nothing.
            return ""
        return f"<think>{body}</think>\n"

    def _unwrap_response(match: re.Match) -> str:
        return match.group(1)

    # Order matters: tags first, then closed channels, then stray tokens.
    result = _THOUGHT_TAG_OPEN_RE.sub(_retag_opener, text)
    result = _THOUGHT_TAG_CLOSE_RE.sub("</think>", result)
    result = _GEMMA_THOUGHT_CHANNEL_CAPTURE_RE.sub(_thought_channel_to_think, result)
    result = _GEMMA_RESPONSE_CHANNEL_RE.sub(_unwrap_response, result)
    for leftover_re in (_GEMMA_RESPONSE_OPEN_RE, _GEMMA_CHANNEL_CLOSE_RE):
        result = leftover_re.sub("", result)
    return result
|
||||||
|
|
||||||
|
|
||||||
def strip_think(text: str, *, prose: bool = False, prompt_echo: bool = True) -> str:
|
def strip_think(text: str, *, prose: bool = False, prompt_echo: bool = True) -> str:
|
||||||
"""Strip `<think>` blocks from model output.
|
"""Strip `<think>` blocks from model output.
|
||||||
|
|
||||||
@@ -92,13 +131,21 @@ def strip_think(text: str, *, prose: bool = False, prompt_echo: bool = True) ->
|
|||||||
"The user asks:" / "We need to" leaked prompt echoes.
|
"The user asks:" / "We need to" leaked prompt echoes.
|
||||||
|
|
||||||
Robust to:
|
Robust to:
|
||||||
* closed `<think>...</think>` (any depth, both `<think>` and `<thinking>`)
|
* closed `<think>...</think>` (any depth, plus `<thinking>`/`<thought>`)
|
||||||
* dangling unclosed `<think>...`
|
* dangling unclosed `<think>...` / `<thought>...`
|
||||||
* stray opener/closer tags
|
* stray opener/closer tags
|
||||||
* `<think time="0.42">`-style attributes
|
* `<think time="0.42">`-style attributes
|
||||||
|
* Gemma 4 `<|channel>thought...<channel|>` wrappers
|
||||||
"""
|
"""
|
||||||
if not text:
|
if not text:
|
||||||
return ""
|
return ""
|
||||||
|
# Gemma 4 thinking-capable models use channel control tokens rather than
|
||||||
|
# XML tags when the runtime does not split reasoning into a separate field.
|
||||||
|
# The thought channel can be empty in non-thinking mode; either way it is
|
||||||
|
# not user-facing content. A response channel, when present, is only a
|
||||||
|
# wrapper around the final answer.
|
||||||
|
text = normalize_thinking_markup(text)
|
||||||
|
text = _GEMMA_THOUGHT_OPEN_RE.sub("", text)
|
||||||
# Normalize attributes so the closed/open regexes can catch them.
|
# Normalize attributes so the closed/open regexes can catch them.
|
||||||
text = _THINK_ATTR_RE.sub("<think>", text)
|
text = _THINK_ATTR_RE.sub("<think>", text)
|
||||||
text = _THINK_ATTR_CLOSE_RE.sub("</think>", text)
|
text = _THINK_ATTR_CLOSE_RE.sub("</think>", text)
|
||||||
|
|||||||
@@ -1120,7 +1120,7 @@ import createResearchSynapse from './researchSynapse.js';
|
|||||||
let _measureDiv = null;
|
let _measureDiv = null;
|
||||||
|
|
||||||
function _replyAfterClosedThinking(text) {
|
function _replyAfterClosedThinking(text) {
|
||||||
const closeRe = /<\/think(?:ing)?>/gi;
|
const closeRe = /<\/(?:think(?:ing)?|thought)>|<channel\|>/gi;
|
||||||
let match = null;
|
let match = null;
|
||||||
let last = null;
|
let last = null;
|
||||||
while ((match = closeRe.exec(text || '')) !== null) last = match;
|
while ((match = closeRe.exec(text || '')) !== null) last = match;
|
||||||
@@ -1147,7 +1147,7 @@ import createResearchSynapse from './researchSynapse.js';
|
|||||||
replyTrimmed = (replyText || '').trim();
|
replyTrimmed = (replyText || '').trim();
|
||||||
} else {
|
} else {
|
||||||
// Non-tag: check for garbled <think> (reasoning\n<think>reply)
|
// Non-tag: check for garbled <think> (reasoning\n<think>reply)
|
||||||
const _gm = dt.match(/^[\s\S]+?<think(?:ing)?>\s*([\s\S]*?)(?:<\/think(?:ing)?>)?\s*$/i);
|
const _gm = dt.match(/^[\s\S]+?<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>\s*([\s\S]*?)(?:<\/(?:think(?:ing)?|thought)>)?\s*$/i);
|
||||||
if (_gm && _gm[1].trim()) {
|
if (_gm && _gm[1].trim()) {
|
||||||
replyTrimmed = _gm[1].trim();
|
replyTrimmed = _gm[1].trim();
|
||||||
} else {
|
} else {
|
||||||
@@ -1188,8 +1188,11 @@ import createResearchSynapse from './researchSynapse.js';
|
|||||||
const prevLen = contentEl._prevTextLen || 0;
|
const prevLen = contentEl._prevTextLen || 0;
|
||||||
// If thinking is still streaming (unclosed <think>), show indicator instead of raw text
|
// If thinking is still streaming (unclosed <think>), show indicator instead of raw text
|
||||||
if (markdownModule.hasUnclosedThinkTag && markdownModule.hasUnclosedThinkTag(dt)) {
|
if (markdownModule.hasUnclosedThinkTag && markdownModule.hasUnclosedThinkTag(dt)) {
|
||||||
const thinkStart = dt.search(/<think(?:ing)?>/i);
|
const thinkStart = dt.search(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>|<\|channel>thought/i);
|
||||||
const thinkContent = dt.substring(thinkStart).replace(/<think(?:ing)?>/i, '').trim();
|
const thinkContent = dt.substring(Math.max(thinkStart, 0))
|
||||||
|
.replace(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>|<\|channel>thought\s*\n?/i, '')
|
||||||
|
.replace(/<channel\|>/gi, '')
|
||||||
|
.trim();
|
||||||
const lines = thinkContent.split('\n').length;
|
const lines = thinkContent.split('\n').length;
|
||||||
// Don't show beforeThink text during streaming — it'll appear in the final render
|
// Don't show beforeThink text during streaming — it'll appear in the final render
|
||||||
// This prevents the "split into two" duplication
|
// This prevents the "split into two" duplication
|
||||||
@@ -1449,7 +1452,7 @@ import createResearchSynapse from './researchSynapse.js';
|
|||||||
// Detect non-tag thinking patterns: "Thinking:", "Thinking Process:", Gemma-style reasoning
|
// Detect non-tag thinking patterns: "Thinking:", "Thinking Process:", Gemma-style reasoning
|
||||||
// These patterns don't use <think> tags, so we simulate unclosed thinking during streaming
|
// These patterns don't use <think> tags, so we simulate unclosed thinking during streaming
|
||||||
const _replyPrefixes = ['Hey', 'Hi ', 'Hi!', 'Hello', 'Sure', 'Yes', 'No ', 'No,', 'Yo', 'OK', 'Here', 'Absolutely', 'Of course', 'Great', 'Alright', 'Thanks', 'Welcome', 'Good ', "I'm happy", "I'd be"];
|
const _replyPrefixes = ['Hey', 'Hi ', 'Hi!', 'Hello', 'Sure', 'Yes', 'No ', 'No,', 'Yo', 'OK', 'Here', 'Absolutely', 'Of course', 'Great', 'Alright', 'Thanks', 'Welcome', 'Good ', "I'm happy", "I'd be"];
|
||||||
if (!hasUnclosedThink && !roundText.includes('<think')) {
|
if (!hasUnclosedThink && !/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>|<\|channel>thought/i.test(roundText)) {
|
||||||
const _trimmedRT = roundText.trimStart();
|
const _trimmedRT = roundText.trimStart();
|
||||||
const _isReasoning = markdownModule.startsWithReasoningPrefix(_trimmedRT);
|
const _isReasoning = markdownModule.startsWithReasoningPrefix(_trimmedRT);
|
||||||
if (_isReasoning) {
|
if (_isReasoning) {
|
||||||
@@ -1475,10 +1478,10 @@ import createResearchSynapse from './researchSynapse.js';
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!hasUnclosedThink && /^<think(?:ing)?>\s*<\/think(?:ing)?>/i.test(roundText)) {
|
if (!hasUnclosedThink && /^<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>\s*<\/(?:think(?:ing)?|thought)>/i.test(roundText)) {
|
||||||
// Empty <think></think> — the model likely put thinking outside the tags
|
// Empty <think></think> — the model likely put thinking outside the tags
|
||||||
const afterEmpty = roundText.replace(/^<think(?:ing)?>\s*<\/think(?:ing)?>/i, '').trim();
|
const afterEmpty = roundText.replace(/^<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>\s*<\/(?:think(?:ing)?|thought)>/i, '').trim();
|
||||||
const closeTags = (afterEmpty.match(/<\/think(?:ing)?>/gi) || []).length;
|
const closeTags = (afterEmpty.match(/<\/(?:think(?:ing)?|thought)>/gi) || []).length;
|
||||||
if (closeTags === 0 && afterEmpty.length > 0) {
|
if (closeTags === 0 && afterEmpty.length > 0) {
|
||||||
hasUnclosedThink = true; // still waiting for real closing tag
|
hasUnclosedThink = true; // still waiting for real closing tag
|
||||||
}
|
}
|
||||||
@@ -1487,13 +1490,13 @@ import createResearchSynapse from './researchSynapse.js';
|
|||||||
// Only applies when there's a second </think> later (model leaked thinking outside tags)
|
// Only applies when there's a second </think> later (model leaked thinking outside tags)
|
||||||
// Do NOT trigger if the text after </think> contains tool calls (that's real content)
|
// Do NOT trigger if the text after </think> contains tool calls (that's real content)
|
||||||
if (!hasUnclosedThink && isThinking) {
|
if (!hasUnclosedThink && isThinking) {
|
||||||
const _thinkMatch = roundText.match(/<think(?:ing)?>([\s\S]*?)<\/think(?:ing)?>/i);
|
const _thinkMatch = roundText.match(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>([\s\S]*?)<\/(?:think(?:ing)?|thought)>/i);
|
||||||
const _thinkLen = _thinkMatch ? _thinkMatch[1].trim().length : 0;
|
const _thinkLen = _thinkMatch ? _thinkMatch[1].trim().length : 0;
|
||||||
if (_thinkLen < 20) {
|
if (_thinkLen < 20) {
|
||||||
const _afterClose = roundText.replace(/<think(?:ing)?>([\s\S]*?)<\/think(?:ing)?>/i, '').trim();
|
const _afterClose = roundText.replace(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>([\s\S]*?)<\/(?:think(?:ing)?|thought)>/i, '').trim();
|
||||||
// Only keep waiting if there's trailing text that looks like thinking (not tool calls)
|
// Only keep waiting if there's trailing text that looks like thinking (not tool calls)
|
||||||
const _hasToolCall = /```(?:bash|python|web_search|read_file|write_file|create_document|edit_document|manage_|generate_image)/i.test(_afterClose);
|
const _hasToolCall = /```(?:bash|python|web_search|read_file|write_file|create_document|edit_document|manage_|generate_image)/i.test(_afterClose);
|
||||||
const _hasOrphanClose = /<\/think(?:ing)?>/i.test(_afterClose);
|
const _hasOrphanClose = /<\/(?:think(?:ing)?|thought)>/i.test(_afterClose);
|
||||||
if (!_hasToolCall && (_hasOrphanClose || (Date.now() - thinkingStartTime) < 500)) {
|
if (!_hasToolCall && (_hasOrphanClose || (Date.now() - thinkingStartTime) < 500)) {
|
||||||
hasUnclosedThink = true; // keep waiting for real </think>
|
hasUnclosedThink = true; // keep waiting for real </think>
|
||||||
}
|
}
|
||||||
@@ -1550,8 +1553,12 @@ import createResearchSynapse from './researchSynapse.js';
|
|||||||
}
|
}
|
||||||
} else if (hasUnclosedThink && isThinking) {
|
} else if (hasUnclosedThink && isThinking) {
|
||||||
if (_liveThinkInner) {
|
if (_liveThinkInner) {
|
||||||
// Extract raw thinking text (strip all <think>/<thinking> open/close tags and prefixes)
|
// Extract raw thinking text (strip known thinking wrappers and prefixes)
|
||||||
var thinkText = roundText.replace(/<\/?think(?:ing)?>/gi, '');
|
var thinkText = roundText
|
||||||
|
.replace(/<\/?(?:think(?:ing)?|thought)(?:\s+[^>]*)?>/gi, '')
|
||||||
|
.replace(/<\|channel>thought\s*\n?/gi, '')
|
||||||
|
.replace(/<\|channel>response\s*\n?/gi, '')
|
||||||
|
.replace(/<channel\|>/gi, '');
|
||||||
thinkText = thinkText.replace(/^\s*Thinking(?:\s+Process)?:\s*/i, '');
|
thinkText = thinkText.replace(/^\s*Thinking(?:\s+Process)?:\s*/i, '');
|
||||||
_liveThinkInner.innerHTML = markdownModule.mdToHtml(thinkText);
|
_liveThinkInner.innerHTML = markdownModule.mdToHtml(thinkText);
|
||||||
// Keep thinking box scrolled to bottom
|
// Keep thinking box scrolled to bottom
|
||||||
@@ -2402,8 +2409,8 @@ import createResearchSynapse from './researchSynapse.js';
|
|||||||
_finalReply = (_extracted.content || '').trim();
|
_finalReply = (_extracted.content || '').trim();
|
||||||
} else {
|
} else {
|
||||||
// Non-tag thinking: extract reply from raw text
|
// Non-tag thinking: extract reply from raw text
|
||||||
// Handle garbled <think> tag: "Thinking: reasoning\n<think>reply"
|
// Handle garbled thinking tag: "Thinking: reasoning\n<think>reply"
|
||||||
const _garbledMatch = finalDisplay.match(/^[\s\S]+?<think(?:ing)?>\s*([\s\S]*?)(?:<\/think(?:ing)?>)?\s*$/i);
|
const _garbledMatch = finalDisplay.match(/^[\s\S]+?<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>\s*([\s\S]*?)(?:<\/(?:think(?:ing)?|thought)>)?\s*$/i);
|
||||||
if (_garbledMatch && _garbledMatch[1].trim()) {
|
if (_garbledMatch && _garbledMatch[1].trim()) {
|
||||||
_finalReply = _garbledMatch[1].trim();
|
_finalReply = _garbledMatch[1].trim();
|
||||||
} else {
|
} else {
|
||||||
@@ -2452,8 +2459,8 @@ import createResearchSynapse from './researchSynapse.js';
|
|||||||
_body4b.innerHTML = _sourcesData ? _buildSourcesBox(_sourcesData, _sourcesType, _wasExpanded2) : _sourcesHtml;
|
_body4b.innerHTML = _sourcesData ? _buildSourcesBox(_sourcesData, _sourcesType, _wasExpanded2) : _sourcesHtml;
|
||||||
} else if (roundHolder !== holder) {
|
} else if (roundHolder !== holder) {
|
||||||
// Check if there's thinking content worth showing
|
// Check if there's thinking content worth showing
|
||||||
const _thinkMatch = roundText.match(/<think(?:ing)?>([\s\S]*?)<\/think(?:ing)?>/i);
|
const _thinkingOnly = markdownModule.extractThinkingBlocks(roundText);
|
||||||
if (_thinkMatch && _thinkMatch[1].trim()) {
|
if (_thinkingOnly.thinkingBlocks?.length && !_thinkingOnly.content) {
|
||||||
// Show thinking in a collapsed section even if no visible reply text
|
// Show thinking in a collapsed section even if no visible reply text
|
||||||
const _body4c = roundHolder.querySelector('.body');
|
const _body4c = roundHolder.querySelector('.body');
|
||||||
if (_body4c) _body4c.innerHTML = markdownModule.processWithThinking(roundText);
|
if (_body4c) _body4c.innerHTML = markdownModule.processWithThinking(roundText);
|
||||||
@@ -4534,9 +4541,10 @@ import createResearchSynapse from './researchSynapse.js';
|
|||||||
// never closes (so it would otherwise hide the whole answer). Peel all of
|
// never closes (so it would otherwise hide the whole answer). Peel all of
|
||||||
// those off so what's left is just the rewritten text.
|
// those off so what's left is just the rewritten text.
|
||||||
const _stripThink = (t) => {
|
const _stripThink = (t) => {
|
||||||
t = t.replace(/<think>[\s\S]*?<\/think>/gi, ''); // complete blocks
|
t = markdownModule.normalizeThinkingMarkup(t || '');
|
||||||
if (/<\/think>/i.test(t)) t = t.replace(/^[\s\S]*?<\/think>/i, ''); // reasoning w/o opener
|
t = t.replace(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>[\s\S]*?<\/(?:think(?:ing)?|thought)>/gi, ''); // complete blocks
|
||||||
return t.replace(/<\/?think>/gi, '').trim(); // any orphan tag
|
if (/<\/(?:think(?:ing)?|thought)>/i.test(t)) t = t.replace(/^[\s\S]*?<\/(?:think(?:ing)?|thought)>/i, ''); // reasoning w/o opener
|
||||||
|
return t.replace(/<\/?(?:think(?:ing)?|thought)(?:\s+[^>]*)?>/gi, '').trim(); // any orphan tag
|
||||||
};
|
};
|
||||||
newText = _stripThink(newText);
|
newText = _stripThink(newText);
|
||||||
|
|
||||||
|
|||||||
@@ -116,8 +116,13 @@ function sanitizeAllowedHtml(html) {
|
|||||||
* Check if text has unclosed think tag
|
* Check if text has unclosed think tag
|
||||||
*/
|
*/
|
||||||
export function hasUnclosedThinkTag(text) {
|
export function hasUnclosedThinkTag(text) {
|
||||||
const openCount = (text.match(/<think(?:ing)?>/gi) || []).length;
|
text = text || '';
|
||||||
const closeCount = (text.match(/<\/think(?:ing)?>/gi) || []).length;
|
const openCount =
|
||||||
|
(text.match(/<(?:think(?:ing)?|thought)(?:\s+[^>]*)?>/gi) || []).length
|
||||||
|
+ (text.match(/<\|channel>thought/gi) || []).length;
|
||||||
|
const closeCount =
|
||||||
|
(text.match(/<\/(?:think(?:ing)?|thought)>/gi) || []).length
|
||||||
|
+ (text.match(/<channel\|>/gi) || []).length;
|
||||||
return openCount > closeCount;
|
return openCount > closeCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -125,8 +130,25 @@ export function startsWithReasoningPrefix(text) {
|
|||||||
return /^\s*(?:thinking(?:\s+process)?\s*:|the user |i need |i should |i will |they are |the question |i can )/i.test(text || '');
|
return /^\s*(?:thinking(?:\s+process)?\s*:|the user |i need |i should |i will |they are |the question |i can )/i.test(text || '');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Rewrite alternative reasoning wrappers as canonical `<think>` markup.
 *
 * - `<thought ...>` / `</thought>` tags are renamed to `<think ...>` / `</think>`.
 * - A closed `<|channel>thought ... <channel|>` span becomes a `<think>` block,
 *   or is dropped when it contains only whitespace.
 * - A closed `<|channel>response ... <channel|>` span is replaced by its inner text.
 * - Leftover response openers and channel closers are removed.
 *
 * Falsy input is returned unchanged.
 */
export function normalizeThinkingMarkup(text) {
  if (!text) return text;

  // attrs is the optional attribute run (with its leading whitespace) or undefined.
  const retagOpener = (_whole, attrs) => '<think' + (attrs || '') + '>';

  const thoughtChannelToThink = (_whole, inner) => {
    const body = String(inner || '').trim();
    return body === '' ? '' : '<think>' + body + '</think>\n';
  };

  const unwrapResponse = (_whole, inner) => inner || '';

  // Order matters: tags first, then closed channels, then stray tokens.
  return text
    .replace(/<thought(\s+[^>]*)?>/gi, retagOpener)
    .replace(/<\/thought>/gi, '</think>')
    .replace(/<\|channel>thought\s*\n?([\s\S]*?)<channel\|>\s*/gi, thoughtChannelToThink)
    .replace(/<\|channel>response\s*\n?([\s\S]*?)<channel\|>/gi, unwrapResponse)
    .replace(/<\|channel>response\s*\n?/gi, '')
    .replace(/<channel\|>/gi, '');
}
|
||||||
|
|
||||||
function normalizePlainThinking(text) {
|
function normalizePlainThinking(text) {
|
||||||
if (!text || /<think/i.test(text)) return text;
|
if (!text) return text;
|
||||||
|
text = normalizeThinkingMarkup(text);
|
||||||
|
if (/<think/i.test(text)) return text;
|
||||||
|
|
||||||
const trimmed = text.trimStart();
|
const trimmed = text.trimStart();
|
||||||
if (!startsWithReasoningPrefix(trimmed)) return text;
|
if (!startsWithReasoningPrefix(trimmed)) return text;
|
||||||
@@ -220,11 +242,21 @@ export function extractThinkingBlocks(text) {
|
|||||||
// (b) Cut-off mid-generation — there's already real reply text before the
|
// (b) Cut-off mid-generation — there's already real reply text before the
|
||||||
// opener. Drop from the tag onward as before (it's truncated thinking).
|
// opener. Drop from the tag onward as before (it's truncated thinking).
|
||||||
if (hasUnclosedThinkTag(normalized)) {
|
if (hasUnclosedThinkTag(normalized)) {
|
||||||
const strayOpener = cleanContent.match(/^\s*<think(?:ing)?(?:\s+[^>]*)?>([\s\S]*)$/i);
|
const gemmaThoughtStart = cleanContent.search(/<\|channel>thought/i);
|
||||||
if (strayOpener) {
|
if (gemmaThoughtStart >= 0) {
|
||||||
cleanContent = strayOpener[1];
|
const leakedThought = cleanContent
|
||||||
|
.slice(gemmaThoughtStart)
|
||||||
|
.replace(/^<\|channel>thought\s*\n?/i, '')
|
||||||
|
.trim();
|
||||||
|
if (gemmaThoughtStart === 0 && leakedThought) thinkingBlocks.push(leakedThought);
|
||||||
|
cleanContent = cleanContent.slice(0, gemmaThoughtStart);
|
||||||
} else {
|
} else {
|
||||||
cleanContent = cleanContent.replace(/<think(?:ing)?(?:\s+[^>]*)?>[\s\S]*$/gi, '');
|
const strayOpener = cleanContent.match(/^\s*<think(?:ing)?(?:\s+[^>]*)?>([\s\S]*)$/i);
|
||||||
|
if (strayOpener) {
|
||||||
|
cleanContent = strayOpener[1];
|
||||||
|
} else {
|
||||||
|
cleanContent = cleanContent.replace(/<think(?:ing)?(?:\s+[^>]*)?>[\s\S]*$/gi, '');
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -686,6 +718,7 @@ const markdownModule = {
|
|||||||
createCollapsible,
|
createCollapsible,
|
||||||
hasUnclosedThinkTag,
|
hasUnclosedThinkTag,
|
||||||
extractThinkingBlocks,
|
extractThinkingBlocks,
|
||||||
|
normalizeThinkingMarkup,
|
||||||
startsWithReasoningPrefix,
|
startsWithReasoningPrefix,
|
||||||
renderMermaid
|
renderMermaid
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import pytest
|
import pytest
|
||||||
from routes.chat_helpers import needs_auto_name
|
from routes.chat_helpers import clean_thinking_for_save, needs_auto_name
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("name,expected", [
|
@pytest.mark.parametrize("name,expected", [
|
||||||
@@ -27,3 +27,44 @@ from routes.chat_helpers import needs_auto_name
|
|||||||
])
|
])
|
||||||
def test_needs_auto_name(name, expected):
|
def test_needs_auto_name(name, expected):
|
||||||
assert needs_auto_name(name) == expected, f"needs_auto_name({name!r}) should be {expected}"
|
assert needs_auto_name(name) == expected, f"needs_auto_name({name!r}) should be {expected}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_clean_thinking_for_save_extracts_gemma4_thought_channel():
    """A closed Gemma 4 thought channel moves from the content into metadata."""
    raw = "<|channel>thought\ninternal reasoning<channel|>Final answer."
    seed_metadata = {"model": "google/gemma-4-31B-it"}

    reply, saved = clean_thinking_for_save(raw, seed_metadata)

    assert reply == "Final answer."
    assert saved["thinking"] == "internal reasoning"
    # Pre-existing metadata keys must survive the extraction.
    assert saved["model"] == "google/gemma-4-31B-it"
|
||||||
|
|
||||||
|
|
||||||
|
def test_clean_thinking_for_save_strips_empty_gemma4_thought_channel():
    """An empty thought channel is removed and records no `thinking` entry."""
    raw = "<|channel>thought\n<channel|>Final answer."
    seed_metadata = {"model": "google/gemma-4-31B-it"}

    reply, saved = clean_thinking_for_save(raw, seed_metadata)

    assert reply == "Final answer."
    assert "thinking" not in saved
|
||||||
|
|
||||||
|
|
||||||
|
def test_clean_thinking_for_save_unwraps_gemma4_response_channel():
    """Thought is extracted and the response-channel wrapper is peeled off."""
    raw = "<|channel>thought\ninternal reasoning<channel|><|channel>response\nFinal answer.<channel|>"
    seed_metadata = {"model": "google/gemma-4-31B-it"}

    reply, saved = clean_thinking_for_save(raw, seed_metadata)

    assert reply == "Final answer."
    assert saved["thinking"] == "internal reasoning"
|
||||||
|
|
||||||
|
|
||||||
|
def test_clean_thinking_for_save_extracts_thought_tag():
    """`<thought>...</thought>` is treated like a `<think>` block on save."""
    raw = "<thought>internal reasoning</thought>Final answer."

    reply, saved = clean_thinking_for_save(raw, {})

    assert reply == "Final answer."
    assert saved["thinking"] == "internal reasoning"
|
||||||
|
|||||||
@@ -18,7 +18,7 @@ def node_available():
|
|||||||
pytest.skip("node binary not on PATH")
|
pytest.skip("node binary not on PATH")
|
||||||
|
|
||||||
|
|
||||||
def _run_markdown_case(markdown: str) -> str:
|
def _run_markdown_case(markdown: str, render_expr: str = "mod.mdToHtml(input)"):
|
||||||
script = textwrap.dedent(
|
script = textwrap.dedent(
|
||||||
r"""
|
r"""
|
||||||
import fs from 'node:fs';
|
import fs from 'node:fs';
|
||||||
@@ -54,9 +54,9 @@ def _run_markdown_case(markdown: str) -> str:
|
|||||||
const moduleUrl = 'data:text/javascript;base64,' + Buffer.from(source).toString('base64');
|
const moduleUrl = 'data:text/javascript;base64,' + Buffer.from(source).toString('base64');
|
||||||
const mod = await import(moduleUrl);
|
const mod = await import(moduleUrl);
|
||||||
const input = JSON.parse(process.argv[1]);
|
const input = JSON.parse(process.argv[1]);
|
||||||
console.log(JSON.stringify({ html: mod.mdToHtml(input) }));
|
console.log(JSON.stringify({ html: __RENDER_EXPR__ }));
|
||||||
"""
|
"""
|
||||||
)
|
).replace("__RENDER_EXPR__", render_expr)
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
["node", "--input-type=module", "-e", script, json.dumps(markdown)],
|
["node", "--input-type=module", "-e", script, json.dumps(markdown)],
|
||||||
cwd=_REPO,
|
cwd=_REPO,
|
||||||
@@ -99,3 +99,51 @@ def test_table_separator_row_not_rendered_as_data(node_available):
|
|||||||
assert "<th" in html
|
assert "<th" in html
|
||||||
assert "<td" in html
|
assert "<td" in html
|
||||||
assert "---" not in html
|
assert "---" not in html
|
||||||
|
|
||||||
|
|
||||||
|
def test_process_with_thinking_handles_gemma4_thought_channel(node_available):
    """Gemma 4 thought channel renders as a thinking section plus the reply.

    The rendered HTML must contain the reasoning and the final answer, and
    must not leak either channel control token, raw or HTML-escaped.
    """
    html = _run_markdown_case(
        "<|channel>thought\ninternal reasoning<channel|>Final answer.",
        "mod.processWithThinking(input)",
    )

    assert "thinking-section" in html
    assert "internal reasoning" in html
    assert "Final answer." in html
    # Check opener AND closer; the escaped forms catch tokens that were
    # HTML-escaped by the renderer instead of being stripped.
    for token in ("<|channel>", "<channel|>", "&lt;|channel&gt;", "&lt;channel|&gt;"):
        assert token not in html
|
||||||
|
|
||||||
|
|
||||||
|
def test_process_with_thinking_strips_empty_gemma4_thought_channel(node_available):
    """An empty Gemma 4 thought channel produces no thinking section.

    Only the final answer should be rendered, and neither channel control
    token may leak into the HTML, raw or HTML-escaped.
    """
    html = _run_markdown_case(
        "<|channel>thought\n<channel|>Final answer.",
        "mod.processWithThinking(input)",
    )

    assert "thinking-section" not in html
    assert "Final answer." in html
    # Check opener AND closer; the escaped forms catch tokens that were
    # HTML-escaped by the renderer instead of being stripped.
    for token in ("<|channel>", "<channel|>", "&lt;|channel&gt;", "&lt;channel|&gt;"):
        assert token not in html
|
||||||
|
|
||||||
|
|
||||||
|
def test_process_with_thinking_unwraps_gemma4_response_channel(node_available):
    """Thought and response channels both render without their wrappers.

    The reasoning appears in a thinking section, the response-channel body
    appears as the reply, and neither channel control token may leak into
    the HTML, raw or HTML-escaped.
    """
    html = _run_markdown_case(
        "<|channel>thought\ninternal reasoning<channel|><|channel>response\nFinal answer.<channel|>",
        "mod.processWithThinking(input)",
    )

    assert "thinking-section" in html
    assert "internal reasoning" in html
    assert "Final answer." in html
    # Check opener AND closer; the escaped forms catch tokens that were
    # HTML-escaped by the renderer instead of being stripped.
    for token in ("<|channel>", "<channel|>", "&lt;|channel&gt;", "&lt;channel|&gt;"):
        assert token not in html
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_thinking_blocks_handles_thought_tag(node_available):
    """`extractThinkingBlocks` splits a `<thought>` block from the reply text."""
    markdown = "<thought>internal reasoning</thought>Final answer."

    extracted = _run_markdown_case(markdown, "mod.extractThinkingBlocks(input)")

    assert extracted["thinkingBlocks"] == ["internal reasoning"]
    assert extracted["content"] == "Final answer."
|
||||||
|
|||||||
@@ -23,3 +23,22 @@ def test_strip_think_cases():
|
|||||||
|
|
||||||
# 6. Multiple blocks (closed + unclosed)
|
# 6. Multiple blocks (closed + unclosed)
|
||||||
assert strip_think("Hello! <think> closed </think> Here is the answer. <think> unclosed") == "Hello! Here is the answer."
|
assert strip_think("Hello! <think> closed </think> Here is the answer. <think> unclosed") == "Hello! Here is the answer."
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_think_handles_thought_tags():
    """A closed `<thought>` block is stripped, leaving only the answer."""
    raw = "<thought>internal reasoning</thought>Final answer."
    assert strip_think(raw) == "Final answer."
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_think_handles_gemma4_thought_channel():
    """A closed Gemma 4 thought channel is stripped, leaving only the answer."""
    raw = "<|channel>thought\ninternal reasoning<channel|>Final answer."
    stripped = strip_think(raw)
    assert stripped == "Final answer."
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_think_handles_empty_gemma4_thought_channel():
    """An empty Gemma 4 thought channel is stripped, leaving only the answer."""
    raw = "<|channel>thought\n<channel|>Final answer."
    stripped = strip_think(raw)
    assert stripped == "Final answer."
|
||||||
|
|
||||||
|
|
||||||
|
def test_strip_think_unwraps_gemma4_response_channel():
    """The thought channel is dropped and the response-channel wrapper removed."""
    raw = "<|channel>thought\ninternal reasoning<channel|><|channel>response\nFinal answer.<channel|>"
    stripped = strip_think(raw)
    assert stripped == "Final answer."
|
||||||
|
|||||||
Reference in New Issue
Block a user