From dac64f20d90f1b3cbcfcb8ca897449d1e12e2bcc Mon Sep 17 00:00:00 2001 From: Tatlatat Date: Tue, 2 Jun 2026 18:36:37 +0700 Subject: [PATCH] Text: strip dangling think blocks after visible text `strip_think` removes a dangling (unclosed) `<think>` block via `_THINK_OPEN_RE`, but that pattern was anchored to the start of the string (`^\s*`). An unclosed `<think>` (or `<thinking>`) opener that appears *after* any leading output was therefore only half-handled: the stray tag itself was removed by `_THINK_TAG_RE`, but the reasoning content following it leaked straight to the user. strip_think("Hello! <think>I am thinking.") # -> "Hello! I am thinking." (leak) strip_think("Sure.\n\n<think>Let me reconsider...") # -> leaks the reasoning `strip_think` feeds user-facing output across research, email replies, notes, and scheduled tasks, so this leaks chain-of-thought to end users. Un-anchor `_THINK_OPEN_RE` so a dangling opener anywhere strips from the opener to end of string, consistent with the existing start-of-string behavior. Content before the opener, closed `<think>...</think>` blocks, and tag-free text are all preserved. tests/test_strip_think.py covers the mid-text leak (fails before this change), start-anchored unclosed, closed blocks, no-tag passthrough, content-before-opener, and mixed closed+unclosed. Full existing think suite still passes. --- src/text_helpers.py | 6 +++--- tests/test_strip_think.py | 25 +++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) create mode 100644 tests/test_strip_think.py diff --git a/src/text_helpers.py b/src/text_helpers.py index 4fa4cde..10f24de 100644 --- a/src/text_helpers.py +++ b/src/text_helpers.py @@ -20,9 +20,9 @@ import re _THINK_CLOSED_RE = re.compile(r"[\s\S]*?\s*", re.IGNORECASE) # Orphan opening or closing tags that survive after the closed-pass. _THINK_TAG_RE = re.compile(r"]*>\s*", re.IGNORECASE) -# Dangling opener at the top of the response with no closer — strip everything -# from `` up to either `` (if it ever shows) or end of string. 
-_THINK_OPEN_RE = re.compile(r"^\s*.*?(?:|$)", re.DOTALL | re.IGNORECASE) +# Dangling opener anywhere in the response with no closer — strip everything +# from `` to the end of string. +_THINK_OPEN_RE = re.compile(r"[\s\S]*$", re.IGNORECASE) # Streaming models occasionally emit ``-style attributes. # Normalize to a plain `` so the regexes above catch them. _THINK_ATTR_RE = re.compile(r"]*>", re.IGNORECASE) diff --git a/tests/test_strip_think.py b/tests/test_strip_think.py new file mode 100644 index 0000000..5e36ef1 --- /dev/null +++ b/tests/test_strip_think.py @@ -0,0 +1,25 @@ +import pytest +from src.text_helpers import strip_think + +def test_strip_think_cases(): + # 1. Mid-text unclosed leak (fails before fix) + assert strip_think("Hello! I am thinking.") == "Hello!" + assert strip_think("Sure.\n\nLet me reconsider...") == "Sure." + assert strip_think("Sure.\n\nLet me reconsider...") == "Sure." + + # 2. Start-anchored unclosed + assert strip_think(" unclosed from start") == "" + assert strip_think(" thinking at start") == "" + + # 3. Closed block + assert strip_think("Hello! closed Here is the answer.") == "Hello! Here is the answer." + assert strip_think("Hello! closed Here is the answer.") == "Hello! Here is the answer." + + # 4. No-tag passthrough + assert strip_think("No tags here.") == "No tags here." + + # 5. Content-before-opener preserved (part of mid-text unclosed) + assert strip_think("Prefix text trailing thoughts") == "Prefix text" + + # 6. Multiple blocks (closed + unclosed) + assert strip_think("Hello! closed Here is the answer. unclosed") == "Hello! Here is the answer."