diff --git a/src/text_helpers.py b/src/text_helpers.py
index 4fa4cde..10f24de 100644
--- a/src/text_helpers.py
+++ b/src/text_helpers.py
@@ -20,9 +20,9 @@ import re
 _THINK_CLOSED_RE = re.compile(r"<think>[\s\S]*?</think>\s*", re.IGNORECASE)
 # Orphan opening or closing tags that survive after the closed-pass.
 _THINK_TAG_RE = re.compile(r"</?think[^>]*>\s*", re.IGNORECASE)
-# Dangling opener at the top of the response with no closer — strip everything
-# from `<think>` up to either `</think>` (if it ever shows) or end of string.
-_THINK_OPEN_RE = re.compile(r"^\s*<think>.*?(?:</think>|$)", re.DOTALL | re.IGNORECASE)
+# Dangling opener anywhere in the response with no closer — strip everything
+# from `<think>` to the end of string.
+_THINK_OPEN_RE = re.compile(r"<think>[\s\S]*$", re.IGNORECASE)
 # Streaming models occasionally emit `<think ...>`-style attributes.
 # Normalize to a plain `<think>` so the regexes above catch them.
 _THINK_ATTR_RE = re.compile(r"<think\s+[^>]*>", re.IGNORECASE)
diff --git a/tests/test_strip_think.py b/tests/test_strip_think.py
new file mode 100644
index 0000000..5e36ef1
--- /dev/null
+++ b/tests/test_strip_think.py
@@ -0,0 +1,25 @@
+import pytest
+from src.text_helpers import strip_think
+
+def test_strip_think_cases():
+    # 1. Mid-text unclosed leak (fails before fix)
+    assert strip_think("Hello! <think>I am thinking.") == "Hello!"
+    assert strip_think("Sure.\n\n<think>Let me reconsider...") == "Sure."
+    assert strip_think("Sure.\n\n<THINK>Let me reconsider...") == "Sure."  # NOTE(review): tag casing assumed - confirm
+
+    # 2. Start-anchored unclosed
+    assert strip_think("<think> unclosed from start") == ""
+    assert strip_think("<think> thinking at start") == ""
+
+    # 3. Closed block
+    assert strip_think("Hello! <think>closed</think> Here is the answer.") == "Hello! Here is the answer."
+    assert strip_think("Hello! <THINK>closed</THINK> Here is the answer.") == "Hello! Here is the answer."  # NOTE(review): tag casing assumed - confirm
+
+    # 4. No-tag passthrough
+    assert strip_think("No tags here.") == "No tags here."
+
+    # 5. Content-before-opener preserved (part of mid-text unclosed)
+    assert strip_think("Prefix text <think>trailing thoughts") == "Prefix text"
+
+    # 6. Multiple blocks (closed + unclosed)
+    assert strip_think("Hello! <think>closed</think> Here is the answer. <think>unclosed") == "Hello! Here is the answer."