diff --git a/src/text_helpers.py b/src/text_helpers.py
index 10f24de..90d66a9 100644
--- a/src/text_helpers.py
+++ b/src/text_helpers.py
@@ -62,16 +62,20 @@ def _strip_reasoning_prose(text: str) -> str:
     paragraphs = re.split(r"\n\s*\n", text.strip())
     if len(paragraphs) <= 1:
         return text
-    last_reasoning_idx = -1
+    # Strip only a LEADING contiguous run of reasoning paragraphs. Keeping the
+    # text after the *last* reasoning paragraph destroyed the real answer when a
+    # reasoning-style sentence trailed it: keep became empty and the function
+    # returned that trailing sentence instead of the answer above it.
+    first_keep = 0
     for i, p in enumerate(paragraphs):
         if _REASONING_PREFIX_RE.match(p):
-            last_reasoning_idx = i
-    if last_reasoning_idx < 0:
+            first_keep = i + 1
+        else:
+            break
+    if first_keep == 0:
         return text
-    keep = paragraphs[last_reasoning_idx + 1:]
-    if not keep:
-        return paragraphs[-1].strip()
-    return "\n\n".join(keep).strip()
+    keep = paragraphs[first_keep:]
+    return "\n\n".join(keep).strip() if keep else text
 
 
 def strip_think(text: str, *, prose: bool = False, prompt_echo: bool = True) -> str:
diff --git a/tests/test_strip_reasoning_prose_dataloss.py b/tests/test_strip_reasoning_prose_dataloss.py
new file mode 100644
index 0000000..d55a2d8
--- /dev/null
+++ b/tests/test_strip_reasoning_prose_dataloss.py
@@ -0,0 +1,25 @@
+"""Regression: _strip_reasoning_prose must not destroy the answer.
+
+It kept the text AFTER the *last* reasoning paragraph. When a reasoning-style
+sentence trailed the real answer, `keep` became empty and the function returned
+that trailing sentence (`paragraphs[-1]`), discarding the actual answer above
+it. It now strips only a leading contiguous run of reasoning paragraphs.
+"""
+from src.text_helpers import strip_think
+
+
+def test_leading_reasoning_is_stripped():
+    out = strip_think("I need to draft a reply.\n\nThe answer is 42.", prose=True)
+    assert out == "The answer is 42."
+
+
+def test_trailing_reasoning_does_not_destroy_answer():
+    text = ("Dear Alice,\n\nI will send the report by Friday.\n\nBest, Bob"
+            "\n\nI need to keep this reply concise and professional.")
+    out = strip_think(text, prose=True)
+    assert "send the report by Friday" in out
+    assert "Dear Alice" in out
+
+
+def test_plain_text_unchanged():
+    assert strip_think("Just a normal answer.", prose=True) == "Just a normal answer."