fix: _strip_reasoning_prose discards the answer when reasoning trails it (#1643)
This commit is contained in:
@@ -62,16 +62,20 @@ def _strip_reasoning_prose(text: str) -> str:
|
|||||||
paragraphs = re.split(r"\n\s*\n", text.strip())
|
paragraphs = re.split(r"\n\s*\n", text.strip())
|
||||||
if len(paragraphs) <= 1:
|
if len(paragraphs) <= 1:
|
||||||
return text
|
return text
|
||||||
last_reasoning_idx = -1
|
# Strip only a LEADING contiguous run of reasoning paragraphs. Keeping the
|
||||||
|
# text after the *last* reasoning paragraph destroyed the real answer when a
|
||||||
|
# reasoning-style sentence trailed it: keep became empty and the function
|
||||||
|
# returned that trailing sentence instead of the answer above it.
|
||||||
|
first_keep = 0
|
||||||
for i, p in enumerate(paragraphs):
|
for i, p in enumerate(paragraphs):
|
||||||
if _REASONING_PREFIX_RE.match(p):
|
if _REASONING_PREFIX_RE.match(p):
|
||||||
last_reasoning_idx = i
|
first_keep = i + 1
|
||||||
if last_reasoning_idx < 0:
|
else:
|
||||||
|
break
|
||||||
|
if first_keep == 0:
|
||||||
return text
|
return text
|
||||||
keep = paragraphs[last_reasoning_idx + 1:]
|
keep = paragraphs[first_keep:]
|
||||||
if not keep:
|
return "\n\n".join(keep).strip() if keep else text
|
||||||
return paragraphs[-1].strip()
|
|
||||||
return "\n\n".join(keep).strip()
|
|
||||||
|
|
||||||
|
|
||||||
def strip_think(text: str, *, prose: bool = False, prompt_echo: bool = True) -> str:
|
def strip_think(text: str, *, prose: bool = False, prompt_echo: bool = True) -> str:
|
||||||
|
|||||||
25
tests/test_strip_reasoning_prose_dataloss.py
Normal file
25
tests/test_strip_reasoning_prose_dataloss.py
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
"""Regression: _strip_reasoning_prose must not destroy the answer.
|
||||||
|
|
||||||
|
It kept the text AFTER the *last* reasoning paragraph. When a reasoning-style
|
||||||
|
sentence trailed the real answer, `keep` became empty and the function returned
|
||||||
|
that trailing sentence (`paragraphs[-1]`), discarding the actual answer above
|
||||||
|
it. It now strips only a leading contiguous run of reasoning paragraphs.
|
||||||
|
"""
|
||||||
|
from src.text_helpers import strip_think
|
||||||
|
|
||||||
|
|
||||||
|
def test_leading_reasoning_is_stripped():
    """A reasoning paragraph placed before the answer is removed."""
    raw = "I need to draft a reply.\n\nThe answer is 42."
    cleaned = strip_think(raw, prose=True)
    # Only the paragraph after the leading reasoning should remain.
    assert cleaned == "The answer is 42."
|
||||||
|
|
||||||
|
|
||||||
|
def test_trailing_reasoning_does_not_destroy_answer():
    """A reasoning-style sentence after the answer must not discard the answer."""
    message = (
        "Dear Alice,\n\nI will send the report by Friday.\n\nBest, Bob"
        "\n\nI need to keep this reply concise and professional."
    )
    result = strip_think(message, prose=True)
    # Both the body and the greeting of the real reply must survive.
    for fragment in ("send the report by Friday", "Dear Alice"):
        assert fragment in result
|
||||||
|
|
||||||
|
|
||||||
|
def test_plain_text_unchanged():
    """A single ordinary sentence passes through prose stripping unchanged."""
    plain = "Just a normal answer."
    assert strip_think(plain, prose=True) == plain
|
||||||
Reference in New Issue
Block a user