fix: deep research runs the prompt's example queries when the model echoes them (#1666)

This commit is contained in:
Afonso Coutinho
2026-06-03 06:23:07 +01:00
committed by GitHub
parent 26d040d116
commit 8a0b79bc84
2 changed files with 81 additions and 2 deletions

View File

@@ -800,6 +800,17 @@ class DeepResearcher:
except json.JSONDecodeError:
pass
# Handle truncated arrays — e.g. '["query one", "query two", "query thr'
# Repair from the LAST array start so an echoed example array earlier
# in the reply is not harvested into the real query set.
last_start = text.rfind('[')
truncated = last_start != -1 and ']' not in text[last_start:]
if truncated:
complete_items = re.findall(r'"([^"]*)"', text[last_start:])
if complete_items:
logger.info(f"Repaired truncated JSON array: recovered {len(complete_items)} items")
return complete_items
# Greedy match to capture the full outermost array
match = re.search(r'\[[\s\S]*\]', text)
if match:
@@ -810,8 +821,22 @@ class DeepResearcher:
except json.JSONDecodeError:
pass
# Handle truncated arrays — e.g. '["query one", "query two", "query thr'
# Try to find the start of an array and repair it
# Multiple complete arrays in one reply (e.g. the model echoes the
# prompt's Example: [...] before the real array). The greedy match
# above spans them all and fails to parse, so scan non-greedily and
# keep the LAST parseable array, which is the model's actual answer.
last_parsed = None
for m in re.finditer(r'\[[\s\S]*?\]', text):
try:
parsed = json.loads(m.group())
if isinstance(parsed, list):
last_parsed = parsed
except json.JSONDecodeError:
continue
if last_parsed is not None:
return [str(item) for item in last_parsed]
# Last resort: harvest quoted strings from the first array start
arr_start = text.find('[')
if arr_start != -1:
fragment = text[arr_start:]

View File

@@ -0,0 +1,54 @@
"""_parse_json_array must not inject the prompt's example queries.
The query-generation prompt ends with an Example: [...] array. Weak models
echo that example before emitting the real array. The old parser's greedy
regex spanned both arrays, failed to parse, and the repair fallback then
harvested EVERY quoted string from the reply, so the engine ran literal
searches for "query one" / "query two" / "query three".
"""
from src.deep_research import DeepResearcher
def _dr():
    """Return a DeepResearcher instance created without running ``__init__``.

    Per the original author's note, ``_parse_json_array`` only touches
    ``self`` via the static ``_strip_code_block``, so the heavy constructor
    is skipped for these tests.
    """
    bare_instance = object.__new__(DeepResearcher)
    return bare_instance
def test_example_echo_returns_only_the_real_array():
    """An echoed ``Example: [...]`` array must not leak into the parsed result."""
    # The reply first repeats the prompt's example, then gives the real answer.
    echoed_example = 'Example: ["query one", "query two", "query three"]\n'
    real_answer = '["impact of AI on jobs", "AI automation statistics 2026"]'
    expected = ["impact of AI on jobs", "AI automation statistics 2026"]
    parsed = _dr()._parse_json_array(echoed_example + real_answer)
    assert parsed == expected
def test_truncated_real_array_after_example_skips_example():
    """Repairing a cut-off final array must ignore the complete example before it."""
    # The second array is cut off mid-string, so only its first item is complete.
    reply = 'Example: ["query one", "query two"]\n["real query a", "real query b'
    recovered = _dr()._parse_json_array(reply)
    assert recovered == ["real query a"]
def test_plain_array_still_parses():
    """A reply that is nothing but a JSON array yields its items."""
    parsed = _dr()._parse_json_array('["a", "b"]')
    assert parsed == ["a", "b"]
def test_array_in_prose_still_parses():
    """An array surrounded by conversational text is still extracted."""
    reply = 'Here are the queries: ["a", "b"] hope that helps'
    assert _dr()._parse_json_array(reply) == ["a", "b"]
def test_truncated_single_array_still_repaired():
    """A lone array cut off mid-item keeps only its fully quoted items."""
    cut_off_reply = '["query one", "query two", "query thr'
    recovered = _dr()._parse_json_array(cut_off_reply)
    assert recovered == ["query one", "query two"]
def test_code_fenced_array_still_parses():
    """An array wrapped in a Markdown ``json`` code fence is still parsed."""
    fenced_reply = '```json\n["a", "b"]\n```'
    parsed = _dr()._parse_json_array(fenced_reply)
    assert parsed == ["a", "b"]
def test_no_array_returns_empty():
    """Text containing no array at all produces an empty list."""
    parsed = _dr()._parse_json_array("no array here")
    assert parsed == []