Patch notes (commentary only; everything before the first "diff --git" line
is outside the hunks).

src/deep_research.py, class DeepResearcher. The enclosing method is only
partly visible in these hunks; the new test module calls it
_parse_json_array.
  * Hunk 1 adds a truncated-array repair ahead of the greedy match. It
    starts from the LAST '[' (text.rfind) and, when no ']' follows it,
    returns the complete "..." items found from that point.
  * Hunk 2 adds a non-greedy scan over every [...] span that keeps the last
    span json.loads parses as a list, and relabels the existing
    text.find('[') harvest as the last resort.

tests/test_deep_research_parse_json_array_echo.py is a new file with seven
tests: echoed example array, truncated array after an example, plain array,
array inside prose, truncated single array, code-fenced array, no array.

NOTE(review): indentation below was reconstructed from a whitespace-collapsed
copy of this patch. Verify the context-line indentation against
src/deep_research.py before applying.
NOTE(review): rfind('[') is not quote-aware. For a truncated reply such as
'["a [b", "c' the repair starts at the '[' inside the first item, and the
"([^"]*)" pattern then yields [', ']. Confirm whether that input is
reachable.
NOTE(review): the non-greedy pattern stops at the first ']'. For
'Example: ["a"]\n["x ] y", "z"]' the scan, as far as these hunks show, keeps
["a"] (the echoed example). A json.JSONDecoder().raw_decode scan from each
'[' would not have that limit. Confirm against the full method.

diff --git a/src/deep_research.py b/src/deep_research.py
index ee50629..4617439 100644
--- a/src/deep_research.py
+++ b/src/deep_research.py
@@ -800,6 +800,17 @@ class DeepResearcher:
         except json.JSONDecodeError:
             pass
 
+        # Handle truncated arrays — e.g. '["query one", "query two", "query thr'
+        # Repair from the LAST array start so an echoed example array earlier
+        # in the reply is not harvested into the real query set.
+        last_start = text.rfind('[')
+        truncated = last_start != -1 and ']' not in text[last_start:]
+        if truncated:
+            complete_items = re.findall(r'"([^"]*)"', text[last_start:])
+            if complete_items:
+                logger.info(f"Repaired truncated JSON array: recovered {len(complete_items)} items")
+                return complete_items
+
         # Greedy match to capture the full outermost array
         match = re.search(r'\[[\s\S]*\]', text)
         if match:
@@ -810,8 +821,22 @@ class DeepResearcher:
             except json.JSONDecodeError:
                 pass
 
-        # Handle truncated arrays — e.g. '["query one", "query two", "query thr'
-        # Try to find the start of an array and repair it
+        # Multiple complete arrays in one reply (e.g. the model echoes the
+        # prompt's Example: [...] before the real array). The greedy match
+        # above spans them all and fails to parse, so scan non-greedily and
+        # keep the LAST parseable array, which is the model's actual answer.
+        last_parsed = None
+        for m in re.finditer(r'\[[\s\S]*?\]', text):
+            try:
+                parsed = json.loads(m.group())
+                if isinstance(parsed, list):
+                    last_parsed = parsed
+            except json.JSONDecodeError:
+                continue
+        if last_parsed is not None:
+            return [str(item) for item in last_parsed]
+
+        # Last resort: harvest quoted strings from the first array start
         arr_start = text.find('[')
         if arr_start != -1:
             fragment = text[arr_start:]
diff --git a/tests/test_deep_research_parse_json_array_echo.py b/tests/test_deep_research_parse_json_array_echo.py
new file mode 100644
index 0000000..b8a7bec
--- /dev/null
+++ b/tests/test_deep_research_parse_json_array_echo.py
@@ -0,0 +1,54 @@
+"""_parse_json_array must not inject the prompt's example queries.
+
+The query-generation prompt ends with an Example: [...] array. Weak models
+echo that example before emitting the real array. The old parser's greedy
+regex spanned both arrays, failed to parse, and the repair fallback then
+harvested EVERY quoted string from the reply, so the engine ran literal
+searches for "query one" / "query two" / "query three".
+"""
+
+from src.deep_research import DeepResearcher
+
+
+def _dr():
+    # _parse_json_array only touches self via the static _strip_code_block,
+    # so skip the heavy __init__.
+    return object.__new__(DeepResearcher)
+
+
+def test_example_echo_returns_only_the_real_array():
+    text = (
+        'Example: ["query one", "query two", "query three"]\n'
+        '["impact of AI on jobs", "AI automation statistics 2026"]'
+    )
+    assert _dr()._parse_json_array(text) == [
+        "impact of AI on jobs",
+        "AI automation statistics 2026",
+    ]
+
+
+def test_truncated_real_array_after_example_skips_example():
+    text = 'Example: ["query one", "query two"]\n["real query a", "real query b'
+    assert _dr()._parse_json_array(text) == ["real query a"]
+
+
+def test_plain_array_still_parses():
+    assert _dr()._parse_json_array('["a", "b"]') == ["a", "b"]
+
+
+def test_array_in_prose_still_parses():
+    out = _dr()._parse_json_array('Here are the queries: ["a", "b"] hope that helps')
+    assert out == ["a", "b"]
+
+
+def test_truncated_single_array_still_repaired():
+    out = _dr()._parse_json_array('["query one", "query two", "query thr')
+    assert out == ["query one", "query two"]
+
+
+def test_code_fenced_array_still_parses():
+    assert _dr()._parse_json_array('```json\n["a", "b"]\n```') == ["a", "b"]
+
+
+def test_no_array_returns_empty():
+    assert _dr()._parse_json_array("no array here") == []