fix: stop deep research from running the prompt's example queries when the model echoes them (#1666)

2026-06-03 06:23:07 +01:00
parent 26d040d116
commit 8a0b79bc84
2 changed files with 81 additions and 2 deletions
--- a/src/deep_research.py
+++ b/src/deep_research.py
@@ -800,6 +800,17 @@ class DeepResearcher:
        except json.JSONDecodeError:
            pass

+        # Handle truncated arrays — e.g. '["query one", "query two", "query thr'
+        # Repair from the LAST array start so an echoed example array earlier
+        # in the reply is not harvested into the real query set.
+        last_start = text.rfind('[')
+        truncated = last_start != -1 and ']' not in text[last_start:]
+        if truncated:
+            complete_items = re.findall(r'"([^"]*)"', text[last_start:])
+            if complete_items:
+                logger.info(f"Repaired truncated JSON array: recovered {len(complete_items)} items")
+                return complete_items
+
        # Greedy match to capture the full outermost array
        match = re.search(r'\[[\s\S]*\]', text)
        if match:
@@ -810,8 +821,22 @@ class DeepResearcher:
            except json.JSONDecodeError:
                pass

-        # Handle truncated arrays — e.g. '["query one", "query two", "query thr'
-        # Try to find the start of an array and repair it
+        # Multiple complete arrays in one reply (e.g. the model echoes the
+        # prompt's Example: [...] before the real array). The greedy match
+        # above spans them all and fails to parse, so scan non-greedily and
+        # keep the LAST parseable array, which is the model's actual answer.
+        last_parsed = None
+        for m in re.finditer(r'\[[\s\S]*?\]', text):
+            try:
+                parsed = json.loads(m.group())
+                if isinstance(parsed, list):
+                    last_parsed = parsed
+            except json.JSONDecodeError:
+                continue
+        if last_parsed is not None:
+            return [str(item) for item in last_parsed]
+
+        # Last resort: harvest quoted strings from the first '[' to the end of the text
        arr_start = text.find('[')
        if arr_start != -1:
            fragment = text[arr_start:]
--- a/tests/test_deep_research_parse_json_array_echo.py
+++ b/tests/test_deep_research_parse_json_array_echo.py
@@ -0,0 +1,54 @@
+"""_parse_json_array must not inject the prompt's example queries.
+
+The query-generation prompt ends with an Example: [...] array. Weak models
+echo that example before emitting the real array. The old parser's greedy
+regex spanned both arrays, failed to parse, and the repair fallback then
+harvested EVERY quoted string from the reply, so the engine ran literal
+searches for "query one" / "query two" / "query three".
+"""
+
+from src.deep_research import DeepResearcher
+
+
+def _dr():
+    # _parse_json_array only touches self via the static _strip_code_block,
+    # so skip the heavy __init__.
+    return object.__new__(DeepResearcher)
+
+
+def test_example_echo_returns_only_the_real_array():
+    text = (
+        'Example: ["query one", "query two", "query three"]\n'
+        '["impact of AI on jobs", "AI automation statistics 2026"]'
+    )
+    assert _dr()._parse_json_array(text) == [
+        "impact of AI on jobs",
+        "AI automation statistics 2026",
+    ]
+
+
+def test_truncated_real_array_after_example_skips_example():
+    text = 'Example: ["query one", "query two"]\n["real query a", "real query b'
+    assert _dr()._parse_json_array(text) == ["real query a"]
+
+
+def test_plain_array_still_parses():
+    assert _dr()._parse_json_array('["a", "b"]') == ["a", "b"]
+
+
+def test_array_in_prose_still_parses():
+    out = _dr()._parse_json_array('Here are the queries: ["a", "b"] hope that helps')
+    assert out == ["a", "b"]
+
+
+def test_truncated_single_array_still_repaired():
+    out = _dr()._parse_json_array('["query one", "query two", "query thr')
+    assert out == ["query one", "query two"]
+
+
+def test_code_fenced_array_still_parses():
+    assert _dr()._parse_json_array('```json\n["a", "b"]\n```') == ["a", "b"]
+
+
+def test_no_array_returns_empty():
+    assert _dr()._parse_json_array("no array here") == []