fix: stop deep research from running the prompt's example queries when the model echoes them (#1666)

This commit is contained in:
Afonso Coutinho
2026-06-03 06:23:07 +01:00
committed by GitHub
parent 26d040d116
commit 8a0b79bc84
2 changed files with 81 additions and 2 deletions

View File

@@ -800,6 +800,17 @@ class DeepResearcher:
except json.JSONDecodeError:
pass
# Handle truncated arrays — e.g. '["query one", "query two", "query thr'
# Repair from the LAST array start so an echoed example array earlier
# in the reply is not harvested into the real query set.
last_start = text.rfind('[')
truncated = last_start != -1 and ']' not in text[last_start:]
if truncated:
complete_items = re.findall(r'"([^"]*)"', text[last_start:])
if complete_items:
logger.info(f"Repaired truncated JSON array: recovered {len(complete_items)} items")
return complete_items
# Greedy match to capture the full outermost array
match = re.search(r'\[[\s\S]*\]', text)
if match:
@@ -810,8 +821,22 @@ class DeepResearcher:
except json.JSONDecodeError:
pass
# Handle truncated arrays — e.g. '["query one", "query two", "query thr'
# Try to find the start of an array and repair it
# Multiple complete arrays in one reply (e.g. the model echoes the
# prompt's Example: [...] before the real array). The greedy match
# above spans them all and fails to parse, so scan non-greedily and
# keep the LAST parseable array, which is the model's actual answer.
last_parsed = None
for m in re.finditer(r'\[[\s\S]*?\]', text):
try:
parsed = json.loads(m.group())
if isinstance(parsed, list):
last_parsed = parsed
except json.JSONDecodeError:
continue
if last_parsed is not None:
return [str(item) for item in last_parsed]
# Last resort: harvest quoted strings from the first array start
arr_start = text.find('[')
if arr_start != -1:
fragment = text[arr_start:]