fix(agent): extract web search sources from output key
tool_execution.py returns web search results as {"output": ..., "exit_code": 0}.
The sources-extraction block in stream_agent_loop only checked result.get("results")
and result.get("stdout"), so _src_text was always the empty string for every web
search made in tool-call mode. This had two consequences:
1. The SOURCES marker was never parsed and the web_sources SSE event was never
emitted -- the sources panel never appeared after agent-mode searches.
2. The marker (a large JSON blob) was left in result["output"] and forwarded
verbatim to the LLM in round 2 via format_tool_result, confusing some local
models into producing no tokens.
Fix: prepend result.get("output") to the lookup chain, and update the cleanup
assignment so result["output"] is overwritten with the stripped text.
Adds six regression tests in tests/test_agent_loop.py that document the before/after
behaviour and verify backward compatibility with the legacy results/stdout paths.
Co-authored-by: MohammadYusif <MohammadYusif@users.noreply.github.com>
This commit is contained in:
@@ -2001,8 +2001,11 @@ async def stream_agent_loop(
|
|||||||
)
|
)
|
||||||
desc, result = await _tool_task
|
desc, result = await _tool_task
|
||||||
|
|
||||||
# Extract structured web sources from web_search tool output
|
# Extract structured web sources from web_search tool output.
|
||||||
_src_text = result.get("results") or result.get("stdout") or ""
|
# web_search returns {"output": ..., "exit_code": 0}; check "output"
|
||||||
|
# first so the <!-- SOURCES:…--> marker is found and stripped even
|
||||||
|
# when the result doesn't carry a "results" or "stdout" key.
|
||||||
|
_src_text = result.get("output") or result.get("results") or result.get("stdout") or ""
|
||||||
if block.tool_type == "web_search" and _src_text:
|
if block.tool_type == "web_search" and _src_text:
|
||||||
_src_marker = "<!-- SOURCES:"
|
_src_marker = "<!-- SOURCES:"
|
||||||
_src_idx = _src_text.find(_src_marker)
|
_src_idx = _src_text.find(_src_marker)
|
||||||
@@ -2014,7 +2017,9 @@ async def stream_agent_loop(
|
|||||||
yield f'data: {json.dumps({"type": "web_sources", "data": _extracted_sources})}\n\n'
|
yield f'data: {json.dumps({"type": "web_sources", "data": _extracted_sources})}\n\n'
|
||||||
# Strip the marker from the result so it doesn't show in chat
|
# Strip the marker from the result so it doesn't show in chat
|
||||||
_clean = _src_text[:_src_idx].rstrip()
|
_clean = _src_text[:_src_idx].rstrip()
|
||||||
if "results" in result:
|
if "output" in result:
|
||||||
|
result["output"] = _clean
|
||||||
|
elif "results" in result:
|
||||||
result["results"] = _clean
|
result["results"] = _clean
|
||||||
elif "stdout" in result:
|
elif "stdout" in result:
|
||||||
result["stdout"] = _clean
|
result["stdout"] = _clean
|
||||||
|
|||||||
@@ -337,3 +337,82 @@ class TestAppendToolResultsThoughtSignature:
|
|||||||
)
|
)
|
||||||
# No empty/None extra_content leaks onto non-Gemini tool calls.
|
# No empty/None extra_content leaks onto non-Gemini tool calls.
|
||||||
assert "extra_content" not in messages[0]["tool_calls"][0]
|
assert "extra_content" not in messages[0]["tool_calls"][0]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# web_search sources extraction — key lookup regression (#443)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
import json as _json
|
||||||
|
|
||||||
|
|
||||||
|
class TestWebSearchSourcesKeyLookup:
    """Regression tests for the web_search result-key lookup (#443).

    The web_search tool returns {"output": ..., "exit_code": 0}.
    The sources-extraction block in stream_agent_loop must read from the
    "output" key, not only from "results"/"stdout" (which web_search never
    sets).  Without the fix the SOURCES marker is never found, no
    web_sources SSE event is emitted, and the raw JSON blob leaks into the
    LLM's round-2 context.

    These tests re-state the lookup and clean-up expressions locally; they
    do not drive stream_agent_loop itself.
    """

    _SOURCES = [{"title": "Example", "url": "https://example.com", "snippet": "test"}]
    _MARKER = "<!-- SOURCES:"

    def _make_result(self, key: str = "output") -> dict:
        """Build a tool result whose text, stored under *key*, ends with a SOURCES marker."""
        sources_json = _json.dumps(self._SOURCES)
        text = f"Search results here.\n\n<!-- SOURCES:{sources_json} -->"
        return {key: text, "exit_code": 0}

    @staticmethod
    def _legacy_lookup(result: dict) -> str:
        """Return the text the pre-fix lookup chain ("results", then "stdout") selects."""
        return result.get("results") or result.get("stdout") or ""

    @staticmethod
    def _fixed_lookup(result: dict) -> str:
        """Return the text the post-fix lookup chain ("output" checked first) selects."""
        return result.get("output") or result.get("results") or result.get("stdout") or ""

    # ── Regression: the old lookup missed "output" ──────────────────────

    def test_old_lookup_missed_output_key(self):
        """Documents the bug: result.get('results') and result.get('stdout')
        are both absent when web_search returns its canonical {"output": ...}
        shape, so _src_text was always '' and the if-block never ran."""
        result = self._make_result("output")
        old_src_text = self._legacy_lookup(result)
        assert old_src_text == "", "confirms the pre-fix behaviour"

    def test_fixed_lookup_finds_output_key(self):
        """After the fix, "output" is checked first so _src_text is non-empty."""
        result = self._make_result("output")
        src_text = self._fixed_lookup(result)
        assert src_text != ""
        assert "SOURCES" in src_text

    # ── Marker extraction works once _src_text is non-empty ─────────────

    def test_sources_extracted_from_output(self):
        """The JSON payload between the marker and its terminator round-trips."""
        result = self._make_result("output")
        src_text = self._fixed_lookup(result)
        idx = src_text.find(self._MARKER)
        # Fail with a clear message instead of slicing from -1 and handing
        # garbage to json.loads when the marker or its terminator is missing.
        assert idx != -1, "SOURCES marker not found"
        end = src_text.find(" -->", idx)
        assert end != -1, "SOURCES marker is not terminated"
        extracted = _json.loads(src_text[idx + len(self._MARKER):end])
        assert extracted == self._SOURCES

    def test_marker_stripped_from_output_key(self):
        """After extraction the "output" value is cleaned so the LLM never
        sees the raw JSON blob in its round-2 context."""
        result = self._make_result("output")
        src_text = self._fixed_lookup(result)
        idx = src_text.find(self._MARKER)
        assert idx != -1, "SOURCES marker not found"
        clean = src_text[:idx].rstrip()
        # Same branch order as the clean-up shown in the fix: "output" first
        # (the bug was that only "results"/"stdout" were ever updated).
        if "output" in result:
            result["output"] = clean
        elif "results" in result:
            result["results"] = clean
        elif "stdout" in result:
            result["stdout"] = clean
        assert "SOURCES" not in result["output"]
        assert result["output"] == "Search results here."
        # The clean-up must leave the other keys of the result alone.
        assert result["exit_code"] == 0

    # ── Backward compat: "results"/"stdout" keys still work ─────────────

    def test_results_key_still_works(self):
        """A legacy result carrying only "results" is still picked up."""
        result = self._make_result("results")
        src_text = self._fixed_lookup(result)
        assert src_text != ""
        assert "SOURCES" in src_text
        assert src_text == result["results"]

    def test_stdout_key_still_works(self):
        """A legacy result carrying only "stdout" is still picked up."""
        result = self._make_result("stdout")
        src_text = self._fixed_lookup(result)
        assert src_text != ""
        assert "SOURCES" in src_text
        assert src_text == result["stdout"]
|
||||||
|
|||||||
Reference in New Issue
Block a user