fix(agent): extract web search sources from output key

tool_execution.py returns web search results as {"output": ..., "exit_code": 0}. The sources-extraction block in stream_agent_loop only checked result.get("results") and result.get("stdout"), so _src_text was always "" for every tool-call-mode web search. Two consequences: 1. The SOURCES marker was never parsed and the web_sources SSE event was never emitted -- the sources panel never appeared after agent-mode searches. 2. The marker (a large JSON blob) was left in result["output"] and forwarded verbatim to the LLM in round 2 via format_tool_result, confusing some local models into producing no tokens. Fix: prepend result.get("output") to the lookup chain, and update the cleanup assignment so result["output"] is overwritten with the stripped text. Adds six regression tests in tests/test_agent_loop.py documenting the before/after behaviour and verifying backward compat with the legacy results/stdout paths. Co-authored-by: MohammadYusif <MohammadYusif@users.noreply.github.com>
2026-06-02 07:06:09 +03:00
parent d46c406bd8
commit 65b5d65059
2 changed files with 87 additions and 3 deletions
--- a/src/agent_loop.py
+++ b/src/agent_loop.py
@@ -2001,8 +2001,11 @@ async def stream_agent_loop(
                )
            desc, result = await _tool_task
-            # Extract structured web sources from web_search tool output
+            # Extract structured web sources from web_search tool output.
-            _src_text = result.get("results") or result.get("stdout") or ""
+            # web_search returns {"output": ..., "exit_code": 0}; check "output"
            # first so the <!-- SOURCES:…--> marker is found and stripped even
            # when the result doesn't carry a "results" or "stdout" key.
            _src_text = result.get("output") or result.get("results") or result.get("stdout") or ""
            if block.tool_type == "web_search" and _src_text:
                _src_marker = "<!-- SOURCES:"
                _src_idx = _src_text.find(_src_marker)
@@ -2014,7 +2017,9 @@ async def stream_agent_loop(
                            yield f'data: {json.dumps({"type": "web_sources", "data": _extracted_sources})}\n\n'
                            # Strip the marker from the result so it doesn't show in chat
                            _clean = _src_text[:_src_idx].rstrip()
-                            if "results" in result:
+                            if "output" in result:
                                result["output"] = _clean
                            elif "results" in result:
                                result["results"] = _clean
                            elif "stdout" in result:
                                result["stdout"] = _clean
--- a/tests/test_agent_loop.py
+++ b/tests/test_agent_loop.py
@@ -337,3 +337,82 @@ class TestAppendToolResultsThoughtSignature:
        )
        # No empty/None extra_content leaks onto non-Gemini tool calls.
        assert "extra_content" not in messages[0]["tool_calls"][0]
 # ---------------------------------------------------------------------------
 # web_search sources extraction — key lookup regression (#443)
 # ---------------------------------------------------------------------------
 import json as _json
 class TestWebSearchSourcesKeyLookup:
    """Regression tests for the web_search result-key lookup (#443).

    Each test builds a result dict shaped like ``{<key>: text, "exit_code": 0}``
    whose text ends in a ``<!-- SOURCES:[...] -->`` marker, then replays the
    key lookup / marker handling expressions on that dict.  The expressions
    are reproduced inline here; the agent loop itself is not invoked.
    """

    # Single fake source entry embedded (as JSON) in the SOURCES marker.
    _SOURCES = [{"title": "Example", "url": "https://example.com", "snippet": "test"}]

    def _make_result(self, key: str = "output") -> dict:
        """Return a fake tool result carrying the marker text under *key*."""
        payload = _json.dumps(self._SOURCES)
        return {
            key: "Search results here.\n\n<!-- SOURCES:" + payload + " -->",
            "exit_code": 0,
        }

    @staticmethod
    def _lookup_text(result: dict) -> str:
        """Return the first truthy value among output/results/stdout, else ''."""
        candidates = (result.get(name) for name in ("output", "results", "stdout"))
        return next((value for value in candidates if value), "")

    # ── Regression: the old lookup missed "output" ──────────────────────

    def test_old_lookup_missed_output_key(self):
        """The legacy two-key chain (results, stdout) yields '' for a result
        that only carries an "output" key."""
        result = self._make_result("output")
        legacy_values = (result.get(name) for name in ("results", "stdout"))
        old_src_text = next((value for value in legacy_values if value), "")
        assert old_src_text == "", "confirms the pre-fix behaviour"

    def test_fixed_lookup_finds_output_key(self):
        """The three-key chain starting with "output" yields the marker text."""
        found = self._lookup_text(self._make_result("output"))
        assert found != ""
        assert "SOURCES" in found

    # ── Marker extraction works once _src_text is non-empty ─────────────

    def test_sources_extracted_from_output(self):
        """The JSON between the marker prefix and ' -->' decodes to _SOURCES."""
        text = self._lookup_text(self._make_result("output"))
        _, _, after_marker = text.partition("<!-- SOURCES:")
        blob, _, _ = after_marker.partition(" -->")
        assert _json.loads(blob) == self._SOURCES

    def test_marker_stripped_from_output_key(self):
        """Writing the pre-marker text back to "output" removes the JSON blob
        and leaves only the human-readable search text."""
        result = self._make_result("output")
        before_marker, _, _ = self._lookup_text(result).partition("<!-- SOURCES:")
        stripped = before_marker.rstrip()
        # The cleaned text goes back under the same key it was read from.
        if "output" in result:
            result["output"] = stripped
        assert "SOURCES" not in result["output"]
        assert result["output"] == "Search results here."

    # ── Backward compat: "results"/"stdout" keys still work ─────────────

    def test_results_key_still_works(self):
        """A result that only has "results" is still picked up by the chain."""
        found = self._lookup_text(self._make_result("results"))
        assert found != ""
        assert "SOURCES" in found

    def test_stdout_key_still_works(self):
        """A result that only has "stdout" is still picked up by the chain."""
        found = self._lookup_text(self._make_result("stdout"))
        assert found != ""
        assert "SOURCES" in found