From 65b5d6505905ea8dca8033197fcfc8bb3bf85fd5 Mon Sep 17 00:00:00 2001
From: MohammadYusif <mosefbel@gmail.com>
Date: Tue, 2 Jun 2026 07:06:09 +0300
Subject: [PATCH] fix(agent): extract web search sources from output key

tool_execution.py returns web search results as {"output": ..., "exit_code": 0}.
The sources-extraction block in stream_agent_loop only checked result.get("results")
and result.get("stdout"), so _src_text was always "" for every tool-call-mode web
search. Two consequences:

1. The SOURCES marker was never parsed and the web_sources SSE event was never
   emitted -- the sources panel never appeared after agent-mode searches.
2. The marker (a large JSON blob) was left in result["output"] and forwarded
   verbatim to the LLM in round 2 via format_tool_result, confusing some local
   models into producing no tokens.

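Fix: prepend result.get("output") to the lookup chain, and update the cleanup
assignment so result["output"] is overwritten with the stripped text whenever
the result carries an "output" key (falling back to "results", then "stdout").
Note that the lookup skips a falsy "output" while the write-back tests key
presence, so this assumes a result never pairs an empty "output" with a
populated "results"/"stdout".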

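Adds six regression tests in tests/test_agent_loop.py documenting the before/after
behaviour and verifying backward compatibility with the legacy "results"/"stdout"
paths. The tests mirror the lookup and strip expressions on sample result dicts;
they do not drive stream_agent_loop itself.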

Co-authored-by: MohammadYusif <MohammadYusif@users.noreply.github.com>
---
 src/agent_loop.py        | 11 ++++--
 tests/test_agent_loop.py | 79 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 87 insertions(+), 3 deletions(-)

diff --git a/src/agent_loop.py b/src/agent_loop.py
index ad0ad00..94bed46 100644
--- a/src/agent_loop.py
+++ b/src/agent_loop.py
@@ -2001,8 +2001,11 @@ async def stream_agent_loop(
                 )
             desc, result = await _tool_task
 
-            # Extract structured web sources from web_search tool output
-            _src_text = result.get("results") or result.get("stdout") or ""
+            # Extract structured web sources from web_search tool output.
+            # web_search returns {"output": ..., "exit_code": 0}; check "output"
+            # first so the <!-- SOURCES:…--> marker is found and stripped even
+            # when the result doesn't carry a "results" or "stdout" key.
+            _src_text = result.get("output") or result.get("results") or result.get("stdout") or ""
             if block.tool_type == "web_search" and _src_text:
                 _src_marker = "<!-- SOURCES:"
                 _src_idx = _src_text.find(_src_marker)
@@ -2014,7 +2017,9 @@ async def stream_agent_loop(
                             yield f'data: {json.dumps({"type": "web_sources", "data": _extracted_sources})}\n\n'
                             # Strip the marker from the result so it doesn't show in chat
                             _clean = _src_text[:_src_idx].rstrip()
-                            if "results" in result:
+                            if "output" in result:
+                                result["output"] = _clean
+                            elif "results" in result:
                                 result["results"] = _clean
                             elif "stdout" in result:
                                 result["stdout"] = _clean
diff --git a/tests/test_agent_loop.py b/tests/test_agent_loop.py
index 7f11d6c..3726991 100644
--- a/tests/test_agent_loop.py
+++ b/tests/test_agent_loop.py
@@ -337,3 +337,82 @@ class TestAppendToolResultsThoughtSignature:
         )
         # No empty/None extra_content leaks onto non-Gemini tool calls.
         assert "extra_content" not in messages[0]["tool_calls"][0]
+
+
+# ---------------------------------------------------------------------------
+# web_search sources extraction — key lookup regression (#443)
+# ---------------------------------------------------------------------------
+
+import json as _json
+
+
+class TestWebSearchSourcesKeyLookup:
+    """The web_search tool returns {"output": ..., "exit_code": 0}.
+    The sources-extraction block in stream_agent_loop must read from the
+    "output" key, not only from "results"/"stdout" (which web_search never
+    sets).  Without the fix the SOURCES marker is never found, no
+    web_sources SSE event is emitted, and the raw JSON blob leaks into the
+    LLM's round-2 context."""
+
+    _SOURCES = [{"title": "Example", "url": "https://example.com", "snippet": "test"}]
+
+    def _make_result(self, key: str = "output") -> dict:
+        sources_json = _json.dumps(self._SOURCES)
+        text = f"Search results here.\n\n<!-- SOURCES:{sources_json} -->"
+        return {key: text, "exit_code": 0}
+
+    # ── Regression: the old lookup missed "output" ──────────────────────
+
+    def test_old_lookup_missed_output_key(self):
+        """Documents the bug: result.get('results') and result.get('stdout')
+        are both absent when web_search returns its canonical {"output": ...}
+        shape, so _src_text was always '' and the if-block never ran."""
+        result = self._make_result("output")
+        old_src_text = result.get("results") or result.get("stdout") or ""
+        assert old_src_text == "", "confirms the pre-fix behaviour"
+
+    def test_fixed_lookup_finds_output_key(self):
+        """After the fix, "output" is checked first so _src_text is non-empty."""
+        result = self._make_result("output")
+        src_text = result.get("output") or result.get("results") or result.get("stdout") or ""
+        assert src_text != ""
+        assert "SOURCES" in src_text
+
+    # ── Marker extraction works once _src_text is non-empty ─────────────
+
+    def test_sources_extracted_from_output(self):
+        result = self._make_result("output")
+        src_text = result.get("output") or result.get("results") or result.get("stdout") or ""
+        marker = "<!-- SOURCES:"
+        idx = src_text.find(marker)
+        end = src_text.find(" -->", idx)
+        extracted = _json.loads(src_text[idx + len(marker):end])
+        assert extracted == self._SOURCES
+
+    def test_marker_stripped_from_output_key(self):
+        """After extraction the "output" value is cleaned so the LLM never
+        sees the raw JSON blob in its round-2 context."""
+        result = self._make_result("output")
+        src_text = result.get("output") or result.get("results") or result.get("stdout") or ""
+        marker = "<!-- SOURCES:"
+        idx = src_text.find(marker)
+        clean = src_text[:idx].rstrip()
+        # Write back to "output"; the pre-fix code only updated "results"/"stdout".
+        if "output" in result:
+            result["output"] = clean
+        assert "SOURCES" not in result["output"]
+        assert result["output"] == "Search results here."
+
+    # ── Backward compat: "results"/"stdout" keys still work ─────────────
+
+    def test_results_key_still_works(self):
+        result = self._make_result("results")
+        src_text = result.get("output") or result.get("results") or result.get("stdout") or ""
+        assert src_text != ""
+        assert "SOURCES" in src_text
+
+    def test_stdout_key_still_works(self):
+        result = self._make_result("stdout")
+        src_text = result.get("output") or result.get("results") or result.get("stdout") or ""
+        assert src_text != ""
+        assert "SOURCES" in src_text