From 535d05c1422074353e84dde5508c7b433290a157 Mon Sep 17 00:00:00 2001
From: Mubashir R <112580905+Mubashirrrr@users.noreply.github.com>
Date: Wed, 3 Jun 2026 09:33:56 +0500
Subject: [PATCH] fix: SearchService.search() calls comprehensive_web_search
 incorrectly (broken public API) (#1720)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SearchService.search() did:

    raw_results = await comprehensive_web_search(
        query, max_results=10 * depth, fetch_content=fetch_content)

comprehensive_web_search is a synchronous function whose count knob is
`max_pages` (not `max_results`) and which has no `fetch_content` parameter, so
the call raised TypeError on argument binding; `await` on its non-coroutine
return would also fail. It returns a context string, or a (context, sources)
tuple with return_sources=True — not the list of dicts the wrapper iterates.

The method is exported in services/search/__init__.py and services/__init__.py
with a usage example in its docstring, so any caller of the documented public
API hit an immediate crash. Call it correctly via asyncio.to_thread with
max_pages + return_sources=True and use the returned source list as the rows.

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
---
 services/search/service.py              | 15 ++++---
 tests/test_searchservice_search_call.py | 53 +++++++++++++++++++++++++
 2 files changed, 63 insertions(+), 5 deletions(-)
 create mode 100644 tests/test_searchservice_search_call.py

diff --git a/services/search/service.py b/services/search/service.py
index dab6ab6..422272e 100644
--- a/services/search/service.py
+++ b/services/search/service.py
@@ -62,13 +62,18 @@ class SearchService:
             SearchResponse with results
         """
         depth = depth or self.default_depth
-        fetch_content = fetch_content if fetch_content is not None else self.fetch_content
 
-        # Use existing search implementation
-        raw_results = await comprehensive_web_search(
+        # comprehensive_web_search is synchronous and, with return_sources=True,
+        # returns (context_str, [{"url", "title"}, ...]). Run it off the event
+        # loop so we don't block it, and use the source list as the result rows.
+        # `fetch_content` is accepted for API compatibility; the comprehensive
+        # search always fetches page content.
+        import asyncio
+        _context, raw_results = await asyncio.to_thread(
+            comprehensive_web_search,
             query,
-            max_results=10 * depth,
-            fetch_content=fetch_content,
+            max_pages=10 * depth,
+            return_sources=True,
         )
 
         results = []
diff --git a/tests/test_searchservice_search_call.py b/tests/test_searchservice_search_call.py
new file mode 100644
index 0000000..93e5b67
--- /dev/null
+++ b/tests/test_searchservice_search_call.py
@@ -0,0 +1,53 @@
+"""Regression: SearchService.search() must call the (synchronous)
+comprehensive_web_search correctly and return structured results.
+
+The wrapper previously did:
+
+    raw_results = await comprehensive_web_search(
+        query, max_results=10 * depth, fetch_content=fetch_content)
+
+which is broken three ways:
+  * comprehensive_web_search is a plain `def` (sync), so `await` on its return
+    raised TypeError;
+  * it accepts neither `max_results` nor `fetch_content` (the real knob is
+    `max_pages`), so the call raised TypeError on binding before running;
+  * it returns a context string (or a (context, sources) tuple), not the list
+    of dicts the wrapper then iterates.
+
+SearchService.search is exported via services/search/__init__.py and
+services/__init__.py (with a usage example in its own docstring), so this is a
+broken public API method. This test drives it with a stubbed search backend.
+"""
+import asyncio
+
+from services.search import service as search_service
+from services.search.service import SearchService, SearchResponse
+
+
+def test_search_returns_structured_results(monkeypatch):
+    calls = {}
+
+    def fake_search(query, max_pages=3, return_sources=False, **kwargs):
+        calls["query"] = query
+        calls["max_pages"] = max_pages
+        calls["return_sources"] = return_sources
+        calls["kwargs"] = kwargs
+        sources = [{"url": "https://example.com", "title": "Example"}]
+        return ("context text", sources) if return_sources else "context text"
+
+    monkeypatch.setattr(search_service, "comprehensive_web_search", fake_search)
+
+    svc = SearchService(default_depth=2)
+    resp = asyncio.run(svc.search("python async patterns"))
+
+    assert isinstance(resp, SearchResponse)
+    assert resp.total == 1
+    assert resp.results[0].url == "https://example.com"
+    assert resp.results[0].title == "Example"
+
+    # Called with the real param (max_pages, not max_results) and asked for the
+    # structured source list rather than the context string.
+    assert calls["return_sources"] is True
+    assert calls["max_pages"] == 20  # 10 * depth(2)
+    assert "max_results" not in calls["kwargs"]
+    assert "fetch_content" not in calls["kwargs"]