tavily_search, serper_search and google_pse_search called response.json() inside the network try block, which only caught httpx.RequestError and RateLimitError. When a provider returned a non-JSON body with a success status (an HTML error page, a truncated/empty body, a gateway error page served as 200), response.json() raised an UNCAUGHT json.JSONDecodeError that aborted the search in the background — exactly the 'search engines other than SearXNG fail in the background' symptom. (A real gateway 5xx never reaches response.json(): response.raise_for_status() runs first and raises httpx.HTTPStatusError, which is outside the scope of this change.) brave_search already handles the non-JSON case correctly: it parses JSON in its own try block and returns [] on json.JSONDecodeError. Mirror that in the other three providers so a malformed provider response degrades to no-results instead of propagating an exception. Adds tests/test_search_provider_json.py: a non-JSON 200 body now yields [] for tavily, serper, google_pse, and brave (the last guards the reference behaviour). Co-authored-by: NubsCarson <nubs@nubs.site>
This commit is contained in:
@@ -492,7 +492,6 @@ def google_pse_search(query: str, count: int = 10, time_filter: Optional[str] =
|
|||||||
if response.status_code == 429:
|
if response.status_code == 429:
|
||||||
raise RateLimitError("Google PSE rate limit hit")
|
raise RateLimitError("Google PSE rate limit hit")
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
data = response.json()
|
|
||||||
except httpx.RequestError as e:
|
except httpx.RequestError as e:
|
||||||
error_logger.error(f"Google PSE search failed: {e}")
|
error_logger.error(f"Google PSE search failed: {e}")
|
||||||
return []
|
return []
|
||||||
@@ -500,6 +499,12 @@ def google_pse_search(query: str, count: int = 10, time_filter: Optional[str] =
|
|||||||
error_logger.error(str(e))
|
error_logger.error(str(e))
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = response.json()
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
error_logger.error(f"Google PSE returned invalid JSON: {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
for item in data.get("items", [])[:count]:
|
for item in data.get("items", [])[:count]:
|
||||||
url = item.get("link", "")
|
url = item.get("link", "")
|
||||||
@@ -544,7 +549,6 @@ def tavily_search(query: str, count: int = 10, time_filter: Optional[str] = None
|
|||||||
if response.status_code == 429:
|
if response.status_code == 429:
|
||||||
raise RateLimitError("Tavily rate limit hit")
|
raise RateLimitError("Tavily rate limit hit")
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
data = response.json()
|
|
||||||
except httpx.RequestError as e:
|
except httpx.RequestError as e:
|
||||||
error_logger.error(f"Tavily search failed: {e}")
|
error_logger.error(f"Tavily search failed: {e}")
|
||||||
return []
|
return []
|
||||||
@@ -552,6 +556,12 @@ def tavily_search(query: str, count: int = 10, time_filter: Optional[str] = None
|
|||||||
error_logger.error(str(e))
|
error_logger.error(str(e))
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = response.json()
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
error_logger.error(f"Tavily returned invalid JSON: {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
for item in data.get("results", [])[:count]:
|
for item in data.get("results", [])[:count]:
|
||||||
url = item.get("url", "")
|
url = item.get("url", "")
|
||||||
@@ -599,7 +609,6 @@ def serper_search(query: str, count: int = 10, time_filter: Optional[str] = None
|
|||||||
if response.status_code == 429:
|
if response.status_code == 429:
|
||||||
raise RateLimitError("Serper rate limit hit")
|
raise RateLimitError("Serper rate limit hit")
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
data = response.json()
|
|
||||||
except httpx.RequestError as e:
|
except httpx.RequestError as e:
|
||||||
error_logger.error(f"Serper search failed: {e}")
|
error_logger.error(f"Serper search failed: {e}")
|
||||||
return []
|
return []
|
||||||
@@ -607,6 +616,12 @@ def serper_search(query: str, count: int = 10, time_filter: Optional[str] = None
|
|||||||
error_logger.error(str(e))
|
error_logger.error(str(e))
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
try:
|
||||||
|
data = response.json()
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
error_logger.error(f"Serper returned invalid JSON: {e}")
|
||||||
|
return []
|
||||||
|
|
||||||
results = []
|
results = []
|
||||||
for item in data.get("organic", [])[:count]:
|
for item in data.get("organic", [])[:count]:
|
||||||
url = item.get("link", "")
|
url = item.get("link", "")
|
||||||
|
|||||||
59
tests/test_search_provider_json.py
Normal file
59
tests/test_search_provider_json.py
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
"""Search providers must not raise on a non-JSON response body (issue #1129).
|
||||||
|
|
||||||
|
`brave_search` already wraps `response.json()` in its own try/except that catches
|
||||||
|
`json.JSONDecodeError` and returns []. The Tavily, Serper, and Google PSE
|
||||||
|
providers parsed JSON inside the network try block, which only caught
|
||||||
|
`httpx.RequestError`/`RateLimitError` — so a provider returning a non-JSON body
|
||||||
|
(an HTML error page, a truncated/empty body, a gateway error) raised an
|
||||||
|
UNCAUGHT `json.JSONDecodeError` that aborted the search in the background. These tests
|
||||||
|
pin that all four providers degrade to [] on malformed JSON, matching brave.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from services.search import providers
|
||||||
|
|
||||||
|
|
||||||
|
class _BadJSONResponse:
|
||||||
|
"""A 200 response whose body is not valid JSON (e.g. an HTML error page)."""
|
||||||
|
status_code = 200
|
||||||
|
|
||||||
|
def raise_for_status(self):
|
||||||
|
return None
|
||||||
|
|
||||||
|
def json(self):
|
||||||
|
raise json.JSONDecodeError("Expecting value", "<html>down</html>", 0)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _offline(monkeypatch):
|
||||||
|
# Keep everything offline + deterministic: no settings/DB, keys via env, and
|
||||||
|
# both httpx verbs return a body that fails to decode.
|
||||||
|
monkeypatch.setattr(providers, "_get_search_settings", lambda: {}, raising=False)
|
||||||
|
monkeypatch.setattr(providers, "_safesearch_for", lambda *_a, **_k: None, raising=False)
|
||||||
|
monkeypatch.setenv("DATA_BRAVE_API_KEY", "k")
|
||||||
|
monkeypatch.setenv("TAVILY_API_KEY", "k")
|
||||||
|
monkeypatch.setenv("SERPER_API_KEY", "k")
|
||||||
|
monkeypatch.setenv("GOOGLE_API_KEY", "k")
|
||||||
|
monkeypatch.setenv("GOOGLE_PSE_CX", "cx")
|
||||||
|
monkeypatch.setattr(providers.httpx, "post", lambda *a, **k: _BadJSONResponse())
|
||||||
|
monkeypatch.setattr(providers.httpx, "get", lambda *a, **k: _BadJSONResponse())
|
||||||
|
|
||||||
|
|
||||||
|
def test_tavily_malformed_json_returns_empty():
|
||||||
|
assert providers.tavily_search("hello") == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_serper_malformed_json_returns_empty():
|
||||||
|
assert providers.serper_search("hello") == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_google_pse_malformed_json_returns_empty():
|
||||||
|
assert providers.google_pse_search("hello") == []
|
||||||
|
|
||||||
|
|
||||||
|
def test_brave_malformed_json_returns_empty():
|
||||||
|
# Already correct on main — guards against regressing the reference behaviour.
|
||||||
|
assert providers.brave_search("hello") == []
|
||||||
Reference in New Issue
Block a user