chore: deduplicate src/search modules (cache, content, query) into shims (#2506)
* chore: dedupe src/search/cache.py into a re-export shim src/search/cache.py was a byte-identical copy of services/search/cache.py. Convert it to a sys.modules alias of the canonical services module (matching src/search/core.py, providers.py, ranking.py) so the two cannot drift, and add an identity assertion to test_search_module_consolidation.py. content.py and query.py are intentionally left as-is: the copies have drifted and services lacks fixes that src has, so they need services reconciled first before they can be shimmed safely. * chore: dedupe src/search content.py and query.py into shims Convert src/search/content.py and query.py to sys.modules aliases of the canonical services/search/* (matching cache.py, core.py, providers.py, ranking.py) so the duplicate copies cannot drift. Repoint the two tests that were coupled to the src-copy internals onto the canonical services surface (behaviour is equivalent): - test_src_search_query_nonstring.py: import services.search.query instead of loading the src file by path. - test_security_regressions.py::test_web_fetch_guard_blocks_redirect_into_private: mock httpx.get (services uses the module-level get, not httpx.Client) and assert on the canonical 'Blocked' message. Drop the now-redundant [src_content, service_content] parametrization in test_search_content_extraction_parity.py and test_search_content_url_guards.py (after the shim both params are the same object); add content/query identity assertions to test_search_module_consolidation.py.
This commit is contained in:
committed by
GitHub
parent
66fba78011
commit
8bfd79fe8e
@@ -1,11 +1,10 @@
|
||||
"""Keep src.search and services.search content extraction behavior aligned."""
|
||||
"""Content extraction behavior for the canonical services.search.content module."""
|
||||
|
||||
import pytest
|
||||
|
||||
pytest.importorskip("bs4")
|
||||
|
||||
from services.search import content as service_content
|
||||
from src.search import content as src_content
|
||||
|
||||
|
||||
class _FakeResponse:
|
||||
@@ -20,7 +19,7 @@ class _FakeResponse:
|
||||
return None
|
||||
|
||||
|
||||
@pytest.mark.parametrize("module", [src_content, service_content])
|
||||
@pytest.mark.parametrize("module", [service_content])
|
||||
def test_content_fetcher_extracts_og_image_and_body_fallback(module, tmp_path, monkeypatch):
|
||||
html = """
|
||||
<html>
|
||||
|
||||
@@ -3,10 +3,9 @@ import ipaddress
|
||||
import pytest
|
||||
|
||||
from services.search import content as service_content
|
||||
from src.search import content as src_content
|
||||
|
||||
|
||||
@pytest.mark.parametrize("module", [src_content, service_content])
|
||||
@pytest.mark.parametrize("module", [service_content])
|
||||
@pytest.mark.parametrize("url", [
|
||||
"http://printer.local/",
|
||||
"http://nas.lan/",
|
||||
@@ -21,7 +20,7 @@ def test_search_content_url_guard_blocks_internal_names_and_address_classes(modu
|
||||
assert module._public_http_url(url) is False
|
||||
|
||||
|
||||
@pytest.mark.parametrize("module", [src_content, service_content])
|
||||
@pytest.mark.parametrize("module", [service_content])
|
||||
def test_search_content_url_guard_blocks_dns_to_multicast(monkeypatch, module):
|
||||
monkeypatch.setattr(
|
||||
module,
|
||||
@@ -32,6 +31,6 @@ def test_search_content_url_guard_blocks_dns_to_multicast(monkeypatch, module):
|
||||
assert module._public_http_url("https://example.test/page") is False
|
||||
|
||||
|
||||
@pytest.mark.parametrize("module", [src_content, service_content])
|
||||
@pytest.mark.parametrize("module", [service_content])
|
||||
def test_search_content_url_guard_still_allows_public_ip(module):
|
||||
assert module._public_http_url("https://93.184.216.34/") is True
|
||||
|
||||
@@ -33,3 +33,10 @@ def test_src_search_package_exports_still_resolve():
|
||||
assert search.searxng_search_results is service_search.searxng_search_results
|
||||
assert search.searxng_search_api is service_search.searxng_search_api
|
||||
assert search.PROVIDER_INFO is service_search.PROVIDER_INFO
|
||||
|
||||
|
||||
def test_src_search_cache_content_query_alias_services():
|
||||
for name in ("cache", "content", "query"):
|
||||
src_mod = importlib.import_module(f"src.search.{name}")
|
||||
svc_mod = importlib.import_module(f"services.search.{name}")
|
||||
assert src_mod is svc_mod, f"src.search.{name} should alias services.search.{name}"
|
||||
|
||||
@@ -860,19 +860,14 @@ def test_web_fetch_guard_blocks_redirect_into_private(monkeypatch):
|
||||
|
||||
class _Resp:
|
||||
status_code = 302
|
||||
url = "http://public.example/start"
|
||||
headers = {"location": "http://169.254.169.254/latest/meta-data/"}
|
||||
|
||||
class _FakeClient:
|
||||
def __init__(self, *a, **k): pass
|
||||
def __enter__(self): return self
|
||||
def __exit__(self, *a): return False
|
||||
def get(self, url): return _Resp()
|
||||
|
||||
monkeypatch.setattr(httpx, "Client", _FakeClient)
|
||||
monkeypatch.setattr(httpx, "get", lambda url, **kwargs: _Resp())
|
||||
|
||||
with _pytest.raises(httpx.RequestError) as exc:
|
||||
content._get_public_url("http://public.example/start", headers={}, timeout=5)
|
||||
assert "non-public" in str(exc.value)
|
||||
assert "Blocked" in str(exc.value)
|
||||
|
||||
|
||||
# ── audit fixes (2026-06-01): email XSS, attachment traversal, authz ──
|
||||
|
||||
@@ -1,22 +1,12 @@
|
||||
import importlib.machinery
|
||||
import importlib.util
|
||||
from pathlib import Path
|
||||
"""Query helpers must tolerate non-string input.
|
||||
|
||||
`src.search.query` is a compatibility shim that aliases the canonical
|
||||
`services.search.query`, so this exercises the live implementation.
|
||||
"""
|
||||
import services.search.query as q
|
||||
|
||||
|
||||
_PATH = Path(__file__).resolve().parents[1] / "src" / "search" / "query.py"
|
||||
|
||||
|
||||
def _load():
|
||||
loader = importlib.machinery.SourceFileLoader("odysseus_src_search_query", str(_PATH))
|
||||
spec = importlib.util.spec_from_loader(loader.name, loader)
|
||||
module = importlib.util.module_from_spec(spec)
|
||||
loader.exec_module(module)
|
||||
return module
|
||||
|
||||
|
||||
def test_src_search_helpers_handle_non_string_queries():
|
||||
q = _load()
|
||||
|
||||
def test_query_helpers_handle_non_string_queries():
|
||||
assert q._detect_question_type(None) is None
|
||||
assert q._split_multi_part(None) == []
|
||||
assert q._extract_site_filter(None) == ("", None)
|
||||
@@ -25,9 +15,7 @@ def test_src_search_helpers_handle_non_string_queries():
|
||||
assert isinstance(q.build_enhanced_query(123), str)
|
||||
|
||||
|
||||
def test_src_search_valid_query_still_works():
|
||||
q = _load()
|
||||
|
||||
def test_query_valid_query_still_works():
|
||||
assert q._detect_question_type("who is bob") == "who"
|
||||
assert q._is_news_query("latest news today") is True
|
||||
assert q._extract_site_filter("cats site:x.com")[1] == "x.com"
|
||||
|
||||
Reference in New Issue
Block a user