diff --git a/services/search/content.py b/services/search/content.py index 290dc35..ff82a7f 100644 --- a/services/search/content.py +++ b/services/search/content.py @@ -39,7 +39,17 @@ _PRIVATE_NETWORKS = ( def _is_private_address(addr: ipaddress._BaseAddress) -> bool: - return addr.is_private or addr.is_loopback or addr.is_link_local or any(addr in net for net in _PRIVATE_NETWORKS) + if isinstance(addr, ipaddress.IPv6Address) and addr.ipv4_mapped is not None: + addr = addr.ipv4_mapped + return ( + addr.is_private + or addr.is_loopback + or addr.is_link_local + or addr.is_reserved + or addr.is_multicast + or addr.is_unspecified + or any(addr in net for net in _PRIVATE_NETWORKS) + ) def _resolve_hostname_ips(hostname: str) -> list[ipaddress._BaseAddress]: diff --git a/src/search/content.py b/src/search/content.py index a7eddb4..42f8e34 100644 --- a/src/search/content.py +++ b/src/search/content.py @@ -39,7 +39,17 @@ _PRIVATE_NETWORKS = ( def _is_private_address(addr: ipaddress._BaseAddress) -> bool: - return any(addr in net for net in _PRIVATE_NETWORKS) or addr.is_private or addr.is_loopback + if isinstance(addr, ipaddress.IPv6Address) and addr.ipv4_mapped is not None: + addr = addr.ipv4_mapped + return ( + addr.is_private + or addr.is_loopback + or addr.is_link_local + or addr.is_reserved + or addr.is_multicast + or addr.is_unspecified + or any(addr in net for net in _PRIVATE_NETWORKS) + ) def _resolve_hostname_ips(hostname: str) -> List[ipaddress._BaseAddress]: @@ -57,6 +67,8 @@ def _public_http_url(url: str) -> bool: host = parsed.hostname.strip().lower() if host in ("localhost", "metadata.google.internal", "metadata"): return False + if host.endswith((".local", ".localhost", ".internal", ".lan", ".intranet")): + return False try: return not _is_private_address(ipaddress.ip_address(host)) except ValueError: diff --git a/tests/test_search_content_url_guards.py b/tests/test_search_content_url_guards.py new file mode 100644 index 0000000..4c8a176 --- /dev/null +++ b/tests/test_search_content_url_guards.py @@ -0,0 +1,37 @@ +import ipaddress + +import pytest + +from services.search import content as service_content +from src.search import content as src_content + + +@pytest.mark.parametrize("module", [src_content, service_content]) +@pytest.mark.parametrize("url", [ + "http://printer.local/", + "http://nas.lan/", + "http://admin.internal/", + "http://service.intranet/", + "http://[::ffff:169.254.169.254]/latest/meta-data/", + "http://224.0.0.1/", + "http://[ff02::1]/", + "http://[::]/", +]) +def test_search_content_url_guard_blocks_internal_names_and_address_classes(module, url): + assert module._public_http_url(url) is False + + +@pytest.mark.parametrize("module", [src_content, service_content]) +def test_search_content_url_guard_blocks_dns_to_multicast(monkeypatch, module): + monkeypatch.setattr( + module, + "_resolve_hostname_ips", + lambda host: [ipaddress.ip_address("224.0.0.1")], + ) + + assert module._public_http_url("https://example.test/page") is False + + +@pytest.mark.parametrize("module", [src_content, service_content]) +def test_search_content_url_guard_still_allows_public_ip(module): + assert module._public_http_url("https://93.184.216.34/") is True