POST /api/embeddings/endpoint takes a user-supplied URL and immediately makes an outbound httpx request to it with no validation. The admin gate added earlier (PR #80) closed the unauthenticated-access part of #132; this addresses the remaining request: validate the URL before fetching it. Odysseus is local-first, so pointing the embedding endpoint at a loopback or LAN server (local vLLM / llama.cpp / Ollama) is a normal setup — a blanket private-IP block would break the primary use case. So the guard: - always rejects non-HTTP(S) schemes (file://, gopher://, ftp:// …), - always rejects the link-local range (169.254.0.0/16, incl. the cloud instance-metadata 169.254.169.254 exfil vector) plus multicast / reserved / unspecified, and IPv4-mapped-IPv6 forms of the above, - keeps loopback/LAN allowed by default, and - adds EMBEDDING_BLOCK_PRIVATE_IPS=true for full SSRF lockdown on exposed multi-tenant deployments. Logic lives in src/url_safety.py (stdlib only, resolver injectable) so it is unit-testable without real DNS; the route calls it before the health-check request. Covered by tests/test_url_safety.py (8 cases). Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
71 lines
2.5 KiB
Python
71 lines
2.5 KiB
Python
"""Tests for outbound URL safety / SSRF hardening (src/url_safety.py).
|
|
|
|
A stub resolver is injected so the tests never touch real DNS.
|
|
"""
|
|
|
|
from src.url_safety import check_outbound_url
|
|
|
|
|
|
def _resolver(mapping):
|
|
def resolve(host):
|
|
if host in mapping:
|
|
return mapping[host]
|
|
raise OSError(f"unresolvable: {host}")
|
|
return resolve
|
|
|
|
|
|
PUBLIC = _resolver({"example.com": ["93.184.216.34"]})
|
|
LOOPBACK = _resolver({"localhost": ["127.0.0.1"]})
|
|
LAN = _resolver({"nas.local": ["192.168.1.50"]})
|
|
METADATA = _resolver({"evil.example": ["169.254.169.254"]})
|
|
MAPPED_METADATA = _resolver({"evil6.example": ["::ffff:169.254.169.254"]})
|
|
|
|
|
|
def test_non_http_scheme_blocked():
|
|
for url in ("file:///etc/passwd", "ftp://x/y", "gopher://h", "redis://h:6379"):
|
|
ok, reason = check_outbound_url(url, resolver=PUBLIC)
|
|
assert ok is False, url
|
|
assert "scheme" in reason
|
|
|
|
|
|
def test_missing_host_or_empty_blocked():
|
|
assert check_outbound_url("", resolver=PUBLIC)[0] is False
|
|
assert check_outbound_url("http://", resolver=PUBLIC)[0] is False
|
|
|
|
|
|
def test_public_url_allowed():
|
|
ok, reason = check_outbound_url("https://example.com/v1/embeddings", resolver=PUBLIC)
|
|
assert ok is True, reason
|
|
|
|
|
|
def test_cloud_metadata_blocked_even_when_private_allowed():
|
|
# The headline SSRF vector must be blocked regardless of block_private.
|
|
ok, reason = check_outbound_url("http://evil.example/latest/meta-data/", resolver=METADATA)
|
|
assert ok is False
|
|
assert "link-local" in reason
|
|
|
|
|
|
def test_ipv4_mapped_metadata_blocked():
|
|
ok, reason = check_outbound_url("http://evil6.example/", resolver=MAPPED_METADATA)
|
|
assert ok is False
|
|
assert "link-local" in reason
|
|
|
|
|
|
def test_loopback_and_lan_allowed_by_default_local_first():
|
|
# Local-first: a localhost / LAN embedding server is a legitimate target.
|
|
assert check_outbound_url("http://localhost:8080/v1", resolver=LOOPBACK)[0] is True
|
|
assert check_outbound_url("http://nas.local:1234/v1", resolver=LAN)[0] is True
|
|
|
|
|
|
def test_strict_mode_blocks_private_and_loopback():
|
|
ok, reason = check_outbound_url("http://localhost:8080", block_private=True, resolver=LOOPBACK)
|
|
assert ok is False and "private" in reason
|
|
ok, reason = check_outbound_url("http://nas.local", block_private=True, resolver=LAN)
|
|
assert ok is False and "private" in reason
|
|
|
|
|
|
def test_unresolvable_host_blocked():
|
|
ok, reason = check_outbound_url("http://does-not-resolve.invalid", resolver=PUBLIC)
|
|
assert ok is False
|
|
assert "resolve" in reason
|