Files
odysseus/tests/test_url_safety.py
lekt8 87babb58d5 fix: SSRF hardening for the custom embedding endpoint URL (#132) (#1206)
POST /api/embeddings/endpoint takes a user-supplied URL and immediately
makes an outbound httpx request to it with no validation. The admin gate
added earlier (PR #80) closed the unauthenticated-access part of #132; this
addresses the remaining request: validate the URL before fetching it.

Odysseus is local-first, so pointing the embedding endpoint at a loopback or
LAN server (local vLLM / llama.cpp / Ollama) is a normal setup — a blanket
private-IP block would break the primary use case. So the guard:

  - always rejects non-HTTP(S) schemes (file://, gopher://, ftp:// …),
  - always rejects the link-local range (169.254.0.0/16, incl. the cloud
    instance-metadata 169.254.169.254 exfil vector) plus multicast /
    reserved / unspecified, and IPv4-mapped-IPv6 forms of the above,
  - keeps loopback/LAN allowed by default, and
  - adds EMBEDDING_BLOCK_PRIVATE_IPS=true for full SSRF lockdown on exposed
    multi-tenant deployments.

Logic lives in src/url_safety.py (stdlib only, resolver injectable) so it is
unit-testable without real DNS; the route calls it before the health-check
request. Covered by tests/test_url_safety.py (8 cases).

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-02 23:46:33 +09:00

71 lines
2.5 KiB
Python

"""Tests for outbound URL safety / SSRF hardening (src/url_safety.py).
A stub resolver is injected so the tests never touch real DNS.
"""
from src.url_safety import check_outbound_url
def _resolver(mapping):
def resolve(host):
if host in mapping:
return mapping[host]
raise OSError(f"unresolvable: {host}")
return resolve
# Stub resolvers, one per scenario. Each knows exactly one host name and
# raises OSError for anything else.

# Host that resolves to an ordinary public address.
PUBLIC = _resolver(
    {
        "example.com": ["93.184.216.34"],
    }
)

# Host that resolves to the IPv4 loopback address.
LOOPBACK = _resolver(
    {
        "localhost": ["127.0.0.1"],
    }
)

# Host that resolves to a 192.168.x.x LAN address.
LAN = _resolver(
    {
        "nas.local": ["192.168.1.50"],
    }
)

# Host that resolves to the link-local cloud instance-metadata address.
METADATA = _resolver(
    {
        "evil.example": ["169.254.169.254"],
    }
)

# Same metadata address, written as an IPv4-mapped IPv6 literal.
MAPPED_METADATA = _resolver(
    {
        "evil6.example": ["::ffff:169.254.169.254"],
    }
)
def test_non_http_scheme_blocked():
    """URLs with a non-HTTP(S) scheme are rejected with a reason mentioning "scheme"."""
    rejected_urls = (
        "file:///etc/passwd",
        "ftp://x/y",
        "gopher://h",
        "redis://h:6379",
    )
    for url in rejected_urls:
        ok, reason = check_outbound_url(url, resolver=PUBLIC)
        # Both assertions carry the URL so a failure inside the loop
        # identifies which scheme slipped through.
        assert ok is False, url
        assert "scheme" in reason, url
def test_missing_host_or_empty_blocked():
    """An empty string and a URL with no host are both rejected."""
    for hostless in ("", "http://"):
        outcome = check_outbound_url(hostless, resolver=PUBLIC)
        assert outcome[0] is False
def test_public_url_allowed():
    """An HTTPS URL whose host resolves through the PUBLIC stub is accepted."""
    target = "https://example.com/v1/embeddings"
    allowed, why = check_outbound_url(target, resolver=PUBLIC)
    # On failure, surface the guard's own explanation.
    assert allowed is True, why
def test_cloud_metadata_blocked_even_when_private_allowed():
    """The metadata address is rejected whatever ``block_private`` is set to.

    ``evil.example`` resolves, via the METADATA stub, to 169.254.169.254.
    """
    # The headline SSRF vector must be blocked regardless of block_private.
    url = "http://evil.example/latest/meta-data/"

    # Private ranges explicitly allowed: the link-local rule must still fire.
    # Passing block_private=False, rather than relying on the default, keeps
    # the test aligned with its name.
    ok, reason = check_outbound_url(url, block_private=False, resolver=METADATA)
    assert ok is False
    assert "link-local" in reason

    # Strict mode must reject it too. Only the verdict is checked, because
    # this test does not pin which rule reports first in strict mode.
    ok, _ = check_outbound_url(url, block_private=True, resolver=METADATA)
    assert ok is False
def test_ipv4_mapped_metadata_blocked():
    """The IPv4-mapped IPv6 form of the metadata address is rejected as link-local."""
    verdict = check_outbound_url("http://evil6.example/", resolver=MAPPED_METADATA)
    allowed, why = verdict
    assert allowed is False
    assert "link-local" in why
def test_loopback_and_lan_allowed_by_default_local_first():
    """Without ``block_private``, loopback and LAN targets are accepted."""
    # Local-first: a localhost / LAN embedding server is a legitimate target.
    local_targets = (
        ("http://localhost:8080/v1", LOOPBACK),
        ("http://nas.local:1234/v1", LAN),
    )
    for url, stub in local_targets:
        outcome = check_outbound_url(url, resolver=stub)
        assert outcome[0] is True
def test_strict_mode_blocks_private_and_loopback():
    """With ``block_private=True``, loopback and LAN targets are rejected as private."""
    strict_cases = (
        ("http://localhost:8080", LOOPBACK),
        ("http://nas.local", LAN),
    )
    for url, stub in strict_cases:
        allowed, why = check_outbound_url(url, block_private=True, resolver=stub)
        # Verdict first, then the reason, in the same order as a combined
        # `and` assertion would evaluate them.
        assert allowed is False
        assert "private" in why
def test_unresolvable_host_blocked():
    """A host the stub resolver does not know is rejected with a "resolve" reason."""
    unknown = "http://does-not-resolve.invalid"
    allowed, why = check_outbound_url(unknown, resolver=PUBLIC)
    assert allowed is False
    assert "resolve" in why