diff --git a/.env.example b/.env.example index e53d2f8..f282880 100644 --- a/.env.example +++ b/.env.example @@ -27,6 +27,16 @@ LLM_HOST=localhost # Research service LLM endpoint # RESEARCH_LLM_ENDPOINT=http://localhost:8000/v1/chat/completions +# Extra CA bundle for LLM providers whose TLS chain isn't in the default +# trust store. Layered ON TOP of the system / certifi bundle — verification +# stays on for every host, the trust set just gets larger. Useful for: +# - GigaChat / Sber (Russian Trusted Root CA): without this the endpoint +# shows offline with CERTIFICATE_VERIFY_FAILED — self-signed certificate +# in certificate chain. +# - On-premise / corporate LLM gateways with an internal CA. +# Point at a PEM file containing the missing root(s). +# LLM_CA_BUNDLE=/etc/odysseus/ca/extra-roots.pem + # ============================================================ # Search & Web # ============================================================ diff --git a/routes/model_routes.py b/routes/model_routes.py index 0cf98d5..ac025ad 100644 --- a/routes/model_routes.py +++ b/routes/model_routes.py @@ -17,6 +17,7 @@ from fastapi.responses import StreamingResponse from core.database import SessionLocal, ModelEndpoint, Session as DbSession from core.middleware import require_admin from src.llm_core import _detect_provider, _host_match, ANTHROPIC_MODELS +from src.tls_overrides import llm_verify from src.settings import load_settings as _load_settings, save_settings as _save_settings from src.endpoint_resolver import ( normalize_base as _normalize_base, @@ -624,7 +625,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis if api_key: headers["x-api-key"] = api_key try: - r = httpx.get(url, headers=headers, timeout=timeout) + r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify()) r.raise_for_status() data = r.json() models = [m.get("id") for m in (data.get("data") or []) if m.get("id")] @@ -645,7 +646,7 @@ def _probe_endpoint(base_url: 
str, api_key: str = None, timeout: int = 5) -> Lis url = build_models_url(base) headers = build_headers(api_key, base) try: - r = httpx.get(url, headers=headers, timeout=timeout) + r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify()) r.raise_for_status() data = r.json() # OpenAI format: {"data": [{"id": "model-name"}]} @@ -680,7 +681,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis parsed = urlparse(base) if parsed.port == 11434 or "ollama" in (parsed.hostname or "").lower(): root = base[:-3].rstrip("/") if base.endswith("/v1") else base - r = httpx.get(root + "/api/tags", timeout=timeout) + r = httpx.get(root + "/api/tags", timeout=timeout, verify=llm_verify()) r.raise_for_status() data = r.json() models = [m.get("name") or m.get("model") for m in (data.get("models") or []) if m.get("name") or m.get("model")] @@ -741,7 +742,7 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) -> break for path in ("/api/version", "/api/tags"): try: - r = httpx.get(root + path, timeout=timeout) + r = httpx.get(root + path, timeout=timeout, verify=llm_verify()) result = _result_from_response(r) if result["reachable"]: return result @@ -752,7 +753,7 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) -> pass try: - r = httpx.get(base, headers=headers, timeout=timeout) + r = httpx.get(base, headers=headers, timeout=timeout, verify=llm_verify()) return _result_from_response(r) except Exception as e: last_error = str(e)[:120] diff --git a/src/llm_core.py b/src/llm_core.py index 2d66685..be31ac5 100644 --- a/src/llm_core.py +++ b/src/llm_core.py @@ -129,7 +129,10 @@ def _get_http_client() -> httpx.AsyncClient: """Return process-wide AsyncClient. 
Per-request timeout is passed at call time.""" global _http_client if _http_client is None or _http_client.is_closed: - _http_client = httpx.AsyncClient(limits=_http_limits, http2=False) + from src.tls_overrides import llm_verify + _http_client = httpx.AsyncClient( + limits=_http_limits, http2=False, verify=llm_verify(), + ) return _http_client def _get_cached_response(cache_key: str) -> Optional[str]: diff --git a/src/tls_overrides.py b/src/tls_overrides.py new file mode 100644 index 0000000..dc4e460 --- /dev/null +++ b/src/tls_overrides.py @@ -0,0 +1,91 @@ +"""Extended TLS trust store for private-CA LLM providers. + +Some upstream LLM providers serve their API over TLS certificates that are +signed by a private root CA which is not part of the standard system bundle: + + - GigaChat (Sber) uses the Russian Trusted Root CA, not bundled with + OpenSSL / certifi / system trust on most non-Russian installs. The + chain looks self-signed to Python and the endpoint is marked offline + with `CERTIFICATE_VERIFY_FAILED: self-signed certificate in + certificate chain` (see issue #722). + - On-premise enterprise LLM gateways often present a corporate CA that + has not been imported into the runtime's trust store. + +Operators point `LLM_CA_BUNDLE` at a PEM file containing the extra CA +cert(s). The default system / certifi trust store is loaded first, then +the operator's PEM is layered on top, so verification still happens — +the trust set just gets larger. We deliberately do not provide a +"verify=off" knob: weakening verification globally (or per-host) would +expose those endpoints to MITM, and the operator-supplied bundle is the +correct fix for legitimate private-CA providers. + +Example (GigaChat): + # Sber publishes the chain at + # https://www.gosuslugi.ru/crt/rootca_ssl_rsa2022.cer + # Convert to PEM and point the env var at it. 
def _build_ssl_context() -> Optional[ssl.SSLContext]:
    """Create the extended-trust SSLContext used for LLM provider requests.

    Starts from ``ssl.create_default_context()`` and additionally loads the
    PEM file configured through ``LLM_CA_BUNDLE`` (cached in the module-level
    ``_extra_bundle_path``). Verification stays enabled; only the set of
    trusted roots grows.

    Returns:
        The configured context, or ``None`` when no bundle is configured,
        when the configured path is not a regular file, or when the PEM
        cannot be loaded. Misconfiguration is logged as a warning and never
        raises, so a bad env var cannot take the process down.
    """
    bundle = _extra_bundle_path
    if not bundle:
        # Opt-in feature: nothing configured means nothing changes.
        return None

    if os.path.isfile(bundle):
        # NOTE(review): create_default_context() loads the OS / OpenSSL
        # default CA locations. httpx's own verify=True default is understood
        # to use the certifi bundle instead -- confirm the two trust sets are
        # equivalent on the deployment images before relying on this.
        context = ssl.create_default_context()
        try:
            context.load_verify_locations(cafile=bundle)
        except (ssl.SSLError, OSError) as exc:
            # Unreadable / empty / malformed PEM: keep verification on with
            # the default trust store rather than failing startup.
            logger.warning(
                "LLM_CA_BUNDLE=%r failed to load (%s); falling back to the "
                "default trust store.",
                bundle, exc,
            )
            return None
        logger.info(
            "Loaded extra CA bundle %r on top of the default trust store.",
            bundle,
        )
        return context

    logger.warning(
        "LLM_CA_BUNDLE points at %r but the file does not exist; "
        "falling back to the default trust store.",
        bundle,
    )
    return None
def llm_verify():
    """Return the ``verify=`` argument for httpx calls to LLM providers.

    Gives back the shared extended-trust ``ssl.SSLContext`` when one was
    built from ``LLM_CA_BUNDLE`` at import time. Otherwise returns ``True``,
    which leaves httpx on its default behaviour with certificate
    verification fully enabled. This function never returns ``False``.
    """
    if _SHARED_SSL_CONTEXT is None:
        # No (usable) extra bundle: keep httpx's stock verification.
        return True
    return _SHARED_SSL_CONTEXT
def _grep_files(pattern: str) -> set[str]:
    """Collect repo-relative paths of ``.py`` files whose text matches `pattern`.

    Walks every ``*.py`` file under ``REPO`` and returns the POSIX-style
    relative path of each file in which the regular expression is found.
    Files under ``tests/``, the override module ``src/tls_overrides.py``
    (the definition site, not a caller) and anything inside a ``.claude``
    scratch directory are ignored. Files that cannot be read are skipped.
    """
    matcher = re.compile(pattern)

    def _is_excluded(rel: str) -> bool:
        # Same exclusions, in the same order, as the scope rules above.
        return (
            rel.startswith("tests/")
            or rel == "src/tls_overrides.py"
            or rel.startswith(".claude/")
            or "/.claude/" in rel
        )

    matched: set[str] = set()
    for candidate in REPO.rglob("*.py"):
        rel_path = candidate.relative_to(REPO).as_posix()
        if _is_excluded(rel_path):
            continue
        try:
            text = candidate.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            # Unreadable file (permissions, broken symlink, ...): not a hit.
            continue
        if matcher.search(text) is not None:
            matched.add(rel_path)
    return matched
def test_tls_overrides_does_not_weaken_global_tls():
    """The override module must only extend trust, never switch it off.

    Scans the source text of ``src/tls_overrides.py`` for the usual ways of
    silently weakening TLS in Python:
      - rebinding ``ssl._create_default_https_context``
      - any use of ``ssl._create_unverified_context``
      - ``disable_warnings(...)`` (urllib3 / requests)
      - ``verify=False`` on an HTTP client

    The scan is purely textual, so a match inside a comment or docstring of
    that module fails the test as well.
    """
    module_source = REPO.joinpath("src", "tls_overrides.py").read_text(
        encoding="utf-8"
    )
    weakening_patterns = (
        r"_create_default_https_context\s*=",
        r"_create_unverified_context",
        r"disable_warnings",
        r"verify\s*=\s*False",
    )
    for pat in weakening_patterns:
        found = re.search(pat, module_source)
        assert found is None, (
            f"src/tls_overrides.py contains forbidden pattern {pat!r}. "
            "The extra CA bundle must only ADD trust, never weaken it."
        )
def test_llm_verify_falls_back_to_true_for_missing_bundle_file():
    """A bogus LLM_CA_BUNDLE path must not raise and must yield verify=True.

    Points ``LLM_CA_BUNDLE`` at a path that does not exist, reloads
    ``src.tls_overrides`` so its import-time resolution runs against that
    value, and checks that ``llm_verify()`` falls back to ``True`` (default
    trust store, verification on).

    The test restores whatever ``LLM_CA_BUNDLE`` value was present before it
    ran and reloads the module once more afterwards, so neither the
    environment nor the module's cached state leaks the bogus path into
    later tests.
    """
    import importlib

    import src.tls_overrides as mod

    previous = os.environ.get("LLM_CA_BUNDLE")
    os.environ["LLM_CA_BUNDLE"] = "/nonexistent/path/extra-roots.pem"
    try:
        importlib.reload(mod)
        assert mod.llm_verify() is True
    finally:
        # Put the environment back exactly as we found it; popping
        # unconditionally would erase a value the developer or CI job set.
        if previous is None:
            os.environ.pop("LLM_CA_BUNDLE", None)
        else:
            os.environ["LLM_CA_BUNDLE"] = previous
        # The module resolves its state at import time, so rebuild it from
        # the restored environment instead of leaving the bogus path cached.
        importlib.reload(mod)