Support extra CA bundle for private-CA LLM providers (#769)
Adding GigaChat (Sber) or an on-premise enterprise LLM gateway as a
model endpoint fails on first probe with
CERTIFICATE_VERIFY_FAILED: self-signed certificate in certificate
chain (_ssl.c:1000)
because their TLS chain is signed by a private root CA (Russian Trusted
Root CA for GigaChat; corporate CA for on-prem) that isn't part of the
default system / certifi trust store. The endpoint shows offline in
the picker even though the URL and API key are correct (issue #722).
The right fix is to extend the trust store, not to weaken verification.
This change:
- src/tls_overrides.py: new module that resolves an opt-in env var
LLM_CA_BUNDLE at import time, builds a shared SSLContext via
ssl.create_default_context() (so the system / certifi bundle is
loaded first) and layers the operator's PEM on top with
load_verify_locations(). Exposes llm_verify() returning a value
suitable for httpx `verify=`. Defaults to True (httpx built-in
trust) when the env var is unset, when the file is missing, or
when the PEM fails to load. Verification is never silently
disabled: a warning is logged and we fall back to the safe path.
- src/llm_core.py: thread llm_verify() into the shared AsyncClient
used by stream_llm / streaming completions.
- routes/model_routes.py: thread llm_verify() into the five httpx.get
call sites in _probe_endpoint / _ping_endpoint so adding a
private-CA endpoint goes green on the very first probe and the
picker stops showing it offline.
- .env.example: document LLM_CA_BUNDLE with the GigaChat case as the
concrete example.
Deliberately NOT included: a verify=False knob (global or per-host).
Disabling verification exposes the affected endpoint to MITM, and the
operator-supplied bundle is the correct fix for legitimate private-CA
providers — so the only switch in this PR is the safe one.
Closes #722.
This commit is contained in:
10
.env.example
10
.env.example
@@ -27,6 +27,16 @@ LLM_HOST=localhost
|
||||
# Research service LLM endpoint
|
||||
# RESEARCH_LLM_ENDPOINT=http://localhost:8000/v1/chat/completions
|
||||
|
||||
# Extra CA bundle for LLM providers whose TLS chain isn't in the default
|
||||
# trust store. Layered ON TOP of the system / certifi bundle — verification
|
||||
# stays on for every host, the trust set just gets larger. Useful for:
|
||||
# - GigaChat / Sber (Russian Trusted Root CA): without this the endpoint
|
||||
# shows offline with CERTIFICATE_VERIFY_FAILED — self-signed certificate
|
||||
# in certificate chain.
|
||||
# - On-premise / corporate LLM gateways with an internal CA.
|
||||
# Point at a PEM file containing the missing root(s).
|
||||
# LLM_CA_BUNDLE=/etc/odysseus/ca/extra-roots.pem
|
||||
|
||||
# ============================================================
|
||||
# Search & Web
|
||||
# ============================================================
|
||||
|
||||
@@ -17,6 +17,7 @@ from fastapi.responses import StreamingResponse
|
||||
from core.database import SessionLocal, ModelEndpoint, Session as DbSession
|
||||
from core.middleware import require_admin
|
||||
from src.llm_core import _detect_provider, _host_match, ANTHROPIC_MODELS
|
||||
from src.tls_overrides import llm_verify
|
||||
from src.settings import load_settings as _load_settings, save_settings as _save_settings
|
||||
from src.endpoint_resolver import (
|
||||
normalize_base as _normalize_base,
|
||||
@@ -624,7 +625,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
|
||||
if api_key:
|
||||
headers["x-api-key"] = api_key
|
||||
try:
|
||||
r = httpx.get(url, headers=headers, timeout=timeout)
|
||||
r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify())
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
models = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
|
||||
@@ -645,7 +646,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
|
||||
url = build_models_url(base)
|
||||
headers = build_headers(api_key, base)
|
||||
try:
|
||||
r = httpx.get(url, headers=headers, timeout=timeout)
|
||||
r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify())
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
# OpenAI format: {"data": [{"id": "model-name"}]}
|
||||
@@ -680,7 +681,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
|
||||
parsed = urlparse(base)
|
||||
if parsed.port == 11434 or "ollama" in (parsed.hostname or "").lower():
|
||||
root = base[:-3].rstrip("/") if base.endswith("/v1") else base
|
||||
r = httpx.get(root + "/api/tags", timeout=timeout)
|
||||
r = httpx.get(root + "/api/tags", timeout=timeout, verify=llm_verify())
|
||||
r.raise_for_status()
|
||||
data = r.json()
|
||||
models = [m.get("name") or m.get("model") for m in (data.get("models") or []) if m.get("name") or m.get("model")]
|
||||
@@ -741,7 +742,7 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
|
||||
break
|
||||
for path in ("/api/version", "/api/tags"):
|
||||
try:
|
||||
r = httpx.get(root + path, timeout=timeout)
|
||||
r = httpx.get(root + path, timeout=timeout, verify=llm_verify())
|
||||
result = _result_from_response(r)
|
||||
if result["reachable"]:
|
||||
return result
|
||||
@@ -752,7 +753,7 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
|
||||
pass
|
||||
|
||||
try:
|
||||
r = httpx.get(base, headers=headers, timeout=timeout)
|
||||
r = httpx.get(base, headers=headers, timeout=timeout, verify=llm_verify())
|
||||
return _result_from_response(r)
|
||||
except Exception as e:
|
||||
last_error = str(e)[:120]
|
||||
|
||||
@@ -129,7 +129,10 @@ def _get_http_client() -> httpx.AsyncClient:
|
||||
"""Return process-wide AsyncClient. Per-request timeout is passed at call time."""
|
||||
global _http_client
|
||||
if _http_client is None or _http_client.is_closed:
|
||||
_http_client = httpx.AsyncClient(limits=_http_limits, http2=False)
|
||||
from src.tls_overrides import llm_verify
|
||||
_http_client = httpx.AsyncClient(
|
||||
limits=_http_limits, http2=False, verify=llm_verify(),
|
||||
)
|
||||
return _http_client
|
||||
|
||||
def _get_cached_response(cache_key: str) -> Optional[str]:
|
||||
|
||||
91
src/tls_overrides.py
Normal file
91
src/tls_overrides.py
Normal file
@@ -0,0 +1,91 @@
|
||||
"""Extended TLS trust store for private-CA LLM providers.
|
||||
|
||||
Some upstream LLM providers serve their API over TLS certificates that are
|
||||
signed by a private root CA which is not part of the standard system bundle:
|
||||
|
||||
- GigaChat (Sber) uses the Russian Trusted Root CA, not bundled with
|
||||
OpenSSL / certifi / system trust on most non-Russian installs. The
|
||||
chain looks self-signed to Python and the endpoint is marked offline
|
||||
with `CERTIFICATE_VERIFY_FAILED: self-signed certificate in
|
||||
certificate chain` (see issue #722).
|
||||
- On-premise enterprise LLM gateways often present a corporate CA that
|
||||
has not been imported into the runtime's trust store.
|
||||
|
||||
Operators point `LLM_CA_BUNDLE` at a PEM file containing the extra CA
|
||||
cert(s). The default system / certifi trust store is loaded first, then
|
||||
the operator's PEM is layered on top, so verification still happens —
|
||||
the trust set just gets larger. We deliberately do not provide a
|
||||
"verify=off" knob: weakening verification globally (or per-host) would
|
||||
expose those endpoints to MITM, and the operator-supplied bundle is the
|
||||
correct fix for legitimate private-CA providers.
|
||||
|
||||
Example (GigaChat):
|
||||
# Sber publishes the chain at
|
||||
# https://www.gosuslugi.ru/crt/rootca_ssl_rsa2022.cer
|
||||
# Convert to PEM and point the env var at it.
|
||||
LLM_CA_BUNDLE=/etc/odysseus/ca/russian-trusted-root.pem
|
||||
|
||||
Scope:
|
||||
`llm_verify()` is intentionally consumed by only two modules — the
|
||||
shared async client in `src/llm_core.py` and the endpoint probes in
|
||||
`routes/model_routes.py`. Both reach LLM provider URLs. The override
|
||||
is NOT threaded into web_fetch, search providers, gallery downloads,
|
||||
embeddings, webhook delivery, or anything else that hits arbitrary
|
||||
URLs, and it does NOT affect the app's own browser-facing TLS. That
|
||||
boundary is pinned by `tests/test_tls_overrides_scope.py` — extending
|
||||
it requires updating the allowlist there with a written justification.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import os
|
||||
import ssl
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
_extra_bundle_path: Optional[str] = (os.environ.get("LLM_CA_BUNDLE") or "").strip() or None
|
||||
|
||||
|
||||
def _build_ssl_context() -> Optional[ssl.SSLContext]:
    """Build the SSLContext used for outbound LLM-provider requests.

    The context starts from ``ssl.create_default_context()`` and then adds
    the operator-supplied PEM bundle named by ``LLM_CA_BUNDLE`` through
    ``load_verify_locations()``, so the extra roots extend the trust set
    instead of replacing it.

    A leading ``~`` in the configured path is expanded here, because a value
    read from a ``.env`` file never passes through a shell that would do it.

    Returns:
        The extended-trust context, or ``None`` when no bundle is configured,
        the path is not a regular file, or the PEM cannot be loaded. Both
        failure cases log a warning. ``None`` means "use httpx's default
        ``verify=True``"; no path through this function turns verification
        off.
    """
    if not _extra_bundle_path:
        return None

    bundle_path = os.path.expanduser(_extra_bundle_path)
    if not os.path.isfile(bundle_path):
        logger.warning(
            "LLM_CA_BUNDLE points at %r but the file does not exist; "
            "falling back to the default trust store.",
            bundle_path,
        )
        return None

    # NOTE(review): create_default_context() loads the operating system's
    # default CA locations, while httpx's own verify=True default is backed
    # by certifi. Confirm the deployment images ship a populated system
    # store, otherwise public providers could stop verifying once a bundle
    # is configured.
    ctx = ssl.create_default_context()
    try:
        ctx.load_verify_locations(cafile=bundle_path)
    except (ssl.SSLError, OSError) as e:
        # Unreadable file or no parsable certificate: keep the safe default
        # rather than raising at import time.
        logger.warning(
            "LLM_CA_BUNDLE=%r failed to load (%s); falling back to the "
            "default trust store.",
            bundle_path, e,
        )
        return None

    logger.info(
        "Loaded extra CA bundle %r on top of the default trust store.",
        bundle_path,
    )
    return ctx
|
||||
|
||||
|
||||
# Resolved once at import time. The httpx clients in src/llm_core.py are
|
||||
# long-lived (process-wide), so editing LLM_CA_BUNDLE requires a restart —
|
||||
# matching the existing semantics of LLM_HOST, SEARXNG_INSTANCE, etc.
|
||||
_SHARED_SSL_CONTEXT: Optional[ssl.SSLContext] = _build_ssl_context()
|
||||
|
||||
|
||||
def llm_verify():
    """Return the value to hand to httpx as ``verify=``.

    Works for ``httpx.get``, ``httpx.Client`` and ``httpx.AsyncClient``.
    When the module-level shared context was built (``LLM_CA_BUNDLE`` set
    and loaded), that extended-trust ``SSLContext`` is returned. Otherwise
    the result is ``True``, i.e. httpx's own default with verification
    fully on.
    """
    if _SHARED_SSL_CONTEXT is None:
        return True
    return _SHARED_SSL_CONTEXT
|
||||
149
tests/test_tls_overrides_scope.py
Normal file
149
tests/test_tls_overrides_scope.py
Normal file
@@ -0,0 +1,149 @@
|
||||
"""Scope tests for src/tls_overrides.
|
||||
|
||||
#722 / PR #769 added an opt-in extra CA bundle (LLM_CA_BUNDLE) for
|
||||
private-CA LLM providers. The whole point is that the override stays
|
||||
SCOPED — it must extend trust for the intended outbound LLM provider
|
||||
requests only, and never:
|
||||
|
||||
- touch arbitrary URL fetching (web_fetch, document downloads, generic
|
||||
httpx.get from any other module),
|
||||
- touch browser-facing TLS (anything our app serves over HTTPS),
|
||||
- weaken httpx's process-wide defaults,
|
||||
- silently disable certificate verification.
|
||||
|
||||
These tests prove that. They enumerate the call sites of `llm_verify()`
|
||||
in the source tree and assert they match an allowlist; they verify the
|
||||
override module itself never reaches for the well-known "skip TLS
|
||||
verification" knobs; and they pin the safe default (verify=True) when
|
||||
LLM_CA_BUNDLE is unset.
|
||||
|
||||
If a future change threads `llm_verify()` into a non-LLM HTTP path, the
|
||||
first test fails and the contributor either has to justify the new
|
||||
caller (and add it to ALLOWED_CALLERS with a comment) or revert. That
|
||||
keeps the security-sensitive helper hard to misuse.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
REPO = Path(__file__).resolve().parents[1]
|
||||
|
||||
|
||||
# Files that legitimately need llm_verify() applied to their outbound
|
||||
# httpx calls because the URL is an LLM provider's API. Every caller here
|
||||
# is a discrete LLM HTTP entry point and intentional. Any addition must
|
||||
# come with its own justification in code review.
|
||||
ALLOWED_CALLERS = frozenset({
|
||||
"src/llm_core.py", # shared AsyncClient used by stream_llm
|
||||
"routes/model_routes.py", # _probe_endpoint + _ping_endpoint
|
||||
})
|
||||
|
||||
|
||||
def _grep_files(pattern: str) -> set[str]:
    """Return repo-relative paths of first-party ``.py`` files matching `pattern`.

    `pattern` is a regular expression searched against each file's full text.

    Skipped on purpose:
      - everything under ``tests/``;
      - ``src/tls_overrides.py`` (the definition site, not a caller);
      - any file below a dot-directory (``.git``, ``.venv``, ``.tox``,
        ``.claude`` worktree scratch dirs, ...) or below ``site-packages`` /
        ``node_modules``. Those trees hold tooling state or installed copies
        of code, and an installed copy of this project there would otherwise
        be reported as an unexpected caller.

    Files that cannot be read are ignored.
    """
    rx = re.compile(pattern)
    non_source_dirs = {"site-packages", "node_modules"}
    hits: set[str] = set()
    for path in REPO.rglob("*.py"):
        rel_path = path.relative_to(REPO)
        rel = rel_path.as_posix()
        if rel.startswith("tests/"):
            continue
        if rel == "src/tls_overrides.py":  # definition site, not a caller
            continue
        # Only directory components are inspected, never the file name.
        if any(
            part.startswith(".") or part in non_source_dirs
            for part in rel_path.parts[:-1]
        ):
            continue
        try:
            body = path.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            continue
        if rx.search(body):
            hits.add(rel)
    return hits
|
||||
|
||||
|
||||
def test_llm_verify_only_used_in_allowlisted_files():
    """Pin the set of files that call llm_verify() to ALLOWED_CALLERS.

    The extra CA bundle is meant for the LLM provider HTTP path only. A call
    appearing in any other file (web_fetch, search providers, embeddings,
    gallery downloads, webhook delivery, ...) is a scope expansion that needs
    a security review and a justified ALLOWED_CALLERS entry. An allowlisted
    file that no longer calls llm_verify() fails as well, so a stale
    allowlist or a regressed integration is caught.
    """
    found = _grep_files(r"\bllm_verify\s*\(")

    stray = found.difference(ALLOWED_CALLERS)
    assert not stray, (
        f"llm_verify() called from unexpected file(s): {sorted(stray)}. "
        f"Expected scope: {sorted(ALLOWED_CALLERS)}. If the new caller is an "
        "LLM provider HTTP entry point, add it to ALLOWED_CALLERS with a "
        "comment; if it's not, do not thread the extra CA bundle into it."
    )

    absent = ALLOWED_CALLERS.difference(found)
    assert not absent, (
        f"llm_verify() no longer called from {sorted(absent)} — the "
        "extra CA bundle integration regressed or the allowlist is stale."
    )
|
||||
|
||||
|
||||
def test_tls_overrides_does_not_weaken_global_tls():
    """src/tls_overrides must never reach for a TLS-weakening knob.

    The module source is scanned for the usual ways of silently weakening
    TLS in Python:
      - ssl._create_default_https_context = ssl._create_unverified_context
      - ssl._create_unverified_context (used as a default)
      - urllib3.disable_warnings(...) /
        requests.packages.urllib3.disable_warnings(...)
      - verify=False on any httpx call or client
      - SSLContext-level switches: verify_mode set to ssl.CERT_NONE or
        ssl.CERT_OPTIONAL, and check_hostname = False

    The override module may only EXTEND trust by loading an additional
    bundle into an ssl.SSLContext built on top of the system default.
    """
    body = (REPO / "src" / "tls_overrides.py").read_text(encoding="utf-8")
    forbidden = [
        r"_create_default_https_context\s*=",
        r"_create_unverified_context",
        r"disable_warnings",
        r"verify\s*=\s*False",
        # The module builds its own SSLContext, so the context-level
        # switches are the most direct way it could drop verification.
        r"\bCERT_(?:NONE|OPTIONAL)\b",
        r"check_hostname\s*=\s*False",
    ]
    for pat in forbidden:
        assert not re.search(pat, body), (
            f"src/tls_overrides.py contains forbidden pattern {pat!r}. "
            "The extra CA bundle must only ADD trust, never weaken it."
        )
|
||||
|
||||
|
||||
def test_llm_verify_default_is_true_when_env_unset():
    """With LLM_CA_BUNDLE unset, llm_verify() must return True.

    True makes httpx fall through to its built-in trust store. That is the
    safe default: operators have to opt in to get any change at all.

    The module resolves the variable at import time, so it is reloaded with
    the variable removed. Any value the surrounding process had is put back
    afterwards and the module is reloaded again, so this test leaves neither
    the environment nor the module state altered for later tests.
    """
    import importlib

    import src.tls_overrides as mod

    saved = os.environ.pop("LLM_CA_BUNDLE", None)
    try:
        importlib.reload(mod)
        assert mod.llm_verify() is True, (
            f"Default llm_verify() must be True (httpx built-in trust store); "
            f"got {mod.llm_verify()!r}. An accidental non-True default would "
            "turn an opt-in extension into a process-wide change."
        )
    finally:
        if saved is not None:
            os.environ["LLM_CA_BUNDLE"] = saved
        # Re-resolve against the restored environment.
        importlib.reload(mod)
|
||||
|
||||
|
||||
def test_llm_verify_falls_back_to_true_for_missing_bundle_file():
    """A non-existent LLM_CA_BUNDLE path must not raise and must yield True.

    A misconfigured env var on a deploy box should fall back to verify=True
    (system trust) and never produce a silently TLS-disabled process.

    The previous value of the variable is restored afterwards (or the
    variable removed if it was unset), and the module is reloaded so it does
    not keep the bogus path resolved during this test.
    """
    import importlib

    import src.tls_overrides as mod

    saved = os.environ.get("LLM_CA_BUNDLE")
    os.environ["LLM_CA_BUNDLE"] = "/nonexistent/path/extra-roots.pem"
    try:
        importlib.reload(mod)
        assert mod.llm_verify() is True
    finally:
        if saved is None:
            os.environ.pop("LLM_CA_BUNDLE", None)
        else:
            os.environ["LLM_CA_BUNDLE"] = saved
        # Re-resolve against the restored environment.
        importlib.reload(mod)
|
||||
Reference in New Issue
Block a user