Support extra CA bundle for private-CA LLM providers (#769)

Adding GigaChat (Sber) or an on-premise enterprise LLM gateway as a model endpoint fails on first probe with CERTIFICATE_VERIFY_FAILED: self-signed certificate in certificate chain (_ssl.c:1000) because their TLS chain is signed by a private root CA (Russian Trusted Root CA for GigaChat; corporate CA for on-prem) that isn't part of the default system / certifi trust store. The endpoint shows offline in the picker even though the URL and API key are correct (issue #722). The right fix is to extend the trust store, not to weaken verification. This change: - src/tls_overrides.py: new module that resolves an opt-in env var LLM_CA_BUNDLE at import time, builds a shared SSLContext via ssl.create_default_context() (so the system / certifi bundle is loaded first) and layers the operator's PEM on top with load_verify_locations(). Exposes llm_verify() returning a value suitable for httpx `verify=`. Defaults to True (httpx built-in trust) when the env var is unset, when the file is missing, or when the PEM fails to load — verification is never silently disabled, the warning is logged and we fall back to the safe path. - src/llm_core.py: thread llm_verify() into the shared AsyncClient used by stream_llm / streaming completions. - routes/model_routes.py: thread llm_verify() into the five httpx.get call sites in _probe_endpoint / _ping_endpoint so adding a private-CA endpoint goes green on the very first probe and the picker stops showing it offline. - .env.example: document LLM_CA_BUNDLE with the GigaChat case as the concrete example. Deliberately NOT included: a verify=False knob (global or per-host). Disabling verification exposes the affected endpoint to MITM, and the operator-supplied bundle is the correct fix for legitimate private-CA providers — so the only switch in this PR is the safe one. Closes #722.
2026-06-04 17:48:50 +05:30
parent f876fc7704
commit f59edee611
5 changed files with 260 additions and 6 deletions
--- a/tests/test_tls_overrides_scope.py
+++ b/tests/test_tls_overrides_scope.py
@@ -0,0 +1,149 @@
+"""Scope tests for src/tls_overrides.
+
+#722 / PR #769 added an opt-in extra CA bundle (LLM_CA_BUNDLE) for
+private-CA LLM providers. The whole point is that the override stays
+SCOPED — it must extend trust for the intended outbound LLM provider
+requests only, and never:
+
+  - touch arbitrary URL fetching (web_fetch, document downloads, generic
+    httpx.get from any other module),
+  - touch browser-facing TLS (anything our app serves over HTTPS),
+  - weaken httpx's process-wide defaults,
+  - silently disable certificate verification.
+
+These tests prove that. They enumerate the call sites of `llm_verify()`
+in the source tree and assert they match an allowlist; they verify the
+override module itself never reaches for the well-known "skip TLS
+verification" knobs; and they pin the safe default (verify=True) when
+LLM_CA_BUNDLE is unset.
+
+If a future change threads `llm_verify()` into a non-LLM HTTP path, the
+first test fails and the contributor either has to justify the new
+caller (and add it to ALLOWED_CALLERS with a comment) or revert. That
+keeps the security-sensitive helper hard to misuse.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[1]
+
+
+# Files that legitimately need llm_verify() applied to their outbound
+# httpx calls because the URL is an LLM provider's API. Every caller here
+# is a discrete LLM HTTP entry point and intentional. Any addition must
+# come with its own justification in code review.
+ALLOWED_CALLERS = frozenset({
+    "src/llm_core.py",          # shared AsyncClient used by stream_llm
+    "routes/model_routes.py",   # _probe_endpoint + _ping_endpoint
+})
+
+
+def _grep_files(pattern: str) -> set[str]:
+    """Return the set of repo-relative .py file paths whose body matches
+    `pattern`. Skips tests, the override module itself, and worktree
+    scratch dirs."""
+    rx = re.compile(pattern)
+    hits: set[str] = set()
+    for path in REPO.rglob("*.py"):
+        rel = path.relative_to(REPO).as_posix()
+        if rel.startswith("tests/"):
+            continue
+        if rel == "src/tls_overrides.py":  # definition site, not a caller
+            continue
+        if rel.startswith(".claude/") or "/.claude/" in rel:
+            continue
+        try:
+            body = path.read_text(encoding="utf-8", errors="ignore")
+        except OSError:
+            continue
+        if rx.search(body):
+            hits.add(rel)
+    return hits
+
+
+def test_llm_verify_only_used_in_allowlisted_files():
+    """llm_verify() must only be consumed by the LLM provider HTTP path.
+
+    The extra CA bundle is scoped to the two known LLM HTTP entry points.
+    If a future PR threads llm_verify() into web_fetch, search providers,
+    embeddings, gallery downloads, webhook delivery, or any other
+    arbitrary-URL caller, that's a scope expansion and a security review.
+    Adding a file to ALLOWED_CALLERS requires a written justification.
+    """
+    callers = _grep_files(r"\bllm_verify\s*\(")
+    unexpected = callers - ALLOWED_CALLERS
+    missing = ALLOWED_CALLERS - callers
+    assert not unexpected, (
+        f"llm_verify() called from unexpected file(s): {sorted(unexpected)}. "
+        f"Expected scope: {sorted(ALLOWED_CALLERS)}. If the new caller is an "
+        "LLM provider HTTP entry point, add it to ALLOWED_CALLERS with a "
+        "comment; if it's not, do not thread the extra CA bundle into it."
+    )
+    assert not missing, (
+        f"llm_verify() no longer called from {sorted(missing)} — the "
+        "extra CA bundle integration regressed or the allowlist is stale."
+    )
+
+
+def test_tls_overrides_does_not_weaken_global_tls():
+    """src/tls_overrides must never reach for a TLS-weakening knob.
+
+    Several common ways to silently weaken TLS in Python:
+      - ssl._create_default_https_context = ssl._create_unverified_context
+      - ssl._create_unverified_context (used as a default)
+      - urllib3.disable_warnings(...)
+      - httpx.AsyncClient(verify=False) (anywhere — must stay verify=True
+        or an SSLContext)
+      - requests.packages.urllib3.disable_warnings(...)
+
+    The override module must only EXTEND trust by loading an additional
+    bundle into an ssl.SSLContext built on top of the system default. It
+    must never silently disable verification.
+    """
+    body = (REPO / "src" / "tls_overrides.py").read_text(encoding="utf-8")
+    forbidden = [
+        r"_create_default_https_context\s*=",
+        r"_create_unverified_context",
+        r"disable_warnings",
+        r"verify\s*=\s*False",
+    ]
+    for pat in forbidden:
+        assert not re.search(pat, body), (
+            f"src/tls_overrides.py contains forbidden pattern {pat!r}. "
+            "The extra CA bundle must only ADD trust, never weaken it."
+        )
+
+
+def test_llm_verify_default_is_true_when_env_unset():
+    """When LLM_CA_BUNDLE is unset, llm_verify() must return True so httpx
+    falls through to its built-in trust store. This is the safe default —
+    operators have to opt in to get any change at all."""
+    os.environ.pop("LLM_CA_BUNDLE", None)
+    import importlib
+
+    import src.tls_overrides as mod
+    importlib.reload(mod)
+    assert mod.llm_verify() is True, (
+        f"Default llm_verify() must be True (httpx built-in trust store); "
+        f"got {mod.llm_verify()!r}. An accidental non-True default would "
+        "turn an opt-in extension into a process-wide change."
+    )
+
+
+def test_llm_verify_falls_back_to_true_for_missing_bundle_file():
+    """Pointing LLM_CA_BUNDLE at a non-existent path must NOT raise and
+    must fall back to verify=True (system trust). A misconfigured env var
+    on a deploy box should never produce a silently TLS-disabled process."""
+    os.environ["LLM_CA_BUNDLE"] = "/nonexistent/path/extra-roots.pem"
+    try:
+        import importlib
+
+        import src.tls_overrides as mod
+        importlib.reload(mod)
+        assert mod.llm_verify() is True
+    finally:
+        os.environ.pop("LLM_CA_BUNDLE", None)