Support extra CA bundle for private-CA LLM providers (#769)

Adding GigaChat (Sber) or an on-premise enterprise LLM gateway as a model
endpoint fails on first probe with

    CERTIFICATE_VERIFY_FAILED: self-signed certificate in certificate chain (_ssl.c:1000)

because their TLS chain is signed by a private root CA (Russian Trusted
Root CA for GigaChat; corporate CA for on-prem) that isn't part of the
default system / certifi trust store. The endpoint shows offline in the
picker even though the URL and API key are correct (issue #722).

The right fix is to extend the trust store, not to weaken verification.

This change:

- src/tls_overrides.py: new module that resolves an opt-in env var
  LLM_CA_BUNDLE at import time, builds a shared SSLContext via
  ssl.create_default_context() (so the system / certifi bundle is loaded
  first) and layers the operator's PEM on top with
  load_verify_locations(). Exposes llm_verify() returning a value
  suitable for httpx `verify=`. Defaults to True (httpx built-in trust)
  when the env var is unset, when the file is missing, or when the PEM
  fails to load. Verification is never silently disabled: a warning is
  logged and we fall back to the safe path.
- src/llm_core.py: thread llm_verify() into the shared AsyncClient used
  by stream_llm / streaming completions.
- routes/model_routes.py: thread llm_verify() into the five httpx.get
  call sites in _probe_endpoint / _ping_endpoint so adding a private-CA
  endpoint goes green on the very first probe and the picker stops
  showing it offline.
- .env.example: document LLM_CA_BUNDLE with the GigaChat case as the
  concrete example.

Deliberately NOT included: a verify=False knob (global or per-host).
Disabling verification exposes the affected endpoint to MITM, and the
operator-supplied bundle is the correct fix for legitimate private-CA
providers — so the only switch in this PR is the safe one.

Closes #722.
2026-06-04 17:48:50 +05:30
parent f876fc7704
commit f59edee611
5 changed files with 260 additions and 6 deletions
--- a/routes/model_routes.py
+++ b/routes/model_routes.py
@@ -17,6 +17,7 @@ from fastapi.responses import StreamingResponse
 from core.database import SessionLocal, ModelEndpoint, Session as DbSession
 from core.middleware import require_admin
 from src.llm_core import _detect_provider, _host_match, ANTHROPIC_MODELS
+from src.tls_overrides import llm_verify
 from src.settings import load_settings as _load_settings, save_settings as _save_settings
 from src.endpoint_resolver import (
    normalize_base as _normalize_base,
@@ -624,7 +625,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
        if api_key:
            headers["x-api-key"] = api_key
        try:
-            r = httpx.get(url, headers=headers, timeout=timeout)
+            r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify())
            r.raise_for_status()
            data = r.json()
            models = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
@@ -645,7 +646,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
    url = build_models_url(base)
    headers = build_headers(api_key, base)
    try:
-        r = httpx.get(url, headers=headers, timeout=timeout)
+        r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify())
        r.raise_for_status()
        data = r.json()
        # OpenAI format: {"data": [{"id": "model-name"}]}
@@ -680,7 +681,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
        parsed = urlparse(base)
        if parsed.port == 11434 or "ollama" in (parsed.hostname or "").lower():
            root = base[:-3].rstrip("/") if base.endswith("/v1") else base
-            r = httpx.get(root + "/api/tags", timeout=timeout)
+            r = httpx.get(root + "/api/tags", timeout=timeout, verify=llm_verify())
            r.raise_for_status()
            data = r.json()
            models = [m.get("name") or m.get("model") for m in (data.get("models") or []) if m.get("name") or m.get("model")]
@@ -741,7 +742,7 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
                    break
            for path in ("/api/version", "/api/tags"):
                try:
-                    r = httpx.get(root + path, timeout=timeout)
+                    r = httpx.get(root + path, timeout=timeout, verify=llm_verify())
                    result = _result_from_response(r)
                    if result["reachable"]:
                        return result
@@ -752,7 +753,7 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
        pass

    try:
-        r = httpx.get(base, headers=headers, timeout=timeout)
+        r = httpx.get(base, headers=headers, timeout=timeout, verify=llm_verify())
        return _result_from_response(r)
    except Exception as e:
        last_error = str(e)[:120]