* fix: fail fast when ChromaDB is unreachable instead of blocking startup * fix: only cache the ChromaDB client after a successful heartbeat * test: cover ChromaDB fast-fail preflight and no-cache-on-failure
This commit is contained in:
@@ -6,12 +6,27 @@ Connects to a ChromaDB instance running as a standalone service.
|
||||
"""
|
||||
|
||||
import os
|
||||
import socket
|
||||
import logging
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Module-level singleton ChromaDB client; None until get_chroma_client()
# has stored a connected client here.
_client = None
|
||||
|
||||
# A short connect probe so an unreachable ChromaDB fails fast instead of
|
||||
# blocking on the OS connection timeout (~30-60s, WinError 10060 on Windows),
|
||||
# which otherwise stalls app startup. Tunable via CHROMADB_CONNECT_TIMEOUT.
|
||||
_CONNECT_TIMEOUT = float(os.getenv("CHROMADB_CONNECT_TIMEOUT", "2.0"))
|
||||
|
||||
|
||||
def _port_open(host: str, port: int, timeout: float = None) -> bool:
    """Probe ``host:port`` with a short-lived TCP connection.

    Args:
        host: Hostname or IP address to connect to.
        port: TCP port to connect to.
        timeout: Seconds to wait for the connection. Any falsy value
            (``None`` or ``0``) falls back to ``_CONNECT_TIMEOUT``.

    Returns:
        True if the connection was established, False if the attempt
        raised ``OSError`` (refused, unreachable, timed out, ...).
    """
    wait_seconds = timeout if timeout else _CONNECT_TIMEOUT
    reachable = False
    try:
        # The socket is only a probe; the context manager closes it right away.
        with socket.create_connection((host, port), timeout=wait_seconds):
            reachable = True
    except OSError:
        reachable = False
    return reachable
|
||||
|
||||
|
||||
def get_chroma_client():
|
||||
"""Get or create the singleton ChromaDB HTTP client.
|
||||
@@ -34,10 +49,20 @@ def get_chroma_client():
|
||||
host = os.getenv("CHROMADB_HOST", "localhost")
|
||||
port = int(os.getenv("CHROMADB_PORT", "8100"))
|
||||
|
||||
_client = chromadb.HttpClient(host=host, port=port)
|
||||
if not _port_open(host, port):
|
||||
raise RuntimeError(
|
||||
f"ChromaDB is not reachable at {host}:{port}. Start the ChromaDB "
|
||||
f"service (e.g. `docker compose up chromadb`) or set CHROMADB_HOST / "
|
||||
f"CHROMADB_PORT to point at a running instance."
|
||||
)
|
||||
|
||||
# Health check
|
||||
_client.heartbeat()
|
||||
client = chromadb.HttpClient(host=host, port=port)
|
||||
|
||||
# Health check before caching — if the port is open but the service isn't
|
||||
# healthy yet (e.g. still starting), don't poison the singleton with a dead
|
||||
# client; leave _client unset so the next call retries.
|
||||
client.heartbeat()
|
||||
_client = client
|
||||
logger.info(f"ChromaDB connected: {host}:{port}")
|
||||
return _client
|
||||
|
||||
|
||||
52
tests/test_chroma_client.py
Normal file
52
tests/test_chroma_client.py
Normal file
@@ -0,0 +1,52 @@
|
||||
"""Regression tests for the ChromaDB singleton client (issue #326).
|
||||
|
||||
Covers the fast-fail preflight (so an unreachable ChromaDB doesn't block
|
||||
startup for the full OS connection timeout) and the rule that a failed
|
||||
connection must not poison the cached singleton.
|
||||
"""
|
||||
import socket
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
import src.chroma_client as cc
|
||||
|
||||
|
||||
def _free_port() -> int:
    """Return a local TCP port number that currently has no listener.

    Binds a throwaway socket to port 0 so the OS picks an unused port,
    records the assigned number, and releases the socket again. The
    context manager guarantees the socket is closed even if ``bind`` or
    ``getsockname`` raises, so a failing test run does not leak it.

    Returns:
        The port number the OS assigned; nothing is listening on it once
        this function returns.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
        sock.bind(("127.0.0.1", 0))
        return sock.getsockname()[1]
|
||||
|
||||
|
||||
def test_port_open_false_for_closed_port_and_is_fast():
    """A port with no listener is reported closed, and reported quickly."""
    closed_port = _free_port()
    started = time.monotonic()
    reachable = cc._port_open("127.0.0.1", closed_port, timeout=1.0)
    assert reachable is False
    # Failing fast is the point: stay far below the 30-60s OS connect timeout.
    elapsed = time.monotonic() - started
    assert elapsed < 5.0
|
||||
|
||||
|
||||
def test_port_open_true_for_listening_socket():
    """A port with an active listener is reported as open."""
    listener = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    listener.bind(("127.0.0.1", 0))
    listener.listen(1)
    address = listener.getsockname()
    try:
        reachable = cc._port_open(address[0], address[1], timeout=1.0)
        assert reachable is True
    finally:
        # Release the listener whether or not the assertion held.
        listener.close()
|
||||
|
||||
|
||||
def test_get_chroma_client_does_not_cache_when_unreachable(monkeypatch):
    """An unreachable ChromaDB raises and leaves the singleton unset."""
    pytest.importorskip("chromadb")
    cc.reset_client()

    # Point the client at a local port that nothing is listening on.
    dead_port = _free_port()
    monkeypatch.setenv("CHROMADB_HOST", "127.0.0.1")
    monkeypatch.setenv("CHROMADB_PORT", str(dead_port))

    with pytest.raises(RuntimeError):
        cc.get_chroma_client()

    # The failure must not be cached, so a later call (once ChromaDB is
    # up) can still succeed.
    assert cc._client is None
|
||||
Reference in New Issue
Block a user