From 1eff46579aefc714d6ad029b5afd8f4b284efbfc Mon Sep 17 00:00:00 2001
From: Afonso Coutinho <116525378+afonsopc@users.noreply.github.com>
Date: Mon, 1 Jun 2026 14:22:41 +0100
Subject: [PATCH] fix: ChromaDB unreachable blocks app startup for 30-60s (#326) (#476)

* fix: fail fast when ChromaDB is unreachable instead of blocking startup

* fix: only cache the ChromaDB client after a successful heartbeat

* test: cover ChromaDB fast-fail preflight and no-cache-on-failure
---
 src/chroma_client.py        | 31 +++++++++++++++++++---
 tests/test_chroma_client.py | 52 +++++++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_chroma_client.py

diff --git a/src/chroma_client.py b/src/chroma_client.py
index 33bc3f5..3a0a80c 100644
--- a/src/chroma_client.py
+++ b/src/chroma_client.py
@@ -6,12 +6,27 @@ Connects to a ChromaDB instance running as a standalone service.
 """
 
 import os
+import socket
 import logging
 
 logger = logging.getLogger(__name__)
 
 _client = None
 
+# A short connect probe so an unreachable ChromaDB fails fast instead of
+# blocking on the OS connection timeout (~30-60s, WinError 10060 on Windows),
+# which otherwise stalls app startup. Tunable via CHROMADB_CONNECT_TIMEOUT.
+_CONNECT_TIMEOUT = float(os.getenv("CHROMADB_CONNECT_TIMEOUT", "2.0"))
+
+
+def _port_open(host: str, port: int, timeout: float = None) -> bool:
+    """Return True if a TCP connection to host:port succeeds within timeout."""
+    try:
+        with socket.create_connection((host, port), timeout=timeout or _CONNECT_TIMEOUT):
+            return True
+    except OSError:
+        return False
+
 
 def get_chroma_client():
     """Get or create the singleton ChromaDB HTTP client.
@@ -34,10 +49,20 @@ def get_chroma_client():
     host = os.getenv("CHROMADB_HOST", "localhost")
     port = int(os.getenv("CHROMADB_PORT", "8100"))
 
-    _client = chromadb.HttpClient(host=host, port=port)
+    if not _port_open(host, port):
+        raise RuntimeError(
+            f"ChromaDB is not reachable at {host}:{port}. Start the ChromaDB "
+            f"service (e.g. `docker compose up chromadb`) or set CHROMADB_HOST / "
+            f"CHROMADB_PORT to point at a running instance."
+        )
 
-    # Health check
-    _client.heartbeat()
+    client = chromadb.HttpClient(host=host, port=port)
+
+    # Health check before caching — if the port is open but the service isn't
+    # healthy yet (e.g. still starting), don't poison the singleton with a dead
+    # client; leave _client unset so the next call retries.
+    client.heartbeat()
+    _client = client
 
     logger.info(f"ChromaDB connected: {host}:{port}")
     return _client
diff --git a/tests/test_chroma_client.py b/tests/test_chroma_client.py
new file mode 100644
index 0000000..0a57fee
--- /dev/null
+++ b/tests/test_chroma_client.py
@@ -0,0 +1,52 @@
+"""Regression tests for the ChromaDB singleton client (issue #326).
+
+Covers the fast-fail preflight (so an unreachable ChromaDB doesn't block
+startup for the full OS connection timeout) and the rule that a failed
+connection must not poison the cached singleton.
+"""
+import socket
+import time
+
+import pytest
+
+import src.chroma_client as cc
+
+
+def _free_port() -> int:
+    """Bind to port 0, grab the assigned port, release it — nothing listens."""
+    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    s.bind(("127.0.0.1", 0))
+    port = s.getsockname()[1]
+    s.close()
+    return port
+
+
+def test_port_open_false_for_closed_port_and_is_fast():
+    port = _free_port()
+    t0 = time.monotonic()
+    assert cc._port_open("127.0.0.1", port, timeout=1.0) is False
+    # The whole point: we fail fast, nowhere near the 30-60s OS timeout.
+    assert time.monotonic() - t0 < 5.0
+
+
+def test_port_open_true_for_listening_socket():
+    srv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    srv.bind(("127.0.0.1", 0))
+    srv.listen(1)
+    host, port = srv.getsockname()
+    try:
+        assert cc._port_open(host, port, timeout=1.0) is True
+    finally:
+        srv.close()
+
+
+def test_get_chroma_client_does_not_cache_when_unreachable(monkeypatch):
+    pytest.importorskip("chromadb")
+    cc.reset_client()
+    monkeypatch.setenv("CHROMADB_HOST", "127.0.0.1")
+    monkeypatch.setenv("CHROMADB_PORT", str(_free_port()))
+    with pytest.raises(RuntimeError):
+        cc.get_chroma_client()
+    # A failed connection must leave the singleton unset so a later call
+    # (once ChromaDB is up) can succeed.
+    assert cc._client is None