From 7e7e441fec888861088f8e59dca37edbbbb387df Mon Sep 17 00:00:00 2001 From: LittleLlama <72672345+Ninjayeti@users.noreply.github.com> Date: Sun, 31 May 2026 22:32:13 -0700 Subject: [PATCH] Re-enable VectorRAG init with lazy retry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Personal Docs (POST /api/personal/add_directory and friends) currently returns HTTP 503 'RAG system is not available' for every request, because get_rag_manager() and rag_manager are both hardcoded off. The disablement was added when chromadb 1.4.1 / pydantic 2.12 were mutually incompatible at the client init layer. That compat issue is fixed in the current pins (chromadb 1.5.x + pydantic 2.13.x). Verified by calling the original lazy initializer against a running chroma server — VectorRAG instantiates, reports healthy=True, and indexes successfully. This change: 1. src/rag_singleton.py — replace the hardcoded `return None` in get_rag_manager() with the original lazy init body. Keeps the 30s retry-throttle so a missing chroma server doesn't busy-retry on every request. 2. app.py — replace the parallel `rag_manager = None` / `rag_available = False` hardcoding with a get_rag_manager() call. Logs the resolved state at startup. If chroma isn't reachable yet, the module-level rag_manager in app.py stays None and personal-doc routes still return 503. Routes that resolve the manager through get_rag_manager() will re-attempt init on a later request once the 30s throttle window has elapsed (requests inside the window get None back without retrying); routes that only check the rag_manager / rag_available values captured at startup will not retry until the app restarts. Doesn't touch requirements.txt. Deployments using docker-compose get chroma automatically; manual installs that want Personal Docs to work still need to either pip install chromadb (full package) and run `chroma run` or point at an external chroma instance via env. That can be a follow-up README / requirements-optional note. 
--- app.py | 29 ++++++++++++++++++++--------- src/rag_singleton.py | 21 +++++++++++++-------- 2 files changed, 33 insertions(+), 17 deletions(-) diff --git a/app.py b/app.py index a66dbb0..a07e947 100644 --- a/app.py +++ b/app.py @@ -355,15 +355,26 @@ async def serve_generated_image(filename: str, request: Request): from services.youtube import init_youtube init_youtube() -# ========= RAG (vector document RAG — DISABLED) ========= -# VectorRAG (ChromaDB-backed personal-document semantic search) is unused -# (0 directories ever indexed) and its chromadb 1.4.1 / pydantic 2.12 client -# can't even instantiate — it threw at init and cost ~30s of startup waiting on -# the embedding probe. Disabled. All callers already guard on rag_available / -# `if rag_manager`, so personal-doc routes degrade cleanly. -rag_manager = None -rag_available = False -logger.info("Vector document RAG disabled (unused)") +# ========= RAG (vector document RAG) ========= +# VectorRAG (ChromaDB-backed personal-document semantic search). Initialized +# lazily via get_rag_manager() — returns None if ChromaDB isn't reachable +# (no server running on the configured host:port), in which case personal-doc +# routes return a clean 503 instead of busy-retrying every request. +# +# Note: this was previously hardcoded off because chromadb 1.4.1 / pydantic +# 2.12 were mutually incompatible at the time. With the current pins +# (chromadb 1.5.x + pydantic 2.13.x) the init works and Personal Docs +# (POST /api/personal/add_directory etc.) is functional again. 
+from src.rag_singleton import get_rag_manager +rag_manager = get_rag_manager() +rag_available = rag_manager is not None +if rag_available: + logger.info("Vector document RAG initialized") +else: + logger.info( + "Vector document RAG not available at startup " + "(ChromaDB may not be reachable yet — routes will retry lazily)" + ) # ========= IMPORT CONFIG ========= from src.config import config diff --git a/src/rag_singleton.py b/src/rag_singleton.py index 504145e..eb90e84 100644 --- a/src/rag_singleton.py +++ b/src/rag_singleton.py @@ -12,16 +12,21 @@ rag_instance = None _last_attempt = 0.0 _RETRY_INTERVAL = 30 # seconds between re-init attempts + def get_rag_manager(): - """Disabled: vector document RAG (VectorRAG/ChromaDB) is unused and its - client is incompatible with the installed pydantic. Return None so personal- - doc routes fall back to non-vector behavior instead of re-attempting (and - re-hanging on) a broken ChromaDB init every 30s.""" - return None + """Lazy ChromaDB-backed VectorRAG initializer. + Returns the VectorRAG instance on first successful init, None if ChromaDB + isn't reachable / available. Failed init attempts are throttled to once + per _RETRY_INTERVAL seconds so a missing ChromaDB doesn't busy-retry on + every request — callers (personal-doc routes etc.) get None back and + return a clean 503 to the user instead. -def _get_rag_manager_legacy(): - """Original lazy initializer, kept for reference / easy re-enable.""" + Historical note: this used to be hardcoded to ``return None`` with a + comment about chromadb 1.4.1 / pydantic 2.12 being mutually incompatible. + That compat issue is resolved in current pinned versions + (chromadb 1.5.x + pydantic 2.13.x), so the real initializer is back. 
+ """ global rag_instance, _last_attempt if rag_instance is not None: @@ -29,7 +34,7 @@ def _get_rag_manager_legacy(): now = time.monotonic() if now - _last_attempt < _RETRY_INTERVAL: - return None # too soon to retry + return None # too soon to retry — last attempt failed _last_attempt = now