From 0e31c38be08cfd308dd28ec2fdc2a075005dc840 Mon Sep 17 00:00:00 2001 From: tanmayraut45 Date: Tue, 2 Jun 2026 07:56:38 +0530 Subject: [PATCH] Support in-place endpoint updates and recover empty-model sessions (#786) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "don't wipe endpoint_url/model on endpoint delete" half of #587 landed in 6a78b02 (Fix endpoint model preservation for tasks). The three remaining follow-up pieces from the original PR — flagged in the review on #786 — are: - routes/model_routes.py: toggle_model_endpoint (PATCH) now accepts api_key and base_url, so the admin UI can rotate a key or fix a typo'd URL without going through delete+recreate. base_url is normalized the same way the POST handler does (strip /models, /chat/completions, /completions, /v1/messages, then _normalize_base). Cache invalidation matches the POST/DELETE paths and the response includes base_url so the frontend can confirm what was saved. - routes/chat_routes.py: new _recover_empty_session_model picks cached_models[0] from the endpoint that matches sess.endpoint_url and persists it onto the Session row before the LLM call goes out. Wired into both /api/chat and /api/chat_stream after the existing _clear_orphaned_session_endpoint guard, so the order is: drop truly-orphaned sessions first, then heal the "picker showed it, session never knew" case. - routes/chat_routes.py: when recovery fails (no endpoint, no cached models) raise HTTP 400 with a clear message instead of letting model="" reach the upstream as 401/503. Closes #587. --- routes/chat_routes.py | 81 ++++++++++++++++++++++++++++++++++++++++++ routes/model_routes.py | 18 ++++++++++ 2 files changed, 99 insertions(+) diff --git a/routes/chat_routes.py b/routes/chat_routes.py index 044d02b..b2e9dd5 100644 --- a/routes/chat_routes.py +++ b/routes/chat_routes.py @@ -96,6 +96,65 @@ def _clear_orphaned_session_endpoint(sess) -> bool: db.close() +def _recover_empty_session_model(sess, session_id: str) -> bool: + """Re-populate sess.model from the matching endpoint's cached models. + + Covers the window between endpoint setup and the first chat send: the + picker showed a model in the dropdown but the session record never got + written (Issue #587 — UI uses the cached endpoint list, not s.model). + Without this, we'd POST the upstream with model="" and get a generic + 401/503 instead of using the model the user already picked. + + Returns True iff sess.model was repaired. + """ + if getattr(sess, "model", None): + return False + db = SessionLocal() + try: + # Prefer the endpoint whose base URL matches the session — we know the + # user already pointed this session at that endpoint, so its first + # cached model is the most defensible default. + ep = None + if getattr(sess, "endpoint_url", ""): + endpoints = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True).all() + for cand in endpoints: + if _session_url_matches_endpoint(sess.endpoint_url or "", cand.base_url or ""): + ep = cand + break + if not ep: + return False + try: + cached = json.loads(ep.cached_models) if isinstance(ep.cached_models, str) else (ep.cached_models or []) + except Exception: + cached = [] + if not cached: + return False + model = cached[0] + if not isinstance(model, str) or not model.strip(): + return False + model = model.strip() + # Persist so the next request, websocket reconnect, or page reload + # picks up the same model (we'd otherwise re-pick on every send + # and silently switch on the user if the cached order shifts). + db_session = db.query(DBSession).filter(DBSession.id == session_id).first() + if db_session: + db_session.model = model + db_session.updated_at = datetime.utcnow() + db.commit() + sess.model = model + logger.info( + "Recovered empty session model for %s — picked %r from endpoint %s", + session_id, model, ep.id, + ) + return True + except Exception as e: + db.rollback() + logger.warning("Failed to recover empty session model for %s: %s", session_id, e) + return False + finally: + db.close() + + def setup_chat_routes( session_manager, chat_handler, @@ -133,6 +192,16 @@ def setup_chat_routes( if _clear_orphaned_session_endpoint(sess): raise HTTPException(400, "Selected model endpoint was removed. Pick another model in Settings.") + # Empty model + live endpoint = setup race (Issue #587). Repair from + # the endpoint's cached model list before privilege checks, which + # otherwise see "" and behave inconsistently with the allowlist. + _recover_empty_session_model(sess, session) + if not getattr(sess, "model", "").strip(): + raise HTTPException( + 400, + "No model selected for this chat. Open the model picker and choose one before sending.", + ) + # Same allowed_models + daily-cap gate as chat_stream (mirror so the # non-streaming path can't be used to bypass). _enforce_chat_privileges(request, sess) @@ -272,6 +341,18 @@ def setup_chat_routes( sess = session_manager.get_session(session) if _clear_orphaned_session_endpoint(sess): raise HTTPException(400, "Selected model endpoint was removed. Pick another model in Settings.") + # Issue #587: picker shows a model from the endpoint cache but + # s.model never made it onto the DB row (first-send race after + # endpoint setup, or a previous endpoint delete/recreate). Pull + # the first cached model off the matching endpoint so the + # upstream isn't called with model="" (which surfaces as a + # generic 401/503). + _recover_empty_session_model(sess, session) + if not getattr(sess, "model", "").strip(): + raise HTTPException( + 400, + "No model selected for this chat. Open the model picker and choose one before sending.", + ) except SessionNotFoundError as e: raise HTTPException(404, str(e)) except (ValueError, ValidationError): diff --git a/routes/model_routes.py b/routes/model_routes.py index 935f3b9..770871d 100644 --- a/routes/model_routes.py +++ b/routes/model_routes.py @@ -1294,16 +1294,34 @@ def setup_model_routes(model_discovery): ep.name = body["name"].strip() or ep.name if "model_type" in body and isinstance(body["model_type"], str): ep.model_type = body["model_type"].strip() or ep.model_type + # Rotating an API key used to require DELETE+POST, which wiped + # endpoint_url/model from every session referencing the old base + # URL. Allow in-place updates so the admin can change the key + # (or correct a typo'd base URL) without nuking session state. + if "api_key" in body and isinstance(body["api_key"], str): + _new_key = body["api_key"].strip() + # Empty string means "clear it" (e.g. local Ollama no longer needs a key). + ep.api_key = _new_key or None + if "base_url" in body and isinstance(body["base_url"], str): + _new_base = body["base_url"].strip().rstrip("/") + for _suffix in ("/models", "/chat/completions", "/completions", "/v1/messages"): + if _new_base.endswith(_suffix): + _new_base = _new_base[: -len(_suffix)].rstrip("/") + _new_base = _normalize_base(_new_base) + if _new_base: + ep.base_url = _new_base else: ep.is_enabled = not ep.is_enabled db.commit() _invalidate_models_cache() + _local_probe_cache["data"] = None return { "id": ep.id, "is_enabled": ep.is_enabled, "supports_tools": ep.supports_tools, "name": ep.name, "model_type": ep.model_type, + "base_url": ep.base_url, } finally: db.close()