From 3f6d630b56a4081d5116df7b2c06c5c9ba61d2df Mon Sep 17 00:00:00 2001 From: wundervrc <147297600+wundervrc@users.noreply.github.com> Date: Mon, 1 Jun 2026 23:40:43 -0230 Subject: [PATCH] Never resolve to a disabled endpoint model (#861) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Background tasks (e.g. the Email Tags / check_email_urgency action) resolve their model through resolve_endpoint("utility") → Default Chat. When the configured model is one the user has since disabled on the endpoint, the resolver still dispatched to it — on Groq that surfaces as every email failing with "HTTP 400: model ... requires terms acceptance". Two paths fed this: - The auto-pick fallback selected from cached_models without excluding the endpoint's hidden_models, so a disabled model listed first won. - A stale default_model left pointing at a now-disabled model (seeded at endpoint registration from raw model_ids[0]) was used verbatim. Fix resolve_endpoint / resolve_endpoint_by_id to drop a configured model that's in hidden_models and to pick the first ENABLED chat model. Also seed default_model on registration via _first_chat_model so we never pin the global default to an embedding/tts entry a provider lists first. Checks: python -m pytest tests/test_endpoint_resolver.py tests/test_model_routes.py tests/test_model_context.py (all pass); python -m py_compile app.py routes/model_routes.py src/endpoint_resolver.py. Co-authored-by: Claude Opus 4.8 --- routes/model_routes.py | 8 ++- src/endpoint_resolver.py | 39 ++++++++++++- tests/test_endpoint_resolver.py | 99 +++++++++++++++++++++++++++++++++ 3 files changed, 141 insertions(+), 5 deletions(-) diff --git a/routes/model_routes.py b/routes/model_routes.py index 44b4abd..4282285 100644 --- a/routes/model_routes.py +++ b/routes/model_routes.py @@ -1052,11 +1052,15 @@ def setup_model_routes(model_discovery): ) db.add(ep) db.commit() - # Auto-set as default chat endpoint if none configured yet + # Auto-set as default chat endpoint if none configured yet. Seed + # the first CHAT model (not raw model_ids[0]) so we don't pin the + # global default to an embedding/tts/etc. entry a provider happens + # to list first. settings = _load_settings() if not settings.get("default_endpoint_id"): + from src.endpoint_resolver import _first_chat_model settings["default_endpoint_id"] = ep.id - settings["default_model"] = model_ids[0] if model_ids else "" + settings["default_model"] = _first_chat_model(model_ids) or "" _save_settings(settings) _invalidate_models_cache() _local_probe_cache["data"] = None diff --git a/src/endpoint_resolver.py b/src/endpoint_resolver.py index 72cd054..aec81a8 100644 --- a/src/endpoint_resolver.py +++ b/src/endpoint_resolver.py @@ -47,6 +47,29 @@ def _endpoint_cached_models(ep) -> list: return models if isinstance(models, list) else [] +def _endpoint_hidden_models(ep) -> set: + """Model ids the admin disabled on this endpoint (the UI's hidden list).""" + raw = getattr(ep, "hidden_models", None) + if not raw: + return set() + try: + hidden = json.loads(raw) if isinstance(raw, str) else raw + except Exception: + return set() + return set(hidden) if isinstance(hidden, list) else set() + + +def _endpoint_enabled_models(ep) -> list: + """Cached models minus the ones disabled on the endpoint, order preserved. + + The auto-pick fallback must never select a model the user disabled — a + Groq endpoint can list 16 models with only 1 enabled, and picking the + raw first one resolves to a model that 400s ("requires terms acceptance"). + """ + hidden = _endpoint_hidden_models(ep) + return [m for m in _endpoint_cached_models(ep) if m not in hidden] + + # Cache for Tailscale hostname → IP resolution _tailscale_cache: Dict[str, Optional[str]] = {} @@ -248,9 +271,15 @@ def resolve_endpoint( chat_url = build_chat_url(base) headers = build_headers(ep.api_key, base) - # If no model specified, try to pick the first from endpoint's cached list. + # Discard a configured model the user has since disabled on the + # endpoint (e.g. a stale `default_model` left pointing at a now-hidden + # model). Treat it as unset so the picker below selects a live one + # instead of dispatching to a disabled model that 400s. + if model and model in _endpoint_hidden_models(ep): + model = "" + # If no (usable) model specified, pick the first enabled chat model. if not model: - model = _first_chat_model(_endpoint_cached_models(ep)) or "" + model = _first_chat_model(_endpoint_enabled_models(ep)) or "" return chat_url, model or fallback_model, headers except Exception as e: @@ -282,8 +311,12 @@ def resolve_endpoint_by_id( chat_url = build_chat_url(base) headers = build_headers(ep.api_key, base) m = (model or "").strip() + # Drop a model the user disabled on the endpoint, then pick the first + # enabled chat model rather than a hidden one. + if m and m in _endpoint_hidden_models(ep): + m = "" if not m: - m = _first_chat_model(_endpoint_cached_models(ep)) or "" + m = _first_chat_model(_endpoint_enabled_models(ep)) or "" if not m: return None return chat_url, m, headers diff --git a/tests/test_endpoint_resolver.py b/tests/test_endpoint_resolver.py index 447aecd..1c638ea 100644 --- a/tests/test_endpoint_resolver.py +++ b/tests/test_endpoint_resolver.py @@ -1,4 +1,5 @@ """Tests for endpoint_resolver — pure functions tested directly to avoid import pollution.""" +import json import re from urllib.parse import urlparse @@ -6,6 +7,45 @@ from urllib.parse import urlparse # Copy the pure functions to test them without importing the full module. # This avoids module cache conflicts with other test files that mock dependencies. +_NON_CHAT_MODEL = ( + "text-embedding", "embedding", "tts-", "whisper", "dall-e", + "moderation", "rerank", "reranker", "clip", "stable-diffusion", +) + + +def _first_chat_model(models): + for m in (models or []): + if not any(p in str(m).lower() for p in _NON_CHAT_MODEL): + return m + return (models[0] if models else None) + + +def _endpoint_cached_models(ep) -> list: + raw = getattr(ep, "cached_models", None) or getattr(ep, "models", None) + if not raw: + return [] + try: + models = json.loads(raw) if isinstance(raw, str) else raw + except Exception: + return [] + return models if isinstance(models, list) else [] + + +def _endpoint_hidden_models(ep) -> set: + raw = getattr(ep, "hidden_models", None) + if not raw: + return set() + try: + hidden = json.loads(raw) if isinstance(raw, str) else raw + except Exception: + return set() + return set(hidden) if isinstance(hidden, list) else set() + + +def _endpoint_enabled_models(ep) -> list: + hidden = _endpoint_hidden_models(ep) + return [m for m in _endpoint_cached_models(ep) if m not in hidden] + def normalize_base(url: str) -> str: url = (url or "").strip().rstrip("/") for suffix in ["/models", "/chat/completions", "/completions", "/v1/messages"]: @@ -137,3 +177,62 @@ class TestBuildHeaders: def test_empty_key(self): assert build_headers("", "https://api.openai.com/v1") == {} + + +class _Ep: + """Minimal ModelEndpoint stand-in for the model-picking helpers.""" + def __init__(self, cached=None, hidden=None): + self.cached_models = json.dumps(cached) if cached is not None else None + self.hidden_models = json.dumps(hidden) if hidden is not None else None + + +class TestFirstChatModel: + def test_skips_embedding_and_tts(self): + models = ["text-embedding-ada-002", "whisper-large-v3", "gpt-4o"] + assert _first_chat_model(models) == "gpt-4o" + + def test_falls_back_to_first_when_all_non_chat(self): + assert _first_chat_model(["whisper-large-v3"]) == "whisper-large-v3" + + def test_empty(self): + assert _first_chat_model([]) is None + + +class TestEnabledModels: + def test_excludes_hidden(self): + # The Groq repro: 16 models, only gpt-oss-120b enabled. + cached = [ + "openai/gpt-oss-safeguard-20b", "canopylabs/orpheus-arabic-saudi", + "whisper-large-v3", "openai/gpt-oss-120b", + ] + hidden = [ + "openai/gpt-oss-safeguard-20b", "canopylabs/orpheus-arabic-saudi", + "whisper-large-v3", + ] + ep = _Ep(cached=cached, hidden=hidden) + assert _endpoint_enabled_models(ep) == ["openai/gpt-oss-120b"] + + def test_no_hidden_returns_all(self): + ep = _Ep(cached=["a", "b"], hidden=None) + assert _endpoint_enabled_models(ep) == ["a", "b"] + + def test_picker_never_selects_disabled_model(self): + # Regression: a disabled model listed first must not be auto-picked. + cached = ["canopylabs/orpheus-arabic-saudi", "openai/gpt-oss-120b"] + hidden = ["canopylabs/orpheus-arabic-saudi"] + ep = _Ep(cached=cached, hidden=hidden) + assert _first_chat_model(_endpoint_enabled_models(ep)) == "openai/gpt-oss-120b" + + def test_stale_configured_model_is_discarded(self): + # A configured model that's been disabled is dropped, falling through + # to the first enabled chat model. + ep = _Ep( + cached=["canopylabs/orpheus-arabic-saudi", "openai/gpt-oss-120b"], + hidden=["canopylabs/orpheus-arabic-saudi"], + ) + configured = "canopylabs/orpheus-arabic-saudi" + if configured in _endpoint_hidden_models(ep): + configured = "" + if not configured: + configured = _first_chat_model(_endpoint_enabled_models(ep)) + assert configured == "openai/gpt-oss-120b"