From 3f6d630b56a4081d5116df7b2c06c5c9ba61d2df Mon Sep 17 00:00:00 2001
From: wundervrc <147297600+wundervrc@users.noreply.github.com>
Date: Mon, 1 Jun 2026 23:40:43 -0230
Subject: [PATCH] Never resolve to a disabled endpoint model (#861)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Background tasks (e.g. the Email Tags / check_email_urgency action)
resolve their model through resolve_endpoint("utility") → Default Chat.
When the configured model is one the user has since disabled on the
endpoint, the resolver still dispatched to it — on Groq that surfaces as
every email failing with "HTTP 400: model ... requires terms acceptance".

Two paths fed this:
- The auto-pick fallback selected from cached_models without excluding
  the endpoint's hidden_models, so a disabled model listed first won.
- A stale default_model left pointing at a now-disabled model (seeded at
  endpoint registration from raw model_ids[0]) was used verbatim.

Fix resolve_endpoint / resolve_endpoint_by_id to drop a configured model
that's in hidden_models and to pick the first ENABLED chat model. Also
seed default_model on registration via _first_chat_model so we never pin
the global default to an embedding/tts entry a provider lists first.

Checks: python -m pytest tests/test_endpoint_resolver.py
        tests/test_model_routes.py tests/test_model_context.py (all pass);
        python -m py_compile app.py routes/model_routes.py
        src/endpoint_resolver.py.

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
---
 routes/model_routes.py          |  8 ++-
 src/endpoint_resolver.py        | 39 ++++++++++++-
 tests/test_endpoint_resolver.py | 99 +++++++++++++++++++++++++++++++++
 3 files changed, 141 insertions(+), 5 deletions(-)

diff --git a/routes/model_routes.py b/routes/model_routes.py
index 44b4abd..4282285 100644
--- a/routes/model_routes.py
+++ b/routes/model_routes.py
@@ -1052,11 +1052,15 @@ def setup_model_routes(model_discovery):
             )
             db.add(ep)
             db.commit()
-            # Auto-set as default chat endpoint if none configured yet
+            # Auto-set as default chat endpoint if none configured yet. Seed
+            # the first CHAT model (not raw model_ids[0]) so we don't pin the
+            # global default to an embedding/tts/etc. entry a provider happens
+            # to list first.
             settings = _load_settings()
             if not settings.get("default_endpoint_id"):
+                from src.endpoint_resolver import _first_chat_model
                 settings["default_endpoint_id"] = ep.id
-                settings["default_model"] = model_ids[0] if model_ids else ""
+                settings["default_model"] = _first_chat_model(model_ids) or ""
                 _save_settings(settings)
             _invalidate_models_cache()
             _local_probe_cache["data"] = None
diff --git a/src/endpoint_resolver.py b/src/endpoint_resolver.py
index 72cd054..aec81a8 100644
--- a/src/endpoint_resolver.py
+++ b/src/endpoint_resolver.py
@@ -47,6 +47,29 @@ def _endpoint_cached_models(ep) -> list:
     return models if isinstance(models, list) else []
 
 
+def _endpoint_hidden_models(ep) -> set:
+    """Model ids the admin disabled on this endpoint (the UI's hidden list)."""
+    raw = getattr(ep, "hidden_models", None)
+    if not raw:
+        return set()
+    try:
+        hidden = json.loads(raw) if isinstance(raw, str) else raw
+    except Exception:
+        return set()
+    return set(hidden) if isinstance(hidden, list) else set()
+
+
+def _endpoint_enabled_models(ep) -> list:
+    """Cached models minus the ones disabled on the endpoint, order preserved.
+
+    The auto-pick fallback must never select a model the user disabled — a
+    Groq endpoint can list 16 models with only 1 enabled, and picking the
+    raw first one resolves to a model that 400s ("requires terms acceptance").
+    """
+    hidden = _endpoint_hidden_models(ep)
+    return [m for m in _endpoint_cached_models(ep) if m not in hidden]
+
+
 # Cache for Tailscale hostname → IP resolution
 _tailscale_cache: Dict[str, Optional[str]] = {}
 
@@ -248,9 +271,15 @@ def resolve_endpoint(
         chat_url = build_chat_url(base)
         headers = build_headers(ep.api_key, base)
 
-        # If no model specified, try to pick the first from endpoint's cached list.
+        # Discard a configured model the user has since disabled on the
+        # endpoint (e.g. a stale `default_model` left pointing at a now-hidden
+        # model). Treat it as unset so the picker below selects a live one
+        # instead of dispatching to a disabled model that 400s.
+        if model and model in _endpoint_hidden_models(ep):
+            model = ""
+        # If no (usable) model specified, pick the first enabled chat model.
         if not model:
-            model = _first_chat_model(_endpoint_cached_models(ep)) or ""
+            model = _first_chat_model(_endpoint_enabled_models(ep)) or ""
 
         return chat_url, model or fallback_model, headers
     except Exception as e:
@@ -282,8 +311,12 @@ def resolve_endpoint_by_id(
         chat_url = build_chat_url(base)
         headers = build_headers(ep.api_key, base)
         m = (model or "").strip()
+        # Drop a model the user disabled on the endpoint, then pick the first
+        # enabled chat model rather than a hidden one.
+        if m and m in _endpoint_hidden_models(ep):
+            m = ""
         if not m:
-            m = _first_chat_model(_endpoint_cached_models(ep)) or ""
+            m = _first_chat_model(_endpoint_enabled_models(ep)) or ""
         if not m:
             return None
         return chat_url, m, headers
diff --git a/tests/test_endpoint_resolver.py b/tests/test_endpoint_resolver.py
index 447aecd..1c638ea 100644
--- a/tests/test_endpoint_resolver.py
+++ b/tests/test_endpoint_resolver.py
@@ -1,4 +1,5 @@
 """Tests for endpoint_resolver — pure functions tested directly to avoid import pollution."""
+import json
 import re
 from urllib.parse import urlparse
 
@@ -6,6 +7,45 @@ from urllib.parse import urlparse
 # Copy the pure functions to test them without importing the full module.
 # This avoids module cache conflicts with other test files that mock dependencies.
 
+_NON_CHAT_MODEL = (
+    "text-embedding", "embedding", "tts-", "whisper", "dall-e",
+    "moderation", "rerank", "reranker", "clip", "stable-diffusion",
+)
+
+
+def _first_chat_model(models):
+    for m in (models or []):
+        if not any(p in str(m).lower() for p in _NON_CHAT_MODEL):
+            return m
+    return (models[0] if models else None)
+
+
+def _endpoint_cached_models(ep) -> list:
+    raw = getattr(ep, "cached_models", None) or getattr(ep, "models", None)
+    if not raw:
+        return []
+    try:
+        models = json.loads(raw) if isinstance(raw, str) else raw
+    except Exception:
+        return []
+    return models if isinstance(models, list) else []
+
+
+def _endpoint_hidden_models(ep) -> set:
+    raw = getattr(ep, "hidden_models", None)
+    if not raw:
+        return set()
+    try:
+        hidden = json.loads(raw) if isinstance(raw, str) else raw
+    except Exception:
+        return set()
+    return set(hidden) if isinstance(hidden, list) else set()
+
+
+def _endpoint_enabled_models(ep) -> list:
+    hidden = _endpoint_hidden_models(ep)
+    return [m for m in _endpoint_cached_models(ep) if m not in hidden]
+
 def normalize_base(url: str) -> str:
     url = (url or "").strip().rstrip("/")
     for suffix in ["/models", "/chat/completions", "/completions", "/v1/messages"]:
@@ -137,3 +177,62 @@ class TestBuildHeaders:
 
     def test_empty_key(self):
         assert build_headers("", "https://api.openai.com/v1") == {}
+
+
+class _Ep:
+    """Minimal ModelEndpoint stand-in for the model-picking helpers."""
+    def __init__(self, cached=None, hidden=None):
+        self.cached_models = json.dumps(cached) if cached is not None else None
+        self.hidden_models = json.dumps(hidden) if hidden is not None else None
+
+
+class TestFirstChatModel:
+    def test_skips_embedding_and_tts(self):
+        models = ["text-embedding-ada-002", "whisper-large-v3", "gpt-4o"]
+        assert _first_chat_model(models) == "gpt-4o"
+
+    def test_falls_back_to_first_when_all_non_chat(self):
+        assert _first_chat_model(["whisper-large-v3"]) == "whisper-large-v3"
+
+    def test_empty(self):
+        assert _first_chat_model([]) is None
+
+
+class TestEnabledModels:
+    def test_excludes_hidden(self):
+        # The Groq repro: 16 models, only gpt-oss-120b enabled.
+        cached = [
+            "openai/gpt-oss-safeguard-20b", "canopylabs/orpheus-arabic-saudi",
+            "whisper-large-v3", "openai/gpt-oss-120b",
+        ]
+        hidden = [
+            "openai/gpt-oss-safeguard-20b", "canopylabs/orpheus-arabic-saudi",
+            "whisper-large-v3",
+        ]
+        ep = _Ep(cached=cached, hidden=hidden)
+        assert _endpoint_enabled_models(ep) == ["openai/gpt-oss-120b"]
+
+    def test_no_hidden_returns_all(self):
+        ep = _Ep(cached=["a", "b"], hidden=None)
+        assert _endpoint_enabled_models(ep) == ["a", "b"]
+
+    def test_picker_never_selects_disabled_model(self):
+        # Regression: a disabled model listed first must not be auto-picked.
+        cached = ["canopylabs/orpheus-arabic-saudi", "openai/gpt-oss-120b"]
+        hidden = ["canopylabs/orpheus-arabic-saudi"]
+        ep = _Ep(cached=cached, hidden=hidden)
+        assert _first_chat_model(_endpoint_enabled_models(ep)) == "openai/gpt-oss-120b"
+
+    def test_stale_configured_model_is_discarded(self):
+        # A configured model that's been disabled is dropped, falling through
+        # to the first enabled chat model.
+        ep = _Ep(
+            cached=["canopylabs/orpheus-arabic-saudi", "openai/gpt-oss-120b"],
+            hidden=["canopylabs/orpheus-arabic-saudi"],
+        )
+        configured = "canopylabs/orpheus-arabic-saudi"
+        if configured in _endpoint_hidden_models(ep):
+            configured = ""
+        if not configured:
+            configured = _first_chat_model(_endpoint_enabled_models(ep))
+        assert configured == "openai/gpt-oss-120b"