"""Tests for model route helper functions — pure logic, no server needed."""
import asyncio
import json
import sys
import threading
import time
import types
from unittest.mock import MagicMock
from types import SimpleNamespace

import httpx
import pytest

_endpoint_resolver = sys.modules.get("src.endpoint_resolver")
if _endpoint_resolver is not None:
    if not getattr(_endpoint_resolver, "__file__", None):
        # Other tests stub this module during collection. These helper tests
        # need the real URL normalization helpers so Anthropic /v1 handling is
        # covered, so evict the stub and the routes module that imported it.
        sys.modules.pop("src.endpoint_resolver", None)
        sys.modules.pop("routes.model_routes", None)

if "core.database" not in sys.modules:
    # Register a placeholder module whose attributes are all MagicMock objects.
    _core_db = types.ModuleType("core.database")
    for _name in (
        "SessionLocal",
        "ModelEndpoint",
        "Session",
        "ChatMessage",
        "Document",
        "DocumentVersion",
        "GalleryImage",
        "GalleryAlbum",
        "Note",
        "CalendarCal",
        "CalendarEvent",
        "ScheduledTask",
        "TaskRun",
        "McpServer",
    ):
        setattr(_core_db, _name, MagicMock())
    sys.modules["core.database"] = _core_db

import routes.model_routes as model_routes
import src.database as src_database
import src.endpoint_resolver as endpoint_resolver
import src.llm_core as llm_core
from routes.model_routes import (
    _match_provider_curated,
    _curate_models,
    _visible_models,
    _normalize_model_ids,
    _is_chat_model,
    _classify_endpoint,
    _effective_endpoint_kind,
    _probe_endpoint,
    _ping_endpoint,
    _parse_model_list,
    _normalize_refresh_mode,
    _truthy,
    _speech_settings_using_endpoint,
    _clear_speech_settings_for_endpoint,
    _endpoint_settings_using_endpoint,
    _clear_endpoint_settings_for_endpoint,
    _clear_user_pref_endpoint_refs,
    _PROVIDER_CURATED,
)
from src.llm_core import ANTHROPIC_MODELS


# ── speech endpoint settings ──

def test_speech_endpoint_dependents_include_stt():
    """An STT provider bound to the endpoint is reported as a dependent."""
    prefs = {"stt_provider": "endpoint:voice"}
    dependents = _speech_settings_using_endpoint(prefs, "voice")
    assert dependents == ["Speech to Text"]


def test_clear_speech_endpoint_settings_resets_tts_and_stt():
    """Clearing rewrites both speech providers and their models in place."""
    prefs = {
        "tts_provider": "endpoint:voice",
        "tts_model": "custom-tts",
        "stt_provider": "endpoint:voice",
        "stt_model": "custom-stt",
    }
    cleared = _clear_speech_settings_for_endpoint(prefs, "voice")
    assert cleared == [
        "Text to Speech",
        "Speech to Text",
    ]
    assert prefs == {
        "tts_provider": "disabled",
        "tts_model": "tts-1",
        "stt_provider": "disabled",
        "stt_model": "base",
    }


def test_endpoint_cleanup_removes_primary_and_fallback_references():
    """Listing and clearing report the same labels; only "dead" refs are dropped."""
    prefs = {
        "default_endpoint_id": "dead",
        "default_model": "primary",
        "default_model_fallbacks": [
            {"endpoint_id": "dead", "model": "fallback-a"},
            {"endpoint_id": "keep", "model": "fallback-b"},
        ],
        "utility_model_fallbacks": [{"endpoint_id": "dead", "model": "utility"}],
        "vision_model_fallbacks": [{"endpoint_id": "dead", "model": "vision"}],
        "stt_provider": "endpoint:dead",
        "stt_model": "whisper",
    }
    affected_labels = [
        "Default Model",
        "Default Model Fallbacks",
        "Utility Model Fallbacks",
        "Vision Model Fallbacks",
        "Speech to Text",
    ]

    assert _endpoint_settings_using_endpoint(prefs, "dead", include_speech=True) == affected_labels
    assert _clear_endpoint_settings_for_endpoint(prefs, "dead", include_speech=True) == affected_labels

    assert prefs["default_endpoint_id"] == ""
    assert prefs["default_model"] == ""
    assert prefs["default_model_fallbacks"] == [
        {"endpoint_id": "keep", "model": "fallback-b"},
    ]
    assert prefs["utility_model_fallbacks"] == []
    assert prefs["vision_model_fallbacks"] == []
    assert prefs["stt_provider"] == "disabled"
    assert prefs["stt_model"] == "base"


def test_endpoint_cleanup_updates_scoped_and_legacy_user_prefs():
    """Both the per-user ("_users") layout and the flat legacy layout are cleaned."""
    alice_prefs = {
        "utility_endpoint_id": "dead",
        "utility_model": "utility",
        "vision_model_fallbacks": [{"endpoint_id": "dead", "model": "vision"}],
    }
    bob_prefs = {
        "default_endpoint_id": "keep",
        "default_model": "chat",
    }
    scoped = {
        "_users": {
            "alice": alice_prefs,
            "bob": bob_prefs,
        },
    }
    assert _clear_user_pref_endpoint_refs(scoped, "dead") == 1
    assert scoped["_users"]["alice"] == {
        "utility_endpoint_id": "",
        "utility_model": "",
        "vision_model_fallbacks": [],
    }
    assert scoped["_users"]["bob"]["default_endpoint_id"] == "keep"

    legacy = {
        "default_model_fallbacks": [{"endpoint_id": "dead", "model": "chat"}],
    }
    assert _clear_user_pref_endpoint_refs(legacy, "dead") == 1
    assert legacy["default_model_fallbacks"] == []


# ── _match_provider_curated ──

class TestMatchProviderCurated:
    """Which curated-provider key is returned for a (URL, provider) pair."""

    def test_url_match_overrides_provider(self):
        resolved = _match_provider_curated("https://z.ai/v1", "openai")
        assert resolved == "zai"

    def test_deepseek_url(self):
        resolved = _match_provider_curated("https://api.deepseek.com/v1", "openai")
        assert resolved == "deepseek"

    def test_groq_url(self):
        resolved = _match_provider_curated("https://api.groq.com/openai/v1", "openai")
        assert resolved == "groq"

    def test_mistral_url(self):
        resolved = _match_provider_curated("https://api.mistral.ai/v1", "openai")
        assert resolved == "mistral"

    def test_together_url(self):
        resolved = _match_provider_curated("https://api.together.xyz/v1", "openai")
        assert resolved == "together"

    def test_fireworks_url(self):
        resolved = _match_provider_curated("https://api.fireworks.ai/inference/v1", "openai")
        assert resolved == "fireworks"

    def test_google_url(self):
        resolved = _match_provider_curated(
            "https://generativelanguage.googleapis.com/v1beta", "openai"
        )
        assert resolved == "google"

    def test_xai_url(self):
        resolved = _match_provider_curated("https://api.x.ai/v1", "openai")
        assert resolved == "xai"

    def test_ollama_url(self):
        resolved = _match_provider_curated("https://ollama.com/api", "openai")
        assert resolved == "ollama"

    def test_no_url_match_returns_provider(self):
        resolved = _match_provider_curated("https://localhost:1234", "openai")
        assert resolved == "openai"

    def test_none_provider_passthrough(self):
        resolved = _match_provider_curated("https://localhost:1234", None)
        assert resolved is None

    def test_none_url_safe(self):
        resolved = _match_provider_curated(None, "openai")
        assert resolved == "openai"


# ── _curate_models ──

class TestCurateModels:
    """Partitioning of model IDs into (curated, extra) per provider."""

    def test_known_provider_partitions(self):
        available = ["gpt-4o", "gpt-4o-mini", "ft:gpt-4o:custom", "some-random-model"]
        primary, overflow = _curate_models(available, "openai")
        assert "gpt-4o" in primary
        assert "gpt-4o-mini" in primary
        assert "some-random-model" in overflow

    def test_unknown_provider_returns_all_as_curated(self):
        available = ["model-a", "model-b"]
        primary, overflow = _curate_models(available, "unknown_provider")
        assert primary == available
        assert overflow == []

    def test_curated_sorted_by_priority(self):
        available = ["gpt-4o-mini", "gpt-4o", "o3"]
        primary, _ = _curate_models(available, "openai")
        # gpt-4o should come before gpt-4o-mini in the curated list priority
        assert primary.index("gpt-4o") < primary.index("gpt-4o-mini")

    def test_empty_models(self):
        primary, overflow = _curate_models([], "openai")
        assert primary == []
        assert overflow == []

    def test_deepseek_curated(self):
        available = ["deepseek-chat", "deepseek-reasoner", "deepseek-coder"]
        primary, overflow = _curate_models(available, "deepseek")
        assert "deepseek-chat" in primary
        assert "deepseek-reasoner" in primary
        assert "deepseek-coder" in overflow

    def test_xai_curated(self):
        available = ["grok-4", "grok-3-fast", "grok-2"]
        primary, overflow = _curate_models(available, "xai")
        assert "grok-4" in primary
        assert "grok-3-fast" in primary
        assert "grok-2" in overflow

    def test_xai_current_grok_43_curated(self):
        primary, overflow = _curate_models(["grok-4.3", "grok-4.3-fast"], "xai")
        assert primary == ["grok-4.3", "grok-4.3-fast"]
        assert overflow == []

    def test_groq_current_models_curated(self):
        available = [
            "openai/gpt-oss-120b",
            "groq/compound",
            "llama-3.1-8b-instant",
            "llama-4-scout-17b-16e-instruct",
        ]
        primary, overflow = _curate_models(available, "groq")
        assert primary == available
        assert overflow == []

    def test_google_current_gemini_curated(self):
        primary, overflow = _curate_models(["gemini-3.5-flash", "gemini-3.1-pro"], "google")
        assert primary == ["gemini-3.5-flash", "gemini-3.1-pro"]
        assert overflow == []


# ── _is_chat_model ──

class TestIsChatModel:
    """Model IDs that `_is_chat_model` accepts or rejects."""

    @pytest.mark.parametrize("model_id", [
        "gpt-4o",
        "gpt-4o-mini",
        "claude-sonnet-4",
        "llama-3.3-70b",
        "deepseek-chat",
        "gemini-2.0-flash",
        "o3",
        "llama-4-scout-17b-16e-instruct",
    ])
    def test_chat_models(self, model_id):
        verdict = _is_chat_model(model_id)
        assert verdict is True

    @pytest.mark.parametrize("model_id", [
        "dall-e-3",
        "tts-1",
        "whisper-1",
        "text-embedding-3-small",
        "gpt-image-1",
        "sora-1",
    ])
    def test_non_chat_models(self, model_id):
        verdict = _is_chat_model(model_id)
        assert verdict is False

    def test_realtime_excluded(self):
        verdict = _is_chat_model("gpt-4o-realtime-preview")
        assert verdict is False

    def test_audio_preview_is_chat(self):
        # gpt-4o-audio-preview is a chat model (has "audio" not "gpt-audio")
        verdict = _is_chat_model("gpt-4o-audio-preview")
        assert verdict is True

    def test_gpt_audio_is_not_chat(self):
        verdict = _is_chat_model("gpt-audio")
        assert verdict is False

    def test_legacy_openai_instruct_is_not_chat(self):
        verdict = _is_chat_model("gpt-3.5-turbo-instruct")
        assert verdict is False


# ── _classify_endpoint ──

class TestClassifyEndpoint:
    """Endpoint classification plus related refresh-mode, parsing and ping helpers."""

    def test_localhost(self):
        category = _classify_endpoint("http://localhost:1234")
        assert category == "local"

    def test_127(self):
        category = _classify_endpoint("http://127.0.0.1:8080/v1")
        assert category == "local"

    def test_private_192(self):
        category = _classify_endpoint("http://192.168.1.100:5000")
        assert category == "local"

    def test_private_10(self):
        category = _classify_endpoint("http://10.0.0.5:8000")
        assert category == "local"

    def test_public_api(self):
        category = _classify_endpoint("https://api.openai.com/v1")
        assert category == "api"

    def test_empty_string(self):
        category = _classify_endpoint("")
        assert category == "api"

    def test_malformed_url(self):
        category = _classify_endpoint("not-a-url")
        assert category == "api"

    def test_tailscale_auto_is_local(self):
        category = _classify_endpoint("http://100.117.136.97:34521/v1")
        assert category == "local"

    def test_tailscale_proxy_override_is_api(self):
        category = _classify_endpoint("http://100.117.136.97:34521/v1", "proxy")
        assert category == "api"

    def test_tailscale_api_override_is_api(self):
        category = _classify_endpoint("http://100.117.136.97:34521/v1", "api")
        assert category == "api"

    def test_public_local_override_is_local(self):
        category = _classify_endpoint("https://api.openai.com/v1", "local")
        assert category == "local"

    def test_keyed_legacy_v1_endpoint_is_effective_proxy(self):
        row = SimpleNamespace(endpoint_kind="auto", api_key="fake-key")
        effective = _effective_endpoint_kind(row, "http://100.117.136.97:34521/v1")
        assert effective == "proxy"

    def test_proxy_refresh_mode_defaults_manual(self):
        # Every requested mode collapses to "manual" for proxy endpoints.
        for requested in ("", "auto", "manual"):
            assert _normalize_refresh_mode(requested, "proxy") == "manual"
        assert _normalize_refresh_mode("auto", "api") == "auto"

    def test_parse_model_list_accepts_json_and_text(self):
        from_json = _parse_model_list('["a", "b", "a"]')
        from_text = _parse_model_list("a, b\nc")
        assert from_json == ["a", "b"]
        assert from_text == ["a", "b", "c"]

    def test_ping_endpoint_does_not_request_models_for_openai_style_proxy(self, monkeypatch):
        proxy_url = "http://100.117.136.97:34521/v1"
        requests_made = []

        def reject_head(*args, **kwargs):
            raise AssertionError("generic proxy health check should not use HEAD")

        def record_get(url, headers=None, timeout=None, verify=None, **kwargs):
            requests_made.append(("GET", url))
            return httpx.Response(200, request=httpx.Request("GET", url))

        monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url, raising=False)
        monkeypatch.setattr(model_routes.httpx, "head", reject_head)
        monkeypatch.setattr(model_routes.httpx, "get", record_get)

        outcome = _ping_endpoint(proxy_url, "fake-key", timeout=1)

        assert outcome["reachable"] is True
        assert outcome["status_code"] == 200
        assert requests_made == [("GET", "http://100.117.136.97:34521/v1")]
        assert all(not url.endswith("/models") for _, url in requests_made)


# ── setup probing ──

class TestSetupProbeSafety:
    """`_truthy` parsing and `_probe_endpoint` behaviour with httpx.get stubbed."""

    @staticmethod
    def _stub_url_helpers(monkeypatch):
        """Make URL resolution a pass-through and base normalization a trailing-slash strip."""
        monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda url: url, raising=False)
        monkeypatch.setattr(model_routes, "_normalize_base", lambda url: url.rstrip("/"))

    @pytest.mark.parametrize("value", ["true", "1", "yes", "on", " TRUE "])
    def test_truthy_true_values(self, value):
        assert _truthy(value) is True

    @pytest.mark.parametrize("value", ["false", "0", "no", "", None])
    def test_truthy_false_values(self, value):
        assert _truthy(value) is False

    def test_keyed_probe_does_not_fallback_to_curated_on_auth_failure(self, monkeypatch):
        self._stub_url_helpers(monkeypatch)

        def unauthorized_get(url, headers=None, timeout=None, verify=None, **kwargs):
            request = httpx.Request("GET", url)
            raise httpx.HTTPStatusError(
                "unauthorized",
                request=request,
                response=httpx.Response(401, request=request),
            )

        monkeypatch.setattr(model_routes.httpx, "get", unauthorized_get)
        found = _probe_endpoint("https://api.groq.com/openai/v1", "bad-key")
        assert found == []

    def test_unkeyed_probe_can_still_use_curated_fallback(self, monkeypatch):
        self._stub_url_helpers(monkeypatch)

        def offline_get(url, headers=None, timeout=None, verify=None, **kwargs):
            raise httpx.ConnectError("offline")

        monkeypatch.setattr(model_routes.httpx, "get", offline_get)
        found = _probe_endpoint("https://api.groq.com/openai/v1")
        assert found == _PROVIDER_CURATED["groq"]

    def test_keyed_anthropic_probe_does_not_fallback_on_failure(self, monkeypatch):
        self._stub_url_helpers(monkeypatch)

        def offline_get(url, headers=None, timeout=None, verify=None, **kwargs):
            raise httpx.ConnectError("offline")

        monkeypatch.setattr(model_routes.httpx, "get", offline_get)
        found = _probe_endpoint("https://api.anthropic.com/v1", "bad-key")
        assert found == []

    def test_anthropic_probe_does_not_double_v1(self, monkeypatch):
        self._stub_url_helpers(monkeypatch)
        requested_urls = []

        def recording_get(url, headers=None, timeout=None, verify=None, **kwargs):
            requested_urls.append(url)
            return httpx.Response(
                200,
                request=httpx.Request("GET", url),
                json={"data": [{"id": "claude-sonnet-4-5"}]},
            )

        monkeypatch.setattr(model_routes.httpx, "get", recording_get)
        found = _probe_endpoint("https://api.anthropic.com/v1", "good-key")
        assert found == ["claude-sonnet-4-5"]
        assert requested_urls == ["https://api.anthropic.com/v1/models"]

    def test_ollama_cloud_probe_uses_native_tags_endpoint(self, monkeypatch):
        self._stub_url_helpers(monkeypatch)
        requested = []

        def recording_get(url, headers=None, timeout=None, verify=None, **kwargs):
            requested.append((url, headers))
            return httpx.Response(
                200,
                request=httpx.Request("GET", url),
                json={"models": [{"name": "gpt-oss:120b"}, {"model": "qwen3:235b"}]},
            )

        monkeypatch.setattr(model_routes.httpx, "get", recording_get)
        found = _probe_endpoint("https://ollama.com/api", "ollama-key")
        assert found == ["gpt-oss:120b", "qwen3:235b"]
        assert requested == [
            ("https://ollama.com/api/tags", {"Authorization": "Bearer ollama-key"})
        ]

    def test_unkeyed_anthropic_probe_can_use_curated_fallback(self, monkeypatch):
        self._stub_url_helpers(monkeypatch)

        def offline_get(url, headers=None, timeout=None, verify=None, **kwargs):
            raise httpx.ConnectError("offline")

        monkeypatch.setattr(model_routes.httpx, "get", offline_get)
        found = _probe_endpoint("https://api.anthropic.com/v1")
        assert found == ANTHROPIC_MODELS


def test_ollama_endpoint_error_message_includes_troubleshooting():
    """The Ollama message carries the probe error, the URL and an `ollama list` hint."""
    base_url = "http://localhost:11434/v1"
    text = model_routes._model_endpoint_error_message(
        base_url,
        {"error": "Connection refused"},
    )
    for fragment in (
        "No Ollama models found",
        "Connection refused",
        "http://localhost:11434/v1",
        "ollama list",
    ):
        assert fragment in text


def test_generic_endpoint_error_message_preserves_probe_error():
    """Non-Ollama endpoints get the generic message with the last probe error appended."""
    text = model_routes._model_endpoint_error_message(
        "https://api.example.com/v1",
        {"error": "HTTP 401"},
    )
    assert text == "No models found for that provider/key. Last probe error: HTTP 401."
# ── _rewrite_loopback_for_docker (issue #25: LM Studio on host loopback) ──

class TestDockerLoopbackRewrite:
    """Loopback hosts are rewritten only when the Docker host gateway is reachable."""

    def test_rewrites_loopback_when_in_docker(self, monkeypatch):
        monkeypatch.setattr(model_routes, "_docker_host_gateway_reachable", lambda: True)
        rewrite = model_routes._rewrite_loopback_for_docker
        gateway_url = "http://host.docker.internal:1234/v1"
        assert rewrite("http://localhost:1234/v1") == gateway_url
        assert rewrite("http://127.0.0.1:1234/v1") == gateway_url

    def test_no_rewrite_when_not_in_docker(self, monkeypatch):
        monkeypatch.setattr(model_routes, "_docker_host_gateway_reachable", lambda: False)
        untouched = model_routes._rewrite_loopback_for_docker("http://localhost:1234/v1")
        assert untouched == "http://localhost:1234/v1"

    def test_non_loopback_untouched_even_in_docker(self, monkeypatch):
        # Cloud and LAN hosts must never be rewritten or they would break.
        monkeypatch.setattr(model_routes, "_docker_host_gateway_reachable", lambda: True)
        rewrite = model_routes._rewrite_loopback_for_docker
        assert rewrite("https://api.openai.com/v1") == "https://api.openai.com/v1"
        assert rewrite("http://192.168.1.50:1234/v1") == "http://192.168.1.50:1234/v1"


class TestDockerHostGatewayReachable:
    """`_docker_host_gateway_reachable` with filesystem and DNS lookups stubbed."""

    def test_native_host_is_false_and_skips_dns(self, monkeypatch):
        def missing_cgroup(*a, **k):
            raise FileNotFoundError

        def forbid_dns(*a, **k):
            raise AssertionError("getaddrinfo must not run on native hosts")

        monkeypatch.setattr(model_routes.os.path, "exists", lambda p: False)
        monkeypatch.setattr("builtins.open", missing_cgroup)
        monkeypatch.setattr(model_routes.socket, "getaddrinfo", forbid_dns)
        reachable = model_routes._docker_host_gateway_reachable()
        assert reachable is False

    def test_container_with_host_gateway_is_true(self, monkeypatch):
        monkeypatch.setattr(model_routes.os.path, "exists", lambda p: p == "/.dockerenv")
        monkeypatch.setattr(model_routes.socket, "getaddrinfo", lambda *a, **k: [("ok",)])
        reachable = model_routes._docker_host_gateway_reachable()
        assert reachable is True

    def test_container_without_host_gateway_is_false(self, monkeypatch):
        def dns_failure(*a, **k):
            raise OSError("name or service not known")

        monkeypatch.setattr(model_routes.os.path, "exists", lambda p: p == "/.dockerenv")
        monkeypatch.setattr(model_routes.socket, "getaddrinfo", dns_failure)
        reachable = model_routes._docker_host_gateway_reachable()
        assert reachable is False


# ── pinned model IDs: normalization helper ──

class TestNormalizeModelIds:
    """Accepted input shapes for `_normalize_model_ids` and the list it returns."""

    def test_list_passthrough_trims_and_dedupes(self):
        normalized = _normalize_model_ids([" a ", "a", "b", ""])
        assert normalized == ["a", "b"]

    def test_json_string_list(self):
        normalized = _normalize_model_ids('["x", "y", "x"]')
        assert normalized == ["x", "y"]

    def test_comma_and_newline_string(self):
        normalized = _normalize_model_ids("a, b\n c ,a")
        assert normalized == ["a", "b", "c"]

    def test_none_and_empty(self):
        for blank in (None, "", " "):
            assert _normalize_model_ids(blank) == []

    def test_non_string_values_ignored(self):
        normalized = _normalize_model_ids([1, "ok", None, {"a": 1}])
        assert normalized == ["ok"]


# ── pinned model IDs: _visible_models merge ──

class TestVisibleModelsPinned:
    """`_visible_models(cached, hidden, pinned)` merge results."""

    def test_includes_pinned_not_in_cached(self):
        shown = _visible_models(["a"], None, ["deploy-1"])
        assert shown == ["a", "deploy-1"]

    def test_cached_plus_pinned_dedup_preserves_order(self):
        shown = _visible_models(["a", "b"], None, ["b", "c"])
        assert shown == ["a", "b", "c"]

    def test_hidden_can_hide_a_pinned_model(self):
        shown = _visible_models(["a"], ["deploy-1"], ["deploy-1"])
        assert shown == ["a"]

    def test_accepts_json_string_inputs(self):
        shown = _visible_models('["a"]', '["a"]', '["b"]')
        assert shown == ["b"]


# ── pinned model IDs: route behaviour ──
# Building the router exercises FastAPI's Form() routes, which require
# python-multipart. The test env ships without it, so register a minimal stub
# (mirrors tests/test_review_regressions.py) only when it's genuinely missing.
if "python_multipart" not in sys.modules: try: import python_multipart # noqa: F401 except ImportError: _mp_stub = types.ModuleType("python_multipart") _mp_stub.__version__ = "0.0.13" sys.modules["python_multipart"] = _mp_stub class _RouteCondition: def __init__(self, op, field, value): self.op = op self.field = field self.value = value def __or__(self, other): return ("or", self, other) class _RouteColumn: def __init__(self, name): self.name = name def __eq__(self, value): return _RouteCondition("eq", self.name, value) def is_(self, value): return _RouteCondition("eq", self.name, value) def desc(self): return self class _RouteModelEndpoint: """ModelEndpoint stand-in that stores constructor kwargs as attributes. Class-level fake columns let it double as the query class in the dedupe lookup; instance attributes (set in __init__) shadow them per-row. """ id = _RouteColumn("id") base_url = _RouteColumn("base_url") is_enabled = _RouteColumn("is_enabled") owner = _RouteColumn("owner") created_at = _RouteColumn("created_at") def __init__(self, **kwargs): for key, value in kwargs.items(): setattr(self, key, value) _RecordingEndpoint = _RouteModelEndpoint class _PinnedFakeRequest: def __init__(self, body=None, headers=None): self._body = body if body is not None else {} self.headers = headers or {} async def json(self): return self._body def _get_route(path, method): from routes.model_routes import setup_model_routes router = setup_model_routes(model_discovery=None) for route in router.routes: if getattr(route, "path", "") == path and method in getattr(route, "methods", set()): return route.endpoint raise AssertionError(f"{method} {path} not found") def _make_endpoint(**kwargs): base = dict( id="ep1", name="EP", base_url="http://localhost:9999/v1", api_key=None, is_enabled=True, hidden_models=None, cached_models=None, pinned_models=None, model_type="llm", supports_tools=None, endpoint_kind="auto", model_refresh_mode="auto", model_refresh_interval=None, 
model_refresh_timeout=None, owner=None, created_at=None, updated_at=None, ) base.update(kwargs) return SimpleNamespace(**base) def test_patch_models_saves_pinned_models(monkeypatch): ep = _make_endpoint() db = _PinnedFakeDb([ep]) monkeypatch.setattr(model_routes, "SessionLocal", lambda: db) monkeypatch.setattr(model_routes, "require_admin", lambda request: None) endpoint = _get_route("/api/model-endpoints/{ep_id}/models", "PATCH") request = _PinnedFakeRequest(body={"pinned_models": ["deploy-1", "deploy-1", "deploy-2"]}) result = asyncio.run(endpoint("ep1", request)) assert json.loads(ep.pinned_models) == ["deploy-1", "deploy-2"] assert result["pinned_count"] == 2 def test_patch_models_pinned_does_not_clobber_hidden(monkeypatch): ep = _make_endpoint(hidden_models=json.dumps(["hide-me"])) db = _PinnedFakeDb([ep]) monkeypatch.setattr(model_routes, "SessionLocal", lambda: db) monkeypatch.setattr(model_routes, "require_admin", lambda request: None) endpoint = _get_route("/api/model-endpoints/{ep_id}/models", "PATCH") request = _PinnedFakeRequest(body={"pinned_models": ["deploy-1"]}) asyncio.run(endpoint("ep1", request)) assert json.loads(ep.hidden_models) == ["hide-me"] assert json.loads(ep.pinned_models) == ["deploy-1"] def test_get_models_returns_pinned_when_probe_empty(monkeypatch): ep = _make_endpoint(pinned_models=json.dumps(["deploy-1"])) db = _PinnedFakeDb([ep]) monkeypatch.setattr(model_routes, "SessionLocal", lambda: db) monkeypatch.setattr(model_routes, "require_admin", lambda request: None) monkeypatch.setattr(model_routes, "_probe_endpoint", lambda *a, **k: []) endpoint = _get_route("/api/model-endpoints/{ep_id}/models", "GET") result = endpoint("ep1", _PinnedFakeRequest(), SimpleNamespace(headers={})) ids = [row["id"] for row in result] assert ids == ["deploy-1"] assert result[0]["is_pinned"] is True def test_reprobe_preserves_pinned_models(monkeypatch): ep = _make_endpoint(pinned_models=json.dumps(["deploy-1"])) db = _PinnedFakeDb([ep]) 
monkeypatch.setattr(model_routes, "SessionLocal", lambda: db) monkeypatch.setattr(model_routes, "require_admin", lambda request: None) monkeypatch.setattr(model_routes, "_probe_endpoint", lambda *a, **k: ["m1"]) monkeypatch.setattr(model_routes, "_is_chat_model", lambda m: True) monkeypatch.setattr( model_routes, "_probe_single_model", lambda *a, **k: {"status": "ok"} ) endpoint = _get_route("/api/model-endpoints/{ep_id}/probe", "GET") response = endpoint("ep1", _PinnedFakeRequest()) async def _drain(): async for _ in response.body_iterator: pass asyncio.run(_drain()) # Probe rewrites cached/hidden but must never touch admin-pinned IDs. assert json.loads(ep.pinned_models) == ["deploy-1"] assert json.loads(ep.cached_models) == ["m1"] def test_visible_models_handles_malformed_strings(): # Non-JSON cached/pinned strings are treated as comma/newline lists and # never raise; a malformed hidden string is normalized too. result = _visible_models("a,b", "b", "{bad json") assert isinstance(result, list) assert result == ["a", "{bad json"] assert _visible_models("", None, "") == [] assert _visible_models("only-cached", None, None) == ["only-cached"] def _create_form_kwargs(**overrides): """Defaults for every Form() param create_model_endpoint reads directly. Calling the route as a plain function bypasses FastAPI form parsing, so the Form() sentinels must be replaced with real strings. 
""" kwargs = dict( name="", api_key="", skip_probe="true", # avoid any network probe in unit tests require_models="false", model_type="llm", endpoint_kind="auto", model_refresh_mode="", model_refresh_interval="", model_refresh_timeout="", supports_tools="", pinned_models="", container_local="false", shared="true", ) kwargs.update(overrides) return kwargs def _patch_create_deps(monkeypatch, db): import src.auth_helpers as auth_helpers monkeypatch.setattr(model_routes, "SessionLocal", lambda: db) monkeypatch.setattr(model_routes, "require_admin", lambda request: None) monkeypatch.setattr(model_routes, "ModelEndpoint", _RecordingEndpoint) monkeypatch.setattr(model_routes, "_normalize_base", lambda b: b) monkeypatch.setattr(model_routes, "_rewrite_loopback_for_docker", lambda b, **k: b) monkeypatch.setattr(model_routes, "_load_settings", lambda: {"default_endpoint_id": "exists"}) monkeypatch.setattr(endpoint_resolver, "resolve_url", lambda u: u) monkeypatch.setattr(auth_helpers, "get_current_user", lambda req: None) def test_post_creates_endpoint_with_pinned_models(monkeypatch): db = _PinnedFakeDb([]) # no existing row → fresh create path _patch_create_deps(monkeypatch, db) create = _get_route("/api/model-endpoints", "POST") result = create( _PinnedFakeRequest(), base_url="http://host:1234/v1", **_create_form_kwargs(pinned_models="deploy-1, deploy-1\ndeploy-2"), ) assert result["pinned_models"] == ["deploy-1", "deploy-2"] assert result["models"] == ["deploy-1", "deploy-2"] assert result["online"] is True # Persisted onto the created row. 
assert len(db.added) == 1 assert json.loads(db.added[0].pinned_models) == ["deploy-1", "deploy-2"] def test_post_dedupe_existing_merges_and_returns_pinned(monkeypatch): existing = _make_endpoint( base_url="http://host:1234/v1", cached_models=json.dumps(["m1"]), hidden_models=None, pinned_models=json.dumps(["old-pin"]), ) db = _PinnedFakeDb([existing]) _patch_create_deps(monkeypatch, db) create = _get_route("/api/model-endpoints", "POST") result = create( _PinnedFakeRequest(), base_url="http://host:1234/v1", **_create_form_kwargs(pinned_models="new-pin"), ) assert result["existing"] is True # Incoming pin merged onto the existing pins (no clobber, order preserved). assert json.loads(existing.pinned_models) == ["old-pin", "new-pin"] assert result["pinned_models"] == ["old-pin", "new-pin"] # models = cached + pinned - hidden, visible merged list. assert result["models"] == ["m1", "old-pin", "new-pin"] # No new row created on the dedupe path. assert db.added == [] def test_post_dedupe_existing_does_not_clobber_pinned_when_omitted(monkeypatch): existing = _make_endpoint( base_url="http://host:1234/v1", cached_models=json.dumps(["m1"]), pinned_models=json.dumps(["keep-me"]), ) db = _PinnedFakeDb([existing]) _patch_create_deps(monkeypatch, db) create = _get_route("/api/model-endpoints", "POST") result = create( _PinnedFakeRequest(), base_url="http://host:1234/v1", **_create_form_kwargs(), # pinned_models defaults to "" ) assert json.loads(existing.pinned_models) == ["keep-me"] assert result["pinned_models"] == ["keep-me"] assert db.committed == 0 # nothing to persist class _RouteQuery: def __init__(self, rows): self.rows = list(rows) def filter(self, *conditions): for condition in conditions: if isinstance(condition, _RouteCondition) and condition.op == "eq": self.rows = [row for row in self.rows if getattr(row, condition.field, None) == condition.value] elif isinstance(condition, tuple) and condition and condition[0] == "or": keep = [] for row in self.rows: matched = 
False for part in condition[1:]: if isinstance(part, _RouteCondition) and part.op == "eq": matched = matched or (getattr(row, part.field, None) == part.value) if matched: keep.append(row) self.rows = keep return self def order_by(self, *args, **kwargs): return self def all(self): return list(self.rows) def first(self): return self.rows[0] if self.rows else None class _RouteDb: def __init__(self, rows): self.rows = rows self.added = [] self.committed = 0 self.commits = 0 self.closed = False def query(self, model): return _RouteQuery(self.rows) def commit(self): self.committed += 1 self.commits += 1 def close(self): self.closed = True def add(self, row): self.rows.append(row) self.added.append(row) _PinnedFakeDb = _RouteDb class _ImmediateThread: def __init__(self, target, daemon=None): self.target = target def start(self): self.target() def _wait_for(predicate, timeout=2.0): deadline = time.time() + timeout while time.time() < deadline: if predicate(): return True time.sleep(0.01) return bool(predicate()) def _route_endpoint(router, path, method="GET"): for route in router.routes: if getattr(route, "path", "") == path and method in getattr(route, "methods", set()): return route.endpoint raise AssertionError(f"{method} {path} route not found") def _route_ep( id, base_url, *, cached_models=None, endpoint_kind="auto", api_key=None, name=None, pinned_models=None, refresh_mode="auto", refresh_timeout=None, ): return SimpleNamespace( id=id, name=name or id, base_url=base_url, api_key=api_key, is_enabled=True, cached_models=json.dumps(cached_models) if cached_models is not None else None, hidden_models=None, pinned_models=json.dumps(pinned_models) if pinned_models is not None else None, model_type="llm", endpoint_kind=endpoint_kind, model_refresh_mode=refresh_mode, model_refresh_interval=None, model_refresh_timeout=refresh_timeout, supports_tools=None, owner=None, created_at=None, updated_at=None, ) def _route_request(): return SimpleNamespace( 
state=SimpleNamespace(current_user=None), app=SimpleNamespace(state=SimpleNamespace(auth_manager=None)), ) def test_api_models_returns_cached_proxy_models_without_refresh_probe(monkeypatch): row = _route_ep( "proxy", "http://100.117.136.97:34521/v1", cached_models=["cached-model"], endpoint_kind="proxy", api_key="fake-key", refresh_mode="manual", ) db = _RouteDb([row]) router = model_routes.setup_model_routes(model_discovery=None) monkeypatch.setattr(model_routes, "ModelEndpoint", _RouteModelEndpoint) monkeypatch.setattr(model_routes, "SessionLocal", lambda: db) monkeypatch.setattr(model_routes, "_auth_disabled", lambda: True) monkeypatch.setattr(model_routes, "build_chat_url", lambda base: f"{base}/chat/completions") def fail_probe(*args, **kwargs): raise AssertionError("/models probe should not run for cached manual proxy") monkeypatch.setattr(model_routes, "_probe_endpoint", fail_probe) monkeypatch.setattr(threading, "Thread", _ImmediateThread) result = _route_endpoint(router, "/api/models")(_route_request()) assert result["items"][0]["models"] == ["cached-model"] assert result["items"][0]["category"] == "api" assert result["items"][0]["endpoint_kind"] == "proxy" assert "offline" not in result["items"][0] assert json.loads(row.cached_models) == ["cached-model"] @pytest.mark.asyncio async def test_probe_local_skips_tailscale_proxy_endpoint(monkeypatch): proxy = _route_ep( "proxy", "http://100.117.136.97:34521/v1", cached_models=["cached-model"], endpoint_kind="proxy", api_key="fake-key", ) local = _route_ep("local", "http://127.0.0.1:8000/v1", endpoint_kind="local") db = _RouteDb([proxy, local]) router = model_routes.setup_model_routes(model_discovery=None) monkeypatch.setattr(model_routes, "ModelEndpoint", _RouteModelEndpoint) monkeypatch.setattr(model_routes, "SessionLocal", lambda: db) monkeypatch.setattr(model_routes, "require_admin", lambda request: None) monkeypatch.setattr(model_routes, "_probe_endpoint", lambda *a, **k: (_ for _ in 
()).throw(AssertionError("full probe should not run"))) pinged = [] def fake_ping(base_url, api_key=None, timeout=1.5): pinged.append(base_url) return {"reachable": True, "status_code": 404, "error": "HTTP 404"} monkeypatch.setattr(model_routes, "_ping_endpoint", fake_ping) result = await _route_endpoint(router, "/api/model-endpoints/probe-local")(_route_request()) assert set(result) == {"local"} assert pinged == ["http://127.0.0.1:8000/v1"] def test_background_refresh_deduplicates_same_base_url(monkeypatch): ep1 = _route_ep("a", "http://127.0.0.1:8000/v1", endpoint_kind="local") ep2 = _route_ep("b", "http://127.0.0.1:8000/v1", endpoint_kind="local") db = _RouteDb([ep1, ep2]) router = model_routes.setup_model_routes(model_discovery=None) monkeypatch.setattr(model_routes, "ModelEndpoint", _RouteModelEndpoint) monkeypatch.setattr(model_routes, "SessionLocal", lambda: db) monkeypatch.setattr(model_routes, "_auth_disabled", lambda: True) monkeypatch.setattr(model_routes, "build_chat_url", lambda base: f"{base}/chat/completions") calls = [] probe_done = threading.Event() def fake_probe(base_url, api_key=None, timeout=2): calls.append(base_url) probe_done.set() return ["live-model"] monkeypatch.setattr(model_routes, "_probe_endpoint", fake_probe) _route_endpoint(router, "/api/models")(_route_request(), refresh=True) assert probe_done.wait(2) assert _wait_for(lambda: ep1.cached_models and ep2.cached_models) assert calls == ["http://127.0.0.1:8000/v1"] assert json.loads(ep1.cached_models) == ["live-model"] assert json.loads(ep2.cached_models) == ["live-model"] def test_background_refresh_failure_keeps_existing_cached_models(monkeypatch): ep = _route_ep( "local", "http://127.0.0.1:8000/v1", cached_models=["cached-model"], endpoint_kind="local", ) db = _RouteDb([ep]) router = model_routes.setup_model_routes(model_discovery=None) monkeypatch.setattr(model_routes, "ModelEndpoint", _RouteModelEndpoint) monkeypatch.setattr(model_routes, "SessionLocal", lambda: db) 
monkeypatch.setattr(model_routes, "_auth_disabled", lambda: True) monkeypatch.setattr(model_routes, "build_chat_url", lambda base: f"{base}/chat/completions") probe_done = threading.Event() def fake_probe(*args, **kwargs): probe_done.set() return [] monkeypatch.setattr(model_routes, "_probe_endpoint", fake_probe) result = _route_endpoint(router, "/api/models")(_route_request(), refresh=True) assert probe_done.wait(2) assert _wait_for(lambda: db.commits > 0) assert result["items"][0]["models"] == ["cached-model"] assert json.loads(ep.cached_models) == ["cached-model"] def test_llm_core_list_model_ids_uses_cached_configured_proxy(monkeypatch): ep = _route_ep( "proxy", "http://100.117.136.97:34521/v1", cached_models=["cached-model", "hidden-model"], endpoint_kind="proxy", ) ep.hidden_models = json.dumps(["hidden-model"]) db = _RouteDb([ep]) monkeypatch.setattr(src_database, "ModelEndpoint", _RouteModelEndpoint) monkeypatch.setattr(src_database, "SessionLocal", lambda: db) monkeypatch.setattr(llm_core.httpx, "get", lambda *a, **k: (_ for _ in ()).throw(AssertionError("/models should not be fetched"))) assert llm_core.list_model_ids("http://100.117.136.97:34521/v1/chat/completions", timeout=1) == ["cached-model"] def test_explicit_proxy_test_fetches_models_with_long_timeout(monkeypatch): router = model_routes.setup_model_routes(model_discovery=None) monkeypatch.setattr(model_routes, "require_admin", lambda request: None) monkeypatch.setattr(model_routes, "_ping_endpoint", lambda *a, **k: (_ for _ in ()).throw(AssertionError("ping should not run when model listing succeeds"))) calls = [] returned = ["NVIDIA NIM/openai/gpt-oss-120b", "mistral/mistral-small-2603"] def fake_probe(base_url, api_key=None, timeout=2): calls.append({"base_url": base_url, "api_key": api_key, "timeout": timeout}) return returned monkeypatch.setattr(model_routes, "_probe_endpoint", fake_probe) result = _route_endpoint(router, "/api/model-endpoints/test", "POST")( _route_request(), 
base_url="http://100.117.136.97:34521/v1", api_key="fake-key", endpoint_kind="proxy", ) assert result["online"] is True assert result["status"] == "online" assert result["models"] == returned assert calls == [{ "base_url": "http://100.117.136.97:34521/v1", "api_key": "fake-key", "timeout": 30.0, }] def test_explicit_proxy_add_fetches_and_caches_models_with_long_timeout(monkeypatch): db = _RouteDb([]) router = model_routes.setup_model_routes(model_discovery=None) monkeypatch.setattr(model_routes, "ModelEndpoint", _RouteModelEndpoint) monkeypatch.setattr(model_routes, "SessionLocal", lambda: db) monkeypatch.setattr(model_routes, "require_admin", lambda request: None) monkeypatch.setattr(model_routes, "_load_settings", lambda: {}) monkeypatch.setattr(model_routes, "_save_settings", lambda settings: None) monkeypatch.setattr("src.auth_helpers.get_current_user", lambda request: None) monkeypatch.setattr(model_routes, "_ping_endpoint", lambda *a, **k: (_ for _ in ()).throw(AssertionError("ping should not run when model listing succeeds"))) calls = [] returned = ["NVIDIA NIM/openai/gpt-oss-120b", "mistral/mistral-small-2603"] def fake_probe(base_url, api_key=None, timeout=2): calls.append({"base_url": base_url, "api_key": api_key, "timeout": timeout}) return returned monkeypatch.setattr(model_routes, "_probe_endpoint", fake_probe) result = _route_endpoint(router, "/api/model-endpoints", "POST")( _route_request(), name="Bifrost", base_url="http://100.117.136.97:34521/v1", api_key="fake-key", skip_probe="true", require_models="false", model_type="llm", endpoint_kind="proxy", model_refresh_mode="manual", model_refresh_interval="", model_refresh_timeout="", supports_tools="", container_local="false", shared="true", ) assert result["online"] is True assert result["status"] == "online" assert result["models"] == returned assert calls == [{ "base_url": "http://100.117.136.97:34521/v1", "api_key": "fake-key", "timeout": 30.0, }] assert len(db.rows) == 1 assert 
json.loads(db.rows[0].cached_models) == returned assert db.rows[0].endpoint_kind == "proxy" assert db.rows[0].model_refresh_mode == "manual" def test_manual_refresh_uses_long_timeout_and_saves_full_model_list(monkeypatch): ep = _route_ep( "proxy", "http://100.117.136.97:34521/v1", cached_models=["cached-model"], endpoint_kind="proxy", api_key="fake-key", refresh_mode="manual", ) db = _RouteDb([ep]) router = model_routes.setup_model_routes(model_discovery=None) monkeypatch.setattr(model_routes, "ModelEndpoint", _RouteModelEndpoint) monkeypatch.setattr(model_routes, "SessionLocal", lambda: db) monkeypatch.setattr(model_routes, "require_admin", lambda request: None) calls = [] refreshed = ["cached-model", "mistral/mistral-small-2603", "provider/nested/model/id"] def fake_probe(base_url, api_key=None, timeout=2): calls.append({"base_url": base_url, "api_key": api_key, "timeout": timeout}) return refreshed monkeypatch.setattr(model_routes, "_probe_endpoint", fake_probe) response = SimpleNamespace(headers={}) result = _route_endpoint(router, "/api/model-endpoints/{ep_id}/models")( "proxy", _route_request(), response, refresh=True, refresh_timeout=60, ) assert [m["id"] for m in result] == refreshed assert calls == [{ "base_url": "http://100.117.136.97:34521/v1", "api_key": "fake-key", "timeout": 60.0, }] assert json.loads(ep.cached_models) == refreshed assert db.commits == 1 assert response.headers["X-Model-Refresh-Status"] == "refreshed" assert response.headers["X-Model-Refresh-Count"] == "3" def test_manual_refresh_defaults_to_proxy_long_timeout(monkeypatch): ep = _route_ep( "proxy", "https://proxy.example.test/v1", cached_models=["cached-model"], endpoint_kind="proxy", refresh_mode="manual", ) db = _RouteDb([ep]) router = model_routes.setup_model_routes(model_discovery=None) monkeypatch.setattr(model_routes, "ModelEndpoint", _RouteModelEndpoint) monkeypatch.setattr(model_routes, "SessionLocal", lambda: db) monkeypatch.setattr(model_routes, "require_admin", lambda 
request: None) timeouts = [] def fake_probe(base_url, api_key=None, timeout=2): timeouts.append(timeout) return ["cached-model", "new-model"] monkeypatch.setattr(model_routes, "_probe_endpoint", fake_probe) response = SimpleNamespace(headers={}) _route_endpoint(router, "/api/model-endpoints/{ep_id}/models")( "proxy", _route_request(), response, refresh=True, ) assert timeouts == [30.0] assert json.loads(ep.cached_models) == ["cached-model", "new-model"] def test_manual_refresh_timeout_keeps_cached_models_and_warns(monkeypatch): ep = _route_ep( "proxy", "http://100.117.136.97:34521/v1", cached_models=["cached-model"], endpoint_kind="proxy", api_key="fake-key", refresh_mode="manual", ) db = _RouteDb([ep]) router = model_routes.setup_model_routes(model_discovery=None) monkeypatch.setattr(model_routes, "ModelEndpoint", _RouteModelEndpoint) monkeypatch.setattr(model_routes, "SessionLocal", lambda: db) monkeypatch.setattr(model_routes, "require_admin", lambda request: None) def fake_probe(base_url, api_key=None, timeout=2): raise httpx.TimeoutException("timed out") monkeypatch.setattr(model_routes, "_probe_endpoint", fake_probe) response = SimpleNamespace(headers={}) result = _route_endpoint(router, "/api/model-endpoints/{ep_id}/models")( "proxy", _route_request(), response, refresh=True, refresh_timeout=60, ) assert [m["id"] for m in result] == ["cached-model"] assert json.loads(ep.cached_models) == ["cached-model"] assert db.commits == 0 assert response.headers["X-Model-Refresh-Status"] == "failed" assert "kept cached models" in response.headers["X-Model-Refresh-Warning"]