ModelEndpoint is defined in core.database, not src.database. The wrong import silently prevented the module from loading in deployment configurations that do not have a src/database.py shim, resulting in an ImportError at startup. Also adds a warning log when resolve_endpoint finds no usable model (all models hidden or the list is empty), making the otherwise-silent failure visible in operator logs. The test_auth_regressions stub for src.endpoint_resolver was missing the build_models_url attribute, which caused test collection errors. Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
384 lines
14 KiB
Python
384 lines
14 KiB
Python
# src/endpoint_resolver.py
|
|
"""Unified endpoint resolution for all backend services.
|
|
|
|
Consolidates the 4+ copies of normalize_base / resolve_endpoint logic into one place.
|
|
"""
|
|
|
|
import json
|
|
import logging
|
|
import socket
|
|
import subprocess
|
|
from typing import Optional, Tuple, Dict
|
|
from urllib.parse import urlparse, urlunparse
|
|
|
|
from core.database import SessionLocal, ModelEndpoint
|
|
from src.llm_core import _detect_provider, _host_match
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Model-name substrings that are NOT chat/generation models. When an endpoint
|
|
# has no explicit model configured we pick the first CHAT model from its list —
|
|
# never an embedding/tts/etc. (an OpenAI-style endpoint often lists
|
|
# `text-embedding-ada-002` first, which silently broke email-summarize and
|
|
# other resolve_endpoint callers with "Cannot reach model").
|
|
_NON_CHAT_MODEL = (
|
|
"text-embedding", "embedding", "tts-", "whisper", "dall-e",
|
|
"moderation", "rerank", "reranker", "clip", "stable-diffusion",
|
|
)
|
|
|
|
|
|
def _first_chat_model(models) -> Optional[str]:
|
|
"""First model that isn't an embedding/tts/etc.; falls back to models[0]."""
|
|
for m in (models or []):
|
|
if not any(p in str(m).lower() for p in _NON_CHAT_MODEL):
|
|
return m
|
|
return (models[0] if models else None)
|
|
|
|
|
|
def _endpoint_cached_models(ep) -> list:
|
|
"""Return cached model ids from the current or legacy endpoint field."""
|
|
raw = getattr(ep, "cached_models", None) or getattr(ep, "models", None)
|
|
if not raw:
|
|
return []
|
|
try:
|
|
models = json.loads(raw) if isinstance(raw, str) else raw
|
|
except Exception:
|
|
return []
|
|
return models if isinstance(models, list) else []
|
|
|
|
|
|
def _endpoint_hidden_models(ep) -> set:
|
|
"""Model ids the admin disabled on this endpoint (the UI's hidden list)."""
|
|
raw = getattr(ep, "hidden_models", None)
|
|
if not raw:
|
|
return set()
|
|
try:
|
|
hidden = json.loads(raw) if isinstance(raw, str) else raw
|
|
except Exception:
|
|
return set()
|
|
return set(hidden) if isinstance(hidden, list) else set()
|
|
|
|
|
|
def _endpoint_enabled_models(ep) -> list:
    """Return the endpoint's cached models with the disabled ones removed.

    Order follows the cached list. The auto-pick fallback must never select
    a model the user disabled — a Groq endpoint can list 16 models with only
    1 enabled, and picking the raw first one resolves to a model that 400s
    ("requires terms acceptance").
    """
    disabled = _endpoint_hidden_models(ep)
    enabled = []
    for model_id in _endpoint_cached_models(ep):
        if model_id in disabled:
            continue
        enabled.append(model_id)
    return enabled
|
|
|
|
|
|
# Cache for Tailscale hostname → IP resolution
|
|
_tailscale_cache: Dict[str, Optional[str]] = {}
|
|
|
|
|
|
def _resolve_tailscale_host(hostname: str) -> Optional[str]:
|
|
"""Try to resolve a hostname via 'tailscale status' if DNS fails."""
|
|
if hostname in _tailscale_cache:
|
|
return _tailscale_cache[hostname]
|
|
|
|
# First check if normal DNS works
|
|
try:
|
|
socket.getaddrinfo(hostname, None, socket.AF_INET)
|
|
_tailscale_cache[hostname] = None # DNS works, no override needed
|
|
return None
|
|
except socket.gaierror:
|
|
pass
|
|
|
|
# DNS failed — try tailscale
|
|
try:
|
|
result = subprocess.run(
|
|
["tailscale", "status", "--json"],
|
|
capture_output=True, text=True, timeout=5
|
|
)
|
|
if result.returncode == 0:
|
|
import json as _json
|
|
data = _json.loads(result.stdout)
|
|
peers = data.get("Peer", {})
|
|
for _id, peer in peers.items():
|
|
peer_name = (peer.get("HostName") or "").lower()
|
|
dns_name = (peer.get("DNSName") or "").split(".")[0].lower()
|
|
if peer_name == hostname.lower() or dns_name == hostname.lower():
|
|
addrs = peer.get("TailscaleIPs", [])
|
|
if addrs:
|
|
ip = addrs[0]
|
|
logger.info(f"Resolved '{hostname}' via Tailscale → {ip}")
|
|
_tailscale_cache[hostname] = ip
|
|
return ip
|
|
except Exception as e:
|
|
logger.debug(f"Tailscale resolution failed for '{hostname}': {e}")
|
|
|
|
_tailscale_cache[hostname] = None
|
|
return None
|
|
|
|
|
|
def resolve_url(url: str) -> str:
    """If a URL's hostname can't be resolved via DNS, try Tailscale.

    Args:
        url: Endpoint URL to inspect.

    Returns:
        ``url`` unchanged when it has no hostname or when
        ``_resolve_tailscale_host`` returns no override. Otherwise the same
        URL with the hostname replaced by the returned IP; any ``user:pass@``
        prefix and the port are kept.
    """
    parsed = urlparse(url)
    hostname = parsed.hostname
    if not hostname:
        return url

    ip = _resolve_tailscale_host(hostname)
    if not ip:
        return url

    # An IPv6 literal must be bracketed inside a URL authority, otherwise its
    # colons are read as a port separator.
    host = f"[{ip}]" if ":" in ip else ip
    # Keep credentials embedded in the URL instead of silently dropping them.
    userinfo, at_sign, _ = parsed.netloc.rpartition("@")
    netloc = f"{userinfo}{at_sign}{host}"
    if parsed.port:
        netloc = f"{netloc}:{parsed.port}"
    return urlunparse(parsed._replace(netloc=netloc))
|
|
|
|
|
|
def normalize_base(url: str) -> str:
    """Reduce an endpoint URL to its API base by trimming known path suffixes.

    Surrounding whitespace and trailing slashes are removed first. The
    OpenAI/Anthropic-style suffixes are then stripped in a fixed order, each
    checked against the result of the previous strip. Finally, an
    ``/api/chat``, ``/api/tags`` or ``/api/generate`` tail loses its last
    segment, so the ``/api`` part is kept.
    """
    cleaned = (url or "").strip().rstrip("/")

    for tail in ("/models", "/chat/completions", "/completions", "/v1/messages"):
        if cleaned.endswith(tail):
            cleaned = cleaned[: len(cleaned) - len(tail)].rstrip("/")

    for action in ("/chat", "/tags", "/generate"):
        if cleaned.endswith("/api" + action):
            cleaned = cleaned[: len(cleaned) - len(action)].rstrip("/")

    return cleaned
|
|
|
|
|
|
def _anthropic_api_root(base: str) -> str:
    """Return ``base`` without a trailing ``/v1`` when it is an anthropic.com URL.

    The host check is delegated to ``_host_match(base, "anthropic.com")``.
    For every other host the trimmed base is returned as is, so a ``/v1``
    used by OpenAI-compatible APIs elsewhere is preserved.
    """
    root = (base or "").strip().rstrip("/")
    strip_version = _host_match(root, "anthropic.com") and root.endswith("/v1")
    if not strip_version:
        return root
    return root[: -len("/v1")].rstrip("/")
|
|
|
|
|
|
def _ollama_api_root(base: str) -> str:
|
|
"""Return the native Ollama API root, adding /api for ollama.com hosts."""
|
|
base = (base or "").strip().rstrip("/")
|
|
parsed = urlparse(base)
|
|
path = (parsed.path or "").rstrip("/")
|
|
if path.endswith("/api"):
|
|
return base
|
|
if _host_match(base, "ollama.com"):
|
|
root = f"{parsed.scheme}://{parsed.netloc}" if parsed.scheme and parsed.netloc else "https://ollama.com"
|
|
return root.rstrip("/") + "/api"
|
|
return base
|
|
|
|
|
|
def build_chat_url(base: str) -> str:
    """Build the chat endpoint URL for ``base``.

    The base is first passed through ``resolve_url``. The path then depends
    on what ``_detect_provider`` reports: ``/v1/messages`` on the Anthropic
    root, ``/chat`` on the Ollama root, and ``/chat/completions`` appended to
    the base for everything else.
    """
    resolved = resolve_url(base)
    kind = _detect_provider(resolved)

    if kind == "anthropic":
        root, tail = _anthropic_api_root(resolved), "/v1/messages"
    elif kind == "ollama":
        root, tail = _ollama_api_root(resolved), "/chat"
    else:
        root, tail = resolved, "/chat/completions"
    return root + tail
|
|
|
|
|
|
def build_models_url(base: str) -> str:
    """Build the model-list endpoint URL for ``base``.

    The base is first passed through ``resolve_url``. The path then depends
    on what ``_detect_provider`` reports: ``/v1/models`` on the Anthropic
    root, ``/tags`` on the Ollama root, and ``/models`` appended to the base
    for everything else.
    """
    resolved = resolve_url(base)
    kind = _detect_provider(resolved)

    if kind == "anthropic":
        root, tail = _anthropic_api_root(resolved), "/v1/models"
    elif kind == "ollama":
        root, tail = _ollama_api_root(resolved), "/tags"
    else:
        root, tail = resolved, "/models"
    return root + tail
|
|
|
|
|
|
def build_headers(api_key: Optional[str], base: str) -> Dict[str, str]:
    """Build the request headers for an endpoint.

    Args:
        api_key: Endpoint API key; no auth header is added when it is empty.
        base: Endpoint base URL, used only to detect the provider.

    Returns:
        For Anthropic: ``x-api-key`` (when a key is given) plus
        ``anthropic-version``. For every other provider: a bearer
        ``Authorization`` header (when a key is given), plus the
        ``HTTP-Referer`` and ``X-OpenRouter-Title`` headers for OpenRouter.
    """
    kind = _detect_provider(base)

    if kind == "anthropic":
        result: Dict[str, str] = {"x-api-key": api_key} if api_key else {}
        result["anthropic-version"] = "2023-06-01"
        return result

    result = {"Authorization": f"Bearer {api_key}"} if api_key else {}
    if kind == "openrouter":
        result["HTTP-Referer"] = "https://github.com/pewdiepie-archdaemon/odysseus"
        result["X-OpenRouter-Title"] = "Odysseus"
    return result
|
|
|
|
|
|
def resolve_endpoint(
    setting_prefix: str,
    fallback_url: Optional[str] = None,
    fallback_model: Optional[str] = None,
    fallback_headers: Optional[Dict] = None,
    owner: Optional[str] = None,
) -> Tuple[Optional[str], Optional[str], Optional[Dict]]:
    """Resolve an endpoint/model from settings, with fallback.

    Resolution order:
      1. Read ``{prefix}_endpoint_id`` / ``{prefix}_model``.
      2. With no endpoint id but both ``fallback_url`` and ``fallback_model``
         supplied, return the caller's fallback immediately.
      3. For the "utility" prefix with no endpoint id, use the ``default_*``
         settings; for any other prefix, try ``utility_*`` and then
         ``default_*``.
      4. With still no endpoint id, or no enabled endpoint row for that id,
         return the fallback values.
      5. Otherwise build the chat URL and headers from the endpoint. A
         configured model that is in the endpoint's hidden list is discarded;
         with no model left, the first enabled chat model is used, and failing
         that ``fallback_model``.

    Args:
        setting_prefix: Settings key prefix, e.g. "research", "task", "utility", "default".
            Reads ``{prefix}_endpoint_id`` and ``{prefix}_model`` from settings.
        fallback_url: URL to use if settings are empty or endpoint missing.
        fallback_model: Model to use if settings are empty.
        fallback_headers: Headers to use if using fallback.
        owner: Optional owner passed to ``get_user_setting`` for each settings
            read and, when set, to ``owner_filter`` on the endpoint query.

    Returns:
        (endpoint_url, model, headers) — resolved or fallback values. The
        fallback triple is also returned when the settings module cannot be
        loaded or when anything inside the database block raises.
    """
    try:
        from src.settings import get_user_setting, load_settings
        settings = load_settings()
    except Exception:
        return fallback_url, fallback_model, fallback_headers

    owner_str = owner or ""

    # Read one setting as a stripped string: the per-user value if any,
    # otherwise the global one. NOTE: these reads run outside any try block,
    # so an exception raised by get_user_setting propagates to the caller.
    def _stg(key: str) -> str:
        return (get_user_setting(key, owner_str, settings.get(key, "")) or "").strip()

    ep_id = _stg(f"{setting_prefix}_endpoint_id")
    model = _stg(f"{setting_prefix}_model")

    # If the specific endpoint is not configured, but the caller provided a
    # valid fallback (e.g. the active session model), use that immediately.
    # This prevents background tasks from jumping to the global default_model
    # when the user is mid-conversation with a different model.
    if not ep_id and fallback_url and fallback_model:
        return fallback_url, fallback_model, fallback_headers

    # Unset Utility means "same as Default Chat Model".
    if setting_prefix == "utility" and not ep_id:
        ep_id = _stg("default_endpoint_id")
        model = _stg("default_model")

    # Fall back to utility model for task/research/auto-naming if not specifically configured.
    # If Utility itself is unset, the block above makes that resolve to Default Chat.
    if not ep_id and setting_prefix != "utility":
        ep_id = _stg("utility_endpoint_id")
        model = _stg("utility_model")
        if not ep_id:
            ep_id = _stg("default_endpoint_id")
            model = _stg("default_model")

    if not ep_id:
        return fallback_url, fallback_model, fallback_headers

    db = SessionLocal()
    try:
        # Only enabled endpoints are eligible; `ep` holds the query until
        # .first() turns it into a row (or None).
        ep = db.query(ModelEndpoint).filter(
            ModelEndpoint.id == ep_id,
            ModelEndpoint.is_enabled == True,
        )
        if owner:
            from src.auth_helpers import owner_filter
            ep = owner_filter(ep, ModelEndpoint, owner).first()
        else:
            ep = ep.first()
        if not ep:
            return fallback_url, fallback_model, fallback_headers

        base = normalize_base(ep.base_url)
        chat_url = build_chat_url(base)
        headers = build_headers(ep.api_key, base)

        # Discard a configured model the user has since disabled on the
        # endpoint (e.g. a stale `default_model` left pointing at a now-hidden
        # model). Treat it as unset so the picker below selects a live one
        # instead of dispatching to a disabled model that 400s.
        if model and model in _endpoint_hidden_models(ep):
            model = ""
        # If no (usable) model specified, pick the first enabled chat model.
        if not model:
            model = _first_chat_model(_endpoint_enabled_models(ep)) or ""
            # Nothing selectable and no caller fallback: the returned model
            # will be None, so make that visible in the logs.
            if not model and not fallback_model:
                logger.warning('[resolve_endpoint] no usable model (all models hidden or list empty)')

        return chat_url, model or fallback_model, headers
    except Exception as e:
        # Best effort: any failure in the lookup degrades to the fallback.
        logger.debug(f"Could not resolve {setting_prefix} endpoint: {e}")
        return fallback_url, fallback_model, fallback_headers
    finally:
        db.close()
|
|
|
|
|
|
def resolve_endpoint_by_id(
    ep_id: str, model: Optional[str] = None, owner: Optional[str] = None
) -> Optional[Tuple[str, str, Dict]]:
    """Turn an endpoint id (+ optional model) into (chat_url, model, headers).

    Used to convert a configured fallback entry ({endpoint_id, model}) into a
    dispatch target.

    Args:
        ep_id: Endpoint id to look up; an empty id yields ``None``.
        model: Preferred model. It is ignored when blank or when it is in the
            endpoint's hidden list; the first enabled chat model is used then.
        owner: When set, the endpoint query is passed through ``owner_filter``.

    Returns:
        The resolved triple, or ``None`` when the endpoint does not exist, is
        disabled, has no usable model, or the lookup raises.
    """
    if not ep_id:
        return None

    session = SessionLocal()
    try:
        query = session.query(ModelEndpoint).filter(
            ModelEndpoint.id == ep_id,
            ModelEndpoint.is_enabled == True,  # noqa: E712 - SQL expression, not a Python bool test
        )
        if owner:
            from src.auth_helpers import owner_filter
            query = owner_filter(query, ModelEndpoint, owner)

        endpoint = query.first()
        if not endpoint:
            return None

        api_base = normalize_base(endpoint.base_url)
        target_url = build_chat_url(api_base)
        auth_headers = build_headers(endpoint.api_key, api_base)

        # A model the user disabled on the endpoint counts as "not given";
        # the first enabled chat model is picked instead of a hidden one.
        chosen = (model or "").strip()
        if chosen and chosen in _endpoint_hidden_models(endpoint):
            chosen = ""
        chosen = chosen or _first_chat_model(_endpoint_enabled_models(endpoint)) or ""

        return (target_url, chosen, auth_headers) if chosen else None
    except Exception as exc:
        logger.debug(f"Could not resolve endpoint {ep_id}: {exc}")
        return None
    finally:
        session.close()
|
|
|
|
|
|
def resolve_chat_fallback_candidates(owner: Optional[str] = None) -> list:
    """Build the configured default-chat fallback chain as a list of
    (chat_url, model, headers) tuples, skipping any that can't resolve.

    The chain is read from the ``default_model_fallbacks`` setting.

    The primary model is NOT included — callers prepend their session's
    current (url, model, headers) so per-session model overrides are honored.

    Args:
        owner: Optional owner forwarded to the settings lookup and to the
            per-entry endpoint resolution.

    Returns:
        The resolved entries in configured order; empty when nothing is
        configured or nothing resolves.
    """
    return _resolve_fallback_candidates("default_model_fallbacks", owner=owner)
|
|
|
|
|
|
def resolve_utility_fallback_candidates(owner: Optional[str] = None) -> list:
    """Return the fallback chain to use for the Utility model.

    When no ``utility_endpoint_id`` is configured, the ``default_model_fallbacks``
    chain is used. Otherwise, or when the settings cannot be read, the
    ``utility_model_fallbacks`` chain is used.
    """
    setting_key = "utility_endpoint_id"
    try:
        from src.settings import get_user_setting, load_settings

        global_settings = load_settings()
        configured = get_user_setting(
            setting_key, owner or "", global_settings.get(setting_key, "")
        )
        if not (configured or "").strip():
            return _resolve_fallback_candidates("default_model_fallbacks", owner=owner)
    except Exception:
        # Best effort: fall through to the utility-specific chain.
        pass
    return _resolve_fallback_candidates("utility_model_fallbacks", owner=owner)
|
|
|
|
|
|
def resolve_vision_fallback_candidates(owner: Optional[str] = None) -> list:
    """Configured fallback chain for the Vision model (`vision_model_fallbacks`).

    Args:
        owner: Optional owner forwarded to the settings lookup and to the
            per-entry endpoint resolution.

    Returns:
        The (chat_url, model, headers) tuples of the entries that resolve, in
        configured order.
    """
    return _resolve_fallback_candidates("vision_model_fallbacks", owner=owner)
|
|
|
|
|
|
def _resolve_fallback_candidates(setting_key: str, owner: Optional[str] = None) -> list:
|
|
out = []
|
|
try:
|
|
from src.settings import get_user_setting, load_settings
|
|
settings = load_settings()
|
|
chain = get_user_setting(setting_key, owner or "", settings.get(setting_key) or []) or []
|
|
except Exception:
|
|
return out
|
|
for entry in chain:
|
|
if not isinstance(entry, dict):
|
|
continue
|
|
resolved = resolve_endpoint_by_id(entry.get("endpoint_id", ""), entry.get("model", ""), owner=owner)
|
|
if resolved:
|
|
out.append(resolved)
|
|
return out
|