diff --git a/routes/model_routes.py b/routes/model_routes.py index 858c236..47e527c 100644 --- a/routes/model_routes.py +++ b/routes/model_routes.py @@ -348,7 +348,24 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) -> if api_key: headers["Authorization"] = f"Bearer {api_key}" + # Ollama exposes /v1/models (OpenAI-compatible) AND native /api/version, + # /api/tags. The OpenAI-style GET base + "/models" returns 404 when the + # base is the host root or the native /api root (e.g. http://localhost:11434, + # http://localhost:11434/api) because /models lives under /v1 there. Treat + # 4xx on a port-11434 / Ollama-named base as "try the native paths" rather + # than as a definitive offline verdict — Ollama is reachable, it just + # doesn't speak OpenAI on that prefix. Without this gate the quickstart + # marks an alive Ollama as offline whenever cached_models is empty (issue + # #1025): _probe_endpoint() falls through to /api/tags on the same 404, but + # _ping_endpoint() was returning before that fallback could run. + parsed_base = urlparse(base) + looks_like_ollama = ( + parsed_base.port == 11434 + or "ollama" in (parsed_base.hostname or "").lower() + ) + url = base + "/models" + last_error: Optional[str] = None try: r = httpx.get(url, headers=headers, timeout=timeout) if 300 <= r.status_code < 400: @@ -360,17 +377,21 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) -> "error": "That is Odysseus, not a model server. Use the Ollama URL, usually http://host.docker.internal:11434/v1 in Docker.", } return {"reachable": False, "status_code": r.status_code, "error": f"HTTP {r.status_code} redirect"} - if r.status_code < 500: - return {"reachable": r.status_code < 400, "status_code": r.status_code, "error": None if r.status_code < 400 else f"HTTP {r.status_code}"} + if r.status_code < 400: + return {"reachable": True, "status_code": r.status_code, "error": None} + if r.status_code < 500 and not looks_like_ollama: + return {"reachable": False, "status_code": r.status_code, "error": f"HTTP {r.status_code}"} + last_error = f"HTTP {r.status_code}" except Exception as e: last_error = str(e)[:120] - else: - last_error = f"HTTP {r.status_code}" try: - parsed = urlparse(base) - if parsed.port == 11434 or "ollama" in (parsed.hostname or "").lower(): - root = base[:-3].rstrip("/") if base.endswith("/v1") else base + if looks_like_ollama: + root = base + for suffix in ("/v1", "/api"): + if root.endswith(suffix): + root = root[: -len(suffix)].rstrip("/") + break for path in ("/api/version", "/api/tags"): try: r = httpx.get(root + path, timeout=timeout)