From 2c4b8b57dddcf3ff0a5fbc6544b6b758dc94e9c5 Mon Sep 17 00:00:00 2001 From: Alexander Kenley Date: Mon, 1 Jun 2026 15:26:10 +1000 Subject: [PATCH] feat(ai): add OpenRouter and Ollama Cloud providers (#231) Co-authored-by: Alex Kenley --- routes/chat_helpers.py | 14 +- routes/compare_routes.py | 6 +- routes/model_routes.py | 83 +++++++++-- routes/session_routes.py | 5 +- routes/webhook_routes.py | 25 +++- src/agent_loop.py | 1 + src/ai_interaction.py | 42 +++--- src/endpoint_resolver.py | 31 ++++ src/llm_core.py | 171 +++++++++++++++++++++- src/teacher_escalation.py | 1 + static/index.html | 1 + static/js/admin.js | 35 ++++- static/js/assistant.js | 6 +- static/js/compare/models.js | 5 +- static/js/editor/ai-models.js | 3 +- static/js/group.js | 5 +- static/js/modelPicker.js | 5 +- static/js/modelSort.js | 29 ++++ static/js/models.js | 3 +- static/js/providers.js | 8 + static/js/research/panel.js | 3 +- static/js/settings.js | 249 ++++++++++++++++++++------------ static/js/slashCommands.js | 22 ++- static/js/tasks.js | 3 +- tests/test_endpoint_resolver.py | 46 ++++++ tests/test_llm_core_ollama.py | 43 ++++++ tests/test_model_routes.py | 23 +++ 27 files changed, 699 insertions(+), 169 deletions(-) create mode 100644 static/js/modelSort.js create mode 100644 tests/test_llm_core_ollama.py diff --git a/routes/chat_helpers.py b/routes/chat_helpers.py index ce2e0cf..7e7a764 100644 --- a/routes/chat_helpers.py +++ b/routes/chat_helpers.py @@ -188,7 +188,7 @@ def try_fallback_endpoint(sess, session_id: str) -> dict | None: Returns {"model": ..., "endpoint_url": ..., "endpoint_name": ...} or None. """ import requests as _req - from src.endpoint_resolver import build_chat_url, build_headers, normalize_base + from src.endpoint_resolver import build_chat_url, build_headers, build_models_url, normalize_base current_url = sess.endpoint_url or "" db = SessionLocal() @@ -205,15 +205,19 @@ def try_fallback_endpoint(sess, session_id: str) -> dict | None: if current_url and base in current_url: continue # Quick ping - ping_url = base + "/models" - headers = {} - if ep.api_key: - headers["Authorization"] = f"Bearer {ep.api_key}" + ping_url = build_models_url(base) + headers = build_headers(ep.api_key, base) try: r = _req.get(ping_url, headers=headers, timeout=5) r.raise_for_status() data = r.json() models = [m.get("id") for m in (data.get("data") or []) if m.get("id")] + if not models: + models = [ + m.get("name") or m.get("model") + for m in (data.get("models") or []) + if m.get("name") or m.get("model") + ] if not models: continue # Found a working endpoint — update session diff --git a/routes/compare_routes.py b/routes/compare_routes.py index 18b2165..2d06e95 100644 --- a/routes/compare_routes.py +++ b/routes/compare_routes.py @@ -62,14 +62,16 @@ def setup_compare_routes(session_manager: SessionManager): db = SessionLocal() try: from core.database import ModelEndpoint + from src.endpoint_resolver import build_headers, normalize_base # Find matching endpoint by URL + base = normalize_base(endpoint) ep = db.query(ModelEndpoint).filter( - ModelEndpoint.base_url == endpoint.replace('/chat/completions', '') + ModelEndpoint.base_url == base ).first() if ep and ep.api_key: s = session_manager.sessions.get(sid) if s: - s.headers = {"Authorization": f"Bearer {ep.api_key}"} + s.headers = build_headers(ep.api_key, ep.base_url) finally: db.close() diff --git a/routes/model_routes.py b/routes/model_routes.py index bd209db..3f4f2f1 100644 --- a/routes/model_routes.py +++ b/routes/model_routes.py @@ -16,12 +16,60 @@ from core.database import SessionLocal, ModelEndpoint, Session as DbSession from core.middleware import require_admin from src.llm_core import _detect_provider, ANTHROPIC_MODELS from src.settings import load_settings as _load_settings, save_settings as _save_settings -from src.endpoint_resolver import normalize_base as _normalize_base, build_chat_url, build_headers, _anthropic_api_root +from src.endpoint_resolver import normalize_base as _normalize_base, build_chat_url from src.auth_helpers import owner_filter logger = logging.getLogger(__name__) +def _anthropic_api_root(base: str) -> str: + """Return Anthropic's API root without duplicating /v1.""" + base = (base or "").strip().rstrip("/") + host = urlparse(base).hostname or "" + if host.endswith("anthropic.com") and base.endswith("/v1"): + return base[:-3].rstrip("/") + return base + + +def _ollama_api_root(base: str) -> str: + """Return Ollama's native API root without depending on deferred imports.""" + base = (base or "").strip().rstrip("/") + parsed = urlparse(base) + host = parsed.hostname or "" + path = (parsed.path or "").rstrip("/") + if path.endswith("/api"): + return base + if host.endswith("ollama.com"): + root = f"{parsed.scheme}://{parsed.netloc}" if parsed.scheme and parsed.netloc else "https://ollama.com" + return root.rstrip("/") + "/api" + return base + + +def _models_url(base: str) -> str: + """Return provider-specific model-list URL for route-local probing.""" + provider = _detect_provider(base) + host = urlparse(base).hostname or "" + if provider == "anthropic" or host.endswith("anthropic.com"): + return _anthropic_api_root(base) + "/v1/models" + if provider == "ollama" or host.endswith("ollama.com"): + return _ollama_api_root(base) + "/tags" + return base.rstrip("/") + "/models" + + +def _provider_headers(api_key: Optional[str], base: str) -> Dict[str, str]: + """Build provider auth headers without depending on import-time stubs.""" + if not api_key: + return {} + provider = _detect_provider(base) + host = urlparse(base).hostname or "" + if provider == "anthropic" or host.endswith("anthropic.com"): + return { + "x-api-key": api_key, + "anthropic-version": "2023-06-01", + } + return {"Authorization": f"Bearer {api_key}"} + + # ── Curated model lists per provider ── # For cloud providers that return 100+ models, only show these by default. # A model ID matches if it starts with or equals a curated entry. @@ -87,6 +135,7 @@ _URL_TO_CURATED = { "generativelanguage.googleapis.com": "google", "api.x.ai": "xai", "openrouter.ai": "openrouter", + "ollama.com": "ollama", } @@ -183,9 +232,15 @@ def _probe_single_model(base: str, api_key: str, model_id: str, timeout: int = 1 payload = _build_anthropic_payload(model_id, messages, 0.0, 5) if _test_tools: payload["tools"] = [{"name": "test", "description": "Test tool", "input_schema": {"type": "object", "properties": {}}}] + elif provider == "ollama": + from src.llm_core import _build_ollama_payload + target_url = build_chat_url(base) + h = _provider_headers(api_key, base) + h["Content-Type"] = "application/json" + payload = _build_ollama_payload(model_id, messages, 0.0, 5, stream=False, tools=_test_tools) else: target_url = build_chat_url(base) - h = build_headers(api_key, base) + h = _provider_headers(api_key, base) h["Content-Type"] = "application/json" from src.llm_core import _uses_max_completion_tokens _max_key = "max_completion_tokens" if _uses_max_completion_tokens(model_id) else "max_tokens" @@ -276,10 +331,8 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis return [] logger.warning(f"Anthropic /v1/models failed, using hardcoded list: {e}") return list(ANTHROPIC_MODELS) - url = base + "/models" - headers = {} - if api_key: - headers["Authorization"] = f"Bearer {api_key}" + url = _models_url(base) + headers = _provider_headers(api_key, base) try: r = httpx.get(url, headers=headers, timeout=timeout) r.raise_for_status() @@ -494,10 +547,7 @@ def setup_model_routes(model_discovery): pass model_ids = [m for m in model_ids if m not in hidden] # Build correct URL based on provider - if provider == "anthropic": - chat_url = build_chat_url(base) - else: - chat_url = base + "/chat/completions" + chat_url = build_chat_url(base) category = _classify_endpoint(base) if model_ids: @@ -671,10 +721,8 @@ def setup_model_routes(model_discovery): entry["error"] = str(e) entry["model_count"] = 0 else: - url = base + "/models" - headers = {} - if ep.api_key: - headers["Authorization"] = f"Bearer {ep.api_key}" + url = _models_url(base) + headers = _provider_headers(ep.api_key, base) try: t0 = _time.time() r = httpx.get(url, headers=headers, timeout=5) @@ -682,6 +730,12 @@ def setup_model_routes(model_discovery): r.raise_for_status() data = r.json() models = [m.get("id") for m in (data.get("data") or []) if m.get("id")] + if not models: + models = [ + m.get("name") or m.get("model") + for m in (data.get("models") or []) + if m.get("name") or m.get("model") + ] entry["status"] = "online" entry["model_count"] = len(models) except Exception as e: @@ -896,6 +950,7 @@ def setup_model_routes(model_discovery): for suffix in ["/models", "/chat/completions", "/completions", "/v1/messages"]: if base_url.endswith(suffix): base_url = base_url[:-len(suffix)].rstrip("/") + base_url = _normalize_base(base_url) if not base_url: raise HTTPException(400, "Base URL is required") # Resolve hostname via Tailscale if DNS fails diff --git a/routes/session_routes.py b/routes/session_routes.py index 18e0b18..7dd875e 100644 --- a/routes/session_routes.py +++ b/routes/session_routes.py @@ -227,6 +227,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_ ) # Set auth headers for custom API-key endpoints resolved_key = api_key.strip() if api_key else "" + resolved_base = endpoint_url if not resolved_key and endpoint_id and endpoint_id.strip(): from core.database import ModelEndpoint _db = SessionLocal() @@ -234,10 +235,12 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_ ep = _db.query(ModelEndpoint).filter(ModelEndpoint.id == endpoint_id.strip()).first() if ep and ep.api_key: resolved_key = ep.api_key + resolved_base = ep.base_url finally: _db.close() if resolved_key: - session.headers = {"Authorization": f"Bearer {resolved_key}"} + from src.endpoint_resolver import build_headers + session.headers = build_headers(resolved_key, resolved_base) session_manager.save_sessions() # Fire webhook (sync-safe) if webhook_manager: diff --git a/routes/webhook_routes.py b/routes/webhook_routes.py index 8fc88fe..7eead00 100644 --- a/routes/webhook_routes.py +++ b/routes/webhook_routes.py @@ -157,6 +157,7 @@ def setup_webhook_routes( "groq": "https://api.groq.com/openai/v1", "together": "https://api.together.xyz/v1", "openrouter": "https://openrouter.ai/api/v1", + "ollama": "https://ollama.com/api", "fireworks": "https://api.fireworks.ai/inference/v1", } @@ -203,6 +204,7 @@ def setup_webhook_routes( from core.models import ChatMessage from src.llm_core import llm_call_async from core.database import ModelEndpoint + from src.endpoint_resolver import build_chat_url, build_headers, build_models_url, normalize_base message = body.message.strip() if not message: @@ -244,7 +246,8 @@ def setup_webhook_routes( "Could not auto-detect provider. Pass base_url (e.g. 'https://api.deepseek.com/v1') " "or provider ('deepseek', 'openai', 'groq', etc.)") - endpoint_url = base_url + "/chat/completions" + base_url = normalize_base(base_url) + endpoint_url = build_chat_url(base_url) if not session_manager: raise HTTPException(500, "Session manager not available") @@ -254,7 +257,7 @@ def setup_webhook_routes( session_id=sid, name="API Chat", endpoint_url=endpoint_url, model=model, owner=token_owner, ) - sess.headers = {"Authorization": f"Bearer {api_key}"} + sess.headers = build_headers(api_key, base_url) session_manager.save_sessions() session_id = sid @@ -271,18 +274,26 @@ def setup_webhook_routes( "No session, api_key, or configured endpoints. " "Pass api_key + model, or configure an endpoint in Admin.") - endpoint_url = ep.base_url.rstrip("/") + "/chat/completions" + base_url = normalize_base(ep.base_url) + endpoint_url = build_chat_url(base_url) model = body.model or "auto" api_key = ep.api_key if model == "auto": try: async with httpx.AsyncClient(timeout=5) as client: - models_url = ep.base_url.rstrip("/") + "/models" - hdrs = {"Authorization": f"Bearer {api_key}"} if api_key else {} + models_url = build_models_url(base_url) + hdrs = build_headers(api_key, base_url) resp = await client.get(models_url, headers=hdrs) resp.raise_for_status() - ids = [m.get("id") for m in (resp.json().get("data") or []) if m.get("id")] + data = resp.json() + ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")] + if not ids: + ids = [ + m.get("name") or m.get("model") + for m in (data.get("models") or []) + if m.get("name") or m.get("model") + ] model = ids[0] if ids else "auto" except Exception: raise HTTPException(500, "Could not discover models from endpoint") @@ -296,7 +307,7 @@ def setup_webhook_routes( model=model, owner=token_owner, ) if api_key: - sess.headers = {"Authorization": f"Bearer {api_key}"} + sess.headers = build_headers(api_key, base_url) session_manager.save_sessions() session_id = sid diff --git a/src/agent_loop.py b/src/agent_loop.py index 2c42e9d..6b7d982 100644 --- a/src/agent_loop.py +++ b/src/agent_loop.py @@ -450,6 +450,7 @@ _API_HOSTS = frozenset([ "api.deepseek.com", "deepseek.com", "api.together.xyz", "api.fireworks.ai", "api.perplexity.ai", "api.x.ai", + "ollama.com", ]) _MCP_KEYWORDS = frozenset(["browse", "browser", "website", "calendar", "event", "email", "gmail", "screenshot", "navigate", "click", "miniflux", "rss", "feed"]) diff --git a/src/ai_interaction.py b/src/ai_interaction.py index 2db291a..9063ced 100644 --- a/src/ai_interaction.py +++ b/src/ai_interaction.py @@ -55,7 +55,7 @@ def set_rag_manager(rag_mgr, personal_docs_mgr=None): # Model resolution # --------------------------------------------------------------------------- -from src.endpoint_resolver import normalize_base as _normalize_base +from src.endpoint_resolver import normalize_base as _normalize_base, build_chat_url, build_headers, build_models_url def _resolve_model(spec: str) -> Tuple[str, str, Dict]: @@ -95,9 +95,7 @@ def _resolve_model(spec: str) -> Tuple[str, str, Dict]: for ep in endpoints: base = _normalize_base(ep.base_url) provider = _detect_provider(base) - headers = {} - if ep.api_key: - headers["Authorization"] = f"Bearer {ep.api_key}" + headers = build_headers(ep.api_key, base) if provider == "anthropic": # Anthropic: match against hardcoded model list @@ -107,27 +105,32 @@ def _resolve_model(spec: str) -> Tuple[str, str, Dict]: matched = am break if matched: - headers["x-api-key"] = ep.api_key or "" - headers["anthropic-version"] = "2023-06-01" - return base + "/v1/messages", matched, headers + return build_chat_url(base), matched, headers else: - # OpenAI-compatible: probe /models + # OpenAI-compatible and native Ollama: probe the provider's model list. try: - r = httpx.get(base + "/models", headers=headers, timeout=5) + r = httpx.get(build_models_url(base), headers=headers, timeout=5) r.raise_for_status() - model_ids = [m.get("id") for m in (r.json().get("data") or []) if m.get("id")] + data = r.json() + model_ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")] + if not model_ids: + model_ids = [ + m.get("name") or m.get("model") + for m in (data.get("models") or []) + if m.get("name") or m.get("model") + ] except Exception: model_ids = [] # Exact match first for mid in model_ids: if mid.lower() == model_name.lower(): - return base + "/chat/completions", mid, headers + return build_chat_url(base), mid, headers # Partial match for mid in model_ids: if model_name.lower() in mid.lower() or mid.lower() in model_name.lower(): - return base + "/chat/completions", mid, headers + return build_chat_url(base), mid, headers raise ValueError(f"Model '{spec}' not found on any configured endpoint") finally: @@ -1107,18 +1110,23 @@ async def do_list_models(content: str, session_id: Optional[str] = None) -> Dict for ep in endpoints: base = _normalize_base(ep.base_url) provider = _detect_provider(base) - headers = {} - if ep.api_key: - headers["Authorization"] = f"Bearer {ep.api_key}" + headers = build_headers(ep.api_key, base) model_ids = [] if provider == "anthropic": model_ids = list(ANTHROPIC_MODELS) else: try: - r = httpx.get(base + "/models", headers=headers, timeout=5) + r = httpx.get(build_models_url(base), headers=headers, timeout=5) r.raise_for_status() - model_ids = [m.get("id") for m in (r.json().get("data") or []) if m.get("id")] + data = r.json() + model_ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")] + if not model_ids: + model_ids = [ + m.get("name") or m.get("model") + for m in (data.get("models") or []) + if m.get("name") or m.get("model") + ] except Exception: model_ids = ["(endpoint offline)"] diff --git a/src/endpoint_resolver.py b/src/endpoint_resolver.py index df5eb7c..b204c7c 100644 --- a/src/endpoint_resolver.py +++ b/src/endpoint_resolver.py @@ -101,6 +101,9 @@ def normalize_base(url: str) -> str: for suffix in ["/models", "/chat/completions", "/completions", "/v1/messages"]: if url.endswith(suffix): url = url[: -len(suffix)].rstrip("/") + for suffix in ["/chat", "/tags", "/generate"]: + if url.endswith("/api" + suffix): + url = url[: -len(suffix)].rstrip("/") return url @@ -113,6 +116,20 @@ def _anthropic_api_root(base: str) -> str: return base +def _ollama_api_root(base: str) -> str: + """Return the native Ollama API root, adding /api for ollama.com hosts.""" + base = (base or "").strip().rstrip("/") + parsed = urlparse(base) + host = parsed.hostname or "" + path = (parsed.path or "").rstrip("/") + if path.endswith("/api"): + return base + if host.endswith("ollama.com"): + root = f"{parsed.scheme}://{parsed.netloc}" if parsed.scheme and parsed.netloc else "https://ollama.com" + return root.rstrip("/") + "/api" + return base + + def build_chat_url(base: str) -> str: """Return the correct chat endpoint URL for a given base.""" base = resolve_url(base) @@ -120,9 +137,23 @@ def build_chat_url(base: str) -> str: host = urlparse(base).hostname or "" if provider == "anthropic" or host.endswith("anthropic.com"): return _anthropic_api_root(base) + "/v1/messages" + if provider == "ollama" or host.endswith("ollama.com"): + return _ollama_api_root(base) + "/chat" return base + "/chat/completions" +def build_models_url(base: str) -> str: + """Return the provider-specific model-list endpoint URL for a base.""" + base = resolve_url(base) + provider = _detect_provider(base) + host = urlparse(base).hostname or "" + if provider == "anthropic" or host.endswith("anthropic.com"): + return _anthropic_api_root(base) + "/v1/models" + if provider == "ollama" or host.endswith("ollama.com"): + return _ollama_api_root(base) + "/tags" + return base + "/models" + + def build_headers(api_key: Optional[str], base: str) -> Dict[str, str]: """Build auth headers for an endpoint.""" provider = _detect_provider(base) diff --git a/src/llm_core.py b/src/llm_core.py index 60b17b2..55af620 100644 --- a/src/llm_core.py +++ b/src/llm_core.py @@ -7,6 +7,7 @@ import logging import hashlib from fastapi import HTTPException from typing import Optional, Dict, List +from urllib.parse import urlparse logger = logging.getLogger(__name__) @@ -140,9 +141,82 @@ ANTHROPIC_MODELS = [ "claude-haiku-4-20250514", "claude-haiku-4", "claude-haiku-3-5-20241022", "claude-haiku-3-5", ] + +def _is_ollama_native_url(url: str) -> bool: + """Return True for native Ollama API URLs, including Ollama Cloud.""" + try: + parsed = urlparse(url or "") + except Exception: + return False + host = parsed.hostname or "" + path = (parsed.path or "").rstrip("/") + if host.endswith("ollama.com"): + return True + local_ollama_host = host in {"localhost", "127.0.0.1", "0.0.0.0", "::1"} or parsed.port == 11434 + return local_ollama_host and (path == "/api" or path.startswith("/api/")) + + +def _ollama_api_root(url: str) -> str: + """Return a native Ollama API root such as https://ollama.com/api.""" + url = (url or "").strip().rstrip("/") + parsed = urlparse(url) + host = parsed.hostname or "" + path = (parsed.path or "").rstrip("/") + if path.endswith("/api/chat"): + return url[: -len("/chat")] + if path.endswith("/api/tags"): + return url[: -len("/tags")] + if path.endswith("/api/generate"): + return url[: -len("/generate")] + if path.endswith("/api"): + return url + if host.endswith("ollama.com"): + root = f"{parsed.scheme}://{parsed.netloc}" if parsed.scheme and parsed.netloc else "https://ollama.com" + return root.rstrip("/") + "/api" + return url + + +def _normalize_ollama_url(url: str) -> str: + """Ensure a native Ollama URL points at /api/chat.""" + base = _ollama_api_root(url) + return base.rstrip("/") + "/chat" + + +def _build_ollama_payload( + model: str, + messages: List[Dict], + temperature: float, + max_tokens: int, + stream: bool = False, + tools: Optional[List[Dict]] = None, +) -> Dict: + payload: Dict = { + "model": model, + "messages": messages, + "stream": stream, + } + options: Dict = {} + if temperature is not None: + options["temperature"] = temperature + if max_tokens and max_tokens > 0: + options["num_predict"] = max_tokens + if options: + payload["options"] = options + if tools: + payload["tools"] = tools + return payload + + +def _parse_ollama_response(data: dict) -> str: + message = data.get("message") or {} + return message.get("content") or data.get("response") or "" + + def _detect_provider(url: str) -> str: """Detect API provider from URL.""" u = (url or "").lower() + if _is_ollama_native_url(url): + return "ollama" if "anthropic.com" in u: return "anthropic" if "openrouter.ai" in u: @@ -166,6 +240,7 @@ def _provider_label(url: str) -> str: """Human-friendly provider name for error messages.""" u = (url or "").lower() if "anthropic.com" in u: return "Anthropic" + if "ollama.com" in u: return "Ollama Cloud" if "api.x.ai" in u or "x.ai/" in u: return "xAI" if "openai.com" in u: return "OpenAI" if "openrouter.ai" in u: return "OpenRouter" @@ -396,19 +471,28 @@ def _normalize_anthropic_url(url: str) -> str: def list_model_ids(base_chat_url: str, timeout: int = LLMConfig.DEFAULT_TIMEOUT, headers: Optional[Dict] = None) -> List[str]: """List available model IDs from an endpoint.""" - if _detect_provider(base_chat_url) == "anthropic": + provider = _detect_provider(base_chat_url) + if provider == "anthropic": return list(ANTHROPIC_MODELS) try: h = {} if headers: h.update(headers) - r = httpx.get(base_chat_url.replace("/chat/completions", "/models"), headers=h, timeout=timeout) + if provider == "ollama": + models_url = _ollama_api_root(base_chat_url) + "/tags" + else: + models_url = base_chat_url.replace("/chat/completions", "/models") + r = httpx.get(models_url, headers=h, timeout=timeout) r.raise_for_status() data = r.json() - ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")] - if ids: - return ids - return [m.get("name") or m.get("model") for m in (data.get("models") or []) if m.get("name") or m.get("model")] + model_ids = [m.get("id") for m in (data.get("data") or []) if m.get("id")] + if not model_ids: + model_ids = [ + m.get("name") or m.get("model") + for m in (data.get("models") or []) + if m.get("name") or m.get("model") + ] + return model_ids except Exception: try: if ":11434" in base_chat_url or "ollama" in base_chat_url.lower(): @@ -476,6 +560,9 @@ def llm_call(url: str, model: str, messages: List[Dict], temperature: float = LL target_url = _normalize_anthropic_url(url) h = _build_anthropic_headers(headers) payload = _build_anthropic_payload(model, messages_copy, temperature, max_tokens) + elif provider == "ollama": + target_url = _normalize_ollama_url(url) + payload = _build_ollama_payload(model, messages_copy, temperature, max_tokens, stream=False) else: target_url = url payload = { @@ -497,6 +584,8 @@ def llm_call(url: str, model: str, messages: List[Dict], temperature: float = LL try: if provider == "anthropic": response = _parse_anthropic_response(data) + elif provider == "ollama": + response = _parse_ollama_response(data) else: response = data["choices"][0]["message"]["content"] _set_cached_response(cache_key, response) @@ -583,6 +672,12 @@ async def llm_call_async( target_url = _normalize_anthropic_url(url) h = _build_anthropic_headers(headers) payload = _build_anthropic_payload(model, messages_copy, temperature, max_tokens) + elif provider == "ollama": + target_url = _normalize_ollama_url(url) + h = {"Content-Type": "application/json"} + if headers: + h.update(headers) + payload = _build_ollama_payload(model, messages_copy, temperature, max_tokens, stream=False) else: target_url = url h = _provider_headers(provider, headers) @@ -621,6 +716,8 @@ async def llm_call_async( try: if provider == "anthropic": response = _parse_anthropic_response(data) + elif provider == "ollama": + response = _parse_ollama_response(data) else: response = data["choices"][0]["message"]["content"] _set_cached_response(cache_key, response) @@ -673,6 +770,12 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl target_url = _normalize_anthropic_url(url) h = _build_anthropic_headers(headers) payload = _build_anthropic_payload(model, messages_copy, temperature, max_tokens, stream=True, tools=tools) + elif provider == "ollama": + target_url = _normalize_ollama_url(url) + h = {"Content-Type": "application/json"} + if headers: + h.update(headers) + payload = _build_ollama_payload(model, messages_copy, temperature, max_tokens, stream=True, tools=tools) else: target_url = url payload = { @@ -699,6 +802,62 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl return note_model_activity(target_url, model) + # ── Native Ollama streaming ── + if provider == "ollama": + _ollama_tool_calls: List[Dict] = [] + try: + client = _get_http_client() + async with client.stream('POST', target_url, json=payload, headers=h, timeout=stream_timeout) as r: + _clear_host_dead(target_url) + if r.status_code != 200: + raw = (await r.aread()).decode(errors="replace") + friendly = _format_upstream_error(r.status_code, raw, target_url) + yield f'event: error\ndata: {json.dumps({"status": r.status_code, "text": friendly, "raw": raw[:500]})}\n\n' + return + async for line in r.aiter_lines(): + if not line: + continue + try: + j = json.loads(line) + except json.JSONDecodeError: + continue + message = j.get("message") or {} + thinking = message.get("thinking") or "" + if thinking: + yield f'data: {json.dumps({"delta": thinking, "thinking": True})}\n\n' + content = message.get("content") or "" + if content: + yield f'data: {json.dumps({"delta": content})}\n\n' + for tc in message.get("tool_calls") or []: + fn = tc.get("function") or {} + if fn.get("name"): + _ollama_tool_calls.append({ + "id": tc.get("id") or f"call_{len(_ollama_tool_calls)}", + "name": fn.get("name") or "", + "arguments": json.dumps(fn.get("arguments") or {}), + }) + if j.get("done"): + if _ollama_tool_calls: + yield f'data: {json.dumps({"type": "tool_calls", "calls": _ollama_tool_calls})}\n\n' + if j.get("prompt_eval_count") is not None or j.get("eval_count") is not None: + yield f'data: {json.dumps({"type": "usage", "data": {"input_tokens": j.get("prompt_eval_count", 0), "output_tokens": j.get("eval_count", 0)}})}\n\n' + yield "data: [DONE]\n\n" + return + yield "data: [DONE]\n\n" + except (httpx.ConnectError, httpx.ConnectTimeout) as e: + _cooled = _mark_host_dead(target_url) + _tail = f" — host cooled for {DEAD_HOST_COOLDOWN:.0f}s" if _cooled else " — transient, will retry" + logger.warning(f"Ollama stream connect to {target_url} failed: {e}{_tail}") + yield f'event: error\ndata: {json.dumps({"error": f"Cannot reach {_host_key(target_url)}", "status": 503})}\n\n' + except httpx.ReadTimeout: + yield f'event: error\ndata: {json.dumps({"error": "Read timeout", "status": 504})}\n\n' + except httpx.NetworkError: + yield f'event: error\ndata: {json.dumps({"error": "Network error", "status": 502})}\n\n' + except Exception as e: + logger.error(f"Ollama stream error: {e}") + yield f'event: error\ndata: {json.dumps({"error": str(e), "status": 502})}\n\n' + return + # ── Anthropic streaming ── if provider == "anthropic": _anth_input_tokens = 0 diff --git a/src/teacher_escalation.py b/src/teacher_escalation.py index c93b709..4587c00 100644 --- a/src/teacher_escalation.py +++ b/src/teacher_escalation.py @@ -42,6 +42,7 @@ _SOTA_HOSTS = frozenset({ "api.together.xyz", "api.fireworks.ai", "api.perplexity.ai", "api.x.ai", "generativelanguage.googleapis.com", "api.groq.com", + "openrouter.ai", "ollama.com", }) diff --git a/static/index.html b/static/index.html index ab1607c..9d44cbb 100644 --- a/static/index.html +++ b/static/index.html @@ -2036,6 +2036,7 @@ + diff --git a/static/js/admin.js b/static/js/admin.js index 10947fb..4d15a4f 100644 --- a/static/js/admin.js +++ b/static/js/admin.js @@ -4,6 +4,7 @@ import uiModule from './ui.js'; import settingsModule from './settings.js'; import { providerLogo } from './providers.js'; +import { sortModelObjects } from './modelSort.js'; let initialized = false; let modalEl = null; @@ -216,7 +217,7 @@ async function _loadModelsForUser(username, allowedSet, privPanel) { return; } const allEmpty = allowedSet.size === 0; - listEl.innerHTML = allModels.map(m => { + listEl.innerHTML = sortModelObjects(allModels).map(m => { const checked = allEmpty || allowedSet.has(m.mid) ? 'checked' : ''; return `