diff --git a/src/agent_loop.py b/src/agent_loop.py index 0677cc0..f5a2b57 100644 --- a/src/agent_loop.py +++ b/src/agent_loop.py @@ -13,6 +13,7 @@ import re import time import logging from typing import AsyncGenerator, List, Dict, Optional, Set +from urllib.parse import urlparse from src.llm_core import stream_llm, stream_llm_with_fallback, _is_ollama_native_url from src.model_context import estimate_tokens @@ -475,6 +476,45 @@ _ADMIN_SCHEMA_NAMES = frozenset([ ]) _TOOL_SELECTION_TIMEOUT_SECONDS = 1.5 + +def _is_ollama_openai_compat_url(endpoint_url: str) -> bool: + """Return True for local Ollama's OpenAI-compatible /v1 surface. + + Ollama's /v1 endpoint accepts the OpenAI chat shape, but model-level tool + streaming is uneven. Some local models terminate after a token when schemas + are present. Keep native schemas opt-in via ModelEndpoint.supports_tools. + """ + try: + parsed = urlparse(endpoint_url or "") + except Exception: + return False + path = (parsed.path or "").rstrip("/") + return parsed.port == 11434 and (path == "/v1" or path.startswith("/v1/")) + + +def _endpoint_lookup_keys(endpoint_url: str) -> List[str]: + """Candidate ModelEndpoint.base_url keys for a runtime chat URL.""" + raw = (endpoint_url or "").strip() + keys: List[str] = [] + + def add(value: str): + value = (value or "").strip() + if value and value not in keys: + keys.append(value) + trimmed = value.rstrip("/") + if trimmed and trimmed not in keys: + keys.append(trimmed) + if trimmed and f"{trimmed}/" not in keys: + keys.append(f"{trimmed}/") + + add(raw) + try: + from src.endpoint_resolver import normalize_base + add(normalize_base(raw)) + except Exception: + pass + return keys + # Admin tool keywords — if the last user message contains any of these, include admin tools _ADMIN_KEYWORDS = [ "session", "sessions", "chat", "chats", "conversation", "conversations", @@ -1456,18 +1496,18 @@ async def stream_agent_loop( _model_lc = (model or "").lower() # Step 1: per-endpoint override (set at registration time from the # serve command — `--enable-auto-tool-choice` flips it on. UI can - # also toggle per endpoint). NULL = unknown, fall through to the - # keyword heuristic + host check. + # also toggle per endpoint). NULL = unknown; for local Ollama /v1 we + # default to fenced tools, otherwise fall through to keyword + host checks. _endpoint_supports: Optional[bool] = None try: from core.database import SessionLocal as _SL, ModelEndpoint as _ME _db = _SL() try: - _ep = _db.query(_ME).filter(_ME.base_url == endpoint_url).first() - if not _ep and endpoint_url: - _u = endpoint_url.rstrip("/") - _ep = _db.query(_ME).filter(_ME.base_url == _u).first() or \ - _db.query(_ME).filter(_ME.base_url == _u + "/").first() + _ep = None + for _key in _endpoint_lookup_keys(endpoint_url): + _ep = _db.query(_ME).filter(_ME.base_url == _key).first() + if _ep is not None: + break if _ep is not None: _endpoint_supports = _ep.supports_tools finally: @@ -1503,9 +1543,15 @@ async def stream_agent_loop( # (via the endpoint settings toggle), treat Ollama-native as text-only so # the fenced-block path is used instead of native function calling. _is_ollama_native = _is_ollama_native_url(endpoint_url or "") + _ollama_openai_compat = _is_ollama_openai_compat_url(endpoint_url or "") if _endpoint_supports is True: _is_api_model = True - elif _endpoint_supports is False or _model_no_tools or _is_ollama_native: + elif ( + _endpoint_supports is False + or _model_no_tools + or _is_ollama_native + or _ollama_openai_compat + ): _is_api_model = False else: _is_api_model = any(h in endpoint_url for h in _API_HOSTS) or _model_supports_tools diff --git a/tests/test_tool_support_heuristic.py b/tests/test_tool_support_heuristic.py index f6a8b9c..ed2dbc7 100644 --- a/tests/test_tool_support_heuristic.py +++ b/tests/test_tool_support_heuristic.py @@ -1,13 +1,14 @@ """Regression tests for the tool-support heuristic in stream_agent_loop. Verifies two critical cases: - 1. deepseek-r1 on a local Ollama endpoint must NOT enable native tool schemas - (Ollama returns HTTP 400 for these models when tools are sent). + 1. local Ollama endpoints must NOT enable native tool schemas by default + (some models terminate after one token with schemas). 2. api.deepseek.com must still be treated as tool-capable via the host allow-list (_API_HOSTS), so cloud deepseek users keep working. """ import pytest -from src.agent_loop import _API_HOSTS +from src.agent_loop import _API_HOSTS, _endpoint_lookup_keys, _is_ollama_openai_compat_url +from src.llm_core import _is_ollama_native_url def _compute_is_api_model(model: str, endpoint_url: str, endpoint_supports=None) -> bool: @@ -28,13 +29,18 @@ def _compute_is_api_model(model: str, endpoint_url: str, endpoint_supports=None) if endpoint_supports is True: return True - if endpoint_supports is False or model_no_tools: + if ( + endpoint_supports is False + or model_no_tools + or _is_ollama_native_url(endpoint_url) + or _is_ollama_openai_compat_url(endpoint_url) + ): return False return any(h in endpoint_url for h in _API_HOSTS) or model_supports_tools class TestDeepSeekToolSupport: - # --- local Ollama cases (must NOT get tool schemas) --- + # --- local Ollama cases (must NOT get native tool schemas by default) --- def test_deepseek_r1_7b_local_ollama_no_tools(self): result = _compute_is_api_model( @@ -56,6 +62,21 @@ class TestDeepSeekToolSupport: "deepseek-r1:7b", "http://host.docker.internal:11434/v1" ) is False + def test_qwen_local_ollama_defaults_to_fenced_tools(self): + assert _compute_is_api_model( + "qwen3.5:4b", "http://localhost:11434/v1" + ) is False + + def test_gemma_local_ollama_defaults_to_fenced_tools(self): + assert _compute_is_api_model( + "gemma4:e4b", "http://host.docker.internal:11434/v1" + ) is False + + def test_qwen_native_ollama_defaults_to_fenced_tools(self): + assert _compute_is_api_model( + "qwen3.5:4b", "http://localhost:11434/api/chat" + ) is False + # --- cloud API cases (must still get tool schemas) --- def test_deepseek_cloud_api_gets_tools(self): @@ -82,6 +103,20 @@ class TestDeepSeekToolSupport: ) assert result is True + def test_endpoint_supports_true_overrides_ollama_default(self): + """A user can still explicitly opt a known-good Ollama endpoint into + native schemas.""" + result = _compute_is_api_model( + "qwen3.5:4b", "http://localhost:11434/v1", endpoint_supports=True + ) + assert result is True + + def test_endpoint_supports_true_overrides_native_ollama_default(self): + result = _compute_is_api_model( + "qwen3.5:4b", "http://localhost:11434/api/chat", endpoint_supports=True + ) + assert result is True + def test_endpoint_supports_false_overrides_cloud(self): """supports_tools=False on an endpoint gates even cloud APIs.""" result = _compute_is_api_model( @@ -91,11 +126,11 @@ class TestDeepSeekToolSupport: # --- other local models unaffected --- - def test_qwen_local_still_gets_tools(self): - assert _compute_is_api_model("qwen2.5:14b", "http://localhost:11434/v1") is True + def test_qwen_local_non_ollama_still_gets_tools(self): + assert _compute_is_api_model("qwen2.5:14b", "http://localhost:8000/v1") is True - def test_llama_local_gets_tools_via_host(self): - assert _compute_is_api_model("llama3.2:3b", "http://localhost:11434/v1") is True + def test_llama_local_non_ollama_gets_tools_via_host(self): + assert _compute_is_api_model("llama3.2:3b", "http://localhost:8000/v1") is True class TestApiHostsContainsDeepSeek: @@ -104,3 +139,16 @@ class TestApiHostsContainsDeepSeek: def test_deepseek_com_in_api_hosts(self): assert "deepseek.com" in _API_HOSTS + + +class TestEndpointLookupKeys: + def test_chat_completions_url_matches_endpoint_base(self): + keys = _endpoint_lookup_keys("http://localhost:11434/v1/chat/completions") + + assert "http://localhost:11434/v1" in keys + assert "http://localhost:11434/v1/" in keys + + def test_native_ollama_chat_url_matches_api_base(self): + keys = _endpoint_lookup_keys("http://host.docker.internal:11434/api/chat") + + assert "http://host.docker.internal:11434/api" in keys