fix(agent): stop sending tool schemas to native Ollama endpoints (#1765)
Models like gemma4, qwen3.5, and ministral served via Ollama's native /api/chat respond to OpenAI-style tool schemas by emitting a single native tool_call chunk and then stopping. The agent loop receives 1 token of round_response and no recognised ToolBlock, so the round ends immediately — the user sees a one-token response.

Root cause: _is_api_model was True for any endpoint whose host appears in _API_HOSTS (which includes "host.docker.internal" and "localhost") OR whose model name matches a keyword like "gemma". Native Ollama endpoints were never excluded from this path.

Fix: import _is_ollama_native_url from llm_core and treat native Ollama endpoints (/api/chat, port 11434) as text-only by default, falling back to the fenced-block tool path the local models are tuned for. The per-endpoint supports_tools=True toggle (Settings → Endpoints) still overrides this for users who have explicitly opted in.

Fixes #1567
This commit is contained in:
@@ -14,7 +14,7 @@ import time
|
||||
import logging
|
||||
from typing import AsyncGenerator, List, Dict, Optional, Set
|
||||
|
||||
from src.llm_core import stream_llm, stream_llm_with_fallback
|
||||
from src.llm_core import stream_llm, stream_llm_with_fallback, _is_ollama_native_url
|
||||
from src.model_context import estimate_tokens
|
||||
from src.settings import get_setting
|
||||
from src.prompt_security import untrusted_context_message
|
||||
@@ -1494,9 +1494,18 @@ async def stream_agent_loop(
|
||||
_model_no_tools = any(kw in _model_lc for kw in (
|
||||
"deepseek-r1",
|
||||
))
|
||||
# Native Ollama endpoints (/api/chat) handle tool schemas differently from
|
||||
# the OpenAI-compat path. Models like gemma4, qwen3.5, ministral respond to
|
||||
# tool schemas by emitting a single native tool_call token then stopping,
|
||||
# rather than writing a fenced block — the agent loop sees 1 token and no
|
||||
# recognised tool, so the round terminates immediately (issue #1567).
|
||||
# Unless the endpoint is explicitly marked supports_tools=True by the user
|
||||
# (via the endpoint settings toggle), treat Ollama-native as text-only so
|
||||
# the fenced-block path is used instead of native function calling.
|
||||
_is_ollama_native = _is_ollama_native_url(endpoint_url or "")
|
||||
if _endpoint_supports is True:
|
||||
_is_api_model = True
|
||||
elif _endpoint_supports is False or _model_no_tools:
|
||||
elif _endpoint_supports is False or _model_no_tools or _is_ollama_native:
|
||||
_is_api_model = False
|
||||
else:
|
||||
_is_api_model = any(h in endpoint_url for h in _API_HOSTS) or _model_supports_tools
|
||||
|
||||
Reference in New Issue
Block a user