diff --git a/src/agent_loop.py b/src/agent_loop.py index 64c0e86..0677cc0 100644 --- a/src/agent_loop.py +++ b/src/agent_loop.py @@ -14,7 +14,7 @@ import time import logging from typing import AsyncGenerator, List, Dict, Optional, Set -from src.llm_core import stream_llm, stream_llm_with_fallback +from src.llm_core import stream_llm, stream_llm_with_fallback, _is_ollama_native_url from src.model_context import estimate_tokens from src.settings import get_setting from src.prompt_security import untrusted_context_message @@ -1494,9 +1494,18 @@ async def stream_agent_loop( _model_no_tools = any(kw in _model_lc for kw in ( "deepseek-r1", )) + # Native Ollama endpoints (/api/chat) handle tool schemas differently from + # the OpenAI-compat path. Models like gemma4, qwen3.5, ministral respond to + # tool schemas by emitting a single native tool_call token then stopping, + # rather than writing a fenced block — the agent loop sees 1 token and no + # recognised tool, so the round terminates immediately (issue #1567). + # Unless the endpoint is explicitly marked supports_tools=True by the user + # (via the endpoint settings toggle), treat Ollama-native as text-only so + # the fenced-block path is used instead of native function calling. + _is_ollama_native = _is_ollama_native_url(endpoint_url or "") if _endpoint_supports is True: _is_api_model = True - elif _endpoint_supports is False or _model_no_tools: + elif _endpoint_supports is False or _model_no_tools or _is_ollama_native: _is_api_model = False else: _is_api_model = any(h in endpoint_url for h in _API_HOSTS) or _model_supports_tools