From 68da800dcb1c7d56c913021595d26702747f1824 Mon Sep 17 00:00:00 2001
From: Lucas Daniel <94806303+NoodleLDS@users.noreply.github.com>
Date: Wed, 3 Jun 2026 01:23:42 -0300
Subject: [PATCH] fix(agent): stop sending tool schemas to native Ollama
 endpoints (#1765)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Models like gemma4, qwen3.5, and ministral served via Ollama's native
/api/chat respond to OpenAI-style tool schemas by emitting a single
native tool_call chunk and then stopping. The agent loop receives
1 token of round_response and no recognised ToolBlock, so the round
ends immediately — the user sees a one-token response.

Root cause: _is_api_model was True for any endpoint whose URL contains
an entry of _API_HOSTS (which includes "host.docker.internal" and
"localhost") OR whose model name matches a keyword like "gemma". Native
Ollama endpoints were never excluded from this path.

Fix: import _is_ollama_native_url from llm_core and treat native Ollama
endpoints (/api/chat, port 11434) as text-only by default — falling back
to the fenced-block tool path the local models are tuned for. The
per-endpoint supports_tools=True toggle (Settings → Endpoints) still
overrides this for users who have explicitly opted in.

Fixes #1567
---
 src/agent_loop.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/agent_loop.py b/src/agent_loop.py
index 64c0e86..0677cc0 100644
--- a/src/agent_loop.py
+++ b/src/agent_loop.py
@@ -14,7 +14,7 @@ import time
 import logging
 from typing import AsyncGenerator, List, Dict, Optional, Set
 
-from src.llm_core import stream_llm, stream_llm_with_fallback
+from src.llm_core import stream_llm, stream_llm_with_fallback, _is_ollama_native_url
 from src.model_context import estimate_tokens
 from src.settings import get_setting
 from src.prompt_security import untrusted_context_message
@@ -1494,9 +1494,18 @@ async def stream_agent_loop(
     _model_no_tools = any(kw in _model_lc for kw in (
         "deepseek-r1",
     ))
+    # Native Ollama endpoints (/api/chat) handle tool schemas differently from
+    # the OpenAI-compat path. Models like gemma4, qwen3.5, ministral respond to
+    # tool schemas by emitting a single native tool_call token then stopping,
+    # rather than writing a fenced block — the agent loop sees 1 token and no
+    # recognised tool, so the round terminates immediately (issue #1567).
+    # Unless the endpoint is explicitly marked supports_tools=True by the user
+    # (via the endpoint settings toggle), treat Ollama-native as text-only so
+    # the fenced-block path is used instead of native function calling.
+    _is_ollama_native = _is_ollama_native_url(endpoint_url or "")
     if _endpoint_supports is True:
         _is_api_model = True
-    elif _endpoint_supports is False or _model_no_tools:
+    elif _endpoint_supports is False or _model_no_tools or _is_ollama_native:
         _is_api_model = False
     else:
         _is_api_model = any(h in endpoint_url for h in _API_HOSTS) or _model_supports_tools