Fix Ollama agent single-token responses (#1591)

Agent mode treated local /v1 endpoints, including Ollama on :11434, as native-tool-capable based on host/model heuristics. On Ollama's OpenAI-compatible surface, some models that advertise tool support stop after a single token when tool schemas are sent (issue #1567). Local Ollama /v1 endpoints now default back to fenced tool blocks unless the endpoint explicitly sets supports_tools=True.

Also compare both the runtime chat URL and the normalized endpoint base when reading ModelEndpoint.supports_tools. That keeps a saved base URL such as http://localhost:11434/v1 effective when the active session URL is /v1/chat/completions.

Tests: .venv/bin/python -m pytest tests/test_tool_support_heuristic.py
This commit is contained in:
Marius Popa
2026-06-04 13:45:10 +03:00
committed by GitHub
parent 965185c6f9
commit dc365a1b27
2 changed files with 111 additions and 17 deletions

View File

@@ -13,6 +13,7 @@ import re
import time
import logging
from typing import AsyncGenerator, List, Dict, Optional, Set
from urllib.parse import urlparse
from src.llm_core import stream_llm, stream_llm_with_fallback, _is_ollama_native_url
from src.model_context import estimate_tokens
@@ -475,6 +476,45 @@ _ADMIN_SCHEMA_NAMES = frozenset([
])
_TOOL_SELECTION_TIMEOUT_SECONDS = 1.5
def _is_ollama_openai_compat_url(endpoint_url: str) -> bool:
"""Return True for local Ollama's OpenAI-compatible /v1 surface.
Ollama's /v1 endpoint accepts the OpenAI chat shape, but model-level tool
streaming is uneven. Some local models terminate after a token when schemas
are present. Keep native schemas opt-in via ModelEndpoint.supports_tools.
"""
try:
parsed = urlparse(endpoint_url or "")
except Exception:
return False
path = (parsed.path or "").rstrip("/")
return parsed.port == 11434 and (path == "/v1" or path.startswith("/v1/"))
def _endpoint_lookup_keys(endpoint_url: str) -> List[str]:
"""Candidate ModelEndpoint.base_url keys for a runtime chat URL."""
raw = (endpoint_url or "").strip()
keys: List[str] = []
def add(value: str):
value = (value or "").strip()
if value and value not in keys:
keys.append(value)
trimmed = value.rstrip("/")
if trimmed and trimmed not in keys:
keys.append(trimmed)
if trimmed and f"{trimmed}/" not in keys:
keys.append(f"{trimmed}/")
add(raw)
try:
from src.endpoint_resolver import normalize_base
add(normalize_base(raw))
except Exception:
pass
return keys
# Admin tool keywords — if the last user message contains any of these, include admin tools
_ADMIN_KEYWORDS = [
"session", "sessions", "chat", "chats", "conversation", "conversations",
@@ -1456,18 +1496,18 @@ async def stream_agent_loop(
_model_lc = (model or "").lower()
# Step 1: per-endpoint override (set at registration time from the
# serve command — `--enable-auto-tool-choice` flips it on. UI can
# also toggle per endpoint). NULL = unknown, fall through to the
# keyword heuristic + host check.
# also toggle per endpoint). NULL = unknown; for local Ollama /v1 we
# default to fenced tools, otherwise fall through to keyword + host checks.
_endpoint_supports: Optional[bool] = None
try:
from core.database import SessionLocal as _SL, ModelEndpoint as _ME
_db = _SL()
try:
_ep = _db.query(_ME).filter(_ME.base_url == endpoint_url).first()
if not _ep and endpoint_url:
_u = endpoint_url.rstrip("/")
_ep = _db.query(_ME).filter(_ME.base_url == _u).first() or \
_db.query(_ME).filter(_ME.base_url == _u + "/").first()
_ep = None
for _key in _endpoint_lookup_keys(endpoint_url):
_ep = _db.query(_ME).filter(_ME.base_url == _key).first()
if _ep is not None:
break
if _ep is not None:
_endpoint_supports = _ep.supports_tools
finally:
@@ -1503,9 +1543,15 @@ async def stream_agent_loop(
# (via the endpoint settings toggle), treat Ollama-native as text-only so
# the fenced-block path is used instead of native function calling.
_is_ollama_native = _is_ollama_native_url(endpoint_url or "")
_ollama_openai_compat = _is_ollama_openai_compat_url(endpoint_url or "")
if _endpoint_supports is True:
_is_api_model = True
elif _endpoint_supports is False or _model_no_tools or _is_ollama_native:
elif (
_endpoint_supports is False
or _model_no_tools
or _is_ollama_native
or _ollama_openai_compat
):
_is_api_model = False
else:
_is_api_model = any(h in endpoint_url for h in _API_HOSTS) or _model_supports_tools