diff --git a/src/agent_loop.py b/src/agent_loop.py
index 0677cc0..f5a2b57 100644
--- a/src/agent_loop.py
+++ b/src/agent_loop.py
@@ -13,6 +13,7 @@ import re
 import time
 import logging
 from typing import AsyncGenerator, List, Dict, Optional, Set
+from urllib.parse import urlparse
 
 from src.llm_core import stream_llm, stream_llm_with_fallback, _is_ollama_native_url
 from src.model_context import estimate_tokens
@@ -475,6 +476,45 @@ _ADMIN_SCHEMA_NAMES = frozenset([
 ])
 _TOOL_SELECTION_TIMEOUT_SECONDS = 1.5
 
+
+def _is_ollama_openai_compat_url(endpoint_url: str) -> bool:
+    """Return True for local Ollama's OpenAI-compatible /v1 surface.
+
+    Matches any host on port 11434 with path /v1 or below; bad URLs give False.
+    Tool streaming on /v1 is uneven: some local models terminate after a token
+    with schemas, so native schemas stay opt-in via ModelEndpoint.supports_tools.
+    """
+    try:
+        parsed = urlparse(endpoint_url or "")
+        port, path = parsed.port, (parsed.path or "").rstrip("/")
+    except Exception:  # .port is parsed lazily; a malformed port raises ValueError
+        return False
+    return port == 11434 and (path == "/v1" or path.startswith("/v1/"))
+
+
+def _endpoint_lookup_keys(endpoint_url: str) -> List[str]:
+    """List candidate ModelEndpoint.base_url spellings for a runtime chat URL.
+
+    Covers the raw URL and its normalize_base() form, with and without a final /.
+    """
+    stripped_url = (endpoint_url or "").strip()
+    candidates: List[str] = []
+
+    def add(value: str):
+        text = (value or "").strip()
+        bare = text.rstrip("/")
+        for option in (text, bare, f"{bare}/" if bare else ""):
+            if option and option not in candidates:
+                candidates.append(option)
+
+    add(stripped_url)
+    try:  # best-effort: if the resolver fails, the raw-URL keys still apply
+        from src.endpoint_resolver import normalize_base
+        add(normalize_base(stripped_url))
+    except Exception:
+        pass
+    return candidates
+
 # Admin tool keywords — if the last user message contains any of these, include admin tools
 _ADMIN_KEYWORDS = [
     "session", "sessions", "chat", "chats", "conversation", "conversations",
@@ -1456,18 +1496,18 @@ async def stream_agent_loop(
     _model_lc = (model or "").lower()
     # Step 1: per-endpoint override (set at registration time from the
     # serve command — `--enable-auto-tool-choice` flips it on. UI can
-    # also toggle per endpoint). NULL = unknown, fall through to the
-    # keyword heuristic + host check.
+    # also toggle per endpoint). NULL = unknown; for local Ollama /v1 we
+    # default to fenced tools, otherwise fall through to keyword + host checks.
     _endpoint_supports: Optional[bool] = None
     try:
         from core.database import SessionLocal as _SL, ModelEndpoint as _ME
         _db = _SL()
         try:
-            _ep = _db.query(_ME).filter(_ME.base_url == endpoint_url).first()
-            if not _ep and endpoint_url:
-                _u = endpoint_url.rstrip("/")
-                _ep = _db.query(_ME).filter(_ME.base_url == _u).first() or \
-                      _db.query(_ME).filter(_ME.base_url == _u + "/").first()
+            _ep = None
+            for _key in _endpoint_lookup_keys(endpoint_url):
+                _ep = _db.query(_ME).filter(_ME.base_url == _key).first()
+                if _ep is not None:
+                    break
             if _ep is not None:
                 _endpoint_supports = _ep.supports_tools
         finally:
@@ -1503,9 +1543,15 @@ async def stream_agent_loop(
     # (via the endpoint settings toggle), treat Ollama-native as text-only so
     # the fenced-block path is used instead of native function calling.
     _is_ollama_native = _is_ollama_native_url(endpoint_url or "")
+    _ollama_openai_compat = _is_ollama_openai_compat_url(endpoint_url or "")
     if _endpoint_supports is True:
         _is_api_model = True
-    elif _endpoint_supports is False or _model_no_tools or _is_ollama_native:
+    elif (
+        _endpoint_supports is False
+        or _model_no_tools
+        or _is_ollama_native
+        or _ollama_openai_compat
+    ):
         _is_api_model = False
     else:
         _is_api_model = any(h in endpoint_url for h in _API_HOSTS) or _model_supports_tools
diff --git a/tests/test_tool_support_heuristic.py b/tests/test_tool_support_heuristic.py
index f6a8b9c..ed2dbc7 100644
--- a/tests/test_tool_support_heuristic.py
+++ b/tests/test_tool_support_heuristic.py
@@ -1,13 +1,14 @@
 """Regression tests for the tool-support heuristic in stream_agent_loop.
 
 Verifies two critical cases:
-  1. deepseek-r1 on a local Ollama endpoint must NOT enable native tool schemas
-     (Ollama returns HTTP 400 for these models when tools are sent).
+  1. local Ollama endpoints must NOT enable native tool schemas by default
+     (some models terminate after one token with schemas).
   2. api.deepseek.com must still be treated as tool-capable via the host
      allow-list (_API_HOSTS), so cloud deepseek users keep working.
 """
 import pytest
-from src.agent_loop import _API_HOSTS
+from src.agent_loop import _API_HOSTS, _endpoint_lookup_keys, _is_ollama_openai_compat_url
+from src.llm_core import _is_ollama_native_url
 
 
 def _compute_is_api_model(model: str, endpoint_url: str, endpoint_supports=None) -> bool:
@@ -28,13 +29,18 @@ def _compute_is_api_model(model: str, endpoint_url: str, endpoint_supports=None)
 
     if endpoint_supports is True:
         return True
-    if endpoint_supports is False or model_no_tools:
+    if (
+        endpoint_supports is False
+        or model_no_tools
+        or _is_ollama_native_url(endpoint_url)
+        or _is_ollama_openai_compat_url(endpoint_url)
+    ):
         return False
     return any(h in endpoint_url for h in _API_HOSTS) or model_supports_tools
 
 
 class TestDeepSeekToolSupport:
-    # --- local Ollama cases (must NOT get tool schemas) ---
+    # --- local Ollama cases (must NOT get native tool schemas by default) ---
 
     def test_deepseek_r1_7b_local_ollama_no_tools(self):
         result = _compute_is_api_model(
@@ -56,6 +62,21 @@ class TestDeepSeekToolSupport:
             "deepseek-r1:7b", "http://host.docker.internal:11434/v1"
         ) is False
 
+    def test_qwen_local_ollama_defaults_to_fenced_tools(self):
+        assert _compute_is_api_model(
+            "qwen3.5:4b", "http://localhost:11434/v1"
+        ) is False
+
+    def test_gemma_local_ollama_defaults_to_fenced_tools(self):
+        assert _compute_is_api_model(
+            "gemma4:e4b", "http://host.docker.internal:11434/v1"
+        ) is False
+
+    def test_qwen_native_ollama_defaults_to_fenced_tools(self):
+        assert _compute_is_api_model(
+            "qwen3.5:4b", "http://localhost:11434/api/chat"
+        ) is False
+
     # --- cloud API cases (must still get tool schemas) ---
 
     def test_deepseek_cloud_api_gets_tools(self):
@@ -82,6 +103,20 @@ class TestDeepSeekToolSupport:
         )
         assert result is True
 
+    def test_endpoint_supports_true_overrides_ollama_default(self):
+        """A user can still explicitly opt a known-good Ollama endpoint into
+        native schemas."""
+        result = _compute_is_api_model(
+            "qwen3.5:4b", "http://localhost:11434/v1", endpoint_supports=True
+        )
+        assert result is True
+
+    def test_endpoint_supports_true_overrides_native_ollama_default(self):
+        result = _compute_is_api_model(
+            "qwen3.5:4b", "http://localhost:11434/api/chat", endpoint_supports=True
+        )
+        assert result is True
+
     def test_endpoint_supports_false_overrides_cloud(self):
         """supports_tools=False on an endpoint gates even cloud APIs."""
         result = _compute_is_api_model(
@@ -91,11 +126,11 @@ class TestDeepSeekToolSupport:
 
     # --- other local models unaffected ---
 
-    def test_qwen_local_still_gets_tools(self):
-        assert _compute_is_api_model("qwen2.5:14b", "http://localhost:11434/v1") is True
+    def test_qwen_local_non_ollama_still_gets_tools(self):
+        assert _compute_is_api_model("qwen2.5:14b", "http://localhost:8000/v1") is True
 
-    def test_llama_local_gets_tools_via_host(self):
-        assert _compute_is_api_model("llama3.2:3b", "http://localhost:11434/v1") is True
+    def test_llama_local_non_ollama_gets_tools_via_host(self):
+        assert _compute_is_api_model("llama3.2:3b", "http://localhost:8000/v1") is True
 
 
 class TestApiHostsContainsDeepSeek:
@@ -104,3 +139,16 @@ class TestApiHostsContainsDeepSeek:
 
     def test_deepseek_com_in_api_hosts(self):
         assert "deepseek.com" in _API_HOSTS
+
+
+class TestEndpointLookupKeys:
+    def test_chat_completions_url_matches_endpoint_base(self):
+        """A chat-completions URL must offer the /v1 base in both slash forms."""
+        found = _endpoint_lookup_keys("http://localhost:11434/v1/chat/completions")
+        for expected in ("http://localhost:11434/v1", "http://localhost:11434/v1/"):
+            assert expected in found
+
+    def test_native_ollama_chat_url_matches_api_base(self):
+        """A native /api/chat URL must offer the /api base."""
+        found = _endpoint_lookup_keys("http://host.docker.internal:11434/api/chat")
+        assert "http://host.docker.internal:11434/api" in found