From cb13d090298a60be085fbd75ec45102f92ca1bae Mon Sep 17 00:00:00 2001
From: James Arslan <james.arslan@tagd.ai>
Date: Tue, 2 Jun 2026 00:34:51 +0000
Subject: [PATCH] Fix tool-calling HTTP 400 on Gemini and Ollama: send null,
 not empty, assistant content

When an agent turn uses native (OpenAI-style) function calling and the model
returns only tool calls with no prose, _append_tool_results builds the follow-up
assistant message with content "" (empty string).

Google Gemini's OpenAI-compatible endpoint and Ollama both reject an assistant
message that carries tool_calls alongside an empty-string content with HTTP 400.
Because that message feeds the tool results back to the model, every tool-using
turn on these providers dies at the second round: the tool runs, but the agent
never produces a result.

Use None (JSON null) instead, which is the spec-correct form the OpenAI SDK
itself emits and which OpenAI and Anthropic accept too. Adds tests covering the
native tool-call content shaping.
---
 src/agent_loop.py        |  9 +++++-
 tests/test_agent_loop.py | 68 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 73 insertions(+), 4 deletions(-)

diff --git a/src/agent_loop.py b/src/agent_loop.py
index fd0f440..f776345 100644
--- a/src/agent_loop.py
+++ b/src/agent_loop.py
@@ -1054,7 +1054,14 @@ def _append_tool_results(
     """
     if used_native and native_tool_calls:
         assistant_msg = {"role": "assistant"}
-        assistant_msg["content"] = round_response if round_response.strip() else ""
+        # When the model emitted ONLY tool calls (no prose), content must be
+        # null, NOT an empty string. Google Gemini's OpenAI-compatible endpoint
+        # and Ollama both reject an assistant message that carries tool_calls
+        # alongside empty-string content with HTTP 400 ("contents is not
+        # specified" / a JSON parse error), which aborts every tool-using turn
+        # at the follow-up round. null (i.e. omitted text) is the spec-correct
+        # form the OpenAI SDK itself emits, and OpenAI/Anthropic accept it too.
+        assistant_msg["content"] = round_response if round_response.strip() else None
         if round_reasoning:
             assistant_msg["reasoning_content"] = round_reasoning
         assistant_msg["tool_calls"] = [
diff --git a/tests/test_agent_loop.py b/tests/test_agent_loop.py
index e2ba350..ca0a1c1 100644
--- a/tests/test_agent_loop.py
+++ b/tests/test_agent_loop.py
@@ -1,5 +1,5 @@
-"""Tests for agent_loop.py — _detect_admin_intent and _compute_final_metrics.
-Uses mock imports to avoid loading the full app stack."""
+"""Tests for agent_loop.py — _detect_admin_intent, _compute_final_metrics,
+and _append_tool_results. Uses mock imports to avoid loading the full app stack."""
 
 import sys
 from unittest.mock import MagicMock
@@ -15,7 +15,11 @@ for mod in [
     if mod not in sys.modules:
         sys.modules[mod] = MagicMock()
 
-from src.agent_loop import _detect_admin_intent, _compute_final_metrics
+from src.agent_loop import (
+    _detect_admin_intent,
+    _compute_final_metrics,
+    _append_tool_results,
+)
 
 
 # ---------------------------------------------------------------------------
@@ -239,3 +243,61 @@ class TestComputeFinalMetrics:
         m = _compute_final_metrics(**self._base_args(tool_events=[], round_texts=[]))
         assert "tool_events" not in m
         assert "round_texts" not in m
+
+
+# ---------------------------------------------------------------------------
+# _append_tool_results — native tool-call message shaping
+# ---------------------------------------------------------------------------
+
+class TestAppendToolResultsNativeContent:
+    """After a native tool call with no prose, the assistant message's content
+    must be JSON null (None), not an empty string. Google Gemini's
+    OpenAI-compatible endpoint and Ollama both reject `tool_calls` + ""
+    content with HTTP 400, which breaks every tool-using turn."""
+
+    def _native(self):  # one native tool call; "arguments" is a JSON-encoded string
+        return [{"id": "call_abc", "name": "web_fetch", "arguments": '{"url": "https://example.com"}'}]
+
+    def test_empty_text_yields_null_content(self):
+        messages = []  # the call under test appends to this list in place
+        _append_tool_results(
+            messages, "", self._native(), [{}], ["page text"],  # "" = no prose from the model
+            used_native=True, round_num=1,
+        )
+        assistant = messages[0]
+        assert assistant["role"] == "assistant"
+        assert assistant["content"] is None  # None (JSON null), NOT ""
+        assert assistant["tool_calls"][0]["id"] == "call_abc"
+        assert assistant["tool_calls"][0]["type"] == "function"
+        # The tool output follows as a role:tool message keyed by tool_call_id.
+        assert messages[1]["role"] == "tool"
+        assert messages[1]["tool_call_id"] == "call_abc"
+        assert messages[1]["content"] == "page text"
+
+    def test_whitespace_only_text_yields_null_content(self):
+        messages = []
+        _append_tool_results(
+            messages, "   \n\t  ", self._native(), [{}], ["r"],  # spaces, newline, tab only
+            used_native=True, round_num=2,
+        )
+        assert messages[0]["content"] is None  # whitespace-only counts as no prose
+
+    def test_real_prose_is_preserved(self):
+        messages = []
+        _append_tool_results(
+            messages, "Let me check that page.", self._native(), [{}], ["r"],
+            used_native=True, round_num=1,
+        )
+        assert messages[0]["content"] == "Let me check that page."  # passed through verbatim
+
+    def test_non_native_path_unaffected(self):
+        # With used_native=False the results come back wrapped in a user message.
+        messages = []
+        _append_tool_results(
+            messages, "thinking...", [], ["tool output"], [],  # no native tool calls
+            used_native=False, round_num=1,
+        )
+        assert messages[0]["role"] == "assistant"
+        assert messages[0]["content"] == "thinking..."
+        assert messages[1]["role"] == "user"  # not role:tool as on the native path
+        assert "tool output" in messages[1]["content"]