From cb13d090298a60be085fbd75ec45102f92ca1bae Mon Sep 17 00:00:00 2001
From: James Arslan
Date: Tue, 2 Jun 2026 00:34:51 +0000
Subject: [PATCH] Fix tool-calling HTTP 400 on Gemini and Ollama: send null, not empty, assistant content

When an agent turn uses native (OpenAI-style) function calling and the
model returns only tool calls with no prose, _append_tool_results built
the follow-up assistant message with content "" (empty string). Google
Gemini's OpenAI-compatible endpoint and Ollama both reject an assistant
message that carries tool_calls alongside an empty-string content with
HTTP 400. Because that message feeds the tool results back to the model,
every tool-using turn on these providers dies at the second round: the
tool runs, but the agent never produces a result.

Use None (JSON null) instead, which is the spec-correct form the OpenAI
SDK itself emits and which OpenAI and Anthropic accept too.

Adds tests covering the native tool-call content shaping.
---
 src/agent_loop.py        |  9 +++++-
 tests/test_agent_loop.py | 68 ++++++++++++++++++++++++++++++++++++++--
 2 files changed, 73 insertions(+), 4 deletions(-)

diff --git a/src/agent_loop.py b/src/agent_loop.py
index fd0f440..f776345 100644
--- a/src/agent_loop.py
+++ b/src/agent_loop.py
@@ -1054,7 +1054,14 @@ def _append_tool_results(
     """
     if used_native and native_tool_calls:
         assistant_msg = {"role": "assistant"}
-        assistant_msg["content"] = round_response if round_response.strip() else ""
+        # When the model emitted ONLY tool calls (no prose), content must be
+        # null, NOT an empty string. Google Gemini's OpenAI-compatible endpoint
+        # and Ollama both reject an assistant message that carries tool_calls
+        # alongside empty-string content with HTTP 400 ("contents is not
+        # specified" / a JSON parse error), which aborts every tool-using turn
+        # at the follow-up round. null (i.e. omitted text) is the spec-correct
+        # form the OpenAI SDK itself emits, and OpenAI/Anthropic accept it too.
+        assistant_msg["content"] = round_response if round_response.strip() else None
         if round_reasoning:
             assistant_msg["reasoning_content"] = round_reasoning
         assistant_msg["tool_calls"] = [
diff --git a/tests/test_agent_loop.py b/tests/test_agent_loop.py
index e2ba350..ca0a1c1 100644
--- a/tests/test_agent_loop.py
+++ b/tests/test_agent_loop.py
@@ -1,5 +1,5 @@
-"""Tests for agent_loop.py — _detect_admin_intent and _compute_final_metrics.
-Uses mock imports to avoid loading the full app stack."""
+"""Tests for agent_loop.py — _detect_admin_intent, _compute_final_metrics,
+and _append_tool_results. Uses mock imports to avoid loading the full app stack."""
 
 import sys
 from unittest.mock import MagicMock
@@ -15,7 +15,11 @@ for mod in [
     if mod not in sys.modules:
         sys.modules[mod] = MagicMock()
 
-from src.agent_loop import _detect_admin_intent, _compute_final_metrics
+from src.agent_loop import (
+    _detect_admin_intent,
+    _compute_final_metrics,
+    _append_tool_results,
+)
 
 
 # ---------------------------------------------------------------------------
@@ -239,3 +243,61 @@ class TestComputeFinalMetrics:
         m = _compute_final_metrics(**self._base_args(tool_events=[], round_texts=[]))
         assert "tool_events" not in m
         assert "round_texts" not in m
+
+
+# ---------------------------------------------------------------------------
+# _append_tool_results — native tool-call message shaping
+# ---------------------------------------------------------------------------
+
+class TestAppendToolResultsNativeContent:
+    """After a native tool call with no prose, the assistant message's content
+    must be JSON null (None), not an empty string. Google Gemini's
+    OpenAI-compatible endpoint and Ollama both reject `tool_calls` + ""
+    content with HTTP 400, which breaks every tool-using turn."""
+
+    def _native(self):
+        return [{"id": "call_abc", "name": "web_fetch", "arguments": '{"url": "https://example.com"}'}]
+
+    def test_empty_text_yields_null_content(self):
+        messages = []
+        _append_tool_results(
+            messages, "", self._native(), [{}], ["page text"],
+            used_native=True, round_num=1,
+        )
+        assistant = messages[0]
+        assert assistant["role"] == "assistant"
+        assert assistant["content"] is None  # NOT ""
+        assert assistant["tool_calls"][0]["id"] == "call_abc"
+        assert assistant["tool_calls"][0]["type"] == "function"
+        # tool result follows as a role:tool message keyed by tool_call_id
+        assert messages[1]["role"] == "tool"
+        assert messages[1]["tool_call_id"] == "call_abc"
+        assert messages[1]["content"] == "page text"
+
+    def test_whitespace_only_text_yields_null_content(self):
+        messages = []
+        _append_tool_results(
+            messages, " \n\t ", self._native(), [{}], ["r"],
+            used_native=True, round_num=2,
+        )
+        assert messages[0]["content"] is None
+
+    def test_real_prose_is_preserved(self):
+        messages = []
+        _append_tool_results(
+            messages, "Let me check that page.", self._native(), [{}], ["r"],
+            used_native=True, round_num=1,
+        )
+        assert messages[0]["content"] == "Let me check that page."
+
+    def test_non_native_path_unaffected(self):
+        # The text-block fallback path still wraps results in a user message.
+        messages = []
+        _append_tool_results(
+            messages, "thinking...", [], ["tool output"], [],
+            used_native=False, round_num=1,
+        )
+        assert messages[0]["role"] == "assistant"
+        assert messages[0]["content"] == "thinking..."
+        assert messages[1]["role"] == "user"
+        assert "tool output" in messages[1]["content"]