Keep no-prose assistant tool-call messages through _sanitize_llm_messages (#862)
cb13d09 made _append_tool_results emit content=None (JSON null) for a follow-up
assistant message that carries only tool_calls and no prose, because Gemini's
OpenAI-compatible endpoint and Ollama reject tool_calls alongside an
empty-string content with HTTP 400.
But _sanitize_llm_messages strips None values and then required "content" on
every message, so it dropped that assistant message entirely — leaving the
role:"tool" result dangling with no parent tool_calls, which breaks the
follow-up round for every provider (and regresses ones that accepted "" before,
since the message is now removed rather than sent). cb13d09's tests covered
_append_tool_results in isolation, so the sanitizer interaction was uncaught.
Make the sanitizer role-aware: assistant messages survive with content OR
tool_calls, and a tool-calls-only assistant message gets an explicit
content=None re-added so the provider receives spec-correct `content: null`.
tool messages still require content + tool_call_id; user/system still require
content.
Adds tests/test_llm_core_sanitize_tool_calls.py, which drives the real producer
(_append_tool_results) into the sanitizer and asserts the assistant tool-call
message survives with its tool result paired. Red before this change, green
after.
This commit is contained in:
@@ -502,14 +502,37 @@ def _parse_anthropic_response(data: dict) -> str:
|
|||||||
|
|
||||||
|
|
||||||
def _sanitize_llm_messages(messages: List[Dict]) -> List[Dict]:
|
def _sanitize_llm_messages(messages: List[Dict]) -> List[Dict]:
|
||||||
"""Strip Odysseus-only metadata before sending messages to providers."""
|
"""Strip Odysseus-only metadata before sending messages to providers.
|
||||||
|
|
||||||
|
Per the OpenAI chat format: user/system messages must have content; a tool
|
||||||
|
message needs content + tool_call_id; an assistant message may carry content,
|
||||||
|
tool_calls, or both. The old guard required content on every message, which
|
||||||
|
dropped a valid assistant message that has only tool_calls — e.g. the
|
||||||
|
follow-up message _append_tool_results builds for a no-prose native tool call
|
||||||
|
(content=None, since Gemini/Ollama reject tool_calls alongside ""). Dropping
|
||||||
|
it leaves the tool result dangling and breaks the next round.
|
||||||
|
"""
|
||||||
allowed = {"role", "content", "name", "tool_call_id", "tool_calls", "function_call"}
|
allowed = {"role", "content", "name", "tool_call_id", "tool_calls", "function_call"}
|
||||||
cleaned = []
|
cleaned = []
|
||||||
for msg in messages or []:
|
for msg in messages or []:
|
||||||
if not isinstance(msg, dict):
|
if not isinstance(msg, dict):
|
||||||
continue
|
continue
|
||||||
item = {k: v for k, v in msg.items() if k in allowed and v is not None}
|
item = {k: v for k, v in msg.items() if k in allowed and v is not None}
|
||||||
if "role" in item and "content" in item:
|
role = item.get("role")
|
||||||
|
if not role:
|
||||||
|
continue
|
||||||
|
if role == "assistant":
|
||||||
|
# Re-add an explicit content=None when the message is tool-calls-only
|
||||||
|
# (the None was stripped above) so the provider gets the spec-correct
|
||||||
|
# `content: null`, not an omitted key.
|
||||||
|
if "content" not in item and item.get("tool_calls"):
|
||||||
|
item["content"] = None
|
||||||
|
if "content" in item or item.get("tool_calls"):
|
||||||
|
cleaned.append(item)
|
||||||
|
elif role == "tool":
|
||||||
|
if "content" in item and "tool_call_id" in item:
|
||||||
|
cleaned.append(item)
|
||||||
|
elif "content" in item:
|
||||||
cleaned.append(item)
|
cleaned.append(item)
|
||||||
return cleaned
|
return cleaned
|
||||||
|
|
||||||
|
|||||||
61
tests/test_llm_core_sanitize_tool_calls.py
Normal file
61
tests/test_llm_core_sanitize_tool_calls.py
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
"""Regression test: _sanitize_llm_messages must not drop the no-prose
|
||||||
|
assistant tool-call message.
|
||||||
|
|
||||||
|
Commit cb13d09 changed _append_tool_results so that when the model emits ONLY
|
||||||
|
tool calls (no prose), the follow-up assistant message carries content=None
|
||||||
|
(JSON null) instead of "" — Google Gemini's OpenAI-compatible endpoint and
|
||||||
|
Ollama reject tool_calls alongside an empty-string content with HTTP 400.
|
||||||
|
|
||||||
|
But _sanitize_llm_messages drops None values (`v is not None`) and then required
|
||||||
|
"content" to be present, so it dropped that assistant message entirely — leaving
|
||||||
|
a dangling role:"tool" result with no parent tool_calls. That re-breaks native
|
||||||
|
tool-calling on the follow-up round (and regresses providers that accepted ""
|
||||||
|
before, since the message is now removed instead of sent). cb13d09's tests only
|
||||||
|
exercised _append_tool_results in isolation, so the sanitizer interaction went
|
||||||
|
uncaught.
|
||||||
|
|
||||||
|
This test drives the real producer (_append_tool_results) into the sanitizer.
|
||||||
|
"""
|
||||||
|
import sys
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
# Mock heavy dependencies before importing (mirrors tests/test_agent_loop.py).
|
||||||
|
for mod in [
|
||||||
|
'sqlalchemy', 'sqlalchemy.orm', 'sqlalchemy.ext', 'sqlalchemy.ext.declarative',
|
||||||
|
'sqlalchemy.ext.hybrid', 'sqlalchemy.sql', 'sqlalchemy.sql.expression',
|
||||||
|
'src.database', 'src.agent_tools', 'core.models', 'core.database',
|
||||||
|
]:
|
||||||
|
if mod not in sys.modules:
|
||||||
|
sys.modules[mod] = MagicMock()
|
||||||
|
|
||||||
|
from src.agent_loop import _append_tool_results
|
||||||
|
from src.llm_core import _sanitize_llm_messages
|
||||||
|
|
||||||
|
|
||||||
|
def test_sanitize_keeps_no_prose_assistant_tool_call_message():
|
||||||
|
native = [{"id": "call_1", "name": "web_fetch",
|
||||||
|
"arguments": '{"url": "https://example.com"}'}]
|
||||||
|
messages = []
|
||||||
|
# Model emitted only a tool call, no prose -> _append_tool_results sets the
|
||||||
|
# assistant message's content to None (cb13d09).
|
||||||
|
_append_tool_results(messages, "", native, [{}], ["page text"],
|
||||||
|
used_native=True, round_num=1)
|
||||||
|
assert messages[0]["role"] == "assistant"
|
||||||
|
assert messages[0]["content"] is None # producer contract (cb13d09)
|
||||||
|
|
||||||
|
out = _sanitize_llm_messages(messages)
|
||||||
|
roles = [m["role"] for m in out]
|
||||||
|
|
||||||
|
# The assistant tool-call message must survive sanitization, otherwise the
|
||||||
|
# following tool result is dangling and the provider call breaks.
|
||||||
|
assert "assistant" in roles, (
|
||||||
|
"sanitize dropped the no-prose assistant tool-call message; the tool "
|
||||||
|
"result is left dangling"
|
||||||
|
)
|
||||||
|
assistant = next(m for m in out if m["role"] == "assistant")
|
||||||
|
assert assistant.get("tool_calls"), "assistant tool_calls were lost"
|
||||||
|
# Faithful to cb13d09: keep explicit JSON null rather than an omitted key.
|
||||||
|
assert assistant["content"] is None
|
||||||
|
# Pairing intact: the tool result references the assistant's tool_call id.
|
||||||
|
tool = next(m for m in out if m["role"] == "tool")
|
||||||
|
assert tool["tool_call_id"] == assistant["tool_calls"][0]["id"]
|
||||||
Reference in New Issue
Block a user