From e084dc993ea272ee6e95b1124689cd060f7164b1 Mon Sep 17 00:00:00 2001 From: Tatlatat Date: Tue, 2 Jun 2026 18:44:13 +0700 Subject: [PATCH] Chat: merge consecutive user messages for strict providers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After a non-native tool round, the agent appends tool results as a {role: 'user'} message next to the user's original 'user' prompt, producing two consecutive 'user' messages. Strict provider APIs (Anthropic/Claude) reject consecutive same-role messages, so the follow-up generation request fails silently — search returns sources, then nothing is generated. _sanitize_llm_messages now merges consecutive 'user' messages (joining their content). Only user/user is merged; normal chat and agent/tool turns already alternate and are untouched. Scoped down per maintainer review: the agent_loop 'output' source-extraction change is already on main (#898/#901) and the broad-mocking web-sources test was dropped. Added a focused test that runs consecutive-user messages through the real _build_anthropic_payload and asserts the payload alternates correctly. --- src/llm_core.py | 39 +++++++++- tests/test_llm_core_sanitize_tool_calls.py | 83 ++++++++++++++++++++++ 2 files changed, 121 insertions(+), 1 deletion(-) diff --git a/src/llm_core.py b/src/llm_core.py index ca34437..12ef35b 100644 --- a/src/llm_core.py +++ b/src/llm_core.py @@ -586,7 +586,44 @@ def _sanitize_llm_messages(messages: List[Dict]) -> List[Dict]: cleaned.append(item) elif "content" in item: cleaned.append(item) - return cleaned + + # Merge consecutive user messages to satisfy strict role alternation requirements. + merged = [] + for item in cleaned: + if not merged: + merged.append(item) + continue + + last = merged[-1] + if last["role"] == "user" and item["role"] == "user": + last_copy = dict(last) + + # Content: + last_content = last_copy.get("content") + item_content = item.get("content") + + # Convert contents to string if they exist, or keep None/empty + last_str = str(last_content) if last_content is not None else "" + item_str = str(item_content) if item_content is not None else "" + + if last_str and item_str: + new_content = f"{last_str}\n\n{item_str}" + elif last_str: + new_content = last_str + else: + new_content = item_str if item_str else None + + if new_content is not None: + last_copy["content"] = new_content + elif "content" in last_copy: + del last_copy["content"] + + merged[-1] = last_copy + else: + merged.append(item) + + return merged + def _normalize_anthropic_url(url: str) -> str: """Ensure Anthropic URL points to /v1/messages.""" diff --git a/tests/test_llm_core_sanitize_tool_calls.py b/tests/test_llm_core_sanitize_tool_calls.py index 9186cea..c71933a 100644 --- a/tests/test_llm_core_sanitize_tool_calls.py +++ b/tests/test_llm_core_sanitize_tool_calls.py @@ -59,3 +59,86 @@ def test_sanitize_keeps_no_prose_assistant_tool_call_message(): # Pairing intact: the tool result references the assistant's tool_call id. tool = next(m for m in out if m["role"] == "tool") assert tool["tool_call_id"] == assistant["tool_calls"][0]["id"] + + +def test_sanitize_merges_consecutive_user_messages(): + messages = [ + {"role": "system", "content": "System message 1"}, + {"role": "system", "content": "System message 2"}, + {"role": "user", "content": "User message 1"}, + {"role": "user", "content": "User message 2"}, + {"role": "assistant", "content": "Assistant message 1"}, + {"role": "assistant", "content": "Assistant message 2"}, + {"role": "tool", "content": "Tool output 1", "tool_call_id": "c1"}, + {"role": "tool", "content": "Tool output 2", "tool_call_id": "c2"}, + ] + out = _sanitize_llm_messages(messages) + + # Only consecutive user messages should be merged. + # Consecutive system/assistant/tool messages are left as-is. + assert len(out) == 7 + assert out[0] == {"role": "system", "content": "System message 1"} + assert out[1] == {"role": "system", "content": "System message 2"} + assert out[2] == {"role": "user", "content": "User message 1\n\nUser message 2"} + assert out[3] == {"role": "assistant", "content": "Assistant message 1"} + assert out[4] == {"role": "assistant", "content": "Assistant message 2"} + assert out[5] == {"role": "tool", "content": "Tool output 1", "tool_call_id": "c1"} + assert out[6] == {"role": "tool", "content": "Tool output 2", "tool_call_id": "c2"} + + +def test_sanitize_merges_search_results_and_user_query(): + # Simulate the exact message sequence built by build_chat_context when web search is enabled: + # preface (system policy + search results) + session messages (latest user query) + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "UNTRUSTED SOURCE DATA\nSource: web search results\n<<>>\nHere are some web search results about python.\n<<>>"}, + {"role": "user", "content": "What is the latest version of python?"} + ] + + out = _sanitize_llm_messages(messages) + + # Assert that the consecutive user messages are successfully merged, + # preventing role alternation errors with strict LLM providers (e.g. Anthropic) + assert len(out) == 2 + assert out[0] == {"role": "system", "content": "You are a helpful assistant."} + assert out[1]["role"] == "user" + assert out[1]["content"] == ( + "UNTRUSTED SOURCE DATA\nSource: web search results\n<<>>\nHere are some web search results about python.\n<<>>" + "\n\n" + "What is the latest version of python?" + ) + + +def test_build_anthropic_payload_alternating_roles(): + from src.llm_core import _build_anthropic_payload + + # Standard messages list that has consecutive user messages (pre-merge) + messages_with_consecutive = [ + {"role": "system", "content": "system prompt"}, + {"role": "user", "content": "web search results"}, + {"role": "user", "content": "user query"} + ] + + # Sanitize and merge + sanitized = _sanitize_llm_messages(messages_with_consecutive) + + # Verify that the sanitized output merges the consecutive user messages + assert len(sanitized) == 2 + + payload = _build_anthropic_payload( + model="claude-3-5-sonnet", + messages=sanitized, + temperature=0.7, + max_tokens=1024 + ) + + # Anthropic payload has 'messages' list which contains roles alternation. + # Assert that the final message payload alternates correctly (no consecutive same role). + anth_messages = payload["messages"] + assert len(anth_messages) == 1 + assert anth_messages[0]["role"] == "user" + assert anth_messages[0]["content"] == "web search results\n\nuser query" + + + +