Chat: merge consecutive user messages for strict providers

After a non-native tool round, the agent appends tool results as a {role: 'user'} message next to the user's original 'user' prompt, producing two consecutive 'user' messages. Strict provider APIs (Anthropic/Claude) reject consecutive same-role messages, so the follow-up generation request fails silently — search returns sources, then nothing is generated. _sanitize_llm_messages now merges consecutive 'user' messages (joining their content). Only user/user is merged; normal chat and agent/tool turns already alternate and are untouched. Scoped down per maintainer review: the agent_loop 'output' source-extraction change is already on main (#898/#901) and the broad-mocking web-sources test was dropped. Added a focused test that runs consecutive-user messages through the real _build_anthropic_payload and asserts the payload alternates correctly.
2026-06-02 18:44:13 +07:00
parent fb0f8484d7
commit e084dc993e
2 changed files with 121 additions and 1 deletions
--- a/src/llm_core.py
+++ b/src/llm_core.py
@@ -586,7 +586,44 @@ def _sanitize_llm_messages(messages: List[Dict]) -> List[Dict]:
                cleaned.append(item)
        elif "content" in item:
            cleaned.append(item)
-    return cleaned
+
+    # Merge consecutive user messages to satisfy strict role alternation requirements.
+    merged = []
+    for item in cleaned:
+        if not merged:
+            merged.append(item)
+            continue
+
+        last = merged[-1]
+        if last["role"] == "user" and item["role"] == "user":
+            last_copy = dict(last)
+
+            # Content:
+            last_content = last_copy.get("content")
+            item_content = item.get("content")
+
+            # Convert contents to string if they exist, or keep None/empty
+            last_str = str(last_content) if last_content is not None else ""
+            item_str = str(item_content) if item_content is not None else ""
+
+            if last_str and item_str:
+                new_content = f"{last_str}\n\n{item_str}"
+            elif last_str:
+                new_content = last_str
+            else:
+                new_content = item_str if item_str else None
+
+            if new_content is not None:
+                last_copy["content"] = new_content
+            elif "content" in last_copy:
+                del last_copy["content"]
+
+            merged[-1] = last_copy
+        else:
+            merged.append(item)
+
+    return merged
+

 def _normalize_anthropic_url(url: str) -> str:
    """Ensure Anthropic URL points to /v1/messages."""
--- a/tests/test_llm_core_sanitize_tool_calls.py
+++ b/tests/test_llm_core_sanitize_tool_calls.py
@@ -59,3 +59,86 @@ def test_sanitize_keeps_no_prose_assistant_tool_call_message():
    # Pairing intact: the tool result references the assistant's tool_call id.
    tool = next(m for m in out if m["role"] == "tool")
    assert tool["tool_call_id"] == assistant["tool_calls"][0]["id"]
+
+
+def test_sanitize_merges_consecutive_user_messages():
+    messages = [
+        {"role": "system", "content": "System message 1"},
+        {"role": "system", "content": "System message 2"},
+        {"role": "user", "content": "User message 1"},
+        {"role": "user", "content": "User message 2"},
+        {"role": "assistant", "content": "Assistant message 1"},
+        {"role": "assistant", "content": "Assistant message 2"},
+        {"role": "tool", "content": "Tool output 1", "tool_call_id": "c1"},
+        {"role": "tool", "content": "Tool output 2", "tool_call_id": "c2"},
+    ]
+    out = _sanitize_llm_messages(messages)
+
+    # Only consecutive user messages should be merged.
+    # Consecutive system/assistant/tool messages are left as-is.
+    assert len(out) == 7
+    assert out[0] == {"role": "system", "content": "System message 1"}
+    assert out[1] == {"role": "system", "content": "System message 2"}
+    assert out[2] == {"role": "user", "content": "User message 1\n\nUser message 2"}
+    assert out[3] == {"role": "assistant", "content": "Assistant message 1"}
+    assert out[4] == {"role": "assistant", "content": "Assistant message 2"}
+    assert out[5] == {"role": "tool", "content": "Tool output 1", "tool_call_id": "c1"}
+    assert out[6] == {"role": "tool", "content": "Tool output 2", "tool_call_id": "c2"}
+
+
+def test_sanitize_merges_search_results_and_user_query():
+    # Simulate the exact message sequence built by build_chat_context when web search is enabled:
+    # preface (system policy + search results) + session messages (latest user query)
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "UNTRUSTED SOURCE DATA\nSource: web search results\n<<<UNTRUSTED_SOURCE_DATA>>>\nHere are some web search results about python.\n<<<END_UNTRUSTED_SOURCE_DATA>>>"},
+        {"role": "user", "content": "What is the latest version of python?"}
+    ]
+
+    out = _sanitize_llm_messages(messages)
+
+    # Assert that the consecutive user messages are successfully merged,
+    # preventing role alternation errors with strict LLM providers (e.g. Anthropic)
+    assert len(out) == 2
+    assert out[0] == {"role": "system", "content": "You are a helpful assistant."}
+    assert out[1]["role"] == "user"
+    assert out[1]["content"] == (
+        "UNTRUSTED SOURCE DATA\nSource: web search results\n<<<UNTRUSTED_SOURCE_DATA>>>\nHere are some web search results about python.\n<<<END_UNTRUSTED_SOURCE_DATA>>>"
+        "\n\n"
+        "What is the latest version of python?"
+    )
+
+
+def test_build_anthropic_payload_alternating_roles():
+    from src.llm_core import _build_anthropic_payload
+
+    # Standard messages list that has consecutive user messages (pre-merge)
+    messages_with_consecutive = [
+        {"role": "system", "content": "system prompt"},
+        {"role": "user", "content": "web search results"},
+        {"role": "user", "content": "user query"}
+    ]
+
+    # Sanitize and merge
+    sanitized = _sanitize_llm_messages(messages_with_consecutive)
+
+    # Verify that the sanitized output merges the consecutive user messages
+    assert len(sanitized) == 2
+
+    payload = _build_anthropic_payload(
+        model="claude-3-5-sonnet",
+        messages=sanitized,
+        temperature=0.7,
+        max_tokens=1024
+    )
+
+    # Anthropic payload has 'messages' list which contains roles alternation.
+    # Assert that the final message payload alternates correctly (no consecutive same role).
+    anth_messages = payload["messages"]
+    assert len(anth_messages) == 1
+    assert anth_messages[0]["role"] == "user"
+    assert anth_messages[0]["content"] == "web search results\n\nuser query"
+
+
+
+