fix: merging consecutive user messages corrupts multimodal (image) content (#1277)
* fix: preserve multimodal content blocks when merging consecutive user messages
* test: consecutive user-message merge keeps multimodal image blocks
This commit is contained in:
@@ -576,6 +576,20 @@ def _parse_anthropic_response(data: dict) -> str:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _as_content_blocks(content) -> List[Dict]:
|
||||||
|
"""Coerce a message `content` into a list of content blocks.
|
||||||
|
|
||||||
|
A list (multimodal: text + image parts) passes through; a non-empty string
|
||||||
|
becomes a single text block; None/empty yields no blocks. Used when merging
|
||||||
|
consecutive user messages so multimodal content isn't str()-ed away.
|
||||||
|
"""
|
||||||
|
if isinstance(content, list):
|
||||||
|
return content
|
||||||
|
if content:
|
||||||
|
return [{"type": "text", "text": str(content)}]
|
||||||
|
return []
|
||||||
|
|
||||||
|
|
||||||
def _sanitize_llm_messages(messages: List[Dict]) -> List[Dict]:
|
def _sanitize_llm_messages(messages: List[Dict]) -> List[Dict]:
|
||||||
"""Strip Odysseus-only metadata before sending messages to providers.
|
"""Strip Odysseus-only metadata before sending messages to providers.
|
||||||
|
|
||||||
@@ -689,13 +703,25 @@ def _sanitize_llm_messages(messages: List[Dict]) -> List[Dict]:
|
|||||||
last = merged[-1]
|
last = merged[-1]
|
||||||
if last.get("role") == "user" and item.get("role") == "user":
|
if last.get("role") == "user" and item.get("role") == "user":
|
||||||
last_copy = dict(last)
|
last_copy = dict(last)
|
||||||
last_str = str(last_copy.get("content")) if last_copy.get("content") is not None else ""
|
lc = last_copy.get("content")
|
||||||
item_str = str(item.get("content")) if item.get("content") is not None else ""
|
ic = item.get("content")
|
||||||
new_content = "\n\n".join(part for part in (last_str, item_str) if part)
|
if isinstance(lc, list) or isinstance(ic, list):
|
||||||
if new_content:
|
# Preserve multimodal content blocks (e.g. an image part) by
|
||||||
last_copy["content"] = new_content
|
# concatenating the block lists. str()-ing a list turned an
|
||||||
|
# image message into its Python repr and dropped the image.
|
||||||
|
merged_blocks = _as_content_blocks(lc) + _as_content_blocks(ic)
|
||||||
|
if merged_blocks:
|
||||||
|
last_copy["content"] = merged_blocks
|
||||||
|
else:
|
||||||
|
last_copy.pop("content", None)
|
||||||
else:
|
else:
|
||||||
last_copy.pop("content", None)
|
last_str = str(lc) if lc is not None else ""
|
||||||
|
item_str = str(ic) if ic is not None else ""
|
||||||
|
new_content = "\n\n".join(part for part in (last_str, item_str) if part)
|
||||||
|
if new_content:
|
||||||
|
last_copy["content"] = new_content
|
||||||
|
else:
|
||||||
|
last_copy.pop("content", None)
|
||||||
merged[-1] = last_copy
|
merged[-1] = last_copy
|
||||||
else:
|
else:
|
||||||
merged.append(item)
|
merged.append(item)
|
||||||
|
|||||||
28
tests/test_sanitize_multimodal_merge.py
Normal file
28
tests/test_sanitize_multimodal_merge.py
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
"""Regression: merging consecutive user messages must not str() multimodal content."""
|
||||||
|
|
||||||
|
from src.llm_core import _sanitize_llm_messages
|
||||||
|
|
||||||
|
|
||||||
|
def test_multimodal_user_message_keeps_image_block_when_merged():
    """A multimodal user turn merged with a plain-text one keeps its image part."""
    text_part = {"type": "text", "text": "look at this"}
    image_part = {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}}
    multimodal_turn = {"role": "user", "content": [text_part, image_part]}
    followup_turn = {"role": "user", "content": "Tool result: 42"}

    sanitized = _sanitize_llm_messages([multimodal_turn, followup_turn])

    # Both consecutive user turns must collapse into a single message.
    assert len(sanitized) == 1
    merged_content = sanitized[0]["content"]
    # The merged content stays a block list and still carries the image part
    # (previously the list was str()-ed into a repr).
    assert isinstance(merged_content, list)
    assert "image_url" in (block.get("type") for block in merged_content)
    # The plain-string turn is appended as a trailing text block.
    assert merged_content[-1] == {"type": "text", "text": "Tool result: 42"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_string_only_user_merge_unchanged():
    """Two plain-string user turns are still joined with a blank line between them."""
    turns = [{"role": "user", "content": text} for text in ("first", "second")]

    sanitized = _sanitize_llm_messages(turns)

    assert len(sanitized) == 1
    assert sanitized[0]["content"] == "first\n\nsecond"
|
||||||
Reference in New Issue
Block a user