fix: persist and display multimodal messages (image/audio attachments) (#1159)

Multimodal content (a list of {type, text/image_url} blocks) couldn't be
stored in the DB Text column, causing silent persistence failures. On
reload, the frontend fell back to String() on the array, rendering
[object Object],[object Object] in the chat.

- Serialize list content as JSON in _persist_message()
- Deserialize back to list in _db_to_session() via _parse_msg_content()
- Extract text parts from multimodal arrays in sessions.js instead of
  String() coercion
This commit is contained in:
Robin Fröhlich
2026-06-02 15:37:48 +02:00
committed by GitHub
parent 6bfe824eb4
commit 096468a29f
2 changed files with 33 additions and 4 deletions

View File

@@ -29,6 +29,21 @@ def _message_timestamp_iso(value: Optional[datetime]) -> Optional[str]:
return value.isoformat().replace("+00:00", "Z")
def _parse_msg_content(raw):
    """Parse message content from DB, restoring multimodal lists.

    Multimodal content (image/audio attachments) is a list of dict blocks
    that is stored as a JSON array string. This turns such a string back
    into the list and leaves everything else untouched.

    Args:
        raw: Content value as loaded from the DB (usually a ``str``; an
            already-decoded ``list`` is passed through).

    Returns:
        The decoded ``list`` of dict blocks when ``raw`` is a JSON array of
        objects, otherwise ``raw`` unchanged.
    """
    if isinstance(raw, list):
        return raw
    # Cheap prefix/substring sniff so ordinary text never reaches the JSON
    # parser; only strings shaped like '[{... "type" ...' are candidates.
    if not (isinstance(raw, str) and raw.startswith('[{') and '"type"' in raw):
        return raw
    try:
        parsed = json.loads(raw)
    except (ValueError, RecursionError):
        # ValueError covers json.JSONDecodeError (its subclass).
        # RecursionError is raised for pathologically nested input; stored
        # text is user-authored, so it must degrade to plain text instead
        # of aborting the whole session load.
        return raw
    if isinstance(parsed, list) and all(isinstance(p, dict) for p in parsed):
        return parsed
    # Valid JSON, but not a list of blocks: treat it as ordinary text.
    return raw
class SessionManager:
"""
Manages chat sessions with database persistence.
@@ -119,7 +134,7 @@ class SessionManager:
meta.setdefault('timestamp', _message_timestamp_iso(db_msg.timestamp))
history.append(ChatMessage(
role=db_msg.role,
content=db_msg.content,
content=_parse_msg_content(db_msg.content),
metadata=meta,
))
else:
@@ -134,7 +149,7 @@ class SessionManager:
meta.setdefault('timestamp', _message_timestamp_iso(db_msg.timestamp))
history.append(ChatMessage(
role=db_msg.role,
content=db_msg.content,
content=_parse_msg_content(db_msg.content),
metadata=meta,
))
@@ -192,11 +207,17 @@ class SessionManager:
if message.metadata is None:
message.metadata = {}
message.metadata.setdefault('timestamp', _message_timestamp_iso(msg_time))
# Multimodal content (image/audio attachments) is a list — serialize
# to JSON so the Text column can store it. On reload, _db_to_session
# detects the JSON-array prefix and parses it back.
_content = message.content
if isinstance(_content, list):
_content = json.dumps(_content)
db_message = DbChatMessage(
id=msg_id,
session_id=session_id,
role=message.role,
content=message.content,
content=_content,
meta_data=json.dumps(message.metadata) if message.metadata else None,
timestamp=msg_time,
)

View File

@@ -1610,7 +1610,15 @@ export async function selectSession(id, { keepSidebar = false } = {}) {
} else if (msgHistory.length) {
for (const msg of msgHistory) {
const meta = msg.metadata ? { ...msg.metadata, _fromHistory: true } : null;
let displayContent = typeof msg.content === 'string' ? msg.content : (msg.content ? String(msg.content) : '');
let displayContent;
if (typeof msg.content === 'string') {
displayContent = msg.content;
} else if (Array.isArray(msg.content)) {
// Multimodal (image/audio attachments): extract text parts, skip binary
displayContent = msg.content.filter(p => p.type === 'text').map(p => p.text).join('\n').trim();
} else {
displayContent = '';
}
// Clean up doc selection context for display
if (msg.role === 'user') {
// Hide "Continue where you left off" bubbles