Stop conversations crashing during compaction on tool-call turns (#1777)

context_compactor.maybe_compact built its summary text with
msg.get('content', '')[:2000], which raised
TypeError: 'NoneType' object is not subscriptable on assistant turns
whose content is None (turns that carried only native tool_calls).
Once a conversation crossed the 85% compaction threshold — reached
after only a few turns on small-context local models plus the large
agent prompt — every subsequent message failed ("send more than three
messages and it stops working").

Flatten message content to text first via a _content_as_text helper
(str passthrough, multimodal list blocks joined, None -> "") and
tolerate a missing role. Adds tests/test_context_compactor.py covering
the helper and a >=4-message conversation that forces compaction with
a None-content tool-call turn (fails before this change, passes after).

Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
clockworksquirrel
2026-06-03 00:25:33 -04:00
committed by GitHub
parent 12696a05ae
commit 2625e97f11
2 changed files with 129 additions and 1 deletions

View File

@@ -15,6 +15,26 @@ from core.models import ChatMessage
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def _content_as_text(content: Any) -> str:
"""Flatten a message's content to plain text.
Handles the three shapes that flow through history: a plain string, a
multimodal list of content blocks (vision/image attachments), and None
(assistant turns that carried only native tool_calls persist content as
None). Returns "" for anything without text so callers can safely slice
the result.
"""
if isinstance(content, str):
return content
if isinstance(content, list):
return " ".join(
b.get("text", "") for b in content
if isinstance(b, dict) and b.get("text")
)
return ""
COMPACT_THRESHOLD = 0.85 # Trigger compaction at 85% of context window COMPACT_THRESHOLD = 0.85 # Trigger compaction at 85% of context window
SUMMARY_MAX_TOKENS = 1024 SUMMARY_MAX_TOKENS = 1024
SMALL_CONTEXT_LIMIT = 8192 # Models with context <= this get aggressive trimming SMALL_CONTEXT_LIMIT = 8192 # Models with context <= this get aggressive trimming
@@ -274,7 +294,7 @@ async def maybe_compact(
# Build the text to summarize # Build the text to summarize
convo_text = "\n".join( convo_text = "\n".join(
f"{msg['role'].upper()}: {msg.get('content', '')[:2000]}" f"{msg.get('role', 'user').upper()}: {_content_as_text(msg.get('content'))[:2000]}"
for msg in older for msg in older
) )

View File

@@ -1,9 +1,12 @@
"""Tests for context_compactor.py — constants and prompt templates. """Tests for context_compactor.py — constants and prompt templates.
Uses mock imports to avoid loading the full app stack.""" Uses mock imports to avoid loading the full app stack."""
import asyncio
import sys import sys
from unittest.mock import MagicMock from unittest.mock import MagicMock
import pytest
# Mock heavy dependencies before importing # Mock heavy dependencies before importing
for mod in [ for mod in [
'sqlalchemy', 'sqlalchemy.orm', 'sqlalchemy.ext', 'sqlalchemy.ext.declarative', 'sqlalchemy', 'sqlalchemy.orm', 'sqlalchemy.ext', 'sqlalchemy.ext.declarative',
@@ -14,10 +17,13 @@ for mod in [
if mod not in sys.modules: if mod not in sys.modules:
sys.modules[mod] = MagicMock() sys.modules[mod] = MagicMock()
import src.context_compactor as cc
from src.context_compactor import ( from src.context_compactor import (
COMPACT_THRESHOLD, COMPACT_THRESHOLD,
SELF_SUMMARY_SYSTEM_PROMPT, SELF_SUMMARY_SYSTEM_PROMPT,
SUMMARY_MAX_TOKENS, SUMMARY_MAX_TOKENS,
_content_as_text,
maybe_compact,
trim_for_context, trim_for_context,
) )
@@ -84,3 +90,105 @@ class TestTrimForContext:
assert trimmed[-1]["role"] == "user" assert trimmed[-1]["role"] == "user"
assert "pasted message was too large" in trimmed[-1]["content"] assert "pasted message was too large" in trimmed[-1]["content"]
assert "old-0" not in "\n".join(str(m.get("content", "")) for m in trimmed) assert "old-0" not in "\n".join(str(m.get("content", "")) for m in trimmed)
class TestContentAsText:
def test_string_passthrough(self):
assert _content_as_text("hello") == "hello"
def test_none_returns_empty(self):
# Assistant turns that carried only native tool_calls persist
# content as None — flattening must not raise.
assert _content_as_text(None) == ""
def test_list_content_joins_text_blocks(self):
content = [
{"type": "text", "text": "describe this"},
{"type": "image_url", "image_url": {"url": "data:..."}},
]
assert _content_as_text(content) == "describe this"
def test_unknown_type_returns_empty(self):
assert _content_as_text(42) == ""
class TestMaybeCompactFourthMessage:
"""Regression: a multi-message conversation must not crash compaction when
a prior assistant turn used native tool_calls (content == None). This was
the '4th message stops working' bug — on a small-context model the soft
85% threshold is crossed after a few turns, and the older half being
summarized contained a None-content assistant message, which raised
TypeError: 'NoneType' object is not subscriptable and broke the request."""
def _run(self, messages, *, context_length=500):
# Force compaction to trigger and stub the summary LLM call so the test
# is hermetic (no network, no real endpoint resolution).
orig_ctx = cc.get_context_length
orig_call = cc.llm_call_async
orig_resolve = cc.resolve_endpoint
orig_update = cc._update_session_history
async def _fake_summary(*a, **k):
return "compact summary text"
cc.get_context_length = lambda url, model: context_length
cc.llm_call_async = _fake_summary
cc.resolve_endpoint = lambda which: (None, None, None)
cc._update_session_history = lambda *a, **k: None
try:
return asyncio.run(
maybe_compact(
session=None,
endpoint_url="http://local/v1/chat/completions",
model="local-model",
messages=list(messages),
headers={},
)
)
finally:
cc.get_context_length = orig_ctx
cc.llm_call_async = orig_call
cc.resolve_endpoint = orig_resolve
cc._update_session_history = orig_update
def _four_turn_history_with_tool_call(self):
# Large system prompt so the conversation crosses the 85% threshold of
# the tiny (context_length=500) window used in _run, forcing the real
# compaction branch to execute.
return [
{"role": "system", "content": "You are a helpful agent. " * 200},
{"role": "user", "content": "turn 1: search the web"},
# Native tool call → content is None (matches agent_loop persistence)
{"role": "assistant", "content": None,
"tool_calls": [{"id": "c1", "type": "function",
"function": {"name": "web_search", "arguments": "{}"}}]},
{"role": "tool", "tool_call_id": "c1", "content": "search results"},
{"role": "assistant", "content": "Here is what I found."},
{"role": "user", "content": "turn 2"},
{"role": "assistant", "content": "reply 2"},
{"role": "user", "content": "turn 3"},
{"role": "assistant", "content": "reply 3"},
{"role": "user", "content": "turn 4 — previously broke here"},
]
def test_does_not_crash_on_none_content_turn(self):
# Must not raise TypeError; returns the 3-tuple contract.
result = self._run(self._four_turn_history_with_tool_call())
assert isinstance(result, tuple) and len(result) == 3
compacted_messages, context_length, was_compacted = result
assert isinstance(compacted_messages, list)
assert was_compacted is True
# The summary the model produced is present and a system message.
assert any(
m.get("role") == "system" and "compact summary text" in (m.get("content") or "")
for m in compacted_messages
)
def test_handles_multimodal_list_content(self):
messages = self._four_turn_history_with_tool_call()
messages[1] = {"role": "user", "content": [
{"type": "text", "text": "look at this image"},
{"type": "image_url", "image_url": {"url": "data:image/png;base64,xxxx"}},
]}
result = self._run(messages)
assert len(result) == 3 and result[2] is True