fix: context_compactor token helpers crash on non-string message text (#1634)

* fix: context_compactor token helpers crash on non-string message text
* fix: _truncate_text_to_token_budget returns an empty string for non-string text, not the raw value
2026-06-03 06:12:14 +01:00
parent b424996a9c
commit 33ae982968
2 changed files with 31 additions and 0 deletions
--- a/src/context_compactor.py
+++ b/src/context_compactor.py
@@ -116,6 +116,8 @@ def _sanitize_tool_messages(msgs: List[Dict]) -> List[Dict]:


 def _message_text_token_estimate(text: str) -> int:
+    if not isinstance(text, str):
+        return 4
    return int(len(text) * 0.3) + 4


@@ -124,6 +126,11 @@ def _truncate_text_to_token_budget(text: str, token_budget: int) -> str:
    if token_budget <= 32:
        return "[Current user message omitted: it exceeded the model context window.]"

+    if not isinstance(text, str):
+        # Callers treat this helper's return value as text, so return an empty
+        # string rather than the raw non-string (which would move the crash
+        # into the caller that concatenates/measures the result).
+        return ""
    # Match src.model_context.estimate_tokens' rough chars * 0.3 estimate.
    max_chars = max(200, int((token_budget - 16) / 0.3))
    if len(text) <= max_chars:
--- a/tests/test_context_compactor_nonstring.py
+++ b/tests/test_context_compactor_nonstring.py
@@ -0,0 +1,24 @@
+"""Regression: context_compactor token helpers must tolerate non-string text.
+
+_message_text_token_estimate and _truncate_text_to_token_budget call len(text)
+on the message text; a None/non-string (e.g. an assistant tool-call message
+with content=None) raised TypeError. They now return a fallback (4 / "").
+"""
+from src.context_compactor import _message_text_token_estimate, _truncate_text_to_token_budget
+
+
+def test_estimate_handles_non_string():
+    assert _message_text_token_estimate(None) == 4
+    assert _message_text_token_estimate(123) == 4
+
+
+def test_truncate_returns_string_for_non_string():
+    # Returns an empty string, not the raw non-string, so callers that
+    # concatenate/measure the result don't crash downstream.
+    assert _truncate_text_to_token_budget(None, 1000) == ""
+    assert _truncate_text_to_token_budget(123, 1000) == ""
+
+
+def test_valid_text_unchanged():
+    assert _message_text_token_estimate("hello") == int(len("hello") * 0.3) + 4
+    assert _truncate_text_to_token_budget("short", 1000) == "short"