fix: context_compactor token helpers crash on non-string message text (#1634)

* fix: context_compactor token helpers crash on non-string message text

* fix: _truncate_text_to_token_budget returns an empty string for non-string text, not the raw value
This commit is contained in:
Afonso Coutinho
2026-06-03 06:12:14 +01:00
committed by GitHub
parent b424996a9c
commit 33ae982968
2 changed files with 31 additions and 0 deletions

View File

@@ -116,6 +116,8 @@ def _sanitize_tool_messages(msgs: List[Dict]) -> List[Dict]:
def _message_text_token_estimate(text: str) -> int:
if not isinstance(text, str):
return 4
return int(len(text) * 0.3) + 4
@@ -124,6 +126,11 @@ def _truncate_text_to_token_budget(text: str, token_budget: int) -> str:
if token_budget <= 32:
return "[Current user message omitted: it exceeded the model context window.]"
if not isinstance(text, str):
# This helper is typed/used as text downstream, so return an empty
# string rather than the raw non-string (which would move the crash
# into the caller that concatenates/measures the result).
return ""
# Match src.model_context.estimate_tokens' rough chars * 0.3 estimate.
max_chars = max(200, int((token_budget - 16) / 0.3))
if len(text) <= max_chars:

View File

@@ -0,0 +1,24 @@
"""Regression: context_compactor token helpers must tolerate non-string text.
_message_text_token_estimate and _truncate_text_to_token_budget call len(text)
on the message text; a None/non-string (e.g. an assistant tool-call message
with content=None) raised TypeError. They now return a fixed fallback instead
(an estimate of 4 tokens, and an empty string, respectively).
"""
from src.context_compactor import _message_text_token_estimate, _truncate_text_to_token_budget
def test_estimate_handles_non_string():
    """Non-string inputs get the fixed 4-token estimate instead of raising."""
    for non_text in (None, 123):
        assert _message_text_token_estimate(non_text) == 4
def test_truncate_returns_string_for_non_string():
    """Non-string inputs truncate to an empty string, never the raw value."""
    # An empty string keeps callers that concatenate or measure the result
    # from crashing further downstream.
    budget = 1000
    for non_text in (None, 123):
        assert _truncate_text_to_token_budget(non_text, budget) == ""
def test_valid_text_unchanged():
    """Ordinary strings still follow the normal estimate/truncate paths."""
    sample = "hello"
    expected_estimate = int(len(sample) * 0.3) + 4
    assert _message_text_token_estimate(sample) == expected_estimate

    # Text well under the budget must come back untouched.
    brief = "short"
    assert _truncate_text_to_token_budget(brief, 1000) == brief