fix: context_compactor token helpers crash on non-string message text (#1634)
* fix: context_compactor token helpers crash on non-string message text
* fix: _truncate_text_to_token_budget returns an empty string for non-string text, not the raw value
This commit is contained in:
@@ -116,6 +116,8 @@ def _sanitize_tool_messages(msgs: List[Dict]) -> List[Dict]:
|
|||||||
|
|
||||||
|
|
||||||
def _message_text_token_estimate(text: str) -> int:
|
def _message_text_token_estimate(text: str) -> int:
|
||||||
|
if not isinstance(text, str):
|
||||||
|
return 4
|
||||||
return int(len(text) * 0.3) + 4
|
return int(len(text) * 0.3) + 4
|
||||||
|
|
||||||
|
|
||||||
@@ -124,6 +126,11 @@ def _truncate_text_to_token_budget(text: str, token_budget: int) -> str:
|
|||||||
if token_budget <= 32:
|
if token_budget <= 32:
|
||||||
return "[Current user message omitted: it exceeded the model context window.]"
|
return "[Current user message omitted: it exceeded the model context window.]"
|
||||||
|
|
||||||
|
if not isinstance(text, str):
|
||||||
|
# This helper is typed/used as text downstream, so return an empty
|
||||||
|
# string rather than the raw non-string (which would move the crash
|
||||||
|
# into the caller that concatenates/measures the result).
|
||||||
|
return ""
|
||||||
# Match src.model_context.estimate_tokens' rough chars * 0.3 estimate.
|
# Match src.model_context.estimate_tokens' rough chars * 0.3 estimate.
|
||||||
max_chars = max(200, int((token_budget - 16) / 0.3))
|
max_chars = max(200, int((token_budget - 16) / 0.3))
|
||||||
if len(text) <= max_chars:
|
if len(text) <= max_chars:
|
||||||
|
|||||||
24
tests/test_context_compactor_nonstring.py
Normal file
24
tests/test_context_compactor_nonstring.py
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
"""Regression: context_compactor token helpers must tolerate non-string text.
|
||||||
|
|
||||||
|
_message_text_token_estimate and _truncate_text_to_token_budget call len(text)
|
||||||
|
on the message text; a None/non-string (e.g. an assistant tool-call message
|
||||||
|
with content=None) raised TypeError. They now coerce gracefully.
|
||||||
|
"""
|
||||||
|
from src.context_compactor import _message_text_token_estimate, _truncate_text_to_token_budget
|
||||||
|
|
||||||
|
|
||||||
|
def test_estimate_handles_non_string():
    """Non-string inputs are estimated as the constant 4 rather than raising."""
    for non_string in (None, 123):
        assert _message_text_token_estimate(non_string) == 4
|
||||||
|
|
||||||
|
|
||||||
|
def test_truncate_returns_string_for_non_string():
    """Non-string inputs truncate to an empty string, not the raw value."""
    # An empty string keeps callers that concatenate or measure the result
    # from crashing downstream on a non-string.
    for non_string in (None, 123):
        assert _truncate_text_to_token_budget(non_string, 1000) == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_valid_text_unchanged():
    """Ordinary string inputs keep their existing estimate and pass through."""
    sample = "hello"
    expected_estimate = int(len(sample) * 0.3) + 4
    assert _message_text_token_estimate(sample) == expected_estimate

    # Text well under the budget is returned as-is.
    assert _truncate_text_to_token_budget("short", 1000) == "short"
|
||||||
Reference in New Issue
Block a user