diff --git a/src/context_compactor.py b/src/context_compactor.py index 8ed5909..c70ed0b 100644 --- a/src/context_compactor.py +++ b/src/context_compactor.py @@ -116,6 +116,8 @@ def _sanitize_tool_messages(msgs: List[Dict]) -> List[Dict]: def _message_text_token_estimate(text: str) -> int: + if not isinstance(text, str): + return 4 return int(len(text) * 0.3) + 4 @@ -124,6 +126,11 @@ def _truncate_text_to_token_budget(text: str, token_budget: int) -> str: if token_budget <= 32: return "[Current user message omitted: it exceeded the model context window.]" + if not isinstance(text, str): + # This helper is typed/used as text downstream, so return an empty + # string rather than the raw non-string (which would move the crash + # into the caller that concatenates/measures the result). + return "" # Match src.model_context.estimate_tokens' rough chars * 0.3 estimate. max_chars = max(200, int((token_budget - 16) / 0.3)) if len(text) <= max_chars: diff --git a/tests/test_context_compactor_nonstring.py b/tests/test_context_compactor_nonstring.py new file mode 100644 index 0000000..d5eba37 --- /dev/null +++ b/tests/test_context_compactor_nonstring.py @@ -0,0 +1,24 @@ +"""Regression: context_compactor token helpers must tolerate non-string text. + +_message_text_token_estimate and _truncate_text_to_token_budget call len(text) +on the message text; a None/non-string (e.g. an assistant tool-call message +with content=None) raised TypeError. They now return fixed fallbacks (4 / an empty string) instead of raising. +""" +from src.context_compactor import _message_text_token_estimate, _truncate_text_to_token_budget + + +def test_estimate_handles_non_string(): + assert _message_text_token_estimate(None) == 4 + assert _message_text_token_estimate(123) == 4 + + +def test_truncate_returns_string_for_non_string(): + # Returns an empty string, not the raw non-string, so callers that + # concatenate/measure the result don't crash downstream. 
+ assert _truncate_text_to_token_budget(None, 1000) == "" + assert _truncate_text_to_token_budget(123, 1000) == "" + + +def test_valid_text_unchanged(): + assert _message_text_token_estimate("hello") == int(len("hello") * 0.3) + 4 + assert _truncate_text_to_token_budget("short", 1000) == "short"