From a66f241e213289dbdf5f5aad4c8790e932d914f3 Mon Sep 17 00:00:00 2001
From: pewdiepie-archdaemon
Date: Mon, 1 Jun 2026 12:38:10 +0900
Subject: [PATCH] Preserve large pasted messages in context

---
Review note: line structure restored; _truncate_text_to_token_budget no longer
returns text longer than its input (overlapping head/tail) and no longer
replaces a short text with the longer "omitted" placeholder. Regression tests
added. The post-image blob ids on the "index" lines predate these follow-up
changes.

 src/context_compactor.py        | 90 +++++++++++++++++++++++++++++++---
 tests/test_context_compactor.py | 42 ++++++++++++++++
 2 files changed, 124 insertions(+), 8 deletions(-)

diff --git a/src/context_compactor.py b/src/context_compactor.py
index 9c00c92..890a9eb 100644
--- a/src/context_compactor.py
+++ b/src/context_compactor.py
@@ -6,7 +6,7 @@ Summarizes older messages via the same LLM, preserving key context.
 """
 
 import logging
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional
 
 from src.model_context import get_context_length, estimate_tokens
 from src.llm_core import llm_call_async
@@ -95,6 +95,68 @@ def _sanitize_tool_messages(msgs: List[Dict]) -> List[Dict]:
     return out
 
 
+def _message_text_token_estimate(text: str) -> int:
+    """Estimate the token cost of one text part: chars * 0.3 plus 4."""
+    return int(len(text) * 0.3) + 4
+
+
+def _truncate_text_to_token_budget(text: str, token_budget: int) -> str:
+    """Trim a too-large current user message instead of dropping it entirely.
+
+    Returns ``text`` unchanged when it fits or when trimming could not make
+    it shorter, a placeholder when the budget is too small to keep a long
+    text, and otherwise the head and tail of ``text`` around a notice.
+    """
+    omitted = "[Current user message omitted: it exceeded the model context window.]"
+    if token_budget <= 32:
+        # A text no longer than the placeholder costs no more than it does.
+        return text if len(text) <= len(omitted) else omitted
+
+    # Match src.model_context.estimate_tokens' rough chars * 0.3 estimate.
+    max_chars = max(200, int((token_budget - 16) / 0.3))
+    if len(text) <= max_chars:
+        return text
+
+    notice = (
+        "\n\n[Notice: the pasted message was too large for this model's context "
+        "window, so Odysseus kept the beginning and end.]"
+    )
+    separator = "\n\n"
+    keep_chars = max(200, max_chars - len(notice))
+    head_len = max(100, int(keep_chars * 0.7))
+    tail_len = max(80, keep_chars - head_len)
+    if head_len + len(notice) + len(separator) + tail_len >= len(text):
+        # The minimum sizes above can make head and tail overlap, which would
+        # repeat content and return something longer than the original text.
+        return text
+    return text[:head_len].rstrip() + notice + separator + text[-tail_len:].lstrip()
+
+
+def _truncate_message_to_token_budget(msg: Dict[str, Any], token_budget: int) -> Dict[str, Any]:
+    """Return a copy of msg whose text content fits inside token_budget."""
+    out = dict(msg)
+    content = out.get("content", "")
+    if isinstance(content, str):
+        out["content"] = _truncate_text_to_token_budget(content, token_budget)
+        return out
+
+    if isinstance(content, list):
+        remaining = token_budget
+        new_content = []
+        for item in content:
+            if not isinstance(item, dict) or item.get("type") != "text":
+                new_content.append(item)
+                continue
+            text = item.get("text", "")
+            truncated = _truncate_text_to_token_budget(text, remaining)
+            cloned = dict(item)
+            cloned["text"] = truncated
+            new_content.append(cloned)
+            remaining -= _message_text_token_estimate(truncated)
+        out["content"] = new_content
+    return out
+
+
 def trim_for_context(messages: List[Dict], context_length: int, reserve_tokens: int = 512) -> List[Dict]:
     """Trim system messages to fit within context_length.
 
@@ -153,19 +215,31 @@ def trim_for_context(messages: List[Dict], context_length: int, reserve_tokens:
     if estimate_tokens(trimmed) <= budget:
         return _sanitize_tool_messages(essential_system + protected_msgs + convo_msgs)
 
-    # Still too big — drop older conversation turns BUT protect the last 10.
+    # Still too big — drop older conversation turns BUT always keep the current
+    # user turn. If a pasted message alone exceeds the model context, truncate
+    # that message with a visible notice instead of dropping it; otherwise the
+    # model appears to "ignore" large pastes because it never receives them.
     # Hermes-style: recent context matters more than old context.
     PROTECT_RECENT = 10
-    if len(convo_msgs) > PROTECT_RECENT:
-        old_msgs = convo_msgs[:-PROTECT_RECENT]
-        recent_msgs = convo_msgs[-PROTECT_RECENT:]
+    current_msg = convo_msgs[-1:] if convo_msgs else []
+    prior_convo = convo_msgs[:-1] if convo_msgs else []
+    if len(prior_convo) >= PROTECT_RECENT:
+        old_msgs = prior_convo[:-(PROTECT_RECENT - 1)]
+        recent_msgs = prior_convo[-(PROTECT_RECENT - 1):] + current_msg
         while old_msgs and estimate_tokens(essential_system + old_msgs + recent_msgs) > budget:
             old_msgs.pop(0)
         convo_msgs = old_msgs + recent_msgs
     else:
-        # Not enough messages to split — just trim from front
-        while convo_msgs and estimate_tokens(essential_system + convo_msgs) > budget:
-            convo_msgs.pop(0)
+        convo_msgs = prior_convo + current_msg
+        while prior_convo and estimate_tokens(essential_system + prior_convo + current_msg) > budget:
+            prior_convo.pop(0)
+            convo_msgs = prior_convo + current_msg
+
+    # If the current message itself is too large, shrink only that message.
+    if current_msg and estimate_tokens(essential_system + protected_msgs + convo_msgs) > budget:
+        prefix = essential_system + protected_msgs + convo_msgs[:-1]
+        available_for_current = max(64, budget - estimate_tokens(prefix))
+        convo_msgs[-1] = _truncate_message_to_token_budget(convo_msgs[-1], available_for_current)
 
     result = _sanitize_tool_messages(essential_system + protected_msgs + convo_msgs)
     logger.info(f"Trimmed to {estimate_tokens(result)} tokens ({len(result)} messages)")
diff --git a/tests/test_context_compactor.py b/tests/test_context_compactor.py
index 7f88fb5..5a1dfa3 100644
--- a/tests/test_context_compactor.py
+++ b/tests/test_context_compactor.py
@@ -18,6 +18,8 @@ from src.context_compactor import (
     COMPACT_THRESHOLD,
     SELF_SUMMARY_SYSTEM_PROMPT,
     SUMMARY_MAX_TOKENS,
+    trim_for_context,
+    _truncate_text_to_token_budget,
 )
 
 
@@ -53,3 +55,43 @@ class TestSelfSummaryPrompt:
 
     def test_mentions_compactions(self):
         assert "Compactions so far" in SELF_SUMMARY_SYSTEM_PROMPT
+
+
+class TestTrimForContext:
+    def test_keeps_current_large_user_message_by_truncating(self):
+        huge = "A" * 20000
+        messages = [
+            {"role": "system", "content": "You are helpful."},
+            {"role": "user", "content": huge},
+        ]
+
+        trimmed = trim_for_context(messages, context_length=2048, reserve_tokens=512)
+
+        user_msgs = [m for m in trimmed if m.get("role") == "user"]
+        assert len(user_msgs) == 1
+        content = user_msgs[0]["content"]
+        assert "pasted message was too large" in content
+        assert content.startswith("A")
+        assert len(content) < len(huge)
+
+    def test_drops_older_messages_before_latest_user_paste(self):
+        huge = "B" * 12000
+        messages = [{"role": "system", "content": "You are helpful."}]
+        messages.extend({"role": "user", "content": f"old-{i} " + ("x" * 1000)} for i in range(8))
+        messages.append({"role": "user", "content": huge})
+
+        trimmed = trim_for_context(messages, context_length=2048, reserve_tokens=512)
+
+        assert trimmed[-1]["role"] == "user"
+        assert "pasted message was too large" in trimmed[-1]["content"]
+        assert "old-0" not in "\n".join(str(m.get("content", "")) for m in trimmed)
+
+
+class TestTruncateTextToTokenBudget:
+    def test_does_not_lengthen_text_just_over_the_limit(self):
+        text = "word " * 48
+
+        assert _truncate_text_to_token_budget(text, 64) == text
+
+    def test_keeps_short_text_when_budget_is_exhausted(self):
+        assert _truncate_text_to_token_budget("ok", 0) == "ok"