Preserve large pasted messages in context

2026-06-01 12:38:10 +09:00
parent 1ce00b5dea
commit a66f241e21
2 changed files with 100 additions and 8 deletions
--- a/src/context_compactor.py
+++ b/src/context_compactor.py
@@ -6,7 +6,7 @@ Summarizes older messages via the same LLM, preserving key context.
 """

 import logging
-from typing import Dict, List, Optional
+from typing import Any, Dict, List, Optional

 from src.model_context import get_context_length, estimate_tokens
 from src.llm_core import llm_call_async
@@ -95,6 +95,55 @@ def _sanitize_tool_messages(msgs: List[Dict]) -> List[Dict]:
    return out


+def _message_text_token_estimate(text: str) -> int:
+    return int(len(text) * 0.3) + 4
+
+
+def _truncate_text_to_token_budget(text: str, token_budget: int) -> str:
+    """Trim a too-large current user message instead of dropping it entirely."""
+    if token_budget <= 32:
+        return "[Current user message omitted: it exceeded the model context window.]"
+
+    # Match src.model_context.estimate_tokens' rough chars * 0.3 estimate.
+    max_chars = max(200, int((token_budget - 16) / 0.3))
+    if len(text) <= max_chars:
+        return text
+
+    notice = (
+        "\n\n[Notice: the pasted message was too large for this model's context "
+        "window, so Odysseus kept the beginning and end.]"
+    )
+    keep_chars = max(200, max_chars - len(notice))
+    head_len = max(100, int(keep_chars * 0.7))
+    tail_len = max(80, keep_chars - head_len)
+    return text[:head_len].rstrip() + notice + "\n\n" + text[-tail_len:].lstrip()
+
+
+def _truncate_message_to_token_budget(msg: Dict[str, Any], token_budget: int) -> Dict[str, Any]:
+    """Return a copy of msg whose text content fits inside token_budget."""
+    out = dict(msg)
+    content = out.get("content", "")
+    if isinstance(content, str):
+        out["content"] = _truncate_text_to_token_budget(content, token_budget)
+        return out
+
+    if isinstance(content, list):
+        remaining = token_budget
+        new_content = []
+        for item in content:
+            if not isinstance(item, dict) or item.get("type") != "text":
+                new_content.append(item)
+                continue
+            text = item.get("text", "")
+            truncated = _truncate_text_to_token_budget(text, remaining)
+            cloned = dict(item)
+            cloned["text"] = truncated
+            new_content.append(cloned)
+            remaining -= _message_text_token_estimate(truncated)
+        out["content"] = new_content
+    return out
+
+
 def trim_for_context(messages: List[Dict], context_length: int, reserve_tokens: int = 512) -> List[Dict]:
    """Trim system messages to fit within context_length.

@@ -153,19 +202,31 @@ def trim_for_context(messages: List[Dict], context_length: int, reserve_tokens:
            if estimate_tokens(trimmed) <= budget:
                return _sanitize_tool_messages(essential_system + protected_msgs + convo_msgs)

-    # Still too big — drop older conversation turns BUT protect the last 10.
+    # Still too big — drop older conversation turns BUT always keep the current
+    # user turn. If a pasted message alone exceeds the model context, truncate
+    # that message with a visible notice instead of dropping it; otherwise the
+    # model appears to "ignore" large pastes because it never receives them.
    # Hermes-style: recent context matters more than old context.
    PROTECT_RECENT = 10
-    if len(convo_msgs) > PROTECT_RECENT:
-        old_msgs = convo_msgs[:-PROTECT_RECENT]
-        recent_msgs = convo_msgs[-PROTECT_RECENT:]
+    current_msg = convo_msgs[-1:] if convo_msgs else []
+    prior_convo = convo_msgs[:-1] if convo_msgs else []
+    if len(prior_convo) >= PROTECT_RECENT:
+        old_msgs = prior_convo[:-(PROTECT_RECENT - 1)]
+        recent_msgs = prior_convo[-(PROTECT_RECENT - 1):] + current_msg
        while old_msgs and estimate_tokens(essential_system + old_msgs + recent_msgs) > budget:
            old_msgs.pop(0)
        convo_msgs = old_msgs + recent_msgs
    else:
-        # Not enough messages to split — just trim from front
-        while convo_msgs and estimate_tokens(essential_system + convo_msgs) > budget:
-            convo_msgs.pop(0)
+        convo_msgs = prior_convo + current_msg
+        while prior_convo and estimate_tokens(essential_system + prior_convo + current_msg) > budget:
+            prior_convo.pop(0)
+        convo_msgs = prior_convo + current_msg
+
+    # If the current message itself is too large, shrink only that message.
+    if current_msg and estimate_tokens(essential_system + protected_msgs + convo_msgs) > budget:
+        prefix = essential_system + protected_msgs + convo_msgs[:-1]
+        available_for_current = max(64, budget - estimate_tokens(prefix))
+        convo_msgs[-1] = _truncate_message_to_token_budget(convo_msgs[-1], available_for_current)

    result = _sanitize_tool_messages(essential_system + protected_msgs + convo_msgs)
    logger.info(f"Trimmed to {estimate_tokens(result)} tokens ({len(result)} messages)")
--- a/tests/test_context_compactor.py
+++ b/tests/test_context_compactor.py
@@ -18,6 +18,7 @@ from src.context_compactor import (
    COMPACT_THRESHOLD,
    SELF_SUMMARY_SYSTEM_PROMPT,
    SUMMARY_MAX_TOKENS,
+    trim_for_context,
 )


@@ -53,3 +54,33 @@ class TestSelfSummaryPrompt:

    def test_mentions_compactions(self):
        assert "Compactions so far" in SELF_SUMMARY_SYSTEM_PROMPT
+
+
+class TestTrimForContext:
+    def test_keeps_current_large_user_message_by_truncating(self):
+        huge = "A" * 20000
+        messages = [
+            {"role": "system", "content": "You are helpful."},
+            {"role": "user", "content": huge},
+        ]
+
+        trimmed = trim_for_context(messages, context_length=2048, reserve_tokens=512)
+
+        user_msgs = [m for m in trimmed if m.get("role") == "user"]
+        assert len(user_msgs) == 1
+        content = user_msgs[0]["content"]
+        assert "pasted message was too large" in content
+        assert content.startswith("A")
+        assert len(content) < len(huge)
+
+    def test_drops_older_messages_before_latest_user_paste(self):
+        huge = "B" * 12000
+        messages = [{"role": "system", "content": "You are helpful."}]
+        messages.extend({"role": "user", "content": f"old-{i} " + ("x" * 1000)} for i in range(8))
+        messages.append({"role": "user", "content": huge})
+
+        trimmed = trim_for_context(messages, context_length=2048, reserve_tokens=512)
+
+        assert trimmed[-1]["role"] == "user"
+        assert "pasted message was too large" in trimmed[-1]["content"]
+        assert "old-0" not in "\n".join(str(m.get("content", "")) for m in trimmed)