Preserve large pasted messages in context
This commit is contained in:
@@ -6,7 +6,7 @@ Summarizes older messages via the same LLM, preserving key context.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from src.model_context import get_context_length, estimate_tokens
|
||||
from src.llm_core import llm_call_async
|
||||
@@ -95,6 +95,55 @@ def _sanitize_tool_messages(msgs: List[Dict]) -> List[Dict]:
|
||||
return out
|
||||
|
||||
|
||||
def _message_text_token_estimate(text: str) -> int:
|
||||
return int(len(text) * 0.3) + 4
|
||||
|
||||
|
||||
def _truncate_text_to_token_budget(text: str, token_budget: int) -> str:
|
||||
"""Trim a too-large current user message instead of dropping it entirely."""
|
||||
if token_budget <= 32:
|
||||
return "[Current user message omitted: it exceeded the model context window.]"
|
||||
|
||||
# Match src.model_context.estimate_tokens' rough chars * 0.3 estimate.
|
||||
max_chars = max(200, int((token_budget - 16) / 0.3))
|
||||
if len(text) <= max_chars:
|
||||
return text
|
||||
|
||||
notice = (
|
||||
"\n\n[Notice: the pasted message was too large for this model's context "
|
||||
"window, so Odysseus kept the beginning and end.]"
|
||||
)
|
||||
keep_chars = max(200, max_chars - len(notice))
|
||||
head_len = max(100, int(keep_chars * 0.7))
|
||||
tail_len = max(80, keep_chars - head_len)
|
||||
return text[:head_len].rstrip() + notice + "\n\n" + text[-tail_len:].lstrip()
|
||||
|
||||
|
||||
def _truncate_message_to_token_budget(msg: Dict[str, Any], token_budget: int) -> Dict[str, Any]:
|
||||
"""Return a copy of msg whose text content fits inside token_budget."""
|
||||
out = dict(msg)
|
||||
content = out.get("content", "")
|
||||
if isinstance(content, str):
|
||||
out["content"] = _truncate_text_to_token_budget(content, token_budget)
|
||||
return out
|
||||
|
||||
if isinstance(content, list):
|
||||
remaining = token_budget
|
||||
new_content = []
|
||||
for item in content:
|
||||
if not isinstance(item, dict) or item.get("type") != "text":
|
||||
new_content.append(item)
|
||||
continue
|
||||
text = item.get("text", "")
|
||||
truncated = _truncate_text_to_token_budget(text, remaining)
|
||||
cloned = dict(item)
|
||||
cloned["text"] = truncated
|
||||
new_content.append(cloned)
|
||||
remaining -= _message_text_token_estimate(truncated)
|
||||
out["content"] = new_content
|
||||
return out
|
||||
|
||||
|
||||
def trim_for_context(messages: List[Dict], context_length: int, reserve_tokens: int = 512) -> List[Dict]:
|
||||
"""Trim system messages to fit within context_length.
|
||||
|
||||
@@ -153,19 +202,31 @@ def trim_for_context(messages: List[Dict], context_length: int, reserve_tokens:
|
||||
if estimate_tokens(trimmed) <= budget:
|
||||
return _sanitize_tool_messages(essential_system + protected_msgs + convo_msgs)
|
||||
|
||||
# Still too big — drop older conversation turns BUT protect the last 10.
|
||||
# Still too big — drop older conversation turns BUT always keep the current
|
||||
# user turn. If a pasted message alone exceeds the model context, truncate
|
||||
# that message with a visible notice instead of dropping it; otherwise the
|
||||
# model appears to "ignore" large pastes because it never receives them.
|
||||
# Hermes-style: recent context matters more than old context.
|
||||
PROTECT_RECENT = 10
|
||||
if len(convo_msgs) > PROTECT_RECENT:
|
||||
old_msgs = convo_msgs[:-PROTECT_RECENT]
|
||||
recent_msgs = convo_msgs[-PROTECT_RECENT:]
|
||||
current_msg = convo_msgs[-1:] if convo_msgs else []
|
||||
prior_convo = convo_msgs[:-1] if convo_msgs else []
|
||||
if len(prior_convo) >= PROTECT_RECENT:
|
||||
old_msgs = prior_convo[:-(PROTECT_RECENT - 1)]
|
||||
recent_msgs = prior_convo[-(PROTECT_RECENT - 1):] + current_msg
|
||||
while old_msgs and estimate_tokens(essential_system + old_msgs + recent_msgs) > budget:
|
||||
old_msgs.pop(0)
|
||||
convo_msgs = old_msgs + recent_msgs
|
||||
else:
|
||||
# Not enough messages to split — just trim from front
|
||||
while convo_msgs and estimate_tokens(essential_system + convo_msgs) > budget:
|
||||
convo_msgs.pop(0)
|
||||
convo_msgs = prior_convo + current_msg
|
||||
while prior_convo and estimate_tokens(essential_system + prior_convo + current_msg) > budget:
|
||||
prior_convo.pop(0)
|
||||
convo_msgs = prior_convo + current_msg
|
||||
|
||||
# If the current message itself is too large, shrink only that message.
|
||||
if current_msg and estimate_tokens(essential_system + protected_msgs + convo_msgs) > budget:
|
||||
prefix = essential_system + protected_msgs + convo_msgs[:-1]
|
||||
available_for_current = max(64, budget - estimate_tokens(prefix))
|
||||
convo_msgs[-1] = _truncate_message_to_token_budget(convo_msgs[-1], available_for_current)
|
||||
|
||||
result = _sanitize_tool_messages(essential_system + protected_msgs + convo_msgs)
|
||||
logger.info(f"Trimmed to {estimate_tokens(result)} tokens ({len(result)} messages)")
|
||||
|
||||
@@ -18,6 +18,7 @@ from src.context_compactor import (
|
||||
COMPACT_THRESHOLD,
|
||||
SELF_SUMMARY_SYSTEM_PROMPT,
|
||||
SUMMARY_MAX_TOKENS,
|
||||
trim_for_context,
|
||||
)
|
||||
|
||||
|
||||
@@ -53,3 +54,33 @@ class TestSelfSummaryPrompt:
|
||||
|
||||
def test_mentions_compactions(self):
|
||||
assert "Compactions so far" in SELF_SUMMARY_SYSTEM_PROMPT
|
||||
|
||||
|
||||
class TestTrimForContext:
|
||||
def test_keeps_current_large_user_message_by_truncating(self):
|
||||
huge = "A" * 20000
|
||||
messages = [
|
||||
{"role": "system", "content": "You are helpful."},
|
||||
{"role": "user", "content": huge},
|
||||
]
|
||||
|
||||
trimmed = trim_for_context(messages, context_length=2048, reserve_tokens=512)
|
||||
|
||||
user_msgs = [m for m in trimmed if m.get("role") == "user"]
|
||||
assert len(user_msgs) == 1
|
||||
content = user_msgs[0]["content"]
|
||||
assert "pasted message was too large" in content
|
||||
assert content.startswith("A")
|
||||
assert len(content) < len(huge)
|
||||
|
||||
def test_drops_older_messages_before_latest_user_paste(self):
|
||||
huge = "B" * 12000
|
||||
messages = [{"role": "system", "content": "You are helpful."}]
|
||||
messages.extend({"role": "user", "content": f"old-{i} " + ("x" * 1000)} for i in range(8))
|
||||
messages.append({"role": "user", "content": huge})
|
||||
|
||||
trimmed = trim_for_context(messages, context_length=2048, reserve_tokens=512)
|
||||
|
||||
assert trimmed[-1]["role"] == "user"
|
||||
assert "pasted message was too large" in trimmed[-1]["content"]
|
||||
assert "old-0" not in "\n".join(str(m.get("content", "")) for m in trimmed)
|
||||
|
||||
Reference in New Issue
Block a user