diff --git a/src/agent_loop.py b/src/agent_loop.py index d37169c..f1c1e99 100644 --- a/src/agent_loop.py +++ b/src/agent_loop.py @@ -1487,12 +1487,21 @@ async def stream_agent_loop( _t3 = time.time() try: from src.context_compactor import trim_for_context + from src.context_budget import compute_input_token_budget + from src.settings import is_setting_overridden soft_budget = int(get_setting("agent_input_token_budget", 6000) or 0) if soft_budget > 0: before_trim_tokens = estimate_tokens(messages) reserve_tokens = min(max(max_tokens or 1024, 512), 2048) - effective_budget = min(context_length or soft_budget, soft_budget) + # Scale the default budget to the model's context window so long-context + # models aren't silently capped at 6000; an explicit user setting is + # still honoured (clamped to the window). (#1170) + effective_budget = compute_input_token_budget( + soft_budget, + context_length, + is_setting_overridden("agent_input_token_budget"), + ) trimmed_messages = trim_for_context( messages, effective_budget, diff --git a/src/context_budget.py b/src/context_budget.py new file mode 100644 index 0000000..d331ffa --- /dev/null +++ b/src/context_budget.py @@ -0,0 +1,55 @@ +"""Adaptive input-token budget for the agent loop (#1170). + +The agent soft-trims its input context to ``agent_input_token_budget`` (default +6000). The old computation was ``min(context_length or budget, budget)``, which +made the 6000 default a hard ceiling for *every* model — so a 128K or 1M context +model was silently capped at 6000 input tokens even though it can hold far more. + +This derives the effective budget from the model's discovered context window when +the user has NOT set an explicit budget, while still honouring an explicit setting +exactly (clamped to the window). Pure and side-effect free so it is unit-testable. +""" + +# Generous ceiling so long-context models are unblocked without sending a +# pathologically large prompt every agent turn. 
# Tunable; chosen to fully cover 128K models and give 1M models a large but
# bounded budget.
DEFAULT_HARD_MAX = 200_000
# Fallback budget when neither a configured value nor a context window is known.
DEFAULT_BUDGET = 6000
# Fraction of the context window used for input on the adaptive (default) path.
DEFAULT_HEADROOM = 0.85


def compute_input_token_budget(
    configured: int,
    context_length: int,
    explicit: bool,
    *,
    default: int = DEFAULT_BUDGET,
    headroom: float = DEFAULT_HEADROOM,
    hard_max: int = DEFAULT_HARD_MAX,
) -> int:
    """Return the effective soft input-token budget.

    Pure and side-effect free.

    Args:
        configured: The value read from settings (may be the default).
            ``None``/``0``/negative values are treated as "not configured".
        context_length: The model's discovered context window.
            ``None``/``0``/negative values are treated as "unknown".
        explicit: True if the user explicitly set the budget.
        default: Budget returned when nothing else is known.
        headroom: Fraction of the window to use on the adaptive path.
        hard_max: Upper bound for the adaptive budget.

    Returns:
        The budget in tokens, chosen by these rules:

        - An explicit, positive budget is honoured exactly, only clamped to
          the window when the window is known.
        - Otherwise, when the window is known, ``headroom`` of the window is
          used, capped at ``hard_max`` and at the window itself, and never
          less than 1.
        - When the window is unknown, the configured value is returned if it
          is positive, else ``default``.
    """
    configured = int(configured or 0)
    context_length = int(context_length or 0)
    window_known = context_length > 0

    if explicit and configured > 0:
        return min(configured, context_length) if window_known else configured

    if window_known:
        scaled = int(context_length * headroom)
        # Clamp to the window as well as hard_max: a caller-supplied
        # headroom above 1.0 must never produce a budget larger than what
        # the model can actually hold.
        return max(1, min(scaled, hard_max, context_length))

    return configured if configured > 0 else default
def is_setting_overridden(key: str) -> bool:
    """Return True if ``key`` is explicitly present in the saved settings file.

    Reads the raw JSON at ``SETTINGS_FILE`` directly, so a saved value that
    happens to equal its default still counts as an explicit choice. Callers
    that need to treat an explicit user choice differently from the default
    (e.g. adaptive budgets) use this.

    Args:
        key: The settings key to look for at the top level of the saved file.

    Returns:
        True only when the file can be read, parses as a JSON object, and
        contains ``key``. A missing, unreadable, undecodable or malformed
        file, or a JSON root that is not an object, yields False.
    """
    try:
        with open(SETTINGS_FILE, "r", encoding="utf-8") as f:
            saved = json.load(f)
    except (OSError, ValueError):
        # OSError covers a missing file, permission problems and a directory
        # at that path. ValueError covers json.JSONDecodeError and
        # UnicodeDecodeError. This is a best-effort probe, so all of these
        # simply mean "nothing explicitly saved".
        return False
    # Only a JSON object can hold settings keys. Without this check a string
    # root would do substring matching and a number/null root would raise
    # TypeError on ``in``.
    return isinstance(saved, dict) and key in saved
+ assert compute_input_token_budget(50000, 128000, explicit=True) == 50000 + + +def test_explicit_budget_clamped_to_window(): + assert compute_input_token_budget(200000, 32000, explicit=True) == 32000 + + +def test_unknown_window_falls_back_to_configured(): + assert compute_input_token_budget(6000, 0, explicit=False) == 6000 + assert compute_input_token_budget(0, 0, explicit=False) == 6000 # default + + +def test_is_setting_overridden_reads_raw_saved_file(tmp_path, monkeypatch): + import src.settings as settings + + f = tmp_path / "settings.json" + f.write_text(json.dumps({"agent_input_token_budget": 12000}), encoding="utf-8") + monkeypatch.setattr(settings, "SETTINGS_FILE", str(f)) + assert settings.is_setting_overridden("agent_input_token_budget") is True + assert settings.is_setting_overridden("some_unset_key") is False + + f.write_text(json.dumps({}), encoding="utf-8") + assert settings.is_setting_overridden("agent_input_token_budget") is False