diff --git a/src/agent_loop.py b/src/agent_loop.py
index f1c1e99..35bc8a5 100644
--- a/src/agent_loop.py
+++ b/src/agent_loop.py
@@ -1487,13 +1487,25 @@ async def stream_agent_loop(
         _t3 = time.time()
         try:
             from src.context_compactor import trim_for_context
-            from src.context_budget import compute_input_token_budget
+            from src.context_budget import compute_input_token_budget, DEFAULT_HARD_MAX
             from src.settings import is_setting_overridden
 
             soft_budget = int(get_setting("agent_input_token_budget", 6000) or 0)
             if soft_budget > 0:
                 before_trim_tokens = estimate_tokens(messages)
                 reserve_tokens = min(max(max_tokens or 1024, 512), 2048)
+                # Honour the configurable ceiling for the auto-derived budget path.
+                # No-op when the user has an explicit `agent_input_token_budget`
+                # (that branch ignores hard_max). Falls back to DEFAULT_HARD_MAX
+                # on missing/malformed values so misconfig can't zero the budget.
+                # OverflowError covers a non-finite float (e.g. `Infinity`), which
+                # int() rejects with that exception rather than ValueError.
+                try:
+                    hard_max = int(get_setting("agent_input_token_hard_max", DEFAULT_HARD_MAX) or DEFAULT_HARD_MAX)
+                except (TypeError, ValueError, OverflowError):
+                    hard_max = DEFAULT_HARD_MAX
+                if hard_max <= 0:
+                    hard_max = DEFAULT_HARD_MAX
                 # Scale the default budget to the model's context window so long-context
                 # models aren't silently capped at 6000; an explicit user setting is
                 # still honoured (clamped to the window). (#1170)
@@ -1501,6 +1513,7 @@ async def stream_agent_loop(
                     soft_budget,
                     context_length,
                     is_setting_overridden("agent_input_token_budget"),
+                    hard_max=hard_max,
                 )
                 trimmed_messages = trim_for_context(
                     messages,
diff --git a/src/settings.py b/src/settings.py
index 5804bf2..374fd28 100644
--- a/src/settings.py
+++ b/src/settings.py
@@ -96,6 +96,14 @@ DEFAULT_SETTINGS = {
     "research_run_timeout_seconds": 1800,
     "agent_max_tool_calls": 0,
     "agent_input_token_budget": 6000,
+    # Ceiling on the *auto-derived* input budget that #1230 introduced. Has
+    # no effect when `agent_input_token_budget` is explicitly set (the user's
+    # value is honoured regardless). Default matches
+    # `src.context_budget.DEFAULT_HARD_MAX`; lower this for cost-paranoid
+    # setups, raise it on premium APIs with very large windows that you
+    # want to actually use (e.g. 900_000 to fill a 1M-context model). See
+    # `compute_input_token_budget` in src/context_budget.py.
+    "agent_input_token_hard_max": 200_000,
     "agent_stream_timeout_seconds": 300,
     # Extra directory roots that read_file / write_file may access, in
     # addition to the built-in project data/ and system temp dirs. Each
diff --git a/src/tool_implementations.py b/src/tool_implementations.py
index 9480fd4..4da9120 100644
--- a/src/tool_implementations.py
+++ b/src/tool_implementations.py
@@ -1530,7 +1530,10 @@ async def do_manage_settings(content: str, owner: Optional[str] = None) -> Dict:
         "ntfy topic": "reminder_ntfy_topic",
         "agent tool calls": "agent_max_tool_calls", "max tool calls": "agent_max_tool_calls",
         "agent timeout": "agent_stream_timeout_seconds", "stream timeout": "agent_stream_timeout_seconds",
-        "token budget": "agent_input_token_budget",
+        "token budget": "agent_input_token_budget", "input budget": "agent_input_token_budget",
+        "hard max": "agent_input_token_hard_max",
+        "token budget cap": "agent_input_token_hard_max",
+        "input budget cap": "agent_input_token_hard_max",
     }
     def _resolve(k):
         k2 = (k or "").strip().lower()
diff --git a/tests/test_context_budget.py b/tests/test_context_budget.py
index 9d7337c..2c97b47 100644
--- a/tests/test_context_budget.py
+++ b/tests/test_context_budget.py
@@ -44,3 +44,81 @@ def test_is_setting_overridden_reads_raw_saved_file(tmp_path, monkeypatch):
     f.write_text(json.dumps({}), encoding="utf-8")
 
     assert settings.is_setting_overridden("agent_input_token_budget") is False
+
+
+# ---------------------------------------------------------------------------
+# Configurable hard_max — completes the reviewer requirement from #1190 that
+# was carried over but not implemented in #1230: the ceiling on the auto-
+# derived path should be a setting, not a hidden constant. Without this,
+# admins on premium APIs with very large windows (1M+ context) can only
+# raise the ceiling by editing src/context_budget.py.
+# ---------------------------------------------------------------------------
+
+def test_custom_hard_max_overrides_default_in_auto_branch():
+    """A caller-supplied hard_max lifts the auto-derived ceiling."""
+    # Without override: 1M ctx -> capped at DEFAULT_HARD_MAX (200K)
+    assert compute_input_token_budget(6000, 1_000_000, explicit=False) == DEFAULT_HARD_MAX
+    # With explicit raise: 1M ctx -> 850K (85% of 1M), under the raised ceiling
+    assert compute_input_token_budget(6000, 1_000_000, explicit=False, hard_max=900_000) == int(1_000_000 * 0.85)
+
+
+def test_custom_hard_max_lowers_default_for_cost_paranoid_setups():
+    """A lower ceiling caps the auto-derived budget below the default."""
+    # 128K ctx, default ceiling 200K -> 85% of 128K = 108800
+    assert compute_input_token_budget(6000, 128_000, explicit=False) == int(128_000 * 0.85)
+    # Same ctx, ceiling lowered to 50K -> capped at 50K instead
+    assert compute_input_token_budget(6000, 128_000, explicit=False, hard_max=50_000) == 50_000
+
+
+def test_hard_max_has_no_effect_on_explicit_branch():
+    """When the user set an explicit budget, hard_max must not silently cap it."""
+    # User chose 900K explicitly; ctx is 1M; ceiling is 100K — user's choice wins.
+    assert compute_input_token_budget(900_000, 1_000_000, explicit=True, hard_max=100_000) == 900_000
+
+
+def test_default_settings_registers_hard_max_key():
+    """Required so /api/auth/settings and manage_settings can persist the key."""
+    from src.settings import DEFAULT_SETTINGS
+    assert "agent_input_token_hard_max" in DEFAULT_SETTINGS
+    assert DEFAULT_SETTINGS["agent_input_token_hard_max"] == DEFAULT_HARD_MAX
+
+
+def test_alias_map_registers_friendly_names():
+    """`manage_settings` should accept 'hard max' and friends."""
+    from pathlib import Path
+    # Anchor on this file so the test passes from any working directory.
+    repo_root = Path(__file__).resolve().parents[1]
+    src = (repo_root / "src" / "tool_implementations.py").read_text(encoding="utf-8")
+    assert '"hard max": "agent_input_token_hard_max"' in src
+    assert '"token budget cap": "agent_input_token_hard_max"' in src
+    assert '"input budget cap": "agent_input_token_hard_max"' in src
+
+
+def test_agent_loop_reads_hard_max_setting(tmp_path, monkeypatch):
+    """A saved agent_input_token_hard_max value is visible via get_setting.
+
+    Covers the settings layer that src/agent_loop.py reads from, plus a
+    mirror of its coercion for a malformed value. It does not invoke
+    stream_agent_loop, so it is not an end-to-end check of that call path.
+    """
+    import src.settings as settings
+    # Point SETTINGS_FILE at a temp file with our override.
+    f = tmp_path / "settings.json"
+    f.write_text(json.dumps({"agent_input_token_hard_max": 750_000}), encoding="utf-8")
+    monkeypatch.setattr(settings, "SETTINGS_FILE", str(f))
+    monkeypatch.setattr(settings, "_settings_cache", None)
+    # Read via the same import path the agent loop uses.
+    assert settings.get_setting("agent_input_token_hard_max", DEFAULT_HARD_MAX) == 750_000
+
+    # Malformed value falls back to DEFAULT_HARD_MAX (defensive, matches the
+    # try/except in src/agent_loop.py).
+    f.write_text(json.dumps({"agent_input_token_hard_max": "huge"}), encoding="utf-8")
+    monkeypatch.setattr(settings, "_settings_cache", None)
+    raw = settings.get_setting("agent_input_token_hard_max", DEFAULT_HARD_MAX)
+    try:
+        parsed = int(raw or DEFAULT_HARD_MAX)
+    except (TypeError, ValueError, OverflowError):
+        parsed = DEFAULT_HARD_MAX
+    if parsed <= 0:
+        parsed = DEFAULT_HARD_MAX
+    assert parsed == DEFAULT_HARD_MAX