From 64d65b73c1868226b5d8be0b1a0db6bc1f515d07 Mon Sep 17 00:00:00 2001 From: Kenny Van de Maele Date: Thu, 4 Jun 2026 22:36:05 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20round-limit=20handling=20=E2=80=94=20Co?= =?UTF-8?q?ntinue=20affordance=20at=20the=20cap=20+=20configurable=20cap?= =?UTF-8?q?=20(#1999)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: round-limit handling — Continue affordance at the cap + configurable cap Previously, when the agent loop ran out of rounds (per-message step cap, default 20) while still actively using tools, it stopped silently mid-task. Now: 1. The loop emits a `rounds_exhausted` SSE event at the cap, and the UI shows a "Continue" pill at the bottom of the chat that resumes the task from where it left off. Repeated cap-hits each get a fresh Continue (multiple continues in a row). 2. The cap is configurable in Settings → Agent ("Max steps per message"), validated on the client, at the save endpoint, and at the read site. - src/agent_loop.py: track `_exhausted_rounds` (set in the round loop's `for/else`, i.e. whenever every allowed round ran without an early `break` for done / budget / error — the agent wanted to keep going); emit `{"type":"rounds_exhausted","rounds":N}` (logged). - routes/chat_routes.py: read `agent_max_rounds` (clamped 1..200), pass as `max_rounds`; forward the new event through the SSE relay. - routes/auth_routes.py: validate numeric settings on save (int + clamp; agent_max_rounds 1..200, agent_max_tool_calls 0..1000; 400 on non-int). - src/settings.py: default `agent_max_rounds = 20`. - static/: Settings input + client-side clamp; the Continue pill (reuses the existing .stopped-indicator / .continue-btn classes and theme vars --border/--fg/--bg/--accent); appended to the chat container so it survives the message re-render at stream finalize. chat.js cache version bumped. 
* test: cover rounds_exhausted emission (cap-hit vs normal finish) Drives the real stream_agent_loop with mocked LLM stream / tool exec / settings: a tool block every round exhausts the cap and must emit rounds_exhausted; a plain answer hits the done-break and must not. Guards the for/else logic. --- routes/auth_routes.py | 19 +++++++- routes/chat_routes.py | 10 ++++ src/agent_loop.py | 19 ++++++++ src/settings.py | 1 + static/index.html | 13 ++---- static/js/chat.js | 38 +++++++++++++++ static/js/settings.js | 27 +++++++++-- static/style.css | 32 +++++++++++++ tests/test_agent_rounds_exhausted.py | 70 ++++++++++++++++++++++++++++ 9 files changed, 215 insertions(+), 14 deletions(-) create mode 100644 tests/test_agent_rounds_exhausted.py diff --git a/routes/auth_routes.py b/routes/auth_routes.py index 60021e1..644b12d 100644 --- a/routes/auth_routes.py +++ b/routes/auth_routes.py @@ -438,9 +438,24 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter: raise HTTPException(403, "Admin only") body = await request.json() current = _load_settings() + # Per-key validation for numeric settings: coerce to int and clamp to a + # sane range so a bad value can't disable the agent or let it run away. 
+ _INT_RANGES = { + "agent_max_rounds": (1, 200), + "agent_max_tool_calls": (0, 1000), # 0 = unlimited + } for key in DEFAULT_SETTINGS: - if key in body: - current[key] = body[key] + if key not in body: + continue + val = body[key] + if key in _INT_RANGES: + lo, hi = _INT_RANGES[key] + try: + val = int(val) + except (TypeError, ValueError): + raise HTTPException(400, f"{key} must be an integer") + val = max(lo, min(val, hi)) + current[key] = val _save_settings(current) return current diff --git a/routes/chat_routes.py b/routes/chat_routes.py index a3c6c16..836e9da 100644 --- a/routes/chat_routes.py +++ b/routes/chat_routes.py @@ -981,7 +981,15 @@ def setup_chat_routes( _answered_by = None # set if the selected model failed and a fallback answered try: from src.settings import get_setting + from src.agent_tools import MAX_AGENT_ROUNDS as _DEFAULT_ROUNDS _tool_budget = int(get_setting("agent_max_tool_calls", 0)) + # Per-message round cap from settings; clamp defensively in + # case settings.json was hand-edited to a bad value. 
+ try: + _max_rounds = int(get_setting("agent_max_rounds", _DEFAULT_ROUNDS) or _DEFAULT_ROUNDS) + except (TypeError, ValueError): + _max_rounds = _DEFAULT_ROUNDS + _max_rounds = max(1, min(_max_rounds, 200)) async for chunk in stream_agent_loop( sess.endpoint_url, @@ -992,6 +1000,7 @@ def setup_chat_routes( max_tokens=ctx.preset.max_tokens, prompt_type=preset_id, max_tool_calls=_tool_budget, + max_rounds=_max_rounds, context_length=ctx.context_length, active_document=active_doc, session_id=session, @@ -1017,6 +1026,7 @@ def setup_chat_routes( "tool_start", "tool_output", "agent_step", "doc_stream_open", "doc_stream_delta", "doc_update", "doc_suggestions", "ui_control", + "rounds_exhausted", ): if data.get("type") == "agent_step": _agent_rounds = max(_agent_rounds, data.get("round", 1)) diff --git a/src/agent_loop.py b/src/agent_loop.py index 7aa7e19..e0b6248 100644 --- a/src/agent_loop.py +++ b/src/agent_loop.py @@ -1643,6 +1643,11 @@ async def stream_agent_loop( _doc_opened = False # whether doc_stream_open was sent _doc_last_len = 0 # last content length sent + # Set when the loop runs out of rounds while the agent was still actively + # using tools — i.e. it was cut off, not finished. Drives a "Continue" event + # so the user can resume instead of the turn silently stalling. + _exhausted_rounds = False + for round_num in range(1, max_rounds + 1): round_response = "" round_reasoning = "" # reasoning_content deltas (DeepSeek-thinking, vLLM --reasoning-parser) @@ -2300,6 +2305,20 @@ async def stream_agent_loop( # Separator in accumulated response full_response += "\n\n" + else: + # The for-loop completed every allowed round WITHOUT an early `break` + # (a `break` fires on "done", budget, or error). Reaching this `else` + # means the agent kept working until it ran out of rounds — so offer + # Continue instead of stopping silently. 
This catches ALL exhaustion + # paths, including a verifier `continue` on the final round (the old + # bottom-of-loop flag missed those). + _exhausted_rounds = True + + # If the loop hit the round cap while still working, tell the client so it + # can show a "Continue" affordance instead of the turn just stopping. + if _exhausted_rounds: + logger.info("[agent] round cap (%d) reached mid-task — emitting rounds_exhausted", max_rounds) + yield f'data: {json.dumps({"type": "rounds_exhausted", "rounds": max_rounds})}\n\n' # If the response is completely empty and no tools were executed, # yield a fallback message so the user is not left hanging. diff --git a/src/settings.py b/src/settings.py index 8f810a6..5bce0fc 100644 --- a/src/settings.py +++ b/src/settings.py @@ -100,6 +100,7 @@ DEFAULT_SETTINGS = { # Tune via Settings or by editing data/settings.json. "research_run_timeout_seconds": 1800, "agent_max_tool_calls": 0, + "agent_max_rounds": 20, # per-message agent step cap (clamped 1..200) "agent_input_token_budget": 6000, # Ceiling on the *auto-derived* input budget that #1230 introduced. Has # no effect when `agent_input_token_budget` is explicitly set (the user's diff --git a/static/index.html b/static/index.html index cade5cf..03edfa9 100644 --- a/static/index.html +++ b/static/index.html @@ -1478,6 +1478,10 @@ +
+ + +
@@ -2092,13 +2096,6 @@
-
- -
-
@@ -2271,7 +2268,7 @@ - + diff --git a/static/js/chat.js b/static/js/chat.js index c34d6a0..e064b5c 100644 --- a/static/js/chat.js +++ b/static/js/chat.js @@ -1836,6 +1836,44 @@ import createResearchSynapse from './researchSynapse.js'; } } } + } else if (json.type === 'rounds_exhausted') { + // The agent hit the per-turn step limit while still working. + // Offer a Continue button instead of stalling silently. + // NOTE: append to the chat-history container (bottom), NOT the + // message body — the body innerHTML is re-rendered at stream + // finalize, which would wipe a note placed inside it. + const _chatBox = document.getElementById('chat-history'); + if (!_isBg && _chatBox) { + // Drop any prior box so repeated cap-hits each get a fresh + // Continue at the bottom (multiple continues in a row). + const _old = _chatBox.querySelector('.rounds-exhausted'); + if (_old) _old.remove(); + const note = document.createElement('div'); + note.className = 'stopped-indicator rounds-exhausted'; + const label = document.createElement('span'); + label.className = 'rounds-exhausted-label'; + label.textContent = `Reached the ${json.rounds || ''}-step limit — not finished.`; + note.appendChild(label); + const contBtn = document.createElement('button'); + contBtn.className = 'continue-btn'; + contBtn.title = 'Continue the task'; + contBtn.textContent = 'Continue ▸'; + const _holder = currentHolder; + contBtn.addEventListener('click', () => { + note.remove(); + _hideUserBubble = true; + _pendingContinue = _holder; + const msgInput = uiModule.el('message'); + if (msgInput) { + msgInput.value = 'You hit the step limit before finishing — the task is not complete. Continue from exactly where you left off and keep going until it is done. 
Do NOT repeat work already done.'; + const sb = document.querySelector('.send-btn'); + if (sb) sb.click(); + } + }); + note.appendChild(contBtn); + _chatBox.appendChild(note); + try { note.scrollIntoView({ block: 'end', behavior: 'smooth' }); } catch (_) { uiModule.scrollHistory && uiModule.scrollHistory(); } + } } else if (json.type === 'attachments') { if (_isBg) continue; // Update user bubble — replace file chips with image previews diff --git a/static/js/settings.js b/static/js/settings.js index 161f722..8a53606 100644 --- a/static/js/settings.js +++ b/static/js/settings.js @@ -1558,6 +1558,7 @@ async function initResearchSearchSettings() { /* ── Agent Settings (AI tab) ── */ async function initAgentSettings() { var toolsInput = el('set-agentMaxTools'); + var roundsInput = el('set-agentMaxRounds'); var msg = el('set-agentMsg'); if (!toolsInput) return; @@ -1565,23 +1566,41 @@ async function initAgentSettings() { var res = await fetch('/api/auth/settings', { credentials: 'same-origin' }); var settings = await res.json(); if (settings.agent_max_tool_calls) toolsInput.value = settings.agent_max_tool_calls; + if (roundsInput && settings.agent_max_rounds) roundsInput.value = settings.agent_max_rounds; } catch (e) {} + // Clamp + coerce a raw input to an int in [lo, hi]; falls back to `dflt` + // when blank/non-numeric. Mirrors the server-side validation. + function clampInt(raw, lo, hi, dflt) { + var n = parseInt(raw, 10); + if (isNaN(n)) return dflt; + return Math.max(lo, Math.min(n, hi)); + } + async function save() { - var val = parseInt(toolsInput.value, 10) || 0; + var tools = clampInt(toolsInput.value, 0, 1000, 0); + var rounds = roundsInput ? 
clampInt(roundsInput.value, 1, 200, 20) : null; + toolsInput.value = tools; // reflect the clamped value + if (roundsInput) roundsInput.value = rounds; + var payload = { agent_max_tool_calls: tools }; + if (rounds != null) payload.agent_max_rounds = rounds; try { await fetch('/api/auth/settings', { method: 'POST', credentials: 'same-origin', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ agent_max_tool_calls: val }) + body: JSON.stringify(payload) }); - msg.textContent = val > 0 ? 'Limit: ' + val + ' tool calls per message' : 'Unlimited'; + msg.textContent = (tools > 0 ? 'Limit: ' + tools + ' tool calls' : 'Unlimited tool calls') + + (rounds != null ? ' · ' + rounds + ' steps/message' : ''); msg.style.color = 'var(--fg)'; } catch (e) { msg.textContent = 'Failed to save'; msg.style.color = 'var(--red)'; } } toolsInput.addEventListener('change', save); + if (roundsInput) roundsInput.addEventListener('change', save); var cur = parseInt(toolsInput.value, 10) || 0; - msg.textContent = cur > 0 ? 'Limit: ' + cur + ' tool calls per message' : 'Unlimited'; + var curR = roundsInput ? (parseInt(roundsInput.value, 10) || 20) : null; + msg.textContent = (cur > 0 ? 'Limit: ' + cur + ' tool calls' : 'Unlimited tool calls') + + (curR != null ? ' · ' + curR + ' steps/message' : ''); } /* ═══════════════════════════════════════════ diff --git a/static/style.css b/static/style.css index ea99f3e..1710504 100644 --- a/static/style.css +++ b/static/style.css @@ -3478,6 +3478,38 @@ body.bg-pattern-sparkles { .continue-btn:hover { opacity:0.8; } + + /* Round-cap "Continue" affordance — a cohesive centered pill at the chat + bottom (not the bare red in-message stopped style). 
*/ + .rounds-exhausted { + justify-content:center; + gap:12px; + width:fit-content; + max-width:90%; + margin:14px auto 4px; + padding:7px 8px 7px 16px; + border:1px solid var(--border); + border-radius:999px; + background:color-mix(in srgb, var(--fg) 4%, transparent); + opacity:1; + } + .rounds-exhausted .rounds-exhausted-label { + color:color-mix(in srgb, var(--fg) 60%, transparent); + font-size:0.95em; + } + .rounds-exhausted .continue-btn { + font-size:0.9em; + font-weight:600; + opacity:1; + color:var(--bg); + background:var(--accent, var(--red)); + border-radius:999px; + padding:4px 14px; + line-height:1.3; + } + .rounds-exhausted .continue-btn:hover { + opacity:0.88; + } .ctx-indicator { display:inline-flex; align-items:center; gap:1px; font-size:0.75rem; diff --git a/tests/test_agent_rounds_exhausted.py b/tests/test_agent_rounds_exhausted.py new file mode 100644 index 0000000..178faa8 --- /dev/null +++ b/tests/test_agent_rounds_exhausted.py @@ -0,0 +1,70 @@ +"""Regression: stream_agent_loop emits `rounds_exhausted` only when the round +cap is hit while still working, and NOT on a normal finish. + +The decision is a `for/else` in the loop: the `else` runs only if no `break` +fired (break = done / budget / error). A refactor that adds a stray break or +return, or moves the done-break, could silently flip this. See PR #1999 / #1997. +""" + +import asyncio +import json + +import src.agent_loop as al + + +def _collect(gen): + async def _run(): + return [c async for c in gen] + return asyncio.run(_run()) + + +def _types(chunks): + out = [] + for c in chunks: + if c.startswith("data: ") and not c.startswith("data: [DONE]"): + try: + out.append(json.loads(c[6:])) + except Exception: + pass + return out + + +def _patch_common(monkeypatch): + # Skip RAG/tool-index, MCP, and settings lookups; keep the real loop body, + # _resolve_tool_blocks, and parse_tool_blocks. 
+ monkeypatch.setattr(al, "get_setting", lambda key, default=None: default, raising=False) + monkeypatch.setattr(al, "get_mcp_manager", lambda: None, raising=False) + monkeypatch.setattr(al, "estimate_tokens", lambda *a, **k: 10, raising=False) + + async def _fake_exec(block, *a, **k): + return ("bash", {"output": "ok", "exit_code": 0}) + monkeypatch.setattr(al, "execute_tool_block", _fake_exec, raising=False) + + +def _run_loop(monkeypatch, round_text, max_rounds=2): + async def _fake_stream(_candidates, messages, **kwargs): + yield f'data: {json.dumps({"delta": round_text})}\n\n' + yield "data: [DONE]\n\n" + monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False) + + gen = al.stream_agent_loop( + "http://x/v1", "m", + [{"role": "user", "content": "do a long multi-step task"}], + max_rounds=max_rounds, + relevant_tools={"bash"}, + ) + return _types(_collect(gen)) + + +def test_emits_rounds_exhausted_when_cap_hit_mid_task(monkeypatch): + _patch_common(monkeypatch) + # Every round returns a tool block -> never "done" -> loop exhausts the cap. + events = _run_loop(monkeypatch, "```bash\necho hi\n```", max_rounds=2) + assert any(e.get("type") == "rounds_exhausted" for e in events), events + + +def test_no_rounds_exhausted_on_normal_finish(monkeypatch): + _patch_common(monkeypatch) + # A plain answer (no tool block) -> done-break on round 1 -> no event. + events = _run_loop(monkeypatch, "All done, here is your answer.", max_rounds=2) + assert not any(e.get("type") == "rounds_exhausted" for e in events), events