feat: round-limit handling — Continue affordance at the cap + configurable cap (#1999)

* feat: round-limit handling — Continue affordance at the cap + configurable cap

When the agent loop runs out of rounds (per-message step cap, default 20)
while still actively using tools, it stopped silently mid-task. Now:

1. The loop emits a `rounds_exhausted` SSE event at the cap, and the UI shows
   a "Continue" pill at the bottom of the chat that resumes the task from where
   it left off. Repeated cap-hits each get a fresh Continue (multiple continues
   in a row).
2. The cap is configurable in Settings → Agent ("Max steps per message"),
   validated on the client, at the save endpoint, and at the read site.

- src/agent_loop.py: track `_exhausted_rounds` (set in the round loop's
  `for`/`else`, i.e. only when every allowed round ran without an early
  `break` — the agent wanted to keep going); emit
  `{"type":"rounds_exhausted","rounds":N}` (logged).
- routes/chat_routes.py: read `agent_max_rounds` (clamped 1..200), pass as
  `max_rounds`; forward the new event through the SSE relay.
- routes/auth_routes.py: validate numeric settings on save (int + clamp;
  agent_max_rounds 1..200, agent_max_tool_calls 0..1000; 400 on non-int).
- src/settings.py: default `agent_max_rounds = 20`.
- static/: Settings input + client-side clamp; the Continue pill (reuses the
  existing .stopped-indicator / .continue-btn classes and theme vars
  --border/--fg/--bg/--accent); appended to the chat container so it survives
  the message re-render at stream finalize. chat.js cache version bumped.

* test: cover rounds_exhausted emission (cap-hit vs normal finish)

Drives the real stream_agent_loop with mocked LLM stream / tool exec / settings:
a tool block every round exhausts the cap and must emit rounds_exhausted; a
plain answer hits the done-break and must not. Guards the for/else logic.
This commit is contained in:
Kenny Van de Maele
2026-06-04 22:36:05 +02:00
committed by GitHub
parent a54f41037d
commit 64d65b73c1
9 changed files with 215 additions and 14 deletions

View File

@@ -438,9 +438,24 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
raise HTTPException(403, "Admin only") raise HTTPException(403, "Admin only")
body = await request.json() body = await request.json()
current = _load_settings() current = _load_settings()
# Per-key validation for numeric settings: coerce to int and clamp to a
# sane range so a bad value can't disable the agent or let it run away.
_INT_RANGES = {
"agent_max_rounds": (1, 200),
"agent_max_tool_calls": (0, 1000), # 0 = unlimited
}
for key in DEFAULT_SETTINGS: for key in DEFAULT_SETTINGS:
if key in body: if key not in body:
current[key] = body[key] continue
val = body[key]
if key in _INT_RANGES:
lo, hi = _INT_RANGES[key]
try:
val = int(val)
except (TypeError, ValueError):
raise HTTPException(400, f"{key} must be an integer")
val = max(lo, min(val, hi))
current[key] = val
_save_settings(current) _save_settings(current)
return current return current

View File

@@ -981,7 +981,15 @@ def setup_chat_routes(
_answered_by = None # set if the selected model failed and a fallback answered _answered_by = None # set if the selected model failed and a fallback answered
try: try:
from src.settings import get_setting from src.settings import get_setting
from src.agent_tools import MAX_AGENT_ROUNDS as _DEFAULT_ROUNDS
_tool_budget = int(get_setting("agent_max_tool_calls", 0)) _tool_budget = int(get_setting("agent_max_tool_calls", 0))
# Per-message round cap from settings; clamp defensively in
# case settings.json was hand-edited to a bad value.
try:
_max_rounds = int(get_setting("agent_max_rounds", _DEFAULT_ROUNDS) or _DEFAULT_ROUNDS)
except (TypeError, ValueError):
_max_rounds = _DEFAULT_ROUNDS
_max_rounds = max(1, min(_max_rounds, 200))
async for chunk in stream_agent_loop( async for chunk in stream_agent_loop(
sess.endpoint_url, sess.endpoint_url,
@@ -992,6 +1000,7 @@ def setup_chat_routes(
max_tokens=ctx.preset.max_tokens, max_tokens=ctx.preset.max_tokens,
prompt_type=preset_id, prompt_type=preset_id,
max_tool_calls=_tool_budget, max_tool_calls=_tool_budget,
max_rounds=_max_rounds,
context_length=ctx.context_length, context_length=ctx.context_length,
active_document=active_doc, active_document=active_doc,
session_id=session, session_id=session,
@@ -1017,6 +1026,7 @@ def setup_chat_routes(
"tool_start", "tool_output", "agent_step", "tool_start", "tool_output", "agent_step",
"doc_stream_open", "doc_stream_delta", "doc_stream_open", "doc_stream_delta",
"doc_update", "doc_suggestions", "ui_control", "doc_update", "doc_suggestions", "ui_control",
"rounds_exhausted",
): ):
if data.get("type") == "agent_step": if data.get("type") == "agent_step":
_agent_rounds = max(_agent_rounds, data.get("round", 1)) _agent_rounds = max(_agent_rounds, data.get("round", 1))

View File

@@ -1643,6 +1643,11 @@ async def stream_agent_loop(
_doc_opened = False # whether doc_stream_open was sent _doc_opened = False # whether doc_stream_open was sent
_doc_last_len = 0 # last content length sent _doc_last_len = 0 # last content length sent
# Set when the loop runs out of rounds while the agent was still actively
# using tools — i.e. it was cut off, not finished. Drives a "Continue" event
# so the user can resume instead of the turn silently stalling.
_exhausted_rounds = False
for round_num in range(1, max_rounds + 1): for round_num in range(1, max_rounds + 1):
round_response = "" round_response = ""
round_reasoning = "" # reasoning_content deltas (DeepSeek-thinking, vLLM --reasoning-parser) round_reasoning = "" # reasoning_content deltas (DeepSeek-thinking, vLLM --reasoning-parser)
@@ -2300,6 +2305,20 @@ async def stream_agent_loop(
# Separator in accumulated response # Separator in accumulated response
full_response += "\n\n" full_response += "\n\n"
else:
# The for-loop completed every allowed round WITHOUT an early `break`
# (a `break` fires on "done", budget, or error). Reaching this `else`
# means the agent kept working until it ran out of rounds — so offer
# Continue instead of stopping silently. This catches ALL exhaustion
# paths, including a verifier `continue` on the final round (the old
# bottom-of-loop flag missed those).
_exhausted_rounds = True
# If the loop hit the round cap while still working, tell the client so it
# can show a "Continue" affordance instead of the turn just stopping.
if _exhausted_rounds:
logger.info("[agent] round cap (%d) reached mid-task — emitting rounds_exhausted", max_rounds)
yield f'data: {json.dumps({"type": "rounds_exhausted", "rounds": max_rounds})}\n\n'
# If the response is completely empty and no tools were executed, # If the response is completely empty and no tools were executed,
# yield a fallback message so the user is not left hanging. # yield a fallback message so the user is not left hanging.

View File

@@ -100,6 +100,7 @@ DEFAULT_SETTINGS = {
# Tune via Settings or by editing data/settings.json. # Tune via Settings or by editing data/settings.json.
"research_run_timeout_seconds": 1800, "research_run_timeout_seconds": 1800,
"agent_max_tool_calls": 0, "agent_max_tool_calls": 0,
"agent_max_rounds": 20, # per-message agent step cap (clamped 1..200)
"agent_input_token_budget": 6000, "agent_input_token_budget": 6000,
# Ceiling on the *auto-derived* input budget that #1230 introduced. Has # Ceiling on the *auto-derived* input budget that #1230 introduced. Has
# no effect when `agent_input_token_budget` is explicitly set (the user's # no effect when `agent_input_token_budget` is explicitly set (the user's

View File

@@ -1478,6 +1478,10 @@
<label class="settings-label">Tool call limit</label> <label class="settings-label">Tool call limit</label>
<input id="set-agentMaxTools" type="text" inputmode="numeric" placeholder="0 = unlimited" class="settings-select" style="width:120px;"> <input id="set-agentMaxTools" type="text" inputmode="numeric" placeholder="0 = unlimited" class="settings-select" style="width:120px;">
</div> </div>
<div class="settings-row">
<label class="settings-label">Max steps per message</label>
<input id="set-agentMaxRounds" type="text" inputmode="numeric" placeholder="20" class="settings-select" style="width:120px;">
</div>
<div id="set-agentMsg" style="font-size:11px;color:color-mix(in srgb, var(--fg) 45%, transparent);"></div> <div id="set-agentMsg" style="font-size:11px;color:color-mix(in srgb, var(--fg) 45%, transparent);"></div>
</div> </div>
</div> </div>
@@ -2092,13 +2096,6 @@
<button class="admin-btn-add" id="adm-epAddBtn" style="width:55px;text-align:center;">Add</button> <button class="admin-btn-add" id="adm-epAddBtn" style="width:55px;text-align:center;">Add</button>
</div> </div>
<div id="adm-epApiMsg" class="adm-ep-inline-msg"></div> <div id="adm-epApiMsg" class="adm-ep-inline-msg"></div>
<div class="adm-copilot-connect">
<button class="admin-btn-sm" id="adm-copilotConnectBtn" type="button" title="Sign in to GitHub Copilot via device flow">
<svg width="13" height="13" viewBox="0 0 24 24" fill="currentColor" style="vertical-align:-2px;margin-right:5px;opacity:0.8"><path d="M12 .5C5.7.5.5 5.7.5 12c0 5.1 3.3 9.4 7.9 10.9.6.1.8-.2.8-.5v-1.7c-3.2.7-3.9-1.5-3.9-1.5-.5-1.3-1.3-1.7-1.3-1.7-1.1-.7.1-.7.1-.7 1.2.1 1.8 1.2 1.8 1.2 1 1.8 2.7 1.3 3.4 1 .1-.8.4-1.3.7-1.6-2.6-.3-5.3-1.3-5.3-5.7 0-1.3.4-2.3 1.2-3.1-.1-.3-.5-1.5.1-3.1 0 0 1-.3 3.3 1.2a11.4 11.4 0 0 1 6 0C17.3 4.7 18.3 5 18.3 5c.6 1.6.2 2.8.1 3.1.8.8 1.2 1.8 1.2 3.1 0 4.4-2.7 5.4-5.3 5.7.4.4.8 1.1.8 2.2v3.3c0 .3.2.6.8.5 4.6-1.5 7.9-5.8 7.9-10.9C23.5 5.7 18.3.5 12 .5z"/></svg>
Connect GitHub Copilot
</button>
<div id="adm-copilotStatus" class="adm-ep-inline-msg"></div>
</div>
</div> </div>
</div> </div>
</div> </div>
@@ -2271,7 +2268,7 @@
<script type="module" src="/static/js/chatRenderer.js"></script> <script type="module" src="/static/js/chatRenderer.js"></script>
<script type="module" src="/static/js/codeRunner.js"></script> <script type="module" src="/static/js/codeRunner.js"></script>
<script type="module" src="/static/js/chatStream.js"></script> <script type="module" src="/static/js/chatStream.js"></script>
<script type="module" src="/static/js/chat.js?v=20260603n"></script> <script type="module" src="/static/js/chat.js?v=20260604q"></script>
<script type="module" src="/static/js/cookbook.js"></script> <script type="module" src="/static/js/cookbook.js"></script>
<script type="module" src="/static/js/search-chat.js"></script> <script type="module" src="/static/js/search-chat.js"></script>
<script type="module" src="/static/js/compare/index.js"></script> <script type="module" src="/static/js/compare/index.js"></script>

View File

@@ -1836,6 +1836,44 @@ import createResearchSynapse from './researchSynapse.js';
} }
} }
} }
} else if (json.type === 'rounds_exhausted') {
// The agent hit the per-turn step limit while still working.
// Offer a Continue button instead of stalling silently.
// NOTE: append to the chat-history container (bottom), NOT the
// message body — the body innerHTML is re-rendered at stream
// finalize, which would wipe a note placed inside it.
const _chatBox = document.getElementById('chat-history');
if (!_isBg && _chatBox) {
// Drop any prior box so repeated cap-hits each get a fresh
// Continue at the bottom (multiple continues in a row).
const _old = _chatBox.querySelector('.rounds-exhausted');
if (_old) _old.remove();
const note = document.createElement('div');
note.className = 'stopped-indicator rounds-exhausted';
const label = document.createElement('span');
label.className = 'rounds-exhausted-label';
label.textContent = `Reached the ${json.rounds || ''}-step limit — not finished.`;
note.appendChild(label);
const contBtn = document.createElement('button');
contBtn.className = 'continue-btn';
contBtn.title = 'Continue the task';
contBtn.textContent = 'Continue ▸';
const _holder = currentHolder;
contBtn.addEventListener('click', () => {
note.remove();
_hideUserBubble = true;
_pendingContinue = _holder;
const msgInput = uiModule.el('message');
if (msgInput) {
msgInput.value = 'You hit the step limit before finishing — the task is not complete. Continue from exactly where you left off and keep going until it is done. Do NOT repeat work already done.';
const sb = document.querySelector('.send-btn');
if (sb) sb.click();
}
});
note.appendChild(contBtn);
_chatBox.appendChild(note);
try { note.scrollIntoView({ block: 'end', behavior: 'smooth' }); } catch (_) { uiModule.scrollHistory && uiModule.scrollHistory(); }
}
} else if (json.type === 'attachments') { } else if (json.type === 'attachments') {
if (_isBg) continue; if (_isBg) continue;
// Update user bubble — replace file chips with image previews // Update user bubble — replace file chips with image previews

View File

@@ -1558,6 +1558,7 @@ async function initResearchSearchSettings() {
/* ── Agent Settings (AI tab) ── */ /* ── Agent Settings (AI tab) ── */
async function initAgentSettings() { async function initAgentSettings() {
var toolsInput = el('set-agentMaxTools'); var toolsInput = el('set-agentMaxTools');
var roundsInput = el('set-agentMaxRounds');
var msg = el('set-agentMsg'); var msg = el('set-agentMsg');
if (!toolsInput) return; if (!toolsInput) return;
@@ -1565,23 +1566,41 @@ async function initAgentSettings() {
var res = await fetch('/api/auth/settings', { credentials: 'same-origin' }); var res = await fetch('/api/auth/settings', { credentials: 'same-origin' });
var settings = await res.json(); var settings = await res.json();
if (settings.agent_max_tool_calls) toolsInput.value = settings.agent_max_tool_calls; if (settings.agent_max_tool_calls) toolsInput.value = settings.agent_max_tool_calls;
if (roundsInput && settings.agent_max_rounds) roundsInput.value = settings.agent_max_rounds;
} catch (e) {} } catch (e) {}
// Parse `raw` as a base-10 integer and pin the result to [lo, hi].
// Blank or non-numeric input yields `dflt` unchanged (it is not clamped).
// Intended to mirror the range clamp applied when settings are saved.
function clampInt(raw, lo, hi, dflt) {
    var parsed = Number.parseInt(raw, 10);
    if (Number.isNaN(parsed)) {
        return dflt;
    }
    // Cap from above first, then lift to the floor — same order as before.
    var upperBounded = Math.min(parsed, hi);
    return Math.max(lo, upperBounded);
}
async function save() { async function save() {
var val = parseInt(toolsInput.value, 10) || 0; var tools = clampInt(toolsInput.value, 0, 1000, 0);
var rounds = roundsInput ? clampInt(roundsInput.value, 1, 200, 20) : null;
toolsInput.value = tools; // reflect the clamped value
if (roundsInput) roundsInput.value = rounds;
var payload = { agent_max_tool_calls: tools };
if (rounds != null) payload.agent_max_rounds = rounds;
try { try {
await fetch('/api/auth/settings', { method: 'POST', credentials: 'same-origin', await fetch('/api/auth/settings', { method: 'POST', credentials: 'same-origin',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ agent_max_tool_calls: val }) body: JSON.stringify(payload)
}); });
msg.textContent = val > 0 ? 'Limit: ' + val + ' tool calls per message' : 'Unlimited'; msg.textContent = (tools > 0 ? 'Limit: ' + tools + ' tool calls' : 'Unlimited tool calls') +
(rounds != null ? ' · ' + rounds + ' steps/message' : '');
msg.style.color = 'var(--fg)'; msg.style.color = 'var(--fg)';
} catch (e) { msg.textContent = 'Failed to save'; msg.style.color = 'var(--red)'; } } catch (e) { msg.textContent = 'Failed to save'; msg.style.color = 'var(--red)'; }
} }
toolsInput.addEventListener('change', save); toolsInput.addEventListener('change', save);
if (roundsInput) roundsInput.addEventListener('change', save);
var cur = parseInt(toolsInput.value, 10) || 0; var cur = parseInt(toolsInput.value, 10) || 0;
msg.textContent = cur > 0 ? 'Limit: ' + cur + ' tool calls per message' : 'Unlimited'; var curR = roundsInput ? (parseInt(roundsInput.value, 10) || 20) : null;
msg.textContent = (cur > 0 ? 'Limit: ' + cur + ' tool calls' : 'Unlimited tool calls') +
(curR != null ? ' · ' + curR + ' steps/message' : '');
} }
/* ═══════════════════════════════════════════ /* ═══════════════════════════════════════════

View File

@@ -3478,6 +3478,38 @@ body.bg-pattern-sparkles {
.continue-btn:hover { .continue-btn:hover {
opacity:0.8; opacity:0.8;
} }
/* Round-cap "Continue" affordance — a cohesive centered pill at the chat
bottom (not the bare red in-message stopped style). */
.rounds-exhausted {
justify-content:center;
gap:12px;
width:fit-content;
max-width:90%;
margin:14px auto 4px;
padding:7px 8px 7px 16px;
border:1px solid var(--border);
border-radius:999px;
background:color-mix(in srgb, var(--fg) 4%, transparent);
opacity:1;
}
.rounds-exhausted .rounds-exhausted-label {
color:color-mix(in srgb, var(--fg) 60%, transparent);
font-size:0.95em;
}
.rounds-exhausted .continue-btn {
font-size:0.9em;
font-weight:600;
opacity:1;
color:var(--bg);
background:var(--accent, var(--red));
border-radius:999px;
padding:4px 14px;
line-height:1.3;
}
.rounds-exhausted .continue-btn:hover {
opacity:0.88;
}
.ctx-indicator { .ctx-indicator {
display:inline-flex; align-items:center; gap:1px; display:inline-flex; align-items:center; gap:1px;
font-size:0.75rem; font-size:0.75rem;

View File

@@ -0,0 +1,70 @@
"""Regression: stream_agent_loop emits `rounds_exhausted` only when the round
cap is hit while still working, and NOT on a normal finish.
The decision is a `for/else` in the loop: the `else` runs only if no `break`
fired (break = done / budget / error). A refactor that adds a stray break or
return, or moves the done-break, could silently flip this. See PR #1999 / #1997.
"""
import asyncio
import json
import src.agent_loop as al
def _collect(gen):
    """Drain the async generator *gen* and return its items as a list.

    Runs the generator to completion on a fresh event loop via
    ``asyncio.run`` so synchronous test functions can consume it.
    """
    async def _run():
        return [c async for c in gen]

    return asyncio.run(_run())
def _types(chunks):
    """Decode SSE ``data:`` chunks into their JSON payloads.

    Chunks without the ``data: `` prefix and the ``data: [DONE]`` sentinel are
    ignored. Payloads that are not valid JSON are skipped, so a stray non-JSON
    frame does not abort event collection.

    Returns the decoded payloads in stream order.
    """
    prefix = "data: "
    out = []
    for c in chunks:
        if not c.startswith(prefix) or c.startswith("data: [DONE]"):
            continue
        try:
            out.append(json.loads(c[len(prefix):]))
        except json.JSONDecodeError:
            # Only malformed JSON is tolerated here; any other error is a real
            # bug and should surface rather than be swallowed.
            continue
    return out
def _patch_common(monkeypatch):
    """Stub the collaborators of ``src.agent_loop`` that these tests bypass.

    Replaces ``get_setting`` (always returns the supplied default),
    ``get_mcp_manager`` (returns ``None``), ``estimate_tokens`` (constant 10)
    and ``execute_tool_block`` (a fake successful ``bash`` result). All are
    patched with ``raising=False`` so a name missing from the module does not
    fail the test.
    """
    # Skip RAG/tool-index, MCP, and settings lookups; keep the real loop body,
    # _resolve_tool_blocks, and parse_tool_blocks.
    monkeypatch.setattr(al, "get_setting", lambda key, default=None: default, raising=False)
    monkeypatch.setattr(al, "get_mcp_manager", lambda: None, raising=False)
    monkeypatch.setattr(al, "estimate_tokens", lambda *a, **k: 10, raising=False)

    async def _fake_exec(block, *a, **k):
        # Every tool call "succeeds", so the loop always has a result to feed
        # into the next round.
        return ("bash", {"output": "ok", "exit_code": 0})

    monkeypatch.setattr(al, "execute_tool_block", _fake_exec, raising=False)
def _run_loop(monkeypatch, round_text, max_rounds=2):
    """Run ``stream_agent_loop`` with a canned LLM reply and return its events.

    The LLM stream is replaced by a fake that emits *round_text* as a single
    delta followed by ``[DONE]`` on every call, so each round sees the same
    model output. The loop is capped at *max_rounds* and the emitted SSE
    chunks are decoded into a list of JSON payloads.
    """
    async def _fake_stream(_candidates, messages, **kwargs):
        yield f'data: {json.dumps({"delta": round_text})}\n\n'
        yield "data: [DONE]\n\n"

    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)
    gen = al.stream_agent_loop(
        "http://x/v1", "m",
        [{"role": "user", "content": "do a long multi-step task"}],
        max_rounds=max_rounds,
        relevant_tools={"bash"},
    )
    return _types(_collect(gen))
def test_emits_rounds_exhausted_when_cap_hit_mid_task(monkeypatch):
    """A tool block on every round must exhaust the cap and emit the event."""
    _patch_common(monkeypatch)
    # Every round returns a tool block -> never "done" -> loop exhausts the cap.
    events = _run_loop(monkeypatch, "```bash\necho hi\n```", max_rounds=2)
    exhausted = [e for e in events if e.get("type") == "rounds_exhausted"]
    assert exhausted, events
    # The payload carries the cap that was hit; the client uses it in the
    # "Reached the N-step limit" label.
    assert exhausted[-1].get("rounds") == 2, exhausted
def test_no_rounds_exhausted_on_normal_finish(monkeypatch):
    """A plain answer finishes the turn early and must not emit the event."""
    _patch_common(monkeypatch)
    # A plain answer (no tool block) -> done-break on round 1 -> no event.
    events = _run_loop(monkeypatch, "All done, here is your answer.", max_rounds=2)
    assert not any(e.get("type") == "rounds_exhausted" for e in events), events