diff --git a/routes/auth_routes.py b/routes/auth_routes.py
index 60021e1..644b12d 100644
--- a/routes/auth_routes.py
+++ b/routes/auth_routes.py
@@ -438,9 +438,24 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
raise HTTPException(403, "Admin only")
body = await request.json()
current = _load_settings()
+ # Per-key validation for numeric settings: coerce to int and clamp to a
+ # sane range so a bad value can't disable the agent or let it run away.
+ _INT_RANGES = {
+ "agent_max_rounds": (1, 200),
+ "agent_max_tool_calls": (0, 1000), # 0 = unlimited
+ }
for key in DEFAULT_SETTINGS:
- if key in body:
- current[key] = body[key]
+            if key not in body:
+                continue
+            val = body[key]
+            if key in _INT_RANGES:
+                lo, hi = _INT_RANGES[key]
+                try:
+                    val = int(val)  # int(inf) raises OverflowError (JSON 1e999 decodes to inf)
+                except (TypeError, ValueError, OverflowError):
+                    raise HTTPException(400, f"{key} must be an integer") from None
+                val = max(lo, min(val, hi))
+            current[key] = val
_save_settings(current)
return current
diff --git a/routes/chat_routes.py b/routes/chat_routes.py
index a3c6c16..836e9da 100644
--- a/routes/chat_routes.py
+++ b/routes/chat_routes.py
@@ -981,7 +981,15 @@ def setup_chat_routes(
_answered_by = None # set if the selected model failed and a fallback answered
try:
from src.settings import get_setting
+ from src.agent_tools import MAX_AGENT_ROUNDS as _DEFAULT_ROUNDS
_tool_budget = int(get_setting("agent_max_tool_calls", 0))
+                # Per-message round cap from settings; clamp defensively in
+                # case settings.json was hand-edited to a bad value.
+                try:
+                    _max_rounds = int(get_setting("agent_max_rounds", _DEFAULT_ROUNDS) or _DEFAULT_ROUNDS)
+                except (TypeError, ValueError, OverflowError):  # OverflowError: inf, e.g. 1e999 / Infinity
+                    _max_rounds = _DEFAULT_ROUNDS
+                _max_rounds = max(1, min(_max_rounds, 200))
async for chunk in stream_agent_loop(
sess.endpoint_url,
@@ -992,6 +1000,7 @@ def setup_chat_routes(
max_tokens=ctx.preset.max_tokens,
prompt_type=preset_id,
max_tool_calls=_tool_budget,
+ max_rounds=_max_rounds,
context_length=ctx.context_length,
active_document=active_doc,
session_id=session,
@@ -1017,6 +1026,7 @@ def setup_chat_routes(
"tool_start", "tool_output", "agent_step",
"doc_stream_open", "doc_stream_delta",
"doc_update", "doc_suggestions", "ui_control",
+ "rounds_exhausted",
):
if data.get("type") == "agent_step":
_agent_rounds = max(_agent_rounds, data.get("round", 1))
diff --git a/src/agent_loop.py b/src/agent_loop.py
index 7aa7e19..e0b6248 100644
--- a/src/agent_loop.py
+++ b/src/agent_loop.py
@@ -1643,6 +1643,11 @@ async def stream_agent_loop(
_doc_opened = False # whether doc_stream_open was sent
_doc_last_len = 0 # last content length sent
+ # Set when the loop runs out of rounds while the agent was still actively
+ # using tools — i.e. it was cut off, not finished. Drives a "Continue" event
+ # so the user can resume instead of the turn silently stalling.
+ _exhausted_rounds = False
+
for round_num in range(1, max_rounds + 1):
round_response = ""
round_reasoning = "" # reasoning_content deltas (DeepSeek-thinking, vLLM --reasoning-parser)
@@ -2300,6 +2305,20 @@ async def stream_agent_loop(
# Separator in accumulated response
full_response += "\n\n"
+ else:
+ # The for-loop completed every allowed round WITHOUT an early `break`
+ # (a `break` fires on "done", budget, or error). Reaching this `else`
+ # means the agent kept working until it ran out of rounds — so offer
+ # Continue instead of stopping silently. This catches ALL exhaustion
+ # paths, including a verifier `continue` on the final round (the old
+ # bottom-of-loop flag missed those).
+ _exhausted_rounds = True
+
+ # If the loop hit the round cap while still working, tell the client so it
+ # can show a "Continue" affordance instead of the turn just stopping.
+ if _exhausted_rounds:
+ logger.info("[agent] round cap (%d) reached mid-task — emitting rounds_exhausted", max_rounds)
+ yield f'data: {json.dumps({"type": "rounds_exhausted", "rounds": max_rounds})}\n\n'
# If the response is completely empty and no tools were executed,
# yield a fallback message so the user is not left hanging.
diff --git a/src/settings.py b/src/settings.py
index 8f810a6..5bce0fc 100644
--- a/src/settings.py
+++ b/src/settings.py
@@ -100,6 +100,7 @@ DEFAULT_SETTINGS = {
# Tune via Settings or by editing data/settings.json.
"research_run_timeout_seconds": 1800,
"agent_max_tool_calls": 0,
+ "agent_max_rounds": 20, # per-message agent step cap (clamped 1..200)
"agent_input_token_budget": 6000,
# Ceiling on the *auto-derived* input budget that #1230 introduced. Has
# no effect when `agent_input_token_budget` is explicitly set (the user's
diff --git a/static/index.html b/static/index.html
index cade5cf..03edfa9 100644
--- a/static/index.html
+++ b/static/index.html
@@ -1478,6 +1478,10 @@
+
+
+
+
@@ -2092,13 +2096,6 @@
-
-
-
-
@@ -2271,7 +2268,7 @@
-
+
diff --git a/static/js/chat.js b/static/js/chat.js
index c34d6a0..e064b5c 100644
--- a/static/js/chat.js
+++ b/static/js/chat.js
@@ -1836,6 +1836,44 @@ import createResearchSynapse from './researchSynapse.js';
}
}
}
+            } else if (json.type === 'rounds_exhausted') {
+              // The agent hit the per-turn step limit while still working.
+              // Offer a Continue button instead of stalling silently.
+              // NOTE: append to the chat-history container (bottom), NOT the
+              // message body — the body innerHTML is re-rendered at stream
+              // finalize, which would wipe a note placed inside it.
+              const _chatBox = document.getElementById('chat-history');
+              if (!_isBg && _chatBox) {
+                // Drop any prior box so repeated cap-hits each get a fresh
+                // Continue at the bottom (multiple continues in a row).
+                const _old = _chatBox.querySelector('.rounds-exhausted');
+                if (_old) _old.remove();
+                const note = document.createElement('div');
+                note.className = 'stopped-indicator rounds-exhausted';
+                const label = document.createElement('span');
+                label.className = 'rounds-exhausted-label';
+                label.textContent = json.rounds ? `Reached the ${json.rounds}-step limit — not finished.` : 'Reached the step limit — not finished.';
+                note.appendChild(label);
+                const contBtn = document.createElement('button');
+                contBtn.className = 'continue-btn';
+                contBtn.title = 'Continue the task';
+                contBtn.textContent = 'Continue ▸';
+                const _holder = currentHolder;
+                contBtn.addEventListener('click', () => {
+                  // Look up both controls first: if either is missing nothing can be sent, so keep the note and leave the flags untouched.
+                  const msgInput = uiModule.el('message');
+                  const sb = document.querySelector('.send-btn');
+                  if (!msgInput || !sb) return;
+                  note.remove();
+                  _hideUserBubble = true;
+                  _pendingContinue = _holder;
+                  msgInput.value = 'You hit the step limit before finishing — the task is not complete. Continue from exactly where you left off and keep going until it is done. Do NOT repeat work already done.';
+                  sb.click();
+                });
+                note.appendChild(contBtn);
+                _chatBox.appendChild(note);
+                try { note.scrollIntoView({ block: 'end', behavior: 'smooth' }); } catch (_) { uiModule.scrollHistory && uiModule.scrollHistory(); }
+              }
} else if (json.type === 'attachments') {
if (_isBg) continue;
// Update user bubble — replace file chips with image previews
diff --git a/static/js/settings.js b/static/js/settings.js
index 161f722..8a53606 100644
--- a/static/js/settings.js
+++ b/static/js/settings.js
@@ -1558,6 +1558,7 @@ async function initResearchSearchSettings() {
/* ── Agent Settings (AI tab) ── */
async function initAgentSettings() {
var toolsInput = el('set-agentMaxTools');
+ var roundsInput = el('set-agentMaxRounds');
var msg = el('set-agentMsg');
if (!toolsInput) return;
@@ -1565,23 +1566,45 @@ async function initAgentSettings() {
     var res = await fetch('/api/auth/settings', { credentials: 'same-origin' });
     var settings = await res.json();
     if (settings.agent_max_tool_calls) toolsInput.value = settings.agent_max_tool_calls;
+    if (roundsInput && settings.agent_max_rounds) roundsInput.value = settings.agent_max_rounds;
   } catch (e) {}
+  // Clamp + coerce a raw input to an int in [lo, hi]; falls back to `dflt`
+  // when blank/non-numeric. Mirrors the server-side validation.
+  function clampInt(raw, lo, hi, dflt) {
+    var n = parseInt(raw, 10);
+    if (isNaN(n)) return dflt;
+    return Math.max(lo, Math.min(n, hi));
+  }
+
+  // Persist both limits; the status line reports success only for a 2xx response.
   async function save() {
-    var val = parseInt(toolsInput.value, 10) || 0;
+    var tools = clampInt(toolsInput.value, 0, 1000, 0);
+    var rounds = roundsInput ? clampInt(roundsInput.value, 1, 200, 20) : null;
+    toolsInput.value = tools; // reflect the clamped value
+    if (roundsInput) roundsInput.value = rounds;
+    var payload = { agent_max_tool_calls: tools };
+    if (rounds != null) payload.agent_max_rounds = rounds;
     try {
-      await fetch('/api/auth/settings', { method: 'POST', credentials: 'same-origin',
+      var resp = await fetch('/api/auth/settings', { method: 'POST', credentials: 'same-origin',
         headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify({ agent_max_tool_calls: val })
+        body: JSON.stringify(payload)
       });
-      msg.textContent = val > 0 ? 'Limit: ' + val + ' tool calls per message' : 'Unlimited';
+      // fetch() rejects only on network failure; an HTTP error status (e.g. the
+      // server's 400/403) still resolves, so route it to the catch below.
+      if (!resp.ok) throw new Error('HTTP ' + resp.status);
+      msg.textContent = (tools > 0 ? 'Limit: ' + tools + ' tool calls' : 'Unlimited tool calls') +
+        (rounds != null ? ' · ' + rounds + ' steps/message' : '');
       msg.style.color = 'var(--fg)';
     } catch (e) { msg.textContent = 'Failed to save'; msg.style.color = 'var(--red)'; }
   }
   toolsInput.addEventListener('change', save);
+  if (roundsInput) roundsInput.addEventListener('change', save);
   var cur = parseInt(toolsInput.value, 10) || 0;
-  msg.textContent = cur > 0 ? 'Limit: ' + cur + ' tool calls per message' : 'Unlimited';
+  var curR = roundsInput ? (parseInt(roundsInput.value, 10) || 20) : null;
+  msg.textContent = (cur > 0 ? 'Limit: ' + cur + ' tool calls' : 'Unlimited tool calls') +
+    (curR != null ? ' · ' + curR + ' steps/message' : '');
 }
/* ═══════════════════════════════════════════
diff --git a/static/style.css b/static/style.css
index ea99f3e..1710504 100644
--- a/static/style.css
+++ b/static/style.css
@@ -3478,6 +3478,38 @@ body.bg-pattern-sparkles {
.continue-btn:hover {
opacity:0.8;
}
+
+ /* Round-cap "Continue" affordance — a cohesive centered pill at the chat
+ bottom (not the bare red in-message stopped style). */
+ .rounds-exhausted {
+ justify-content:center; /* NOTE(review): justify-content/gap need a flex or grid box — presumably supplied by .stopped-indicator, which chat.js sets on the same element; confirm */
+ gap:12px;
+ width:fit-content; /* shrink-wrap to the content so the auto side margins below can center the box */
+ max-width:90%;
+ margin:14px auto 4px;
+ padding:7px 8px 7px 16px;
+ border:1px solid var(--border);
+ border-radius:999px; /* oversized radius gives fully rounded (pill) ends */
+ background:color-mix(in srgb, var(--fg) 4%, transparent);
+ opacity:1; /* presumably resets a dimmer opacity coming from .stopped-indicator — TODO confirm */
+ }
+ .rounds-exhausted .rounds-exhausted-label {
+ color:color-mix(in srgb, var(--fg) 60%, transparent);
+ font-size:0.95em;
+ }
+ .rounds-exhausted .continue-btn {
+ font-size:0.9em;
+ font-weight:600;
+ opacity:1;
+ color:var(--bg);
+ background:var(--accent, var(--red)); /* falls back to --red when --accent is not defined */
+ border-radius:999px;
+ padding:4px 14px;
+ line-height:1.3;
+ }
+ .rounds-exhausted .continue-btn:hover {
+ opacity:0.88; /* more specific than the base .continue-btn:hover rule (0.8), so this value wins inside the pill */
+ }
.ctx-indicator {
display:inline-flex; align-items:center; gap:1px;
font-size:0.75rem;
diff --git a/tests/test_agent_rounds_exhausted.py b/tests/test_agent_rounds_exhausted.py
new file mode 100644
index 0000000..178faa8
--- /dev/null
+++ b/tests/test_agent_rounds_exhausted.py
@@ -0,0 +1,85 @@
+"""Regression: stream_agent_loop emits `rounds_exhausted` only when the round
+cap is hit while still working, and NOT on a normal finish.
+
+The decision is a `for/else` in the loop: the `else` runs only if no `break`
+fired (break = done / budget / error). A refactor that adds a stray break or
+return, or moves the done-break, could silently flip this. See PR #1999 / #1997.
+"""
+
+import asyncio
+import json
+
+import src.agent_loop as al
+
+
+def _collect(gen):
+    """Drain the async generator *gen* and return every chunk it yielded."""
+    async def _run():
+        return [c async for c in gen]
+    return asyncio.run(_run())
+
+
+def _types(chunks):
+    """Decode the JSON payload of each SSE ``data:`` chunk.
+
+    ``[DONE]`` sentinels, non-``data:`` chunks, and payloads that fail to
+    parse as JSON are skipped.
+    """
+    out = []
+    for c in chunks:
+        if not c.startswith("data: ") or c.startswith("data: [DONE]"):
+            continue
+        try:
+            out.append(json.loads(c[6:]))
+        except json.JSONDecodeError:
+            # Ignore only malformed frames; anything else should fail the test.
+            continue
+    return out
+
+
+def _patch_common(monkeypatch):
+    # Skip RAG/tool-index, MCP, and settings lookups; keep the real loop body,
+    # _resolve_tool_blocks, and parse_tool_blocks.
+    monkeypatch.setattr(al, "get_setting", lambda key, default=None: default, raising=False)
+    monkeypatch.setattr(al, "get_mcp_manager", lambda: None, raising=False)
+    monkeypatch.setattr(al, "estimate_tokens", lambda *a, **k: 10, raising=False)
+
+    async def _fake_exec(block, *a, **k):
+        return ("bash", {"output": "ok", "exit_code": 0})
+    monkeypatch.setattr(al, "execute_tool_block", _fake_exec, raising=False)
+
+
+def _run_loop(monkeypatch, round_text, max_rounds=2):
+    """Run the loop against an LLM stub that streams *round_text* when called.
+
+    Returns the decoded SSE events.
+    """
+    async def _fake_stream(_candidates, messages, **kwargs):
+        yield f'data: {json.dumps({"delta": round_text})}\n\n'
+        yield "data: [DONE]\n\n"
+    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)
+
+    gen = al.stream_agent_loop(
+        "http://x/v1", "m",
+        [{"role": "user", "content": "do a long multi-step task"}],
+        max_rounds=max_rounds,
+        relevant_tools={"bash"},
+    )
+    return _types(_collect(gen))
+
+
+def test_emits_rounds_exhausted_when_cap_hit_mid_task(monkeypatch):
+    _patch_common(monkeypatch)
+    # Every round returns a tool block -> never "done" -> loop exhausts the cap.
+    events = _run_loop(monkeypatch, "```bash\necho hi\n```", max_rounds=2)
+    exhausted = [e for e in events if e.get("type") == "rounds_exhausted"]
+    assert exhausted, events
+    # The payload carries the cap that was hit.
+    assert exhausted[0].get("rounds") == 2, events
+
+
+def test_no_rounds_exhausted_on_normal_finish(monkeypatch):
+    _patch_common(monkeypatch)
+    # A plain answer (no tool block) -> done-break on round 1 -> no event.
+    events = _run_loop(monkeypatch, "All done, here is your answer.", max_rounds=2)
+    assert not any(e.get("type") == "rounds_exhausted" for e in events), events