feat: round-limit handling — Continue affordance at the cap + configurable cap (#1999)

* feat: round-limit handling — Continue affordance at the cap + configurable cap

  When the agent loop runs out of rounds (per-message step cap, default 20) while still actively using tools, it previously stopped silently mid-task. Now:

  1. The loop emits a `rounds_exhausted` SSE event at the cap, and the UI shows a "Continue" pill at the bottom of the chat that resumes the task from where it left off. Repeated cap-hits each get a fresh Continue (multiple continues in a row).
  2. The cap is configurable in Settings → Agent ("Max steps per message"), validated on the client, at the save endpoint, and at the read site.

  - src/agent_loop.py: track `_exhausted_rounds` (set only when a full tool-executing round completes on the last allowed round — i.e. the agent wanted to keep going); emit `{"type":"rounds_exhausted","rounds":N}` (logged).
  - routes/chat_routes.py: read `agent_max_rounds` (clamped 1..200), pass as `max_rounds`; forward the new event through the SSE relay.
  - routes/auth_routes.py: validate numeric settings on save (int + clamp; agent_max_rounds 1..200, agent_max_tool_calls 0..1000; 400 on non-int).
  - src/settings.py: default `agent_max_rounds = 20`.
  - static/: Settings input + client-side clamp; the Continue pill (reuses the existing .stopped-indicator / .continue-btn classes and theme vars --border/--fg/--bg/--accent); appended to the chat container so it survives the message re-render at stream finalize. chat.js cache version bumped.

* test: cover rounds_exhausted emission (cap-hit vs normal finish)

  Drives the real stream_agent_loop with mocked LLM stream / tool exec / settings: a tool block every round exhausts the cap and must emit rounds_exhausted; a plain answer hits the done-break and must not. Guards the for/else logic.
2026-06-04 22:36:05 +02:00
parent a54f41037d
commit 64d65b73c1
9 changed files with 215 additions and 14 deletions
--- a/routes/auth_routes.py
+++ b/routes/auth_routes.py
@@ -438,9 +438,24 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
            raise HTTPException(403, "Admin only")
        body = await request.json()
        current = _load_settings()
        # Per-key validation for numeric settings: coerce to int and clamp to a
        # sane range so a bad value can't disable the agent or let it run away.
        _INT_RANGES = {
            "agent_max_rounds": (1, 200),
            "agent_max_tool_calls": (0, 1000),  # 0 = unlimited
        }
        for key in DEFAULT_SETTINGS:
-            if key in body:
+            if key not in body:
-                current[key] = body[key]
+                continue
            val = body[key]
            if key in _INT_RANGES:
                lo, hi = _INT_RANGES[key]
                try:
                    val = int(val)
                except (TypeError, ValueError):
                    raise HTTPException(400, f"{key} must be an integer")
                val = max(lo, min(val, hi))
            current[key] = val
        _save_settings(current)
        return current
--- a/routes/chat_routes.py
+++ b/routes/chat_routes.py
@@ -981,7 +981,15 @@ def setup_chat_routes(
                _answered_by = None  # set if the selected model failed and a fallback answered
                try:
                    from src.settings import get_setting
                    from src.agent_tools import MAX_AGENT_ROUNDS as _DEFAULT_ROUNDS
                    _tool_budget = int(get_setting("agent_max_tool_calls", 0))
                    # Per-message round cap from settings; clamp defensively in
                    # case settings.json was hand-edited to a bad value.
                    try:
                        _max_rounds = int(get_setting("agent_max_rounds", _DEFAULT_ROUNDS) or _DEFAULT_ROUNDS)
                    except (TypeError, ValueError):
                        _max_rounds = _DEFAULT_ROUNDS
                    _max_rounds = max(1, min(_max_rounds, 200))
                    async for chunk in stream_agent_loop(
                        sess.endpoint_url,
@@ -992,6 +1000,7 @@ def setup_chat_routes(
                        max_tokens=ctx.preset.max_tokens,
                        prompt_type=preset_id,
                        max_tool_calls=_tool_budget,
                        max_rounds=_max_rounds,
                        context_length=ctx.context_length,
                        active_document=active_doc,
                        session_id=session,
@@ -1017,6 +1026,7 @@ def setup_chat_routes(
                                    "tool_start", "tool_output", "agent_step",
                                    "doc_stream_open", "doc_stream_delta",
                                    "doc_update", "doc_suggestions", "ui_control",
                                    "rounds_exhausted",
                                ):
                                    if data.get("type") == "agent_step":
                                        _agent_rounds = max(_agent_rounds, data.get("round", 1))
--- a/src/agent_loop.py
+++ b/src/agent_loop.py
@@ -1643,6 +1643,11 @@ async def stream_agent_loop(
    _doc_opened = False    # whether doc_stream_open was sent
    _doc_last_len = 0      # last content length sent
    # Set when the loop runs out of rounds while the agent was still actively
    # using tools — i.e. it was cut off, not finished. Drives a "Continue" event
    # so the user can resume instead of the turn silently stalling.
    _exhausted_rounds = False
    for round_num in range(1, max_rounds + 1):
        round_response = ""
        round_reasoning = ""  # reasoning_content deltas (DeepSeek-thinking, vLLM --reasoning-parser)
@@ -2300,6 +2305,20 @@ async def stream_agent_loop(
        # Separator in accumulated response
        full_response += "\n\n"
    else:
        # The for-loop completed every allowed round WITHOUT an early `break`
        # (a `break` fires on "done", budget, or error). Reaching this `else`
        # means the agent kept working until it ran out of rounds — so offer
        # Continue instead of stopping silently. This catches ALL exhaustion
        # paths, including a verifier `continue` on the final round (the old
        # bottom-of-loop flag missed those).
        _exhausted_rounds = True
    # If the loop hit the round cap while still working, tell the client so it
    # can show a "Continue" affordance instead of the turn just stopping.
    if _exhausted_rounds:
        logger.info("[agent] round cap (%d) reached mid-task — emitting rounds_exhausted", max_rounds)
        yield f'data: {json.dumps({"type": "rounds_exhausted", "rounds": max_rounds})}\n\n'
    # If the response is completely empty and no tools were executed,
    # yield a fallback message so the user is not left hanging.
--- a/src/settings.py
+++ b/src/settings.py
@@ -100,6 +100,7 @@ DEFAULT_SETTINGS = {
    # Tune via Settings or by editing data/settings.json.
    "research_run_timeout_seconds": 1800,
    "agent_max_tool_calls": 0,
    "agent_max_rounds": 20,  # per-message agent step cap (clamped 1..200)
    "agent_input_token_budget": 6000,
    # Ceiling on the *auto-derived* input budget that #1230 introduced. Has
    # no effect when `agent_input_token_budget` is explicitly set (the user's
--- a/static/index.html
+++ b/static/index.html
@@ -1478,6 +1478,10 @@
                <label class="settings-label">Tool call limit</label>
                <input id="set-agentMaxTools" type="text" inputmode="numeric" placeholder="0 = unlimited" class="settings-select" style="width:120px;">
              </div>
              <div class="settings-row">
                <label class="settings-label">Max steps per message</label>
                <input id="set-agentMaxRounds" type="text" inputmode="numeric" placeholder="20" class="settings-select" style="width:120px;">
              </div>
              <div id="set-agentMsg" style="font-size:11px;color:color-mix(in srgb, var(--fg) 45%, transparent);"></div>
            </div>
          </div>
@@ -2092,13 +2096,6 @@
                  <button class="admin-btn-add" id="adm-epAddBtn" style="width:55px;text-align:center;">Add</button>
                </div>
                <div id="adm-epApiMsg" class="adm-ep-inline-msg"></div>
                <div class="adm-copilot-connect">
                  <button class="admin-btn-sm" id="adm-copilotConnectBtn" type="button" title="Sign in to GitHub Copilot via device flow">
                    <svg width="13" height="13" viewBox="0 0 24 24" fill="currentColor" style="vertical-align:-2px;margin-right:5px;opacity:0.8"><path d="M12 .5C5.7.5.5 5.7.5 12c0 5.1 3.3 9.4 7.9 10.9.6.1.8-.2.8-.5v-1.7c-3.2.7-3.9-1.5-3.9-1.5-.5-1.3-1.3-1.7-1.3-1.7-1.1-.7.1-.7.1-.7 1.2.1 1.8 1.2 1.8 1.2 1 1.8 2.7 1.3 3.4 1 .1-.8.4-1.3.7-1.6-2.6-.3-5.3-1.3-5.3-5.7 0-1.3.4-2.3 1.2-3.1-.1-.3-.5-1.5.1-3.1 0 0 1-.3 3.3 1.2a11.4 11.4 0 0 1 6 0C17.3 4.7 18.3 5 18.3 5c.6 1.6.2 2.8.1 3.1.8.8 1.2 1.8 1.2 3.1 0 4.4-2.7 5.4-5.3 5.7.4.4.8 1.1.8 2.2v3.3c0 .3.2.6.8.5 4.6-1.5 7.9-5.8 7.9-10.9C23.5 5.7 18.3.5 12 .5z"/></svg>
                    Connect GitHub Copilot
                  </button>
                  <div id="adm-copilotStatus" class="adm-ep-inline-msg"></div>
                </div>
              </div>
            </div>
          </div>
@@ -2271,7 +2268,7 @@
 <script type="module" src="/static/js/chatRenderer.js"></script>
 <script type="module" src="/static/js/codeRunner.js"></script>
 <script type="module" src="/static/js/chatStream.js"></script>
-<script type="module" src="/static/js/chat.js?v=20260603n"></script>
+<script type="module" src="/static/js/chat.js?v=20260604q"></script>
 <script type="module" src="/static/js/cookbook.js"></script>
 <script type="module" src="/static/js/search-chat.js"></script>
 <script type="module" src="/static/js/compare/index.js"></script>
--- a/static/js/chat.js
+++ b/static/js/chat.js
@@ -1836,6 +1836,44 @@ import createResearchSynapse from './researchSynapse.js';
                    }
                  }
                }
              } else if (json.type === 'rounds_exhausted') {
                // The agent hit the per-turn step limit while still working.
                // Offer a Continue button instead of stalling silently.
                // NOTE: append to the chat-history container (bottom), NOT the
                // message body — the body innerHTML is re-rendered at stream
                // finalize, which would wipe a note placed inside it.
                const _chatBox = document.getElementById('chat-history');
                if (!_isBg && _chatBox) {
                  // Drop any prior box so repeated cap-hits each get a fresh
                  // Continue at the bottom (multiple continues in a row).
                  const _old = _chatBox.querySelector('.rounds-exhausted');
                  if (_old) _old.remove();
                  const note = document.createElement('div');
                  note.className = 'stopped-indicator rounds-exhausted';
                  const label = document.createElement('span');
                  label.className = 'rounds-exhausted-label';
                  label.textContent = `Reached the ${json.rounds || ''}-step limit — not finished.`;
                  note.appendChild(label);
                  const contBtn = document.createElement('button');
                  contBtn.className = 'continue-btn';
                  contBtn.title = 'Continue the task';
                  contBtn.textContent = 'Continue ▸';
                  const _holder = currentHolder;
                  contBtn.addEventListener('click', () => {
                    note.remove();
                    _hideUserBubble = true;
                    _pendingContinue = _holder;
                    const msgInput = uiModule.el('message');
                    if (msgInput) {
                      msgInput.value = 'You hit the step limit before finishing — the task is not complete. Continue from exactly where you left off and keep going until it is done. Do NOT repeat work already done.';
                      const sb = document.querySelector('.send-btn');
                      if (sb) sb.click();
                    }
                  });
                  note.appendChild(contBtn);
                  _chatBox.appendChild(note);
                  try { note.scrollIntoView({ block: 'end', behavior: 'smooth' }); } catch (_) { uiModule.scrollHistory && uiModule.scrollHistory(); }
                }
              } else if (json.type === 'attachments') {
                if (_isBg) continue;
                // Update user bubble — replace file chips with image previews
--- a/static/js/settings.js
+++ b/static/js/settings.js
@@ -1558,6 +1558,7 @@ async function initResearchSearchSettings() {
 /* ── Agent Settings (AI tab) ── */
 async function initAgentSettings() {
  var toolsInput = el('set-agentMaxTools');
  var roundsInput = el('set-agentMaxRounds');
  var msg = el('set-agentMsg');
  if (!toolsInput) return;
@@ -1565,23 +1566,41 @@ async function initAgentSettings() {
    var res = await fetch('/api/auth/settings', { credentials: 'same-origin' });
    var settings = await res.json();
    if (settings.agent_max_tool_calls) toolsInput.value = settings.agent_max_tool_calls;
    if (roundsInput && settings.agent_max_rounds) roundsInput.value = settings.agent_max_rounds;
  } catch (e) {}
  // Parse `raw` as a base-10 integer and pin it inside [lo, hi]. Blank or
  // non-numeric input yields `dflt` unchanged. Same bounds the server applies.
  function clampInt(raw, lo, hi, dflt) {
    var parsed = parseInt(raw, 10);
    if (isNaN(parsed)) return dflt;
    // Apply the upper bound first, then the lower bound, so the lower bound
    // wins if the two ever cross — same outcome as max(lo, min(n, hi)).
    if (parsed > hi) parsed = hi;
    if (parsed < lo) parsed = lo;
    return parsed;
  }
  async function save() {
-    var val = parseInt(toolsInput.value, 10) || 0;
+    var tools = clampInt(toolsInput.value, 0, 1000, 0);
    var rounds = roundsInput ? clampInt(roundsInput.value, 1, 200, 20) : null;
    toolsInput.value = tools;                       // reflect the clamped value
    if (roundsInput) roundsInput.value = rounds;
    var payload = { agent_max_tool_calls: tools };
    if (rounds != null) payload.agent_max_rounds = rounds;
    try {
      await fetch('/api/auth/settings', { method: 'POST', credentials: 'same-origin',
        headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify({ agent_max_tool_calls: val })
+        body: JSON.stringify(payload)
      });
-      msg.textContent = val > 0 ? 'Limit: ' + val + ' tool calls per message' : 'Unlimited';
+      msg.textContent = (tools > 0 ? 'Limit: ' + tools + ' tool calls' : 'Unlimited tool calls') +
        (rounds != null ? ' · ' + rounds + ' steps/message' : '');
      msg.style.color = 'var(--fg)';
    } catch (e) { msg.textContent = 'Failed to save'; msg.style.color = 'var(--red)'; }
  }
  toolsInput.addEventListener('change', save);
  if (roundsInput) roundsInput.addEventListener('change', save);
  var cur = parseInt(toolsInput.value, 10) || 0;
-  msg.textContent = cur > 0 ? 'Limit: ' + cur + ' tool calls per message' : 'Unlimited';
+  var curR = roundsInput ? (parseInt(roundsInput.value, 10) || 20) : null;
  msg.textContent = (cur > 0 ? 'Limit: ' + cur + ' tool calls' : 'Unlimited tool calls') +
    (curR != null ? ' · ' + curR + ' steps/message' : '');
 }
 /* ═══════════════════════════════════════════
--- a/static/style.css
+++ b/static/style.css
@@ -3478,6 +3478,38 @@ body.bg-pattern-sparkles {
    .continue-btn:hover {
      opacity:0.8;
    }
    /* Round-cap "Continue" affordance — a cohesive centered pill at the chat
       bottom (not the bare red in-message stopped style). The element carries
       both .stopped-indicator and .rounds-exhausted, so these rules layer on
       top of the stopped-indicator base. */
    .rounds-exhausted {
      justify-content:center;
      gap:12px;
      /* Shrink-wrap the pill around its label + button, centered by the auto
         side margins below. */
      width:fit-content;
      max-width:90%;
      margin:14px auto 4px;
      /* Asymmetric padding: tighter on the right, where the button sits. */
      padding:7px 8px 7px 16px;
      border:1px solid var(--border);
      border-radius:999px;
      background:color-mix(in srgb, var(--fg) 4%, transparent);
      /* NOTE(review): presumably overrides a dimmed opacity inherited from
         .stopped-indicator — confirm against the base rule. */
      opacity:1;
    }
    /* Muted label text: the foreground colour at 60% strength. */
    .rounds-exhausted .rounds-exhausted-label {
      color:color-mix(in srgb, var(--fg) 60%, transparent);
      font-size:0.95em;
    }
    /* Filled pill button; uses --accent and falls back to --red when the theme
       does not define --accent. */
    .rounds-exhausted .continue-btn {
      font-size:0.9em;
      font-weight:600;
      opacity:1;
      color:var(--bg);
      background:var(--accent, var(--red));
      border-radius:999px;
      padding:4px 14px;
      line-height:1.3;
    }
    /* Hover feedback for the pill button (the generic .continue-btn:hover
       above uses 0.8). */
    .rounds-exhausted .continue-btn:hover {
      opacity:0.88;
    }
    .ctx-indicator {
      display:inline-flex; align-items:center; gap:1px;
      font-size:0.75rem;
--- a/tests/test_agent_rounds_exhausted.py
+++ b/tests/test_agent_rounds_exhausted.py
@@ -0,0 +1,70 @@
 """Regression: stream_agent_loop emits `rounds_exhausted` only when the round
 cap is hit while still working, and NOT on a normal finish.
 The decision is a `for/else` in the loop: the `else` runs only if no `break`
 fired (break = done / budget / error). A refactor that adds a stray break or
 return, or moves the done-break, could silently flip this. See PR #1999 / #1997.
 """
 import asyncio
 import json
 import src.agent_loop as al
 def _collect(gen):
    async def _run():
        return [c async for c in gen]
    return asyncio.run(_run())
 def _types(chunks):
    out = []
    for c in chunks:
        if c.startswith("data: ") and not c.startswith("data: [DONE]"):
            try:
                out.append(json.loads(c[6:]))
            except Exception:
                pass
    return out
 def _patch_common(monkeypatch):
    """Stub the loop's collaborators that these tests do not exercise.

    Settings lookups, the MCP manager, token estimation and tool execution are
    replaced with inert fakes (skipping RAG/tool-index, MCP and settings work);
    the real loop body, _resolve_tool_blocks and parse_tool_blocks stay in play.
    """
    def _setting_default(key, default=None):
        return default
    def _no_mcp_manager():
        return None
    def _flat_token_estimate(*a, **k):
        return 10
    async def _tool_ok(block, *a, **k):
        return ("bash", {"output": "ok", "exit_code": 0})
    replacements = (
        ("get_setting", _setting_default),
        ("get_mcp_manager", _no_mcp_manager),
        ("estimate_tokens", _flat_token_estimate),
        ("execute_tool_block", _tool_ok),
    )
    # raising=False: patch the name even if the module does not define it.
    for attr, stub in replacements:
        monkeypatch.setattr(al, attr, stub, raising=False)
 def _run_loop(monkeypatch, round_text, max_rounds=2):
    """Run ``al.stream_agent_loop`` with a fake LLM stream and return the decoded events.

    The fake stream yields *round_text* as a single delta followed by the
    ``[DONE]`` sentinel each time it is invoked.
    """
    sse_delta = f'data: {json.dumps({"delta": round_text})}\n\n'
    async def _scripted_stream(_candidates, messages, **kwargs):
        yield sse_delta
        yield "data: [DONE]\n\n"
    monkeypatch.setattr(al, "stream_llm_with_fallback", _scripted_stream, raising=False)
    conversation = [{"role": "user", "content": "do a long multi-step task"}]
    agent_stream = al.stream_agent_loop(
        "http://x/v1",
        "m",
        conversation,
        max_rounds=max_rounds,
        relevant_tools={"bash"},
    )
    raw_chunks = _collect(agent_stream)
    return _types(raw_chunks)
 def test_emits_rounds_exhausted_when_cap_hit_mid_task(monkeypatch):
    """A tool block in every round never reaches "done", so the cap is exhausted and the event must appear."""
    _patch_common(monkeypatch)
    tool_round = "```bash\necho hi\n```"
    events = _run_loop(monkeypatch, tool_round, max_rounds=2)
    # Generator membership stops at the first match, like any().
    assert "rounds_exhausted" in (e.get("type") for e in events), events
 def test_no_rounds_exhausted_on_normal_finish(monkeypatch):
    """A plain answer with no tool block takes the done-break on round 1, so no event may be emitted."""
    _patch_common(monkeypatch)
    plain_answer = "All done, here is your answer."
    events = _run_loop(monkeypatch, plain_answer, max_rounds=2)
    assert all(e.get("type") != "rounds_exhausted" for e in events), events