From 64d65b73c1868226b5d8be0b1a0db6bc1f515d07 Mon Sep 17 00:00:00 2001
From: Kenny Van de Maele <kenny@kvandemaele.be>
Date: Thu, 4 Jun 2026 22:36:05 +0200
Subject: [PATCH] =?UTF-8?q?feat:=20round-limit=20handling=20=E2=80=94=20Co?=
 =?UTF-8?q?ntinue=20affordance=20at=20the=20cap=20+=20configurable=20cap?=
 =?UTF-8?q?=20(#1999)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: round-limit handling — Continue affordance at the cap + configurable cap

When the agent loop runs out of rounds (per-message step cap, default 20)
while still actively using tools, it stopped silently mid-task. Now:

1. The loop emits a `rounds_exhausted` SSE event at the cap, and the UI shows
   a "Continue" pill at the bottom of the chat that resumes the task from where
   it left off. Repeated cap-hits each get a fresh Continue (multiple continues
   in a row).
2. The cap is configurable in Settings → Agent ("Max steps per message"),
   validated on the client, at the save endpoint, and at the read site.

- src/agent_loop.py: track `_exhausted_rounds` (set in the loop's `for/else`,
  i.e. only when every allowed round ran without a `break` — the agent
  wanted to keep going); emit `{"type":"rounds_exhausted","rounds":N}` (logged).
- routes/chat_routes.py: read `agent_max_rounds` (clamped 1..200), pass as
  `max_rounds`; forward the new event through the SSE relay.
- routes/auth_routes.py: validate numeric settings on save (int + clamp;
  agent_max_rounds 1..200, agent_max_tool_calls 0..1000; 400 on non-int).
- src/settings.py: default `agent_max_rounds = 20`.
- static/: Settings input + client-side clamp; the Continue pill (reuses the
  existing .stopped-indicator / .continue-btn classes and theme vars
  --border/--fg/--bg/--accent); appended to the chat container so it survives
  the message re-render at stream finalize. chat.js cache version bumped.

* test: cover rounds_exhausted emission (cap-hit vs normal finish)

Drives the real stream_agent_loop with mocked LLM stream / tool exec / settings:
a tool block every round exhausts the cap and must emit rounds_exhausted; a
plain answer hits the done-break and must not. Guards the for/else logic.
---
 routes/auth_routes.py                | 19 +++++++-
 routes/chat_routes.py                | 10 ++++
 src/agent_loop.py                    | 19 ++++++++
 src/settings.py                      |  1 +
 static/index.html                    | 13 ++----
 static/js/chat.js                    | 38 +++++++++++++++
 static/js/settings.js                | 27 +++++++++--
 static/style.css                     | 32 +++++++++++++
 tests/test_agent_rounds_exhausted.py | 70 ++++++++++++++++++++++++++++
 9 files changed, 215 insertions(+), 14 deletions(-)
 create mode 100644 tests/test_agent_rounds_exhausted.py

diff --git a/routes/auth_routes.py b/routes/auth_routes.py
index 60021e1..644b12d 100644
--- a/routes/auth_routes.py
+++ b/routes/auth_routes.py
@@ -438,9 +438,24 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
             raise HTTPException(403, "Admin only")
         body = await request.json()
         current = _load_settings()
+        # Per-key validation for numeric settings: coerce to int and clamp to a
+        # sane range so a bad value can't disable the agent or let it run away.
+        _INT_RANGES = {
+            "agent_max_rounds": (1, 200),
+            "agent_max_tool_calls": (0, 1000),  # 0 = unlimited
+        }
         for key in DEFAULT_SETTINGS:
-            if key in body:
-                current[key] = body[key]
+            if key not in body:
+                continue
+            val = body[key]
+            if key in _INT_RANGES:
+                lo, hi = _INT_RANGES[key]
+                try:
+                    val = int(val)
+                except (TypeError, ValueError, OverflowError):  # JSON 1e999 -> inf -> OverflowError
+                    raise HTTPException(400, f"{key} must be an integer")
+                val = max(lo, min(val, hi))
+            current[key] = val
         _save_settings(current)
         return current
 
diff --git a/routes/chat_routes.py b/routes/chat_routes.py
index a3c6c16..836e9da 100644
--- a/routes/chat_routes.py
+++ b/routes/chat_routes.py
@@ -981,7 +981,15 @@ def setup_chat_routes(
                 _answered_by = None  # set if the selected model failed and a fallback answered
                 try:
                     from src.settings import get_setting
+                    from src.agent_tools import MAX_AGENT_ROUNDS as _DEFAULT_ROUNDS
                     _tool_budget = int(get_setting("agent_max_tool_calls", 0))
+                    # Per-message round cap from settings; clamp defensively in case
+                    # settings.json was hand-edited to a bad value (non-numeric, 1e999/inf).
+                    try:
+                        _max_rounds = int(get_setting("agent_max_rounds", _DEFAULT_ROUNDS) or _DEFAULT_ROUNDS)
+                    except (TypeError, ValueError, OverflowError):
+                        _max_rounds = _DEFAULT_ROUNDS
+                    _max_rounds = max(1, min(_max_rounds, 200))
 
                     async for chunk in stream_agent_loop(
                         sess.endpoint_url,
@@ -992,6 +1000,7 @@ def setup_chat_routes(
                         max_tokens=ctx.preset.max_tokens,
                         prompt_type=preset_id,
                         max_tool_calls=_tool_budget,
+                        max_rounds=_max_rounds,
                         context_length=ctx.context_length,
                         active_document=active_doc,
                         session_id=session,
@@ -1017,6 +1026,7 @@ def setup_chat_routes(
                                     "tool_start", "tool_output", "agent_step",
                                     "doc_stream_open", "doc_stream_delta",
                                     "doc_update", "doc_suggestions", "ui_control",
+                                    "rounds_exhausted",
                                 ):
                                     if data.get("type") == "agent_step":
                                         _agent_rounds = max(_agent_rounds, data.get("round", 1))
diff --git a/src/agent_loop.py b/src/agent_loop.py
index 7aa7e19..e0b6248 100644
--- a/src/agent_loop.py
+++ b/src/agent_loop.py
@@ -1643,6 +1643,11 @@ async def stream_agent_loop(
     _doc_opened = False    # whether doc_stream_open was sent
     _doc_last_len = 0      # last content length sent
 
+    # Set when the loop runs out of rounds while the agent was still actively
+    # using tools — i.e. it was cut off, not finished. Drives a "Continue" event
+    # so the user can resume instead of the turn silently stalling.
+    _exhausted_rounds = False
+
     for round_num in range(1, max_rounds + 1):
         round_response = ""
         round_reasoning = ""  # reasoning_content deltas (DeepSeek-thinking, vLLM --reasoning-parser)
@@ -2300,6 +2305,20 @@ async def stream_agent_loop(
 
         # Separator in accumulated response
         full_response += "\n\n"
+    else:
+        # The for-loop completed every allowed round WITHOUT an early `break`
+        # (a `break` fires on "done", budget, or error). Reaching this `else`
+        # means the agent kept working until it ran out of rounds — so offer
+        # Continue instead of stopping silently. This catches ALL exhaustion
+        # paths, including a verifier `continue` on the final round (the old
+        # bottom-of-loop flag missed those).
+        _exhausted_rounds = True
+
+    # If the loop hit the round cap while still working, tell the client so it
+    # can show a "Continue" affordance instead of the turn just stopping.
+    if _exhausted_rounds:
+        logger.info("[agent] round cap (%d) reached mid-task — emitting rounds_exhausted", max_rounds)
+        yield f'data: {json.dumps({"type": "rounds_exhausted", "rounds": max_rounds})}\n\n'
 
     # If the response is completely empty and no tools were executed,
     # yield a fallback message so the user is not left hanging.
diff --git a/src/settings.py b/src/settings.py
index 8f810a6..5bce0fc 100644
--- a/src/settings.py
+++ b/src/settings.py
@@ -100,6 +100,7 @@ DEFAULT_SETTINGS = {
     # Tune via Settings or by editing data/settings.json.
     "research_run_timeout_seconds": 1800,
     "agent_max_tool_calls": 0,
+    "agent_max_rounds": 20,  # per-message agent step cap (clamped 1..200)
     "agent_input_token_budget": 6000,
     # Ceiling on the *auto-derived* input budget that #1230 introduced. Has
     # no effect when `agent_input_token_budget` is explicitly set (the user's
diff --git a/static/index.html b/static/index.html
index cade5cf..03edfa9 100644
--- a/static/index.html
+++ b/static/index.html
@@ -1478,6 +1478,10 @@
                 <label class="settings-label">Tool call limit</label>
                 <input id="set-agentMaxTools" type="text" inputmode="numeric" placeholder="0 = unlimited" class="settings-select" style="width:120px;">
               </div>
+              <div class="settings-row">
+                <label class="settings-label">Max steps per message</label>
+                <input id="set-agentMaxRounds" type="text" inputmode="numeric" placeholder="20" class="settings-select" style="width:120px;">
+              </div>
               <div id="set-agentMsg" style="font-size:11px;color:color-mix(in srgb, var(--fg) 45%, transparent);"></div>
             </div>
           </div>
@@ -2092,13 +2096,6 @@
                   <button class="admin-btn-add" id="adm-epAddBtn" style="width:55px;text-align:center;">Add</button>
                 </div>
                 <div id="adm-epApiMsg" class="adm-ep-inline-msg"></div>
-                <div class="adm-copilot-connect">
-                  <button class="admin-btn-sm" id="adm-copilotConnectBtn" type="button" title="Sign in to GitHub Copilot via device flow">
-                    <svg width="13" height="13" viewBox="0 0 24 24" fill="currentColor" style="vertical-align:-2px;margin-right:5px;opacity:0.8"><path d="M12 .5C5.7.5.5 5.7.5 12c0 5.1 3.3 9.4 7.9 10.9.6.1.8-.2.8-.5v-1.7c-3.2.7-3.9-1.5-3.9-1.5-.5-1.3-1.3-1.7-1.3-1.7-1.1-.7.1-.7.1-.7 1.2.1 1.8 1.2 1.8 1.2 1 1.8 2.7 1.3 3.4 1 .1-.8.4-1.3.7-1.6-2.6-.3-5.3-1.3-5.3-5.7 0-1.3.4-2.3 1.2-3.1-.1-.3-.5-1.5.1-3.1 0 0 1-.3 3.3 1.2a11.4 11.4 0 0 1 6 0C17.3 4.7 18.3 5 18.3 5c.6 1.6.2 2.8.1 3.1.8.8 1.2 1.8 1.2 3.1 0 4.4-2.7 5.4-5.3 5.7.4.4.8 1.1.8 2.2v3.3c0 .3.2.6.8.5 4.6-1.5 7.9-5.8 7.9-10.9C23.5 5.7 18.3.5 12 .5z"/></svg>
-                    Connect GitHub Copilot
-                  </button>
-                  <div id="adm-copilotStatus" class="adm-ep-inline-msg"></div>
-                </div>
               </div>
             </div>
           </div>
@@ -2271,7 +2268,7 @@
 <script type="module" src="/static/js/chatRenderer.js"></script>
 <script type="module" src="/static/js/codeRunner.js"></script>
 <script type="module" src="/static/js/chatStream.js"></script>
-<script type="module" src="/static/js/chat.js?v=20260603n"></script>
+<script type="module" src="/static/js/chat.js?v=20260604q"></script>
 <script type="module" src="/static/js/cookbook.js"></script>
 <script type="module" src="/static/js/search-chat.js"></script>
 <script type="module" src="/static/js/compare/index.js"></script>
diff --git a/static/js/chat.js b/static/js/chat.js
index c34d6a0..e064b5c 100644
--- a/static/js/chat.js
+++ b/static/js/chat.js
@@ -1836,6 +1836,44 @@ import createResearchSynapse from './researchSynapse.js';
                     }
                   }
                 }
+              } else if (json.type === 'rounds_exhausted') {
+                // The agent hit the per-turn step limit while still working.
+                // Offer a Continue button instead of stalling silently.
+                // NOTE: append to the chat-history container (bottom), NOT the
+                // message body — the body innerHTML is re-rendered at stream
+                // finalize, which would wipe a note placed inside it.
+                const _chatBox = document.getElementById('chat-history');
+                if (!_isBg && _chatBox) {
+                  // Drop any prior box so repeated cap-hits each get a fresh
+                  // Continue at the bottom (multiple continues in a row).
+                  const _old = _chatBox.querySelector('.rounds-exhausted');
+                  if (_old) _old.remove();
+                  const note = document.createElement('div');
+                  note.className = 'stopped-indicator rounds-exhausted';
+                  const label = document.createElement('span');
+                  label.className = 'rounds-exhausted-label';
+                  label.textContent = json.rounds ? `Reached the ${json.rounds}-step limit — not finished.` : 'Reached the step limit — not finished.';
+                  note.appendChild(label);
+                  const contBtn = document.createElement('button');
+                  contBtn.className = 'continue-btn';
+                  contBtn.title = 'Continue the task';
+                  contBtn.textContent = 'Continue ▸';
+                  const _holder = currentHolder;
+                  contBtn.addEventListener('click', () => {
+                    note.remove();
+                    _hideUserBubble = true;
+                    _pendingContinue = _holder;
+                    const msgInput = uiModule.el('message');
+                    if (msgInput) {
+                      msgInput.value = 'You hit the step limit before finishing — the task is not complete. Continue from exactly where you left off and keep going until it is done. Do NOT repeat work already done.';
+                      const sb = document.querySelector('.send-btn');
+                      if (sb) sb.click();
+                    }
+                  });
+                  note.appendChild(contBtn);
+                  _chatBox.appendChild(note);
+                  try { note.scrollIntoView({ block: 'end', behavior: 'smooth' }); } catch (_) { uiModule.scrollHistory && uiModule.scrollHistory(); }
+                }
               } else if (json.type === 'attachments') {
                 if (_isBg) continue;
                 // Update user bubble — replace file chips with image previews
diff --git a/static/js/settings.js b/static/js/settings.js
index 161f722..8a53606 100644
--- a/static/js/settings.js
+++ b/static/js/settings.js
@@ -1558,6 +1558,7 @@ async function initResearchSearchSettings() {
 /* ── Agent Settings (AI tab) ── */
 async function initAgentSettings() {
   var toolsInput = el('set-agentMaxTools');
+  var roundsInput = el('set-agentMaxRounds');
   var msg = el('set-agentMsg');
   if (!toolsInput) return;
 
@@ -1565,23 +1566,41 @@ async function initAgentSettings() {
     var res = await fetch('/api/auth/settings', { credentials: 'same-origin' });
     var settings = await res.json();
     if (settings.agent_max_tool_calls) toolsInput.value = settings.agent_max_tool_calls;
+    if (roundsInput && settings.agent_max_rounds) roundsInput.value = settings.agent_max_rounds;
   } catch (e) {}
 
+  // Clamp + coerce a raw input to an int in [lo, hi]; falls back to `dflt`
+  // when blank/non-numeric. Mirrors the server-side validation.
+  function clampInt(raw, lo, hi, dflt) {
+    var n = parseInt(raw, 10);
+    if (isNaN(n)) return dflt;
+    return Math.max(lo, Math.min(n, hi));
+  }
+
   async function save() {
-    var val = parseInt(toolsInput.value, 10) || 0;
+    var tools = clampInt(toolsInput.value, 0, 1000, 0);
+    var rounds = roundsInput ? clampInt(roundsInput.value, 1, 200, 20) : null;
+    toolsInput.value = tools;                       // reflect the clamped value
+    if (roundsInput) roundsInput.value = rounds;
+    var payload = { agent_max_tool_calls: tools };
+    if (rounds != null) payload.agent_max_rounds = rounds;
     try {
       await fetch('/api/auth/settings', { method: 'POST', credentials: 'same-origin',
         headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify({ agent_max_tool_calls: val })
+        body: JSON.stringify(payload)
       });
-      msg.textContent = val > 0 ? 'Limit: ' + val + ' tool calls per message' : 'Unlimited';
+      msg.textContent = (tools > 0 ? 'Limit: ' + tools + ' tool calls' : 'Unlimited tool calls') +
+        (rounds != null ? ' · ' + rounds + ' steps/message' : '');
       msg.style.color = 'var(--fg)';
     } catch (e) { msg.textContent = 'Failed to save'; msg.style.color = 'var(--red)'; }
   }
 
   toolsInput.addEventListener('change', save);
+  if (roundsInput) roundsInput.addEventListener('change', save);
   var cur = parseInt(toolsInput.value, 10) || 0;
-  msg.textContent = cur > 0 ? 'Limit: ' + cur + ' tool calls per message' : 'Unlimited';
+  var curR = roundsInput ? (parseInt(roundsInput.value, 10) || 20) : null;
+  msg.textContent = (cur > 0 ? 'Limit: ' + cur + ' tool calls' : 'Unlimited tool calls') +
+    (curR != null ? ' · ' + curR + ' steps/message' : '');
 }
 
 /* ═══════════════════════════════════════════
diff --git a/static/style.css b/static/style.css
index ea99f3e..1710504 100644
--- a/static/style.css
+++ b/static/style.css
@@ -3478,6 +3478,38 @@ body.bg-pattern-sparkles {
     .continue-btn:hover {
       opacity:0.8;
     }
+
+    /* Round-cap "Continue" affordance — a cohesive centered pill at the chat
+       bottom (not the bare red in-message stopped style). */
+    .rounds-exhausted {
+      justify-content:center;
+      gap:12px;
+      width:fit-content;
+      max-width:90%;
+      margin:14px auto 4px;
+      padding:7px 8px 7px 16px;
+      border:1px solid var(--border);
+      border-radius:999px;
+      background:color-mix(in srgb, var(--fg) 4%, transparent);
+      opacity:1;
+    }
+    .rounds-exhausted .rounds-exhausted-label {
+      color:color-mix(in srgb, var(--fg) 60%, transparent);
+      font-size:0.95em;
+    }
+    .rounds-exhausted .continue-btn {
+      font-size:0.9em;
+      font-weight:600;
+      opacity:1;
+      color:var(--bg);
+      background:var(--accent, var(--red));
+      border-radius:999px;
+      padding:4px 14px;
+      line-height:1.3;
+    }
+    .rounds-exhausted .continue-btn:hover {
+      opacity:0.88;
+    }
     .ctx-indicator {
       display:inline-flex; align-items:center; gap:1px;
       font-size:0.75rem;
diff --git a/tests/test_agent_rounds_exhausted.py b/tests/test_agent_rounds_exhausted.py
new file mode 100644
index 0000000..178faa8
--- /dev/null
+++ b/tests/test_agent_rounds_exhausted.py
@@ -0,0 +1,70 @@
+"""Regression: stream_agent_loop emits `rounds_exhausted` only when the round
+cap is hit while still working, and NOT on a normal finish.
+
+The decision is a `for/else` in the loop: the `else` runs only if no `break`
+fired (break = done / budget / error). A refactor that adds a stray break or
+return, or moves the done-break, could silently flip this. See PR #1999 / #1997.
+"""
+
+import asyncio
+import json
+
+import src.agent_loop as al
+
+
+def _collect(gen):
+    async def _run():
+        return [c async for c in gen]
+    return asyncio.run(_run())
+
+
+def _types(chunks):
+    out = []
+    for c in chunks:
+        if c.startswith("data: ") and not c.startswith("data: [DONE]"):
+            try:
+                out.append(json.loads(c[6:]))
+            except Exception:
+                pass
+    return out
+
+
+def _patch_common(monkeypatch):
+    # Skip RAG/tool-index, MCP, and settings lookups; keep the real loop body,
+    # _resolve_tool_blocks, and parse_tool_blocks.
+    monkeypatch.setattr(al, "get_setting", lambda key, default=None: default, raising=False)
+    monkeypatch.setattr(al, "get_mcp_manager", lambda: None, raising=False)
+    monkeypatch.setattr(al, "estimate_tokens", lambda *a, **k: 10, raising=False)
+
+    async def _fake_exec(block, *a, **k):
+        return ("bash", {"output": "ok", "exit_code": 0})
+    monkeypatch.setattr(al, "execute_tool_block", _fake_exec, raising=False)
+
+
+def _run_loop(monkeypatch, round_text, max_rounds=2):
+    async def _fake_stream(_candidates, messages, **kwargs):
+        yield f'data: {json.dumps({"delta": round_text})}\n\n'
+        yield "data: [DONE]\n\n"
+    monkeypatch.setattr(al, "stream_llm_with_fallback", _fake_stream, raising=False)
+
+    gen = al.stream_agent_loop(
+        "http://x/v1", "m",
+        [{"role": "user", "content": "do a long multi-step task"}],
+        max_rounds=max_rounds,
+        relevant_tools={"bash"},
+    )
+    return _types(_collect(gen))
+
+
+def test_emits_rounds_exhausted_when_cap_hit_mid_task(monkeypatch):
+    _patch_common(monkeypatch)
+    # Every round returns a tool block -> never "done" -> loop exhausts the cap.
+    events = _run_loop(monkeypatch, "```bash\necho hi\n```", max_rounds=2)
+    assert any(e.get("type") == "rounds_exhausted" for e in events), events
+
+
+def test_no_rounds_exhausted_on_normal_finish(monkeypatch):
+    _patch_common(monkeypatch)
+    # A plain answer (no tool block) -> done-break on round 1 -> no event.
+    events = _run_loop(monkeypatch, "All done, here is your answer.", max_rounds=2)
+    assert not any(e.get("type") == "rounds_exhausted" for e in events), events