From 28c43121d70779ebe1e77c4f9e5fb9e8e2feef80 Mon Sep 17 00:00:00 2001 From: ghreprimand Date: Thu, 4 Jun 2026 06:53:44 -0500 Subject: [PATCH 01/12] Fix session export 500 on multimodal/None message content (#1984) txt/html/md export joined and string-munged message.content directly, so a multimodal turn (content is a list of blocks) crashed export with a TypeError on join (txt) / AttributeError on .replace (html), and None content (tool-only assistant turns) rendered as the literal 'None'. Add a _content_to_text helper that flattens string/list/None to plain text and apply it at the three export sites. JSON export is unchanged (it serializes structured content correctly). Plain-string content is returned unchanged, so existing exports are identical. Co-authored-by: ghreprimand <203024559+ghreprimand@users.noreply.github.com> --- routes/session_routes.py | 26 ++++++++-- .../test_session_export_nonstring_content.py | 50 +++++++++++++++++++ 2 files changed, 73 insertions(+), 3 deletions(-) create mode 100644 tests/test_session_export_nonstring_content.py diff --git a/routes/session_routes.py b/routes/session_routes.py index 1b38e4b..58cb8ae 100644 --- a/routes/session_routes.py +++ b/routes/session_routes.py @@ -37,6 +37,26 @@ def _public_model(name: str, model: str) -> str: return model +def _content_to_text(content) -> str: + """Flatten a message's content to plain text for text-based exports. + + History entries carry three shapes: a plain string, a multimodal list of + content blocks (vision/image attachments), or None (assistant turns that + persisted only native tool_calls). The txt/html/md exporters join and + string-munge this value, so a list crashed the export (TypeError on join, + AttributeError on .replace) and None rendered as the literal "None". + Coerce to the text blocks, returning "" for anything without text. 
+ """ + if isinstance(content, str): + return content + if isinstance(content, list): + return "\n".join( + b.get("text", "") for b in content + if isinstance(b, dict) and b.get("text") + ) + return "" + + def _verify_session_owner(request: Request, session_id: str, session_manager=None): """Verify the current user owns the session. Raises 404 if not. @@ -708,7 +728,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_ lines = [] for m in session.history: lines.append(f"[{m.role.upper()}]") - lines.append(m.content) + lines.append(_content_to_text(m.content)) lines.append("") out_name = filename or f"conversation_{safe_name}_{timestamp}.txt" return Response( @@ -731,7 +751,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_ ] for m in session.history: cls = "user" if m.role == "user" else "ai" - content = m.content.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;") + content = _content_to_text(m.content).replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;") content = content.replace("\n", "
") html_parts.append(f'
{m.role}
{content}
') html_parts.append("") @@ -750,7 +770,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_ markdown_lines.append("\n---\n") for message in session.history: role = message.role.upper() - content = message.content + content = _content_to_text(message.content) markdown_lines.append(f"### {role}") markdown_lines.append(f"{content}\n") markdown_lines.append("---\n") diff --git a/tests/test_session_export_nonstring_content.py b/tests/test_session_export_nonstring_content.py new file mode 100644 index 0000000..07641ed --- /dev/null +++ b/tests/test_session_export_nonstring_content.py @@ -0,0 +1,50 @@ +"""Regression: session export must tolerate non-string message content. + +A message's ``content`` is a plain string for normal turns, but a multimodal +list of content blocks for image/vision turns, and ``None`` for assistant turns +that persisted only native tool_calls. The txt/html/md exporters in +``routes/session_routes.py`` joined and string-munged ``content`` directly, so: + + - txt: ``"\n".join([..., , ...])`` -> TypeError + - html: ``.replace("&", "&")`` -> AttributeError + - md: ``f"{}"`` -> raw Python repr in output + +``_content_to_text`` coerces all three shapes to plain text so export degrades +gracefully instead of returning a 500. +""" +from routes.session_routes import _content_to_text + + +def test_plain_string_passes_through_unchanged(): + assert _content_to_text("hello world") == "hello world" + assert _content_to_text("") == "" + + +def test_multimodal_list_flattens_to_its_text_blocks(): + content = [ + {"type": "text", "text": "describe this"}, + {"type": "image_url", "image_url": {"url": "data:image/png;base64,AAAA"}}, + {"type": "text", "text": "thanks"}, + ] + assert _content_to_text(content) == "describe this\nthanks" + + +def test_none_content_becomes_empty_string(): + # Assistant turns carrying only native tool_calls persist content as None. 
+ assert _content_to_text(None) == "" + + +def test_list_without_text_blocks_is_empty_not_crash(): + assert _content_to_text([{"type": "image_url", "image_url": {"url": "x"}}]) == "" + assert _content_to_text([]) == "" + + +def test_coerced_output_survives_the_export_operations(): + # The exact operations that previously crashed must now succeed. + history = ["plain", [{"type": "text", "text": "img turn"}], None] + texts = [_content_to_text(c) for c in history] + # txt export path + assert "\n".join(texts) == "plain\nimg turn\n" + # html export path + for t in texts: + assert isinstance(t.replace("&", "&"), str) From f876fc770432ef1a699358a37ba4691a7c558b3b Mon Sep 17 00:00:00 2001 From: SHORYA BAJ <110905262+bajshorya@users.noreply.github.com> Date: Thu, 4 Jun 2026 17:25:06 +0530 Subject: [PATCH 02/12] fix(cookbook): don't mark successful dependency installs as crashed (#1315) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pip dependency installs are tracked as download tasks but finish with the runner's "=== Process exited with code 0 ===" sentinel and pip's "Successfully installed" line — never the HuggingFace download markers (DONE / 100% / /snapshots/ / DOWNLOAD_OK) the download heuristics look for. Once the tmux pane is gone, the backend's only completion check is the HF cache lookup, which a pip package (e.g. llama-cpp-python[server], no "/") never matches, so it reports "stopped" — and the frontend maps a stopped download to "crashed". The reconnect loop's session-gone heuristic had the same gap. Result: a clean install (exit 0) showed "crashed" in the Running tab while the Dependencies tab correctly showed it installed. Add a shared _depInstallSucceeded() helper that keys off the exit-0 sentinel (falling back to pip's success line, rejecting ERROR/Traceback) and wire it into both the session-gone heuristic and the background status reconciler, gated on payload._dep so real model downloads are unaffected. 
Also fixes the pre-existing test_background_status_poll_reconciles_into_local_tasks assertion that no longer matched the evolved reconciler, and adds regression coverage for both paths. --- static/js/cookbookRunning.js | 31 ++++++++++++++-- ...okbook_dependency_completion_regression.py | 36 +++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) diff --git a/static/js/cookbookRunning.js b/static/js/cookbookRunning.js index 5f75a49..1f225b7 100644 --- a/static/js/cookbookRunning.js +++ b/static/js/cookbookRunning.js @@ -66,6 +66,23 @@ function _clearPillLabel(task) { return 'clear'; } +// A pip dependency/driver install (payload._dep) reports success with the +// runner's "=== Process exited with code 0 ===" sentinel and pip's +// "Successfully installed" line — never the HuggingFace download markers +// (DONE / 100% / /snapshots/ / DOWNLOAD_OK) that the download heuristics look +// for. Without this, a clean install whose tmux pane has already gone away is +// misread as crashed/stopped even though pip exited 0. Prefer the authoritative +// exit-code sentinel; fall back to pip's success line when no sentinel was +// captured (and there's no install error in the same output). 
+function _depInstallSucceeded(output) { + const text = String(output || ''); + if (!text) return false; + const exitMatch = text.match(/=== Process exited with code (-?\d+) ===/); + if (exitMatch) return Number(exitMatch[1]) === 0; + return /\b(?:Successfully installed|Requirement already satisfied)\b/.test(text) + && !/\bERROR\b|No matching distribution|Could not find a version|Traceback \(most recent call last\)/.test(text); +} + function _shouldOfferCrashReport(task) { if (!task) return false; if (task._unreachable && task.type === 'serve') return true; @@ -2448,7 +2465,10 @@ async function _reconnectTask(el, task) { const downloadLooksSuccessful = !lastOutput.includes('DOWNLOAD_FAILED') && (lastOutput.includes('DONE') || lastOutput.includes('100%') || lastOutput.includes('/snapshots/') || lastOutput.includes('Download complete') || lastOutput.includes('DOWNLOAD_OK')); const serveLooksReady = task.type === 'serve' && _serveOutputLooksReady({ ...task, output: lastOutput }); - const looksSuccessful = task.type === 'download' ? downloadLooksSuccessful : serveLooksReady; + // Dependency installs are tracked as download tasks but finish with a + // pip exit-0 sentinel, not HF download markers — so check that too. + const depInstallSucceeded = !!task.payload?._dep && _depInstallSucceeded(lastOutput); + const looksSuccessful = depInstallSucceeded || (task.type === 'download' ? downloadLooksSuccessful : serveLooksReady); if (!lastOutput.trim() || !looksSuccessful) { _updateTask(task.sessionId, { status: 'crashed' }); el.dataset.status = 'crashed'; @@ -3309,11 +3329,18 @@ async function _pollBackgroundStatus() { const live = statusById.get(task.sessionId); if (!live) continue; const updates = {}; + // A finished dependency install whose tmux pane is gone is reported + // "stopped" by the backend (its pip package is never in the HF cache the + // dead-session check inspects). 
Recover "done" from the retained output's + // exit-0 sentinel so a clean install isn't downgraded to crashed. + const depDone = !!task.payload?._dep && _depInstallSucceeded(task.output); const nextStatus = live.status === 'completed' ? 'done' : (live.status === 'error' ? 'error' - : (live.status === 'stopped' ? (task.type === 'download' ? 'crashed' : 'stopped') : null)); + : (live.status === 'stopped' + ? (depDone ? 'done' : (task.type === 'download' ? 'crashed' : 'stopped')) + : null)); if (nextStatus && task.status !== nextStatus) { updates.status = nextStatus; if (nextStatus === 'done' && task.payload?._dep) completedDeps.push(task); diff --git a/tests/test_cookbook_dependency_completion_regression.py b/tests/test_cookbook_dependency_completion_regression.py index b47e9b2..4c0ab59 100644 --- a/tests/test_cookbook_dependency_completion_regression.py +++ b/tests/test_cookbook_dependency_completion_regression.py @@ -37,6 +37,42 @@ def test_local_windows_session_commands_use_local_powershell_log_dir(): assert ": `powershell -Command \"${ps}\"`;" in source +def test_dep_install_success_recognized_from_exit_sentinel(): + """A pip dependency install reports success via the runner's exit-0 + sentinel / pip's "Successfully installed" line, not the HuggingFace + download markers. 
The shared helper must key off those, so an install + whose tmux pane is gone isn't misread as crashed.""" + source = _read("static/js/cookbookRunning.js") + + assert "function _depInstallSucceeded(output) {" in source + assert "=== Process exited with code" in source + assert "Successfully installed" in source + + +def test_session_gone_heuristic_honors_dep_install_success(): + """The reconnect loop's session-gone branch (download tasks need an HF + marker to look successful) must also accept a finished dependency install, + otherwise a clean pip install with no HF markers is marked crashed.""" + source = _read("static/js/cookbookRunning.js") + + assert "const depInstallSucceeded = !!task.payload?._dep && _depInstallSucceeded(lastOutput);" in source + assert ( + "const looksSuccessful = depInstallSucceeded " + "|| (task.type === 'download' ? downloadLooksSuccessful : serveLooksReady);" + ) in source + + +def test_background_poll_recovers_done_for_stopped_dependency_install(): + """When the backend reports a finished dependency install as "stopped" + (its pip package is never in the HF cache the dead-session check inspects), + the reconciler must recover "done" from the retained output instead of + downgrading the card to crashed.""" + source = _read("static/js/cookbookRunning.js") + + assert "const depDone = !!task.payload?._dep && _depInstallSucceeded(task.output);" in source + assert "depDone ? 'done' : (task.type === 'download' ? 
'crashed' : 'stopped')" in source + + def test_dependency_install_payload_keeps_env_path_for_refresh(): source = _read("static/js/cookbook.js") From f59edee6115b2a806bbe4357806eb14984701ac3 Mon Sep 17 00:00:00 2001 From: tanmayraut45 Date: Thu, 4 Jun 2026 17:48:50 +0530 Subject: [PATCH 03/12] Support extra CA bundle for private-CA LLM providers (#769) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding GigaChat (Sber) or an on-premise enterprise LLM gateway as a model endpoint fails on first probe with CERTIFICATE_VERIFY_FAILED: self-signed certificate in certificate chain (_ssl.c:1000) because their TLS chain is signed by a private root CA (Russian Trusted Root CA for GigaChat; corporate CA for on-prem) that isn't part of the default system / certifi trust store. The endpoint shows offline in the picker even though the URL and API key are correct (issue #722). The right fix is to extend the trust store, not to weaken verification. This change: - src/tls_overrides.py: new module that resolves an opt-in env var LLM_CA_BUNDLE at import time, builds a shared SSLContext via ssl.create_default_context() (so the system / certifi bundle is loaded first) and layers the operator's PEM on top with load_verify_locations(). Exposes llm_verify() returning a value suitable for httpx `verify=`. Defaults to True (httpx built-in trust) when the env var is unset, when the file is missing, or when the PEM fails to load — verification is never silently disabled, the warning is logged and we fall back to the safe path. - src/llm_core.py: thread llm_verify() into the shared AsyncClient used by stream_llm / streaming completions. - routes/model_routes.py: thread llm_verify() into the five httpx.get call sites in _probe_endpoint / _ping_endpoint so adding a private-CA endpoint goes green on the very first probe and the picker stops showing it offline. - .env.example: document LLM_CA_BUNDLE with the GigaChat case as the concrete example. 
Deliberately NOT included: a verify=False knob (global or per-host). Disabling verification exposes the affected endpoint to MITM, and the operator-supplied bundle is the correct fix for legitimate private-CA providers — so the only switch in this PR is the safe one. Closes #722. --- .env.example | 10 ++ routes/model_routes.py | 11 ++- src/llm_core.py | 5 +- src/tls_overrides.py | 91 ++++++++++++++++++ tests/test_tls_overrides_scope.py | 149 ++++++++++++++++++++++++++++++ 5 files changed, 260 insertions(+), 6 deletions(-) create mode 100644 src/tls_overrides.py create mode 100644 tests/test_tls_overrides_scope.py diff --git a/.env.example b/.env.example index e53d2f8..f282880 100644 --- a/.env.example +++ b/.env.example @@ -27,6 +27,16 @@ LLM_HOST=localhost # Research service LLM endpoint # RESEARCH_LLM_ENDPOINT=http://localhost:8000/v1/chat/completions +# Extra CA bundle for LLM providers whose TLS chain isn't in the default +# trust store. Layered ON TOP of the system / certifi bundle — verification +# stays on for every host, the trust set just gets larger. Useful for: +# - GigaChat / Sber (Russian Trusted Root CA): without this the endpoint +# shows offline with CERTIFICATE_VERIFY_FAILED — self-signed certificate +# in certificate chain. +# - On-premise / corporate LLM gateways with an internal CA. +# Point at a PEM file containing the missing root(s). 
+# LLM_CA_BUNDLE=/etc/odysseus/ca/extra-roots.pem + # ============================================================ # Search & Web # ============================================================ diff --git a/routes/model_routes.py b/routes/model_routes.py index 0cf98d5..ac025ad 100644 --- a/routes/model_routes.py +++ b/routes/model_routes.py @@ -17,6 +17,7 @@ from fastapi.responses import StreamingResponse from core.database import SessionLocal, ModelEndpoint, Session as DbSession from core.middleware import require_admin from src.llm_core import _detect_provider, _host_match, ANTHROPIC_MODELS +from src.tls_overrides import llm_verify from src.settings import load_settings as _load_settings, save_settings as _save_settings from src.endpoint_resolver import ( normalize_base as _normalize_base, @@ -624,7 +625,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis if api_key: headers["x-api-key"] = api_key try: - r = httpx.get(url, headers=headers, timeout=timeout) + r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify()) r.raise_for_status() data = r.json() models = [m.get("id") for m in (data.get("data") or []) if m.get("id")] @@ -645,7 +646,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis url = build_models_url(base) headers = build_headers(api_key, base) try: - r = httpx.get(url, headers=headers, timeout=timeout) + r = httpx.get(url, headers=headers, timeout=timeout, verify=llm_verify()) r.raise_for_status() data = r.json() # OpenAI format: {"data": [{"id": "model-name"}]} @@ -680,7 +681,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis parsed = urlparse(base) if parsed.port == 11434 or "ollama" in (parsed.hostname or "").lower(): root = base[:-3].rstrip("/") if base.endswith("/v1") else base - r = httpx.get(root + "/api/tags", timeout=timeout) + r = httpx.get(root + "/api/tags", timeout=timeout, verify=llm_verify()) r.raise_for_status() data = 
r.json() models = [m.get("name") or m.get("model") for m in (data.get("models") or []) if m.get("name") or m.get("model")] @@ -741,7 +742,7 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) -> break for path in ("/api/version", "/api/tags"): try: - r = httpx.get(root + path, timeout=timeout) + r = httpx.get(root + path, timeout=timeout, verify=llm_verify()) result = _result_from_response(r) if result["reachable"]: return result @@ -752,7 +753,7 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) -> pass try: - r = httpx.get(base, headers=headers, timeout=timeout) + r = httpx.get(base, headers=headers, timeout=timeout, verify=llm_verify()) return _result_from_response(r) except Exception as e: last_error = str(e)[:120] diff --git a/src/llm_core.py b/src/llm_core.py index 2d66685..be31ac5 100644 --- a/src/llm_core.py +++ b/src/llm_core.py @@ -129,7 +129,10 @@ def _get_http_client() -> httpx.AsyncClient: """Return process-wide AsyncClient. Per-request timeout is passed at call time.""" global _http_client if _http_client is None or _http_client.is_closed: - _http_client = httpx.AsyncClient(limits=_http_limits, http2=False) + from src.tls_overrides import llm_verify + _http_client = httpx.AsyncClient( + limits=_http_limits, http2=False, verify=llm_verify(), + ) return _http_client def _get_cached_response(cache_key: str) -> Optional[str]: diff --git a/src/tls_overrides.py b/src/tls_overrides.py new file mode 100644 index 0000000..dc4e460 --- /dev/null +++ b/src/tls_overrides.py @@ -0,0 +1,91 @@ +"""Extended TLS trust store for private-CA LLM providers. + +Some upstream LLM providers serve their API over TLS certificates that are +signed by a private root CA which is not part of the standard system bundle: + + - GigaChat (Sber) uses the Russian Trusted Root CA, not bundled with + OpenSSL / certifi / system trust on most non-Russian installs. 
The + chain looks self-signed to Python and the endpoint is marked offline + with `CERTIFICATE_VERIFY_FAILED: self-signed certificate in + certificate chain` (see issue #722). + - On-premise enterprise LLM gateways often present a corporate CA that + has not been imported into the runtime's trust store. + +Operators point `LLM_CA_BUNDLE` at a PEM file containing the extra CA +cert(s). The default system / certifi trust store is loaded first, then +the operator's PEM is layered on top, so verification still happens — +the trust set just gets larger. We deliberately do not provide a +"verify=off" knob: weakening verification globally (or per-host) would +expose those endpoints to MITM, and the operator-supplied bundle is the +correct fix for legitimate private-CA providers. + +Example (GigaChat): + # Sber publishes the chain at + # https://www.gosuslugi.ru/crt/rootca_ssl_rsa2022.cer + # Convert to PEM and point the env var at it. + LLM_CA_BUNDLE=/etc/odysseus/ca/russian-trusted-root.pem + +Scope: + `llm_verify()` is intentionally consumed by only two call sites — the + shared async client in `src/llm_core.py` and the endpoint probes in + `routes/model_routes.py`. Both reach LLM provider URLs. The override + is NOT threaded into web_fetch, search providers, gallery downloads, + embeddings, webhook delivery, or anything else that hits arbitrary + URLs, and it does NOT affect the app's own browser-facing TLS. That + boundary is pinned by `tests/test_tls_overrides_scope.py` — extending + it requires updating the allowlist there with a written justification. +""" + +import logging +import os +import ssl +from typing import Optional + +logger = logging.getLogger(__name__) + + +_extra_bundle_path: Optional[str] = (os.environ.get("LLM_CA_BUNDLE") or "").strip() or None + + +def _build_ssl_context() -> Optional[ssl.SSLContext]: + """Build an SSLContext that uses the default trust store and ALSO trusts + the operator-supplied PEM bundle. 
Returns None when no extra bundle is + configured, so callers fall through to httpx's default verify=True.""" + if not _extra_bundle_path: + return None + if not os.path.isfile(_extra_bundle_path): + logger.warning( + "LLM_CA_BUNDLE points at %r but the file does not exist; " + "falling back to the default trust store.", + _extra_bundle_path, + ) + return None + ctx = ssl.create_default_context() + try: + ctx.load_verify_locations(cafile=_extra_bundle_path) + except (ssl.SSLError, OSError) as e: + logger.warning( + "LLM_CA_BUNDLE=%r failed to load (%s); falling back to the " + "default trust store.", + _extra_bundle_path, e, + ) + return None + logger.info( + "Loaded extra CA bundle %r on top of the default trust store.", + _extra_bundle_path, + ) + return ctx + + +# Resolved once at import time. The httpx clients in src/llm_core.py are +# long-lived (process-wide), so editing LLM_CA_BUNDLE requires a restart — +# matching the existing semantics of LLM_HOST, SEARXNG_INSTANCE, etc. +_SHARED_SSL_CONTEXT: Optional[ssl.SSLContext] = _build_ssl_context() + + +def llm_verify(): + """Return the value to pass as `verify=` on httpx.get / httpx.Client / + httpx.AsyncClient. Returns the extended-trust SSLContext when + LLM_CA_BUNDLE is set and loaded; otherwise True (httpx default — system + / certifi bundle, verification fully on).""" + return _SHARED_SSL_CONTEXT if _SHARED_SSL_CONTEXT is not None else True diff --git a/tests/test_tls_overrides_scope.py b/tests/test_tls_overrides_scope.py new file mode 100644 index 0000000..e2ff114 --- /dev/null +++ b/tests/test_tls_overrides_scope.py @@ -0,0 +1,149 @@ +"""Scope tests for src/tls_overrides. + +#722 / PR #769 added an opt-in extra CA bundle (LLM_CA_BUNDLE) for +private-CA LLM providers. 
The whole point is that the override stays +SCOPED — it must extend trust for the intended outbound LLM provider +requests only, and never: + + - touch arbitrary URL fetching (web_fetch, document downloads, generic + httpx.get from any other module), + - touch browser-facing TLS (anything our app serves over HTTPS), + - weaken httpx's process-wide defaults, + - silently disable certificate verification. + +These tests prove that. They enumerate the call sites of `llm_verify()` +in the source tree and assert they match an allowlist; they verify the +override module itself never reaches for the well-known "skip TLS +verification" knobs; and they pin the safe default (verify=True) when +LLM_CA_BUNDLE is unset. + +If a future change threads `llm_verify()` into a non-LLM HTTP path, the +first test fails and the contributor either has to justify the new +caller (and add it to ALLOWED_CALLERS with a comment) or revert. That +keeps the security-sensitive helper hard to misuse. +""" + +from __future__ import annotations + +import os +import re +from pathlib import Path + +REPO = Path(__file__).resolve().parents[1] + + +# Files that legitimately need llm_verify() applied to their outbound +# httpx calls because the URL is an LLM provider's API. Every caller here +# is a discrete LLM HTTP entry point and intentional. Any addition must +# come with its own justification in code review. +ALLOWED_CALLERS = frozenset({ + "src/llm_core.py", # shared AsyncClient used by stream_llm + "routes/model_routes.py", # _probe_endpoint + _ping_endpoint +}) + + +def _grep_files(pattern: str) -> set[str]: + """Return the set of repo-relative .py file paths whose body matches + `pattern`. 
Skips tests, the override module itself, and worktree + scratch dirs.""" + rx = re.compile(pattern) + hits: set[str] = set() + for path in REPO.rglob("*.py"): + rel = path.relative_to(REPO).as_posix() + if rel.startswith("tests/"): + continue + if rel == "src/tls_overrides.py": # definition site, not a caller + continue + if rel.startswith(".claude/") or "/.claude/" in rel: + continue + try: + body = path.read_text(encoding="utf-8", errors="ignore") + except OSError: + continue + if rx.search(body): + hits.add(rel) + return hits + + +def test_llm_verify_only_used_in_allowlisted_files(): + """llm_verify() must only be consumed by the LLM provider HTTP path. + + The extra CA bundle is scoped to the two known LLM HTTP entry points. + If a future PR threads llm_verify() into web_fetch, search providers, + embeddings, gallery downloads, webhook delivery, or any other + arbitrary-URL caller, that's a scope expansion and a security review. + Adding a file to ALLOWED_CALLERS requires a written justification. + """ + callers = _grep_files(r"\bllm_verify\s*\(") + unexpected = callers - ALLOWED_CALLERS + missing = ALLOWED_CALLERS - callers + assert not unexpected, ( + f"llm_verify() called from unexpected file(s): {sorted(unexpected)}. " + f"Expected scope: {sorted(ALLOWED_CALLERS)}. If the new caller is an " + "LLM provider HTTP entry point, add it to ALLOWED_CALLERS with a " + "comment; if it's not, do not thread the extra CA bundle into it." + ) + assert not missing, ( + f"llm_verify() no longer called from {sorted(missing)} — the " + "extra CA bundle integration regressed or the allowlist is stale." + ) + + +def test_tls_overrides_does_not_weaken_global_tls(): + """src/tls_overrides must never reach for a TLS-weakening knob. + + Several common ways to silently weaken TLS in Python: + - ssl._create_default_https_context = ssl._create_unverified_context + - ssl._create_unverified_context (used as a default) + - urllib3.disable_warnings(...) 
+ - httpx.AsyncClient(verify=False) (anywhere — must stay verify=True + or an SSLContext) + - requests.packages.urllib3.disable_warnings(...) + + The override module must only EXTEND trust by loading an additional + bundle into an ssl.SSLContext built on top of the system default. It + must never silently disable verification. + """ + body = (REPO / "src" / "tls_overrides.py").read_text(encoding="utf-8") + forbidden = [ + r"_create_default_https_context\s*=", + r"_create_unverified_context", + r"disable_warnings", + r"verify\s*=\s*False", + ] + for pat in forbidden: + assert not re.search(pat, body), ( + f"src/tls_overrides.py contains forbidden pattern {pat!r}. " + "The extra CA bundle must only ADD trust, never weaken it." + ) + + +def test_llm_verify_default_is_true_when_env_unset(): + """When LLM_CA_BUNDLE is unset, llm_verify() must return True so httpx + falls through to its built-in trust store. This is the safe default — + operators have to opt in to get any change at all.""" + os.environ.pop("LLM_CA_BUNDLE", None) + import importlib + + import src.tls_overrides as mod + importlib.reload(mod) + assert mod.llm_verify() is True, ( + f"Default llm_verify() must be True (httpx built-in trust store); " + f"got {mod.llm_verify()!r}. An accidental non-True default would " + "turn an opt-in extension into a process-wide change." + ) + + +def test_llm_verify_falls_back_to_true_for_missing_bundle_file(): + """Pointing LLM_CA_BUNDLE at a non-existent path must NOT raise and + must fall back to verify=True (system trust). 
A misconfigured env var + on a deploy box should never produce a silently TLS-disabled process.""" + os.environ["LLM_CA_BUNDLE"] = "/nonexistent/path/extra-roots.pem" + try: + import importlib + + import src.tls_overrides as mod + importlib.reload(mod) + assert mod.llm_verify() is True + finally: + os.environ.pop("LLM_CA_BUNDLE", None) From 7b45a94b6d8469bafca10e655f56f10695301ce4 Mon Sep 17 00:00:00 2001 From: Alexander Kenley Date: Thu, 4 Jun 2026 22:20:04 +1000 Subject: [PATCH 04/12] Fix calendar routing and user-local time context (#408) * fix(chat): add user-local time context * fix(chat): route calendar follow-up phrasing * refactor(chat): log tool intent routing reasons * test(chat): align user time prompt shim --------- Co-authored-by: Alex Kenley --- routes/calendar_routes.py | 62 +++++++++------- routes/chat_routes.py | 44 +++++++---- src/action_intents.py | 119 +++++++++++++++++++++--------- src/agent_loop.py | 25 +------ src/chat_processor.py | 9 +++ src/tool_index.py | 2 +- src/tool_schemas.py | 2 +- src/user_time.py | 138 +++++++++++++++++++++++++++++++++++ static/js/calendar.js | 3 +- static/js/chat.js | 31 ++++++-- tests/test_action_intents.py | 23 +++++- tests/test_user_time.py | 111 ++++++++++++++++++++++++++++ 12 files changed, 463 insertions(+), 106 deletions(-) create mode 100644 src/user_time.py create mode 100644 tests/test_user_time.py diff --git a/routes/calendar_routes.py b/routes/calendar_routes.py index 4c79ce8..788a6ea 100644 --- a/routes/calendar_routes.py +++ b/routes/calendar_routes.py @@ -161,26 +161,18 @@ def _ensure_default_calendar(db, owner: str = None) -> CalendarCal: return cal -# Per-request user UTC offset (in minutes east of UTC). chat_routes sets this -# from the `X-Tz-Offset` header so naive natural-language times the LLM -# emits ("today at 9pm") are parsed in the USER's timezone, not the server's -# clock. None = unknown, fall back to legacy server-local behavior. 
-from contextvars import ContextVar -_USER_TZ_OFFSET_MIN: ContextVar = ContextVar("user_tz_offset_min", default=None) - - -def set_user_tz_offset(offset_min): - """Set the current user's UTC offset for this async context.""" - try: - v = int(offset_min) - except (TypeError, ValueError): - return - _USER_TZ_OFFSET_MIN.set(v) - - -def get_user_tz_offset(): - """Read the current user's UTC offset (minutes east of UTC), or None.""" - return _USER_TZ_OFFSET_MIN.get() +# Per-request user time context. chat_routes sets this from browser timezone +# headers so natural-language times the LLM emits ("today at 9pm") are parsed +# in the user's timezone, not the server's clock. None = unknown, fall back to +# legacy server-local behavior. +from src.user_time import ( + get_user_tz_name, + get_user_tz_offset, + now_user_local, + set_user_tz_name, + set_user_tz_offset, + user_timezone, +) def parse_due_for_user(s: str) -> str: @@ -199,6 +191,7 @@ def parse_due_for_user(s: str) -> str: """ from datetime import timezone as _tz, timedelta as _td offset = get_user_tz_offset() + tz_name = get_user_tz_name() s = (s or "").strip() if not s: return s @@ -212,11 +205,11 @@ def parse_due_for_user(s: str) -> str: except ValueError: parsed = None - if offset is None: + if offset is None and not tz_name: # No user tz known — preserve legacy behavior (naive server-local). return _parse_dt(s).isoformat() - user_tz = _tz(_td(minutes=offset)) + user_tz = user_timezone() # Naive ISO → tag with user tz. if parsed is not None and parsed.tzinfo is None: @@ -224,7 +217,7 @@ def parse_due_for_user(s: str) -> str: # Natural language — evaluate against user's "now". server_now_utc = datetime.now(_tz.utc) - user_now = server_now_utc.astimezone(user_tz) + user_now = now_user_local(server_now_utc) # Patch datetime.now() inside _parse_dt by leveraging the user's clock: # we re-implement the small natural-language phrases here against user_now # so the result is naturally in the user's tz. 
@@ -232,6 +225,7 @@ def parse_due_for_user(s: str) -> str: lower = s.lower().strip() def _parse_time(t): + t = _re.sub(r'\b([ap])\s*\.?\s*m\.?\b', r'\1m', t.strip(), flags=_re.IGNORECASE) m = _re.match(r'^\s*(\d{1,2})(?::(\d{2}))?\s*(am|pm)?\s*$', t, _re.IGNORECASE) if not m: return None h = int(m.group(1)); mn = int(m.group(2) or 0); ampm = (m.group(3) or "").lower() @@ -341,6 +335,7 @@ def _parse_dt(s: str) -> datetime: def _parse_time(t: str): """Return (hour, minute) from '1pm', '1:30 PM', '13:00', etc., or None.""" + t = _re.sub(r'\b([ap])\s*\.?\s*m\.?\b', r'\1m', t.strip(), flags=_re.IGNORECASE) m = _re.match(r'^\s*(\d{1,2})(?::(\d{2}))?\s*(am|pm)?\s*$', t, _re.IGNORECASE) if not m: return None @@ -1210,7 +1205,20 @@ def setup_calendar_routes() -> APIRouter: text = (body.get("text") or "").strip() if not text: raise HTTPException(400, "text is required") + from src.user_time import ( + clear_user_time_context, + current_datetime_prompt, + now_user_local, + set_user_tz_name, + set_user_tz_offset, + ) + + clear_user_time_context() tz_hint = (body.get("tz") or "").strip() + if body.get("tz_offset") is not None: + set_user_tz_offset(body.get("tz_offset")) + if tz_hint: + set_user_tz_name(tz_hint) url, model, headers = resolve_endpoint("utility") if not url: @@ -1218,15 +1226,15 @@ def setup_calendar_routes() -> APIRouter: if not url or not model: return {"ok": False, "error": "No LLM endpoint configured"} - now = datetime.now() + now = now_user_local() now_iso = now.strftime("%Y-%m-%dT%H:%M:%S") # The model gets only the schema it needs to fill out; we re-validate # everything client-side too. system_prompt = ( - "You are a calendar event parser. Read the user's one-line " + current_datetime_prompt() + + "You are a calendar event parser. Read the user's one-line " "description and emit STRICT JSON describing the event. " - f"Today is {now.strftime('%A, %Y-%m-%d')} ({now_iso}). " - + (f"User timezone: {tz_hint}. 
" if tz_hint else "") + f"The current user-local timestamp is {now_iso}. " + "Resolve relative dates (\"tomorrow\", \"friday\", \"next monday\", " "\"in 30 minutes\") against today. Default duration is 60 minutes " "when no end time is given. If the text mentions a date with no " diff --git a/routes/chat_routes.py b/routes/chat_routes.py index f54c265..8dd17a5 100644 --- a/routes/chat_routes.py +++ b/routes/chat_routes.py @@ -37,7 +37,7 @@ from routes.chat_helpers import ( clean_thinking_for_save, _enforce_chat_privileges, ) -from src.action_intents import message_needs_tools as _message_needs_tools +from src.action_intents import classify_tool_intent as _classify_tool_intent logger = logging.getLogger(__name__) @@ -229,6 +229,26 @@ def _recover_empty_session_model(sess, session_id: str, owner: str | None = None db.close() +def _set_user_time_from_request(request: Request) -> None: + """Copy browser timezone headers into the per-request context. + + This is intentionally ephemeral: it is used only while building prompts + and running tools for this request. It is not persisted or logged. 
+ """ + try: + tz_offset = request.headers.get("x-tz-offset") + tz_name = request.headers.get("x-tz-name") + from src.user_time import clear_user_time_context, set_user_tz_name, set_user_tz_offset + + clear_user_time_context() + if tz_offset is not None: + set_user_tz_offset(tz_offset) + if tz_name: + set_user_tz_name(tz_name) + except Exception: + pass + + def setup_chat_routes( session_manager, chat_handler, @@ -247,6 +267,8 @@ def setup_chat_routes( # ------------------------------------------------------------------ # @router.post("/api/chat", response_model=Dict[str, str]) async def chat_endpoint(request: Request, chat_request: ChatRequest) -> Dict[str, str]: + _set_user_time_from_request(request) + message = chat_request.message session = chat_request.session att_ids = chat_request.attachments or [] @@ -355,16 +377,7 @@ def setup_chat_routes( except Exception as e: raise HTTPException(400, f"Request parsing error: {e}") - # Stash the user's UTC offset (in minutes east of UTC) from the - # frontend so tools like manage_notes interpret natural-language - # times in the USER's tz, not the server's. See calendar_routes. - try: - _tz_hdr = request.headers.get("x-tz-offset") - if _tz_hdr is not None: - from routes.calendar_routes import set_user_tz_offset - set_user_tz_offset(_tz_hdr) - except Exception: - pass + _set_user_time_from_request(request) form_data = await request.form() message = form_data.get("message") @@ -393,10 +406,15 @@ def setup_chat_routes( # its way through a plain chat request (and fail, especially with the # shell disabled). 
auto_escalated = False - if chat_mode == "chat" and isinstance(message, str) and _message_needs_tools(message): + _tool_intent = _classify_tool_intent(message) if isinstance(message, str) else None + if chat_mode == "chat" and _tool_intent and _tool_intent.needs_tools: chat_mode = "agent" auto_escalated = True - logger.info("chat→agent auto-escalation: message matched tool-intent pattern") + logger.info( + "chat→agent auto-escalation: category=%s reason=%s", + _tool_intent.category, + _tool_intent.reason, + ) active_doc_id = form_data.get("active_doc_id", "").strip() logger.info(f"[doc-inject] chat_mode={chat_mode}, active_doc_id={active_doc_id!r}") diff --git a/src/action_intents.py b/src/action_intents.py index 7054801..84734ab 100644 --- a/src/action_intents.py +++ b/src/action_intents.py @@ -8,74 +8,121 @@ user asks how a feature works. from __future__ import annotations import re +from dataclasses import dataclass from typing import Iterable, Pattern -_ACTION_QUESTION = r"\b(?:can|could|would|will)\s+you\s+" -_PLEASE = r"^\s*(?:please\s+)?" 
+@dataclass(frozen=True) +class ToolIntent: + """A cheap, deterministic chat-to-agent routing decision.""" -_CALENDAR_ACTION = r"(?:add|create|schedule|book|put|set\s+up|make)" + needs_tools: bool + category: str = "" + reason: str = "" + + +_ACTION_QUESTION = r"\b(?:can|could|would|will)\s+you\s+" +_ACTION_FOLLOWUP = ( + r"\b(?:you\s+should\s+be\s+able\s+to|" + r"(?:can|could|would|will|should)\s+you|" + r"you\s+(?:can|could|would|will|should|need\s+to|have\s+to))\s+" +) +_PLEASE = r"^\s*(?:(?:please|ok(?:ay)?|alright|right|sure|cool|great|thanks)[\s,.!-]+)*" + +_CALENDAR_ACTION = ( + r"(?:add|adding|create|creating|recreate|recreating|schedule|scheduling|" + r"reschedule|rescheduling|book|booking|put|set\s+up|make|making|" + r"delete|deleting|remove|removing|cancel|cancelling|canceling)" +) _CALENDAR_THING = r"(?:calendar|calendar\s+(?:entry|item)|event|meeting|appointment|entry|call)" +_EXPLANATORY_PREFIX = re.compile( + r"^\s*(?:how\s+(?:do|can)\s+i|can\s+you\s+explain|what\s+about|tell\s+me\s+how|show\s+me\s+how)\b", + re.I, +) _PANEL = ( r"(?:calendar|notes?|inbox|email|mail|documents?|docs|library|gallery|" r"settings|cookbook|sessions?|chats?|skills|memories|memory|brain)" ) -_TOOL_INTENT_PATTERNS: tuple[Pattern[str], ...] = tuple( - re.compile(pattern, re.I) - for pattern in ( +_ROUTING_PATTERNS: tuple[tuple[str, str, Pattern[str]], ...] = tuple( + (category, reason, re.compile(pattern, re.I)) + for category, reason, pattern in ( # Calendar/event creation. Covers "Can you add an entry to my - # calendar?" and imperatives like "add lunch to my calendar". 
- rf"{_ACTION_QUESTION}{_CALENDAR_ACTION}\b.{{0,120}}\b{_CALENDAR_THING}\b", - rf"{_PLEASE}{_CALENDAR_ACTION}\b.{{0,120}}\b(?:to|on|in|into|for)\s+(?:my\s+|the\s+|this\s+)?calendar\b", - rf"{_PLEASE}{_CALENDAR_ACTION}\s+(?:a\s+|an\s+)?(?:calendar\s+)?(?:event|meeting|appointment|entry|item|call)\b", - r"\bput\s+.+\bon\s+(?:my\s+)?calendar\b", + # calendar?", imperatives like "add lunch to my calendar", and + # follow-ups such as "you should be able to create that event now". + ("calendar", "assistant calendar action request", rf"{_ACTION_QUESTION}{_CALENDAR_ACTION}\b.{{0,120}}\b{_CALENDAR_THING}\b"), + ("calendar", "calendar follow-up action request", rf"{_ACTION_FOLLOWUP}{_CALENDAR_ACTION}\b.{{0,120}}\b{_CALENDAR_THING}\b"), + ("calendar", "calendar imperative action request", rf"{_PLEASE}{_CALENDAR_ACTION}\b.{{0,120}}\b{_CALENDAR_THING}\b"), + ("calendar", "calendar target action request", rf"{_PLEASE}{_CALENDAR_ACTION}\b.{{0,120}}\b(?:to|on|in|into|for)\s+(?:my\s+|the\s+|this\s+)?calendar\b"), + ("calendar", "calendar item action request", rf"{_PLEASE}{_CALENDAR_ACTION}\s+(?:it\s+)?(?:a\s+|an\s+)?(?:calendar\s+)?(?:event|meeting|appointment|entry|item|call)\b"), + ("calendar", "calendar target action request", rf"\b{_CALENDAR_ACTION}\b.{{0,120}}\b(?:to|on|in|into|for)\s+(?:my\s+|the\s+|this\s+)?calendar\b"), + ("calendar", "put item on calendar request", r"\bput\s+.+\bon\s+(?:my\s+)?calendar\b"), # Notes, todos, checklists, and reminders. 
- r"\bremind\s+me\b", - rf"{_ACTION_QUESTION}(?:add|create|make|take|jot|write\s+down|set)\b.{{0,120}}\b(?:note|todo|task|checklist|reminder)\b", - rf"{_PLEASE}(?:add|create|make)\s+(?:a\s+|an\s+)?(?:todo|task|reminder|note|checklist)\b", - rf"{_PLEASE}(?:take|jot|write\s+down)\s+(?:a\s+|an\s+)?note\b", - rf"{_PLEASE}(?:add|jot|write\s+down)\b.{{0,120}}\b(?:to|in|into)\s+(?:my\s+|the\s+)?(?:todo(?:\s+list)?|task\s+list|notes?|checklist)\b", - rf"{_PLEASE}set\s+(?:a\s+)?reminder\b", - rf"{_ACTION_QUESTION}set\s+(?:a\s+)?reminder\b", + ("notes", "reminder request", r"\bremind\s+me\b"), + ("notes", "assistant note/todo action request", rf"{_ACTION_QUESTION}(?:add|create|make|take|jot|write\s+down|set)\b.{{0,120}}\b(?:note|todo|task|checklist|reminder)\b"), + ("notes", "note/todo imperative request", rf"{_PLEASE}(?:add|create|make)\s+(?:a\s+|an\s+)?(?:todo|task|reminder|note|checklist)\b"), + ("notes", "take note request", rf"{_PLEASE}(?:take|jot|write\s+down)\s+(?:a\s+|an\s+)?note\b"), + ("notes", "add item to notes/todo request", rf"{_PLEASE}(?:add|jot|write\s+down)\b.{{0,120}}\b(?:to|in|into)\s+(?:my\s+|the\s+)?(?:todo(?:\s+list)?|task\s+list|notes?|checklist)\b"), + ("notes", "set reminder request", rf"{_PLEASE}set\s+(?:a\s+)?reminder\b"), + ("notes", "assistant reminder request", rf"{_ACTION_QUESTION}set\s+(?:a\s+)?reminder\b"), # Email actions. 
- rf"{_ACTION_QUESTION}(?:send|write|reply|email|message|archive|delete|mark)\b.{{0,120}}\b(?:emails?|mail|messages?|inbox|unread|read)\b", - rf"{_PLEASE}(?:send|write|reply)\b.{{0,120}}\b(?:emails?|mail|messages?)\b", - rf"{_PLEASE}(?:archive|delete|mark)\b.{{0,120}}\b(?:emails?|mail|messages?|inbox)\b", - r"\b(?:send|write|reply)\s+(?:an?\s+)?(?:email|message|mail)\b", - r"\bemail\s+\w+\b", - r"\bcheck\s+(?:my\s+)?(?:email|inbox|mail)\b", - r"\bunread\s+(?:email|mail)s?\b", + ("email", "assistant email action request", rf"{_ACTION_QUESTION}(?:send|write|reply|email|message|archive|delete|mark)\b.{{0,120}}\b(?:emails?|mail|messages?|inbox|unread|read)\b"), + ("email", "send/write/reply email request", rf"{_PLEASE}(?:send|write|reply)\b.{{0,120}}\b(?:emails?|mail|messages?)\b"), + ("email", "archive/delete/mark email request", rf"{_PLEASE}(?:archive|delete|mark)\b.{{0,120}}\b(?:emails?|mail|messages?|inbox)\b"), + ("email", "email composition request", r"\b(?:send|write|reply)\s+(?:an?\s+)?(?:email|message|mail)\b"), + ("email", "email contact request", r"\bemail\s+\w+\b"), + ("email", "check inbox request", r"\bcheck\s+(?:my\s+)?(?:email|inbox|mail)\b"), + ("email", "unread email request", r"\bunread\s+(?:email|mail)s?\b"), # UI/control-plane actions that should open panels or flip toggles. - rf"{_PLEASE}(?:open|show|bring\s+up)\s+(?:me\s+)?(?:my\s+|the\s+)?{_PANEL}\b", - r"\b(?:disable|enable|turn\s+(?:on|off))\s+(?:the\s+)?(?:shell|search|web|browser|documents?|memory|skills|images?|calendar|email|mail|research|incognito)\b", + ("ui", "open/show panel request", rf"{_PLEASE}(?:open|show|bring\s+up)\s+(?:me\s+)?(?:my\s+|the\s+)?{_PANEL}\b"), + ("ui", "tool or feature toggle request", r"\b(?:disable|enable|turn\s+(?:on|off))\s+(?:the\s+)?(?:shell|search|web|browser|documents?|memory|skills|images?|calendar|email|mail|research|incognito)\b"), # Deep research jobs, not quick conceptual mentions of research. 
- rf"{_PLEASE}(?:research|deep\s+dive|look\s+into|investigate)\s+.+", - rf"{_ACTION_QUESTION}(?:research|do\s+research|deep\s+dive|look\s+into|investigate)\s+.+", + ("research", "deep research imperative request", rf"{_PLEASE}(?:research|deep\s+dive|look\s+into|investigate)\s+.+"), + ("research", "assistant deep research request", rf"{_ACTION_QUESTION}(?:research|do\s+research|deep\s+dive|look\s+into|investigate)\s+.+"), # Shell / remote-host intent. - r"\bssh\s+(?:in)?to\b", - r"\bssh\s+\w+", - r"\b(run|execute)\s+.{1,40}\bon\s+\w+", - r"\b(can|could|please|would)\s+you\s+(run|execute|exec)\b", + ("shell", "ssh request", r"\bssh\s+(?:in)?to\b"), + ("shell", "ssh target request", r"\bssh\s+\w+"), + ("shell", "remote command request", r"\b(run|execute)\s+.{1,40}\bon\s+\w+"), + ("shell", "assistant command execution request", r"\b(can|could|please|would)\s+you\s+(run|execute|exec)\b"), # Shell verbs only count in imperative position (start of message, # optionally after "please") or as a "can you ..." request. A bare # word match promoted informational questions ("What does the grep # command do?") and incidental uses ("My cat ate my homework"). - rf"{_PLEASE}(deploy|build|install|restart|reboot|kill|tail|grep|cat|ls|cd|cp|mv|rm)\b\s+\S+", - rf"{_ACTION_QUESTION}(deploy|build|install|restart|reboot|kill|tail|grep|cat|ls|cd|cp|mv|rm)\b\s+\S+", - r"\b(check|see)\s+(if|whether|what)\s+.{1,40}\b(running|process|service|port|file|exists?)\b", + ("shell", "imperative shell command request", rf"{_PLEASE}(deploy|build|install|restart|reboot|kill|tail|grep|cat|ls|cd|cp|mv|rm)\b\s+\S+"), + ("shell", "assistant shell command request", rf"{_ACTION_QUESTION}(deploy|build|install|restart|reboot|kill|tail|grep|cat|ls|cd|cp|mv|rm)\b\s+\S+"), + ("shell", "system/file check request", r"\b(check|see)\s+(if|whether|what)\s+.{1,40}\b(running|process|service|port|file|exists?)\b"), ) ) +_TOOL_INTENT_PATTERNS: tuple[Pattern[str], ...] 
= tuple( + pattern for _, _, pattern in _ROUTING_PATTERNS +) + + +def classify_tool_intent(text: str) -> ToolIntent: + """Classify whether a chat message should be promoted to agent mode.""" + if not text: + return ToolIntent(False, reason="empty message") + if _EXPLANATORY_PREFIX.search(text): + return ToolIntent(False, reason="explanatory feature question") + for category, reason, pattern in _ROUTING_PATTERNS: + if pattern.search(text): + return ToolIntent(True, category=category, reason=reason) + return ToolIntent(False, reason="no tool-action pattern matched") + def message_needs_tools(text: str, patterns: Iterable[Pattern[str]] = _TOOL_INTENT_PATTERNS) -> bool: """Return True when a plain chat message should be promoted to agent mode.""" if not text: return False + if _EXPLANATORY_PREFIX.search(text): + return False + if patterns is _TOOL_INTENT_PATTERNS: + return classify_tool_intent(text).needs_tools return any(pattern.search(text) for pattern in patterns) diff --git a/src/agent_loop.py b/src/agent_loop.py index c0a7cc6..653baa9 100644 --- a/src/agent_loop.py +++ b/src/agent_loop.py @@ -636,28 +636,11 @@ def _build_system_prompt( set_active_model(model) - # Current date/time — every request. Models default to their - # training-cutoff date when "today" is asked otherwise (was - # rendering April 2026 dates as "today" when the actual date is - # May 19, 2026). System TZ-local so calendar/email date math - # matches what the user sees. + # Current date/time for every agent request. This is user-local when the + # browser provided timezone headers, with a server-local fallback. try: - from datetime import datetime as _dt, timezone as _tz - _now = _dt.now().astimezone() - _utc = _dt.now(_tz.utc) - _off = _now.strftime('%z') # e.g. +0900 - _off_fmt = (f"{_off[:3]}:{_off[3:]}" if _off else "+00:00") - agent_prompt = ( - f"## Current date and time\n" - f"Today is {_now.strftime('%A, %B %-d, %Y')} ({_now.strftime('%Y-%m-%d')}). 
" - f"Local time is {_now.strftime('%-I:%M %p')} ({_now.strftime('%Z')}, UTC{_off_fmt}); " - f"current UTC time is {_utc.strftime('%H:%M')}. " - f"Use this for any 'today'/'tomorrow'/'this week' reasoning — do NOT " - f"infer the date from training data or from event timestamps.\n" - f"When scheduling a task (manage_tasks), scheduled_time is in UTC: " - f"subtract the offset above from the user's local time " - f"(local {_now.strftime('%H:%M')} = {_utc.strftime('%H:%M')} UTC right now).\n\n" - ) + agent_prompt + from src.user_time import current_datetime_prompt + agent_prompt = current_datetime_prompt() + agent_prompt except Exception: pass diff --git a/src/chat_processor.py b/src/chat_processor.py index 47ff76c..02062ae 100644 --- a/src/chat_processor.py +++ b/src/chat_processor.py @@ -185,6 +185,15 @@ class ChatProcessor: "role": "system", "content": preset_system_prompt }) + if not agent_mode: + try: + from src.user_time import current_datetime_prompt + preface.append({ + "role": "system", + "content": current_datetime_prompt(), + }) + except Exception: + logger.debug("Failed to add current date/time context", exc_info=True) preface.append({ "role": "system", "content": UNTRUSTED_CONTEXT_POLICY, diff --git a/src/tool_index.py b/src/tool_index.py index 506e55d..3c5150e 100644 --- a/src/tool_index.py +++ b/src/tool_index.py @@ -102,7 +102,7 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = { "resolve_contact": "Look up a contact's email address by name. Searches CardDAV address book and sent email history. Use when the user says 'message [name]', 'email [name]', or 'send to [name]' without an email address.", "manage_contact": "Create, update, delete, or list CardDAV contacts. Use to save a new contact, change an existing one's email/phone, or remove one. Action=list returns uids needed for update/delete. Use when the user says 'save this contact', 'add [name] to contacts', 'update [name]'s email', 'delete [name] from contacts'. 
Do not use for user identity facts like 'my name is '; those are memory.", "manage_notes": "Create and manage notes and checklists (Google Keep-style). ALWAYS use this for note/todo/checklist/reminder creation — NEVER hit /api/notes via app_api. Accepts natural-language `due_date` like 'tomorrow at 9am' or '11pm today' (parsed in the USER'S timezone). The due_date IS the reminder — it fires a notification at that time, so do NOT also create a calendar event for the same reminder. Set colors, labels, pin, archive. Do NOT use manage_memory for note content.", - "manage_calendar": "Calendar event management: list, create, update, delete. Each event can carry a tag/category (event_type — work/personal/health/travel/meal/social/admin/other) and importance (low/normal/high/critical). Use ISO datetimes; supports all-day events. For event reminders/alarms, pass reminder_minutes; this creates the Notes reminder, so do not also call manage_notes for the same reminder.", + "manage_calendar": "Calendar event management: list, create, update, delete. Each event can carry a tag/category (event_type — work/personal/health/travel/meal/social/admin/other) and importance (low/normal/high/critical). Resolve today/tomorrow using the Current date and time context, then use ISO datetimes in the user's local wall time; supports all-day events. For event reminders/alarms, pass reminder_minutes; this creates the Notes reminder, so do not also call manage_notes for the same reminder.", "download_model": "Download a HuggingFace model to a local or remote server. Specify repo_id (e.g. 'Qwen/Qwen3-8B'), optional server host, and optional include filter for specific files.", "serve_model": "Start serving a model with vLLM, SGLang, llama.cpp, Ollama, or Diffusers. For image/inpainting/diffusion use python3 scripts/diffusion_server.py --model --port 8100. 
After launch, call list_served_models for readiness/errors and retry suggestions.", "list_served_models": "List currently running model servers in the Cookbook — shows status (loading, ready, idle, error), model name, port, throughput, and serve failure diagnosis/retry suggestions. Use when the user asks 'what's running', 'show my cookbook', 'which models are up', 'what's serving'.", diff --git a/src/tool_schemas.py b/src/tool_schemas.py index f55fb82..b862301 100644 --- a/src/tool_schemas.py +++ b/src/tool_schemas.py @@ -422,7 +422,7 @@ FUNCTION_TOOL_SCHEMAS = [ "type": "function", "function": { "name": "manage_calendar", - "description": "Manage calendar events: list events in a date range, create, update, delete. Each event can carry a tag/category (event_type) and importance level. Use ISO 8601 datetimes; for all-day events set all_day=true and pass YYYY-MM-DD. For event reminders/alarms, pass reminder_minutes; the tool creates the Odysseus note reminder, so do not also call manage_notes for the same reminder.", + "description": "Manage calendar events: list events in a date range, create, update, delete. Each event can carry a tag/category (event_type) and importance level. Resolve relative dates like today/tomorrow against the 'Current date and time' system context, then pass ISO 8601 datetimes in the user's local wall time; for all-day events set all_day=true and pass YYYY-MM-DD. For event reminders/alarms, pass reminder_minutes; the tool creates the Odysseus note reminder, so do not also call manage_notes for the same reminder.", "parameters": { "type": "object", "properties": { diff --git a/src/user_time.py b/src/user_time.py new file mode 100644 index 0000000..44519c0 --- /dev/null +++ b/src/user_time.py @@ -0,0 +1,138 @@ +"""Per-request user-local time helpers. + +Chat routes set this context from browser headers. Prompt builders and tools +can then resolve relative dates against the user's clock instead of the server. 
+""" + +from __future__ import annotations + +import re +from contextvars import ContextVar +from datetime import datetime, timedelta, timezone +from typing import Optional + + +_USER_TZ_OFFSET_MIN: ContextVar[Optional[int]] = ContextVar("user_tz_offset_min", default=None) +_USER_TZ_NAME: ContextVar[Optional[str]] = ContextVar("user_tz_name", default=None) + + +def set_user_tz_offset(offset_min) -> None: + """Set the current user's UTC offset in minutes east of UTC.""" + if offset_min in (None, ""): + _USER_TZ_OFFSET_MIN.set(None) + return + try: + value = int(offset_min) + except (TypeError, ValueError): + return + if -14 * 60 <= value <= 14 * 60: + _USER_TZ_OFFSET_MIN.set(value) + + +def get_user_tz_offset() -> Optional[int]: + """Return minutes east of UTC for the current user, if known.""" + return _USER_TZ_OFFSET_MIN.get() + + +def set_user_tz_name(name) -> None: + """Set a safe IANA timezone label for the current request context.""" + if not name: + _USER_TZ_NAME.set(None) + return + first_token = str(name).strip().split()[0] if str(name).strip() else "" + cleaned = re.sub(r"[^A-Za-z0-9_+\-./]", "", first_token)[:80] + _USER_TZ_NAME.set(cleaned or None) + + +def get_user_tz_name() -> Optional[str]: + """Return the current user's browser timezone name, if provided.""" + return _USER_TZ_NAME.get() + + +def clear_user_time_context() -> None: + """Clear user-local time context for tests and non-browser entry points.""" + _USER_TZ_OFFSET_MIN.set(None) + _USER_TZ_NAME.set(None) + + +def format_utc_offset(offset_min: Optional[int]) -> str: + """Format minutes east of UTC as +HH:MM or -HH:MM.""" + if offset_min is None: + offset_min = 0 + sign = "+" if offset_min >= 0 else "-" + total = abs(int(offset_min)) + hours, minutes = divmod(total, 60) + return f"{sign}{hours:02d}:{minutes:02d}" + + +def user_timezone() -> timezone: + """Return the best known user timezone as a fixed-offset tzinfo.""" + offset = get_user_tz_offset() + if offset is None: + name = 
get_user_tz_name() + if name: + try: + from zoneinfo import ZoneInfo + return ZoneInfo(name) + except Exception: + pass + return datetime.now().astimezone().tzinfo or timezone.utc + return timezone(timedelta(minutes=offset)) + + +def now_user_local(now_utc: Optional[datetime] = None) -> datetime: + """Return the current time in the user's timezone.""" + if now_utc is None: + now_utc = datetime.now(timezone.utc) + elif now_utc.tzinfo is None: + now_utc = now_utc.replace(tzinfo=timezone.utc) + return now_utc.astimezone(user_timezone()) + + +def _date_label(dt: datetime) -> str: + return f"{dt.strftime('%A')}, {dt.strftime('%B')} {dt.day}, {dt.year}" + + +def _clock_label(dt: datetime) -> str: + hour = dt.hour % 12 or 12 + return f"{hour}:{dt.minute:02d} {dt.strftime('%p')}" + + +def timezone_label(dt: Optional[datetime] = None) -> str: + """Return a concise display label such as Australia/Brisbane, UTC+10:00.""" + offset = get_user_tz_offset() + if offset is None: + if dt is None: + dt = datetime.now().astimezone() + offset = int((dt.utcoffset() or timedelta()).total_seconds() // 60) + offset_label = f"UTC{format_utc_offset(offset)}" + name = get_user_tz_name() + return f"{name}, {offset_label}" if name else offset_label + + +def current_datetime_prompt(now_utc: Optional[datetime] = None) -> str: + """Build reusable system prompt text for date/time reasoning.""" + if now_utc is None: + utc_now = datetime.now(timezone.utc) + elif now_utc.tzinfo is None: + utc_now = now_utc.replace(tzinfo=timezone.utc) + else: + utc_now = now_utc.astimezone(timezone.utc) + + local_now = now_user_local(utc_now) + tomorrow = local_now + timedelta(days=1) + return ( + "## Current date and time\n" + f"Today is {_date_label(local_now)} ({local_now.strftime('%Y-%m-%d')}). 
" + f"User local time is {_clock_label(local_now)} ({timezone_label(local_now)}); " + f"current UTC time is {utc_now.strftime('%H:%M')}.\n" + f"Tomorrow is {_date_label(tomorrow)} ({tomorrow.strftime('%Y-%m-%d')}) " + "in the user's local timezone.\n" + "Use this for any 'today', 'tomorrow', 'tonight', 'this week', or other " + "relative-date reasoning. Do not ask for an exact date just because the " + "user used a relative date.\n" + "When scheduling calendar events with manage_calendar, pass local ISO " + "datetimes resolved against this user-local date/time.\n" + "When scheduling a task with manage_tasks, scheduled_time is in UTC: " + "convert the user's stated local time using the UTC offset above.\n\n" + ) diff --git a/static/js/calendar.js b/static/js/calendar.js index 31a4423..ebd6bfc 100644 --- a/static/js/calendar.js +++ b/static/js/calendar.js @@ -1876,11 +1876,12 @@ function _wireAll(body) { } try { const tz = Intl.DateTimeFormat().resolvedOptions().timeZone || ''; + const tzOffset = -new Date().getTimezoneOffset(); const res = await fetch(`${API_BASE}/api/calendar/quick-parse`, { method: 'POST', credentials: 'same-origin', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ text, tz }), + body: JSON.stringify({ text, tz, tz_offset: tzOffset }), }); const data = await res.json().catch(() => ({})); if (!res.ok || !data.ok) { diff --git a/static/js/chat.js b/static/js/chat.js index f14c715..dd47188 100644 --- a/static/js/chat.js +++ b/static/js/chat.js @@ -530,6 +530,9 @@ import createResearchSynapse from './researchSynapse.js'; let _renderStream = () => {}; let _cancelThinkingTimer = () => {}; let _removeThinkingSpinner = () => {}; + let timeoutId = null; + let responseTimeoutCleared = false; + let clearResponseTimeout = () => {}; const clearProcessingProbe = () => { if (processingProbeTimer) { clearTimeout(processingProbeTimer); @@ -790,13 +793,26 @@ import createResearchSynapse from './researchSynapse.js'; // Timeout: 6 min for 
research and agent mode, 3 min otherwise const timeoutMs = el('research-toggle').checked || _isAgent ? RESEARCH_TIMEOUT_MS : DEFAULT_TIMEOUT_MS; - const timeoutId = setTimeout(() => { + timeoutId = setTimeout(() => { if (!abortCtrl.signal.aborted) { timedOut = true; abortCtrl._reason = 'timeout'; + try { + if (streamSessionId) { + fetch(`/api/chat/stop/${encodeURIComponent(streamSessionId)}`, { + method: 'POST', + credentials: 'same-origin', + }).catch(() => {}); + } + } catch (_) {} abortCtrl.abort(); } }, timeoutMs); + clearResponseTimeout = () => { + if (responseTimeoutCleared) return; + responseTimeoutCleared = true; + clearTimeout(timeoutId); + }; const box = el('chat-history'); holder = document.createElement('div'); @@ -922,16 +938,19 @@ import createResearchSynapse from './researchSynapse.js'; // the agent so natural-language times like "today at 9pm" are // interpreted in YOUR timezone, not the server's. const _tzOffsetMin = -new Date().getTimezoneOffset(); + const _tzName = (() => { + try { return Intl.DateTimeFormat().resolvedOptions().timeZone || ''; } + catch { return ''; } + })(); const res = await fetch(`${API_BASE}/api/chat_stream`, { method: 'POST', body: fd, - headers: { 'X-Tz-Offset': String(_tzOffsetMin) }, + headers: { 'X-Tz-Offset': String(_tzOffsetMin), 'X-Tz-Name': _tzName }, signal: abortCtrl.signal }); - clearTimeout(timeoutId); - if (!res.ok) { + clearResponseTimeout(); if (res.status === 404) { // Session was deleted (e.g. 
by AI) — reload and go to welcome holder.remove(); @@ -1359,7 +1378,8 @@ import createResearchSynapse from './researchSynapse.js'; typewriterInto(roundHolder.querySelector('.body'), errMsg); break; } - if (json.delta || json.type === 'tool_start' || json.type === 'agent_step' || json.type === 'doc_stream_delta') { + if (json.delta || json.type === 'tool_start' || json.type === 'tool_output' || json.type === 'tool_progress' || json.type === 'agent_step' || json.type === 'doc_stream_open' || json.type === 'doc_stream_delta' || json.type === 'research_progress') { + clearResponseTimeout(); clearProcessingProbe(); } if (json.delta) { @@ -2710,6 +2730,7 @@ import createResearchSynapse from './researchSynapse.js'; } } } finally { + clearResponseTimeout(); clearProcessingProbe(); // Streaming done — let screen readers announce the settled response. const _chatLogDone = document.getElementById('chat-history'); diff --git a/tests/test_action_intents.py b/tests/test_action_intents.py index 87f59fa..164ed4d 100644 --- a/tests/test_action_intents.py +++ b/tests/test_action_intents.py @@ -1,14 +1,26 @@ -from src.action_intents import message_needs_tools +from src.action_intents import classify_tool_intent, message_needs_tools def test_calendar_entry_request_promotes_to_agent(): assert message_needs_tools("Can you add an entry to my calendar?") + intent = classify_tool_intent("Can you add an entry to my calendar?") + assert intent.needs_tools + assert intent.category == "calendar" def test_calendar_imperative_variants_promote_to_agent(): assert message_needs_tools("add lunch with Sam to my calendar tomorrow at noon") assert message_needs_tools("schedule a call with Mina next Friday") assert message_needs_tools("put dentist appointment on my calendar") + assert message_needs_tools("Alright. 
Recreate that same appointment") + assert message_needs_tools("Okay delete that doctor appointment from the calendar") + assert message_needs_tools("have another go at adding a test entry to the calendar") + assert message_needs_tools( + "Okay so you should be able to create that calendar event for tomorrow at 1:30 p.m. right for me to go to the hardware store" + ) + assert message_needs_tools( + "make it an appointment at 12pm for me to visit the doctor it's tomorrow the 2nd of June 2026" + ) def test_note_todo_and_reminder_actions_promote_to_agent(): @@ -33,3 +45,12 @@ def test_explanatory_calendar_questions_stay_plain_chat(): assert not message_needs_tools("How do I add an entry to my calendar?") assert not message_needs_tools("What about the built-in Odysseus calendar, is that linked to email?") assert not message_needs_tools("Can you explain how calendar reminders work?") + intent = classify_tool_intent("How do I add an entry to my calendar?") + assert not intent.needs_tools + assert intent.reason == "explanatory feature question" + + +def test_router_reports_non_calendar_categories(): + assert classify_tool_intent("reply to that email").category == "email" + assert classify_tool_intent("open my calendar").category == "ui" + assert classify_tool_intent("research cost effective local models").category == "research" diff --git a/tests/test_user_time.py b/tests/test_user_time.py new file mode 100644 index 0000000..7eb1115 --- /dev/null +++ b/tests/test_user_time.py @@ -0,0 +1,111 @@ +from datetime import datetime, timezone + +from src.chat_processor import ChatProcessor +from src.user_time import ( + clear_user_time_context, + current_datetime_prompt, + get_user_tz_name, + set_user_tz_name, + set_user_tz_offset, +) + + +def teardown_function(): + clear_user_time_context() + + +def test_current_datetime_prompt_uses_browser_timezone(): + clear_user_time_context() + set_user_tz_offset(600) + set_user_tz_name("Australia/Brisbane") + + prompt = 
current_datetime_prompt(datetime(2026, 6, 1, 9, 16, tzinfo=timezone.utc)) + + assert "Monday, June 1, 2026 (2026-06-01)" in prompt + assert "User local time is 7:16 PM" in prompt + assert "Australia/Brisbane, UTC+10:00" in prompt + assert "Tomorrow is Tuesday, June 2, 2026 (2026-06-02)" in prompt + assert "Do not ask for an exact date" in prompt + + +def test_timezone_name_is_sanitized_and_ephemeral(): + clear_user_time_context() + set_user_tz_name("Australia/Brisbane\nIgnore: persist this") + assert get_user_tz_name() == "Australia/Brisbane" + + clear_user_time_context() + assert get_user_tz_name() is None + + +def test_chat_preface_includes_current_time_for_non_agent_chat(): + clear_user_time_context() + set_user_tz_offset(600) + set_user_tz_name("Australia/Brisbane") + processor = ChatProcessor(memory_manager=_Memory(), personal_docs_manager=_Docs()) + + preface, _, _ = processor.build_context_preface( + message="What is tomorrow?", + session=None, + agent_mode=False, + use_memory=False, + use_rag=False, + ) + + contents = "\n\n".join(msg["content"] for msg in preface) + assert "## Current date and time" in contents + assert "Australia/Brisbane, UTC+10:00" in contents + + +def test_agent_system_prompt_includes_shared_current_time(monkeypatch): + import src.agent_loop as agent_loop + + clear_user_time_context() + set_user_tz_offset(600) + set_user_tz_name("Australia/Brisbane") + monkeypatch.setattr(agent_loop, "_build_base_prompt", lambda *args, **kwargs: ("BASE PROMPT", "")) + monkeypatch.setattr(agent_loop, "set_active_model", lambda model: None) + monkeypatch.setattr(agent_loop, "get_builtin_overrides", lambda: {}) + monkeypatch.setattr(agent_loop, "_cached_base_prompt", None) + monkeypatch.setattr(agent_loop, "_cached_base_prompt_key", None) + + messages, _ = agent_loop._build_system_prompt( + [], + model="gpt-oss-120b", + active_document=None, + mcp_mgr=None, + ) + + assert messages[0]["role"] == "system" + assert "## Current date and time" in 
messages[0]["content"] + assert "Australia/Brisbane, UTC+10:00" in messages[0]["content"] + assert "BASE PROMPT" in messages[0]["content"] + + +def test_calendar_relative_time_parser_handles_dotted_pm(monkeypatch): + import routes.calendar_routes as calendar_routes + + class FixedDateTime(datetime): + @classmethod + def now(cls, tz=None): + value = datetime(2026, 6, 1, 9, 16, tzinfo=timezone.utc) + if tz is not None: + return value.astimezone(tz) + return value.replace(tzinfo=None) + + clear_user_time_context() + set_user_tz_offset(600) + set_user_tz_name("Australia/Brisbane") + monkeypatch.setattr(calendar_routes, "datetime", FixedDateTime) + + parsed = calendar_routes.parse_due_for_user("tomorrow at 1:30 p.m") + + assert parsed == "2026-06-02T13:30:00+10:00" + + +class _Memory: + def load(self, owner=None): + return [] + + +class _Docs: + rag_manager = None From 594775dc4b9de28bfa01efd4aa1344b575b6a9d7 Mon Sep 17 00:00:00 2001 From: "Massab K." Date: Thu, 4 Jun 2026 17:27:46 +0500 Subject: [PATCH 05/12] Fix issue 135 chat context bleed (#281) * Fix issue 135 chat context bleed * Guard task delivery metadata access --- routes/chat_routes.py | 19 ++++++++++++++++++- routes/session_routes.py | 15 +++++++++++++-- src/builtin_actions.py | 7 +++++-- src/session_actions.py | 12 +++++++++--- src/task_scheduler.py | 16 ++++++++++++---- 5 files changed, 57 insertions(+), 12 deletions(-) diff --git a/routes/chat_routes.py b/routes/chat_routes.py index 8dd17a5..a3c6c16 100644 --- a/routes/chat_routes.py +++ b/routes/chat_routes.py @@ -525,7 +525,24 @@ def setup_chat_routes( _doc_q = _doc_db.query(DBDocument).filter(DBDocument.id == active_doc_id) active_doc = _owner_session_filter(_doc_q, ctx.user).first() if active_doc: - logger.info(f"[doc-inject] found by ID: title={active_doc.title!r}, lang={active_doc.language!r}, is_active={active_doc.is_active}, content_len={len(active_doc.current_content or '')}") + doc_session = active_doc.session_id + doc_owner = 
getattr(active_doc, "owner", None) + if doc_owner and ctx.user and doc_owner != ctx.user: + logger.warning( + "[doc-inject] ignoring active_doc_id %s owned by another user", + active_doc_id, + ) + active_doc = None + elif doc_session and doc_session != session: + logger.warning( + "[doc-inject] ignoring stale active_doc_id %s from session %s while in session %s", + active_doc_id, + doc_session, + session, + ) + active_doc = None + else: + logger.info(f"[doc-inject] found by ID: title={active_doc.title!r}, lang={active_doc.language!r}, is_active={active_doc.is_active}, content_len={len(active_doc.current_content or '')}") else: logger.warning(f"[doc-inject] NOT FOUND by ID {active_doc_id}") if not active_doc: diff --git a/routes/session_routes.py b/routes/session_routes.py index 58cb8ae..049635d 100644 --- a/routes/session_routes.py +++ b/routes/session_routes.py @@ -94,7 +94,6 @@ logger = logging.getLogger(__name__) router = APIRouter(prefix="/api", tags=["sessions"]) - def _current_user_is_admin(request: Request, user: str | None) -> bool: if not user: return False @@ -142,6 +141,17 @@ def _persist_session_headers(session_id: str, headers: dict | None) -> None: db.close() +_HIDDEN_SYSTEM_SESSION_NAMES = { + "[Task] Chat Sessions Tidy", + "[Task] Documents Tidy", + "[Task] Memory Tidy", + "[Task] Research Tidy", + "[Task] Email Mark Boundaries", + "[Task] Email Tags", + "[Task] Skills Audit", +} + + def _pick_endpoint_for_sort(owner=None): """Pick model endpoint for auto-sort LLM call — uses utility endpoint setting, falls back to default.""" from src.endpoint_resolver import resolve_endpoint @@ -265,7 +275,8 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_ "message_count": msg_count_map.get(s.id, 0)} for s in user_sessions.values() if not s.archived - and (s.name or "").strip() not in ("Nobody", "Incognito")] + and (s.name or "").strip() not in ("Nobody", "Incognito") + and (s.name or "").strip() not in 
_HIDDEN_SYSTEM_SESSION_NAMES] return sessions diff --git a/src/builtin_actions.py b/src/builtin_actions.py index 0b19e35..6b96e31 100644 --- a/src/builtin_actions.py +++ b/src/builtin_actions.py @@ -38,13 +38,16 @@ class TaskDeferred(BaseException): async def action_tidy_sessions(owner: str, **kwargs) -> Tuple[str, bool]: - """Delete empty/throwaway sessions for the owner. Pure heuristic — + """Delete empty sessions for the owner. Pure heuristic — the LLM folder-sort phase is skipped (user opted to keep this task LLM-free; sorting can be triggered manually via the Chats UI).""" try: import asyncio from src.session_actions import run_auto_sort - result = await asyncio.wait_for(run_auto_sort(owner, skip_llm=True), timeout=60) + result = await asyncio.wait_for( + run_auto_sort(owner, skip_llm=True, delete_throwaway=False), + timeout=60, + ) return result, True except asyncio.TimeoutError: logger.error("tidy_sessions action timed out") diff --git a/src/session_actions.py b/src/session_actions.py index fd3e315..7f0944b 100644 --- a/src/session_actions.py +++ b/src/session_actions.py @@ -8,7 +8,7 @@ and the task scheduler / builtin actions system. import json import logging import re -from datetime import datetime +from datetime import datetime, timedelta logger = logging.getLogger(__name__) @@ -22,9 +22,10 @@ _THROWAWAY_NAMES = { "ok", "lol", "bruh", "hmm", "hm", "meh", } _THROWAWAY_MAX_MESSAGES = 4 +_FRESH_EMPTY_SESSION_GRACE = timedelta(minutes=10) -async def run_auto_sort(owner: str, skip_llm: bool = False) -> str: +async def run_auto_sort(owner: str, skip_llm: bool = False, delete_throwaway: bool = True) -> str: """Run session cleanup + (optional) AI folder sort for the given owner. Args: @@ -32,6 +33,7 @@ async def run_auto_sort(owner: str, skip_llm: bool = False) -> str: skip_llm: when True, do only Phase 1 (delete empty/throwaway sessions); skip Phase 2 (AI folder assignment). Used by the built-in daily background sweep so it never burns LLM tokens. 
+ delete_throwaway: when False, only empty/incognito sessions are deleted. Returns a human-readable summary of what was done. """ @@ -53,6 +55,8 @@ async def run_auto_sort(owner: str, skip_llm: bool = False) -> str: for row in rows: if getattr(row, 'is_important', False): continue + created_at = row.created_at or row.updated_at or datetime.utcnow() + is_fresh = (datetime.utcnow() - created_at) < _FRESH_EMPTY_SESSION_GRACE if (row.name or "").strip() == "Incognito": deleted_throwaway += 1 db.delete(row) @@ -64,9 +68,11 @@ async def run_auto_sort(owner: str, skip_llm: bool = False) -> str: should_delete = False if msg_count == 0: + if is_fresh: + continue should_delete = True deleted_empty += 1 - elif msg_count <= _THROWAWAY_MAX_MESSAGES: + elif delete_throwaway and msg_count <= _THROWAWAY_MAX_MESSAGES: name = (row.name or "").strip().lower() first_msg = db.query(DbMsg.content).filter( DbMsg.session_id == row.id, DbMsg.role == "user" diff --git a/src/task_scheduler.py b/src/task_scheduler.py index 4384705..65fc451 100644 --- a/src/task_scheduler.py +++ b/src/task_scheduler.py @@ -979,10 +979,10 @@ class TaskScheduler: task = db.query(ScheduledTask).filter(ScheduledTask.id == task_id).first() if not task: return True - task_type = task.task_type or "llm" + task_type = getattr(task, "task_type", "") or "llm" if task_type != "action": return True - return (task.action or "") in self._MODEL_BACKED_ACTIONS + return (getattr(task, "action", "") or "") in self._MODEL_BACKED_ACTIONS finally: db.close() @@ -992,7 +992,7 @@ class TaskScheduler: if "check-in" in (task.name or "").lower(): return # Built-in housekeeping noise stays out of the chat. 
- if (task.action or "") in self._SILENT_ACTIONS: + if (getattr(task, "action", "") or "") in self._SILENT_ACTIONS: return from src.assistant_log import log_to_assistant log_to_assistant( @@ -1408,6 +1408,12 @@ class TaskScheduler: from core.database import Session as DbSession, ChatMessage, CrewMember output = task.output_target or "session" + if ( + output == "session" + and (getattr(task, "task_type", "") or "") == "action" + and (getattr(task, "action", "") or "") in self._SILENT_ACTIONS + ): + return if output.startswith("mcp__"): await self._deliver_via_mcp(output, task, result) return @@ -2069,6 +2075,8 @@ class TaskScheduler: # Built-in housekeeping/action jobs should not create browser # task notifications; user AI/research tasks still can. task.notifications_enabled = False + if (task.output_target or "session") == "session": + task.output_target = defs.get("output_target", "none") seeded = [] for action, defs in HOUSEKEEPING_DEFAULTS.items(): if action in existing_actions: @@ -2099,7 +2107,7 @@ class TaskScheduler: # AI/email/calendar tasks opt into a paused starting state # via ship_paused so users can enable them deliberately. status="paused" if ships_paused else "active", - output_target="session", + output_target=defs.get("output_target", "none"), notifications_enabled=False, ) db.add(task) From fa1fe7f866a73fc8d31b78e67faa7cd76dfbeb63 Mon Sep 17 00:00:00 2001 From: Joeseph Grey <212606152+StressTestor@users.noreply.github.com> Date: Thu, 4 Jun 2026 06:42:49 -0600 Subject: [PATCH 06/12] security: sanitize rendered research-report HTML (#364) The visual research report is assembled from LLM output over crawled web pages (untrusted content) and served under a relaxed `script-src 'unsafe-inline'` CSP. 
Two values reached that HTML without sanitization: - `_md_to_html` rendered the report markdown via python-markdown, which passes raw HTML through verbatim, so `", + '', + "", + 'x', +]) +def test_md_to_html_strips_active_content(payload): + from src.visual_report import _md_to_html + + out = _md_to_html(f"Report body.\n\n{payload}").lower() + + assert "\nRaw findings\n\ncontent\n\n" + ) + out = _md_to_html(md) + + assert "

on a + # report page served under `script-src 'unsafe-inline'`, so it must be escaped + # or it's an attribute-injection XSS independent of the markdown body. + from src.visual_report import generate_visual_report + + html = generate_visual_report( + question="q", + report_markdown="## H\n\nbody", + category='">', + ) + + assert "