Route calendar action requests to tools

Co-authored-by: Alex Kenley <Alex.Kenley@threatvectorsecurity.com>
2026-06-01 15:32:41 +10:00
parent 7e7e441fec
commit cb8a0b268d
4 changed files with 114 additions and 36 deletions
--- a/routes/chat_routes.py
+++ b/routes/chat_routes.py
@@ -35,6 +35,7 @@ from routes.chat_helpers import (
    clean_thinking_for_save,
    _enforce_chat_privileges,
 )
+from src.action_intents import message_needs_tools as _message_needs_tools

 logger = logging.getLogger(__name__)

@@ -55,40 +56,6 @@ def _stream_set(session_id: str, **fields) -> None:
    rec.update(fields)


-import re as _re
-# Phrases that clearly signal the user wants to create a todo / reminder /
-# calendar event. When any of these hit in plain chat mode we silently
-# escalate to the agent loop so manage_notes / manage_calendar are in scope.
-_TOOL_INTENT_PATTERNS = [
-    _re.compile(r"\bremind\s+me\b", _re.I),
-    _re.compile(r"\badd\s+(a\s+|an\s+)?(todo|task|reminder)\b", _re.I),
-    _re.compile(r"\b(create|schedule|book)\s+(a\s+|an\s+)?(event|meeting|appointment|reminder|call)\b", _re.I),
-    _re.compile(r"\bput\s+.+\bon\s+(my\s+)?calendar\b", _re.I),
-    _re.compile(r"\b(todo|reminder)\s*:", _re.I),
-    _re.compile(r"\bmake\s+(a\s+|an\s+)?(note|todo|reminder)\b", _re.I),
-    # Email intent — "write/send/email/message [someone]", "write hi to X"
-    _re.compile(r"\b(write|send)\s+.{1,30}\bto\s+\w+", _re.I),
-    _re.compile(r"\b(send|write|reply)\s+(an?\s+)?(email|message|mail)\b", _re.I),
-    _re.compile(r"\b(email|message)\s+\w+\b", _re.I),
-    _re.compile(r"\bcheck\s+(my\s+)?(email|inbox|mail)\b", _re.I),
-    _re.compile(r"\bunread\s+(email|mail)s?\b", _re.I),
-    # Shell / remote-host intent — covers the deepseek "can you ssh into X"
-    # case. We escalate to agent so `bash` is available; the model can still
-    # decide it doesn't need to actually run anything.
-    _re.compile(r"\bssh\s+(in)?to\b", _re.I),
-    _re.compile(r"\bssh\s+\w+", _re.I),
-    _re.compile(r"\b(run|execute)\s+.{1,40}\bon\s+\w+", _re.I),
-    _re.compile(r"\b(can|could|please|would)\s+you\s+(run|execute|exec)\b", _re.I),
-    _re.compile(r"\b(deploy|build|install|restart|reboot|kill|tail|grep|cat|ls|cd|cp|mv|rm)\b\s+\S+", _re.I),
-    _re.compile(r"\b(check|see)\s+(if|whether|what)\s+.{1,40}\b(running|process|service|port|file|exists?)\b", _re.I),
-]
-
-def _message_needs_tools(text: str) -> bool:
-    if not text:
-        return False
-    return any(p.search(text) for p in _TOOL_INTENT_PATTERNS)
-
-
 def _session_url_matches_endpoint(session_url: str, endpoint_base: str) -> bool:
    if not session_url or not endpoint_base:
        return False
--- a/src/action_intents.py
+++ b/src/action_intents.py
@@ -0,0 +1,76 @@
+"""Lightweight routing hints for chat requests that need tools.
+
+These patterns aim to be conservative: the start-anchored and 'can you ...'
+forms promote plain chat to agent mode for action requests, not how-to
+questions. Unanchored patterns (e.g. email, shell) are broader and may over-match.
+"""
+
+from __future__ import annotations
+
+import re
+from typing import Iterable, Pattern
+
+
+_ACTION_QUESTION = r"\b(?:can|could|would|will)\s+you\s+(?:please\s+)?"  # optional "please": "can you please add ..."
+_PLEASE = r"^\s*(?:please\s+)?"
+
+_CALENDAR_ACTION = r"(?:add|create|schedule|book|put|set\s+up|make)"
+_CALENDAR_THING = r"(?:calendar|calendar\s+(?:entry|item)|event|meeting|appointment|entry|call)"
+
+_PANEL = (
+    r"(?:calendar|notes?|inbox|email|mail|documents?|docs|library|gallery|"
+    r"settings|cookbook|sessions?|chats?|skills|memories|memory|brain)"
+)
+
+_TOOL_INTENT_PATTERNS: tuple[Pattern[str], ...] = tuple(
+    re.compile(pattern, re.I)
+    for pattern in (
+        # Calendar/event creation. Covers "Can you add an entry to my
+        # calendar?" and imperatives like "add lunch to my calendar".
+        rf"{_ACTION_QUESTION}{_CALENDAR_ACTION}\b.{{0,120}}\b{_CALENDAR_THING}\b",
+        rf"{_PLEASE}{_CALENDAR_ACTION}\b.{{0,120}}\b(?:to|on|in|into|for)\s+(?:my\s+|the\s+|this\s+)?calendar\b",
+        rf"{_PLEASE}{_CALENDAR_ACTION}\s+(?:a\s+|an\s+)?(?:calendar\s+)?(?:event|meeting|appointment|entry|item|call)\b",
+        r"\bput\s+.+\bon\s+(?:my\s+)?calendar\b",
+
+        # Notes, todos, checklists, and reminders.
+        r"\bremind\s+me\b",
+        rf"{_ACTION_QUESTION}(?:add|create|make|take|jot|write\s+down|set)\b.{{0,120}}\b(?:note|todo|task|checklist|reminder)\b",
+        rf"{_PLEASE}(?:add|create|make)\s+(?:a\s+|an\s+)?(?:todo|task|reminder|note|checklist)\b",
+        rf"{_PLEASE}(?:take|jot|write\s+down)\s+(?:a\s+|an\s+)?note\b",
+        rf"{_PLEASE}(?:add|jot|write\s+down)\b.{{0,120}}\b(?:to|in|into)\s+(?:my\s+|the\s+)?(?:todo(?:\s+list)?|task\s+list|notes?|checklist)\b",
+        rf"{_PLEASE}set\s+(?:a\s+)?reminder\b",
+        rf"{_ACTION_QUESTION}set\s+(?:a\s+)?reminder\b",
+
+        # Email actions.
+        rf"{_ACTION_QUESTION}(?:send|write|reply|email|message|archive|delete|mark)\b.{{0,120}}\b(?:emails?|mail|messages?|inbox|unread|read)\b",
+        rf"{_PLEASE}(?:send|write|reply)\b.{{0,120}}\b(?:emails?|mail|messages?)\b",
+        rf"{_PLEASE}(?:archive|delete|mark)\b.{{0,120}}\b(?:emails?|mail|messages?|inbox)\b",
+        r"\b(?:send|write|reply)\s+(?:an?\s+)?(?:email|message|mail)\b",
+        r"\bemail\s+\w+\b",
+        r"\bcheck\s+(?:my\s+)?(?:email|inbox|mail)\b",
+        r"\bunread\s+(?:email|mail)s?\b",
+
+        # UI/control-plane actions that should open panels or flip toggles.
+        rf"{_PLEASE}(?:open|show|bring\s+up)\s+(?:me\s+)?(?:my\s+|the\s+)?{_PANEL}\b",
+        r"\b(?:disable|enable|turn\s+(?:on|off))\s+(?:the\s+)?(?:shell|search|web|browser|documents?|memory|skills|images?|calendar|email|mail|research|incognito)\b",
+
+        # Deep research jobs, not quick conceptual mentions of research.
+        rf"{_PLEASE}(?:research|deep\s+dive|look\s+into|investigate)\s+.+",
+        rf"{_ACTION_QUESTION}(?:research|do\s+research|deep\s+dive|look\s+into|investigate)\s+.+",
+
+        # Shell / remote-host intent.
+        r"\bssh\s+(?:in)?to\b",
+        r"\bssh\s+\w+",
+        r"\b(run|execute)\s+.{1,40}\bon\s+\w+",
+        r"\b(can|could|please|would)\s+you\s+(run|execute|exec)\b",
+        r"\b(deploy|build|install|restart|reboot|kill|tail|grep|cat|ls|cd|cp|mv|rm)\b\s+\S+",
+        r"\b(check|see)\s+(if|whether|what)\s+.{1,40}\b(running|process|service|port|file|exists?)\b",
+    )
+)
+
+
+def message_needs_tools(text: str, patterns: Iterable[Pattern[str]] = _TOOL_INTENT_PATTERNS) -> bool:
+    """Return True when *text* matches any routing pattern; empty or non-str input never promotes."""
+    if not isinstance(text, str) or not text:  # None / multimodal content lists: stay in plain chat
+        return False
+    return any(pattern.search(text) for pattern in patterns)
--- a/src/agent_loop.py
+++ b/src/agent_loop.py
@@ -74,7 +74,7 @@ _AGENT_RULES = """\
 - AFTER A TOOL SUCCEEDS, do not second-guess. The success message ("Document edited: v2, 1 edit") means it worked. Reply in ONE short sentence confirming what was done. No re-checking, no replaying the diff in your head, no validation theater.
 - AFTER A TOOL FAILS (timeout, error, "Unknown action", "not found"), DO NOT GO SILENT. The user expects a follow-up: either retry with a fix (e.g. correct args, longer-running form, run `tail -f /tmp/foo.log` to see progress, split into smaller steps), OR explicitly tell them "this didn't work, want me to try X instead?". A failed tool is not a stopping condition — only a successful one is.
 - YOU DECLARE WHEN THE JOB IS DONE — not a timer. Keep taking concrete steps while the task still needs them; you have plenty of rounds, so don't rush to quit just because you've made a few calls. There are exactly three ways to end a turn: (1) DONE — before you declare it, sanity-check that every concrete thing the user asked for actually exists or succeeded (file written, edit applied, command exited clean); then stop calling tools and write the final answer (that IS your "done" signal); (2) BLOCKED — you genuinely can't proceed (a capability is missing, permission denied, or data you can't obtain), so say plainly what's blocking you, in a sentence or two, and stop; (3) keep going with the single most useful next step. The only wrong moves are trailing off mid-task without one of these, and repeating a call you already ran.
- CalDAV: Call list-calendars FIRST before any calendar operations.
+- Calendar: call `manage_calendar` with `action=list_calendars` FIRST before create/update/delete operations.
 - BULK email actions ("delete all those", "mark all as read", "archive these", "delete all spam", "mark these 19 read") → use the `bulk_email` tool ONCE with either the exact `uids` list from the latest `list_emails` result or `all_unread: true`. NEVER just say you deleted/archived/marked messages unless a delete/archive/mark/bulk email tool call succeeded. NEVER loop mark_email_read / archive_email / delete_email one message at a time — that floods the context and can blow the token budget. One bulk_email call handles the whole set.
 - Email UIDs are the values after `UID:` in tool output, not list row numbers. For example, row `1.` with `UID: 90186` must use `"90186"`, never `"1"`.
 - "Last/latest/newest email" means call `list_emails` with `max_results: 1`, `unread_only: false`, and the right `account`, then read the UID returned by that tool if full content is needed. NEVER use a table row number like "#18" as an email UID.
@@ -120,7 +120,7 @@ _API_AGENT_RULES = """\
 - AFTER A TOOL SUCCEEDS, do not second-guess. A success response means it worked. Reply in ONE short sentence confirming what was done. No verification thinking, no re-analyzing — move on.
 - AFTER A TOOL FAILS, DO NOT GO SILENT. The user expects a follow-up: retry with a fix, run a diagnostic (`tail`, `ls`, `which`), or explicitly tell them what didn't work and what you'll try next. Failure is not a stopping condition.
 - YOU DECLARE WHEN THE JOB IS DONE — not a timer. Keep taking concrete steps while the task still needs them; don't quit early just because you've made a few calls. Three ways to end a turn: (1) DONE — before declaring it, verify every concrete deliverable the user asked for actually exists or succeeded; then stop calling tools and write the final answer (that IS your "done" signal); (2) BLOCKED — you can't proceed (missing capability, permission denied, unobtainable data), so state plainly what's blocking you and stop; (3) keep going with the single most useful next step. Never trail off mid-task without (1) or (2), and never repeat a call you already ran.
- CalDAV: Call list-calendars FIRST before any calendar operations.
+- Calendar: call `manage_calendar` with `action=list_calendars` FIRST before create/update/delete operations.
 - "Create/add/write a note" / "notes" / "todos" / "remind me to X at <time>" → use `manage_notes`. Do NOT store notes in `manage_memory`; memory is for persistent facts/preferences about the user, not note content. For reminders, include a `due_date`; for todos, use `note_type=checklist` when appropriate. `manage_tasks` is for RECURRING background AI jobs, NOT for one-off user reminders.
 - "Disable/turn off/enable/turn on <tool>" (shell, search, research, browser, documents, incognito, etc.) → call `ui_control` with `toggle <name> <on|off>`. Aliases accepted: shell→bash, search→web, deepresearch→research, documents→document_editor. NEVER record this as a memory — the user wants the toggle flipped, not a note about preferring it.
 - "Research X" / "do research on X" / "look into Y" / "deep dive on Z" → call `trigger_research` with `topic`. This starts a live job that appears in the Deep Research sidebar (streams progress + final report). **Do NOT use `web_search` for these** — saw the agent do a plain web_search for "do research on X" when the user wanted the deep-research job. "research X" is a deep-research request, not a quick lookup. (web_search is only for a single quick fact mid-task.) Do NOT POST /api/research/start via app_api either — blocked. After starting, tell the user it's running in the Deep Research sidebar. Only if the user explicitly wants it inline/quick should you fall back to web_search.
--- a/tests/test_action_intents.py
+++ b/tests/test_action_intents.py
@@ -0,0 +1,35 @@
+from src.action_intents import message_needs_tools
+
+
+def test_calendar_entry_request_promotes_to_agent():
+    assert message_needs_tools("Can you add an entry to my calendar?")
+
+
+def test_calendar_imperative_variants_promote_to_agent():
+    assert message_needs_tools("add lunch with Sam to my calendar tomorrow at noon")
+    assert message_needs_tools("schedule a call with Mina next Friday")
+    assert message_needs_tools("put dentist appointment on my calendar")
+
+
+def test_note_todo_and_reminder_actions_promote_to_agent():
+    assert message_needs_tools("add milk to my todo list")
+    assert message_needs_tools("take a note that the server needs checking")
+    assert message_needs_tools("set a reminder to call Pat at 4pm")
+
+
+def test_email_and_ui_actions_promote_to_agent():
+    assert message_needs_tools("reply to that email")
+    assert message_needs_tools("mark those emails as read")
+    assert message_needs_tools("open my calendar")
+    assert message_needs_tools("turn off web search")
+
+
+def test_research_action_promotes_to_agent():
+    assert message_needs_tools("research cost effective local models")
+    assert message_needs_tools("can you look into GPU hosting options")
+
+
+def test_explanatory_calendar_questions_stay_plain_chat():
+    assert not message_needs_tools("How do I add an entry to my calendar?")
+    assert not message_needs_tools("What about the built-in Odysseus calendar, is that linked to email?")
+    assert not message_needs_tools("Can you explain how calendar reminders work?")