From cb8a0b268d2a0f9f76f863468d94b559a6ee3f95 Mon Sep 17 00:00:00 2001 From: Alexander Kenley Date: Mon, 1 Jun 2026 15:32:41 +1000 Subject: [PATCH] Route calendar action requests to tools Co-authored-by: Alex Kenley --- routes/chat_routes.py | 35 +---------------- src/action_intents.py | 76 ++++++++++++++++++++++++++++++++++++ src/agent_loop.py | 4 +- tests/test_action_intents.py | 35 +++++++++++++++++ 4 files changed, 114 insertions(+), 36 deletions(-) create mode 100644 src/action_intents.py create mode 100644 tests/test_action_intents.py diff --git a/routes/chat_routes.py b/routes/chat_routes.py index 34c4a52..e984bcb 100644 --- a/routes/chat_routes.py +++ b/routes/chat_routes.py @@ -35,6 +35,7 @@ from routes.chat_helpers import ( clean_thinking_for_save, _enforce_chat_privileges, ) +from src.action_intents import message_needs_tools as _message_needs_tools logger = logging.getLogger(__name__) @@ -55,40 +56,6 @@ def _stream_set(session_id: str, **fields) -> None: rec.update(fields) -import re as _re -# Phrases that clearly signal the user wants to create a todo / reminder / -# calendar event. When any of these hit in plain chat mode we silently -# escalate to the agent loop so manage_notes / manage_calendar are in scope. -_TOOL_INTENT_PATTERNS = [ - _re.compile(r"\bremind\s+me\b", _re.I), - _re.compile(r"\badd\s+(a\s+|an\s+)?(todo|task|reminder)\b", _re.I), - _re.compile(r"\b(create|schedule|book)\s+(a\s+|an\s+)?(event|meeting|appointment|reminder|call)\b", _re.I), - _re.compile(r"\bput\s+.+\bon\s+(my\s+)?calendar\b", _re.I), - _re.compile(r"\b(todo|reminder)\s*:", _re.I), - _re.compile(r"\bmake\s+(a\s+|an\s+)?(note|todo|reminder)\b", _re.I), - # Email intent — "write/send/email/message [someone]", "write hi to X" - _re.compile(r"\b(write|send)\s+.{1,30}\bto\s+\w+", _re.I), - _re.compile(r"\b(send|write|reply)\s+(an?\s+)?(email|message|mail)\b", _re.I), - _re.compile(r"\b(email|message)\s+\w+\b", _re.I), - _re.compile(r"\bcheck\s+(my\s+)?(email|inbox|mail)\b", _re.I), - _re.compile(r"\bunread\s+(email|mail)s?\b", _re.I), - # Shell / remote-host intent — covers the deepseek "can you ssh into X" - # case. We escalate to agent so `bash` is available; the model can still - # decide it doesn't need to actually run anything. - _re.compile(r"\bssh\s+(in)?to\b", _re.I), - _re.compile(r"\bssh\s+\w+", _re.I), - _re.compile(r"\b(run|execute)\s+.{1,40}\bon\s+\w+", _re.I), - _re.compile(r"\b(can|could|please|would)\s+you\s+(run|execute|exec)\b", _re.I), - _re.compile(r"\b(deploy|build|install|restart|reboot|kill|tail|grep|cat|ls|cd|cp|mv|rm)\b\s+\S+", _re.I), - _re.compile(r"\b(check|see)\s+(if|whether|what)\s+.{1,40}\b(running|process|service|port|file|exists?)\b", _re.I), -] - -def _message_needs_tools(text: str) -> bool: - if not text: - return False - return any(p.search(text) for p in _TOOL_INTENT_PATTERNS) - - def _session_url_matches_endpoint(session_url: str, endpoint_base: str) -> bool: if not session_url or not endpoint_base: return False diff --git a/src/action_intents.py b/src/action_intents.py new file mode 100644 index 0000000..fa78abd --- /dev/null +++ b/src/action_intents.py @@ -0,0 +1,76 @@ +"""Lightweight routing hints for chat requests that need tools. + +These patterns are intentionally conservative. They only promote plain chat +to agent mode when the user asks the assistant to take an action, not when the +user asks how a feature works. +""" + +from __future__ import annotations + +import re +from typing import Iterable, Pattern + + +_ACTION_QUESTION = r"\b(?:can|could|would|will)\s+you\s+" +_PLEASE = r"^\s*(?:please\s+)?" + +_CALENDAR_ACTION = r"(?:add|create|schedule|book|put|set\s+up|make)" +_CALENDAR_THING = r"(?:calendar|calendar\s+(?:entry|item)|event|meeting|appointment|entry|call)" + +_PANEL = ( + r"(?:calendar|notes?|inbox|email|mail|documents?|docs|library|gallery|" + r"settings|cookbook|sessions?|chats?|skills|memories|memory|brain)" +) + +_TOOL_INTENT_PATTERNS: tuple[Pattern[str], ...] = tuple( + re.compile(pattern, re.I) + for pattern in ( + # Calendar/event creation. Covers "Can you add an entry to my + # calendar?" and imperatives like "add lunch to my calendar". + rf"{_ACTION_QUESTION}{_CALENDAR_ACTION}\b.{{0,120}}\b{_CALENDAR_THING}\b", + rf"{_PLEASE}{_CALENDAR_ACTION}\b.{{0,120}}\b(?:to|on|in|into|for)\s+(?:my\s+|the\s+|this\s+)?calendar\b", + rf"{_PLEASE}{_CALENDAR_ACTION}\s+(?:a\s+|an\s+)?(?:calendar\s+)?(?:event|meeting|appointment|entry|item|call)\b", + r"\bput\s+.+\bon\s+(?:my\s+)?calendar\b", + + # Notes, todos, checklists, and reminders. + r"\bremind\s+me\b", + rf"{_ACTION_QUESTION}(?:add|create|make|take|jot|write\s+down|set)\b.{{0,120}}\b(?:note|todo|task|checklist|reminder)\b", + rf"{_PLEASE}(?:add|create|make)\s+(?:a\s+|an\s+)?(?:todo|task|reminder|note|checklist)\b", + rf"{_PLEASE}(?:take|jot|write\s+down)\s+(?:a\s+|an\s+)?note\b", + rf"{_PLEASE}(?:add|jot|write\s+down)\b.{{0,120}}\b(?:to|in|into)\s+(?:my\s+|the\s+)?(?:todo(?:\s+list)?|task\s+list|notes?|checklist)\b", + rf"{_PLEASE}set\s+(?:a\s+)?reminder\b", + rf"{_ACTION_QUESTION}set\s+(?:a\s+)?reminder\b", + + # Email actions. + rf"{_ACTION_QUESTION}(?:send|write|reply|email|message|archive|delete|mark)\b.{{0,120}}\b(?:emails?|mail|messages?|inbox|unread|read)\b", + rf"{_PLEASE}(?:send|write|reply)\b.{{0,120}}\b(?:emails?|mail|messages?)\b", + rf"{_PLEASE}(?:archive|delete|mark)\b.{{0,120}}\b(?:emails?|mail|messages?|inbox)\b", + r"\b(?:send|write|reply)\s+(?:an?\s+)?(?:email|message|mail)\b", + r"\bemail\s+\w+\b", + r"\bcheck\s+(?:my\s+)?(?:email|inbox|mail)\b", + r"\bunread\s+(?:email|mail)s?\b", + + # UI/control-plane actions that should open panels or flip toggles. + rf"{_PLEASE}(?:open|show|bring\s+up)\s+(?:me\s+)?(?:my\s+|the\s+)?{_PANEL}\b", + r"\b(?:disable|enable|turn\s+(?:on|off))\s+(?:the\s+)?(?:shell|search|web|browser|documents?|memory|skills|images?|calendar|email|mail|research|incognito)\b", + + # Deep research jobs, not quick conceptual mentions of research. + rf"{_PLEASE}(?:research|deep\s+dive|look\s+into|investigate)\s+.+", + rf"{_ACTION_QUESTION}(?:research|do\s+research|deep\s+dive|look\s+into|investigate)\s+.+", + + # Shell / remote-host intent. + r"\bssh\s+(?:in)?to\b", + r"\bssh\s+\w+", + r"\b(run|execute)\s+.{1,40}\bon\s+\w+", + r"\b(can|could|please|would)\s+you\s+(run|execute|exec)\b", + r"\b(deploy|build|install|restart|reboot|kill|tail|grep|cat|ls|cd|cp|mv|rm)\b\s+\S+", + r"\b(check|see)\s+(if|whether|what)\s+.{1,40}\b(running|process|service|port|file|exists?)\b", + ) +) + + +def message_needs_tools(text: str, patterns: Iterable[Pattern[str]] = _TOOL_INTENT_PATTERNS) -> bool: + """Return True when a plain chat message should be promoted to agent mode.""" + if not text: + return False + return any(pattern.search(text) for pattern in patterns) diff --git a/src/agent_loop.py b/src/agent_loop.py index 6b7d982..000aefc 100644 --- a/src/agent_loop.py +++ b/src/agent_loop.py @@ -74,7 +74,7 @@ _AGENT_RULES = """\ - AFTER A TOOL SUCCEEDS, do not second-guess. The success message ("Document edited: v2, 1 edit") means it worked. Reply in ONE short sentence confirming what was done. No re-checking, no replaying the diff in your head, no validation theater. - AFTER A TOOL FAILS (timeout, error, "Unknown action", "not found"), DO NOT GO SILENT. The user expects a follow-up: either retry with a fix (e.g. correct args, longer-running form, run `tail -f /tmp/foo.log` to see progress, split into smaller steps), OR explicitly tell them "this didn't work, want me to try X instead?". A failed tool is not a stopping condition — only a successful one is. - YOU DECLARE WHEN THE JOB IS DONE — not a timer. Keep taking concrete steps while the task still needs them; you have plenty of rounds, so don't rush to quit just because you've made a few calls. There are exactly three ways to end a turn: (1) DONE — before you declare it, sanity-check that every concrete thing the user asked for actually exists or succeeded (file written, edit applied, command exited clean); then stop calling tools and write the final answer (that IS your "done" signal); (2) BLOCKED — you genuinely can't proceed (a capability is missing, permission denied, or data you can't obtain), so say plainly what's blocking you, in a sentence or two, and stop; (3) keep going with the single most useful next step. The only wrong moves are trailing off mid-task without one of these, and repeating a call you already ran. -- CalDAV: Call list-calendars FIRST before any calendar operations. +- Calendar: call `manage_calendar` with `action=list_calendars` FIRST before create/update/delete operations. - BULK email actions ("delete all those", "mark all as read", "archive these", "delete all spam", "mark these 19 read") → use the `bulk_email` tool ONCE with either the exact `uids` list from the latest `list_emails` result or `all_unread: true`. NEVER just say you deleted/archived/marked messages unless a delete/archive/mark/bulk email tool call succeeded. NEVER loop mark_email_read / archive_email / delete_email one message at a time — that floods the context and can blow the token budget. One bulk_email call handles the whole set. - Email UIDs are the values after `UID:` in tool output, not list row numbers. For example, row `1.` with `UID: 90186` must use `"90186"`, never `"1"`. - "Last/latest/newest email" means call `list_emails` with `max_results: 1`, `unread_only: false`, and the right `account`, then read the UID returned by that tool if full content is needed. NEVER use a table row number like "#18" as an email UID. @@ -120,7 +120,7 @@ _API_AGENT_RULES = """\ - AFTER A TOOL SUCCEEDS, do not second-guess. A success response means it worked. Reply in ONE short sentence confirming what was done. No verification thinking, no re-analyzing — move on. - AFTER A TOOL FAILS, DO NOT GO SILENT. The user expects a follow-up: retry with a fix, run a diagnostic (`tail`, `ls`, `which`), or explicitly tell them what didn't work and what you'll try next. Failure is not a stopping condition. - YOU DECLARE WHEN THE JOB IS DONE — not a timer. Keep taking concrete steps while the task still needs them; don't quit early just because you've made a few calls. Three ways to end a turn: (1) DONE — before declaring it, verify every concrete deliverable the user asked for actually exists or succeeded; then stop calling tools and write the final answer (that IS your "done" signal); (2) BLOCKED — you can't proceed (missing capability, permission denied, unobtainable data), so state plainly what's blocking you and stop; (3) keep going with the single most useful next step. Never trail off mid-task without (1) or (2), and never repeat a call you already ran. -- CalDAV: Call list-calendars FIRST before any calendar operations. +- Calendar: call `manage_calendar` with `action=list_calendars` FIRST before create/update/delete operations. - "Create/add/write a note" / "notes" / "todos" / "remind me to X at