diff --git a/app.py b/app.py
index 4f3dff0..87ef1ae 100644
--- a/app.py
+++ b/app.py
@@ -1062,6 +1062,16 @@ async def _startup_event():
                 logger.warning(f"Nightly skill audit failed: {e}")
 
     _startup_tasks.append(asyncio.create_task(_skill_audit_nightly_loop()))
+
+    # Cookbook serve lifecycle — kills scheduler-launched serves whose
+    # window-end has passed. Paired with the cookbook_serve builtin
+    # action; both are no-ops unless a scheduled task actually launches
+    # something with end_after_min set. Removing this line + the
+    # cookbook_serve entry in BUILTIN_ACTIONS + src/cookbook_serve_lifecycle.py
+    # removes the feature.
+    from src.cookbook_serve_lifecycle import cookbook_serve_lifecycle_loop
+    _startup_tasks.append(asyncio.create_task(cookbook_serve_lifecycle_loop()))
+
     logger.info("Application startup complete")
 
 async def _shutdown_event():
diff --git a/integrations/claude/skills/odysseus/SKILL.md b/integrations/claude/skills/odysseus/SKILL.md
index 3877f9c..d3b55b3 100644
--- a/integrations/claude/skills/odysseus/SKILL.md
+++ b/integrations/claude/skills/odysseus/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: odysseus
-description: Use when the user asks Claude Code to read or write Odysseus data (todos, email, calendar, memory, documents) through the scoped Claude Agent API. Requires ODYSSEUS_URL and ODYSSEUS_API_TOKEN.
+description: Use when the user asks Claude Code to read or write Odysseus data (todos, email, calendar, memory, documents) or to launch/monitor/stop a Cookbook model-serve task through the scoped Claude Agent API. Requires ODYSSEUS_URL and ODYSSEUS_API_TOKEN.
 ---
 
 # Odysseus
@@ -105,6 +105,49 @@ python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py POST /api/codex/memory
 - `POST /api/codex/emails/draft` — body matches `SendEmailRequest` (`to`, `cc`, `bcc`, `subject`, `body`, `body_html`, `attachments`, `account_id`, `in_reply_to`, `references`). Requires `email:draft` (or `email:send`).
 - `POST /api/codex/emails/send` — same body. Requires `email:send`. Never send without explicit user instruction.
 
+## Cookbook serve (debug a failing model launch)
+
+The Cookbook surface lets you reproduce what a human would do in Odysseus → Cookbook: read which serves are running, tail their tmux output to see why they crashed, edit the launch command, relaunch, kill a stuck one. Use this when the user is debugging a model server that won't come up (compute-capability errors, OOM, missing kernels, wrong attention backend, etc.).
+
+- `GET /api/codex/cookbook/tasks` — list active serve/download/install tasks (sessionId, type, status, repo_id, remoteHost, payload._cmd). Requires `cookbook:read`.
+- `GET /api/codex/cookbook/servers` — list configured servers (name, host, port, env type + path, model dirs). Requires `cookbook:read`.
+- `GET /api/codex/cookbook/cached?host=<NAME>` — list models already cached on the named server (HF cache + Ollama + extra modelDirs). Call BEFORE `serve` to see what's already on disk. Requires `cookbook:read`.
+- `GET /api/codex/cookbook/presets` — list saved serve presets (model + host + port + cmd). The user's saved preset usually has a working cmd — try `preset NAME` before composing your own. Requires `cookbook:read`.
+- `GET /api/codex/cookbook/output/{session_id}?tail=400` — read the last N lines of the task's persistent log file (preferred) or tmux pane (fallback). The log file persists across vllm crashes, so this returns the actual Python traceback even after the bash prompt + neofetch banner overwrites the pane. Default tail=400. Requires `cookbook:read`.
+- `POST /api/codex/cookbook/serve` — launch a serve task. Body matches `ServeRequest`: `{ repo_id, cmd, remote_host?, ssh_port?, env_prefix?, gpus?, platform? }`. The `cmd` is validated: leading binary must be `vllm`/`python3`/`sglang`/`llama-server`/`ollama`/`node`/`npx`. NEVER prefix with `cd …`, `source …`, or chain with `&&`/`||`/`;`/`$(...)` — the validator rejects shell metacharacters. The venv activation (`env_prefix`) is added automatically from the host's saved settings, so pass the bare binary + args. Requires `cookbook:launch`.
+- `POST /api/codex/cookbook/preset/{name}` — launch a saved preset by name. Reuses the working cmd + host the user already saved. Requires `cookbook:launch`.
+- `POST /api/codex/cookbook/adopt` — register an externally-launched tmux session into cookbook tracking. Body: `{ tmux_session, model, host?, port? }`. Use this when serve_model rejected a cmd and you fell back to direct ssh+tmux — without adoption, the session is invisible to the UI. Requires `cookbook:launch`.
+- `POST /api/codex/cookbook/stop/{session_id}` — kill the tmux session for that task. Requires `cookbook:launch`.
+
+```bash
+# Survey what's running
+python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py cookbook tasks
+
+# Tail the failing one (sessionId from `cookbook tasks`)
+python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py cookbook output serve-abc12345 400
+
+# Stop the previous attempt before you try a new flag set
+python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py cookbook stop serve-abc12345
+
+# Relaunch with new flags. cmd MUST begin with one of the allowlisted binaries.
+python3 ~/.claude/skills/odysseus/scripts/odysseus_api.py cookbook serve \
+  /mnt/HADES/models/Qwen3.5-397B-A17B-AWQ \
+  "vllm serve /mnt/HADES/models/Qwen3.5-397B-A17B-AWQ --host 0.0.0.0 --port 8001 --tensor-parallel-size 8 --max-model-len 262144 --gpu-memory-utilization 0.90 --dtype auto --max-num-seqs 8 --trust-remote-code --enable-expert-parallel --enable-auto-tool-choice --tool-call-parser qwen3_coder --reasoning-parser qwen3" \
+  pewds@192.168.1.12
+```
+
+**Debug loop pattern:** when a serve is failing, the productive sequence is
+
+1. `cookbook tasks` → find the failing sessionId.
+2. `cookbook output SID 600` → read the last 600 lines, find the actual root-cause line (often above the visible tail because tmux scrollback rolled — request a larger `tail` if the error references "above").
+3. `cookbook stop SID` — kill the previous attempt before relaunching; two serves on the same `--port` collide.
+4. `cookbook serve repo "new cmd"` — try the next variation. Wait ~20s, then `cookbook output` on the new sessionId.
+
+**Hard limits this surface enforces:**
+- `cookbook serve` cmd allowlist + shell-metacharacter rejection — you cannot run arbitrary shell, only model-server binaries.
+- `cookbook stop` only targets task sessionIds matching `[a-zA-Z0-9_-]+`.
+- The agent CAN spawn GPU-pinning long-lived processes — always `cookbook stop` your previous attempt before relaunching, and check `cookbook tasks` for collisions on the same `--port` before launching.
+
 ## Forbidden Bypass Pattern
 
 If you are about to reach the Odysseus host/container, import app internals, query the database, or call MCP helper modules directly, stop. Those paths bypass Odysseus Settings and token scopes. Ask the user to enable the relevant Claude Agent tool toggle instead.
diff --git a/integrations/claude/skills/odysseus/scripts/odysseus_api.py b/integrations/claude/skills/odysseus/scripts/odysseus_api.py
index 5fdd632..fcef8a7 100755
--- a/integrations/claude/skills/odysseus/scripts/odysseus_api.py
+++ b/integrations/claude/skills/odysseus/scripts/odysseus_api.py
@@ -17,6 +17,15 @@ def _usage() -> int:
     print("  odysseus_api.py todos add TITLE", file=sys.stderr)
     print("  odysseus_api.py emails list [limit]", file=sys.stderr)
     print("  odysseus_api.py emails read UID", file=sys.stderr)
+    print("  odysseus_api.py cookbook tasks", file=sys.stderr)
+    print("  odysseus_api.py cookbook servers", file=sys.stderr)
+    print("  odysseus_api.py cookbook cached [HOST]", file=sys.stderr)
+    print("  odysseus_api.py cookbook presets", file=sys.stderr)
+    print("  odysseus_api.py cookbook output SESSION_ID [tail]", file=sys.stderr)
+    print("  odysseus_api.py cookbook serve REPO_ID 'CMD' [REMOTE_HOST]", file=sys.stderr)
+    print("  odysseus_api.py cookbook preset NAME", file=sys.stderr)
+    print("  odysseus_api.py cookbook adopt SESSION_ID MODEL [HOST] [PORT]", file=sys.stderr)
+    print("  odysseus_api.py cookbook stop SESSION_ID", file=sys.stderr)
     print("  odysseus_api.py METHOD /api/codex/path [json-body]", file=sys.stderr)
     return 2
 
@@ -72,6 +81,61 @@ def main() -> int:
             body = None
         else:
             return _usage()
+    elif command == "cookbook":
+        if len(sys.argv) < 3:
+            return _usage()
+        action = sys.argv[2].lower()
+        if action == "tasks":
+            method = "GET"
+            path = "/api/codex/cookbook/tasks"
+            body = None
+        elif action == "servers":
+            method = "GET"
+            path = "/api/codex/cookbook/servers"
+            body = None
+        elif action == "output" and len(sys.argv) >= 4:
+            method = "GET"
+            sid = sys.argv[3]
+            tail = sys.argv[4] if len(sys.argv) >= 5 else "400"
+            path = f"/api/codex/cookbook/output/{sid}?tail={tail}"
+            body = None
+        elif action == "cached":
+            method = "GET"
+            if len(sys.argv) >= 4:
+                from urllib.parse import quote
+                path = f"/api/codex/cookbook/cached?host={quote(sys.argv[3])}"
+            else:
+                path = "/api/codex/cookbook/cached"
+            body = None
+        elif action == "presets":
+            method = "GET"
+            path = "/api/codex/cookbook/presets"
+            body = None
+        elif action == "preset" and len(sys.argv) >= 4:
+            from urllib.parse import quote
+            method = "POST"
+            path = f"/api/codex/cookbook/preset/{quote(sys.argv[3])}"
+            body = None
+        elif action == "adopt" and len(sys.argv) >= 5:
+            method = "POST"
+            path = "/api/codex/cookbook/adopt"
+            payload = {"tmux_session": sys.argv[3], "model": sys.argv[4]}
+            if len(sys.argv) >= 6: payload["host"] = sys.argv[5]
+            if len(sys.argv) >= 7: payload["port"] = int(sys.argv[6])
+            body = json.dumps(payload)
+        elif action == "serve" and len(sys.argv) >= 5:
+            method = "POST"
+            path = "/api/codex/cookbook/serve"
+            payload = {"repo_id": sys.argv[3], "cmd": sys.argv[4]}
+            if len(sys.argv) >= 6:
+                payload["remote_host"] = sys.argv[5]
+            body = json.dumps(payload)
+        elif action == "stop" and len(sys.argv) >= 4:
+            method = "POST"
+            path = f"/api/codex/cookbook/stop/{sys.argv[3]}"
+            body = None
+        else:
+            return _usage()
     else:
         if len(sys.argv) < 3:
             return _usage()
diff --git a/integrations/codex/scripts/odysseus_api.py b/integrations/codex/scripts/odysseus_api.py
index 5fdd632..fcef8a7 100755
--- a/integrations/codex/scripts/odysseus_api.py
+++ b/integrations/codex/scripts/odysseus_api.py
@@ -17,6 +17,15 @@ def _usage() -> int:
     print("  odysseus_api.py todos add TITLE", file=sys.stderr)
     print("  odysseus_api.py emails list [limit]", file=sys.stderr)
     print("  odysseus_api.py emails read UID", file=sys.stderr)
+    print("  odysseus_api.py cookbook tasks", file=sys.stderr)
+    print("  odysseus_api.py cookbook servers", file=sys.stderr)
+    print("  odysseus_api.py cookbook cached [HOST]", file=sys.stderr)
+    print("  odysseus_api.py cookbook presets", file=sys.stderr)
+    print("  odysseus_api.py cookbook output SESSION_ID [tail]", file=sys.stderr)
+    print("  odysseus_api.py cookbook serve REPO_ID 'CMD' [REMOTE_HOST]", file=sys.stderr)
+    print("  odysseus_api.py cookbook preset NAME", file=sys.stderr)
+    print("  odysseus_api.py cookbook adopt SESSION_ID MODEL [HOST] [PORT]", file=sys.stderr)
+    print("  odysseus_api.py cookbook stop SESSION_ID", file=sys.stderr)
     print("  odysseus_api.py METHOD /api/codex/path [json-body]", file=sys.stderr)
     return 2
 
@@ -72,6 +81,61 @@ def main() -> int:
             body = None
         else:
             return _usage()
+    elif command == "cookbook":
+        if len(sys.argv) < 3:
+            return _usage()
+        action = sys.argv[2].lower()
+        if action == "tasks":
+            method = "GET"
+            path = "/api/codex/cookbook/tasks"
+            body = None
+        elif action == "servers":
+            method = "GET"
+            path = "/api/codex/cookbook/servers"
+            body = None
+        elif action == "output" and len(sys.argv) >= 4:
+            method = "GET"
+            sid = sys.argv[3]
+            tail = sys.argv[4] if len(sys.argv) >= 5 else "400"
+            path = f"/api/codex/cookbook/output/{sid}?tail={tail}"
+            body = None
+        elif action == "cached":
+            method = "GET"
+            if len(sys.argv) >= 4:
+                from urllib.parse import quote
+                path = f"/api/codex/cookbook/cached?host={quote(sys.argv[3])}"
+            else:
+                path = "/api/codex/cookbook/cached"
+            body = None
+        elif action == "presets":
+            method = "GET"
+            path = "/api/codex/cookbook/presets"
+            body = None
+        elif action == "preset" and len(sys.argv) >= 4:
+            from urllib.parse import quote
+            method = "POST"
+            path = f"/api/codex/cookbook/preset/{quote(sys.argv[3])}"
+            body = None
+        elif action == "adopt" and len(sys.argv) >= 5:
+            method = "POST"
+            path = "/api/codex/cookbook/adopt"
+            payload = {"tmux_session": sys.argv[3], "model": sys.argv[4]}
+            if len(sys.argv) >= 6: payload["host"] = sys.argv[5]
+            if len(sys.argv) >= 7: payload["port"] = int(sys.argv[6])
+            body = json.dumps(payload)
+        elif action == "serve" and len(sys.argv) >= 5:
+            method = "POST"
+            path = "/api/codex/cookbook/serve"
+            payload = {"repo_id": sys.argv[3], "cmd": sys.argv[4]}
+            if len(sys.argv) >= 6:
+                payload["remote_host"] = sys.argv[5]
+            body = json.dumps(payload)
+        elif action == "stop" and len(sys.argv) >= 4:
+            method = "POST"
+            path = f"/api/codex/cookbook/stop/{sys.argv[3]}"
+            body = None
+        else:
+            return _usage()
     else:
         if len(sys.argv) < 3:
             return _usage()
diff --git a/integrations/codex/skills/odysseus/SKILL.md b/integrations/codex/skills/odysseus/SKILL.md
index 1e2be01..4cff140 100644
--- a/integrations/codex/skills/odysseus/SKILL.md
+++ b/integrations/codex/skills/odysseus/SKILL.md
@@ -1,6 +1,6 @@
 ---
 name: odysseus
-description: Use when the user asks Codex to read or write Odysseus data from a terminal Codex session through the scoped Codex Agent API. Requires ODYSSEUS_URL and ODYSSEUS_API_TOKEN.
+description: Use when the user asks Codex to read or write Odysseus data (todos, email, calendar, memory, documents) or to launch/monitor/stop a Cookbook model-serve task through the scoped Codex Agent API. Requires ODYSSEUS_URL and ODYSSEUS_API_TOKEN.
 ---
 
 # Odysseus
@@ -105,6 +105,37 @@ python3 integrations/codex/scripts/odysseus_api.py POST /api/codex/memory '{"tex
 - `POST /api/codex/emails/draft` — body matches `SendEmailRequest` (`to`, `cc`, `bcc`, `subject`, `body`, `body_html`, `attachments`, `account_id`, `in_reply_to`, `references`). Requires `email:draft` (or `email:send`).
 - `POST /api/codex/emails/send` — same body. Requires `email:send`. Never send without explicit user instruction.
 
+## Cookbook serve (debug a failing model launch)
+
+The Cookbook surface lets you reproduce what a human would do in Odysseus → Cookbook: read which serves are running, tail their tmux output to see why they crashed, edit the launch command, relaunch, kill a stuck one. Use this when the user is debugging a model server that won't come up (compute-capability errors, OOM, missing kernels, wrong attention backend, etc.).
+
+- `GET /api/codex/cookbook/tasks` — list active serve/download/install tasks (sessionId, type, status, repo_id, remoteHost, payload._cmd). Requires `cookbook:read`.
+- `GET /api/codex/cookbook/servers` — list configured servers (name, host, port, env type + path, model dirs). Requires `cookbook:read`.
+- `GET /api/codex/cookbook/cached?host=<NAME>` — list models already cached on the named server (HF cache + Ollama + extra modelDirs). Call BEFORE `serve` to see what's already on disk. Requires `cookbook:read`.
+- `GET /api/codex/cookbook/presets` — list saved serve presets (model + host + port + cmd). The user's saved preset usually has a working cmd — try `preset NAME` before composing your own. Requires `cookbook:read`.
+- `GET /api/codex/cookbook/output/{session_id}?tail=400` — read the last N lines of the task's persistent log file (preferred) or tmux pane (fallback). The log file persists across vllm crashes, so this returns the actual Python traceback even after the bash prompt + neofetch banner overwrites the pane. Default tail=400. Requires `cookbook:read`.
+- `POST /api/codex/cookbook/serve` — launch a serve task. Body matches `ServeRequest`: `{ repo_id, cmd, remote_host?, ssh_port?, env_prefix?, gpus?, platform? }`. The `cmd` is validated: leading binary must be `vllm`/`python3`/`sglang`/`llama-server`/`ollama`/`node`/`npx`. NEVER prefix with `cd …`, `source …`, or chain with `&&`/`||`/`;`/`$(...)` — the validator rejects shell metacharacters. The venv activation (`env_prefix`) is added automatically from the host's saved settings, so pass the bare binary + args. Requires `cookbook:launch`.
+- `POST /api/codex/cookbook/preset/{name}` — launch a saved preset by name. Reuses the working cmd + host the user already saved. Requires `cookbook:launch`.
+- `POST /api/codex/cookbook/adopt` — register an externally-launched tmux session into cookbook tracking. Body: `{ tmux_session, model, host?, port? }`. Use this when serve_model rejected a cmd and you fell back to direct ssh+tmux — without adoption, the session is invisible to the UI. Requires `cookbook:launch`.
+- `POST /api/codex/cookbook/stop/{session_id}` — kill the tmux session. Requires `cookbook:launch`.
+
+```bash
+python3 ~/plugins/odysseus/scripts/odysseus_api.py cookbook tasks
+python3 ~/plugins/odysseus/scripts/odysseus_api.py cookbook output serve-abc12345 400
+python3 ~/plugins/odysseus/scripts/odysseus_api.py cookbook stop serve-abc12345
+python3 ~/plugins/odysseus/scripts/odysseus_api.py cookbook serve \
+  /mnt/HADES/models/Qwen3.5-397B-A17B-AWQ \
+  "vllm serve /mnt/HADES/models/Qwen3.5-397B-A17B-AWQ --host 0.0.0.0 --port 8001 --tensor-parallel-size 8 --max-model-len 262144 --gpu-memory-utilization 0.90 --dtype auto --max-num-seqs 8 --trust-remote-code --enable-expert-parallel --enable-auto-tool-choice --tool-call-parser qwen3_coder --reasoning-parser qwen3" \
+  pewds@192.168.1.12
+```
+
+**Debug loop pattern:** `tasks` → `output SID 600` (find root cause; request larger `tail` if it references "above") → `stop SID` → `serve repo "new cmd"` → wait ~20s → `output` on the new sessionId.
+
+**Hard limits this surface enforces:**
+- `cookbook serve` cmd allowlist + shell-metacharacter rejection.
+- `cookbook stop` requires sessionIds matching `[a-zA-Z0-9_-]+`.
+- Agent CAN spawn GPU-pinning long-lived processes — always `cookbook stop` your previous attempt before relaunching.
+
 ## Forbidden Bypass Pattern
 
 If you are about to reach the Odysseus host/container, import app internals, query the database, or call MCP helper modules directly, stop. Those paths bypass Odysseus Settings and token scopes. Ask the user to enable the relevant Codex Agent tool toggle instead.
diff --git a/routes/codex_routes.py b/routes/codex_routes.py
index 8c59ee5..9898dae 100644
--- a/routes/codex_routes.py
+++ b/routes/codex_routes.py
@@ -19,6 +19,8 @@ from src.auth_helpers import require_user
 from src.tool_implementations import do_manage_notes
 
 
+COOKBOOK_READ_SCOPES = {"cookbook:read", "cookbook:launch"}
+COOKBOOK_LAUNCH_SCOPES = {"cookbook:launch"}
 TODO_READ_SCOPES = {"todos:read", "todos:write"}
 TODO_WRITE_SCOPES = {"todos:write"}
 EMAIL_READ_SCOPES = {"email:read", "email:draft", "email:send"}
@@ -130,6 +132,11 @@ def setup_codex_routes(
                     "actions": ["library", "read", "create", "delete"],
                     "available": documents_library_endpoint is not None,
                 },
+                "cookbook": {
+                    "read": scoped(COOKBOOK_READ_SCOPES),
+                    "launch": scoped(COOKBOOK_LAUNCH_SCOPES),
+                    "actions": ["tasks", "servers", "output", "serve", "stop"],
+                },
             },
             "safety": {
                 "email_send_requires_confirmation": True,
@@ -373,6 +380,374 @@ def setup_codex_routes(
             raise HTTPException(400, f"Invalid document payload: {exc}")
         return await _as_owner(request, owner, documents_create_endpoint, request, req)
 
+    # ── Cookbook surface ──
+    # Lets the agent run the same launch / monitor / kill loop the user
+    # would do by hand in the Cookbook UI: read the current task list +
+    # tmux output, launch a serve task, stop one.  Two scopes:
+    #   cookbook:read   — list tasks + tail output + list servers
+    #   cookbook:launch — also start/stop serves (host shell exec)
+    # `cookbook:launch` is genuinely powerful: /api/model/serve runs SSH'd
+    # commands on the user's hosts. The existing _validate_serve_cmd
+    # allowlist (vllm/python3/sglang/llama-server/etc., no shell metachars)
+    # keeps the agent inside the same sandbox the UI uses.
+
+    async def _run_shell(cmd: str, timeout: float = 15.0) -> dict:
+        """Run a shell command, return {exit_code, stdout, stderr}."""
+        import asyncio as _asyncio
+        try:
+            proc = await _asyncio.create_subprocess_shell(
+                cmd,
+                stdout=_asyncio.subprocess.PIPE,
+                stderr=_asyncio.subprocess.PIPE,
+            )
+            try:
+                stdout_b, stderr_b = await _asyncio.wait_for(proc.communicate(), timeout=timeout)
+            except _asyncio.TimeoutError:
+                proc.kill()
+                return {"exit_code": -1, "stdout": "", "stderr": "timed out"}
+            return {
+                "exit_code": proc.returncode,
+                "stdout": stdout_b.decode(errors="replace"),
+                "stderr": stderr_b.decode(errors="replace"),
+            }
+        except Exception as exc:
+            return {"exit_code": -1, "stdout": "", "stderr": str(exc)}
+
+    def _read_cookbook_state() -> dict:
+        from pathlib import Path as _Path
+        import os as _os, json as _json
+        p = _Path(_os.environ.get("DATA_DIR", "data")) / "cookbook_state.json"
+        if not p.exists():
+            return {}
+        try:
+            return _json.loads(p.read_text(encoding="utf-8"))
+        except Exception:
+            return {}
+
+    def _redact_task(t: dict) -> dict:
+        """Strip secrets before returning to the agent."""
+        clean = {k: v for k, v in t.items() if k not in ("hf_token", "_secrets")}
+        if isinstance(clean.get("payload"), dict):
+            pl = clean["payload"]
+            clean["payload"] = {k: v for k, v in pl.items()
+                                if k not in ("hf_token", "_secrets")}
+        return clean
+
+    @router.get("/cookbook/tasks")
+    async def codex_cookbook_tasks(request: Request):
+        _scope_owner(request, COOKBOOK_READ_SCOPES)
+        state = _read_cookbook_state()
+        tasks = state.get("tasks") or []
+        return {"tasks": [_redact_task(t) for t in tasks]}
+
+    @router.get("/cookbook/servers")
+    async def codex_cookbook_servers(request: Request):
+        _scope_owner(request, COOKBOOK_READ_SCOPES)
+        state = _read_cookbook_state()
+        servers = state.get("env", {}).get("servers") or []
+        # Strip ssh creds / passwords; keep only what's needed to pick a host.
+        cleaned = []
+        for s in servers:
+            cleaned.append({
+                "name": s.get("name"),
+                "host": s.get("host"),
+                "port": s.get("port"),
+                "env": s.get("env"),
+                "envPath": s.get("envPath"),
+                "platform": s.get("platform"),
+                "modelDirs": s.get("modelDirs"),
+            })
+        return {"servers": cleaned}
+
+    @router.get("/cookbook/output/{session_id}")
+    async def codex_cookbook_output(request: Request, session_id: str, tail: int = 400):
+        _scope_owner(request, COOKBOOK_READ_SCOPES)
+        # Defensive: session_id must be the tmux-style id we issue
+        # (`serve-XXXX` / `cookbook-XXXX` / `queue-XXXX`); anything else
+        # would let the agent run arbitrary `tmux capture-pane` targets.
+        import re as _re
+        if not _re.fullmatch(r"[a-zA-Z0-9_-]+", session_id):
+            raise HTTPException(400, "Invalid session id")
+        tail = max(20, min(int(tail or 400), 4000))
+        # Resolve the task's host (if any) from cookbook state so we can
+        # ssh to the right box, exactly as the UI does in _reconnectTask.
+        state = _read_cookbook_state()
+        tasks = state.get("tasks") or []
+        task = next((t for t in tasks if t.get("sessionId") == session_id), None)
+        if task is None:
+            raise HTTPException(404, "task not found")
+        host = (task.get("remoteHost") or "").strip()
+        ssh_port = (task.get("sshPort") or "").strip()
+        # Prefer the persisted log file over the tmux pane. The pane gets
+        # overwritten by the post-crash neofetch banner + bash prompt the
+        # moment vllm exits; the log file is the raw stdout/stderr and
+        # survives unchanged. Falls back to pane for older tasks predating
+        # the tee-to-log runner change.
+        log_path = f"/tmp/odysseus-tmux/{session_id}.log"
+        inner = (
+            f"if [ -s {log_path} ]; then tail -n {tail} {log_path}; "
+            f"else tmux capture-pane -t {session_id} -p -S -{tail}; fi"
+        )
+        if host:
+            port_flag = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
+            import shlex
+            cmd = f"ssh {port_flag}{host} {shlex.quote(inner)}"
+        else:
+            cmd = inner
+        result = await _run_shell(cmd, timeout=15)
+        return {
+            "session_id": session_id,
+            "host": host or "local",
+            "exit_code": result.get("exit_code"),
+            "output": result.get("stdout", ""),
+            "task": _redact_task(task),
+        }
+
+    @router.post("/cookbook/serve")
+    async def codex_cookbook_serve(request: Request, body: dict[str, Any] = Body(default_factory=dict)):
+        _scope_owner(request, COOKBOOK_LAUNCH_SCOPES)
+        # Wraps /api/model/serve with the SAME validation the UI uses.
+        # _validate_serve_cmd (called inside model_serve) rejects shell
+        # metachars and requires the leading binary to be in the
+        # cookbook allowlist (vllm / python3 / sglang / llama-server / ...).
+        from routes.cookbook_helpers import ServeRequest
+        # Accept friendly aliases agents naturally reach for. Without these,
+        # passing `host` silently maps to nothing and the serve runs LOCAL
+        # instead of on the intended remote — exactly the bug an agent
+        # would never debug on its own.
+        norm = dict(body or {})
+        if "host" in norm and "remote_host" not in norm:
+            norm["remote_host"] = norm.pop("host")
+        if "model" in norm and "repo_id" not in norm:
+            norm["repo_id"] = norm.pop("model")
+        if "ssh_port" not in norm and "port" in norm and (str(norm.get("port") or "").isdigit() and int(norm["port"]) >= 1000):
+            # Heuristic: if `port` looks like an SSH port (≥1000) and there's
+            # no explicit ssh_port, treat it as such. UI ports (8000, 8001,
+            # 30000) belong inside the cmd string, not here.
+            pass  # leave as-is — user's `port` here is ambiguous; skip remap.
+        try:
+            req = ServeRequest(**norm)
+        except Exception as exc:
+            raise HTTPException(400, f"Invalid serve payload: {exc}")
+        serve_endpoint = _find_endpoint(None, "POST", "/api/model/serve")
+        # Fall back to importing from the cookbook router registered on app.
+        if serve_endpoint is None:
+            from fastapi import FastAPI
+            app: FastAPI = request.app
+            for route in app.routes:
+                if getattr(route, "path", None) == "/api/model/serve" and "POST" in getattr(route, "methods", set()):
+                    serve_endpoint = route.endpoint
+                    break
+        if serve_endpoint is None:
+            raise HTTPException(503, "model serve endpoint unavailable")
+        return await serve_endpoint(request, req)
+
+    @router.post("/cookbook/stop/{session_id}")
+    async def codex_cookbook_stop(request: Request, session_id: str):
+        _scope_owner(request, COOKBOOK_LAUNCH_SCOPES)
+        import re as _re
+        if not _re.fullmatch(r"[a-zA-Z0-9_-]+", session_id):
+            raise HTTPException(400, "Invalid session id")
+        state = _read_cookbook_state()
+        tasks = state.get("tasks") or []
+        task = next((t for t in tasks if t.get("sessionId") == session_id), None)
+        host = ((task or {}).get("remoteHost") or "").strip()
+        ssh_port = ((task or {}).get("sshPort") or "").strip()
+        if host:
+            port_flag = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
+            cmd = f"ssh {port_flag}{host} \"tmux kill-session -t {session_id}\""
+        else:
+            cmd = f"tmux kill-session -t {session_id}"
+        result = await _run_shell(cmd, timeout=10)
+        return {"session_id": session_id, "exit_code": result.get("exit_code"), "host": host or "local"}
+
+    @router.get("/cookbook/cached")
+    async def codex_cookbook_cached(request: Request, host: str | None = None):
+        """List cached models on a configured server (or local if host is omitted).
+        Mirrors `list_cached_models` from the chat agent so external agents have
+        the same inventory view before deciding what to serve/download."""
+        _scope_owner(request, COOKBOOK_READ_SCOPES)
+        # Hit /api/model/cached internally, with the same modelDirs the chat
+        # agent's list_cached_models would resolve from cookbook state.
+        state = _read_cookbook_state()
+        env = state.get("env") if isinstance(state, dict) else {}
+        servers = (env.get("servers") if isinstance(env, dict) else None) or []
+        HF_DEFAULTS = {"~/.cache/huggingface/hub", "~/.cache/huggingface"}
+        def _dirs_for(srv: dict) -> str:
+            mds = srv.get("modelDirs") if isinstance(srv, dict) else None
+            if isinstance(mds, list):
+                extras = [d for d in mds if isinstance(d, str) and d.strip() and d.strip() not in HF_DEFAULTS]
+                return ",".join(extras)
+            if isinstance(mds, str) and mds.strip() not in HF_DEFAULTS:
+                return mds
+            return ""
+        # Resolve friendly host name → real host (matches list_cached_models flow).
+        resolved_host = host or ""
+        srv: dict[str, Any] = {}
+        if host:
+            srv = next(
+                (s for s in servers if isinstance(s, dict)
+                 and (s.get("name") == host or s.get("host") == host)),
+                {},
+            )
+            if srv and srv.get("host"):
+                resolved_host = srv["host"]
+        else:
+            srv = next((s for s in servers if isinstance(s, dict) and not (s.get("host") or "").strip()), {})
+        params: dict[str, str] = {}
+        if resolved_host:
+            params["host"] = resolved_host
+        md = _dirs_for(srv)
+        if md:
+            params["model_dir"] = md
+        if srv.get("port"):
+            params["ssh_port"] = str(srv["port"])
+        if srv.get("platform"):
+            params["platform"] = srv["platform"]
+        cached_endpoint = _find_endpoint(None, "GET", "/api/model/cached")
+        if cached_endpoint is None:
+            from fastapi import FastAPI
+            app: FastAPI = request.app
+            for route in app.routes:
+                if getattr(route, "path", None) == "/api/model/cached" and "GET" in getattr(route, "methods", set()):
+                    cached_endpoint = route.endpoint
+                    break
+        if cached_endpoint is None:
+            raise HTTPException(503, "model cached endpoint unavailable")
+        # The endpoint reads host/model_dir/ssh_port/platform as kwargs.
+        return await cached_endpoint(
+            request,
+            host=params.get("host") or None,
+            model_dir=params.get("model_dir") or None,
+            ssh_port=params.get("ssh_port") or None,
+            platform=params.get("platform") or None,
+        )
+
+    @router.get("/cookbook/presets")
+    async def codex_cookbook_presets(request: Request):
+        """List saved serve presets (model + host + port + launch cmd).
+        Counterpart to `list_serve_presets`. Use BEFORE composing a `serve`
+        body — the user's saved preset usually has the working cmd already."""
+        _scope_owner(request, COOKBOOK_READ_SCOPES)
+        state = _read_cookbook_state()
+        presets = state.get("presets") or []
+        out = []
+        for p in presets:
+            if not isinstance(p, dict):
+                continue
+            out.append({
+                "name": p.get("name"),
+                "model": p.get("model") or p.get("modelId"),
+                "host": p.get("host") or p.get("remoteHost"),
+                "port": p.get("port"),
+                "cmd": p.get("cmd"),
+            })
+        return {"presets": out, "default_host": (state.get("env") or {}).get("defaultServer", "")}
+
+    @router.post("/cookbook/preset/{name}")
+    async def codex_cookbook_serve_preset(request: Request, name: str):
+        """Launch a saved preset by name. Reuses the working cmd + host the
+        user already saved, avoiding the cmd-allowlist trial-and-error loop."""
+        _scope_owner(request, COOKBOOK_LAUNCH_SCOPES)
+        import re as _re
+        if not _re.fullmatch(r"[A-Za-z0-9 _.:@\-]+", name):
+            raise HTTPException(400, "Invalid preset name")
+        state = _read_cookbook_state()
+        presets = state.get("presets") or []
+        lname = name.lower().strip()
+        chosen = next(
+            (p for p in presets if isinstance(p, dict) and (p.get("name") or "").lower() == lname),
+            None,
+        )
+        if chosen is None:
+            chosen = next(
+                (p for p in presets if isinstance(p, dict) and lname in (p.get("name") or "").lower()),
+                None,
+            )
+        if chosen is None:
+            raise HTTPException(404, f"No preset matching {name!r}")
+        repo_id = chosen.get("model") or chosen.get("modelId") or ""
+        cmd = (chosen.get("cmd") or "").strip()
+        host = chosen.get("host") or chosen.get("remoteHost") or ""
+        if not repo_id or not cmd or cmd.startswith("(adopted"):
+            raise HTTPException(400, f"Preset {chosen.get('name')!r} has no launchable cmd "
+                                     "(adopted from external launch). Use POST /cookbook/serve "
+                                     "with the actual cmd instead.")
+        # Reuse the serve handler we already validated.
+        from routes.cookbook_helpers import ServeRequest
+        body = {"repo_id": repo_id, "cmd": cmd}
+        if host:
+            body["remote_host"] = host
+        try:
+            req = ServeRequest(**body)
+        except Exception as exc:
+            raise HTTPException(400, f"Preset payload invalid: {exc}")
+        serve_endpoint = _find_endpoint(None, "POST", "/api/model/serve")
+        if serve_endpoint is None:
+            from fastapi import FastAPI
+            app: FastAPI = request.app
+            for route in app.routes:
+                if getattr(route, "path", None) == "/api/model/serve" and "POST" in getattr(route, "methods", set()):
+                    serve_endpoint = route.endpoint
+                    break
+        if serve_endpoint is None:
+            raise HTTPException(503, "model serve endpoint unavailable")
+        return await serve_endpoint(request, req)
+
+    @router.post("/cookbook/adopt")
+    async def codex_cookbook_adopt(request: Request, body: dict[str, Any] = Body(default_factory=dict)):
+        """Adopt an existing tmux session (one started via raw ssh+tmux) into
+        cookbook tracking. Needed when serve_model rejects a cmd and the
+        agent falls back to direct ssh — without adoption the session is
+        invisible to the UI. Body: {tmux_session, model, host?, port?}."""
+        _scope_owner(request, COOKBOOK_LAUNCH_SCOPES)
+        norm = dict(body or {})
+        sess = (norm.get("tmux_session") or norm.get("session_id") or "").strip()
+        model = (norm.get("model") or norm.get("repo_id") or "").strip()
+        host = (norm.get("host") or norm.get("remote_host") or "").strip()
+        port = norm.get("port") or 8000
+        import re as _re
+        if not sess or not _re.fullmatch(r"[a-zA-Z0-9_-]+", sess):
+            raise HTTPException(400, "tmux_session required, [a-zA-Z0-9_-]+ only")
+        if not model:
+            raise HTTPException(400, "model required")
+        # Verify the tmux session exists on the target host before adopting.
+        import shlex
+        if host:
+            check = f"ssh {shlex.quote(host)} 'tmux has-session -t {shlex.quote(sess)}'"
+        else:
+            check = f"tmux has-session -t {shlex.quote(sess)}"
+        chk = await _run_shell(check, timeout=8)
+        if chk.get("exit_code") not in (0, None):
+            raise HTTPException(404, f"tmux session {sess!r} not found on {host or 'local'}")
+        # Write into cookbook_state.json.
+        import time as _t, json as _json
+        from core.atomic_io import atomic_write_json
+        from pathlib import Path as _Path
+        cookbook_state_path = _Path("/app/data/cookbook_state.json")
+        try:
+            state = _json.loads(cookbook_state_path.read_text(encoding="utf-8"))
+        except Exception:
+            state = {}
+        tasks = state.setdefault("tasks", [])
+        if any(isinstance(t, dict) and t.get("sessionId") == sess for t in tasks):
+            return {"ok": True, "already_tracked": True, "session_id": sess}
+        tasks.append({
+            "id": sess, "sessionId": sess,
+            "name": model.split("/")[-1] if "/" in model else model,
+            "type": "serve", "status": "running",
+            "output": f"Adopted externally-launched session {sess!r} on {host or 'local'}.",
+            "ts": int(_t.time() * 1000),
+            "payload": {"repo_id": model, "remote_host": host, "_cmd": "(adopted — launched outside cookbook)", "port": int(port)},
+            "remoteHost": host, "sshPort": "", "platform": "linux",
+            "_serveReady": False, "_endpointAdded": False, "_adoptedExternally": True,
+        })
+        try:
+            atomic_write_json(cookbook_state_path, state)
+        except Exception as exc:
+            raise HTTPException(500, f"state write failed: {exc}")
+        return {"ok": True, "session_id": sess, "host": host or "local"}
+
     return router
 
 
diff --git a/routes/cookbook_helpers.py b/routes/cookbook_helpers.py
index 9efb30d..454c67b 100644
--- a/routes/cookbook_helpers.py
+++ b/routes/cookbook_helpers.py
@@ -546,6 +546,13 @@ def _append_serve_preflight_exit_lines(runner_lines: list[str], *, keep_shell_op
     runner_lines.append('if [ -n "$ODYSSEUS_PREFLIGHT_EXIT" ]; then')
     runner_lines.append('  echo ""; echo "=== Process exited with code $ODYSSEUS_PREFLIGHT_EXIT ==="')
     if keep_shell_open:
+        # Decouple the post-crash interactive shell from the persistent log
+        # file. fds 3/4 were saved BEFORE the tee redirect at the top of
+        # the runner; restoring them here means the neofetch banner the
+        # user's .zshrc prints lands on the tmux pane only, not in the
+        # log file the agent's tail_serve_output reads.
+        runner_lines.append('  exec 1>&3 2>&4 3>&- 4>&- 2>/dev/null || true')
+        runner_lines.append('  sleep 0.2  # let tee child flush + exit')
         runner_lines.append('  exec "${SHELL:-/bin/bash}"')
     else:
         runner_lines.append('  exit "$ODYSSEUS_PREFLIGHT_EXIT"')
@@ -563,7 +570,11 @@ def _append_serve_exit_code_lines(
     if is_pip_install:
         runner_lines.append('if [ $ODYSSEUS_CMD_EXIT -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; fi')
     if keep_shell_open:
-        runner_lines.append('echo ""; echo "=== Process exited with code $ODYSSEUS_CMD_EXIT ==="; exec "${SHELL:-/bin/bash}"')
+        runner_lines.append('echo ""; echo "=== Process exited with code $ODYSSEUS_CMD_EXIT ==="')
+        # See preflight branch above for the rationale on restoring fds 3/4.
+        runner_lines.append('exec 1>&3 2>&4 3>&- 4>&- 2>/dev/null || true')
+        runner_lines.append('sleep 0.2  # let tee child flush + exit')
+        runner_lines.append('exec "${SHELL:-/bin/bash}"')
     else:
         runner_lines.append('echo ""; echo "=== Process exited with code $ODYSSEUS_CMD_EXIT ==="')
         runner_lines.append('exit "$ODYSSEUS_CMD_EXIT"')
diff --git a/routes/cookbook_routes.py b/routes/cookbook_routes.py
index 56b95d6..bf2365b 100644
--- a/routes/cookbook_routes.py
+++ b/routes/cookbook_routes.py
@@ -801,6 +801,55 @@ def setup_cookbook_routes() -> APIRouter:
         finally:
             db.close()
 
+    def _pick_free_port_for_ollama(
+        remote: str | None, ssh_port: str | None, start_port: int, max_offset: int
+    ) -> int | None:
+        """Return the first free port in [start_port, start_port+max_offset] on
+        the target host. Used to pick a real bind for `ollama serve` so we
+        don't reattach to an external systemd ollama (or other listener) the
+        Cookbook Stop button can't kill."""
+        import socket
+        if remote:
+            # Probe over SSH. Bash's /dev/tcp gives a portable "is anything
+            # listening" check without requiring ss/netstat/nmap.
+            ssh_base = ["ssh", "-o", "ConnectTimeout=4", "-o", "StrictHostKeyChecking=no"]
+            if ssh_port and str(ssh_port) != "22":
+                if not _SSH_PORT_RE.match(str(ssh_port)):
+                    return None
+                ssh_base.extend(["-p", str(ssh_port)])
+            host_arg = remote
+            if not _REMOTE_HOST_RE.match(host_arg):
+                return None
+            probe_ports = " ".join(str(start_port + i) for i in range(max_offset + 1))
+            script = (
+                f"for p in {probe_ports}; do "
+                "if ! (exec 3<>/dev/tcp/127.0.0.1/$p) 2>/dev/null; then "
+                "echo $p; exit 0; fi; exec 3<&-; exec 3>&-; done; exit 1"
+            )
+            try:
+                import subprocess
+                r = subprocess.run(
+                    ssh_base + [host_arg, script],
+                    capture_output=True, text=True, timeout=8,
+                )
+                if r.returncode == 0:
+                    out = (r.stdout or "").strip().splitlines()
+                    if out and out[0].isdigit():
+                        return int(out[0])
+            except Exception:
+                return None
+            return None
+        # Local: just try to connect.
+        for off in range(max_offset + 1):
+            p = start_port + off
+            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+                s.settimeout(0.25)
+                try:
+                    s.connect(("127.0.0.1", p))
+                except (ConnectionRefusedError, socket.timeout, OSError):
+                    return p
+        return None
+
     def _auto_register_llm_endpoint(req: ServeRequest, remote: str | None) -> str | None:
         """Register a freshly-served LLM as a model endpoint so it appears in the
         model picker without a manual /setup step — the text-model sibling of
@@ -815,21 +864,37 @@ def setup_cookbook_routes() -> APIRouter:
         import re
         from core.database import SessionLocal, ModelEndpoint
 
-        # Port: an explicit --port wins. Otherwise fall back by backend — Ollama
-        # is the only server in our generated commands that omits --port.
+        # Port: ordered fallbacks so we match whatever the user actually
+        # asked for, not a hardcoded default:
+        #   1. explicit `--port N`  (vllm / sglang / llama-server)
+        #   2. `OLLAMA_HOST=host:port`  (the way Ollama specifies its bind)
+        #   3. fallback by backend (11434 ollama / 8080 llama.cpp)
+        # Previously the OLLAMA_HOST form was silently ignored and we
+        # registered every Ollama endpoint at 11434 — even if the user
+        # set OLLAMA_HOST=0.0.0.0:11435 to avoid colliding with an
+        # existing systemd Ollama, the registered endpoint pointed at
+        # the OLD port and showed as offline.
         port_match = re.search(r'--port\s+(\d+)', req.cmd)
+        ollama_host_match = re.search(r'OLLAMA_HOST=[^\s]*?:(\d+)', req.cmd)
         if port_match:
             port = int(port_match.group(1))
+        elif ollama_host_match:
+            port = int(ollama_host_match.group(1))
         elif "ollama" in req.cmd:
             port = 11434
         else:
             port = 8080  # llama.cpp's llama-server default — the Apple Silicon path
 
         # Determine host (mirrors the image path: SSH alias for remote serves).
+        # For local serves while Odysseus runs inside Docker, "localhost"
+        # resolves to the container itself — useless. Use host.docker.internal
+        # which compose maps to the actual host, matching what /setup adds
+        # for Ollama by hand.
         if remote:
             host = remote.split("@")[-1] if "@" in remote else remote
         else:
-            host = "localhost"
+            from routes.model_routes import _docker_host_gateway_reachable
+            host = "host.docker.internal" if _docker_host_gateway_reachable() else "localhost"
 
         base_url = f"http://{host}:{port}/v1"
 
@@ -927,6 +992,19 @@ def setup_cookbook_routes() -> APIRouter:
         session_id = f"serve-{uuid.uuid4().hex[:8]}"
         remote = req.remote_host
         is_windows = req.platform == "windows"
+
+        # Ollama: if the user didn't pin a port, resolve the actual port we'll
+        # bind to here (before runner construction) by probing the target host.
+        # Otherwise the runner script picks one at runtime and `_auto_register`
+        # below still registers the stale 11434 default — which on a host with
+        # a systemd ollama lands on the wrong (unreachable-from-docker) service.
+        if "ollama" in req.cmd and "OLLAMA_HOST=" not in req.cmd:
+            _ollama_bind_host = "0.0.0.0" if remote else "127.0.0.1"
+            _ollama_chosen_port = _pick_free_port_for_ollama(
+                remote, req.ssh_port, start_port=11434, max_offset=10,
+            )
+            if _ollama_chosen_port:
+                req.cmd = f"OLLAMA_HOST={_ollama_bind_host}:{_ollama_chosen_port} {req.cmd}"
         # LOCAL execution on a native-Windows host never uses tmux (detached
         # process path below), regardless of the UI-supplied platform.
         local_windows = IS_WINDOWS and not remote
@@ -998,6 +1076,21 @@ def setup_cookbook_routes() -> APIRouter:
         else:
             # ── Linux/Termux: bash + tmux (existing flow) ──
             runner_lines = ["#!/bin/bash"]
+            # Mirror every line of stdout+stderr into a persistent log file
+            # on the host running the serve. This is the file tail_serve_output
+            # reads when the tmux pane has been overwritten by the post-crash
+            # bash prompt — without it, the agent's diagnostic tool sees the
+            # neofetch banner instead of the actual Python traceback.
+            # We save the original fds to 3/4 so we can RESTORE them before
+            # `exec ${SHELL}` at the end of the script. Without that restore,
+            # the post-crash interactive shell's neofetch banner ALSO gets
+            # teed into the log file and `tail -N` returns ONLY the banner —
+            # the actual traceback ends up earlier than the tail window.
+            runner_lines.append("mkdir -p /tmp/odysseus-tmux 2>/dev/null || true")
+            runner_lines.append("exec 3>&1 4>&2")
+            runner_lines.append(
+                f"exec > >(tee -a /tmp/odysseus-tmux/{session_id}.log) 2>&1"
+            )
             runner_lines.extend(_user_shell_path_bootstrap())
             runner_lines.append('ODYSSEUS_PREFLIGHT_EXIT=""')
             # Put Odysseus's own venv bin on PATH (local runs only) so the serve
@@ -1074,38 +1167,24 @@ def setup_cookbook_routes() -> APIRouter:
                     req.cmd,
                     default_host=_ollama_default_host,
                 )
-                # Ollama can be a host binary, a system service, or a Docker
-                # container. If the HTTP API is already reachable, the model is
-                # already served and we should not require a host `ollama` CLI.
+                # Always launch a fresh ollama under tmux so Stop reliably
+                # kills it. If the requested port is busy (e.g. a systemd
+                # ollama on 11434), scan upward for a free one rather than
+                # silently reattaching to an external service that Stop
+                # can't reach.
                 runner_lines.append(f'ODYSSEUS_OLLAMA_HOST={_bash_squote(_ollama_host)}')
                 runner_lines.append(f'ODYSSEUS_OLLAMA_PORT="{_ollama_port}"')
-                runner_lines.append('ODYSSEUS_OLLAMA_URL=""')
-                runner_lines.append('for _ody_ollama_try in $(seq 1 20); do')
-                runner_lines.append('  for _ody_ollama_port in "$ODYSSEUS_OLLAMA_PORT" 11434; do')
-                runner_lines.append('    [ -z "$_ody_ollama_port" ] && continue')
-                runner_lines.append('    for _ody_ollama_host in 127.0.0.1 localhost host.docker.internal; do')
-                runner_lines.append('      _ody_ollama_url="http://${_ody_ollama_host}:${_ody_ollama_port}"')
-                runner_lines.append('      if curl -sf "$_ody_ollama_url/api/tags" >/dev/null 2>&1; then')
-                runner_lines.append('        ODYSSEUS_OLLAMA_URL="$_ody_ollama_url"')
-                runner_lines.append('        ODYSSEUS_OLLAMA_PORT="$_ody_ollama_port"')
-                runner_lines.append('        break 3')
-                runner_lines.append('      fi')
-                runner_lines.append('    done')
-                runner_lines.append('  done')
-                runner_lines.append('  [ "$_ody_ollama_try" -eq 1 ] && echo "[odysseus] Waiting for an existing Ollama API on ports ${ODYSSEUS_OLLAMA_PORT}/11434..."')
-                runner_lines.append('  sleep 1')
-                runner_lines.append('done')
-                runner_lines.append('if [ -n "$ODYSSEUS_OLLAMA_URL" ]; then')
-                runner_lines.append('  if [ "$ODYSSEUS_OLLAMA_PORT" != "' + _ollama_port + '" ]; then')
-                runner_lines.append('    echo "[odysseus] Selected Ollama port ' + _ollama_port + ' was not reachable; using running Ollama on port ${ODYSSEUS_OLLAMA_PORT}."')
+                runner_lines.append('for _ody_off in 0 1 2 3 4 5 6 7 8 9; do')
+                runner_lines.append('  _ody_try_port=$((ODYSSEUS_OLLAMA_PORT + _ody_off))')
+                runner_lines.append('  if ! (exec 3<>/dev/tcp/127.0.0.1/$_ody_try_port) 2>/dev/null; then')
+                runner_lines.append('    exec 3<&-; exec 3>&-')
+                runner_lines.append('    ODYSSEUS_OLLAMA_PORT="$_ody_try_port"')
+                runner_lines.append('    break')
                 runner_lines.append('  fi')
-                runner_lines.append('  echo "[odysseus] Ollama API ready on port ${ODYSSEUS_OLLAMA_PORT}: ${ODYSSEUS_OLLAMA_URL}"')
-                runner_lines.append('  echo "[odysseus] This task is monitoring an existing Ollama server; stopping it here will not stop an external Docker/system service."')
-                runner_lines.append('  exec bash -i')
-                runner_lines.append('fi')
+                runner_lines.append('  exec 3<&-; exec 3>&-')
+                runner_lines.append('done')
                 runner_lines.append('if ! command -v ollama &>/dev/null; then')
-                runner_lines.append('  echo "ERROR: Ollama not found and no Ollama API is reachable on 127.0.0.1, localhost, or host.docker.internal (ports ${ODYSSEUS_OLLAMA_PORT}/11434)."')
-                runner_lines.append('  echo "Install Ollama, start an Ollama service/container on this server, or pick the port where it is already listening."')
+                runner_lines.append('  echo "ERROR: Ollama not found on this server. Install it from https://ollama.com/download or `curl -fsSL https://ollama.com/install.sh | sh`."')
                 runner_lines.append('  echo')
                 runner_lines.append('  echo "=== Process exited with code 127 ==="')
                 runner_lines.append('  exec bash -i')
@@ -1940,6 +2019,153 @@ def setup_cookbook_routes() -> APIRouter:
 
         return {"models": out}
 
+    # Rate-limit for the orphan-tmux adoption sweep. The UI polls
+    # tasks/status every ~3s; we don't want to SSH every host on every
+    # poll. 20s is fast enough that a model the agent launched in the
+    # background shows up "almost immediately" in the UI without being
+    # wasteful.
+    _last_orphan_sweep_ts = [0.0]
+    _ORPHAN_SWEEP_MIN_INTERVAL_S = 20.0
+
+    def _maybe_sweep_orphans(tasks: list, state: dict) -> None:
+        """Scan each configured cookbook server for `serve-*` tmux sessions
+        the cookbook doesn't know about and adopt them into state.tasks.
+
+        Writes are conditional: if no orphans are found, nothing is touched.
+        Rate-limited so polling UIs don't trigger SSH on every refresh.
+        """
+        import time as _time
+        import subprocess
+        logger.info(f"_maybe_sweep_orphans: entered, last_ts={_last_orphan_sweep_ts[0]}")
+        now = _time.monotonic()
+        if now - _last_orphan_sweep_ts[0] < _ORPHAN_SWEEP_MIN_INTERVAL_S:
+            logger.info(f"_maybe_sweep_orphans: rate-limited, {now - _last_orphan_sweep_ts[0]:.1f}s since last")
+            return
+        _last_orphan_sweep_ts[0] = now
+
+        env = state.get("env") if isinstance(state, dict) else {}
+        servers = env.get("servers") if isinstance(env, dict) else []
+        logger.info(f"orphan sweep starting: {len(servers) if isinstance(servers, list) else 0} server(s), known_sids={len([t for t in tasks if isinstance(t, dict) and t.get('sessionId')])}")
+        if not isinstance(servers, list):
+            return
+
+        known_sids = {
+            t.get("sessionId") for t in tasks
+            if isinstance(t, dict) and t.get("sessionId")
+        }
+
+        adopted_any = False
+        for srv in servers:
+            if not isinstance(srv, dict):
+                continue
+            host = (srv.get("host") or "").strip()
+            if not host:
+                continue  # local-only entry; the /proc scan handles it
+            if not _REMOTE_HOST_RE.match(host):
+                continue
+            sport = str(srv.get("port") or "").strip()
+            ssh_base = ["ssh", "-o", "ConnectTimeout=4", "-o", "StrictHostKeyChecking=no"]
+            if sport and sport != "22":
+                if not _SSH_PORT_RE.match(sport):
+                    continue
+                ssh_base.extend(["-p", sport])
+
+            try:
+                ls = subprocess.run(
+                    ssh_base + [host, "tmux ls 2>/dev/null"],
+                    timeout=6, capture_output=True, text=True,
+                )
+            except Exception:
+                continue
+            for line in (ls.stdout or "").splitlines():
+                sid = line.split(":", 1)[0].strip()
+                if not sid or not _SESSION_ID_RE.match(sid):
+                    continue
+                if sid in known_sids:
+                    continue
+                # Adopt any session whose pane is currently running a
+                # known model-server process (checked below). The earlier
+                # prefix gate (serve-/cookbook-) dropped legitimate
+                # serves whenever tmux fell back to numeric IDs, leaving
+                # them invisible in the Cookbook UI — so the user could
+                # neither see nor stop them.
+                # Skip zombie / idle-shell sessions. A tmux session left
+                # over from a crashed vllm just shows a bash prompt —
+                # adopting it would pollute the UI with "running" tasks
+                # that aren't actually serving anything. pane_current_command
+                # is the foreground process in the pane right now; only
+                # real model serves leave a python/vllm/etc. process there.
+                try:
+                    pc = subprocess.run(
+                        ssh_base + [host, "tmux", "list-panes", "-t", sid,
+                                    "-F", "#{pane_current_command}"],
+                        timeout=4, capture_output=True, text=True,
+                    )
+                    cur = (pc.stdout or "").strip().splitlines()
+                except Exception:
+                    cur = []
+                LIVE_PROCS = {"python", "python3", "vllm", "llama-server",
+                              "llama_cpp_main", "sglang", "lmdeploy",
+                              "ollama", "node", "uvicorn"}
+                if not any(c in LIVE_PROCS for c in cur):
+                    continue
+                # Try to recover a plausible repo_id + port from the
+                # pane buffer. Cheap heuristic — if we can't, register
+                # with placeholder fields; the UI still shows it.
+                try:
+                    cap = subprocess.run(
+                        ssh_base + [host, "tmux", "capture-pane", "-t", sid, "-p", "-S", "-300"],
+                        timeout=6, capture_output=True, text=True,
+                    )
+                    pane = cap.stdout or ""
+                except Exception:
+                    pane = ""
+                import re as _re_orphan
+                # vLLM banner: "model   /path/...". Falls back to the
+                # raw vllm-serve command if the banner already scrolled.
+                m_model = _re_orphan.search(r"model\s+(\S+)", pane)
+                model = m_model.group(1) if m_model else ""
+                if not model:
+                    m_serve = _re_orphan.search(r"vllm\s+serve\s+(\S+)", pane)
+                    model = m_serve.group(1) if m_serve else f"adopted:{sid}"
+                m_port = _re_orphan.search(r"--port\s+(\d+)", pane)
+                port = int(m_port.group(1)) if m_port else 0
+
+                import time as _t2
+                tasks.append({
+                    "id": sid,
+                    "sessionId": sid,
+                    "name": model.split("/")[-1] if "/" in model else model,
+                    "type": "serve",
+                    "status": "running",
+                    "output": f"Auto-adopted from orphan tmux session on {host}. "
+                              "Open the task to see live output.",
+                    "ts": int(_t2.time() * 1000),
+                    "payload": {
+                        "repo_id": model,
+                        "remote_host": host,
+                        "_cmd": "(orphan tmux session — original launch cmd unknown)",
+                        "port": port,
+                    },
+                    "remoteHost": host,
+                    "sshPort": sport,
+                    "platform": "linux",
+                    "_serveReady": False,
+                    "_endpointAdded": False,
+                    "_adoptedExternally": True,
+                })
+                known_sids.add(sid)
+                adopted_any = True
+                logger.info(f"auto-adopted orphan tmux session {sid!r} on {host}")
+
+        if adopted_any:
+            try:
+                from core.atomic_io import atomic_write_json
+                state["tasks"] = tasks
+                atomic_write_json(_cookbook_state_path, state)
+            except Exception as e:
+                logger.warning(f"orphan sweep: state write failed: {e}")
+
     @router.get("/api/cookbook/tasks/status")
     async def cookbook_tasks_status(request: Request):
         """Check status of all active cookbook tmux sessions.
@@ -1993,6 +2219,7 @@ def setup_cookbook_routes() -> APIRouter:
 
         # Load saved tasks from cookbook state
         tasks = []
+        state = {}
         if _cookbook_state_path.exists():
             try:
                 state = json.loads(_cookbook_state_path.read_text(encoding="utf-8"))
@@ -2004,6 +2231,21 @@ def setup_cookbook_routes() -> APIRouter:
             except Exception:
                 pass
 
+        # Orphan-tmux auto-adoption sweep. When the agent (or anyone)
+        # SSH-launches a `serve-*` tmux session — usually because
+        # serve_model rejected `source ... && vllm ...` or because of a
+        # manual relaunch via tmux send-keys — that session is invisible
+        # to the cookbook UI even though it's a live model server. The
+        # sweep finds those orphans on each configured remote host and
+        # writes them into state.tasks with _adoptedExternally=True, so
+        # they show up in the UI on the next poll without anyone having
+        # to remember to call adopt_served_model. Rate-limited via the
+        # module-level _last_orphan_sweep so we don't SSH every 3s.
+        try:
+            _maybe_sweep_orphans(tasks, state)
+        except Exception as _sweep_e:
+            logger.warning(f"orphan sweep failed (non-fatal): {_sweep_e!r}")
+
         results = []
         for task in tasks:
             session_id = task.get("sessionId", "")
@@ -2063,7 +2305,12 @@ def setup_cookbook_routes() -> APIRouter:
                 if _tport and _tport != "22":
                     ssh_base.extend(["-p", str(_tport)])
                 check_cmd = ssh_base + [remote, "tmux", "has-session", "-t", session_id]
-                capture_cmd = ssh_base + [remote, "tmux", "capture-pane", "-t", session_id, "-p", "-S", "-50"]
+                # Capture 500 lines (was 50) so a Python traceback survives
+                # the post-crash neofetch banner + bash prompt that otherwise
+                # fills the visible tail. Without this, output_tail ends up
+                # as just "Locale: C / Ubuntu_Odysseus ❯" and the agent
+                # can't diagnose the actual error.
+                capture_cmd = ssh_base + [remote, "tmux", "capture-pane", "-t", session_id, "-p", "-S", "-500"]
             elif IS_WINDOWS:
                 # LOCAL Windows task: launched as a detached process (no tmux).
                 # Liveness comes from the <session>.pid file, output from the
@@ -2072,7 +2319,7 @@ def setup_cookbook_routes() -> APIRouter:
                 capture_cmd = None
             else:
                 check_cmd = ["tmux", "has-session", "-t", session_id]
-                capture_cmd = ["tmux", "capture-pane", "-t", session_id, "-p", "-S", "-50"]
+                capture_cmd = ["tmux", "capture-pane", "-t", session_id, "-p", "-S", "-500"]
 
             local_win_task = (not remote) and IS_WINDOWS
 
diff --git a/routes/task_routes.py b/routes/task_routes.py
index a5c49ad..6604923 100644
--- a/routes/task_routes.py
+++ b/routes/task_routes.py
@@ -18,6 +18,119 @@ from routes.prefs_routes import _load_for_user, _save_for_user
 logger = logging.getLogger(__name__)
 
 
+def _maybe_cascade_calendar_event(task) -> None:
+    """Delete the linked calendar event when a cookbook_serve task is
+    removed. Two lookup strategies:
+
+      1. PRIMARY — `cookbook_event_uid` marker stashed in task.prompt
+         by cookbookSchedule.js right after creating the event. Direct
+         UID match, no ambiguity.
+
+      2. FALLBACK — for tasks created before the marker was wired up
+         (or when the PATCH to add the marker failed silently), scan
+         the Cookbook calendar for events whose summary equals the
+         task name and delete the matches.
+
+    Best-effort throughout: errors are logged but never block the task
+    deletion itself."""
+    if not task or task.task_type != "action" or task.action != "cookbook_serve":
+        return
+
+    import httpx
+    from core.middleware import INTERNAL_TOOL_HEADER, INTERNAL_TOOL_TOKEN
+    headers = {INTERNAL_TOOL_HEADER: INTERNAL_TOOL_TOKEN}
+    if task.owner:
+        headers["X-Odysseus-Owner"] = task.owner
+
+    # Strategy 1: explicit UID marker in prompt.
+    event_uid = ""
+    if task.prompt:
+        try:
+            cfg = json.loads(task.prompt)
+            if isinstance(cfg, dict):
+                event_uid = (cfg.get("cookbook_event_uid") or "").strip()
+        except Exception:
+            pass
+
+    def _try_delete(uid: str) -> bool:
+        try:
+            with httpx.Client(timeout=10) as client:
+                r = client.delete(
+                    f"http://localhost:7000/api/calendar/events/{uid}",
+                    headers=headers,
+                )
+                if r.status_code >= 400:
+                    logger.info(
+                        f"task delete: cascade calendar event {uid} returned "
+                        f"HTTP {r.status_code}"
+                    )
+                    return False
+                return True
+        except Exception as e:
+            logger.warning(f"task delete: cascade calendar event {uid} failed: {e}")
+            return False
+
+    if event_uid:
+        _try_delete(event_uid)
+        return
+
+    # Strategy 2: scan the Cookbook calendar for matching summaries.
+    # Only runs for tasks missing the marker (old tasks or PATCH failures).
+    if not task.name:
+        return
+    try:
+        with httpx.Client(timeout=10) as client:
+            # Find the Cookbook calendar.
+            cal_r = client.get("http://localhost:7000/api/calendar/calendars", headers=headers)
+            if cal_r.status_code >= 400:
+                return
+            cals = (cal_r.json() or {}).get("calendars", [])
+            cookbook_cal = next(
+                (c for c in cals if (c.get("name") or "").lower() == "cookbook"),
+                None,
+            )
+            if not cookbook_cal:
+                return
+            cal_href = cookbook_cal.get("href") or cookbook_cal.get("id") or ""
+            # List events in a wide window to catch recurring + upcoming.
+            from datetime import datetime as _dt, timedelta as _td, timezone as _tz
+            now = _dt.now(_tz.utc)
+            start = (now - _td(days=30)).isoformat()
+            end = (now + _td(days=365)).isoformat()
+            ev_r = client.get(
+                "http://localhost:7000/api/calendar/events",
+                params={"start": start, "end": end, "calendar": cal_href},
+                headers=headers,
+            )
+            if ev_r.status_code >= 400:
+                return
+            events = (ev_r.json() or {}).get("events", [])
+            # Match by exact summary. Tasks named "Serve: <model>" are
+            # created from the schedule modal; the event's summary mirrors
+            # the task name 1:1 by design.
+            target = (task.name or "").strip()
+            uids_to_delete = set()
+            for ev in events:
+                if (ev.get("summary") or "").strip() != target:
+                    continue
+                uid = ev.get("uid") or ev.get("id") or ""
+                # Strip the "::occurrence" suffix on recurring expansions —
+                # we want to delete the MASTER once, not each instance.
+                if "::" in uid:
+                    uid = uid.split("::", 1)[0]
+                if uid:
+                    uids_to_delete.add(uid)
+            for uid in uids_to_delete:
+                _try_delete(uid)
+            if uids_to_delete:
+                logger.info(
+                    f"task delete: cascade matched {len(uids_to_delete)} calendar event(s) "
+                    f"by summary fallback for task {task.id} ({target!r})"
+                )
+    except Exception as e:
+        logger.warning(f"task delete: cascade fallback scan failed: {e}")
+
+
 class TaskCreate(BaseModel):
     name: Optional[str] = None
     prompt: Optional[str] = None
@@ -616,6 +729,12 @@ def setup_task_routes(task_scheduler) -> APIRouter:
                 raise HTTPException(404, "Task not found")
             if user and task.owner != user:
                 raise HTTPException(403, "Access denied")
+            # Cascade: cookbook_serve tasks may have a linked calendar
+            # event (created via the "Create event in calendar" toggle
+            # in the schedule modal). If so, delete the calendar event
+            # too so the calendar doesn't end up holding a phantom event
+            # for a task that no longer exists.
+            _maybe_cascade_calendar_event(task)
             db.delete(task)
             db.commit()
             return {"ok": True}
diff --git a/src/agent_loop.py b/src/agent_loop.py
index eabc340..6bd9ba8 100644
--- a/src/agent_loop.py
+++ b/src/agent_loop.py
@@ -337,6 +337,7 @@ If the user asks for a reminder/alarm before the event, pass `reminder_minutes`
     "ui_control": "- ```ui_control``` — Control the UI: toggle tools on/off, OPEN PANELS, open email reply drafts, switch models, change themes. Commands: `toggle <name> on/off` (names: bash/shell, web/search, research, incognito, document_editor/documents), `open_panel <name>` (panels: documents, gallery, email, sessions, notes, memories/brain, skills, settings, cookbook), `open_email_reply <uid> <folder> <reply|reply-all|ai-reply>` (opens an email compose document, does NOT send), `set_mode agent/chat`, `switch_model <name>`, `set_theme <preset>`, `create_theme <name> <bg> <fg> <panel> <border> <accent>` (optional key=val for advanced colors AND background effects: bgPattern=<none|dots|synapse|rain|constellations|perlin-flow|petals|sparkles|embers>, bgEffectColor=#RRGGBB, bgEffectIntensity=<num>, bgEffectSize=<num>, frosted=true|false). \"open documents\" / \"open library\" / \"show gallery\" / \"open inbox\" / \"open notes\" / \"open cookbook\" all map to `open_panel <name>`. Theme presets: dark, light, midnight, paper, cyberpunk, retrowave, forest, ocean, ume, copper, terminal, organs, lavender, gpt, claude, cute.",
     "list_served_models": "- ```list_served_models``` — Show what the Cookbook (LLM-serving subsystem) is currently running. NO args. Use this for ANY 'what's running' / 'what's serving' / 'show my cookbook' / 'is anything up' query. DO NOT shell out (`ps aux`, `docker ps`, etc.) — this tool is the source of truth. Failed serve tasks include recent logs plus diagnosis/retry suggestions; use those suggestions to call `serve_model` again with an adjusted command when appropriate.",
     "stop_served_model": "- ```stop_served_model``` — Stop a running model server. Args (JSON): {\"session_id\": \"<from list_served_models>\"}. Use for 'kill my cookbook' / 'stop the model' / 'shut down vLLM'.",
+    "tail_serve_output": "- ```tail_serve_output``` — Read the actual tmux stderr/traceback of a CURRENTLY failing cookbook task. Args (JSON): {\"session_id\": \"<from list_served_models>\", \"tail\": 150?}. **Use ONLY after** you just launched something via `serve_model` AND `list_served_models` reports YOUR new task as `crashed`/`error`. DO NOT use it on old stopped/completed download tasks (they're historical noise — won't predict whether a new launch succeeds). DO NOT call it before launching a fresh attempt. When you do call it, bump `tail` to 400+ only if the visible error references 'see root cause above'.",
     "download_model": "- ```download_model``` — Download a HuggingFace model. Args (JSON): {\"repo_id\": \"Qwen/Qwen3-8B\", \"host\": \"user@gpu-box\"?, \"include\": \"*Q4_K_M*\"?}.",
     "serve_model": "- ```serve_model``` — Start serving a model with vLLM / SGLang / llama.cpp / Ollama / Diffusers. Args (JSON): {\"repo_id\": \"...\", \"cmd\": \"vllm serve ... --port 8000\" or \"python3 -m sglang.launch_server ... --port 30000\" or \"python3 scripts/diffusion_server.py --model diffusers/stable-diffusion-xl-1.0-inpainting-0.1 --port 8100\", \"host\": \"user@gpu-box\"?}. For image/inpaint/diffusion models, use the `scripts/diffusion_server.py` command exactly. After launch, call `list_served_models`; if it returns a diagnosis with an adjusted command, retry with that command.",
     "list_downloads": "- ```list_downloads``` — Show in-progress HuggingFace model downloads (filters Cookbook tasks/status to downloads only). NO args. Use for 'what's downloading' / 'show my downloads' / 'check download progress'.",
@@ -1659,6 +1660,28 @@ async def stream_agent_loop(
     _tool_type_counts: collections.Counter = collections.Counter()
     _THINK_RE = re.compile(r'<think>.*?</think>', re.DOTALL | re.IGNORECASE)
     _force_answer = False  # set by loop-breaker → next round runs with NO tools
+    # Supervisor: how many times we've nudged the model after it announced
+    # an action without emitting the tool call. Capped to prevent a model
+    # that *can't* call the tool from looping forever.
+    _intent_nudge_count = 0
+    _MAX_INTENT_NUDGES = 2
+
+    # "I said I would, then didn't" detector. The pattern that breaks debug
+    # loops on weak models (deepseek-v4-flash mid-2026): the model writes
+    # "Let me tail the output to see the error" and then ends the turn with
+    # no tool_calls. The intent is sincere but the function call gets dropped.
+    # Match the common phrasings + an action verb that maps to an available
+    # tool, so we don't nudge on harmless transitional text like "let me
+    # know what you think".
+    _INTENT_RE = re.compile(
+        r"(?:^|\n)\s*(?:let me|i'?ll|i will|going to|let's)\s+"
+        r"(?:tail|check|investigate|look at|see|tail|read|fetch|inspect|"
+        r"verify|diagnose|examine|debug|capture|grab|pull|view|run|call|"
+        r"trigger|launch|start|kick off|stop|kill|restart|adopt|serve|"
+        r"register|adopt|list|search|find|query|hit|ping|test)"
+        r"\b[^.\n]{0,140}",
+        re.IGNORECASE,
+    )
 
     # Document streaming state (persists across rounds)
     _doc_acc = ""          # accumulated tool-call JSON arguments
@@ -2010,6 +2033,46 @@ async def stream_agent_loop(
                     # never re-verify an unchanged state in a loop.
                     _effectful_used = False
                     continue
+            # ── Intent-without-action supervisor ─────────────────────
+            # Catch "Let me tail the output" / "I'll check the logs" /
+            # "Let me investigate" patterns where the model announces an
+            # action but emits no tool_call. The bug shows up most on
+            # smaller models trained to verbalize plans before acting.
+            # We inject one sharp nudge ("you said you would X — call the
+            # actual tool now") and loop again. Capped at
+            # _MAX_INTENT_NUDGES so a model that genuinely cannot use the
+            # tool doesn't pin us in a forever loop.
+            _intent_text = _THINK_RE.sub("", cleaned_round).strip()
+            _intent_match = _INTENT_RE.search(_intent_text) if _intent_text else None
+            # Only nudge when the round REALLY looks like an unfinished
+            # promise: short response (<400 chars), no fenced code/answer,
+            # and an action-intent phrase was matched. Long answers that
+            # happen to contain "let me know" are not stalls.
+            _looks_like_promise = (
+                _intent_match is not None
+                and len(_intent_text) < 400
+                and "```" not in _intent_text
+                and _intent_nudge_count < _MAX_INTENT_NUDGES
+            )
+            if _looks_like_promise:
+                _intent_nudge_count += 1
+                _matched_phrase = _intent_match.group(0).strip()
+                logger.info(f"[agent] intent-without-action nudge #{_intent_nudge_count} on round {round_num}: {_matched_phrase!r}")
+                messages.append({
+                    "role": "system",
+                    "content": (
+                        f"You just wrote: \"{_matched_phrase}\" — but ended the "
+                        "turn without making the actual tool call. The user can "
+                        "see you announced the action but didn't run it, which "
+                        "is the most frustrating thing you can do. "
+                        "DO IT NOW: emit the actual function call this turn. "
+                        "If you decided not to do it after all, say so plainly in "
+                        "one sentence instead of restating the plan."
+                    ),
+                })
+                # Visible signal in the stream so the user knows we caught it.
+                yield f'data: {json.dumps({"type": "agent_step", "round": round_num + 1})}\n\n'
+                continue
             break  # no tools — done
 
         # ── Loop-breaker (Terminus-style stall detector) ──────────────
@@ -2285,6 +2348,19 @@ async def stream_agent_loop(
                 _anchor = f"\n\n[Open in Deep Research](#research-{_rsid})\n"
                 yield 'data: ' + json.dumps({"delta": _anchor}) + '\n\n'
 
+            # Same pattern for notes: when manage_notes creates a note
+            # and returns note_id, drop a `[View note](#note-<id>)` link
+            # into the stream so chatRenderer's click handler routes to
+            # the new openNote() in notes.js — opens the notes panel and
+            # scrolls/flashes the matching card. Without this, the agent
+            # would write "View note" as a phrase with no target.
+            _nid = result.get("note_id")
+            if _nid and block.tool_type == "manage_notes":
+                _title = (result.get("note_title") or "").strip()
+                _label = f"View note: {_title}" if _title else "View note"
+                _anchor = f"\n\n[{_label}](#note-{_nid})\n"
+                yield 'data: ' + json.dumps({"delta": _anchor}) + '\n\n'
+
             # Save for history persistence
             tool_event = {
                 "round": round_num,
diff --git a/src/agent_tools.py b/src/agent_tools.py
index 578b943..b86bd48 100644
--- a/src/agent_tools.py
+++ b/src/agent_tools.py
@@ -19,7 +19,7 @@ logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------
 # Constants (kept here — sub-modules import from here)
 # ---------------------------------------------------------------------------
-MAX_AGENT_ROUNDS = 20
+MAX_AGENT_ROUNDS = 50
 SHELL_TIMEOUT = 60
 PYTHON_TIMEOUT = 30
 MAX_OUTPUT_CHARS = 10_000
diff --git a/src/builtin_actions.py b/src/builtin_actions.py
index 6b96e31..d532603 100644
--- a/src/builtin_actions.py
+++ b/src/builtin_actions.py
@@ -2001,6 +2001,197 @@ async def action_check_email_urgency(owner: str, **kwargs) -> Tuple[str, bool]:
         return str(e), False
 
 
+async def action_cookbook_serve(
+    owner: str,
+    task_name: str = "",
+    progress_cb=None,
+    command: str = "",
+    **kwargs,
+) -> Tuple[str, bool]:
+    """Launch a Cookbook model serve as a scheduled task.
+
+    `command` is the JSON config string the task carries in `prompt`,
+    of shape: {"preset": "name"} OR {"repo_id": "...", "cmd": "...", "host": "..."}.
+    Optional `end_after_min: N` schedules a hard-stop N minutes after launch
+    (handled by cookbook_serve_lifecycle_loop in src/cookbook_serve_lifecycle.py).
+    """
+    import json
+    import time as _time
+    import httpx
+    from pathlib import Path
+    from core.middleware import INTERNAL_TOOL_HEADER, INTERNAL_TOOL_TOKEN
+    from core.atomic_io import atomic_write_json
+
+    headers = {INTERNAL_TOOL_HEADER: INTERNAL_TOOL_TOKEN}
+    try:
+        cfg = json.loads(command or "{}")
+    except Exception:
+        return f"Invalid JSON config: {command!r}", False
+    if not isinstance(cfg, dict):
+        return "Config must be a JSON object", False
+
+    # Resolve the preset (if named) OR fall through with explicit fields.
+    preset_name = (cfg.get("preset") or "").strip()
+    repo_id = (cfg.get("repo_id") or "").strip()
+    cmd = (cfg.get("cmd") or "").strip()
+    host = (cfg.get("host") or cfg.get("remote_host") or "").strip()
+    try:
+        end_after_min = int(cfg.get("end_after_min") or 0)
+    except Exception:
+        end_after_min = 0
+
+    state_path = Path("/app/data/cookbook_state.json")
+    try:
+        state = json.loads(state_path.read_text(encoding="utf-8")) if state_path.exists() else {}
+    except Exception:
+        state = {}
+
+    # Preset lookup. Try three matching strategies in order so the
+    # schedule still works even when the user's preset is named
+    # differently from the model's short name:
+    #
+    #   1. Exact preset.name == preset_name (case-insensitive)
+    #   2. preset.model / preset.modelId == repo_id  (caller knows the repo)
+    #   3. preset.model's short name (after final /) == preset_name
+    #
+    # Without #2 and #3, scheduling "Qwen3.5-397B-A17B-AWQ" failed when
+    # the saved preset was named "vllm-qwen-397b" or had the model field
+    # populated with the full HF repo path. Either should resolve.
+    def _short(name: str) -> str:
+        return (name or "").rsplit("/", 1)[-1].lower()
+
+    if not cmd or not repo_id:
+        presets = state.get("presets") or []
+        chosen = None
+        # Strategy 1: exact name match.
+        if preset_name:
+            chosen = next(
+                (p for p in presets if isinstance(p, dict)
+                 and (p.get("name") or "").lower() == preset_name.lower()),
+                None,
+            )
+        # Strategy 2: repo_id matches the preset's model field.
+        if chosen is None and repo_id:
+            chosen = next(
+                (p for p in presets if isinstance(p, dict)
+                 and (p.get("model") or p.get("modelId") or "").lower() == repo_id.lower()),
+                None,
+            )
+        # Strategy 3: model's short name matches the preset_name.
+        if chosen is None and preset_name:
+            chosen = next(
+                (p for p in presets if isinstance(p, dict)
+                 and _short(p.get("model") or p.get("modelId") or "") == preset_name.lower()),
+                None,
+            )
+        if chosen is not None:
+            repo_id = repo_id or chosen.get("model") or chosen.get("modelId") or ""
+            cmd = cmd or (chosen.get("cmd") or "").strip()
+            host = host or chosen.get("host") or chosen.get("remoteHost") or ""
+    if not repo_id or not cmd or cmd.startswith("(adopted"):
+        # Surface what we tried so the user can name their preset to match.
+        preset_names = [(p.get("name") or "") for p in (state.get("presets") or []) if isinstance(p, dict)]
+        hint = f" Saved presets: {preset_names!r}" if preset_names else ""
+        return (f"No launchable config for {preset_name!r} (repo_id={repo_id!r}). "
+                f"Check Cookbook → Presets has a real cmd, not 'adopted'.{hint}", False)
+
+    # Resolve env_prefix etc. from the host's saved cookbook server entry,
+    # matching the chat agent's serve_model path.
+    body = {"repo_id": repo_id, "cmd": cmd}
+    if host:
+        body["remote_host"] = host
+    env = (state.get("env") or {})
+    srv = next(
+        (s for s in (env.get("servers") or [])
+         if isinstance(s, dict) and (s.get("host") == host or s.get("name") == host)),
+        {},
+    )
+    if srv.get("env") == "venv" and srv.get("envPath"):
+        body["env_prefix"] = f"source {srv['envPath']}/bin/activate"
+    elif srv.get("env") == "conda" and srv.get("envPath"):
+        body["env_prefix"] = f"conda activate {srv['envPath']}"
+    if srv.get("hfToken"): body["hf_token"] = srv["hfToken"]
+    if srv.get("port"): body["ssh_port"] = str(srv["port"])
+    if srv.get("platform"): body["platform"] = srv["platform"]
+
+    try:
+        async with httpx.AsyncClient(timeout=30) as client:
+            r = await client.post("http://localhost:7000/api/model/serve",
+                                  json=body, headers=headers)
+            data = r.json() if r.content else {}
+    except Exception as e:
+        return f"Launch HTTP failed: {e}", False
+    if not data.get("ok"):
+        return f"Launch rejected: {data.get('error') or data.get('detail') or 'unknown'}", False
+
+    sid = data.get("session_id") or ""
+    # Register the new task in cookbook_state.json + stamp it with our
+    # scheduler-owner markers. /api/model/serve spawns the tmux session
+    # but leaves the state-write to the UI — when a scheduled action
+    # launches a serve from server-side, NOBODY writes the task into
+    # state, so the Cookbook tab never shows it. We do the write here.
+    if sid:
+        try:
+            # Re-read fresh (the route may have updated state already).
+            try:
+                fresh = json.loads(state_path.read_text(encoding="utf-8"))
+            except Exception:
+                fresh = {}
+            if not isinstance(fresh, dict):
+                fresh = {}
+            tasks = fresh.get("tasks") if isinstance(fresh.get("tasks"), list) else []
+            existing = next(
+                (t for t in tasks if isinstance(t, dict) and t.get("sessionId") == sid),
+                None,
+            )
+            if existing is None:
+                display_name = repo_id.split("/")[-1] if "/" in repo_id else repo_id
+                placeholder = (
+                    f"Launched by scheduled task {task_name!r} — waiting for tmux output…\n"
+                    f"  session: {sid}\n"
+                    f"  target:  {host or 'local'}\n"
+                    f"  cmd:     {cmd[:200]}{'…' if len(cmd) > 200 else ''}"
+                )
+                existing = {
+                    "id": sid,
+                    "sessionId": sid,
+                    "name": display_name,
+                    "modelId": repo_id,
+                    "type": "serve",
+                    "status": "running",
+                    "output": placeholder,
+                    "ts": int(_time.time() * 1000),
+                    "payload": {"repo_id": repo_id, "remote_host": host or "", "_cmd": cmd},
+                    "remoteHost": host or "",
+                    "sshPort": "",
+                    "platform": "linux",
+                    "_serveReady": False,
+                    "_endpointAdded": False,
+                }
+                tasks.append(existing)
+            # Stamp ownership + end-at on the task entry.
+            existing["_scheduledByTask"] = task_name or ""
+            existing["_scheduledByOwner"] = owner or ""
+            if end_after_min > 0:
+                existing["_scheduledStopAtMs"] = int(_time.time() * 1000) + end_after_min * 60 * 1000
+            fresh["tasks"] = tasks
+            atomic_write_json(state_path, fresh)
+        except Exception as e:
+            logger.warning(f"cookbook_serve: state register/stamp failed: {e}")
+    # Don't try to render absolute clock time in the message — the
+    # server runs in UTC (Docker default), the user reads it as local,
+    # and the offset depends on the user's TZ which the action doesn't
+    # have a reliable handle on. The Tasks UI already shows the RUN
+    # timestamp in the user's local time right above this message, so
+    # "stops 8 min after that" gives the user everything they need.
+    if end_after_min:
+        return (
+            f"Launched {repo_id} (session {sid}); stops {end_after_min} min after this ran",
+            True,
+        )
+    return f"Launched {repo_id} (session {sid})", True
+
+
 BUILTIN_ACTIONS = {
     "tidy_sessions": action_tidy_sessions,
     "tidy_documents": action_tidy_documents,
@@ -2020,6 +2211,7 @@ BUILTIN_ACTIONS = {
     "test_skills": action_test_skills,
     "audit_skills": action_audit_skills,
     "check_email_urgency": action_check_email_urgency,
+    "cookbook_serve": action_cookbook_serve,
     # ping_notes removed from the registry — runs only inside `_note_pings_loop`.
 }
 
diff --git a/src/cookbook_serve_lifecycle.py b/src/cookbook_serve_lifecycle.py
new file mode 100644
index 0000000..58d4242
--- /dev/null
+++ b/src/cookbook_serve_lifecycle.py
@@ -0,0 +1,193 @@
+"""Cookbook serve lifecycle: kills scheduler-owned serves whose end-of-
+window has passed.
+
+Pairs with action_cookbook_serve in builtin_actions.py — that action
+stamps the task it launches with `_scheduledStopAtMs`, this loop ticks
+every 60s and kills any serve whose stamp is in the past.
+
+Single small module. Delete this file + the registration line in app.py
+and the feature stops doing anything; scheduler-launched serves just
+stay up until the user kills them manually.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import json
+import logging
+import time
+from pathlib import Path
+
+import httpx
+
+logger = logging.getLogger(__name__)
+
+
+def _internal_headers() -> dict:
+    from core.middleware import INTERNAL_TOOL_HEADER, INTERNAL_TOOL_TOKEN
+    return {INTERNAL_TOOL_HEADER: INTERNAL_TOOL_TOKEN}
+
+
+async def _delete_endpoint_for_task(task: dict) -> None:
+    """Drop the auto-registered model endpoint for a scheduled-stop serve.
+
+    Without this, killing the tmux session leaves the endpoint sitting in
+    the picker (probe goes offline; chats still try to route there) and
+    the user has to delete it by hand in Settings -> Endpoints.
+    """
+    import re as _re
+    payload = task.get("payload") or {}
+    cmd = str(payload.get("_cmd") or "")
+    remote = task.get("remoteHost") or ""
+    # Build host the same way _auto_register_llm_endpoint does so URL match wins.
+    if remote:
+        host = remote.split("@")[-1] if "@" in remote else remote
+    else:
+        host = "host.docker.internal"
+    port_match = _re.search(r"--port\s+(\d+)", cmd)
+    ollama_host_match = _re.search(r"OLLAMA_HOST=[^\s]*?:(\d+)", cmd)
+    if port_match:
+        port = int(port_match.group(1))
+    elif ollama_host_match:
+        port = int(ollama_host_match.group(1))
+    elif "ollama" in cmd:
+        port = 11434
+    else:
+        port = 8080
+    base_url = f"http://{host}:{port}/v1"
+    try:
+        async with httpx.AsyncClient(timeout=8) as client:
+            r = await client.get(
+                "http://localhost:7000/api/model-endpoints",
+                headers=_internal_headers(),
+            )
+            if r.status_code >= 400:
+                return
+            eps = r.json() if r.content else []
+            # Prefer exact URL match; fall back to host:port substring so we
+            # still catch the case where 0.0.0.0 vs the registered host
+            # representation diverged.
+            ep = next((e for e in eps if e.get("base_url") == base_url), None)
+            if not ep:
+                hostport = f"{host}:{port}"
+                ep = next((e for e in eps if hostport in (e.get("base_url") or "")), None)
+            if ep:
+                await client.delete(
+                    f"http://localhost:7000/api/model-endpoints/{ep['id']}",
+                    headers=_internal_headers(),
+                )
+                logger.info(
+                    f"cookbook_serve_lifecycle: deleted endpoint {ep.get('id')} "
+                    f"({ep.get('base_url')}) after scheduled stop"
+                )
+    except Exception as e:
+        logger.warning(f"cookbook_serve_lifecycle: endpoint delete failed: {e}")
+
+
+async def _stop_serve(session_id: str, remote_host: str = "", ssh_port: str = "") -> bool:
+    """Kill the tmux session that hosts the serve.
+
+    There's no `/api/model/stop` route — the cookbook UI and the chat
+    agent both kill via `/api/shell/exec` running a `tmux kill-session`
+    (wrapped in ssh for remote hosts). Mirror that here so the
+    lifecycle loop can actually stop scheduler-launched serves at
+    window-end. Without this, the action stamped `_scheduledStopAtMs`
+    correctly but every kill attempt failed silently (the route
+    returned 404 and the result was logged as "failed").
+    """
+    import shlex
+    if remote_host:
+        port_flag = f"-p {shlex.quote(str(ssh_port))} " if ssh_port and str(ssh_port) != "22" else ""
+        cmd = (
+            f"ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "
+            f"{port_flag}{shlex.quote(remote_host)} "
+            f"'tmux kill-session -t {shlex.quote(session_id)}'"
+        )
+    else:
+        cmd = f"tmux kill-session -t {shlex.quote(session_id)}"
+    try:
+        async with httpx.AsyncClient(timeout=15) as client:
+            r = await client.post(
+                "http://localhost:7000/api/shell/exec",
+                json={"command": cmd},
+                headers=_internal_headers(),
+            )
+            if r.status_code >= 400:
+                return False
+            data = r.json() if r.content else {}
+            ec = data.get("exit_code")
+            # tmux returns non-zero when the session is already gone
+            # ("can't find session: ..."). That's still "stop succeeded"
+            # from our POV — the goal is no live session at the end.
+            if ec in (None, 0):
+                return True
+            stderr = (data.get("stderr") or "").lower()
+            return "no server" in stderr or "can't find session" in stderr or "session not found" in stderr
+    except Exception as e:
+        logger.warning(f"cookbook_serve_lifecycle: stop {session_id} failed: {e}")
+        return False
+
+
+async def _tick() -> None:
+    state_path = Path("/app/data/cookbook_state.json")
+    if not state_path.exists():
+        return
+    try:
+        state = json.loads(state_path.read_text(encoding="utf-8"))
+    except Exception:
+        return
+    tasks = state.get("tasks") or []
+    now_ms = int(time.time() * 1000)
+    to_stop = []
+    for t in tasks:
+        if not isinstance(t, dict):
+            continue
+        stop_at = t.get("_scheduledStopAtMs")
+        if not isinstance(stop_at, (int, float)):
+            continue
+        if stop_at > now_ms:
+            continue
+        if (t.get("status") or "").lower() in {"stopped", "ended", "killed", "crashed"}:
+            continue
+        sid = t.get("sessionId") or t.get("id")
+        if not sid:
+            continue
+        to_stop.append((sid, t.get("remoteHost") or "", t.get("sshPort") or ""))
+    if not to_stop:
+        return
+    # Re-read state once before writing so we capture any updates from
+    # concurrent UI syncs.
+    stopped_any = False
+    for sid, host, port in to_stop:
+        ok = await _stop_serve(sid, host, port)
+        logger.info(f"cookbook_serve_lifecycle: stop {sid} (host={host or 'local'}): {'ok' if ok else 'failed'}")
+        if ok:
+            stopped_any = True
+            # Drop the auto-registered endpoint so the model picker and
+            # the chat router don't keep pointing at a dead server.
+            for t in tasks:
+                if isinstance(t, dict) and (t.get("sessionId") == sid or t.get("id") == sid):
+                    if t.get("type") == "serve":
+                        await _delete_endpoint_for_task(t)
+                    t["status"] = "stopped"
+                    t["_scheduledStopAtMs"] = None
+                    t["_lastStatusFlipAt"] = now_ms
+                    break
+    if stopped_any:
+        try:
+            from core.atomic_io import atomic_write_json
+            state["tasks"] = tasks
+            atomic_write_json(state_path, state)
+        except Exception as e:
+            logger.warning(f"cookbook_serve_lifecycle: state write failed: {e}")
+
+
+async def cookbook_serve_lifecycle_loop() -> None:
+    """Forever-loop. Registered as a startup task in app.py."""
+    await asyncio.sleep(20)  # let the rest of startup settle
+    while True:
+        try:
+            await _tick()
+        except Exception as e:
+            logger.warning(f"cookbook_serve_lifecycle tick failed: {e}")
+        await asyncio.sleep(60)
diff --git a/src/task_scheduler.py b/src/task_scheduler.py
index 65fc451..2fcb5dc 100644
--- a/src/task_scheduler.py
+++ b/src/task_scheduler.py
@@ -1018,6 +1018,10 @@ class TaskScheduler:
             kwargs = {"owner": task.owner, "task_name": task.name, "progress_cb": _progress}
             if task.action in ("run_script", "run_local", "ssh_command") and task.prompt:
                 kwargs["script" if task.action in ("run_script", "run_local") else "command"] = task.prompt
+            # cookbook_serve carries its JSON config in task.prompt — feed it
+            # through as `command` so action_cookbook_serve can json.loads it.
+            elif task.action == "cookbook_serve" and task.prompt:
+                kwargs["command"] = task.prompt
             result, success = await action_fn(**kwargs)
             return result, success
         except TaskNoop:
diff --git a/src/tool_execution.py b/src/tool_execution.py
index a667266..e84a414 100644
--- a/src/tool_execution.py
+++ b/src/tool_execution.py
@@ -1119,6 +1119,7 @@ async def execute_tool_block(
         do_manage_documents, do_manage_settings, do_manage_notes,
         do_manage_calendar,
         do_download_model, do_serve_model, do_list_served_models, do_stop_served_model,
+        do_tail_serve_output,
         do_list_downloads, do_cancel_download, do_search_hf_models, do_list_cached_models,
         do_list_serve_presets, do_serve_preset, do_adopt_served_model,
         do_list_cookbook_servers,
@@ -1290,6 +1291,9 @@ async def execute_tool_block(
     elif tool == "stop_served_model":
         desc = "stop_served_model"
         result = await do_stop_served_model(content, owner=owner)
+    elif tool == "tail_serve_output":
+        desc = "tail_serve_output"
+        result = await do_tail_serve_output(content, owner=owner)
     elif tool == "list_downloads":
         desc = "list_downloads"
         result = await do_list_downloads(content, owner=owner)
diff --git a/src/tool_implementations.py b/src/tool_implementations.py
index c7b2649..dbaf50c 100644
--- a/src/tool_implementations.py
+++ b/src/tool_implementations.py
@@ -5,6 +5,7 @@ Extracted tool implementation functions (do_* and helpers) from agent_tools.py.
 These handle the actual execution logic for each tool type.
 """
 
+import asyncio
 import json
 import logging
 import os
@@ -1956,7 +1957,19 @@ async def do_manage_notes(content: str, owner: Optional[str] = None) -> Dict:
             )
             db.add(note)
             db.commit()
-            return {"response": f"Note created: \"{title or '(untitled)'}\" (id: {note.id[:8]})", "exit_code": 0}
+            # Return note_id so the chat-side renderer can build a real
+            # "View note" button that opens the notes modal at this id.
+            # Previously the create response only included a prose
+            # confirmation; the model would type "View note" as a markdown
+            # link with no target, leaving the user with a click that
+            # did nothing and uncertainty about whether the note was made.
+            return {
+                "response": f"Note created: \"{title or '(untitled)'}\" (id: {note.id[:8]})",
+                "note_id": note.id,
+                "note_title": title or "",
+                "open_url": f"/#open=notes&note={note.id}",
+                "exit_code": 0,
+            }
 
         elif action == "update":
             note_id = args.get("id", "")
@@ -2603,6 +2616,8 @@ async def _cookbook_env_for_host(host: str) -> Dict[str, Any]:
 
     return {
         "env_prefix": env_prefix,
+        "env_type": env_kind,
+        "env_path": env_path,
         "gpus": env_root.get("gpus") or "",
         "platform": platform,
         "hf_token": env_root.get("hfToken") or "",
@@ -3041,6 +3056,31 @@ async def do_serve_model(content: str, owner: Optional[str] = None) -> Dict:
     # the UI uses. Without env_prefix, `vllm serve …` lands in a shell
     # without the user's venv and fails 'command not found'.
     env_cfg = await _cookbook_env_for_host(host)
+    # Rewrite bare `vllm` / `python3` leading tokens to the venv's absolute
+    # binary path when the target host has a venv configured. SSH non-
+    # interactive shells often leave ~/.local/bin ahead of the venv bin on
+    # PATH even with the venv activated, so `vllm serve` finds the wrong
+    # binary and crashes early (e.g. compute_89 torch ABI errors on an old
+    # user-site torch). This mirrors what static/js/cookbook.js does in
+    # _buildServeCmd for the UI launch path.
+    env_path = (env_cfg.get("env_path") or "").rstrip("/")
+    env_type = (env_cfg.get("env_type") or env_cfg.get("env") or "").lower()
+    if env_type == "venv" and env_path:
+        venv_bin = f"{env_path}/bin"
+        # Match the FIRST shell-token: skip leading KEY=VAL env-var prefixes
+        # (CUDA_VISIBLE_DEVICES=… VLLM_USE_FLASHINFER_SAMPLER=…) before the binary.
+        import re as _re3
+        tokens = cmd.split()
+        idx = 0
+        env_re = _re3.compile(r"^[A-Za-z_][A-Za-z0-9_]*=")
+        while idx < len(tokens) and env_re.match(tokens[idx]):
+            idx += 1
+        if idx < len(tokens):
+            head = tokens[idx]
+            if head in ("vllm", "python3", "python"):
+                tokens[idx] = f"{venv_bin}/{head}"
+                cmd = " ".join(tokens)
+                payload["cmd"] = cmd
     if env_cfg.get("env_prefix"): payload["env_prefix"] = env_cfg["env_prefix"]
     if env_cfg.get("gpus"):       payload["gpus"]       = env_cfg["gpus"]
     if env_cfg.get("hf_token"):   payload["hf_token"]   = env_cfg["hf_token"]
@@ -3059,7 +3099,19 @@ async def do_serve_model(content: str, owner: Optional[str] = None) -> Dict:
             )
             note = "" if registered else " (state-write failed — task may not show in UI)"
             return {"output": f"Serving {repo_id} (session: {sid}){note}", "session_id": sid, "exit_code": 0}
-        return {"error": data.get("error", "Serve failed"), "exit_code": 1}
+        # FastAPI HTTPException puts the message under `detail`, not `error`.
+        # Surface BOTH so the agent sees "Invalid characters in cmd" (from
+        # _validate_serve_cmd rejecting `&&`/`source`/`cd`) instead of
+        # the generic "Serve failed", which leaves it with nothing to act on.
+        err_msg = data.get("error") or data.get("detail") or "Serve failed"
+        hint = ""
+        if isinstance(err_msg, str) and "cmd" in err_msg.lower():
+            hint = (" — the cmd must START with an allowlisted binary "
+                    "(vllm, python3, llama-server, ollama, sglang, lmdeploy, node, npx). "
+                    "Do NOT prefix with `cd …`, `source …`, or chain with `&&`. "
+                    "env_prefix (e.g. `source ~/qwen35-env/bin/activate`) is added "
+                    "automatically from the host's saved venv settings.")
+        return {"error": f"{err_msg}{hint}", "exit_code": 1}
     except Exception as e:
         return {"error": str(e), "exit_code": 1}
 
@@ -3103,13 +3155,31 @@ async def do_list_served_models(content: str, owner: Optional[str] = None) -> Di
             "exit_code": 0,
         }
 
+    # Sort so the agent sees what's actually LIVE first. Stopped/error/
+    # completed tasks are mostly historical noise — they shouldn't lead
+    # the list when something is genuinely serving.
+    _ORDER = {
+        "ready": 0, "running": 1, "loading": 1, "warming": 1,
+        "queued": 2, "starting": 2,
+        "error": 5, "crashed": 5, "failed": 5,
+        "stopped": 6, "killed": 6, "cancelled": 6, "canceled": 6,
+        "done": 7, "completed": 7, "finished": 7,
+    }
+    def _rank(t: Dict[str, Any]) -> int:
+        phase = (t.get("phase") or t.get("status") or "unknown").lower()
+        return _ORDER.get(phase, 3)
+    merged.sort(key=_rank)
+
     cb_n = len(cookbook_tasks)
     ext_n = len(external)
+    live_n = sum(1 for t in merged if _rank(t) <= 2)
     header = []
     if cb_n:
         header.append(f"{cb_n} cookbook-tracked")
     if ext_n:
         header.append(f"{ext_n} external")
+    if live_n:
+        header.insert(0, f"{live_n} LIVE")
     lines = [f"Running: {', '.join(header)}."]
     for t in merged:
         phase = t.get("phase") or t.get("status", "unknown")
@@ -3136,8 +3206,20 @@ async def do_list_served_models(content: str, owner: Optional[str] = None) -> Di
         if t.get("status") == "error" and t.get("output_tail"):
             tail = str(t.get("output_tail") or "").strip()
             if tail:
+                # Prefer a window around a Python traceback if one exists,
+                # falling back to the last 30 lines. The previous 6-line
+                # tail showed only the post-crash bash prompt / neofetch
+                # banner ("Locale: C / Ubuntu_Odysseus ❯") — useless for
+                # diagnosis. The traceback we want is usually 50-200 lines
+                # earlier in the buffer.
+                _tail_lines = tail.splitlines()
+                _shown = _tail_lines[-30:]
+                for _i, _ln in enumerate(_tail_lines):
+                    if "Traceback (most recent call last)" in _ln or "ERROR" in _ln or "Error:" in _ln:
+                        _shown = _tail_lines[_i:_i + 40]
+                        break
                 lines.append("    recent log:")
-                for line in tail.splitlines()[-6:]:
+                for line in _shown:
                     lines.append(f"      {line[:220]}")
         if t.get("external") and t.get("cmdline_preview"):
             lines.append(f"    cmd: {t['cmdline_preview']}")
@@ -3243,6 +3325,125 @@ async def do_stop_served_model(content: str, owner: Optional[str] = None) -> Dic
     )
 
 
+async def do_tail_serve_output(content: str, owner: Optional[str] = None) -> Dict:
+    """Capture the last N lines of a cookbook task's tmux pane — remote-aware.
+
+    Used by the agent to debug a failed/stuck serve: list_served_models tells
+    you the task is `crashed`, this tool returns the actual stderr/traceback
+    so the agent can match it against a known fix (compute_89 nvcc mismatch,
+    flashinfer version mismatch, OOM, missing kernels, etc.) and decide
+    whether to relaunch via serve_model with new flags.
+    """
+    import httpx
+    import shlex
+    try:
+        args = _parse_tool_args(content)
+    except ValueError:
+        return {"error": "Invalid JSON arguments", "exit_code": 1}
+    session_id = (args.get("session_id") or "").strip()
+    if not session_id:
+        return {"error": "session_id is required (from list_served_models)", "exit_code": 1}
+    import re as _re
+    if not _re.fullmatch(r"[a-zA-Z0-9_-]+", session_id):
+        return {"error": "Invalid session_id format", "exit_code": 1}
+    try:
+        tail = int(args.get("tail") or 400)
+    except (TypeError, ValueError):
+        tail = 400
+    tail = max(20, min(tail, 4000))
+    headers = _internal_headers()
+    remote = (args.get("remote_host") or args.get("host") or "").strip()
+    sport = (args.get("ssh_port") or "").strip()
+    # Resolve host from cookbook state if caller didn't pass one — same
+    # lookup _cookbook_kill_session uses.
+    if not remote:
+        state: Dict[str, Any] = {}
+        try:
+            async with httpx.AsyncClient(timeout=10) as client:
+                resp = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", headers=headers)
+                state = resp.json() or {}
+        except Exception as e:
+            logger.debug(f"cookbook state lookup failed for {session_id}: {e}")
+        if isinstance(state, dict):
+            for t in (state.get("tasks") or []):
+                if isinstance(t, dict) and (t.get("sessionId") == session_id or t.get("id") == session_id):
+                    remote = t.get("remoteHost") or ""
+                    if not sport:
+                        sport = t.get("sshPort") or ""
+                    break
+    # Prefer the persisted /tmp/odysseus-tmux/SESSION.log file over the
+    # live tmux pane. The pane is what the user would see scrolling on
+    # their screen — including the post-crash neofetch banner and the
+    # idle bash prompt that overwrites the actual traceback the moment
+    # vllm exits. The log file is the raw stdout/stderr of the wrapped
+    # process and survives the crash unchanged. We only fall back to
+    # the pane when the log file doesn't exist (older sessions launched
+    # before the tmux+tee wrapper was added).
+    log_path = f"/tmp/odysseus-tmux/{session_id}.log"
+    pane_inner = f"tmux capture-pane -t {shlex.quote(session_id)} -p -S -{tail} 2>/dev/null"
+    file_inner = f"tail -n {tail} {shlex.quote(log_path)} 2>/dev/null"
+    inner = (
+        f"if [ -s {shlex.quote(log_path)} ]; then {file_inner}; "
+        f"else {pane_inner}; fi"
+    )
+    if remote:
+        _pf = f"-p {shlex.quote(str(sport))} " if sport and str(sport) != "22" else ""
+        cmd = (
+            f"ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "
+            f"{_pf}{shlex.quote(remote)} {shlex.quote(inner)}"
+        )
+        host_label = remote
+    else:
+        cmd = inner
+        host_label = "local"
+    try:
+        async with httpx.AsyncClient(timeout=20) as client:
+            resp = await client.post(f"{_COOKBOOK_BASE}/api/shell/exec",
+                                     json={"command": cmd}, headers=headers)
+        if resp.status_code >= 400:
+            return {"error": f"shell/exec returned HTTP {resp.status_code}: {resp.text[:200]}", "exit_code": 1}
+        data = resp.json() if resp.content else {}
+        output_text = (data.get("stdout") or "").strip()
+        stderr_text = (data.get("stderr") or "").strip()
+        rc = data.get("exit_code")
+        if rc not in (None, 0) and not output_text:
+            already_gone = any(s in (stderr_text or "").lower() for s in ("no server running", "can't find session", "session not found"))
+            if already_gone:
+                return {"output": f"Tmux session {session_id} on {host_label} is gone (task already exited).", "exit_code": 0, "session_id": session_id, "host": host_label}
+            return {"error": f"capture-pane failed on {host_label}: {stderr_text or f'exit {rc}'}", "exit_code": 1}
+        # Dedupe download-progress noise. A 100-shard HF download produces
+        # tens of thousands of `model-NN-of-MM.safetensors: 91%|...` lines
+        # that all look the same to the agent and drown the actual error.
+        # Keep only one sample per (file, decile-percent) bucket.
+        import re as _re2
+        lines = output_text.splitlines()
+        dedup_lines = []
+        seen_progress = set()
+        progress_re = _re2.compile(r"^([\w./\-]+):\s+(\d+)%")
+        for ln in lines:
+            m = progress_re.match(ln.strip())
+            if m:
+                key = (m.group(1), int(m.group(2)) // 10)  # bucket by 10%
+                if key in seen_progress:
+                    continue
+                seen_progress.add(key)
+            dedup_lines.append(ln)
+        output_text = "\n".join(dedup_lines)
+        # Hard cap so the agent doesn't blow its token budget.
+        MAX_CHARS = 8000
+        if len(output_text) > MAX_CHARS:
+            output_text = "…(earlier output truncated)…\n" + output_text[-MAX_CHARS:]
+        return {
+            "output": output_text or "(empty pane)",
+            "session_id": session_id,
+            "host": host_label,
+            "tail_lines": tail,
+            "exit_code": 0,
+        }
+    except Exception as e:
+        return {"error": str(e), "exit_code": 1}
+
+
 async def do_list_downloads(content: str, owner: Optional[str] = None) -> Dict:
     """List in-flight model downloads (filters /api/cookbook/tasks/status to type=download)."""
     import httpx
@@ -3615,38 +3816,133 @@ async def do_serve_preset(content: str, owner: Optional[str] = None) -> Dict:
 
 
 async def do_list_cached_models(content: str, owner: Optional[str] = None) -> Dict:
-    """List models already cached locally (or on a remote host)."""
+    """List models already cached locally and/or on remote hosts.
+
+    With no `host` arg, scans EVERY configured Cookbook server (and local)
+    and aggregates — so the agent sees the full inventory in one call
+    instead of having to query each server individually.
+    """
     import httpx
     try:
         args = _parse_tool_args(content) if content.strip() else {}
     except ValueError:
         return {"error": "Invalid JSON arguments", "exit_code": 1}
-    params: Dict[str, str] = {}
     raw_host = (args.get("host") or "").strip()
-    host = await _resolve_cookbook_host(raw_host) if raw_host else ""
-    if host:
-        params["host"] = host
-    if args.get("model_dir"):
-        params["model_dir"] = args["model_dir"]
-    if args.get("ssh_port"):
-        params["ssh_port"] = str(args["ssh_port"])
-    if args.get("platform"):
-        params["platform"] = args["platform"]
+    headers = _internal_headers()
+
+    async def _scan_one(host_label: str, host_val: str, ssh_port: str = "",
+                        platform: str = "", model_dir: str = "") -> list:
+        """Hit /api/model/cached for one host; tag each returned model with its source."""
+        p: Dict[str, str] = {}
+        if host_val:
+            p["host"] = host_val
+        # Caller-provided override beats per-server config beats nothing.
+        if args.get("model_dir"):
+            p["model_dir"] = args["model_dir"]
+        elif model_dir:
+            p["model_dir"] = model_dir
+        if ssh_port:
+            p["ssh_port"] = ssh_port
+        elif args.get("ssh_port"):
+            p["ssh_port"] = str(args["ssh_port"])
+        if platform:
+            p["platform"] = platform
+        elif args.get("platform"):
+            p["platform"] = args["platform"]
+        try:
+            async with httpx.AsyncClient(timeout=60) as client:
+                resp = await client.get(f"{_COOKBOOK_BASE}/api/model/cached",
+                                        params=p, headers=headers)
+                data = resp.json()
+            ms = data.get("models", []) if isinstance(data, dict) else (data or [])
+            for m in ms:
+                m["host"] = host_label or "local"
+            return ms or []
+        except Exception as e:
+            logger.debug(f"list_cached_models scan({host_label}) failed: {e}")
+            return []
+
+    # When the caller specifies a host explicitly, scan only that one (old behaviour).
+    # Otherwise iterate every configured server + local so the agent doesn't
+    # have to repeat the call per server.
     try:
-        async with httpx.AsyncClient(timeout=60) as client:
-            resp = await client.get(f"{_COOKBOOK_BASE}/api/model/cached",
-                                    params=params, headers=_internal_headers())
-            data = resp.json()
-        models = data.get("models", []) if isinstance(data, dict) else data
+        # Pull configured servers from cookbook state (used for resolving
+        # modelDirs both when caller specifies a host and when we scan all).
+        servers: list = []
+        try:
+            async with httpx.AsyncClient(timeout=10) as client:
+                st = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", headers=headers)
+                st_data = st.json() if st.headers.get("content-type", "").startswith("application/json") else {}
+            servers = (st_data.get("env", {}) or {}).get("servers") or []
+        except Exception as e:
+            logger.debug(f"server list fetch failed: {e}")
+            st_data = {}
+
+        def _dirs_for(server_record: Dict[str, Any]) -> str:
+            """Comma-joined modelDirs from a saved server record (Settings).
+
+            Filters out the HF cache (~/.cache/huggingface/hub) — the backend
+            scan script always scans it by default, so re-passing it as an
+            extra model_dir is redundant AND confuses some path-handling
+            edge cases where the extra dir suppresses the deeper scan.
+            We only need to forward the NON-default dirs (e.g. /mnt/HADES/models).
+            """
+            mds = server_record.get("modelDirs") if isinstance(server_record, dict) else None
+            HF_DEFAULTS = {"~/.cache/huggingface/hub", "~/.cache/huggingface"}
+            if isinstance(mds, list):
+                extras = [d for d in mds if isinstance(d, str) and d.strip() and d.strip() not in HF_DEFAULTS]
+                return ",".join(extras)
+            if isinstance(mds, str) and mds.strip() not in HF_DEFAULTS:
+                return mds
+            return ""
+
+        if raw_host:
+            host = await _resolve_cookbook_host(raw_host)
+            # Find this host's saved record so its modelDirs apply too.
+            srv = next(
+                (s for s in servers if isinstance(s, dict)
+                 and (s.get("name") == raw_host or s.get("host") == host or s.get("host") == raw_host)),
+                {},
+            )
+            models = await _scan_one(raw_host, host, model_dir=_dirs_for(srv))
+        else:
+            # Always include local. Local's saved record is the one with no host.
+            local_srv = next((s for s in servers if isinstance(s, dict) and not (s.get("host") or "").strip()), {})
+            scans: list = [_scan_one("local", "", model_dir=_dirs_for(local_srv))]
+            for s in servers:
+                if not isinstance(s, dict):
+                    continue
+                name = s.get("name") or s.get("host")
+                host_val = s.get("host") or ""
+                if not host_val:
+                    continue
+                scans.append(_scan_one(
+                    name,
+                    host_val,
+                    ssh_port=str(s.get("port") or ""),
+                    platform=s.get("platform") or "",
+                    model_dir=_dirs_for(s),
+                ))
+            results = await asyncio.gather(*scans, return_exceptions=False)
+            # Dedupe by (host, repo_id) — same model could appear in both HF cache + Ollama list.
+            seen = set()
+            models: list = []
+            for batch in results:
+                for m in batch:
+                    key = (m.get("host", ""), m.get("repo_id", ""))
+                    if key in seen:
+                        continue
+                    seen.add(key)
+                    models.append(m)
         if not models:
-            # Filesystem cache scans can miss models downloaded into the HF
-            # default cache when the server has no explicit model_dir configured.
-            # Still surface completed Cookbook downloads so the agent doesn't
-            # incorrectly assume a model is absent and re-download it.
+            # Cache scans can miss models downloaded into the HF default cache
+            # when the server has no explicit model_dir configured. Surface
+            # completed Cookbook download tasks so the agent doesn't conclude
+            # a model is absent and re-download it.
             downloaded = []
             try:
                 async with httpx.AsyncClient(timeout=10) as client:
-                    st = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", headers=_internal_headers())
+                    st = await client.get(f"{_COOKBOOK_BASE}/api/cookbook/state", headers=headers)
                     state = st.json() if st.headers.get("content-type", "").startswith("application/json") else {}
                 for t in (state.get("tasks") or []):
                     if not isinstance(t, dict) or t.get("type") != "download":
@@ -3654,27 +3950,44 @@ async def do_list_cached_models(content: str, owner: Optional[str] = None) -> Di
                     if (t.get("status") or "").lower() not in {"done", "completed"}:
                         continue
                     task_host = t.get("remoteHost") or (t.get("payload") or {}).get("remote_host") or ""
-                    if host and task_host != host:
+                    if raw_host and task_host != raw_host:
                         continue
                     repo = t.get("modelId") or t.get("repoId") or (t.get("payload") or {}).get("repo_id") or t.get("name")
                     if repo and repo not in downloaded:
                         downloaded.append(repo)
             except Exception:
                 downloaded = []
+            host_str = f" on {raw_host}" if raw_host else ""
             if downloaded:
-                host_str = f" on {raw_host or host}" if (raw_host or host) else ""
                 lines = [f"No cache paths were detected{host_str}, but Cookbook has completed download task(s):"]
                 lines.extend(f"- {repo} — downloaded via Cookbook task" for repo in downloaded)
                 return {"output": "\n".join(lines), "models": [{"repo_id": repo, "source": "cookbook_task"} for repo in downloaded], "exit_code": 0}
-            host_str = f" on {raw_host or host}" if (raw_host or host) else ""
             return {"output": f"No cached models found{host_str}.", "exit_code": 0}
-        lines = [f"{len(models)} cached model(s):"]
-        for m in models:
-            name = m.get("repo_id", "?")
-            sz = m.get("size") or (f"{m.get('size_bytes', 0) / (1024**3):.1f}GB" if m.get("size_bytes") else "")
-            inc = " (incomplete)" if m.get("has_incomplete") else ""
-            kind = " [diffusion]" if m.get("is_diffusion") else ""
-            lines.append(f"- {name}{kind} — {sz}{inc}")
+        # Multi-host scan: group by host so the agent sees inventory per server.
+        # Single-host scan: flat list (matches old output shape).
+        if raw_host:
+            lines = [f"{len(models)} cached model(s) on {raw_host}:"]
+            for m in models:
+                name = m.get("repo_id", "?")
+                sz = m.get("size") or (f"{m.get('size_bytes', 0) / (1024**3):.1f}GB" if m.get("size_bytes") else "")
+                inc = " (incomplete)" if m.get("has_incomplete") else ""
+                kind = " [diffusion]" if m.get("is_diffusion") else ""
+                lines.append(f"- {name}{kind} — {sz}{inc}")
+        else:
+            from collections import defaultdict as _dd
+            by_host = _dd(list)
+            for m in models:
+                by_host[m.get("host", "local")].append(m)
+            lines = [f"{len(models)} cached model(s) across {len(by_host)} server(s):"]
+            for host_name in sorted(by_host.keys()):
+                lines.append(f"\n[{host_name}]")
+                for m in by_host[host_name]:
+                    name = m.get("repo_id", "?")
+                    sz = m.get("size") or (f"{m.get('size_bytes', 0) / (1024**3):.1f}GB" if m.get("size_bytes") else "")
+                    inc = " (incomplete)" if m.get("has_incomplete") else ""
+                    kind = " [diffusion]" if m.get("is_diffusion") else ""
+                    backend = f" ({m.get('backend')})" if m.get("backend") else ""
+                    lines.append(f"- {name}{kind}{backend} — {sz}{inc}")
         return {"output": "\n".join(lines), "models": models, "exit_code": 0}
     except Exception as e:
         return {"error": str(e), "exit_code": 1}
diff --git a/src/tool_index.py b/src/tool_index.py
index 2464a81..09e2dcf 100644
--- a/src/tool_index.py
+++ b/src/tool_index.py
@@ -37,7 +37,15 @@ ALWAYS_AVAILABLE = frozenset({
     # keyword hints when the user is actually talking about cookbook.
     # Keeping the always-on set small leaves room in the ~16-tool
     # budget for manage_tasks / manage_calendar / etc.
-    "list_served_models", "stop_served_model",
+    "list_served_models", "stop_served_model", "tail_serve_output",
+    # Serving is a core agent capability — keep these always available so
+    # the router doesn't lose them on phrasings like "servic" / "fire up" / "boot".
+    "serve_model", "serve_preset", "list_serve_presets",
+    "list_cached_models", "list_cookbook_servers",
+    # Fallback when serve_model's allowlist rejects a cmd or when the
+    # model was launched out-of-band via bash+tmux — without this the
+    # session is invisible to the cookbook UI even though it's running.
+    "adopt_served_model",
     # Generic API loopback — the catch-all when no named tool fits.
     "app_api",
     # Memory is ambient — "remember this" can follow any message regardless
@@ -118,9 +126,10 @@ BUILTIN_TOOL_DESCRIPTIONS: Dict[str, str] = {
     "manage_notes": "Create and manage notes and checklists (Google Keep-style). ALWAYS use this for note/todo/checklist/reminder creation — NEVER hit /api/notes via app_api. Accepts natural-language `due_date` like 'tomorrow at 9am' or '11pm today' (parsed in the USER'S timezone). The due_date IS the reminder — it fires a notification at that time, so do NOT also create a calendar event for the same reminder. Set colors, labels, pin, archive. Do NOT use manage_memory for note content.",
     "manage_calendar": "Calendar event management: list, create, update, delete. Each event can carry a tag/category (event_type — work/personal/health/travel/meal/social/admin/other) and importance (low/normal/high/critical). Resolve today/tomorrow using the Current date and time context, then use ISO datetimes in the user's local wall time; supports all-day events. For event reminders/alarms, pass reminder_minutes; this creates the Notes reminder, so do not also call manage_notes for the same reminder.",
     "download_model": "Download a HuggingFace model to a local or remote server. Specify repo_id (e.g. 'Qwen/Qwen3-8B'), optional server host, and optional include filter for specific files.",
-    "serve_model": "Start serving a model with vLLM, SGLang, llama.cpp, Ollama, or Diffusers. For image/inpainting/diffusion use python3 scripts/diffusion_server.py --model <repo> --port 8100. After launch, call list_served_models for readiness/errors and retry suggestions.",
+    "serve_model": "Start serving a model with vLLM, SGLang, llama.cpp, Ollama, or Diffusers. cmd MUST start with the binary directly — e.g. `vllm serve /mnt/HADES/models/Qwen3.5-397B-A17B-AWQ --port 8003 --tensor-parallel-size 8 …`. NEVER prefix with `cd …`, `source …`, or chain with `&&`/`||` — those get rejected by the validator. The venv activation (env_prefix) and CUDA env are added automatically from the target host's saved settings. For image/inpainting/diffusion use python3 scripts/diffusion_server.py --model <repo> --port 8100. After launch, call list_served_models for readiness/errors and retry suggestions. If serve_model fails with 'Invalid characters in cmd', simplify to the bare binary + args.",
     "list_served_models": "List currently running model servers in the Cookbook — shows status (loading, ready, idle, error), model name, port, throughput, and serve failure diagnosis/retry suggestions. Use when the user asks 'what's running', 'show my cookbook', 'which models are up', 'what's serving'.",
     "stop_served_model": "Stop a running model server in the Cookbook by session ID or model name. Use when the user says 'kill my cookbook', 'stop the model', 'kill the serve', 'shut down vLLM', 'cancel the running model'.",
+    "tail_serve_output": "Read the actual tmux stderr/traceback of a cookbook serve/download task. Use to debug WHY a task is `crashed`/`error` (compute_89 nvcc mismatch, OOM, missing kernels, wrong attention backend, etc.) so you can call serve_model with adjusted flags. Pass session_id from list_served_models; tail defaults to 300, bump if the error references 'see root cause above'.",
     "list_downloads": "List in-progress HuggingFace model downloads in the Cookbook. Shows model name, phase, percent, session ID. Use for 'what's downloading', 'show my downloads', 'check download progress'.",
     "cancel_download": "Cancel an in-progress model download by tmux session ID. Use for 'cancel the download', 'stop downloading X', 'kill the download'. Call list_downloads first to get the session_id.",
     "search_hf_models": "Search HuggingFace for models matching a query (e.g. 'qwen 8B', 'flux', 'llama-3 instruct'). Returns ranked repo IDs with sizes and download counts. Use for 'find a model', 'search huggingface for X', 'what models are there for Y'.",
diff --git a/src/tool_schemas.py b/src/tool_schemas.py
index 10874ae..0db5ab1 100644
--- a/src/tool_schemas.py
+++ b/src/tool_schemas.py
@@ -787,6 +787,21 @@ FUNCTION_TOOL_SCHEMAS = [
             }
         }
     },
+    {
+        "type": "function",
+        "function": {
+            "name": "tail_serve_output",
+            "description": "Read the last N lines of a cookbook serve/download task's tmux pane. Use ONLY in this exact sequence: (1) the user asked to serve a model, (2) you launched it via serve_model, (3) list_served_models reports the NEW task as crashed/error, (4) call tail_serve_output on the new sessionId to find the root cause, (5) call serve_model again with adjusted flags. DO NOT call this on old stopped/completed download tasks — they are historical and won't tell you anything about the current attempt. DO NOT investigate past failures before launching; the environment may have changed since.",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "session_id": {"type": "string", "description": "Tmux session id from list_served_models (e.g. 'serve-abc12345', 'cookbook-a1b2c3d4')."},
+                    "tail": {"type": "integer", "description": "How many lines of pane scrollback to fetch (default 300, max 4000). Bump this if the error in the visible tail references an earlier line ('see root cause above')."},
+                },
+                "required": ["session_id"]
+            }
+        }
+    },
     {
         "type": "function",
         "function": {
diff --git a/static/index.html b/static/index.html
index c5f3828..a4637d3 100644
--- a/static/index.html
+++ b/static/index.html
@@ -2283,6 +2283,7 @@
 <script type="module" src="/static/js/chatStream.js"></script>
 <script type="module" src="/static/js/chat.js?v=20260604s"></script>
 <script type="module" src="/static/js/cookbook.js"></script>
+<script src="/static/js/cookbookSchedule.js"></script>
 <script type="module" src="/static/js/search-chat.js"></script>
 <script type="module" src="/static/js/compare/index.js"></script>
 <script type="module" src="/static/js/theme.js"></script>
diff --git a/static/js/admin.js b/static/js/admin.js
index 25e3faa..5019096 100644
--- a/static/js/admin.js
+++ b/static/js/admin.js
@@ -731,14 +731,14 @@ function initEndpointForm() {
   urlInput.addEventListener('input', () => {
     if (provider.value && urlInput.value.trim() !== provider.value) {
       provider.value = '';
-      if (kindSel) kindSel.value = 'proxy';
+      if (kindSel) kindSel.value = 'api';
       _renderPickerMenu();
       _syncPickerCurrent();
     }
   });
-  if (kindSel) kindSel.value = provider.value ? 'api' : (kindSel.value || 'proxy');
+  if (kindSel) kindSel.value = kindSel.value || 'api';
   function _apiEndpointKind() {
-    return (kindSel && kindSel.value) ? kindSel.value : (provider.value ? 'api' : 'proxy');
+    return (kindSel && kindSel.value) ? kindSel.value : 'api';
   }
   function _normalizeBaseUrl(raw) {
     let u = raw.trim();
diff --git a/static/js/calendar.js b/static/js/calendar.js
index ebd6bfc..fec9f82 100644
--- a/static/js/calendar.js
+++ b/static/js/calendar.js
@@ -299,13 +299,40 @@ async function _updateEvent(uid, data) {
 }
 
 async function _deleteEvent(uid) {
-  const backup = _allEvents[uid];
-  delete _allEvents[uid];
+  // Multiple "sibling" UIDs may need to vanish optimistically:
+  //   1. The exact uid the user clicked.
+  //   2. If the user clicked a RECURRING occurrence (uid contains "::"),
+  //      the server deletes the master + every occurrence — so we strip
+  //      the master uid AND every "master::*" expansion from the
+  //      client-side caches too. Without this, deleting one day of a
+  //      multi-day recurring task only removed THAT day visually; the
+  //      other days kept rendering until the next full refresh.
+  //   3. If the user clicked the master, strip every "master::*"
+  //      expansion (same prefix scan).
+  const masterUid = uid.includes('::') ? uid.split('::')[0] : uid;
+  const backups = {};
+  const _matches = (k) => k === uid || k === masterUid || k.startsWith(masterUid + '::');
+
+  for (const k of Object.keys(_allEvents)) {
+    if (_matches(k)) {
+      backups[k] = _allEvents[k];
+      delete _allEvents[k];
+    }
+  }
+  if (Array.isArray(_events)) {
+    _events = _events.filter(e => !(e && _matches(e.uid || '')));
+  }
+  if (_open) _render();
+  _updateBadge && _updateBadge();
   const isRecurring = uid.includes('::');
   fetch(`${API_BASE}/api/calendar/events/${encodeURIComponent(uid)}`, {
     method: 'DELETE', credentials: 'same-origin',
   }).then(r => {
-    if (!r.ok) throw new Error('HTTP ' + r.status);
+    // 404 = the event was already deleted by another session/device. That's
+    // exactly the state we want, so treat it as success — don't restore the
+    // row, otherwise the user can never clear stale cached events that were
+    // deleted from desktop while mobile was open (and vice versa).
+    if (!r.ok && r.status !== 404) throw new Error('HTTP ' + r.status);
     if (isRecurring) {
       _fetchedRanges = [];
       localStorage.removeItem(LS_KEY);
@@ -313,7 +340,11 @@ async function _deleteEvent(uid) {
       _saveCache && _saveCache();
     }
   }).catch((e) => {
-    if (backup) _allEvents[uid] = backup;
+    // Server rejected — restore every uid we optimistically stripped.
+    for (const [k, ev] of Object.entries(backups)) {
+      _allEvents[k] = ev;
+      if (Array.isArray(_events)) _events.push(ev);
+    }
     if (window.uiModule) window.uiModule.showError('Failed to delete event: ' + (e?.message || 'unknown'));
     if (_open) _render();
   });
@@ -980,7 +1011,39 @@ async function _renderMonth() {
       const startColInt = Math.round(startCol);
       const endColInt = Math.round(endCol);
       const span = endColInt - startColInt + 1;
-      h += `<div class="cal-multiday" style="--col:${startColInt};--span:${span};--slot:${barSlot};background:${_calColor(md)};--cal-event-fg:${_calEventFg(md)}" draggable="true" data-uid="${_e(md.uid)}" title="${_e(md.summary)}">${_e(md.summary)}</div>`;
+      // Proportional offsets for timed events that span across midnight
+      // (e.g. 8 PM Mon → 5 AM Tue). Without this, an overnight serve
+      // window visually fills the ENTIRE next day even when it only
+      // covers a few hours. All-day events keep the full-day shape.
+      // Bar visually spans from column (col+startFrac) to (col+span-1+endFrac),
+      // so a 8 PM→5 AM run shows ~17% of day 1 + ~21% of day 2, not 200%.
+      let startFrac = 0;
+      let endFrac = 1;
+      if (!md.all_day) {
+        try {
+          const sIso = md.dtstart || '';
+          const eIso = md.dtend || '';
+          const sDate = sIso ? new Date(sIso) : null;
+          const eDate = eIso ? new Date(eIso) : null;
+          // First-visible-day fraction (0 = midnight start). Clamp to 0
+          // when the event started before this row, so the bar still
+          // starts at the row's left edge.
+          if (sDate && !isNaN(sDate) && mdStart >= rowStart) {
+            const midnight = new Date(sDate); midnight.setHours(0, 0, 0, 0);
+            startFrac = Math.max(0, Math.min(1, (sDate - midnight) / 86400000));
+          }
+          if (eDate && !isNaN(eDate) && mdEnd <= rowEnd) {
+            const midnight = new Date(eDate); midnight.setHours(0, 0, 0, 0);
+            endFrac = Math.max(0, Math.min(1, (eDate - midnight) / 86400000));
+            // CalDAV end-times are exclusive: an event ending at exactly
+            // 00:00 on day N really ended at end-of-day N-1, so endFrac=0
+            // would visually paint a zero-width slice. Snap to a small
+            // visible minimum (5% of a day) so the bar still registers.
+            if (endFrac === 0) endFrac = 1;
+          }
+        } catch (_) { startFrac = 0; endFrac = 1; }
+      }
+      h += `<div class="cal-multiday" style="--col:${startColInt};--span:${span};--slot:${barSlot};--start-frac:${startFrac.toFixed(4)};--end-frac:${endFrac.toFixed(4)};background:${_calColor(md)};--cal-event-fg:${_calEventFg(md)}" draggable="true" data-uid="${_e(md.uid)}" title="${_e(md.summary)}">${_e(md.summary)}</div>`;
       barSlot++;
     }
     h += '</div>';
@@ -2688,6 +2751,28 @@ function _showEventForm(existing, defaultDate, defaultEndDate) {
         <option value="FREQ=YEARLY" ${existing?.rrule === 'FREQ=YEARLY' ? 'selected' : ''}>Yearly</option>
       </select>
       <textarea id="cal-f-desc" placeholder="Description" class="cal-input" rows="2">${_e(existing?.description || '')}</textarea>
+      ${(() => {
+        // Cookbook-task back-link. When the description carries a
+        // "cookbook_task_id: <id>" marker (set by cookbookSchedule.js
+        // when the user ticks "Create event in calendar"), render an
+        // Open-task button so the user can jump straight to the
+        // source task in the Tasks tab.
+        const _ct = (existing?.description || '').match(/cookbook_task_id:\s*([A-Za-z0-9_-]+)/);
+        if (!_ct) return '';
+        return `<div class="cal-form-row cal-form-cookbook-link" style="align-items:center;gap:8px;">
+          <button type="button" id="cal-f-open-task" data-task-id="${_e(_ct[1])}"
+            style="display:inline-flex;align-items:center;gap:6px;background:transparent;
+                   color:var(--accent,var(--red));border:1px solid var(--border);
+                   border-radius:6px;padding:5px 10px;font:inherit;font-size:12px;cursor:pointer;">
+            <svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
+              <path d="M9 11l3 3L22 4"/>
+              <path d="M21 12v7a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2h11"/>
+            </svg>
+            <span>Open in Tasks</span>
+          </button>
+          <span style="font-size:11px;opacity:0.5;">Linked to a Cookbook scheduled task</span>
+        </div>`;
+      })()}
       <div class="cal-form-row" style="align-items:center;gap:8px;">
         <label style="font-size:11px;display:flex;align-items:center;gap:4px;"><svg class="cal-remind-bell" width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="var(--accent, var(--red))" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M18 8A6 6 0 0 0 6 8c0 7-3 9-3 9h18s-3-2-3-9"/><path d="M13.73 21a2 2 0 0 1-3.46 0"/></svg><span style="opacity:0.5;">Reminder</span></label>
         <select id="cal-f-remind" class="cal-input" style="flex:1;">
@@ -2737,6 +2822,19 @@ function _showEventForm(existing, defaultDate, defaultEndDate) {
   document.getElementById('cal-f-allday')?.addEventListener('change', (e) => {
     document.getElementById('cal-time-row').style.display = e.target.checked ? 'none' : '';
   });
+  // Open-task back-link button — dynamically imports the tasks module
+  // so the linkage works even if the user is opening the calendar
+  // before they've touched the Tasks tab in this session.
+  document.getElementById('cal-f-open-task')?.addEventListener('click', async (e) => {
+    e.preventDefault();
+    const taskId = e.currentTarget?.dataset?.taskId || '';
+    try {
+      const m = await import('/static/js/tasks.js');
+      const openTasks = m.openTasks || m.default?.openTasks;
+      if (typeof openTasks === 'function') { openTasks(taskId); return; }
+    } catch (_) {}
+    document.getElementById('tool-tasks-btn')?.click();
+  });
   // Keep end date >= start date
   document.getElementById('cal-f-date')?.addEventListener('change', () => {
     const s = document.getElementById('cal-f-date').value;
@@ -3341,6 +3439,44 @@ window.addEventListener('calendar-refresh', () => {
     .catch(() => {});
 });
 
+// Cross-session catch-up: when the tab/app becomes visible again (you alt-tab
+// back, the mobile app comes to the foreground, or you switch back from
+// another browser session), drop the range cache and re-fetch. Without this,
+// a delete or add on desktop never propagates to the still-open mobile tab
+// until the user does a full reload — so stale events sit there undeletable
+// (they 404 on the server). Triggers on every visibility change but the
+// fetch is cheap and already de-duped by _fetchPromise on line ~120.
+let _lastVisRefetchAt = 0;
+const _VIS_REFETCH_MIN_MS = 10 * 1000;  // throttle if user is rapidly tab-flipping
+document.addEventListener('visibilitychange', () => {
+  if (document.visibilityState !== 'visible') return;
+  const now = Date.now();
+  if (now - _lastVisRefetchAt < _VIS_REFETCH_MIN_MS) return;
+  _lastVisRefetchAt = now;
+  _fetchedRanges = [];
+  const range = (_view === 'year')
+    ? [`${_currentDate.getFullYear()}-01-01`, `${_currentDate.getFullYear() + 1}-01-01`]
+    : (_view === 'week') ? _weekRange(_currentDate) : _monthRange(_currentDate);
+  _fetchEvents(range[0], range[1], /*force*/ true)
+    .then(() => { if (_open) _render(); _updateBadge(); })
+    .catch(() => {});
+});
+
+// Same idea for window-level focus — covers desktop alt-tabbing back to a
+// browser that already had the tab visible (visibilitychange won't fire).
+window.addEventListener('focus', () => {
+  const now = Date.now();
+  if (now - _lastVisRefetchAt < _VIS_REFETCH_MIN_MS) return;
+  _lastVisRefetchAt = now;
+  _fetchedRanges = [];
+  const range = (_view === 'year')
+    ? [`${_currentDate.getFullYear()}-01-01`, `${_currentDate.getFullYear() + 1}-01-01`]
+    : (_view === 'week') ? _weekRange(_currentDate) : _monthRange(_currentDate);
+  _fetchEvents(range[0], range[1], /*force*/ true)
+    .then(() => { if (_open) _render(); _updateBadge(); })
+    .catch(() => {});
+});
+
 // Calendar reminders are stored as Notes. The Notes reminder loop owns
 // notification dispatch so calendar reminders do not fire twice.
 
diff --git a/static/js/cookbook-diagnosis.js b/static/js/cookbook-diagnosis.js
index 4d6e807..ec81aa0 100644
--- a/static/js/cookbook-diagnosis.js
+++ b/static/js/cookbook-diagnosis.js
@@ -378,16 +378,12 @@ export const ERROR_PATTERNS = [
     message: 'Model architecture too new for installed vLLM/transformers.',
     fixes: [
       { label: 'Try --trust-remote-code', action: (panel) => _serveAutoRetry(panel, '--trust-remote-code'), autofix: true },
-      { label: 'Update vLLM on server', action: (panel) => {
-        const taskEl = panel.closest('.cookbook-task');
-        const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
-        const host = task?.remoteHost || '';
-        const prefix = _buildEnvPrefix();
-        const pipCmd = prefix ? prefix + ' pip install -U vllm transformers' : 'pip install -U vllm transformers';
-        const cmd = host ? _sshCmd(host, pipCmd) : pipCmd;
-        // Run in tmux so it doesn't timeout
-        const name = 'update-vllm';
-        _launchServeTask(name, 'pip-update', cmd);
+      { label: 'Update vLLM on server', action: () => {
+        // Use the venv's python3 by absolute path when configured (SSH non-
+        // interactive sessions often pick user-site Python over the venv).
+        const _vp = (_envState.env === 'venv' && _envState.envPath)
+          ? `${_envState.envPath.replace(/\/+$/, '')}/bin/python3` : 'python3';
+        _launchServeTask('update-vllm', 'pip-update', `${_vp} -m pip install -U vllm transformers`);
       }},
     ],
   },
@@ -395,16 +391,10 @@ export const ERROR_PATTERNS = [
     pattern: /Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels\/layer/i,
     message: 'Transformers/kernels package mismatch.',
     fixes: [
-      { label: 'Repair kernel package', action: (panel) => {
-        const taskEl = panel.closest('.cookbook-task');
-        const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
-        const host = task?.remoteHost || '';
-        const prefix = _buildEnvPrefix();
-        const pipCmd = prefix
-          ? prefix + ' python3 -m pip install --user --break-system-packages "kernels<0.15"'
-          : 'python3 -m pip install --user --break-system-packages "kernels<0.15"';
-        const cmd = host ? _sshCmd(host, pipCmd) : pipCmd;
-        _launchServeTask('repair-kernels', 'pip-update', cmd);
+      { label: 'Repair kernel package', action: () => {
+        const _vp = (_envState.env === 'venv' && _envState.envPath)
+          ? `${_envState.envPath.replace(/\/+$/, '')}/bin/python3` : 'python3';
+        _launchServeTask('repair-kernels', 'pip-update', `${_vp} -m pip install --user --break-system-packages kernels<0.15`);
       }},
       { label: 'Open Dependencies', action: () => _openCookbookDependencies('sglang') },
     ],
@@ -445,14 +435,10 @@ export const ERROR_PATTERNS = [
     pattern: /Triton kernels.*Failed to import|cannot import name '\w+' from 'triton_kernels/i,
     message: 'Triton kernels version mismatch. Non-fatal warning — model will still run, just without optimized MoE kernels.',
     fixes: [
-      { label: 'Update triton on server', action: (panel) => {
-        const taskEl = panel.closest('.cookbook-task');
-        const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
-        const host = task?.remoteHost || '';
-        const prefix = _buildEnvPrefix();
-        const pipCmd = prefix ? prefix + ' pip install -U triton triton-kernels' : 'pip install -U triton triton-kernels';
-        const cmd = host ? _sshCmd(host, pipCmd) : pipCmd;
-        _launchServeTask('update-triton', 'pip-update', cmd);
+      { label: 'Update triton on server', action: () => {
+        const _vp = (_envState.env === 'venv' && _envState.envPath)
+          ? `${_envState.envPath.replace(/\/+$/, '')}/bin/python3` : 'python3';
+        _launchServeTask('update-triton', 'pip-update', `${_vp} -m pip install -U triton triton-kernels`);
       }},
     ],
   },
@@ -474,14 +460,56 @@ export const ERROR_PATTERNS = [
     pattern: /attention_sink|sliding.window.*not supported|sliding_window.*incompatible/i,
     message: 'Model uses attention features unsupported in this vLLM version.',
     fixes: [
-      { label: 'Update vLLM on server', action: (panel) => {
-        const taskEl = panel.closest('.cookbook-task');
-        const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
-        const host = task?.remoteHost || '';
-        const prefix = _buildEnvPrefix();
-        const pipCmd = prefix ? prefix + ' pip install -U vllm' : 'pip install -U vllm';
-        const cmd = host ? _sshCmd(host, pipCmd) : pipCmd;
-        _launchServeTask('update-vllm', 'pip-update', cmd);
+      { label: 'Update vLLM on server', action: () => {
+        const _vp = (_envState.env === 'venv' && _envState.envPath)
+          ? `${_envState.envPath.replace(/\/+$/, '')}/bin/python3` : 'python3';
+        _launchServeTask('update-vllm', 'pip-update', `${_vp} -m pip install -U vllm`);
+      }},
+    ],
+  },
+  {
+    // FlashInfer JIT-compiles attention kernels for the host GPU on first
+    // use. If the system /usr/bin/nvcc is older than CUDA 11.8 it can't
+    // target sm_89/sm_90 (Ada/Hopper), and the engine workers die before
+    // they can report a useful traceback. Two quick paths out: pick a
+    // non-flashinfer attention backend, or set CUDACXX to a newer nvcc
+    // (vLLM installs nvidia-cuda-nvcc into the venv — point at that).
+    pattern: /nvcc fatal\s+:\s+Unsupported gpu architecture 'compute_\d+'/i,
+    message: 'FlashInfer is JIT-compiling sampling kernels with an nvcc too old for this GPU (no sm_89 / sm_90 support — pre-CUDA 11.8). Changing the attention backend does not help — flashinfer JITs the SAMPLER too. The clean fix is to set VLLM_USE_FLASHINFER_SAMPLER=0 so vLLM uses its native sampler instead.',
+    suggestion: 'Suggested action: relaunch with VLLM_USE_FLASHINFER_SAMPLER=0 prepended. (Confirmed on the QuantTrio/Qwen3.5 model card as the canonical workaround.)',
+    fixes: [
+      { label: 'Retry with VLLM_USE_FLASHINFER_SAMPLER=0', action: (panel) => _serveAutoRetryReplace(panel, '', 'VLLM_USE_FLASHINFER_SAMPLER=0 ', { prepend: true }) },
+      { label: 'Uninstall flashinfer-python', action: () => {
+        // Hard fallback: vLLM 0.22 reaches into flashinfer for sampling kernels
+        // even with VLLM_USE_FLASHINFER_SAMPLER=0 in some configs. Removing
+        // the package forces it onto the native sampler.
+        const _vp = (_envState.env === 'venv' && _envState.envPath)
+          ? `${_envState.envPath.replace(/\/+$/, '')}/bin/python3` : 'python3';
+        _launchServeTask('uninstall-flashinfer', 'pip-update', `${_vp} -m pip uninstall flashinfer-python -y`);
+      }},
+      { label: 'Edit serve', action: (panel) => _openServeEditFromDiagnosis(panel) },
+    ],
+  },
+  {
+    // vLLM <-> torch ABI mismatch: vLLM imports torch.library helpers
+    // (`infer_schema`, `register_fake`, etc.) that only exist on newer torch
+    // versions. When the installed torch is older, the import fails before
+    // any server code runs. Fix is to reinstall vllm (which pulls a matching
+    // torch) or upgrade torch directly.
+    pattern: /ImportError: cannot import name '[^']+' from 'torch(\.\w+)+'/i,
+    message: 'vLLM was built against a newer torch than what is installed. Reinstall vLLM so pip pulls a compatible torch (or upgrade torch directly).',
+    fixes: [
+      { label: 'Reinstall vLLM (pulls matching torch)', action: () => {
+        // Absolute path to the venv's python3 — bare `python3` lands in the
+        // wrong site-packages over SSH when ~/.local/bin precedes the venv.
+        const _vp = (_envState.env === 'venv' && _envState.envPath)
+          ? `${_envState.envPath.replace(/\/+$/, '')}/bin/python3` : 'python3';
+        _launchServeTask('reinstall-vllm', 'pip-reinstall', `${_vp} -m pip install --force-reinstall vllm`);
+      }},
+      { label: 'Upgrade torch only', action: () => {
+        const _vp = (_envState.env === 'venv' && _envState.envPath)
+          ? `${_envState.envPath.replace(/\/+$/, '')}/bin/python3` : 'python3';
+        _launchServeTask('upgrade-torch', 'pip-update', `${_vp} -m pip install -U torch`);
       }},
     ],
   },
@@ -607,59 +635,24 @@ export function _showDiagnosis(panel, diagnosis, sourceText) {
   };
 
   if (fixes.length) {
+    // Always render fixes as inline buttons. The old "Actions ▾" dropdown
+    // (for >3 fixes) was broken — the menu wouldn't open in some panels and
+    // hid useful actions behind a non-working affordance. Inline buttons wrap
+    // naturally in `.cookbook-diag-fixes` (flex-wrap) so a long list reflows
+    // onto multiple rows instead of getting collapsed.
     const row = document.createElement('div');
     row.className = 'cookbook-diag-fixes';
-
-    if (fixes.length <= 3) {
-      for (const fix of fixes) {
-        const btn = document.createElement('button');
-        btn.className = 'cookbook-btn cookbook-diag-btn';
-        btn.type = 'button';
-        btn.innerHTML = _diagFixIcon(fix.label) + '<span class="cookbook-diag-btn-label">' + _diagEsc(fix.label) + '</span>';
-        btn.addEventListener('click', (e) => {
-          e.stopPropagation();
-          runFix(fix, btn);
-        });
-        row.appendChild(btn);
-      }
-      body.appendChild(row);
-      return;
-    }
-
-    const wrap = document.createElement('div');
-    wrap.className = 'cookbook-diag-actions';
-
-    const trigger = document.createElement('button');
-    trigger.className = 'cookbook-btn cookbook-diag-action-trigger';
-    trigger.type = 'button';
-    trigger.textContent = 'Actions';
-    trigger.appendChild(document.createTextNode(' ▾'));
-    wrap.appendChild(trigger);
-
-    const menu = document.createElement('div');
-    menu.className = 'dropdown cookbook-diag-menu hidden';
     for (const fix of fixes) {
-      const item = document.createElement('button');
-      item.type = 'button';
-      item.innerHTML = _diagFixIcon(fix.label) + '<span class="cookbook-diag-btn-label">' + _diagEsc(fix.label) + '</span>';
-      item.addEventListener('click', async (e) => {
+      const btn = document.createElement('button');
+      btn.className = 'cookbook-btn cookbook-diag-btn';
+      btn.type = 'button';
+      btn.innerHTML = _diagFixIcon(fix.label) + '<span class="cookbook-diag-btn-label">' + _diagEsc(fix.label) + '</span>';
+      btn.addEventListener('click', (e) => {
         e.stopPropagation();
-        if (item.dataset.busy || trigger.dataset.busy) return;
-        item.dataset.busy = '1';
-        await runFix(fix, trigger, fix.label, () => menu.classList.add('hidden'), () => delete item.dataset.busy);
+        runFix(fix, btn);
       });
-      menu.appendChild(item);
+      row.appendChild(btn);
     }
-    wrap.appendChild(menu);
-    trigger.addEventListener('click', (e) => {
-      e.stopPropagation();
-      if (trigger.dataset.busy) return;
-      document.querySelectorAll('.cookbook-diag-menu').forEach(m => {
-        if (m !== menu) m.classList.add('hidden');
-      });
-      menu.classList.toggle('hidden');
-    });
-    row.appendChild(wrap);
     body.appendChild(row);
   }
 }
diff --git a/static/js/cookbook.js b/static/js/cookbook.js
index 507777a..358d664 100644
--- a/static/js/cookbook.js
+++ b/static/js/cookbook.js
@@ -353,6 +353,15 @@ function _buildEnvPrefixWindows() {
 }
 
 export function _buildServeCmd(f, modelName, backend) {
+  // When a venv is configured on the chosen server, use the venv's binaries
+  // by absolute path. Bare `vllm` / `python3` relies on PATH, and SSH non-
+  // interactive sessions often leave a user-site install (~/.local/bin/vllm)
+  // ahead of the venv's bin, so the WRONG vllm gets launched even with the
+  // venv activated. Absolute path sidesteps the whole PATH question.
+  const _isVenv = _envState.env === 'venv' && _envState.envPath;
+  const _venvBin = _isVenv ? (_envState.envPath.replace(/\/+$/, '') + '/bin/') : '';
+  const _vllmBin = _venvBin ? `${_venvBin}vllm` : 'vllm';
+  const _py3Bin = _venvBin ? `${_venvBin}python3` : 'python3';
   let cmd = '';
   if (backend === 'vllm') {
     const gpuId = f.gpu_id?.trim() || '';
@@ -361,7 +370,15 @@ export function _buildServeCmd(f, modelName, backend) {
       const _opts = _detectModelOptimizations(modelName);
       if (_opts.envVars.length) cmd += _opts.envVars.join(' ') + ' ';
     }
-    cmd += `vllm serve ${modelName} --host 0.0.0.0 --port ${f.port || '8000'}`;
+    // Pinned attention backend (Attention field). Empty = let vLLM pick.
+    const _attn = (f.vllm_attn_backend ?? '').toString().trim();
+    if (_attn) cmd += `VLLM_ATTENTION_BACKEND=${_attn} `;
+    // Free-text "Env" field — verbatim KEY=VAL pairs (space-separated).
+    // Collapse any pasted newlines/tabs so the backend allowlist (which
+    // rejects \n / \r) doesn't trip on a multi-line paste from a model card.
+    const _extraEnv = (f.extra_env ?? '').toString().replace(/\s+/g, ' ').trim();
+    if (_extraEnv) cmd += _extraEnv + ' ';
+    cmd += `${_vllmBin} serve ${modelName} --host 0.0.0.0 --port ${f.port || '8000'}`;
     cmd += ` --tensor-parallel-size ${f.tp || '1'}`;
     cmd += ` --max-model-len ${f.ctx || '8192'}`;
     cmd += ` --gpu-memory-utilization ${f.gpu_mem || '0.90'}`;
@@ -389,7 +406,9 @@ export function _buildServeCmd(f, modelName, backend) {
   } else if (backend === 'sglang') {
     const gpuId = f.gpu_id?.trim() || '';
     if (gpuId) cmd += `CUDA_VISIBLE_DEVICES=${gpuId} `;
-    cmd += `python3 -m sglang.launch_server --model-path ${modelName} --host 0.0.0.0 --port ${f.port || '30000'}`;
+    const _extraEnv = (f.extra_env ?? '').toString().replace(/\s+/g, ' ').trim();
+    if (_extraEnv) cmd += _extraEnv + ' ';
+    cmd += `${_py3Bin} -m sglang.launch_server --model-path ${modelName} --host 0.0.0.0 --port ${f.port || '30000'}`;
     if (f.tp && f.tp !== '1') cmd += ` --tp ${f.tp}`;
     if (f.ctx) cmd += ` --context-length ${f.ctx}`;
     if (f.gpu_mem && f.gpu_mem !== '0.90') cmd += ` --mem-fraction-static ${f.gpu_mem}`;
@@ -642,13 +661,20 @@ async function _fetchDependencies() {
       const winBlocked = !isLocal && _isWindows() && _winUnsupported.has(pkg.name);
       const note = pkg.status_note ? `<div class="memory-item-meta" style="font-size:10px;opacity:0.65;margin-top:3px;">${esc(pkg.status_note)}</div>` : '';
       const updateNote = pkg.installed && pkg.pip_update_available === false && pkg.update_note ? `<div class="memory-item-meta" style="font-size:10px;opacity:0.55;margin-top:3px;">${esc(pkg.update_note)}</div>` : '';
-      // Inline "Rebuild" tag for the llama_cpp row only. Styled as a
-      // .cookbook-dep-tag so it matches the LLM category tag's pill look,
-      // and lives to the LEFT of the category tag (clear affordance before
-      // the row "value").
-      const _rebuildBtn = (pkg.name === 'llama_cpp')
-        ? `<button type="button" class="cookbook-dep-tag cookbook-dep-rebuild" id="cookbook-rebuild-engine" title="Clear the cached llama.cpp build so the next serve recompiles from source (use after installing a CUDA/ROCm toolkit to turn a CPU-only build into a GPU build).">Rebuild</button>`
-        : '';
+      // Inline rebuild/reinstall tag. Styled as a .cookbook-dep-tag so it
+      // matches the LLM category tag's pill look, and lives to the LEFT of the
+      // category tag. llama_cpp uses the /api/cookbook/rebuild-engine flow
+      // (clear cached binary so next serve recompiles); vllm/sglang use the
+      // diagnosis-style `_launchServeTask` with `pip install --force-reinstall`
+      // so the user can watch the pip install in the Running tab.
+      let _rebuildBtn = '';
+      if (pkg.name === 'llama_cpp') {
+        _rebuildBtn = `<button type="button" class="cookbook-dep-tag cookbook-dep-rebuild" id="cookbook-rebuild-engine" title="Clear the cached llama.cpp build so the next serve recompiles from source (use after installing a CUDA/ROCm toolkit to turn a CPU-only build into a GPU build).">Rebuild</button>`;
+      } else if (pkg.name === 'vllm' && pkg.installed) {
+        _rebuildBtn = `<button type="button" class="cookbook-dep-tag cookbook-dep-rebuild cookbook-dep-reinstall" data-reinstall-pkg="vllm" title="Force-reinstall vLLM (pulls a matching torch). Runs as a tmux task in the Running tab.">Reinstall</button>`;
+      } else if (pkg.name === 'sglang' && pkg.installed) {
+        _rebuildBtn = `<button type="button" class="cookbook-dep-tag cookbook-dep-rebuild cookbook-dep-reinstall" data-reinstall-pkg="sglang" title="Force-reinstall SGLang (pulls a matching torch). Runs as a tmux task in the Running tab.">Reinstall</button>`;
+      }
       return `<div class="cookbook-dep-row${winBlocked ? ' cookbook-dep-blocked' : ''}" data-pkg-name="${esc(pkg.name)}" data-dep-pip="${esc(pkg.pip || '')}" data-dep-target="${isLocal ? 'local' : 'remote'}" data-dep-kind="${esc(pkg.kind || 'python')}">`
         + `<div class="cookbook-dep-info">`
         + `<div class="memory-item-title">${esc(pkg.name)}</div>`
@@ -696,7 +722,18 @@ async function _fetchDependencies() {
       // for PEP-668-locked system pythons (Arch, newer Debian).
       const _inEnv = _envState.env === 'venv' || _envState.env === 'conda';
       const _pipFlags = (!_isWindows() && !_inEnv) ? ' --user --break-system-packages' : '';
-      const _py = _isWindows() ? 'python' : 'python3';
+      // Use the venv's python3 by absolute path when configured. Even with the
+      // env_prefix sourcing activate, SSH non-interactive sessions sometimes
+      // pick a `python3` ahead of the venv's bin on PATH, so the install
+      // silently lands in the wrong site-packages.
+      let _py;
+      if (_isWindows()) {
+        _py = 'python';
+      } else if (_envState.env === 'venv' && _envState.envPath) {
+        _py = `${_envState.envPath.replace(/\/+$/, '')}/bin/python3`;
+      } else {
+        _py = 'python3';
+      }
       const cmd = `${_py} -m pip install${upgrade ? ' -U' : ''}${_pipFlags} "${pipName}"`;
       let envPrefix = '';
       if (_isWindows()) {
@@ -1072,6 +1109,32 @@ function _wireTabEvents(body) {
     });
   }
 
+  // "Reinstall" buttons for pip-based serving stacks (vllm, sglang). The
+  // deps list renders ASYNCHRONOUSLY after _fetchDependencies resolves, so
+  // attaching listeners directly here would miss buttons that don't exist
+  // yet. Use document-level delegation instead — the click always finds the
+  // right .cookbook-dep-reinstall button no matter when it was painted.
+  if (!document._cookbookReinstallWired) {
+    document._cookbookReinstallWired = true;
+    document.addEventListener('click', async (ev) => {
+      const btn = ev.target.closest?.('.cookbook-dep-reinstall');
+      if (!btn) return;
+      const pkg = btn.dataset.reinstallPkg || '';
+      if (!pkg) return;
+      ev.preventDefault();
+      ev.stopPropagation();
+      const sel = document.getElementById('hwfit-deps-server');
+      if (sel) _applyServerSelection(sel.value);
+      const host = _envState.remoteHost || '';
+      const where = host || 'this server';
+      if (!confirm(`Reinstall ${pkg} on ${where}?\n\nRuns "pip install --force-reinstall --no-deps ${pkg}" as a tmux task. Watch progress in the Running tab.`)) return;
+      const _venvPy = (_envState.env === 'venv' && _envState.envPath)
+        ? `${_envState.envPath.replace(/\/+$/, '')}/bin/python3`
+        : 'python3';
+      _launchServeTask(`reinstall-${pkg}`, 'pip-reinstall', `${_venvPy} -m pip install --force-reinstall --no-deps ${pkg}`);
+    }, true);
+  }
+
   // Serve sort
   const serveSort = document.getElementById('serve-sort');
   if (serveSort) {
diff --git a/static/js/cookbookRunning.js b/static/js/cookbookRunning.js
index 1f225b7..7f3cedd 100644
--- a/static/js/cookbookRunning.js
+++ b/static/js/cookbookRunning.js
@@ -141,6 +141,14 @@ async function _openDownloadForGgufTask(task) {
 function _terminalServeDiagnosis(task, outputText) {
   const out = String(outputText || task?.output || '');
   if (!task || task.type !== 'serve' || !['stopped', 'error', 'crashed', 'failed'].includes(task.status) || !out.trim()) return null;
+  // Pip tasks (Reinstall vLLM, Upgrade torch, etc.) ride on the serve task
+  // type so they get a tmux session + show up in Running tab — but they are
+  // NOT serve invocations. Their output is pip's own; the generic
+  // "Serve stopped before the model became reachable" message + Edit-serve
+  // fix make no sense. Bail so the panel just shows pip's output.
+  const _isPipTask = ((task.payload?.repo_id || '').startsWith('pip-'))
+    || /python3? -m pip\b/.test(task.payload?._cmd || '');
+  if (_isPipTask) return null;
   if (_serveTaskLooksAwqOnLocalBackend(task, out)) {
     return {
       message: 'AWQ/GPTQ/FP8 cannot be served through llama.cpp/Ollama unified-memory mode.',
@@ -266,7 +274,7 @@ const SERVE_STATE_KEY = 'cookbook-serve-state';
 
 // Polling / timeout intervals
 const TASK_POLL_INTERVAL_MS = 3000;       // delay between reconnect-loop iterations
-const BG_MONITOR_INTERVAL_MS = 10000;     // background task status poll
+const BG_MONITOR_INTERVAL_MS = 5000;      // background task status poll
 const STALE_PROGRESS_MS = 5 * 60 * 1000;  // download with no progress this long = stale
 const STARTUP_STALE_PROGRESS_MS = 45 * 1000; // 0%-forever startup stall: retry much sooner
 
@@ -540,6 +548,26 @@ function _serveOutputLooksReady(task) {
 
 function _normalizeTaskForDisplay(task) {
   if (!task || typeof task !== 'object') return task;
+  // Pip tasks (Reinstall vLLM / Upgrade torch / etc.) ride on the serve task
+  // type so they get tmux + the Running tab. They are NOT serves — their
+  // "ready" markers are pip's `Successfully installed` / `Requirement already
+  // satisfied`, not "Application startup complete".
+  const _isPipTask = ((task.payload?.repo_id || '').startsWith('pip-'))
+    || /python3? -m pip\b/.test(task.payload?._cmd || '');
+  if (_isPipTask) {
+    // Override stale status: any pip task whose output carries pip's own
+    // success markers gets displayed as `done` regardless of what's in
+    // localStorage. Old pre-fix runs landed in error/stopped state and
+    // stuck there even after we taught the rest of the flow about pip
+    // tasks — this is the catch-all that flips them to Finished on render.
+    const out = String(task.output || '');
+    const ranOk = /Successfully installed|Requirement already (?:satisfied|up-to-date)/i.test(out)
+      && !/error:|ERROR:/.test(out.slice(-1024));
+    if (ranOk && task.status !== 'done' && task.status !== 'running') {
+      return { ...task, status: 'done' };
+    }
+    return task;
+  }
   if (task.type === 'serve' && task.status === 'done' && !_serveOutputLooksReady(task)) {
     return { ...task, status: 'error' };
   }
@@ -2066,7 +2094,7 @@ export function _renderRunningTab() {
         // Edit serve — open the full serve panel (same as the edit icon),
         // switching to this task's server first so the model is found.
         if (task.type === 'serve' && task.payload?.repo_id) {
-          items.push({ label: 'Edit serve', action: 'edit-panel', custom: () => _openEdit() });
+          items.push({ label: 'Edit in serve panel', action: 'edit-panel', tooltip: 'Open the full Serve config panel pre-filled with this task — pick a different backend, change GPUs, edit env vars, then Launch from there', custom: () => _openEdit() });
         }
         // Save serve — save current launch config as a preset.
         if (task.type === 'serve' && task.payload?._cmd) {
@@ -2079,7 +2107,7 @@ export function _renderRunningTab() {
         // Edit command — only meaningful for serve tasks that aren't running.
         // Lets the user tweak flags after a crash/error and relaunch.
         if (task.type === 'serve' && task.status !== 'running' && task.payload?._cmd) {
-          items.push({ label: 'Edit command', action: 'edit', custom: async () => {
+          items.push({ label: 'Edit cmd & relaunch', action: 'edit', tooltip: 'Edit the raw vllm/llama-server cmd string in a dialog and relaunch immediately on the same host', custom: async () => {
             const newCmd = await _promptEditServeCmd(task.payload._cmd);
             if (newCmd == null) return; // cancelled
             try {
@@ -2173,7 +2201,19 @@ export function _renderRunningTab() {
           _copyText(last);
           uiModule.showToast('Copied last 50 lines');
         }});
-        items.push({ label: 'Remove', action: 'kill', danger: true });
+        // Label matches behavior — the kill handler ALWAYS first kills
+        // the live tmux session and (for serve tasks) deletes the
+        // matching model-endpoint, THEN animates the task card out.
+        // Just "Remove" hid that it stops the live serve too.
+        const _isLive = task.type === 'serve' && ['running', 'ready', 'loading', 'warming', 'starting'].includes(task.status || '');
+        items.push({
+          label: _isLive ? 'Stop and remove' : 'Remove',
+          action: 'kill',
+          tooltip: _isLive
+            ? 'Kill the live tmux session, deregister the chat endpoint, and remove this row'
+            : 'Remove this row',
+          danger: true,
+        });
         // Cancel = mobile-only dismiss item. Same pattern as the email kebab:
         // the `dropdown-cancel-mobile` class is hidden on desktop and styled
         // as a separated bottom row on mobile (border-top + extra padding).
@@ -2200,6 +2240,7 @@ export function _renderRunningTab() {
             + (item.danger ? ' cookbook-dropdown-danger' : '')
             + (item.mobileOnly ? ' dropdown-cancel-mobile' : '');
           div.style.cssText = 'display:flex;align-items:center;gap:8px;';
+          if (item.tooltip) div.title = item.tooltip;
           const ic = _MENU_ICONS[item.action] || '';
           div.innerHTML = `<span style="display:inline-flex;flex-shrink:0;opacity:0.7;"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">${ic}</svg></span><span>${item.label}</span>`;
           div.addEventListener('click', () => {
@@ -2319,22 +2360,57 @@ export function _renderRunningTab() {
       _animateOutThenRemove(el, task.sessionId);
     });
 
-    // Wire kill
-    el.querySelector('.cookbook-task-action-kill').addEventListener('click', () => {
+    // Wire kill — awaits the SSH/tmux kill and verifies the session is
+    // actually gone before removing the row. Previously fire-and-forget,
+    // which meant a failed kill (wrong remoteHost, SSH error, tmux server
+    // already exited) silently left the live serve running while the
+    // row disappeared from the UI.
+    el.querySelector('.cookbook-task-action-kill').addEventListener('click', async () => {
       const outputText = el.querySelector('.cookbook-output-pre')?.textContent || task.output || '';
+      const isLive = task.type === 'serve' && ['running', 'ready', 'loading', 'warming', 'starting'].includes(task.status || '');
       const ollamaUnload = _ollamaUnloadCommand(task, outputText);
       if (ollamaUnload) {
-        fetch('/api/shell/exec', {
+        try {
+          await fetch('/api/shell/exec', {
+            method: 'POST', credentials: 'same-origin',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ command: ollamaUnload }),
+          });
+        } catch (_) { /* unload best-effort */ }
+      }
+      let killOk = true;
+      try {
+        const r = await fetch('/api/shell/exec', {
           method: 'POST', credentials: 'same-origin',
           headers: { 'Content-Type': 'application/json' },
-          body: JSON.stringify({ command: ollamaUnload }),
-        }).catch(() => {});
+          body: JSON.stringify({ command: _tmuxGracefulKill(task) }),
+        });
+        if (r.ok) {
+          const out = await r.json();
+          // Don't trust exit_code alone — tmux kill returns 0 even when
+          // there was nothing to kill. Verify the session is actually gone.
+          if (task.sessionId && isLive) {
+            try {
+              const probe = await fetch('/api/shell/exec', {
+                method: 'POST', credentials: 'same-origin',
+                headers: { 'Content-Type': 'application/json' },
+                body: JSON.stringify({ command: _tmuxCmd(task, `has-session -t ${task.sessionId}`) }),
+              });
+              if (probe.ok) {
+                const pj = await probe.json();
+                // has-session exits 0 when session STILL exists; non-zero = gone.
+                if ((pj.exit_code || 0) === 0) killOk = false;
+              }
+            } catch (_) { /* probe best-effort; trust kill */ }
+          }
+        } else {
+          killOk = false;
+        }
+      } catch (_) { killOk = false; }
+      if (!killOk) {
+        try { uiModule.showToast('Kill failed — session may still be running. Check `tmux ls` on the server.', 'error'); } catch (_) {}
+        return;  // leave the row so the user can retry
       }
-      fetch('/api/shell/exec', {
-        method: 'POST', credentials: 'same-origin',
-        headers: { 'Content-Type': 'application/json' },
-        body: JSON.stringify({ command: _tmuxGracefulKill(task) }),
-      }).catch(() => {});
       if (task.type === 'serve' && task.payload) {
         const endpointUrl = _endpointUrlForTask(task, outputText);
         _removeEndpointByUrl(endpointUrl);
@@ -2373,7 +2449,13 @@ export function _renderRunningTab() {
     if (targetBody) targetBody.appendChild(el);
     else group.appendChild(el);
 
-    if (task.status === 'running') {
+    // Auto-attach the tmux output stream for any task whose underlying
+    // session could still be alive — not just 'running'. Scheduler-
+    // launched serves transition to 'ready' as soon as /v1/models
+    // responds; without this, the user opens the Running tab and sees
+    // only the placeholder ("Launched by scheduled task …") because
+    // _reconnectTask never fires for status 'ready'/'loading'/'warming'.
+    if (['running', 'ready', 'loading', 'warming', 'starting'].includes(task.status)) {
       _reconnectTask(el, task);
     }
   }
@@ -2426,7 +2508,7 @@ async function _reconnectTask(el, task) {
       if (data.exit_code !== 0) {
         failCount++;
         if (failCount < 5) {
-          await new Promise(r => setTimeout(r, 5000));
+          await new Promise(r => setTimeout(r, 3000));
           continue;
         }
         try {
@@ -2447,7 +2529,15 @@ async function _reconnectTask(el, task) {
         }
 
         const lastOutput = output.textContent || '';
-        const diag = _diagnose(lastOutput);
+        // Pip tasks (Reinstall vLLM / Upgrade torch / etc.) must skip the
+        // generic serve `_diagnose` step. Their output is pip's own and the
+        // error patterns there (torch ABI traceback, "No module named torch",
+        // etc.) are routinely matched against the previous tmux scrollback,
+        // tagging a clean pip success as a crashed serve. Detection is the
+        // same shape as the looksSuccessful branch below.
+        const _isPipTaskDiag = ((task.payload?.repo_id || '').startsWith('pip-'))
+          || /python3? -m pip\b/.test(task.payload?._cmd || '');
+        const diag = _isPipTaskDiag ? null : _diagnose(lastOutput);
         if (diag) {
           let diagEl = el.querySelector('.cookbook-diagnosis');
           if (!diagEl) {
@@ -2464,17 +2554,46 @@ async function _reconnectTask(el, task) {
         } else {
           const downloadLooksSuccessful = !lastOutput.includes('DOWNLOAD_FAILED')
             && (lastOutput.includes('DONE') || lastOutput.includes('100%') || lastOutput.includes('/snapshots/') || lastOutput.includes('Download complete') || lastOutput.includes('DOWNLOAD_OK'));
+          // Pip install / reinstall tasks are launched via _launchServeTask (so
+          // they show up in the Running tab + use tmux) but they aren't real
+          // serves — the cmd is `python3 -m pip ...` and the success markers
+          // are pip's own. Without this branch, a successful reinstall ends
+          // with no "Uvicorn running on" line and gets mis-flagged as a crashed
+          // serve.
+          const _isPipTask = ((task.payload?.repo_id || '').startsWith('pip-'))
+            || /python3? -m pip\b/.test(task.payload?._cmd || '');
+          const pipLooksSuccessful = _isPipTask
+            && /Successfully installed|Requirement already (?:satisfied|up-to-date)/i.test(lastOutput)
+            && !/error:|ERROR:/.test(lastOutput.slice(-1024));
           const serveLooksReady = task.type === 'serve' && _serveOutputLooksReady({ ...task, output: lastOutput });
           // Dependency installs are tracked as download tasks but finish with a
-          // pip exit-0 sentinel, not HF download markers — so check that too.
+          // pip exit-0 sentinel, not HF download markers — check that too.
+          // Standalone pip-* serves finish with pip's own success line, not
+          // HF or "Uvicorn running on".
           const depInstallSucceeded = !!task.payload?._dep && _depInstallSucceeded(lastOutput);
-          const looksSuccessful = depInstallSucceeded || (task.type === 'download' ? downloadLooksSuccessful : serveLooksReady);
+          const looksSuccessful = depInstallSucceeded
+            || (task.type === 'download'
+              ? downloadLooksSuccessful
+              : (_isPipTask ? pipLooksSuccessful : serveLooksReady));
           if (!lastOutput.trim() || !looksSuccessful) {
             _updateTask(task.sessionId, { status: 'crashed' });
             el.dataset.status = 'crashed';
             const badge = el.querySelector('.cookbook-task-status');
             if (badge) { badge.textContent = _statusLabel('crashed', task.type); badge.className = 'cookbook-task-status cookbook-task-crashed'; }
-            if (task.type === 'serve') {
+            if (_isPipTask) {
+              // Pip tasks: don't run the serve diagnosis (which would yell
+              // "Serve stopped before the model became reachable"). Show a
+              // pip-tailored message; the user can read pip's own error output
+              // directly above.
+              const _ranOk = /Successfully installed|Requirement already (?:satisfied|up-to-date)/i.test(lastOutput);
+              if (!_ranOk) {
+                _showDiagnosis(el, {
+                  message: 'Pip install did not finish with a success marker. Check the output for the underlying error.',
+                  suggestion: 'Suggested action: copy the troubleshooting bundle. Common causes: missing build deps, network blip, mismatched torch ABI.',
+                  fixes: [],
+                }, lastOutput);
+              }
+            } else if (task.type === 'serve') {
               const diag = _diagnose(lastOutput) || {
                 message: _serveTaskLooksAwqOnLocalBackend(task, lastOutput)
                   ? 'AWQ/GPTQ/FP8 cannot be served through llama.cpp/Ollama unified-memory mode.'
@@ -2553,6 +2672,28 @@ async function _reconnectTask(el, task) {
             }
             _showCookbookNotif(true);
           } else {
+            // Strong completion markers — `DOWNLOAD_OK` is emitted by our
+            // downloader wrapper AFTER the model snapshot is on disk, and
+            // `/snapshots/` only appears once HF has resolved the cached
+            // tree. Either is conclusive. Finalize as done immediately, skip
+            // the 30s debounce — the debounce only exists to guard against
+            // ambiguous markers (bare "100%" / "Download complete") which can
+            // appear mid-stream during multi-file downloads.
+            const _strongDone = task.type === 'download'
+              && (lastOutput.includes('DOWNLOAD_OK') || lastOutput.includes('/snapshots/'));
+            if (_strongDone) {
+              _updateTask(task.sessionId, { status: 'done', _doneConfirmAt: null, _lastStatusFlipAt: Date.now() });
+              el.dataset.status = 'done';
+              const badge = el.querySelector('.cookbook-task-status');
+              if (badge) { badge.textContent = _statusLabel('done', task.type); badge.className = 'cookbook-task-status cookbook-task-done'; }
+              const _chk = el.querySelector('.cookbook-task-check'); if (_chk) _chk.style.display = '';
+              const _sb = el.querySelector('.cookbook-task-serve-btn'); if (_sb) _sb.style.display = '';
+              _showCookbookNotif();
+              _refreshDepsAfterInstall(task);
+              _renderRunningTab();
+              _processQueue();
+              break;
+            }
             // Debounce the done flip. Tmux capture-pane can fail transiently
             // (network blip, ssh reconnect), and the verify has-session right
             // above can briefly report dead even when the session is in the
@@ -2579,7 +2720,7 @@ async function _reconnectTask(el, task) {
                     stillAlive = pData.exit_code === 0;
                   } catch { /* network blip — treat as inconclusive, prefer running */ stillAlive = true; }
                   if (stillAlive) {
-                    _updateTask(task.sessionId, { status: 'running', _doneConfirmAt: null });
+                    _updateTask(task.sessionId, { status: 'running', _doneConfirmAt: null, _lastStatusFlipAt: Date.now() });
                     const _el = document.querySelector(`.cookbook-task[data-task-id="${task.sessionId}"]`);
                     if (_el) {
                       _el.dataset.status = 'running';
@@ -2591,7 +2732,7 @@ async function _reconnectTask(el, task) {
                     }
                     return;
                   }
-                  _updateTask(task.sessionId, { status: 'done', _doneConfirmAt: null });
+                  _updateTask(task.sessionId, { status: 'done', _doneConfirmAt: null, _lastStatusFlipAt: Date.now() });
                   const _el = document.querySelector(`.cookbook-task[data-task-id="${task.sessionId}"]`);
                   if (_el) {
                     _el.dataset.status = 'done';
@@ -2616,8 +2757,14 @@ async function _reconnectTask(el, task) {
 
       const snapshot = (data.stdout || '').trim();
       if (snapshot) {
+        // Only auto-scroll to bottom if the user was already there. When
+        // they've scrolled up to read earlier output, leave their position
+        // alone so a fresh snapshot doesn't yank them back to the tail.
+        // 40px tolerance covers sub-pixel rounding + the moment between
+        // releasing the scrollbar and the next poll arriving.
+        const _atBottom = (output.scrollHeight - output.scrollTop - output.clientHeight) < 40;
         output.textContent = snapshot;
-        output.scrollTop = output.scrollHeight;
+        if (_atBottom) output.scrollTop = output.scrollHeight;
 
         // Live status parsing for download tasks
         if (task.type === 'download') {
@@ -3173,16 +3320,27 @@ export async function _selfHealStaleTasks(opts = {}) {
     // itself fires every 10s, so this almost always fires too, but the
     // guard keeps a fast manual call from doubling up).
     const now = Date.now();
-    if (now - _selfHealLastTs < 8000) return;
+    if (now - _selfHealLastTs < 4000) return;
     _selfHealLastTs = now;
   }
   const tasks = _loadTasks();
-  const candidates = tasks.filter(t =>
-    t.type === 'download'
-    && ['done', 'error', 'crashed', 'stopped'].includes(t.status)
-    && t.sessionId
-    && !String(t.sessionId).startsWith('queue-')
-  );
+  const candidates = tasks.filter(t => {
+    if (t.type !== 'download') return false;
+    if (!['done', 'error', 'crashed', 'stopped'].includes(t.status)) return false;
+    if (!t.sessionId || String(t.sessionId).startsWith('queue-')) return false;
+    // Finished downloads with strong completion markers (DOWNLOAD_OK or HF
+    // /snapshots/ resolution) are demonstrably done — do not flip them back
+    // to running just because the tmux session is still alive (e.g., a
+    // long-lived shell that hosted the download or a flapping SSH that
+    // reports the session as up). This was the main source of finished↔
+    // downloading oscillation on a flaky connection.
+    if (t.status === 'done' && /DOWNLOAD_OK|\/snapshots\//.test(t.output || '')) return false;
+    // Cooldown: never flip the same task more than once every 45s. A flapping
+    // SSH connection used to drive the badge back-and-forth on every probe
+    // cycle; this enforces a stable view between flaps.
+    if (t._lastStatusFlipAt && (Date.now() - t._lastStatusFlipAt < 45000)) return false;
+    return true;
+  });
   if (!candidates.length) return;
   let flipped = 0;
   for (const t of candidates) {
@@ -3200,6 +3358,7 @@ export async function _selfHealStaleTasks(opts = {}) {
         if (ft && ft.status !== 'running') {
           ft.status = 'running';
           ft._selfHealed = true;
+          ft._lastStatusFlipAt = Date.now();
           _saveTasks(fresh);
           flipped++;
           const _el = document.querySelector(`.cookbook-task[data-task-id="${t.sessionId}"]`);
diff --git a/static/js/cookbookSchedule.js b/static/js/cookbookSchedule.js
new file mode 100644
index 0000000..a26de5d
--- /dev/null
+++ b/static/js/cookbookSchedule.js
@@ -0,0 +1,386 @@
+// Cookbook Schedule — opens a small inline form (styled with the app's
+// existing .cookbook-* classes) that creates a ScheduledTask with
+// action=cookbook_serve. Mounted from two places:
+//
+//   1. The ^ button next to Launch in a serve panel.
+//   2. The "Schedule…" entry in the cached-model ⋯ dropdown menu (which
+//      programmatically clicks the ^ button so this module owns the
+//      single source of truth).
+//
+// Feedback uses uiModule.showToast() — the same toast the rest of the
+// app uses for "Saved", "Favorited", etc. — so the success message
+// doesn't introduce a parallel notification style.
+//
+// To remove: delete this file + the <script> tag in index.html + the
+// ^ button in cookbookServe.js + the "cookbook_serve" entry in
+// BUILTIN_ACTIONS + src/cookbook_serve_lifecycle.py + its
+// registration line in app.py.
+
+try { (function () {
+  function _safe(fn) {
+    return function () {
+      try { return fn.apply(this, arguments); }
+      catch (e) { try { console.warn("[cookbookSchedule]", e); } catch (_) {} }
+    };
+  }
+  function esc(s) {
+    return String(s == null ? "" : s)
+      .replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;")
+      .replace(/"/g, "&quot;").replace(/'/g, "&#39;");
+  }
+
+  // Cached handle to the ui.js showToast function. Bound lazily on
+  // first use because ui.js is an ES module — it's not on `window`
+  // unless something else has explicitly exposed it.
+  let _toastFn = null;
+  async function _getToast() {
+    if (_toastFn) return _toastFn;
+    try {
+      const m = await import("/static/js/ui.js");
+      _toastFn = m.default?.showToast || m.showToast || null;
+    } catch (_) { _toastFn = null; }
+    return _toastFn;
+  }
+  // Optional opts: {action, onAction, duration, leadingIcon}
+  async function toast(msg, opts) {
+    const fn = await _getToast();
+    if (fn) {
+      try { fn(msg, opts); return; } catch (_) {}
+    }
+    try { console.log("[toast]", msg); } catch (_) {}
+  }
+
+  // Cached handle to the tasks module so the success toast's "Open"
+  // action can jump straight to the new task in the Tasks tab.
+  let _tasksMod = null;
+  async function _getTasksMod() {
+    if (_tasksMod) return _tasksMod;
+    try { _tasksMod = await import("/static/js/tasks.js"); } catch (_) {}
+    return _tasksMod;
+  }
+  async function openTaskInTasksTab(taskId) {
+    const m = await _getTasksMod();
+    if (m && typeof m.openTasks === "function") {
+      try { m.openTasks(taskId); return; } catch (_) {}
+    }
+    // Last-resort fallback: click the sidebar Tasks button.
+    document.getElementById("tool-tasks-btn")?.click();
+  }
+
+  const DAYS = [
+    { k: "MO", l: "Mon", idx: 0 },
+    { k: "TU", l: "Tue", idx: 1 },
+    { k: "WE", l: "Wed", idx: 2 },
+    { k: "TH", l: "Thu", idx: 3 },
+    { k: "FR", l: "Fri", idx: 4 },
+    { k: "SA", l: "Sat", idx: 5 },
+    { k: "SU", l: "Sun", idx: 6 },
+  ];
+  const WEEKDAYS = new Set(["MO","TU","WE","TH","FR"]);
+
+  // Resolve the model identity from the closest .memory-item card —
+  // that's the canonical container the cookbook serve UI uses, with
+  // the model repo on data-repo. We do NOT grab the title via
+  // textContent, because the title row also contains inline status
+  // pills ("running", "downloading") and an "HF ↗" link — pulling all
+  // of it in turns a clean preset name like "Qwen3.5-397B-A17B-AWQ"
+  // into "Qwen3.5-397B-A17B-AWQ running HF ↗", which then fails the
+  // preset lookup in action_cookbook_serve.
+  function readPanelConfig(arrowBtn) {
+    const item = arrowBtn.closest(".memory-item") || arrowBtn.closest(".hwfit-cached-item");
+    const panel = arrowBtn.closest(".hwfit-serve-panel");
+    const repo = item?.dataset?.repo
+      || arrowBtn.closest(".hwfit-serve-panel")?.dataset?.repo
+      || "";
+    // Title = last segment of the repo (after the final /), which is
+    // exactly what the cookbook UI renders in the card title and what
+    // the preset registry uses as its short name. e.g.
+    //   cyankiwi/Qwen3.5-397B-A17B-AWQ  →  Qwen3.5-397B-A17B-AWQ
+    // Falls back to data-modelName or the bare repo for ollama-style
+    // entries that don't have a slash.
+    let title = "";
+    if (repo) {
+      title = repo.includes("/") ? repo.split("/").pop() : repo;
+    }
+    if (!title) {
+      title = item?.dataset?.modelName || "model";
+    }
+    return { panel, item, title, repo_id: repo, host: item?.dataset?.host || "" };
+  }
+
+  function buildFormHtml(cfg) {
+    return `
+      <div class="hwfit-schedule-form cookbook-panel">
+        <div class="hwfit-schedule-title">
+          <svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
+            <rect x="3" y="4" width="18" height="18" rx="2"/>
+            <line x1="16" y1="2" x2="16" y2="6"/>
+            <line x1="8" y1="2" x2="8" y2="6"/>
+            <line x1="3" y1="10" x2="21" y2="10"/>
+          </svg>
+          <span class="hwfit-schedule-title-text">Schedule serve: <strong>${esc(cfg.title)}</strong></span>
+          <span class="hwfit-schedule-title-spacer"></span>
+          <label class="hwfit-schedule-mirror-toggle" title="Also create a calendar event on the Cookbook calendar">
+            <span class="hwfit-schedule-mirror-label">Create event in calendar</span>
+            <span class="admin-switch hwfit-schedule-mirror-switch">
+              <input type="checkbox" class="hwfit-sched-calendar-mirror" />
+              <span class="admin-slider"></span>
+            </span>
+          </label>
+        </div>
+
+        <div class="hwfit-schedule-row">
+          <label class="hwfit-schedule-field">
+            <span>From</span>
+            <input type="time" class="hwfit-sched-start cookbook-field-input" value="09:00" />
+          </label>
+          <label class="hwfit-schedule-field">
+            <span>Until</span>
+            <input type="time" class="hwfit-sched-end cookbook-field-input" value="17:00" />
+          </label>
+        </div>
+
+        <div class="hwfit-schedule-row hwfit-schedule-days-row">
+          <span class="hwfit-schedule-label">Days</span>
+          <div class="hwfit-sched-days">
+            ${DAYS.map(d => `
+              <button type="button" class="hwfit-sched-day-chip${WEEKDAYS.has(d.k) ? " is-on" : ""}" data-day="${d.k}">${d.l}</button>
+            `).join("")}
+          </div>
+          <span class="hwfit-schedule-actions-spacer"></span>
+          <button type="button" class="cookbook-btn hwfit-sched-cancel" title="Cancel">
+            <svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-1px;margin-right:5px;flex-shrink:0;"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg>
+            <span>Cancel</span>
+          </button>
+          <button type="button" class="cookbook-btn hwfit-sched-save" title="Save schedule" aria-label="Save schedule">
+            <svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-1px;margin-right:5px;flex-shrink:0;"><rect x="3" y="4" width="18" height="18" rx="2"/><line x1="16" y1="2" x2="16" y2="6"/><line x1="8" y1="2" x2="8" y2="6"/><line x1="3" y1="10" x2="21" y2="10"/></svg>
+            <span>Save</span>
+          </button>
+        </div>
+
+        <div class="hwfit-sched-err"></div>
+      </div>`;
+  }
+
+  function openForm(arrowBtn) {
+    const cfg = readPanelConfig(arrowBtn);
+    const anchor = cfg.panel
+      || cfg.item
+      || arrowBtn.closest(".cookbook-saved-item")
+      || arrowBtn.parentElement?.parentElement
+      || arrowBtn.parentElement;
+    if (!anchor) {
+      toast("Couldn't find a panel to mount the schedule form");
+      return;
+    }
+    // Toggle.
+    const existing = anchor.querySelector(".hwfit-schedule-form");
+    if (existing) { existing.remove(); return; }
+    const tmp = document.createElement("div");
+    tmp.innerHTML = buildFormHtml(cfg);
+    const form = tmp.firstElementChild;
+    anchor.appendChild(form);
+    setTimeout(() => {
+      try { form.scrollIntoView({ behavior: "smooth", block: "nearest" }); } catch (_) {}
+    }, 50);
+    wireForm(form, cfg);
+  }
+
+  function wireForm(form, cfg) {
+    form.querySelectorAll(".hwfit-sched-day-chip").forEach(chip => {
+      chip.addEventListener("click", () => chip.classList.toggle("is-on"));
+    });
+    form.querySelector(".hwfit-sched-cancel").addEventListener("click", () => form.remove());
+    form.querySelector(".hwfit-sched-save").addEventListener("click", _safe(async () => {
+      const startTime = form.querySelector(".hwfit-sched-start").value;
+      const endTime = form.querySelector(".hwfit-sched-end").value;
+      const days = Array.from(form.querySelectorAll(".hwfit-sched-day-chip.is-on")).map(c => c.dataset.day);
+      const mirrorToCalendar = !!form.querySelector(".hwfit-sched-calendar-mirror")?.checked;
+      const errEl = form.querySelector(".hwfit-sched-err");
+      errEl.textContent = "";
+      errEl.classList.remove("is-visible");
+
+      function fail(msg) {
+        errEl.textContent = msg;
+        errEl.classList.add("is-visible");
+      }
+      if (!/^\d\d:\d\d$/.test(startTime) || !/^\d\d:\d\d$/.test(endTime)) {
+        return fail("Start and end must be HH:MM");
+      }
+      if (!days.length) {
+        return fail("Pick at least one day");
+      }
+
+      const [sh, sm] = startTime.split(":").map(Number);
+      const [eh, em] = endTime.split(":").map(Number);
+      let dur = (eh * 60 + em) - (sh * 60 + sm);
+      if (dur <= 0) dur += 24 * 60;
+
+      // The backend stores scheduled_time as UTC. The user picks
+      // wall-clock LOCAL time. Without converting, "09:55" in a UTC+9
+      // timezone gets stored as 09:55 UTC = 18:55 local → next-run
+      // shows ~9 hours later instead of "in 5 min". Mirror what
+      // tasks.js does via its _localTimeToUtc helper.
+      const _localHHMMToUtc = (hhmm) => {
+        const [h, m] = hhmm.split(":").map(Number);
+        const d = new Date();
+        d.setHours(h, m, 0, 0);
+        return `${String(d.getUTCHours()).padStart(2, "0")}:${String(d.getUTCMinutes()).padStart(2, "0")}`;
+      };
+      const startUtc = _localHHMMToUtc(startTime);
+      const [shUtc, smUtc] = startUtc.split(":").map(Number);
+
+      const allDays = days.length === 7;
+      const weekdaysOnly = days.length === 5 && ["MO","TU","WE","TH","FR"].every(d => days.includes(d));
+      const sched = {};
+      if (allDays) {
+        sched.schedule = "daily";
+        sched.scheduled_time = startUtc;
+      } else if (weekdaysOnly) {
+        sched.schedule = "cron";
+        sched.cron_expression = `${smUtc} ${shUtc} * * 1-5`;
+      } else if (days.length === 1) {
+        const dayIdx = DAYS.find(d => d.k === days[0]).idx;
+        sched.schedule = "weekly";
+        sched.scheduled_time = startUtc;
+        sched.scheduled_day = dayIdx;
+      } else {
+        const dayNum = days.map(k => {
+          const i = DAYS.find(d => d.k === k).idx;
+          return i === 6 ? 0 : i + 1;
+        });
+        sched.schedule = "cron";
+        sched.cron_expression = `${smUtc} ${shUtc} * * ${dayNum.join(",")}`;
+      }
+
+      // Name: "Serve: <full model name>" — pulled from .memory-item-title
+      // so it's the user's display name (e.g. "Qwen3.5-397B-A17B-AWQ")
+      // not a placeholder like "model".
+      const fullName = (cfg.title || cfg.repo_id || "").trim() || "model";
+      const payload = {
+        name: `Serve: ${fullName}`,
+        task_type: "action",
+        action: "cookbook_serve",
+        trigger_type: "schedule",
+        prompt: JSON.stringify({
+          preset: fullName,
+          repo_id: cfg.repo_id || "",
+          host: cfg.host || "",
+          end_after_min: dur,
+        }),
+        ...sched,
+      };
+      const saveBtn = form.querySelector(".hwfit-sched-save");
+      saveBtn.disabled = true;
+      saveBtn.textContent = "Saving…";
+      try {
+        const r = await fetch("/api/tasks", {
+          method: "POST", credentials: "same-origin",
+          headers: { "Content-Type": "application/json" },
+          body: JSON.stringify(payload),
+        });
+        const data = await r.json();
+        if (!r.ok || data.error) {
+          fail(data.error || data.detail || `HTTP ${r.status}`);
+          saveBtn.disabled = false;
+          saveBtn.textContent = "Save schedule";
+          toast(`Schedule save failed: ${data.error || data.detail || r.status}`);
+          return;
+        }
+        if (mirrorToCalendar) {
+          // Mirror onto a dedicated "Cookbook" calendar so the user can
+          // toggle the whole set on/off as a unit in the calendar UI.
+          // Best-effort: if anything here fails, we still consider the
+          // task creation a success (the task itself works regardless).
+          try {
+            const calsRes = await fetch("/api/calendar/calendars", { credentials: "same-origin" });
+            const calsBody = calsRes.ok ? await calsRes.json() : {};
+            let cookbookCal = (calsBody.calendars || []).find(c => (c.name || "").toLowerCase() === "cookbook");
+            if (!cookbookCal) {
+              const mk = await fetch("/api/calendar/calendars?name=Cookbook&color=%233b82f6", {
+                method: "POST", credentials: "same-origin",
+              });
+              if (mk.ok) {
+                const mkData = await mk.json();
+                // The create endpoint returns {ok, id, name, color}; the
+                // list endpoint returns {href, name, color}. The two map
+                // 1:1 (href === id) so we synthesize the same shape.
+                cookbookCal = { href: mkData.id, name: mkData.name, color: mkData.color };
+              }
+            }
+            // The `cookbook_task_id:` marker on its own line lets
+            // calendar.js's event-form code detect that this event was
+            // created from a Cookbook schedule and render an
+            // "Open task" button alongside the description, so the user
+            // can jump straight to the source task from the calendar UI.
+            const evBody = {
+              summary: payload.name,
+              dtstart: new Date().toISOString(),
+              dtend: new Date(Date.now() + dur * 60 * 1000).toISOString(),
+              all_day: false,
+              description: `Auto-mirrored from Cookbook schedule task ${data.id || ""}.\n`
+                + `Edit/delete the task in the Tasks tab — this event will follow.\n`
+                + `cookbook_task_id: ${data.id || ""}`,
+              rrule: weekdaysOnly
+                ? "FREQ=WEEKLY;BYDAY=MO,TU,WE,TH,FR"
+                : (sched.schedule === "weekly" ? `FREQ=WEEKLY;BYDAY=${days.join(",")}`
+                  : (sched.schedule === "daily" ? "FREQ=DAILY" : "FREQ=WEEKLY")),
+              color: "#3b82f6",
+            };
+            if (cookbookCal?.href) evBody.calendar_href = cookbookCal.href;
+            const evRes = await fetch("/api/calendar/events", {
+              method: "POST", credentials: "same-origin",
+              headers: { "Content-Type": "application/json" },
+              body: JSON.stringify(evBody),
+            });
+            const evData = evRes.ok ? await evRes.json() : null;
+            // Stash the event uid + calendar href on the task's prompt
+            // JSON so the task-delete hook can cascade the calendar
+            // cleanup. PATCH the task with an updated prompt.
+            if (evData && (evData.uid || evData.id)) {
+              const eventUid = evData.uid || evData.id;
+              try {
+                const updatedPrompt = JSON.stringify({
+                  ...JSON.parse(payload.prompt),
+                  cookbook_event_uid: eventUid,
+                  cookbook_event_calendar: cookbookCal?.href || "",
+                });
+                // /api/tasks/{id} accepts PUT, not PATCH — sending PATCH
+                // here silently failed (no such method on that route), so
+                // the task never got the cookbook_event_uid marker and the
+                // server-side delete-cascade had nothing to follow when the
+                // user later deleted the task.
+                await fetch(`/api/tasks/${encodeURIComponent(data.id)}`, {
+                  method: "PUT", credentials: "same-origin",
+                  headers: { "Content-Type": "application/json" },
+                  body: JSON.stringify({ prompt: updatedPrompt }),
+                });
+              } catch (_) {}
+            }
+          } catch (_) {}
+        }
+        form.remove();
+        const newTaskId = data.id || data.task_id || "";
+        toast(`Created task: Serve: ${fullName}`, {
+          leadingIcon: "check",
+          action: "Open",
+          duration: 5000,
+          onAction: () => openTaskInTasksTab(newTaskId),
+        });
+      } catch (e) {
+        fail(String(e));
+        saveBtn.disabled = false;
+        saveBtn.textContent = "Save schedule";
+        toast(`Schedule save failed: ${e}`);
+      }
+    }));
+  }
+
+  document.addEventListener("click", _safe((e) => {
+    const arrow = e.target.closest && e.target.closest(".hwfit-serve-schedule-arrow");
+    if (!arrow) return;
+    e.preventDefault();
+    e.stopPropagation();
+    openForm(arrow);
+  }));
+})(); } catch (e) { try { console.warn("[cookbookSchedule] top-level error:", e); } catch (_) {} }
diff --git a/static/js/cookbookServe.js b/static/js/cookbookServe.js
index 1ef503f..c27ac38 100644
--- a/static/js/cookbookServe.js
+++ b/static/js/cookbookServe.js
@@ -294,19 +294,23 @@ function _rerenderCachedModels() {
     }
     const ggufCount = _runnableGgufFiles(m).length;
     if (ggufCount > 1) metaParts.push(`${ggufCount} GGUFs`);
-    if (m.status === 'downloading') {
-      const _active = _isActivelyDownloading(m.repo_id);
-      metaParts.push(`<span class="cookbook-dl-status" style="color:var(--accent,var(--red));">${_active ? 'downloading' : 'download stalled'}</span>`);
-    }
+    // "downloading" status now renders as a title-row pill instead of
+    // a meta-row text label, matching the "running" pill style and
+    // living on the same line as the model name.
+    const _isDownloading = m.status === 'downloading';
+    const _isDlActive = _isDownloading ? _isActivelyDownloading(m.repo_id) : false;
     const isSelectMode = document.getElementById('hwfit-cache-select')?.classList.contains('active');
     html += `<div class="doclib-card memory-item" data-repo="${esc(m.repo_id)}" data-tag="${m._tag || ''}" data-family="${m._family || ''}" style="cursor:pointer;">`;
     html += `<span class="serve-select-cb memory-select-dot" style="display:${isSelectMode ? 'inline-block' : 'none'};cursor:pointer;"></span>`;
     html += `<div style="flex:1;min-width:0;">`;
     const _mc = modelColor(m.repo_id) || '';
     const _runningPill = _isActivelyServing(m.repo_id)
-      ? ' <span class="cookbook-serve-running-pill" title="This model is currently being served">running</span>'
+      ? ` <span class="cookbook-serve-running-pill is-clickable" title="This model is currently being served — click to open in Running" data-repo="${esc(m.repo_id)}" role="button" tabindex="0">running</span>`
       : '';
-    html += `<div class="memory-item-title"${_mc ? ` style="color:${_mc}"` : ''}>${modelLogo(m.repo_id)}${esc(shortName)}${hfLink ? ` <a href="${esc(hfLink)}" target="_blank" rel="noopener" class="cookbook-hf-link">HF ↗</a>` : ''}${_runningPill}</div>`;
+    const _downloadingPill = _isDownloading
+      ? ` <span class="cookbook-serve-downloading-pill${_isDlActive ? '' : ' is-stalled'}" title="${_isDlActive ? 'Download in progress' : 'Download stalled — retry to resume'}">${_isDlActive ? 'downloading' : 'stalled'}</span>`
+      : '';
+    html += `<div class="memory-item-title"${_mc ? ` style="color:${_mc}"` : ''}>${modelLogo(m.repo_id)}${esc(shortName)}${hfLink ? ` <a href="${esc(hfLink)}" target="_blank" rel="noopener" class="cookbook-hf-link">HF ↗</a>` : ''}${_runningPill}${_downloadingPill}</div>`;
     html += `<div class="memory-item-meta" style="font-size:10px;opacity:0.4;margin-top:2px;">${metaParts.join(' \u00b7 ')}</div>`;
     html += `</div>`;
     const _bk = _detectBackend(m).backend;
@@ -388,9 +392,11 @@ function _rerenderCachedModels() {
       const _retryIco = '<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><polyline points="23 4 23 10 17 10"/><path d="M20.49 15a9 9 0 1 1-2.12-9.36L23 10"/></svg>';
       const _deleteIco = '<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M3 6h18"/><path d="M8 6V4a2 2 0 0 1 2-2h4a2 2 0 0 1 2 2v2"/><path d="M19 6v14a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6"/></svg>';
       const _selectIco = '<span style="font-size:16px;line-height:1;position:relative;top:-2px;">●</span>';
+      const _schedIco = '<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="3" y="4" width="18" height="18" rx="2"/><line x1="16" y1="2" x2="16" y2="6"/><line x1="8" y1="2" x2="8" y2="6"/><line x1="3" y1="10" x2="21" y2="10"/></svg>';
       const items = [];
       if (m && m.status === 'ready') items.push({ label: 'Serve', icon: _serveIco, action: 'serve' });
       if (m && m.status === 'downloading') items.push({ label: 'Retry', icon: _retryIco, action: 'retry' });
+      if (m && m.status === 'ready') items.push({ label: 'Schedule…', icon: _schedIco, action: 'schedule' });
       items.push({ label: 'Select', icon: _selectIco, action: 'select' });
       items.push({ label: 'Delete', icon: _deleteIco, action: 'delete', danger: true });
       for (const opt of items) {
@@ -402,6 +408,16 @@ function _rerenderCachedModels() {
           if (opt.action === 'serve') item.click();
           else if (opt.action === 'delete') _deleteCachedModel(repo, item, false, m);
           else if (opt.action === 'retry') _retryCachedModel(repo, m);
+          else if (opt.action === 'schedule') {
+            // Same entry point as the ^ button next to Launch — let
+            // cookbookSchedule.js handle it. Expand the panel first
+            // so the form has somewhere to mount.
+            if (!item.querySelector('.hwfit-serve-panel')) item.click();
+            setTimeout(() => {
+              const arrow = item.querySelector('.hwfit-serve-schedule-arrow');
+              if (arrow) arrow.click();
+            }, 120);
+          }
           else if (opt.action === 'select') {
             const selectBtn = document.getElementById('hwfit-cache-select');
             const bulkBar = document.getElementById('serve-bulk-bar');
@@ -613,6 +629,20 @@ function _rerenderCachedModels() {
       panelHtml += `<label class="hwfit-backend-vllm hwfit-backend-sglang">${_l('Max Seqs','Maximum concurrent requests. Lower = less memory. Default 4 — prosumer GPUs often OOM on vLLM default 256 during CUDA graph capture.')}<input type="text" class="hwfit-sf" data-field="max_seqs" value="${esc(sv('max_seqs', '4'))}" placeholder="4" /></label>`;
       panelHtml += `<label>${_l('Dtype','Data type for weights. auto picks best for GPU')}<select class="hwfit-sf" data-field="dtype">${dtypeOpts}</select></label>`;
       panelHtml += `<label class="hwfit-backend-vllm">${_l('KV Cache','vLLM --kv-cache-dtype. auto uses the model/runtime default; fp8 reduces KV memory for long context.')}<select class="hwfit-sf" data-field="vllm_kv_cache_dtype" style="height:32px;">${vllmKvCacheOpts}</select></label>`;
+      // Attention backend selector — pin the kernel impl. Default `auto` lets
+      // vLLM pick FlashInfer (which JITs on first use and breaks on older
+      // system nvcc) → FlashAttention → xformers. Forcing FLASH_ATTN skips
+      // the JIT entirely, fixing the `nvcc fatal: Unsupported gpu
+      // architecture 'compute_89'` failure mode on Ada / Hopper hosts.
+      const vllmAttnBackendOpts = ['auto', 'FLASH_ATTN', 'XFORMERS', 'FLASHINFER', 'TORCH_SDPA']
+        .map(b => `<option value="${b === 'auto' ? '' : b}"${(sv('vllm_attn_backend','') === (b === 'auto' ? '' : b)) ? ' selected' : ''}>${b}</option>`).join('');
+      panelHtml += `<label class="hwfit-backend-vllm">${_l('Attention','vLLM VLLM_ATTENTION_BACKEND. auto = vLLM picks (often FLASHINFER, which JITs and can fail on old nvcc). FLASH_ATTN skips the JIT entirely.')}<select class="hwfit-sf" data-field="vllm_attn_backend" style="height:32px;">${vllmAttnBackendOpts}</select></label>`;
+      // Free-text env-vars field. Anything pasted here is prepended to the
+      // launch command verbatim. Use for CUDACXX, PATH overrides, NCCL_*
+      // tuning, or any other KEY=VALUE pair that doesn't have a dedicated
+      // field. After the venv activate runs, $VIRTUAL_ENV / $PATH / etc. are
+      // already exported so they expand correctly here.
+      panelHtml += `<label class="hwfit-backend-vllm hwfit-backend-sglang" style="flex:1 1 100%;">${_l('Env','Extra KEY=VALUE env-var pairs prepended to the launch (space-separated). Example: CUDACXX=$VIRTUAL_ENV/lib/python3.10/site-packages/nvidia/cuda_nvcc/bin/nvcc — points flashinfer at the venv-bundled nvcc when the system one is too old for your GPU.')}<input type="text" class="hwfit-sf" data-field="extra_env" value="${esc(sv('extra_env',''))}" placeholder="CUDACXX=/path/to/nvcc NCCL_P2P_DISABLE=1" style="width:100%;" /></label>`;
       panelHtml += `</div>`;
       // Row 2b: Diffusers settings
       const diffDtypeOpts = ['bfloat16','float16','float32'].map(d => `<option value="${d}"${sv('diff_dtype','bfloat16')===d?' selected':''}>${d}</option>`).join('');
@@ -729,8 +759,16 @@ function _rerenderCachedModels() {
       // Copy moved inside the command textarea (top-right). Spacer then
       // pushes Cancel + Launch to the right.
       panelHtml += `<span class="hwfit-serve-actions-spacer"></span>`;
-      panelHtml += `<button class="cookbook-btn hwfit-serve-cancel" type="button" title="Close this configuration panel">Cancel</button>`;
+      panelHtml += `<button class="cookbook-btn hwfit-serve-cancel" type="button" title="Close this configuration panel"><svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.4" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-1px;margin-right:5px;flex-shrink:0;"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg>Cancel</button>`;
+      // Launch + a small ^ that opens an inline schedule form. The form
+      // creates a ScheduledTask (action=cookbook_serve), so the schedule
+      // ends up in the existing Tasks UI for edit/delete/pause.
+      panelHtml += `<span class="hwfit-serve-launch-group">`;
       panelHtml += `<button class="cookbook-btn hwfit-serve-launch"><svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-1px;margin-right:4px;flex-shrink:0;"><polygon points="13 2 3 14 12 14 11 22 21 10 12 10 13 2"/></svg>Launch</button>`;
+      // Chevron points DOWN because the schedule form opens beneath the
+      // panel — the arrow signals the direction of motion, not menu state.
+      panelHtml += `<button class="cookbook-btn hwfit-serve-schedule-arrow" type="button" aria-haspopup="true" aria-label="Schedule this serve on a recurring window" title="Schedule this serve as a recurring task"><svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round"><polyline points="6 9 12 15 18 9"/></svg></button>`;
+      panelHtml += `</span>`;
       panelHtml += `</div>`;
       panelHtml += `</div>`;
 
@@ -1643,6 +1681,35 @@ function _rerenderCachedModels() {
       // Launch button
       panel.querySelector('.hwfit-serve-launch').addEventListener('click', async (ev) => {
         const _launchBtn = ev.currentTarget;
+        // Immediate visual feedback. The GPU probe + backend-warning prompt
+        // below can take ~1-2s before the task UI shows up, leaving the
+        // button looking dead. Drop in the same whirlpool spinner the rest of
+        // the cookbook uses (Probe GPUs, dependency installs, etc.) right
+        // away; restored on any early-return / failure path below.
+        const _origBtnHtml = _launchBtn.innerHTML;
+        const _origBtnDisabled = _launchBtn.disabled;
+        let _launchingWp = null;
+        const _restoreLaunchBtn = () => {
+          try { _launchingWp?.destroy?.(); } catch {}
+          _launchingWp = null;
+          _launchBtn.innerHTML = _origBtnHtml;
+          _launchBtn.disabled = _origBtnDisabled;
+        };
+        _launchBtn.disabled = true;
+        _launchBtn.innerHTML = '';
+        const _launchingWrap = document.createElement('span');
+        _launchingWrap.className = 'hwfit-serve-launching';
+        _launchingWrap.style.cssText = 'display:inline-flex;align-items:center;gap:6px;';
+        _launchingWp = spinnerModule.createWhirlpool(18);
+        if (_launchingWp?.element) {
+          _launchingWp.element.style.margin = '0';
+          _launchingWp.element.style.transform = 'translateY(-2px)';
+          _launchingWrap.appendChild(_launchingWp.element);
+        }
+        const _launchingLabel = document.createElement('span');
+        _launchingLabel.textContent = 'Launching…';
+        _launchingWrap.appendChild(_launchingLabel);
+        _launchBtn.appendChild(_launchingWrap);
         // Final safety net: never launch with ctx beyond the model's trained
         // limit (or the absolute sanity ceiling when the limit is unknown). A
         // stale preset or typo (e.g. 16000000) overflows and, with a quantized
@@ -1650,7 +1717,14 @@ function _rerenderCachedModels() {
         // command (then we respect their literal text).
         if (!_cmdManuallyEdited) _clampCtx(true);
         if (!_cmdManuallyEdited) updateCmd();
-        const launchCmd = _cmdTextarea ? _cmdTextarea.value.trim() : panel._cmd;
+        // Pasted commands often carry hidden newlines / CRs / tabs from copies
+        // out of model cards or wrapped help text. The backend cmd allowlist
+        // rejects \n / \r outright (`Invalid characters in cmd`), so collapse
+        // all whitespace to single spaces before launch — same effect as the
+        // user manually re-flowing the textarea, no behavior change.
+        const _rawLaunchCmd = _cmdTextarea ? _cmdTextarea.value : panel._cmd;
+        const launchCmd = String(_rawLaunchCmd || '').replace(/\s+/g, ' ').trim();
+        if (_cmdTextarea && _cmdTextarea.value !== launchCmd) _cmdTextarea.value = launchCmd;
         const serveState = {};
         panel.querySelectorAll('.hwfit-sf').forEach(el => {
           if (el.type === 'checkbox') serveState[el.dataset.field] = el.checked;
@@ -1659,6 +1733,7 @@ function _rerenderCachedModels() {
         serveState.backend = serveState.backend || (_detectBackend(m).backend) || 'vllm';
         const backendWarning = _serveBackendWarning(m, repo, serveState.backend, serveState);
         if (backendWarning) {
+          _restoreLaunchBtn();
           await window.styledConfirm(backendWarning.body, {
             title: backendWarning.title,
             confirmText: 'Edit settings',
@@ -1689,7 +1764,7 @@ function _rerenderCachedModels() {
                 `No GPU detected on ${_probeHost ? _probeHost : 'this host'}. ${serveState.backend.toUpperCase()} needs a visible CUDA/ROCm accelerator to start — launching now will most likely crash early.\n\nLaunch anyway?`,
                 { title: 'No GPU detected', confirmText: 'Launch anyway', cancelText: 'Cancel', danger: true },
               );
-              if (!_proceed) return;
+              if (!_proceed) { _restoreLaunchBtn(); return; }
             }
           } catch {
             // Network / probe failure — don't block. Better to let the launch
@@ -1697,6 +1772,56 @@ function _rerenderCachedModels() {
             // hiccuped (the user can read the real error in the task output).
           }
         }
+
+        // Pre-launch PORT probe — second most common failure pattern is
+        // collision with an already-running server (vllm crashing with
+        // "Address already in use" because Ollama owns 11434, or a
+        // previous vllm on the same port wasn't killed). The post-mortem
+        // "Suggested action: Kill existing vLLM" came AFTER the failed
+        // launch — user wants to know BEFORE clicking Launch. Parse the
+        // port out of the cmd, ssh-check who owns it on the target host,
+        // and offer to abort or proceed.
+        try {
+          const _portMatch = launchCmd.match(/(?:^|\s)(?:--port|-p|--host\s+\S+\s+--port)\s+(\d{2,5})\b/)
+            || launchCmd.match(/(?:^|\s)--port=(\d{2,5})\b/)
+            || launchCmd.match(/OLLAMA_HOST=[^:\s]+:(\d{2,5})\b/);
+          const _port = _portMatch ? _portMatch[1] : '';
+          if (_port) {
+            const _portHost = (_envState.remoteHost || '').trim();
+            const _checkInner = `ss -tlnp 2>/dev/null | awk '$4 ~ /:${_port}$/ {print; exit}' || netstat -tlnp 2>/dev/null | awk '$4 ~ /:${_port}$/ {print; exit}'`;
+            const _cmd = _portHost
+              ? `ss h ${_portHost} <<<"" 2>/dev/null; ssh -o ConnectTimeout=4 -o StrictHostKeyChecking=no ${_portHost} ${JSON.stringify(_checkInner)}`
+              : _checkInner;
+            const _res = await fetch('/api/shell/exec', {
+              method: 'POST', credentials: 'same-origin',
+              headers: { 'Content-Type': 'application/json' },
+              body: JSON.stringify({ command: _cmd }),
+            });
+            const _data = await _res.json().catch(() => ({}));
+            const _stdout = (_data.stdout || '').trim();
+            if (_stdout) {
+              // Try to surface the process name from `users:(("name",pid=...,...))`.
+              const _procMatch = _stdout.match(/users:\(\("([^"]+)",pid=(\d+)/);
+              const _procDesc = _procMatch
+                ? `${_procMatch[1]} (PID ${_procMatch[2]})`
+                : 'another process';
+              const _hostLabel = _portHost ? _portHost : 'this host';
+              const _proceed = await window.styledConfirm(
+                `Port ${_port} on ${_hostLabel} is already in use by ${_procDesc}. Launching ${serveState.backend.toUpperCase()} now will fail with "Address already in use".\n\nStop the existing process first, OR change the --port in the command above, OR launch anyway and watch it crash.`,
+                {
+                  title: `Port ${_port} taken`,
+                  confirmText: 'Launch anyway',
+                  cancelText: 'Cancel',
+                  danger: true,
+                },
+              );
+              if (!_proceed) { _restoreLaunchBtn(); return; }
+            }
+          }
+        } catch {
+          // Probe failure — don't block. If the port check can't run we'd
+          // rather let the launch try than silently refuse.
+        }
         // Save in the { _byRepo, _lastUsed } schema — no legacy flat keys at
         // the root so per-model state doesn't leak between models.
         try {
@@ -1750,7 +1875,12 @@ function _rerenderCachedModels() {
 
       // Copy button — now icon-only, so flash a green checkmark on success
       // instead of swapping to text (which would also break the width).
-      panel.querySelector('.hwfit-serve-copy').addEventListener('click', () => {
+      panel.querySelector('.hwfit-serve-copy').addEventListener('click', (e) => {
+        // Without stopPropagation the click bubbles up to the
+        // .doclib-card click handler that toggles the expand state →
+        // copying collapses the whole serve panel mid-flight.
+        e.preventDefault();
+        e.stopPropagation();
         const cmd = panel.querySelector('.hwfit-serve-cmd').value;
         _copyText(cmd).then(() => {
           const btn = panel.querySelector('.hwfit-serve-copy');
@@ -2126,3 +2256,39 @@ export function initServe(shared) {
 }
 
 export { _cachedAllModels, _filterCachedList, _rerenderCachedModels, _deleteCachedModel };
+
+// Click the "running" pill on a serve-card → switch to Cookbook → Running
+// tab and scroll the matching task into view, with a brief flash so the
+// user can find it among a long list. Tracks the click via event
+// delegation so it survives every _rerenderCachedModels() pass.
+function _openRunningTabForRepo(repo) {
+  const body = document.querySelector('#cookbook-modal .cookbook-body');
+  if (!body) return;
+  const runTab = body.querySelector('.cookbook-tab[data-backend="Running"]');
+  if (runTab) runTab.click();
+  // The Running tab needs a tick to mount/render before we can find
+  // task cards inside it.
+  setTimeout(() => {
+    const candidates = Array.from(body.querySelectorAll('.cookbook-task'));
+    const match = candidates.find(c => {
+      // task cards expose modelId or name via dataset / inner title
+      const dsRepo = c.dataset?.modelId || c.dataset?.repoId || '';
+      if (dsRepo === repo) return true;
+      const title = c.querySelector('.cookbook-task-title, .memory-item-title')?.textContent?.trim() || '';
+      return title === repo || title === (repo.split('/').pop() || '');
+    });
+    if (match) {
+      try { match.scrollIntoView({ behavior: 'smooth', block: 'center' }); } catch (_) {}
+      match.classList.add('cookbook-task-flash');
+      setTimeout(() => match.classList.remove('cookbook-task-flash'), 1600);
+    }
+  }, 180);
+}
+document.addEventListener('click', (e) => {
+  const pill = e.target.closest && e.target.closest('.cookbook-serve-running-pill.is-clickable');
+  if (!pill) return;
+  e.preventDefault();
+  e.stopPropagation();
+  const repo = pill.dataset.repo || '';
+  if (repo) _openRunningTabForRepo(repo);
+});
diff --git a/static/js/documentLibrary.js b/static/js/documentLibrary.js
index 0341594..642a91f 100644
--- a/static/js/documentLibrary.js
+++ b/static/js/documentLibrary.js
@@ -1598,7 +1598,11 @@ let _libraryArchivedView = false;   // Documents tab showing archived docs?
     modal.innerHTML = `
       <div class="modal-content doclib-modal-content" style="width:min(640px, 92vw);max-height:85vh;background:var(--bg);">
         <div class="modal-header">
-          <h4><svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-2px;margin-right:4px;"><path d="M4 19.5A2.5 2.5 0 0 1 6.5 17H20"/><path d="M6.5 2H20v20H6.5A2.5 2.5 0 0 1 4 19.5v-15A2.5 2.5 0 0 1 6.5 2z"/><line x1="8" y1="7" x2="16" y2="7"/><line x1="8" y1="11" x2="14" y2="11"/></svg>Library</h4>
+          <!-- Header title + icon mirror the currently-active sub-tab (Chats /
+               Documents / Research / Archive) so the user sees ONE icon at
+               the top representing the section they're in, with the tab
+               strip below as sub-navigation. _switchLibTab() updates this. -->
+          <h4 id="doclib-header-title"><span id="doclib-header-icon" style="vertical-align:-2px;margin-right:4px;display:inline-flex;"></span><span id="doclib-header-text">Library</span></h4>
           <button class="close-btn" id="doclib-close">\u2716</button>
         </div>
         <div class="lib-tabs" id="doclib-lib-tabs" style="padding:0 10px;">
@@ -1831,6 +1835,27 @@ let _libraryArchivedView = false;   // Documents tab showing archived docs?
       grid.parentElement.appendChild(btn);
     }
 
+    // SVG markup + label for each tab — used to keep the modal header
+    // in sync with whichever sub-tab the user is on.
+    const _TAB_HEADERS = {
+      chats: {
+        label: 'Chats',
+        svg: '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M21 15a2 2 0 0 1-2 2H7l-4 4V5a2 2 0 0 1 2-2h14a2 2 0 0 1 2 2z"/></svg>',
+      },
+      documents: {
+        label: 'Documents',
+        svg: '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M14 2H6a2 2 0 0 0-2 2v16a2 2 0 0 0 2 2h12a2 2 0 0 0 2-2V8z"/><polyline points="14 2 14 8 20 8"/><line x1="8" y1="13" x2="16" y2="13"/><line x1="8" y1="17" x2="13" y2="17"/></svg>',
+      },
+      research: {
+        label: 'Research',
+        svg: '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><circle cx="11" cy="11" r="8"/><path d="M21 21l-4.35-4.35"/><line x1="11" y1="8" x2="11" y2="14"/><line x1="8" y1="11" x2="14" y2="11"/></svg>',
+      },
+      archive: {
+        label: 'Archive',
+        svg: '<svg width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><polyline points="21 8 21 21 3 21 3 8"/><rect x="1" y="3" width="22" height="5"/><line x1="10" y1="12" x2="14" y2="12"/></svg>',
+      },
+    };
+
     function _switchLibTab(tab) {
       _activeLibTab = tab;
       _tabBtns.forEach(b => b.classList.toggle('active', b.dataset.doclibTab === tab));
@@ -1841,6 +1866,14 @@ let _libraryArchivedView = false;   // Documents tab showing archived docs?
           p.style.display = 'none';
         }
       });
+      // Sync the modal header icon + label to match the active sub-tab.
+      const hdr = _TAB_HEADERS[tab];
+      if (hdr) {
+        const ico = document.getElementById('doclib-header-icon');
+        const txt = document.getElementById('doclib-header-text');
+        if (ico) ico.innerHTML = hdr.svg;
+        if (txt) txt.textContent = hdr.label;
+      }
       if (tab === 'chats') _renderLibChats();
       else if (tab === 'archive') _renderLibArchive();
       else if (tab === 'research') _renderLibResearch();
@@ -3121,8 +3154,10 @@ let _libraryArchivedView = false;   // Documents tab showing archived docs?
       return new Date(iso).toLocaleDateString();
     }
 
-    // Switch to initial tab if not documents
-    if (_activeLibTab !== 'documents') _switchLibTab(_activeLibTab);
+    // Switch to the initial tab. Always call this — even when the
+    // default ('documents') matches — so the modal header's icon + label
+    // sync from "Library" to the active sub-tab on first open.
+    _switchLibTab(_activeLibTab);
 
     const searchInput = document.getElementById('doclib-search');
     searchInput.addEventListener('input', () => {
diff --git a/static/js/modelPicker.js b/static/js/modelPicker.js
index 00874ed..07a1766 100644
--- a/static/js/modelPicker.js
+++ b/static/js/modelPicker.js
@@ -179,14 +179,23 @@ function _initModelPickerDropdown() {
     const result = [];
     const seen = new Set();
     items.forEach(item => {
-      if (item.offline) return;
+      // Previously: offline endpoints were skipped entirely, so a server
+      // that briefly went down disappeared from the picker — confusing
+      // when the user can still see it (offline-tagged) in Settings.
+      // Now: include offline-endpoint models too but flag them
+      // `stale: true` so the row renderer dims them + shows the offline
+      // pill. The user can still click and try anyway (matches the
+      // existing "local server appears offline" path on line 301).
+      const epOffline = !!item.offline;
       const allModels = (item.models || []).concat(item.models_extra || []);
       const allDisplay = (item.models_display || []).concat(item.models_extra_display || []);
       // Mark local endpoints whose live probe failed.
       const probeResult = item.endpoint_id ? _localProbe[item.endpoint_id] : null;
       const isLocalDead = !!(probeResult && probeResult.alive === false);
       allModels.forEach((mid, i) => {
-        // Deduplicate by model ID — prefer DB endpoints over env-discovered
+        // Deduplicate by model ID — prefer ONLINE endpoint entries over
+        // offline duplicates so the user gets a working endpoint first
+        // when the same model is exposed by both.
         if (seen.has(mid)) return;
         seen.add(mid);
         result.push({
@@ -201,8 +210,11 @@ function _initModelPickerDropdown() {
             item.host || '',
             item.url || '',
           ].filter(Boolean).join(' '),
-          stale: isLocalDead,
-          staleReason: isLocalDead ? (probeResult.error || 'not responding') : '',
+          stale: isLocalDead || epOffline,
+          staleReason: epOffline
+            ? (item.ping_error || 'endpoint offline')
+            : (isLocalDead ? (probeResult.error || 'not responding') : ''),
+          offline: epOffline,
         });
       });
     });
@@ -377,22 +389,35 @@ function _initModelPickerDropdown() {
       return;
     }
 
-    // ── Browse mode: Recent (auto) + Favorites (manual). No flat "All" dump. ──
+    // ── Browse mode: Favorites (manual) + Recent (auto), with dedupe. ──
+    // Rules:
+    //   1. Never list the same model twice in the dropdown. Favorites
+    //      win over Recent (if you favorited it, that's where it
+    //      belongs — Recent shouldn't show it again as duplicate).
+    //   2. Small catalogs (≤ BROWSE_ALL_LIMIT total) skip the Recent
+    //      section entirely — when there's only ~10 models, the whole
+    //      list fits below as "All models" and a separate Recent
+    //      section just duplicates rows.
     const shown = new Set();
-    const recentModels = _loadRecent()
-      .map(id => byId.get(id))
-      .filter(Boolean)
-      .slice(0, RECENT_MAX);
     const favModels = favs.map(id => byId.get(id)).filter(Boolean);
-
-    if (recentModels.length) {
-      _addSection('Recent');
-      recentModels.forEach(m => { shown.add(m.mid); _addRow(m); });
-    }
     if (favModels.length) {
       _addSection('Favorites');
       favModels.forEach(m => { shown.add(m.mid); _addRow(m); });
     }
+    // Recent: only render when the catalog is big enough that surfacing
+    // a recency shortlist is actually useful, AND only models that
+    // aren't already in Favorites (dedupe).
+    if (all.length > BROWSE_ALL_LIMIT) {
+      const recentModels = _loadRecent()
+        .map(id => byId.get(id))
+        .filter(Boolean)
+        .filter(m => !shown.has(m.mid))
+        .slice(0, RECENT_MAX);
+      if (recentModels.length) {
+        _addSection('Recent');
+        recentModels.forEach(m => { shown.add(m.mid); _addRow(m); });
+      }
+    }
 
     // Small catalogs: still list everything so users aren't forced to search.
     if (all.length <= BROWSE_ALL_LIMIT) {
diff --git a/static/js/notes.js b/static/js/notes.js
index 935b6b7..039b310 100644
--- a/static/js/notes.js
+++ b/static/js/notes.js
@@ -5053,9 +5053,54 @@ async function _initReminders() {
   } catch {}
 }
 
-const notesModule = { openPanel, closePanel, togglePanel, isPanelOpen, openNotes: openPanel, closeNotes: closePanel, isNotesOpen: isPanelOpen, refreshDueBadge };
+// Open the notes panel and scroll/flash the matching note card. Used
+// by chatRenderer.js when the user clicks a [View note](#note-<id>)
+// link the agent emits after a manage_notes create. Falls back to
+// just opening the panel when the card isn't found (panel still
+// loading, note in a different filter, etc.).
+async function openNote(noteId) {
+  // If the panel is already open, openPanel() short-circuits and does
+  // nothing — including no re-fetch — so a freshly-created note added
+  // server-side never shows up. Force a refresh by closing first when
+  // open, then re-opening. Clicking the sidebar Notes button as a
+  // last resort keeps this working even if the module state got out
+  // of sync (rare but seen during HMR or after a stuck modal).
+  try {
+    if (isPanelOpen && isPanelOpen()) {
+      closePanel();
+      // give the close animation a frame to settle
+      await new Promise(r => setTimeout(r, 30));
+    }
+  } catch (_) {}
+  openPanel();
+  // openPanel() kicks off _fetchNotes() asynchronously, so the cards
+  // for newly-created notes may not be in the DOM yet. Also poll the
+  // _notes module array directly — if the note IS loaded but the
+  // active filter (e.g. archive view) is hiding it, we can still
+  // surface a confirmation toast.
+  if (!noteId) return;
+  let tries = 0;
+  const findAndFlash = () => {
+    const card = document.querySelector(`.note-card[data-note-id="${noteId}"]`)
+      || document.querySelector(`.note-card[data-note-id^="${noteId.slice(0, 8)}"]`);
+    if (card) {
+      try { card.scrollIntoView({ behavior: 'smooth', block: 'center' }); } catch (_) {}
+      card.classList.add('note-card-flash');
+      setTimeout(() => card.classList.remove('note-card-flash'), 1600);
+      return true;
+    }
+    return false;
+  };
+  const tryNext = () => {
+    if (findAndFlash()) return;
+    if (++tries < 20) setTimeout(tryNext, 200);
+  };
+  setTimeout(tryNext, 120);
+}
+
+const notesModule = { openPanel, closePanel, togglePanel, isPanelOpen, openNote, openNotes: openPanel, closeNotes: closePanel, isNotesOpen: isPanelOpen, refreshDueBadge };
 export default notesModule;
-export { openPanel as openNotes, closePanel as closeNotes, isPanelOpen as isNotesOpen };
+export { openPanel as openNotes, closePanel as closeNotes, isPanelOpen as isNotesOpen, openNote };
 window.notesModule = notesModule;
 
 // Start reminder loop on module load (after a short delay so app loads first)
diff --git a/static/js/settings.js b/static/js/settings.js
index 3a6e9d0..8269bb6 100644
--- a/static/js/settings.js
+++ b/static/js/settings.js
@@ -4666,6 +4666,8 @@ async function initUnifiedIntegrations() {
       { key: 'calendar:write', label: 'Calendar write', detail: 'Create and update calendar events' },
       { key: 'memory:read', label: 'Memory', detail: 'Read memory when enabled' },
       { key: 'memory:write', label: 'Memory write', detail: 'Write memory when enabled' },
+      { key: 'cookbook:read', label: 'Cookbook', detail: 'List cookbook tasks + tail their tmux output (debug a model serve from outside the UI)' },
+      { key: 'cookbook:launch', label: 'Cookbook launch', detail: 'Launch and stop cookbook serve tasks. Powerful: runs SSH commands on your configured servers, bounded by the same allowlist the UI uses (vllm/python3/sglang/llama-server/...)' },
     ];
     // Strict name-prefix match keeps Codex and Claude tokens in their own forms.
     const agentTokens = (Array.isArray(tokens) ? tokens : []).filter(tok =>
@@ -4678,6 +4680,7 @@ async function initUnifiedIntegrations() {
       email: '<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="2" y="4" width="20" height="16" rx="2"/><polyline points="2 6 12 13 22 6"/></svg>',
       calendar: '<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="3" y="4" width="18" height="18" rx="2" ry="2"/><line x1="16" y1="2" x2="16" y2="6"/><line x1="8" y1="2" x2="8" y2="6"/><line x1="3" y1="10" x2="21" y2="10"/></svg>',
       memory: '<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.7" stroke-linecap="round" stroke-linejoin="round"><path d="M9.5 2a2.5 2.5 0 0 0-2.5 2.5 2.5 2.5 0 0 0-2.5 2.5A2.5 2.5 0 0 0 2 9.5v3A2.5 2.5 0 0 0 4.5 15a2.5 2.5 0 0 0 2.5 2.5A2.5 2.5 0 0 0 9.5 20H10V2z"/><path d="M14.5 2a2.5 2.5 0 0 1 2.5 2.5 2.5 2.5 0 0 1 2.5 2.5A2.5 2.5 0 0 1 22 9.5v3A2.5 2.5 0 0 1 19.5 15a2.5 2.5 0 0 1-2.5 2.5A2.5 2.5 0 0 1 14.5 20H14V2z"/></svg>',
+      cookbook: '<svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M4 19.5A2.5 2.5 0 0 1 6.5 17H20"/><path d="M6.5 2H20v20H6.5A2.5 2.5 0 0 1 4 19.5v-15A2.5 2.5 0 0 1 6.5 2z"/></svg>',
     };
     const _scopeNiceLabel = (label) => label.replace(/\s+(write|drafts?|send)$/i, '');
     const _scopeAction = (key) => (key.split(':')[1] || '').toLowerCase();
diff --git a/static/js/slashCommands.js b/static/js/slashCommands.js
index 4e528f6..0f3a720 100644
--- a/static/js/slashCommands.js
+++ b/static/js/slashCommands.js
@@ -4815,7 +4815,15 @@ async function _cmdSetup(args, ctx) {
     } else {
       pendingSetupProvider = provider;
       setupMode = 'endpoint-key-for-provider';
-      await _setupReply(`Paste your ${provider.name} API key.`);
+      // Show the canonical "/setup <provider> <key>" usage so the user
+      // learns the one-shot form instead of relying on the pasted-key
+      // mode that always greets them with a generic prompt.
+      // _setupReply renders as plain text (no HTML) — use markdown
+      // backticks for the inline code instead of <code> + &lt;&gt;.
+      const _slug = (topic || '').toLowerCase();
+      await _setupReply(
+        `Paste your ${provider.name} API key, or run \`/setup ${_slug} <api-key>\` to set it in one step.`
+      );
     }
     return true;
   }
@@ -5538,7 +5546,31 @@ const COMMANDS = {
     category: 'Getting started',
     help: 'Add local or API model endpoints',
     handler: _cmdSetup,
-    usage: '/setup local URL  ·  /setup groq KEY  ·  /setup copilot  ·  /setup endpoint'
+    usage: '/setup local URL  ·  /setup groq KEY  ·  /setup copilot  ·  /setup endpoint',
+    // Provider subs so the autocomplete popup surfaces "/setup deepseek",
+    // "/setup openai", etc. when the user types "/setup de". Each sub's
+    // handler is a thin wrapper that re-prepends the sub name and
+    // re-dispatches into _cmdSetup, which already knows how to handle
+    // bare-provider (prompts for the key) AND provider-with-key (saves it).
+    // Without the explicit handler, the slash-dispatcher errors with
+    // "subDef.handler is not a function".
+    subs: {
+      deepseek:   { help: 'DeepSeek',      usage: '/setup deepseek sk-...',     handler: (a, c) => _cmdSetup(['deepseek',   ...a], c) },
+      openai:     { help: 'OpenAI',        usage: '/setup openai sk-proj-...',  handler: (a, c) => _cmdSetup(['openai',     ...a], c) },
+      anthropic:  { help: 'Anthropic',     usage: '/setup anthropic sk-ant-...',handler: (a, c) => _cmdSetup(['anthropic',  ...a], c) },
+      openrouter: { help: 'OpenRouter',    usage: '/setup openrouter sk-or-...',handler: (a, c) => _cmdSetup(['openrouter', ...a], c) },
+      groq:       { help: 'Groq',          usage: '/setup groq gsk_...',        handler: (a, c) => _cmdSetup(['groq',       ...a], c) },
+      gemini:     { help: 'Google Gemini', alias: ['google'], usage: '/setup gemini AIza...', handler: (a, c) => _cmdSetup(['gemini', ...a], c) },
+      xai:        { help: 'xAI (Grok)',    alias: ['grok'],   usage: '/setup xai xai-...',   handler: (a, c) => _cmdSetup(['xai',    ...a], c) },
+      ollama:     { help: 'Ollama Cloud',  usage: '/setup ollama KEY',          handler: (a, c) => _cmdSetup(['ollama',     ...a], c) },
+      copilot:    { help: 'GitHub Copilot', usage: '/setup copilot',            handler: (a, c) => _cmdSetup(['copilot',    ...a], c) },
+      local:      { help: 'Local model server (vLLM / LM Studio / llama.cpp / Ollama)',
+                    usage: '/setup local http://localhost:8000/v1',
+                    handler: (a, c) => _cmdSetup(['local', ...a], c) },
+      endpoint:   { help: 'Open the endpoint manager in Settings',
+                    usage: '/setup endpoint',
+                    handler: (a, c) => _cmdSetup(['endpoint', ...a], c) },
+    },
   },
   demo: {
     alias: ['tour'],
diff --git a/static/js/tasks.js b/static/js/tasks.js
index eb140ec..03e426f 100644
--- a/static/js/tasks.js
+++ b/static/js/tasks.js
@@ -504,8 +504,13 @@ const _CATEGORY_MAP = {
   ssh_command:          'System',
   run_script:           'System',
   run_local:            'System',
+  cookbook_serve:       'Cookbook',
 };
-const _CATEGORY_ORDER = ['Other', 'Calendar', 'Email', 'Chats', 'Documents', 'Memory', 'Research', 'Skills', 'Assistant', 'System'];
+// Cookbook serves listed FIRST so a just-saved schedule shows at the
+// top instead of scrolling off the bottom of the list. The remaining
+// order is preserved for backwards-compatibility with users who've
+// learned where things are.
+const _CATEGORY_ORDER = ['Cookbook', 'Other', 'Calendar', 'Email', 'Chats', 'Documents', 'Memory', 'Research', 'Skills', 'Assistant', 'System'];
 const _CATEGORY_ICONS = {
   Calendar:  '<rect x="3" y="4" width="18" height="18" rx="2"/><line x1="16" y1="2" x2="16" y2="6"/><line x1="8" y1="2" x2="8" y2="6"/><line x1="3" y1="10" x2="21" y2="10"/>',
   Email:     '<rect x="2" y="4" width="20" height="16" rx="2"/><path d="m22 7-8.97 5.7a1.94 1.94 0 0 1-2.06 0L2 7"/>',
@@ -516,6 +521,8 @@ const _CATEGORY_ICONS = {
   Skills:    '<path d="M9 11l3 3L22 4"/><path d="M4 19.5A2.5 2.5 0 0 1 6.5 17H20"/><path d="M4 4.5A2.5 2.5 0 0 1 6.5 2H20v15H6.5A2.5 2.5 0 0 0 4 19.5z"/>',
   Assistant: '<circle cx="12" cy="12" r="10"/><circle cx="12" cy="10" r="3"/><path d="M7 18a5 5 0 0 1 10 0"/>',
   System:    '<rect x="2" y="3" width="20" height="14" rx="2"/><line x1="8" y1="21" x2="16" y2="21"/><line x1="12" y1="17" x2="12" y2="21"/>',
+  // Cookbook icon — matches the recipe-book glyph used on the sidebar.
+  Cookbook:  '<path d="M12 7v14"/><path d="M3 18a1 1 0 0 1-1-1V4a1 1 0 0 1 1-1h5a4 4 0 0 1 4 4 4 4 0 0 1 4-4h5a1 1 0 0 1 1 1v13a1 1 0 0 1-1 1h-6a3 3 0 0 0-3 3 3 3 0 0 0-3-3z"/>',
   Other:     '<circle cx="12" cy="12" r="3"/>',
 };
 
@@ -955,9 +962,13 @@ function _showPresetPicker() {
   let html = '<div class="admin-card" style="flex:1;display:flex;flex-direction:column;overflow:hidden;">';
   html += '<div style="display:flex;align-items:baseline;gap:8px;margin-bottom:2px;"><h2 style="margin:0;padding:0;line-height:1;">Add Task</h2></div>';
   html += '<p class="memory-desc" style="position:relative;top:4px;">Describe a task for the AI to draft, or pick a type below to set one up manually.</p>';
-  html += '<div class="task-ai-compose" style="display:flex;gap:6px;margin:6px 0 10px;">'
-    + '<input type="text" id="task-ai-input" class="memory-search-input" style="flex:1;" placeholder="Describe a task — e.g. &quot;every weekday 7am summarize my unread email&quot;" />'
-    + '<button class="memory-toolbar-btn active" id="task-ai-btn" title="Draft a task with AI" style="white-space:nowrap;height:28px;"><svg width="12" height="12" viewBox="0 0 24 24" fill="currentColor" style="vertical-align:-1px;margin-right:3px;"><path d="M12 0L14.59 8.41L23 12L14.59 15.59L12 24L9.41 15.59L1 12L9.41 8.41Z"/></svg>Draft with AI</button>'
+  // flex-wrap + min-width:0 on the input lets the row collapse cleanly
+  // on narrow modal widths instead of pushing the AI button past the
+  // right edge. margin-left:-4px nudges the compose row 4px into the
+  // description bar above so the input lines up with it visually.
+  html += '<div class="task-ai-compose" style="display:flex;gap:6px;margin:6px 0 10px -4px;flex-wrap:wrap;align-items:center;">'
+    + '<input type="text" id="task-ai-input" class="memory-search-input" style="flex:1 1 220px;min-width:0;" placeholder="Describe a task — e.g. &quot;every weekday 7am summarize my unread email&quot;" />'
+    + '<button class="memory-toolbar-btn active" id="task-ai-btn" title="Draft a task with AI" style="white-space:nowrap;height:28px;flex:0 0 auto;"><svg width="12" height="12" viewBox="0 0 24 24" fill="currentColor" style="vertical-align:-1px;margin-right:3px;"><path d="M12 0L14.59 8.41L23 12L14.59 15.59L12 24L9.41 15.59L1 12L9.41 8.41Z"/></svg>Draft with AI</button>'
     + '</div>';
   html += '<div class="memory-list" style="max-height:none;flex:1;gap:0px;margin-top:2px;padding-right:8px;">';
   _TASK_PRESETS.forEach((p, i) => {
diff --git a/static/style.css b/static/style.css
index 39f1e9e..c0aa39b 100644
--- a/static/style.css
+++ b/static/style.css
@@ -14059,8 +14059,28 @@ body:has(.doc-version-panel:not(.hidden)) .hamburger-btn {
 .admin-model-form-row {
   display: flex;
   gap: 6px;
+  flex-wrap: wrap;       /* let buttons drop to a new row on narrow widths
+                             instead of overflowing the modal */
+  align-items: center;
+}
+.admin-model-form-row input {
+  flex: 1 1 180px;       /* api-key input takes the long axis but allows
+                             dropping below the buttons at small widths */
+  min-width: 0;          /* don't refuse to shrink past content width */
+}
+.admin-model-form-row select {
+  flex: 0 0 auto;
+}
+.admin-model-form-row button {
+  flex: 0 0 auto;
+  white-space: nowrap;
+}
+/* On narrow screens, give buttons their own row + push the Add button
+   to the right so it remains the obvious primary action. */
+@media (max-width: 540px) {
+  .admin-model-form-row input { flex-basis: 100%; }
+  .admin-model-form-row .admin-btn-add { margin-left: auto; }
 }
-.admin-model-form-row input { flex: 1; }
 .adm-ep-inline-msg {
   min-height: 16px;
   margin-top: 5px;
@@ -18219,8 +18239,12 @@ body.gallery-selecting .gallery-dl-btn,
   color: var(--fg-muted);
   letter-spacing: 0.2px;
 }
-/* "running" pill on a Serve-tab card when the model has a live serve task. */
-.cookbook-serve-running-pill {
+/* Status pills shown inline in a Serve-tab card title (next to the model
+   name) when the model has a live serve / download task. Shared base
+   class so "running" and "downloading" sit on the same row with the
+   same chrome; only color varies. */
+.cookbook-serve-running-pill,
+.cookbook-serve-downloading-pill {
   display: inline-block;
   margin-left: 6px;
   padding: 1px 7px;
@@ -18231,11 +18255,50 @@ body.gallery-selecting .gallery-dl-btn,
   letter-spacing: 0.3px;
   vertical-align: 2px;
   position: relative;
-  top: -1px;
+  top: 1px;          /* nudged down 2px from the old -1px so it sits
+                        flush with the cap-height of the title text */
+}
+.cookbook-serve-running-pill {
   color: var(--accent, var(--red));
   background: color-mix(in srgb, var(--accent, var(--red)) 12%, transparent);
   border: 1px solid color-mix(in srgb, var(--accent, var(--red)) 35%, transparent);
 }
+.cookbook-serve-downloading-pill {
+  color: var(--accent, var(--red));
+  background: color-mix(in srgb, var(--accent, var(--red)) 12%, transparent);
+  border: 1px solid color-mix(in srgb, var(--accent, var(--red)) 35%, transparent);
+  opacity: 0.85;
+}
+.cookbook-serve-downloading-pill.is-stalled {
+  /* Stalled downloads stay visible but read as warning, not progress. */
+  color: var(--fg-muted, #888);
+  background: color-mix(in srgb, var(--fg-muted, #888) 10%, transparent);
+  border-color: color-mix(in srgb, var(--fg-muted, #888) 30%, transparent);
+  opacity: 1;
+}
+.cookbook-serve-running-pill.is-clickable {
+  cursor: pointer;
+  transition: background 0.12s, border-color 0.12s;
+}
+.cookbook-serve-running-pill.is-clickable:hover {
+  background: color-mix(in srgb, var(--accent, var(--red)) 22%, transparent);
+  border-color: color-mix(in srgb, var(--accent, var(--red)) 55%, transparent);
+}
+/* Brief highlight on the matched task card when jumping from the
+   running pill, so the user can spot it among a long list. */
+.cookbook-task-flash {
+  animation: cookbook-task-flash-anim 1.6s ease-out;
+}
+@keyframes cookbook-task-flash-anim {
+  0%   { box-shadow: 0 0 0 2px var(--accent, var(--red)); }
+  100% { box-shadow: 0 0 0 2px transparent; }
+}
+
+/* Cookbook header "downloading" status label sits 2px too far left
+   against the rest of the cookbook chrome — nudge it right. */
+#cookbook-bg-status {
+  left: 2px;
+}
 .cookbook-serve-dir-edit {
   font-size: 9px;
   color: var(--fg-muted);
@@ -19364,6 +19427,11 @@ body.gallery-selecting .gallery-dl-btn,
   background: color-mix(in srgb, var(--fg) 7%, transparent);
   font-size: 12px;
   border-bottom: 1px solid color-mix(in srgb, var(--fg) 6%, transparent);
+  /* Pin the row so flex parents + Firefox mobile can't squeeze its height to 0,
+     which hides the type pill + model name and leaves only the sub-line +
+     output visible. */
+  flex-shrink: 0;
+  min-height: 32px;
 }
 .cookbook-task-type {
   text-transform: uppercase;
@@ -33530,8 +33598,15 @@ button.cal-view-btn {
    of days it covers within the row, --slot stacks parallel bars. */
 .cal-multiday {
   position: absolute;
-  left: calc(var(--col, 0) * (100% / 7));
-  width: calc(var(--span, 1) * (100% / 7));
+  /* Fractional offsets let timed events that cross midnight render at
+     true proportional width. start-frac shifts the left edge into the
+     first day; end-frac trims the right edge inside the last day.
+     All-day events default to (0, 1) and still fill whole columns. */
+  left: calc((var(--col, 0) + var(--start-frac, 0)) * (100% / 7));
+  width: calc(
+    (var(--span, 1) - var(--start-frac, 0) - (1 - var(--end-frac, 1)))
+    * (100% / 7)
+  );
   top: calc(2px + var(--slot, 0) * 12px);
   height: 11px;
   font-size: 8px;
@@ -35920,3 +35995,149 @@ body.theme-frosted .modal {
   padding: 10px 18px;
   border-top: 1px solid var(--border);
 }
+/* Cookbook serve panel: Launch + ^ split button pair */
+.hwfit-serve-launch-group {
+  display: inline-flex;
+  align-items: stretch;
+  vertical-align: middle;
+}
+.hwfit-serve-launch-group .hwfit-serve-launch {
+  border-top-right-radius: 0 !important;
+  border-bottom-right-radius: 0 !important;
+  margin-right: 0 !important;
+}
+.hwfit-serve-launch-group .hwfit-serve-schedule-arrow {
+  border-top-left-radius: 0 !important;
+  border-bottom-left-radius: 0 !important;
+  border-left: 1px solid var(--border) !important;
+  padding: 0 8px !important;
+  min-width: 26px;
+  display: inline-flex !important;
+  align-items: center;
+  justify-content: center;
+}
+
+/* Schedule form — mounted inside the cookbook serve panel. Uses the
+   theme tokens (--bg, --panel, --border, --accent, --red) so it
+   matches the rest of the cookbook chrome instead of inline whites. */
+.hwfit-schedule-form {
+  margin: 10px 0 4px;
+  padding: 12px 14px;
+  border: 1px solid var(--border);
+  border-radius: 8px;
+  background: var(--bg);
+  display: flex;
+  flex-direction: column;
+  gap: 10px;
+}
+.hwfit-schedule-title {
+  display: flex;
+  align-items: center;
+  gap: 8px;
+  font-size: 13px;
+  font-weight: 600;
+  opacity: 0.95;
+  flex-wrap: wrap;
+}
+.hwfit-schedule-title svg { opacity: 0.7; flex-shrink: 0; }
+.hwfit-schedule-title-text { white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }
+.hwfit-schedule-title-spacer { flex: 1; min-width: 8px; }
+.hwfit-schedule-row {
+  display: flex;
+  flex-wrap: wrap;
+  align-items: center;
+  gap: 12px;
+}
+.hwfit-schedule-field {
+  display: flex;
+  flex-direction: column;
+  gap: 3px;
+  font-size: 11px;
+  opacity: 0.75;
+}
+.hwfit-schedule-field input[type="time"] {
+  font: inherit;
+  min-width: 90px;
+}
+.hwfit-schedule-label {
+  font-size: 11px;
+  opacity: 0.75;
+}
+.hwfit-sched-days {
+  display: inline-flex;
+  flex-wrap: wrap;
+  gap: 5px;
+}
+.hwfit-sched-day-chip {
+  width: 32px;
+  height: 32px;
+  border-radius: 50%;
+  border: 1px solid var(--border);
+  background: transparent;
+  color: inherit;
+  font: inherit;
+  font-size: 10px;
+  line-height: 1;
+  padding: 0;
+  cursor: pointer;
+  transition: background 0.12s, color 0.12s, border-color 0.12s;
+}
+.hwfit-sched-day-chip:hover { border-color: var(--accent, var(--red)); }
+.hwfit-sched-day-chip.is-on {
+  background: var(--accent, var(--red));
+  color: var(--panel, #fff);
+  border-color: var(--accent, var(--red));
+}
+.hwfit-schedule-actions-row {
+  display: flex;
+  align-items: center;
+  gap: 10px;
+  flex-wrap: wrap;
+}
+.hwfit-schedule-actions-spacer { flex: 1; }
+
+/* Days row hosts the day-chip strip on the left and the Cancel / Save
+   action buttons on the right. The spacer between them pushes the
+   actions to the right edge. */
+.hwfit-schedule-days-row {
+  align-items: center;
+}
+/* Cancel + Save sit on the right side of the days row with matching
+   icon-plus-label chrome. */
+.hwfit-sched-cancel,
+.hwfit-sched-save {
+  display: inline-flex !important;
+  align-items: center;
+}
+.hwfit-schedule-mirror-toggle {
+  display: inline-flex;
+  align-items: center;
+  gap: 8px;
+  font-size: 12px;
+  opacity: 0.9;
+  cursor: pointer;
+  user-select: none;
+}
+.hwfit-schedule-mirror-label { white-space: nowrap; }
+.hwfit-schedule-mirror-switch { transform: scale(0.85); transform-origin: left center; }
+.hwfit-sched-err {
+  font-size: 12px;
+  color: var(--red, #ff6b6b);
+  display: none;
+}
+.hwfit-sched-err.is-visible { display: block; }
+@media (max-width: 600px) {
+  .hwfit-schedule-row { gap: 8px; }
+  .hwfit-sched-day-chip { width: 36px; height: 36px; font-size: 11px; }
+}
+
+/* Brief outline flash on a note card when it's the target of a
+   #note-<id> link click from chat — same pattern as the cookbook
+   task flash, just scoped to .note-card. */
+.note-card-flash {
+  animation: note-card-flash-anim 1.6s ease-out;
+}
+@keyframes note-card-flash-anim {
+  0%   { box-shadow: 0 0 0 2px var(--accent, var(--red)); }
+  100% { box-shadow: 0 0 0 2px transparent; }
+}