Merge remote-tracking branch 'origin/main' into visual-pr-playground

# Conflicts: # routes/cookbook_routes.py # routes/hwfit_routes.py # services/hwfit/fit.py # services/hwfit/models.py # static/js/cookbook-diagnosis.js # static/js/cookbook-hwfit.js # static/js/cookbook.js # static/js/cookbookRunning.js
2026-06-03 16:49:10 +09:00
parent eb79b76432 41a928f21b
commit 3706d756f3
569 changed files with 35252 additions and 3489 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -9,6 +9,7 @@ __pycache__/
 dist/
 build/
 .env
+.env.bak.*
 /data/
 /logs/
 .git/
--- a/.env.example
+++ b/.env.example
@@ -16,6 +16,10 @@ LLM_HOST=localhost
 # when started with OLLAMA_HOST=0.0.0.0:11434.
 # OLLAMA_BASE_URL=http://host.docker.internal:11434/v1

+# Optional LM Studio URL. In Docker, host LM Studio is reachable here
+# when LM Studio is set to serve on all interfaces (0.0.0.0).
+# LM_STUDIO_URL=http://host.docker.internal:1234
+
 # OpenAI API key (only needed if using OpenAI models).
 # Do not commit real keys. Keep this commented until needed.
 # OPENAI_API_KEY=your_openai_api_key_here
@@ -59,6 +63,10 @@ SEARXNG_INSTANCE=http://localhost:8080
 # Keep false for Docker, LAN, reverse proxy, and any shared deployment.
 # LOCALHOST_BYPASS=false

+# Mark session cookies Secure. Set true when Odysseus is served through HTTPS
+# by a trusted reverse proxy or private access gateway.
+# SECURE_COOKIES=true
+
 # Optional: pre-seed the first admin password during setup.
 # Do not commit a real password.
 # ODYSSEUS_ADMIN_PASSWORD=change_me_before_first_boot
@@ -141,7 +149,8 @@ SEARXNG_INSTANCE=http://localhost:8080
 #
 # AMD ROCm (requires ROCm drivers on the host and the GID of the render group):
 # COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml
-# RENDER_GID=992
+# Find the render GID with: getent group render | cut -d: -f3
+# RENDER_GID=989
 #
 # These overlays only expose the GPU devices. The slim Odysseus image
 # still needs CUDA/ROCm userspace via Cookbook -> Dependencies (vLLM,
--- a/.github/ISSUE_TEMPLATE/bug_report.yml
+++ b/.github/ISSUE_TEMPLATE/bug_report.yml
@@ -0,0 +1,103 @@
+name: Bug Report
+description: Report a reproducible bug in Odysseus.
+labels: ["bug"]
+
+body:
+  - type: markdown
+    attributes:
+      value: |
+        **Before submitting:** search [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues)
+        and [discussions](https://github.com/pewdiepie-archdaemon/odysseus/discussions) first.
+        Duplicate reports slow things down.
+
+        For security vulnerabilities, **do not open a public issue** —
+        use [GitHub Security Advisories](https://github.com/pewdiepie-archdaemon/odysseus/security/advisories/new)
+        and read [SECURITY.md](https://github.com/pewdiepie-archdaemon/odysseus/blob/main/SECURITY.md) first.
+
+  - type: checkboxes
+    id: prerequisites
+    attributes:
+      label: Prerequisites
+      options:
+        - label: I searched [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues?q=is%3Aissue+is%3Aopen) and [discussions](https://github.com/pewdiepie-archdaemon/odysseus/discussions) and did not find an existing report of this bug.
+          required: true
+        - label: This is **not** a security vulnerability. (Vulnerabilities go to [GitHub Security Advisories](https://github.com/pewdiepie-archdaemon/odysseus/security/advisories/new) — see [SECURITY.md](https://github.com/pewdiepie-archdaemon/odysseus/blob/main/SECURITY.md).)
+          required: true
+        - label: I am running the latest code from `main`.
+          required: true
+
+  - type: dropdown
+    id: install-method
+    attributes:
+      label: Install Method
+      options:
+        - Docker (docker compose up)
+        - Manual Python install (pip / venv)
+        - Windows native (launch-windows.ps1)
+        - macOS app (build-macos-app.sh / start-macos.sh)
+        - Other (describe in the reproduction steps below)
+    validations:
+      required: true
+
+  - type: dropdown
+    id: os
+    attributes:
+      label: Operating System
+      options:
+        - Linux
+        - macOS
+        - Windows
+        - Other
+    validations:
+      required: true
+
+  - type: textarea
+    id: steps
+    attributes:
+      label: Steps to Reproduce
+      description: Exact steps that reliably trigger the bug. The more specific, the faster this gets fixed.
+      placeholder: |
+        1. Go to ...
+        2. Click / type ...
+        3. Observe ...
+    validations:
+      required: true
+
+  - type: textarea
+    id: expected
+    attributes:
+      label: Expected Behaviour
+      description: What should have happened?
+    validations:
+      required: true
+
+  - type: textarea
+    id: actual
+    attributes:
+      label: Actual Behaviour
+      description: What actually happened? Include the full error message if there is one.
+    validations:
+      required: true
+
+  - type: textarea
+    id: logs
+    attributes:
+      label: Logs / Screenshots
+      description: Paste relevant terminal output or attach screenshots. Remove API keys, passwords, and personal data before pasting.
+      render: text
+
+  - type: input
+    id: model-backend
+    attributes:
+      label: Model / Backend (if relevant)
+      description: "e.g. Ollama + llama3.2:latest, vLLM + mistral-7b, OpenAI API, Anthropic API"
+      placeholder: "Ollama + llama3.2:latest"
+
+  - type: textarea
+    id: additional-info
+    attributes:
+      label: Additional Information
+      description: Anything else that might help — browser console errors, related issues, things you already tried, or environment quirks.
+      placeholder: |
+        - Any other context goes here.
+        - If you are willing to submit a PR that fixes this, mention it here.
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,13 @@
+blank_issues_enabled: false
+contact_links:
+  - name: Question / Need Help
+    url: https://github.com/pewdiepie-archdaemon/odysseus/discussions/categories/q-a
+    about: Ask how-to questions, setup help, and model configuration questions here. Issues are for confirmed bugs and concrete proposals only.
+
+  - name: Idea or Suggestion
+    url: https://github.com/pewdiepie-archdaemon/odysseus/discussions/categories/ideas
+    about: Discuss ideas and gauge interest before opening a formal feature request. If there is already a discussion, link it in your feature request.
+
+  - name: Security Vulnerability
+    url: https://github.com/pewdiepie-archdaemon/odysseus/security/advisories/new
+    about: Report vulnerabilities privately via GitHub Security Advisories — never as a public issue. Read SECURITY.md before reporting.
--- a/.github/ISSUE_TEMPLATE/feature_request.yml
+++ b/.github/ISSUE_TEMPLATE/feature_request.yml
@@ -0,0 +1,90 @@
+name: Feature Request
+description: Propose a new feature or a concrete improvement to Odysseus.
+labels: ["enhancement"]
+
+body:
+  - type: markdown
+    attributes:
+      value: |
+        **Before submitting:** search [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues)
+        and [discussions](https://github.com/pewdiepie-archdaemon/odysseus/discussions) first.
+        Feature requests that duplicate [ROADMAP.md](https://github.com/pewdiepie-archdaemon/odysseus/blob/main/ROADMAP.md)
+        or an existing open issue will be closed as duplicates.
+
+        If your idea needs community input before it becomes a concrete proposal,
+        start a [discussion](https://github.com/pewdiepie-archdaemon/odysseus/discussions/categories/ideas) instead.
+
+  - type: checkboxes
+    id: prerequisites
+    attributes:
+      label: Prerequisites
+      options:
+        - label: I searched [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues?q=is%3Aissue+is%3Aopen) and this has not already been proposed.
+          required: true
+        - label: I searched [discussions](https://github.com/pewdiepie-archdaemon/odysseus/discussions) and this is not already being debated there.
+          required: true
+        - label: This is a concrete, actionable proposal — not a vague "it would be nice if..." request.
+          required: true
+
+  - type: dropdown
+    id: area
+    attributes:
+      label: Area
+      description: Which part of the application does this affect?
+      options:
+        - Chat / Agent
+        - Email
+        - Calendar
+        - Documents / RAG
+        - Memory
+        - Cookbook / Local Models / GPU
+        - Search
+        - Notes / Editor
+        - Auth / Security
+        - Docker / Deployment
+        - UI / Frontend
+        - API / Backend
+        - MCP
+        - Testing / CI
+        - Other
+    validations:
+      required: true
+
+  - type: textarea
+    id: problem
+    attributes:
+      label: Problem or Motivation
+      description: What problem does this solve, or what use case does it enable? Be specific — "it would be better" is not enough.
+    validations:
+      required: true
+
+  - type: textarea
+    id: solution
+    attributes:
+      label: Proposed Solution
+      description: Describe the behaviour or change you want to see. Include API shape, UI sketch, or code snippets if that helps make it concrete.
+    validations:
+      required: true
+
+  - type: textarea
+    id: alternatives
+    attributes:
+      label: Alternatives Considered
+      description: What other approaches did you consider and why did you rule them out? If there is an existing workaround, describe it.
+
+  - type: textarea
+    id: prior-art
+    attributes:
+      label: Prior Art / Related Issues
+      description: Link any related issues, discussions, or external references that informed this proposal.
+
+  - type: dropdown
+    id: willing_to_implement
+    attributes:
+      label: Are you willing to implement this?
+      options:
+        - "Yes — I can open a PR"
+        - "Partially — I can help but need guidance"
+        - "No — I am only filing the request"
+    validations:
+      required: true
--- a/.github/pull_request_template.md
+++ b/.github/pull_request_template.md
@@ -0,0 +1,53 @@
+## Summary
+
+<!-- One paragraph: what changed and why. "Fixed bug" and "Added feature" are not summaries. -->
+
+## Linked Issue
+
+<!-- Every PR should be linked to an issue.
+     Use one of:  Fixes #NNN  |  Part of #NNN  |  Closes #NNN  -->
+
+Fixes #
+
+## Type of Change
+
+- [ ] Bug fix (non-breaking — fixes a confirmed issue)
+- [ ] New feature (non-breaking — adds new behaviour)
+- [ ] Breaking change (changes or removes existing behaviour)
+- [ ] Refactor / cleanup (behaviour unchanged)
+- [ ] Documentation only
+- [ ] CI / tooling / configuration
+
+## Checklist
+
+- [ ] I searched [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues) and [open PRs](https://github.com/pewdiepie-archdaemon/odysseus/pulls) — this is not a duplicate.
+- [ ] This PR targets `main`.
+- [ ] My changes are limited to the scope described above — no unrelated refactors or whitespace changes mixed in.
+- [ ] I actually ran the app (`docker compose up` or `uvicorn app:app`) and verified the change works end-to-end. Type-checks and unit tests are not enough.
+
+## How to Test
+
+<!-- Step-by-step instructions a reviewer can follow to verify this works.
+     Do not leave this empty — a PR without test steps will be sent back. -->
+
+1.
+2.
+3.
+
+## Visual / UI changes — REQUIRED if you touched anything that renders
+
+**Anything that changes what the UI looks like — buttons, icons, padding, colors, fonts, spacing, layout, CSS, HTML, SVG, or any `static/js/` module that draws to the DOM — needs all of the following. PRs that change rendering without these WILL be closed.**
+
+- [ ] **Screenshot or short clip** of the change in the running app, attached below. Mobile screenshot too if the change affects mobile.
+- [ ] **Style match**: the change uses Odysseus's existing visual language. Specifically:
+  - Reuse existing CSS variables (`--red`, `--fg`, `--bg`, `--card`, `--border`, etc.) — do not introduce new color values, font sizes, or spacing units.
+  - Reuse existing button/input/card/border classes. Don't invent parallel styling.
+  - **No Unicode emoji in UI or code.** Use inline SVG (matching the monochrome icon style already in `static/index.html`) or plain text.
+  - Monospaced font (`Fira Code`) for primary UI text. Don't override.
+  - Dark theme is the default; any light-mode work must be wired through the existing theme system, not hard-coded.
+- [ ] **No new component patterns.** If a similar widget already exists in the app, extend it instead of writing a parallel one.
+- [ ] **I am not an LLM agent submitting a bulk PR.** If you are, please open an issue describing the problem first — bulk auto-generated PRs that don't match the project's visual style are closed on sight, even when the underlying fix is correct.
+
+### Screenshots / clips
+
+<!-- Drag and drop images or a screen recording here. Required for any UI/visual change. -->
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@ venv/

 # Environment
 .env
+.env.bak.*
 !.env.example

 # Data — all user data stays local
--- a/ACKNOWLEDGMENTS.md
+++ b/ACKNOWLEDGMENTS.md
@@ -33,8 +33,8 @@ The full license texts are kept in [`licenses/`](licenses/).
 - **[Tongyi DeepResearch](https://github.com/Alibaba-NLP/DeepResearch)** by
  **Alibaba-NLP / Tongyi Lab** — the multi-step deep-research agent pipeline.
  Copyright © Alibaba-NLP / Tongyi Lab. **Apache-2.0.** Adapted for Odysseus's
-  Deep Research feature (`api/research_*.py`, `routes/research_routes.py`,
-  `services/search/`). Full text in
+  Deep Research feature (`services/research/`, `src/research_handler.py`,
+  `routes/research_routes.py`, `services/search/`). Full text in
  [`licenses/DeepResearch-Apache-2.0.txt`](licenses/DeepResearch-Apache-2.0.txt).

 ---
@@ -47,7 +47,7 @@ just composed.

 | Service | Image | Purpose | License |
 |---|---|---|---|
-| [SearXNG](https://github.com/searxng/searxng) | `searxng/searxng:latest` | Default metasearch backend | AGPL-3.0 |
+| [SearXNG](https://github.com/searxng/searxng) | `searxng/searxng:2026.5.31-7159b8aed` (pinned tag; see compose) | Default metasearch backend | AGPL-3.0 |
 | [ChromaDB](https://github.com/chroma-core/chroma) | `chromadb/chroma:latest` | Vector store for memory / RAG | Apache-2.0 |
 | [ntfy](https://github.com/binwiederhier/ntfy) | `binwiederhier/ntfy` | Push notifications (self-hosted reminders) | Apache-2.0 / GPL-2.0 |

@@ -118,6 +118,7 @@ Core (`requirements.txt`) and optional (`requirements-optional.txt`):
 | croniter | MIT |
 | pytest / pytest-asyncio | MIT / Apache-2.0 |
 | duckduckgo-search (optional) | MIT |
+| markitdown (optional — Office/EPUB text extraction) | MIT |
 | **PyMuPDF** *(optional — form-filling only)* | **AGPL-3.0** — see note below |

 ## Companion services (interoperated with, not bundled)
@@ -152,6 +153,9 @@ concerns from earlier are resolved:
  deployment (Artifex also sells a commercial PyMuPDF license that lifts this).
 - **`caldav`** (Python lib) is **dual-licensed GPL-3.0-or-later OR Apache-2.0**.
  Odysseus uses it under **Apache-2.0**, which is permissive and MIT-compatible.
+- **`markitdown`** (Microsoft) is **MIT** and used only as an *optional* dependency for Office/EPUB text
+  extraction (`src/markitdown_runtime.py`), lazy-imported with graceful fallback — the MIT core runs without
+  it. The cloud `az-doc-intel` extra is deliberately **not** installed, keeping extraction fully local.

 ---

--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -57,12 +57,32 @@ Good pull requests usually include:

 - A short explanation of the bug or feature.
 - The files or areas changed.
- Manual test steps or automated test results.
+- Manual test steps or automated test results from running the actual app, not just the test suite.
 - Screenshots or short recordings for UI changes.
 - Links to related issues, for example `Fixes #123`.

 Please keep PRs small. Large PRs that mix unrelated cleanup, formatting, refactors, and behavior changes are much harder to review.

+> **Auto-generated PRs.** If you are running an LLM agent (Devin, Cursor, OpenHands, Claude Code, etc.) against this repo: please open an issue describing the problem first instead of opening a PR directly. Bulk agent-generated PRs that don't match the project's visual style or contribution format will be closed without review, even when the underlying fix is correct.
+
+## Style and visual changes
+
+Odysseus has an intentional visual style. PRs that ignore it will be closed without merge, no matter how correct the underlying code is.
+
+Before submitting any change that affects what the app looks like — buttons, icons, fonts, colors, spacing, layout, CSS, HTML, SVG, or any `static/js/` module that draws to the DOM — please:
+
+1. **Run the app locally** and view the change in a browser. Type-checks and unit tests are not enough.
+2. **Attach a screenshot or short clip** of the change in the running app. Add a mobile screenshot too if the change affects mobile.
+3. **Match the existing visual language.** Specifically:
+   - Reuse existing CSS variables (`--red`, `--fg`, `--bg`, `--card`, `--border`, …). Do not introduce new color values, font sizes, or spacing units.
+   - Reuse existing button, input, card, and border classes. Don't invent parallel styling for similar widgets.
+   - **No Unicode emoji in UI or code.** Use inline SVG (matching the monochrome icon style already in `static/index.html`) or plain text.
+   - Monospaced font (`Fira Code`) for primary UI text. Don't override.
+   - Dark theme is the default; any light-mode work goes through the existing theme system, not hard-coded.
+4. **Don't add parallel components.** If a similar widget already exists in the app, extend it instead of writing a new one.
+
+If you are unsure whether a change is "visual," it is. Default to attaching a screenshot.
+
 ## Issue Reports

 For bugs, include:
--- a/README.md
+++ b/README.md
@@ -1,7 +1,10 @@
 # Odysseus
+
+```
 ───────────────────────────────────────────────
 ⊹ ࣪ ˖ ૮( ˶ᵔ ᵕ ᵔ˶ )っ  Odysseus vers. 1.0
 ───────────────────────────────────────────────
+```

 ![Odysseus](docs/odysseus.jpg)

@@ -77,8 +80,10 @@ python setup.py
 python -m uvicorn app:app --host 127.0.0.1 --port 7000
 ```
 Requirements: Python 3.11+. Cookbook also needs `tmux` for background model
-downloads and serves. Use `--host 0.0.0.0` only when you intentionally want
-LAN/reverse-proxy access.
+downloads and serves. The app itself is lightweight; local model serving is the
+heavy part and depends on the model, runtime, GPU, and VRAM, so small hosts can
+connect to API or remote model servers instead. Use `--host 0.0.0.0` only when
+you intentionally want LAN/reverse-proxy access.

 ### Apple Silicon
 Docker on macOS cannot use the Metal GPU. For GPU-accelerated Cookbook on an
@@ -90,7 +95,18 @@ cd odysseus
 ./start-macos.sh
 ```

-It launches at `http://127.0.0.1:7860`. To build a clickable app wrapper:
+It launches at `http://127.0.0.1:7860`. To expose it to your phone over a trusted LAN/VPN such as Tailscale, bind all interfaces:
+
+```bash
+ODYSSEUS_HOST=0.0.0.0 ./start-macos.sh
+# then open http://<tailscale-ip>:7860
+```
+
+The script also reads `.env` at startup, so `APP_BIND=0.0.0.0` and `APP_PORT`
+set there are picked up automatically without a command-line override each run.
+
+Keep `AUTH_ENABLED=true` (the default) whenever you bind outside loopback. Do not
+expose this port directly to the public internet. To build a clickable app wrapper:

 ```bash
 ./build-macos-app.sh
@@ -117,21 +133,82 @@ Odysseus SSH key and add the public key to the remote server's
 ssh-copy-id -i data/ssh/id_ed25519.pub user@server
 ```

-**NVIDIA / AMD Docker GPU overlays.** Install the host runtime first, then add
-one of these to `.env`:
+**Docker GPU overlays.** CPU-only users can skip this section. Cookbook can
+only detect GPUs that Docker exposes to the container — if the host runtime or
+device passthrough is not configured, Cookbook sees the iGPU, another card, or
+CPU instead of your intended GPU.
+
+For NVIDIA, `scripts/check-docker-gpu.sh` diagnoses GPU passthrough and can
+optionally install the host runtime or update `.env`.
+
+```bash
+# Read-only diagnostic (default — installs nothing, never edits .env):
+scripts/check-docker-gpu.sh
+
+# Print OS-specific install commands without running them:
+scripts/check-docker-gpu.sh --print-install-commands
+
+# Install NVIDIA Container Toolkit on Ubuntu/Debian (requires sudo):
+scripts/check-docker-gpu.sh --install-nvidia-toolkit
+
+# Write COMPOSE_FILE to .env (only when GPU passthrough is confirmed working):
+scripts/check-docker-gpu.sh --enable-nvidia-overlay
+
+# Full assisted setup — install toolkit, then enable overlay if passthrough works:
+scripts/check-docker-gpu.sh --install-nvidia-toolkit --enable-nvidia-overlay
+```
+
+Safety notes:
+- The app never installs host GPU runtime automatically.
+- The app never edits `.env` automatically.
+- `.env` is only modified when `--enable-nvidia-overlay` is explicitly passed,
+  and only after GPU passthrough succeeds. `--yes` skips prompts but does not
+  bypass the passthrough gate.
+- `.env.bak.*` backups created by `--enable-nvidia-overlay` are ignored by
+  Git and the Docker build context.
+
+To enable manually without the script, add this to `.env`:

 ```bash
 COMPOSE_FILE=docker-compose.yml:docker/gpu.nvidia.yml
-COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml
 ```

-Verify with:
+**AMD / ROCm.** AMD setup is read-only diagnostic plus manual `.env` edit. Run:

 ```bash
-docker compose exec odysseus nvidia-smi -L
-docker compose exec odysseus rocm-smi
+scripts/check-docker-amd-gpu.sh
 ```

+Then add the reported values to `.env`, replacing the example `RENDER_GID` value
+below with your host's numeric render group ID:
+
+```bash
+COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml
+RENDER_GID=989
+```
+
+For NVIDIA/AMD GPU support, also read the comments in the selected overlay file: `docker/gpu.nvidia.yml` or `docker/gpu.amd.yml`.
+
+Verify after enabling either overlay:
+
+```bash
+docker compose exec odysseus nvidia-smi -L   # NVIDIA
+docker compose exec odysseus sh -lc 'test -e /dev/kfd && test -d /dev/dri && ls -l /dev/kfd /dev/dri/renderD*'  # AMD
+```
+
+> **GPU passthrough ≠ llama.cpp CUDA.** `nvidia-smi` passing inside the
+> container confirms Docker GPU access, but llama.cpp also needs `cudart` and
+> the CUDA Toolkit at runtime. If Cookbook logs show `Unable to find cudart
+> library`, `Could NOT find CUDAToolkit`, `CUDA Toolkit not found`, or
+> tensors/layers assigned to CPU, that is a Cookbook/llama.cpp build issue —
+> not a Docker passthrough failure. Re-install the serve engine via
+> **Cookbook → Dependencies** to get a CUDA-enabled build.
+>
+> The same split applies to AMD/ROCm: seeing `/dev/kfd` and `/dev/dri` inside
+> the container confirms device passthrough, not ROCm userspace or a
+> ROCm-enabled vLLM/llama.cpp build. `rocm-smi` and `rocminfo` are not expected
+> inside the slim Odysseus image.
+
 **Ollama with Docker.** If Ollama runs on the host, add this endpoint in
 Settings:

@@ -145,6 +222,13 @@ Ollama must listen outside its own loopback interface:
 OLLAMA_HOST=0.0.0.0:11434 ollama serve
 ```

+This connects Odysseus in Docker to an Ollama server that is already running on
+your host machine; it does not start Ollama inside the container.
+`host.docker.internal` is Docker's hostname for the host machine from inside the
+container. Cookbook **Serve** is a separate workflow for serving downloaded
+models through Odysseus/llama.cpp, so Windows users with an existing Ollama
+install usually only need to add the endpoint in Settings.
+
 **Useful checks.**

 ```bash
@@ -176,13 +260,16 @@ Or do it by hand:
 ```powershell
 git clone https://github.com/pewdiepie-archdaemon/odysseus.git
 cd odysseus
-python -m venv venv
+py -3.11 -m venv venv
 venv\Scripts\Activate.ps1
 pip install -r requirements.txt
 python setup.py
 python -m uvicorn app:app --host 127.0.0.1 --port 7000
 ```

+`py -3.11` selects Python 3.11 explicitly, even if `python` points at an older
+interpreter. If you have a different 3.11+ version installed, use it instead (for example `py -3.12`).
+
 **Requirements:** Python 3.11+. The core app (chat, agent, memory, documents,
 email, calendar, deep research) runs fully native. For full **Cookbook** background
 model downloads and the agent shell tool, also install
@@ -194,31 +281,77 @@ Local GPU *serving* of vLLM/SGLang needs Linux/WSL2; for a local model on Window
 Open `http://localhost:7000`, log in with the generated admin password,
 and configure everything else inside **Settings**.

+## Troubleshooting & Advanced Setup
+
+### `chromadb-client` conflicts with embedded ChromaDB
+If `chromadb-client` (the lightweight HTTP-only package) is installed alongside the full `chromadb` package, Odysseus starts but ChromaDB silently falls back to HTTP-only mode and fails.
+
+**Fix:** uninstall `chromadb-client` and force-reinstall the full package:
+```bash
+./venv/bin/pip uninstall chromadb-client -y
+./venv/bin/pip install --force-reinstall chromadb
+```
+
+### HTTPS + LAN/Tailscale exposure
+To expose Odysseus on a local network or Tailscale with HTTPS:
+1. Change the bind address to `0.0.0.0` in `.env` (`APP_BIND=0.0.0.0` or `ODYSSEUS_HOST=0.0.0.0`).
+2. Generate a locally-trusted cert for your LAN/Tailscale IPs using [mkcert](https://github.com/FiloSottile/mkcert) (replace `192.168.1.100` and `tailscale-ip` below with your own addresses):
+   ```bash
+   mkcert -install
+   mkcert -cert-file cert.pem -key-file key.pem 192.168.1.100 tailscale-ip
+   ```
+3. Run `uvicorn` with the generated certs:
+   ```bash
+   python -m uvicorn app:app --host 0.0.0.0 --port 7000 --ssl-certfile=cert.pem --ssl-keyfile=key.pem
+   ```
+4. Install the `mkcert` CA on any other device you want to access Odysseus from (e.g., for iOS, email the `rootCA.pem` to yourself, install the profile, and trust it in Certificate Trust Settings).
+
+### Optional Dependencies
+`requirements-optional.txt` contains packages that unlock extra features. It is not installed by default.
+
+| Package | Feature unlocked |
+|---------|-----------------|
+| `faster-whisper` | Local speech-to-text (microphone -> text) via the "local" STT provider. |
+| `duckduckgo-search` | DuckDuckGo as a search provider option. |
+| `PyMuPDF` | PDF page rendering in the side viewer panel and form-filling. (Note: AGPL-3.0) |
+| `markitdown` | Office/EPUB document text extraction (converts .docx/.xlsx/.pptx/.xls/.epub to Markdown). |
+
 ## Security Notes
 Odysseus is a self-hosted workspace with powerful local tools: shell access, file uploads, model downloads, web research, email/calendar integrations, and API tokens. Treat it like an admin console.

 - Keep `AUTH_ENABLED=true` for any network-accessible deployment.
- Do not expose it directly to the public internet without HTTPS and a trusted reverse proxy.
- Keep `data/`, `.env`, logs, databases, and uploaded/generated media out of Git. They are ignored by default.
+- Keep `LOCALHOST_BYPASS=false` outside local development.
+- Use `SECURE_COOKIES=true` when Odysseus is served through HTTPS by a trusted reverse proxy or private access gateway.
+- Do not expose it directly to the public internet without HTTPS and a trusted reverse proxy or private access layer.
+- Keep `.env`, `data/`, `logs/`, databases, uploads, generated media, backups, auth/session files, API keys, and model/provider tokens out of Git and private shares. They are ignored by default.
 - Review `data/auth.json` after first boot: disable open signup unless you intentionally want it, make only your own account admin, and keep demo/test accounts non-admin.
 - Non-admin users do not get shell/Python/file read/write by default, and admin-only routes/tools such as MCP management, API tokens, webhooks, model/cookbook serving, backup/vault, and app settings are admin-gated. Other features are controlled by per-user privileges, so review each user's privileges before exposing a deployment.
 - Rotate any API keys or tokens that were ever pasted into a shared chat, demo, screenshot, or log.
 - If you enable API tokens or webhooks, create separate tokens per integration and delete unused ones.
 - Prefer binding manual development runs to `127.0.0.1`; bind to `0.0.0.0` only when you intentionally want LAN/reverse-proxy access.
+- Keep ChromaDB, SearXNG, ntfy, Ollama, vLLM, llama.cpp, databases, and raw model/provider APIs internal-only. Expose only the authenticated Odysseus web/API entrypoint through your trusted proxy or private access layer.
 - Before publishing a fork, run `git status --short` and confirm no private files from `.env`, `data/`, `logs/`, uploads, backups, or local databases are staged.

-### Putting it behind HTTPS
-Odysseus serves plain HTTP on its port. That's fine for `localhost` and trusted LAN/VPN use, but browsers will warn ("Password fields present on an insecure page") and the login + API tokens travel in cleartext. For anything reachable outside your machine — including a Tailscale IP shared with other devices — put a TLS-terminating reverse proxy in front.
+### Private or proxied deployments
+Odysseus serves plain HTTP on its app port. Docker Compose binds Odysseus and the bundled services to `127.0.0.1` by default, so a typical production/private setup is:

-Shortest path with [Caddy](https://caddyserver.com/) (auto-renews Let's Encrypt certs):
+1. Keep Odysseus on localhost, for example `127.0.0.1:7000`.
+2. Terminate HTTPS at a trusted reverse proxy or private access gateway.
+3. Put the authenticated Odysseus web/API entrypoint behind that layer.
+4. Keep raw service and model ports internal-only.

-```caddy
-odysseus.example.com {
-  reverse_proxy localhost:7000
-}
-```
+Cloudflare Access, Tailscale, Caddy, nginx, and Traefik can all fit this pattern; none are required by Odysseus. If your access layer reaches Odysseus on the same host, proxy to `http://127.0.0.1:7000` and keep `AUTH_ENABLED=true`, `LOCALHOST_BYPASS=false`, and `SECURE_COOKIES=true`.

-For a LAN-only Tailscale deployment, Caddy + [tailscale-cert](https://caddyserver.com/docs/caddyfile/options#auto-https) or the built-in MagicDNS HTTPS feature both work. nginx/Traefik configs are similar — proxy `localhost:7000`, terminate TLS at the proxy. Once that's in place, the browser warning goes away and your login is encrypted.
+Common internal-only ports from the default docs/compose setup:
+
+| Port | Service |
+|---|---|
+| `7000` | Odysseus raw app port |
+| `8080` | SearXNG |
+| `8091` | ntfy |
+| `8100` | ChromaDB host port for manual/compose access |
+| `11434` | Ollama |
+| `8000-8020` | Common local model/provider APIs |

 ## Contributing
 Help is welcome. The best entry points are fresh-install testing, provider setup
@@ -241,6 +374,7 @@ Key settings:
 | `APP_PORT` | `7000` | Docker Compose host port for the web UI. |
 | `AUTH_ENABLED` | `true` | Enable/disable login |
 | `LOCALHOST_BYPASS` | `false` | Development-only auth bypass for loopback requests. Keep false for shared/network deployments. |
+| `SECURE_COOKIES` | `false` | Set true when serving Odysseus through HTTPS at a trusted proxy or private access gateway. |
 | `DATABASE_URL` | `sqlite:///./data/app.db` | Database connection string |
 | `CHROMADB_HOST` | `localhost` | ChromaDB host for vector memory. Docker overrides this to `chromadb`. |
 | `CHROMADB_PORT` | `8100` | ChromaDB port for manual host runs. Docker overrides this to `8000`. |
--- a/ROADMAP.md
+++ b/ROADMAP.md
@@ -1,6 +1,6 @@
 # Roadmap / Help Wanted

-Odysseus is on a voyage, but not home yet. It works great for me (lol), but this is ship is moving fast and feedback/help would be appreciated! (I dont know what I'm doing hlep).
+Odysseus is on a voyage, but not home yet. It works great for me (lol), but this ship is moving fast and feedback/help would be appreciated! (I don't know what I'm doing, help).

 If you see weird CSS, strange layout behavior, or a suspiciously murky corner of
 the codebase, you are probably right to stay away.
@@ -8,25 +8,60 @@ the codebase, you are probably right to stay away.
 ## High Priority

 - SQUASH BUGS
- Fresh Docker install smoke tests on Linux, macOS, and Windows!!
+- Fresh install smoke tests on Linux, macOS, and Windows. Docker, native Python,
+  and WSL all need coverage.

 - Integration audit: do integrations even work? Confirm what works, what needs setup docs, and what should be removed or hidden. 
 - Self-host troubleshooting cookbook. Document the weird 30-second fixes that otherwise become 30-minute searches: Dovecot cleartext auth for local stacks, ntfy Android Instant Delivery for non-ntfy.sh servers, clipboard limits on plain-HTTP Tailscale URLs, Radicale collection URLs, and similar traps.
 - Cookbook reliability on other computers. This is probably the area most likely to need work across different machines, GPUs, drivers, shells, and Python environments.
- Tile/window management correctness. I had to brute force my way a bit here, I'm aware, popups, dropdowns, and fixed-position UI inside transformed modals can land in the wrong place.
- Esc button, it's small but a lot of windows that arent still close on esc and alot of them doesnt. 
- Skill audit, how does your model respond to skill injection, does it follow? Does its parsing miss? 
+- Cookbook SGLang support across platforms. Make sure SGLang setup/serve works
+  predictably on Linux, Windows/WSL, macOS where possible, Docker, and common
+  NVIDIA/AMD hardware paths.
+- Deep Research model presets by hardware. Recommend approved model/parameter
+  profiles for small, medium, and large local setups so people with different
+  hardware can use Deep Research without guessing. Surface this either in Deep
+  Research settings or as a Cookbook scan/dropdown suggestion.
+- Cookbook model scan/download ranking. Prioritize newer architectures and
+  better hardware-fit models instead of scoring everything almost the same.
+  Ranking should account for architecture age, quant format, VRAM/RAM fit,
+  backend support, vision/mmproj requirements, and likely serve reliability.
+- Cookbook error feedback and logging. Failed downloads, dependency installs,
+  preflights, and serve jobs should show the actual command/output/error in the
+  UI, with copyable logs and clear next steps instead of just "crashed".
+- Agent prompt/context bloat. Agent mode is too heavy for smaller local models:
+  tool schemas, skills, memory, documents, and instructions can eat the context
+  before the user request really starts. We need slimmer prompts, better tool
+  selection, smaller default tool sets, and clearer guidance for models with
+  4k/8k/16k context windows.
+- Skill/tool prompt-injection audit. User-editable skills, notes, documents,
+  fetched pages, and memories should be treated as untrusted data. Keep testing
+  whether models follow malicious instructions from those surfaces.
 - Better degraded-state reporting for ChromaDB, SearXNG, email, ntfy, and provider probes.
+- Email performance audit. Fetching, searching, opening, deleting, and sending
+  email can feel slow, especially over IMAP/SMTP providers with high latency.
+  Need someone who knows mail performance to profile the current flow, identify
+  whether the bottleneck is IMAP folder select/fetch, cache invalidation,
+  attachment/body loading, SMTP handshakes, or frontend refresh behavior, then
+  propose safer caching/prefetch/batching without breaking multi-account state.
 - Provider setup/probing audit for Anthropic, Gemini, Groq, xAI, OpenRouter, OpenAI, and DeepSeek.

 ## Refactor Targets
 - CSS cleanup. `static/style.css` basically Calypso's island atm.
 - Tour core helper. The onboarding tours have too much copy-pasted scaffolding; promote a shared `tour-core.js` helper before adding more tours.
+- Modal/window positioning cleanup. Some window controls have improved, but the
+  underlying popup/dropdown/fixed-position behavior is still too fragile.
 - Mobile media override discoverability. A lot of "CSS did not move" bugs are mobile `@media` overrides of the same selector; comments or linting around desktop/mobile paired rules would help.
 - Dead code pass for old routes, stale feature flags, and unused UI states.

 ## Frontend

+- Expand the Editor for quicker, more robust everyday use. Better file/document
+  handling, smoother window behavior, clearer save/export flows, stronger image
+  editing affordances, and fewer brittle edge cases.
+- Better AI integration for Notes and Todos. Notes should be easier for the
+  agent to read, update, summarize, and turn into actions. Todos should be
+  assignable to an agent from the UI, possibly through a button, task action,
+  or dedicated skill/tool flow.
 - Mobile gallery/editor polish. Easier to launch/download inpaint model or any missing pieces.
 - Accessibility pass: keyboard navigation, focus states, contrast, reduced motion.
 - Improve empty states and error messages on fresh installs.
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -8,16 +8,20 @@ Security fixes are handled on the default branch until formal releases are cut.

 ## Deployment Guidance

- Keep `AUTH_ENABLED=true`.
+- Keep `AUTH_ENABLED=true` for any network-accessible deployment.
+- Keep `LOCALHOST_BYPASS=false` outside local development.
+- Set `SECURE_COOKIES=true` when Odysseus is served through HTTPS by a trusted reverse proxy or private access gateway.
 - Use HTTPS when exposing the app beyond localhost.
- Put the app behind a trusted reverse proxy or private network.
- Protect `.env`, `data/`, logs, uploaded files, generated media, and database files.
+- Put the authenticated Odysseus web/API entrypoint behind a trusted reverse proxy or private access layer such as Cloudflare Access, Tailscale, or a VPN.
+- Keep ChromaDB, SearXNG, ntfy, Ollama, vLLM, llama.cpp, databases, and raw model/provider APIs internal-only.
+- Protect `.env`, `data/`, `logs/`, uploads, generated media, backups, auth/session files, database files, API keys, and model/provider tokens.
 - Disable open signup unless you intentionally want new accounts.
 - Keep demo/test users non-admin, and remove them entirely on serious deployments.
 - Give admin accounts strong passwords and enable 2FA where possible.
 - Leave high-risk agent tools restricted to admins: shell, Python, file read/write, email send/read, MCP, app API, task/skill/memory management, settings, tokens, and model serving.
 - Rotate API keys, webhook secrets, and Odysseus API tokens if they appear in logs, screenshots, demos, or shared chats.
 - Treat shell, model-serving, MCP, email, calendar, and vault features as privileged admin functionality.
+- Common internal-only ports are Odysseus `7000`, SearXNG `8080`, ntfy `8091`, ChromaDB `8100`, Ollama `11434`, and local model/provider APIs such as `8000-8020`.

 ## Publishing A Fork

@@ -29,7 +33,7 @@ git check-ignore -v .env data/auth.json data/app.db logs/compound.log odysseus.d
 git grep -n -I -E "(sk-[A-Za-z0-9_-]{20,}|xox[baprs]-|AIza[0-9A-Za-z_-]{20,}|Bearer [A-Za-z0-9._~+/-]{20,})" -- . ':!static/lib/**' ':!package-lock.json'
 ```

-Only `.env.example`, docs, source, tests, and static assets should be committed. Never commit live `data/` contents, local databases, uploaded files, generated media, logs, backups, API keys, password hashes, or personal documents.
+Only `.env.example`, docs, source, tests, and static assets should be committed. Never commit live `.env` values, `data/` contents, local databases, uploaded files, generated media, logs, backups, auth/session files, API keys, model/provider tokens, password hashes, or personal documents.

 ## Reporting

--- a/THREAT_MODEL.md
+++ b/THREAT_MODEL.md
@@ -0,0 +1,81 @@
+# Threat Model
+
+Odysseus is a **self-hosted AI workspace with privileged local access**. This document states the trust boundary so contributors can reason about security decisions without reading through the full auth and middleware stack.
+
+## Trust Boundary
+
+Odysseus is designed for **trusted users on a private network**, not public exposure. The README describes it as "treat it like an admin console" — that framing is accurate. A logged-in admin can execute shell commands, read and write files, send email, and control model serving. This is intentional. The threat model does not try to prevent admins from doing these things. It does try to prevent:
+
+- Unauthenticated access
+- Non-admins reaching admin-only capabilities
+- The AI agent acting on instructions injected through untrusted content (web results, emails, fetched pages, memories)
+- Internal services (ChromaDB, Ollama, SearXNG, etc.) being reachable from outside the host
+
+## Roles and Capabilities
+
+| Capability | Admin | Non-admin (default) |
+|---|---|---|
+| Chat with agent | ✓ | ✓ |
+| Browser tool | ✓ | ✓ |
+| Documents | ✓ | ✓ |
+| Research mode | ✓ | ✓ |
+| Image generation | ✓ | ✓ |
+| Memory management | ✓ | ✓ |
+| Shell / Python execution | ✓ | ✗ |
+| File read / write | ✓ | ✗ |
+| Email send / read | ✓ | ✗ |
+| MCP tools | ✓ | ✗ |
+| Calendar management | ✓ | ✗ |
+| Token / webhook management | ✓ | ✗ |
+| Model serving | ✓ | ✗ |
+| Vault | ✓ | ✗ |
+| Settings | ✓ | ✗ |
+
+Non-admin defaults are in `core/auth.py:DEFAULT_PRIVILEGES`. Tool enforcement is in `src/tool_security.py:NON_ADMIN_BLOCKED_TOOLS`. Any tool whose name starts with `mcp__` is also blocked for non-admins. Admins always get full access regardless of stored privilege values.
+
+## Authentication
+
+- **Sessions:** bcrypt passwords, 7-day session tokens stored atomically in `data/sessions.json` via `core/atomic_io.py`.
+- **2FA:** TOTP with 8 single-use backup codes. Verified after password check, before session issuance.
+- **Reserved usernames:** `internal-tool`, `api`, `demo`, `system` cannot be registered or renamed into. Defined in `core/auth.py:RESERVED_USERNAMES`.
+  - `internal-tool` is security-critical: `core/middleware.py:require_admin` treats any request where `request.state.current_user == "internal-tool"` as the in-process tool loopback and grants admin unconditionally. A real account with that name would silently pass every `require_admin` check.
+- **Orphan sessions:** `validate_token` re-checks that the user record still exists on every call. A deleted user's cookie is dropped on next request rather than continuing to authenticate.
+
+## Internal Tool Loopback
+
+Agent tool calls reach admin-gated HTTP routes over an in-process HTTP loopback. The mechanism:
+
+1. At app startup, `core/middleware.py` generates a random `INTERNAL_TOOL_TOKEN` via `secrets.token_hex(32)`. It is never persisted and never sent to clients.
+2. Loopback requests carry `X-Odysseus-Internal-Token: <token>` or have `request.state.current_user` already set to `"internal-tool"` by the auth middleware.
+3. `require_admin` recognises either signal and grants access without checking the session user.
+
+The agent may be running in a non-admin user's session, but tool dispatch first calls `src/tool_security.py:owner_is_admin_or_single_user` to verify the session owner is an admin before issuing any loopback call. Non-admin users cannot invoke admin tools even via the agent.
+
+## Prompt-Injection Hardening
+
+External content that reaches the LLM is treated as untrusted via `src/prompt_security.py`:
+
+- `untrusted_context_message(label, content)` wraps the content in a `user`-role message with a header block instructing the model not to follow instructions inside it. Content goes in as data, not as a system instruction.
+- `UNTRUSTED_CONTEXT_POLICY` is a system-prompt preamble that states the same policy at the top of every session where untrusted data may appear.
+
+**Untrusted surfaces that must go through this wrapper:** web search results, fetched URLs, emails (read), saved memories, skill text, notes, and any tool output sourced from outside the server. Injecting untrusted content directly into the system role is a security bug.
+
+## Security Headers
+
+`core/middleware.py:SecurityHeadersMiddleware` sets headers on every response:
+
+- `X-Frame-Options: DENY` + `frame-ancestors 'none'` on all routes except tool-render iframes (which are sandboxed at the HTML level).
+- `X-Content-Type-Options: nosniff` and `Referrer-Policy: no-referrer` everywhere.
+- **CSP:** nonce-based `script-src 'self' 'nonce-{nonce}' https://cdn.jsdelivr.net`. `style-src 'unsafe-inline'` is intentionally kept — `static/index.html` ships inline `<style>` blocks and JS modules set `style=""` attributes at runtime. Inline styles do not execute script so the risk is visual-only. Removing this requires templating the HTML files and auditing all JS-set style attributes.
+
+## Known Gaps
+
+These gaps are open and acknowledged; contributor help is welcome:
+
+1. **No shell/filesystem sandbox.** The agent `bash` and `read_file`/`write_file` tools run as the app process user with no network egress filtering or filesystem confinement. A successful prompt-injection reaching a shell-enabled admin session can make outbound requests to internal services. See #1058 for the sandbox proposal.
+
+2. **SSRF via `/api/v1/chat` `base_url` parameter.** A chat-scoped API token can supply an arbitrary `base_url`; the server forwards the LLM request to that host without validating the scheme or address. PR #1039 fixes this.
+
+3. **`src/search/` partial consolidation.** `src.search.core` and `src.search.providers` correctly alias `services.search` via `sys.modules` replacement. `analytics`, `cache`, `content`, `query`, and `ranking` are still independent copies that can drift. The SSRF regression tests in `tests/test_webhook_ssrf_resilience.py` test `src.webhook_manager` directly (separate from search), so the safety net there is intact. See #1058.
+
+4. **Token scopes are coarse.** There is no way to grant a session a subset of the owning user's privileges. Companion/mobile tokens carry either `chat` or `admin` scope with no per-capability granularity.
--- a/app.py
+++ b/app.py
@@ -1,6 +1,23 @@
 # app.py — slim orchestrator
+import mimetypes
 import os

+
+def register_static_mime_types() -> None:
+    """Pin the MIME types used when serving JavaScript static assets.
+
+    Some native Windows setups inherit stale or incorrect registry mappings
+    for ``.js``/``.mjs``. Starlette can then serve ES modules with a non-JS
+    ``Content-Type``, so the UI loads but fails on click. Registering the
+    mappings explicitly at startup keeps static assets served consistently.
+    """
+
+    js_types = (
+        (".js", "text/javascript"),
+        (".mjs", "application/javascript"),
+    )
+    for suffix, mime in js_types:
+        mimetypes.add_type(mime, suffix)
+
+
+register_static_mime_types()
+
 # Windows: force HuggingFace/fastembed to COPY model files instead of symlinking.
 # On a network-share/UNC data dir Windows can't follow HF's symlinks ([WinError
 # 1463]), so the ONNX embedding model fails to load. huggingface_hub reads this
@@ -25,6 +42,7 @@ import secrets
 from datetime import datetime
 from typing import Dict

+from contextlib import asynccontextmanager
 from fastapi import FastAPI, Request, HTTPException
 from fastapi.responses import JSONResponse, FileResponse, HTMLResponse
 from fastapi.middleware.cors import CORSMiddleware
@@ -57,6 +75,9 @@ logging.basicConfig(
 logger = logging.getLogger(__name__)

 # ========= APP =========
+# Lifespan is defined below (after all helpers it references are in scope)
+# and attached via app.router.lifespan_context so we can use the modern
+# context-manager lifecycle instead of the deprecated
+# @app.on_event("startup"/"shutdown") decorators.
 app = FastAPI(
    title="AI Chat Application",
    description="Comprehensive AI chat with memory, research, and multi-modal capabilities",
@@ -152,9 +173,25 @@ if AUTH_ENABLED:
        "/login",
    }
    AUTH_EXEMPT_PREFIXES = ["/static"]
+    # Dynamic paths whose own handler proves identity via a path-embedded
+    # secret instead of the session/bearer auth. The route handler at
+    # routes/task_routes.py validates the per-task `webhook_token` itself
+    # and returns 404 on mismatch, so the path is the credential — the
+    # UI labels these URLs "no auth needed" precisely because external
+    # callers (Zapier, n8n, curl) can't supply a session cookie. Without
+    # this exemption AuthMiddleware rejects every POST with 401 before
+    # the token is ever checked.
+    import re as _re
+    AUTH_EXEMPT_PATTERNS = [
+        _re.compile(r"^/api/tasks/[^/]+/webhook/[^/]+/?$"),
+    ]

     def _is_auth_exempt(path: str) -> bool:
-        return path in AUTH_EXEMPT_EXACT or any(path.startswith(p) for p in AUTH_EXEMPT_PREFIXES)
+        # A path skips auth when it is an exact match, sits under an exempt
+        # prefix, or matches one of the compiled AUTH_EXEMPT_PATTERNS
+        # (path-embedded-secret routes such as the task webhook URL).
+        if path in AUTH_EXEMPT_EXACT:
+            return True
+        if any(path.startswith(p) for p in AUTH_EXEMPT_PREFIXES):
+            return True
+        return any(p.match(path) for p in AUTH_EXEMPT_PATTERNS)

    # In-memory token cache: prefix → list[(token_id, token_hash, owner, scopes)]. The DB
    # query was running on every API-bearer request and scanning bcrypt
@@ -662,6 +699,9 @@ app.include_router(setup_vault_routes())
 from routes.contacts_routes import setup_contacts_routes
 app.include_router(setup_contacts_routes())

+from companion import setup_companion_routes
+app.include_router(setup_companion_routes())
+
 # ========= ROUTES (kept in app.py) =========

 def _serve_html_with_nonce(request: Request, file_path: str) -> HTMLResponse:
@@ -736,6 +776,17 @@ async def get_version():
 async def health_check() -> Dict[str, str]:
    return {"status": "healthy", "timestamp": datetime.utcnow().isoformat()}

+@app.get("/api/ready")
+async def readiness_check() -> JSONResponse:
+    """Readiness / integrity self-check — DB, data dir, local-first storage.
+
+    Delegates to ``src.readiness.check_readiness`` and returns its result as
+    the JSON body. The status is 200 when the result's ``ready`` entry is
+    truthy and 503 otherwise, so an orchestrator can gate traffic on real
+    readiness. /api/health remains the plain liveness probe.
+    """
+    from src.readiness import check_readiness
+
+    report = check_readiness()
+    status = 503
+    if report.get("ready"):
+        status = 200
+    return JSONResponse(status_code=status, content=report)
+
@app.get("/api/runtime")
 async def runtime_info() -> Dict[str, object]:
    in_docker = os.path.exists("/.dockerenv")
@@ -758,8 +809,19 @@ async def runtime_info() -> Dict[str, object]:

 # ========= LIFECYCLE =========

-@app.on_event("startup")
-async def startup_event():
+@asynccontextmanager
+async def _lifespan(app):
+    """Lifespan context manager replacing the deprecated @app.on_event hooks.
+
+    Awaits ``_startup_event()`` before yielding, then awaits
+    ``_shutdown_event()`` when the context is resumed after the yield.
+    The ``app`` argument is accepted but not used here.
+    """
+    # ── STARTUP ──
+    await _startup_event()
+    yield
+    # ── SHUTDOWN ──
+    await _shutdown_event()
+
+app.router.lifespan_context = _lifespan
+
+
+async def _startup_event():
    global upload_cleanup_task
    logger.info("Application starting up...")
    webhook_manager.set_loop(asyncio.get_running_loop())
@@ -983,8 +1045,7 @@ async def startup_event():
    _startup_tasks.append(asyncio.create_task(_skill_audit_nightly_loop()))
    logger.info("Application startup complete")

-@app.on_event("shutdown")
-async def shutdown_event():
+async def _shutdown_event():
    logger.info("Application shutting down...")
    if upload_cleanup_task:
        upload_cleanup_task.cancel()
--- a/build-macos-app.sh
+++ b/build-macos-app.sh
@@ -119,7 +119,11 @@ fi

 notify "Starting…"
 cd "$INSTALL_DIR" || die_gui "Install folder not found: $INSTALL_DIR"
-"$UVICORN" app:app --host 127.0.0.1 --port "$PORT" >>"$LOG" 2>&1 &
+if [ "$(uname -m)" = "arm64" ]; then
+  arch -arm64 "$UVICORN" app:app --host 127.0.0.1 --port "$PORT" >>"$LOG" 2>&1 &
+else
+  "$UVICORN" app:app --host 127.0.0.1 --port "$PORT" >>"$LOG" 2>&1 &
+fi
 SERVER_PID=$!

 # Quitting the app stops the server it started.
--- a/companion/README.md
+++ b/companion/README.md
@@ -0,0 +1,28 @@
+# Companion bridge
+
+A thin, additive layer so a LAN client (e.g. a phone) can discover what an
+Odysseus server offers and pair to it, without duplicating any LLM logic.
+
+| Method | Path | Auth | Purpose |
+|---|---|---|---|
+| GET | `/api/companion/ping` | session or token | cheap, auth-validated health check |
+| GET | `/api/companion/info` | session or token | server identity + capability flags |
+| GET | `/api/companion/models` | session or token | the **caller's own** model endpoints |
+| GET | `/api/companion/pair` | **admin cookie** | pairing page (a form; never mints) |
+| POST | `/api/companion/pair` | **admin cookie** | mint a one-time pairing token (`?format=json` for an in-app screen) |
+
+`/models` scopes to the caller's real owner plus legacy null-owner shared rows
+(same rule as `owner_filter`) and never returns API-key material.
+
+## Pairing CSRF posture
+
+Minting happens **only on POST**. The session cookie is `SameSite=Lax`
+(`routes/auth_routes.py`), so a browser will not send it on a cross-site POST —
+the same protection `POST /api/tokens` relies on. A `GET` would be unsafe (Lax
+cookies ride top-level GET navigations), so `GET /pair` only renders a form.
+Minting invalidates the auth middleware's token cache, so a freshly minted token
+works on the next request without a restart.
+
+The pairing/scoping rules live in small, tested units (`token_owner`,
+`owner_can_see`, `mint_pairing_token`, `pairing.*`) — see
+`tests/test_companion_readonly.py` and `tests/test_companion_pairing.py`.
--- a/companion/__init__.py
+++ b/companion/__init__.py
@@ -0,0 +1,11 @@
+"""Odysseus companion bridge — additive LAN endpoints.
+
+Read endpoints (/api/companion/ping, /info, owner-scoped /models) so a LAN
+client can discover what a server offers, plus admin-only pairing
+(/api/companion/pair) that mints a one-time chat-scoped token on POST. No new LLM
+logic; auth is enforced by the existing AuthMiddleware. See companion/README.md.
+"""
+
+from companion.routes import setup_companion_routes
+
+__all__ = ["setup_companion_routes"]
--- a/companion/pairing.py
+++ b/companion/pairing.py
@@ -0,0 +1,126 @@
+"""Shared pairing helpers for the companion bridge.
+
+Token minting + LAN discovery + QR rendering, kept here as small, importable
+units so the route layer stays thin and the logic is directly testable.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import secrets
+import socket
+import uuid
+
+import bcrypt
+
+PAIRING_VERSION = 1
+COMPANION_SCOPE = "chat"
+
+
+def default_port() -> int:
+    """Return the port named by the ``APP_PORT`` environment variable.
+
+    Falls back to 7000 when the variable is unset or does not parse as an
+    integer. This is only a best guess at where the server is reachable;
+    callers that know the real request port should pass it explicitly.
+    """
+    raw = os.environ.get("APP_PORT", "7000")
+    try:
+        port = int(raw)
+    except ValueError:
+        port = 7000
+    return port
+
+
+def lan_ip_candidates() -> list[str]:
+    """Likely LAN IPv4 addresses for this host, best candidate first.
+
+    The UDP-connect trick reveals the egress interface the OS would use to reach
+    the default gateway -- i.e. the address a phone on the same Wi-Fi should
+    target. No packets are actually sent. Loopback is dropped.
+
+    Discovery is best-effort: any ``OSError`` from either probe is swallowed,
+    so the result may be an empty list but the call itself does not raise
+    for socket-level failures.
+    """
+    candidates: list[str] = []
+
+    def _add(ip):
+        # Keep first-seen order, skip blanks, duplicates and loopback.
+        if ip and ip not in candidates and not ip.startswith("127."):
+            candidates.append(ip)
+
+    # Creating the socket can itself raise OSError, so it belongs inside the
+    # try; the context manager closes it on every path.
+    try:
+        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
+            s.connect(("8.8.8.8", 80))
+            _add(s.getsockname()[0])
+    except OSError:
+        pass
+
+    # Second source: whatever the host's own name resolves to over IPv4.
+    try:
+        for info in socket.getaddrinfo(socket.gethostname(), None, socket.AF_INET):
+            _add(info[4][0])
+    except OSError:
+        pass
+
+    return candidates
+
+
+def find_admin_user() -> str | None:
+    """Pick an admin username out of ``data/auth.json``.
+
+    Returns the first user whose record has ``is_admin`` set to exactly
+    ``True``; when no such record exists, returns the first user in the file.
+    Returns ``None`` if the file cannot be read or parsed, if the top level
+    or its ``users`` entry is not a mapping, or if there are no users.
+    """
+    try:
+        with open(os.path.join("data", "auth.json"), "r", encoding="utf-8") as fh:
+            parsed = json.load(fh)
+    except (OSError, json.JSONDecodeError):
+        return None
+
+    users = (parsed.get("users") or {}) if isinstance(parsed, dict) else None
+    if not isinstance(users, dict):
+        return None
+
+    admin_names = [
+        name
+        for name, record in users.items()
+        if isinstance(record, dict) and record.get("is_admin") is True
+    ]
+    if admin_names:
+        return admin_names[0]
+    # No flagged admin: fall back to whichever user is listed first.
+    return next(iter(users), None)
+
+
+def mint_token(owner: str, name: str = "companion") -> tuple[str, str]:
+    """Persist a new chat-scoped API token and return ``(token_id, raw_token)``.
+
+    The raw ``ody_``-prefixed token is handed back exactly once; the stored
+    row carries only its bcrypt hash and its first 8 characters. The row
+    shape is meant to mirror routes/api_token_routes.py so the auth
+    middleware treats cookie- and companion-minted tokens alike.
+    """
+    from core.database import get_db_session, ApiToken
+
+    raw_token = "ody_" + secrets.token_urlsafe(32)
+    hashed = bcrypt.hashpw(raw_token.encode(), bcrypt.gensalt()).decode()
+    token_id = str(uuid.uuid4())[:8]
+
+    record = ApiToken(
+        id=token_id,
+        owner=owner,
+        name=name,
+        token_hash=hashed,
+        token_prefix=raw_token[:8],
+        scopes=COMPANION_SCOPE,
+        is_active=True,
+    )
+    with get_db_session() as db:
+        db.add(record)
+    return token_id, raw_token
+
+
+def pairing_payload(host: str, port: int, token: str) -> dict:
+    """Build the exact JSON object a client scans / accepts.
+
+    The keys ``v``, ``host``, ``port`` and ``token`` are the wire format, so
+    keep them stable.
+    """
+    payload = {"v": PAIRING_VERSION}
+    payload.update(host=host, port=port, token=token)
+    return payload
+
+
+def pairing_qr_png_data_uri(payload: dict) -> str | None:
+    """Encode the pairing payload as a PNG QR code in a ``data:`` URI.
+
+    The payload is serialised as compact JSON before encoding. Returns
+    ``None`` when the optional ``qrcode`` dependency is unavailable or when
+    rendering fails for any other reason.
+    """
+    try:
+        import base64
+        import io
+
+        import qrcode
+
+        compact = json.dumps(payload, separators=(",", ":"))
+        stream = io.BytesIO()
+        qrcode.make(compact).save(stream, format="PNG")
+        encoded = base64.b64encode(stream.getvalue()).decode()
+    except Exception:
+        # Deliberate best-effort: the QR image is optional, so no failure
+        # here may break the pairing flow.
+        return None
+    return "data:image/png;base64," + encoded
--- a/companion/routes.py
+++ b/companion/routes.py
@@ -0,0 +1,236 @@
+"""Companion bridge — /api/companion/*.
+
+A thin, additive layer so a LAN client (e.g. a phone) can discover what a server
+offers and pair to it, without duplicating any LLM logic.
+
+Auth is enforced globally by AuthMiddleware (app.py), so reaching a handler here
+means the caller is authenticated by either a cookie session or a Bearer `ody_`
+API token. The read endpoints (ping/info/models) accept either; the pairing
+endpoints are admin-cookie only.
+
+Pairing CSRF posture: minting happens ONLY on POST. The session cookie is
+SameSite=Lax (routes/auth_routes.py), which a browser does not send on a
+cross-site POST, so an admin's cookie can't be used by a malicious page to mint
+a token -- the same protection the existing POST /api/tokens relies on. Minting
+on a GET would be unsafe (Lax cookies ride top-level GET navigations), so GET
+/pair only renders a form.
+"""
+
+import html
+
+from fastapi import APIRouter, Request
+from fastapi.responses import HTMLResponse
+
+from core.middleware import require_admin
+from src.auth_helpers import get_current_user
+
+from companion import pairing as _pairing
+
+
+def token_owner(request: Request) -> str | None:
+    """Resolve the real owner a request should be attributed to.
+
+    When ``request.state.api_token`` is truthy the caller is a bearer-token
+    client, and the owner is read from ``request.state.api_token_owner``
+    (``None`` if that attribute is absent). Otherwise the result of
+    ``get_current_user(request)`` is returned, i.e. the cookie-session user.
+    """
+    state = request.state
+    if not getattr(state, "api_token", False):
+        return get_current_user(request)
+    return getattr(state, "api_token_owner", None)
+
+
+def owner_can_see(row_owner, owner) -> bool:
+    """Decide whether ``owner`` may read a row owned by ``row_owner``.
+
+    Legacy rows with no owner (``None``) are shared and visible to everyone;
+    any other row is visible only to its own owner. Written as a pure
+    predicate so it can be tested directly and used as an in-Python check
+    next to the SQL ``owner_filter`` rule.
+    """
+    if row_owner is None:
+        return True
+    return row_owner == owner
+
+
+def mint_pairing_token(owner: str, invalidate=None) -> tuple[str, str]:
+    """Mint a pairing token, then invalidate the token cache if a hook is given.
+
+    ``invalidate`` is expected to be the app's
+    ``request.app.state.invalidate_token_cache`` callable; it is injected so
+    this function stays a pure, testable unit. It is called only when it is
+    actually callable, after the token has been minted, so the new token is
+    accepted on the next request without a restart.
+
+    Returns ``(token_id, raw_token)``; the raw token is shown once.
+    """
+    token_id, raw_token = _pairing.mint_token(owner)
+    refresh_cache = invalidate if callable(invalidate) else None
+    if refresh_cache is not None:
+        refresh_cache()
+    return (token_id, raw_token)
+
+
+def setup_companion_routes() -> APIRouter:
+    router = APIRouter(prefix="/api/companion", tags=["companion"])
+
+    @router.get("/ping")
+    def ping(request: Request):
+        """Cheap, auth-validated health check. A 200 with ok=true confirms the
+        host/port and credential are valid; middleware returns 401 otherwise."""
+        from core.constants import APP_VERSION
+        return {
+            "ok": True,
+            "name": "odysseus",
+            "version": APP_VERSION,
+            "auth": "token" if getattr(request.state, "api_token", False) else "session",
+        }
+
+    @router.get("/info")
+    def info(request: Request):
+        """Server identity + coarse capability flags. `owner` is the caller's own
+        identity (the token's owner for bearer callers)."""
+        from core.constants import APP_VERSION
+        return {
+            "name": "odysseus",
+            "version": APP_VERSION,
+            "owner": token_owner(request),
+            "capabilities": {"chat": True, "streaming": True},
+        }
+
+    @router.get("/models")
+    def models(request: Request):
+        """LLM model endpoints the CALLER can use.
+
+        The stock /api/models route scopes to get_current_user, which for a
+        bearer token is the sandboxed pseudo-user "api" (owns nothing). Here we
+        scope to the token's real owner instead, plus legacy null-owner shared
+        rows -- the same rule as owner_filter. Read-only; never returns api_key
+        material.
+        """
+        import json as _json
+
+        from core.database import SessionLocal, ModelEndpoint
+        from src.endpoint_resolver import build_chat_url
+
+        owner = token_owner(request)
+        out = []
+        db = SessionLocal()
+        try:
+            q = db.query(ModelEndpoint).filter(
+                ModelEndpoint.is_enabled == True,  # noqa: E712
+                (ModelEndpoint.model_type == "llm") | (ModelEndpoint.model_type == None),  # noqa: E711
+            )
+            if owner:
+                q = q.filter((ModelEndpoint.owner == owner) | (ModelEndpoint.owner == None))  # noqa: E711
+            for ep in q.all():
+                if not owner_can_see(ep.owner, owner):
+                    continue
+                try:
+                    model_ids = _json.loads(ep.cached_models) if ep.cached_models else []
+                except (ValueError, TypeError):
+                    model_ids = []
+                try:
+                    hidden = set(_json.loads(ep.hidden_models)) if ep.hidden_models else set()
+                except (ValueError, TypeError):
+                    hidden = set()
+                model_ids = [m for m in model_ids if m not in hidden]
+                try:
+                    chat_url = build_chat_url(ep.base_url)
+                except Exception:
+                    chat_url = ep.base_url
+                out.append({
+                    "endpoint_id": ep.id,
+                    "name": ep.name,
+                    "endpoint_url": chat_url,
+                    "models": model_ids,
+                    "supports_tools": ep.supports_tools,
+                })
+        finally:
+            db.close()
+        return {"endpoints": out}
+
+    @router.get("/pair")
+    def pair_page(request: Request):
+        """Serve the admin-only pairing form; the form POSTs to mint a code.
+
+        Nothing is minted on GET. SameSite=Lax session cookies accompany
+        top-level GET navigations, so a mint-on-GET could be fired by a link or
+        <img> (CSRF); the credential is only created by the POST handler.
+        """
+        require_admin(request)
+        # Static markup: no request data is interpolated into this page.
+        return HTMLResponse("""<!doctype html>
+<html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1">
+<title>Pair a device</title>
+<style>
+  body{font-family:-apple-system,system-ui,sans-serif;max-width:520px;margin:48px auto;padding:0 20px;color:#e8e8e8;background:#16161a}
+  .card{background:#1f1f25;border:1px solid #2c2c35;border-radius:14px;padding:28px;text-align:center}
+  button{background:#7c9cff;color:#0e0e12;border:none;border-radius:10px;padding:12px 20px;font-size:15px;font-weight:600;cursor:pointer}
+</style></head>
+<body><div class="card">
+  <h2>Pair a device</h2>
+  <p>Generate a one-time pairing code (a chat-scoped API token) for a LAN client.</p>
+  <form method="POST" action="/api/companion/pair">
+    <button type="submit">Generate pairing code</button>
+  </form>
+  <p style="color:#8a8a96;font-size:12px;margin-top:18px">Admin only. Each code mints a new token, shown once. Manage or revoke under Settings &rarr; API tokens.</p>
+</div></body></html>""")
+
+    @router.post("/pair")
+    def pair_create(request: Request):
+        """Mint a pairing token for the calling admin and show it exactly once.
+
+        Admin-cookie only. CSRF-safe for the same reason as POST /api/tokens: the
+        SameSite=Lax session cookie is not sent on a cross-site POST. Minting
+        invalidates the token cache so the code works immediately, no restart.
+        Responds with an HTML page, or the payload dict when `?format=json`.
+        """
+        require_admin(request)
+        owner = get_current_user(request)
+        cache_reset = getattr(request.app.state, "invalidate_token_cache", None)
+        token_id, raw_token = mint_pairing_token(owner, cache_reset)
+
+        hosts = _pairing.lan_ip_candidates()
+        host = "127.0.0.1"
+        if hosts:
+            host = hosts[0]
+        port = request.url.port or _pairing.default_port()
+        payload = _pairing.pairing_payload(host, port, raw_token)
+        qr = _pairing.pairing_qr_png_data_uri(payload)
+        # Anything other than a PNG data-URI is treated as "no QR available".
+        qr_ok = bool(qr) and qr.startswith("data:image/png;base64,")
+
+        wants_json = (request.query_params.get("format") or "").lower() == "json"
+        if wants_json:
+            return dict(
+                host=host, port=port, token=raw_token, token_id=token_id,
+                hosts=hosts, payload=payload, qr=qr if qr_ok else None,
+            )
+
+        import json as _json
+        payload_json = _json.dumps(payload, separators=(",", ":"))
+        # The img src only ever receives the vetted PNG data-URI; every other
+        # interpolated value goes through html.escape.
+        if qr_ok:
+            qr_block = f'<img src="{html.escape(qr)}" alt="Pairing QR" width="260" height="260">'
+        else:
+            qr_block = "<p><em>QR rendering unavailable -- enter the details manually.</em></p>"
+        return HTMLResponse(f"""<!doctype html>
+<html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1">
+<title>Pairing code</title>
+<style>
+  body{{font-family:-apple-system,system-ui,sans-serif;max-width:520px;margin:40px auto;padding:0 20px;color:#e8e8e8;background:#16161a}}
+  .card{{background:#1f1f25;border:1px solid #2c2c35;border-radius:14px;padding:24px;text-align:center}}
+  code{{background:#0e0e12;padding:2px 6px;border-radius:6px;word-break:break-all}}
+  .row{{text-align:left;margin:10px 0;font-size:14px;color:#bdbdc7}}
+  .warn{{color:#e0a85e;font-size:13px;margin-top:18px}}
+</style></head>
+<body><div class="card">
+  <h2>Pairing code</h2>
+  {qr_block}
+  <div class="row"><strong>Host:</strong> <code>{html.escape(host)}</code></div>
+  <div class="row"><strong>Port:</strong> <code>{html.escape(str(port))}</code></div>
+  <div class="row"><strong>Token:</strong> <code>{html.escape(raw_token)}</code></div>
+  <div class="row"><strong>Payload:</strong> <code>{html.escape(payload_json)}</code></div>
+  <p class="warn">Shown once. This grants chat access to your Odysseus; revoke it
+  in Settings &rarr; API tokens (id <code>{html.escape(token_id)}</code>). The
+  device must be on the same network, and the server must bind to your LAN.</p>
+</div></body></html>""")
+
+    return router
--- a/core/auth.py
+++ b/core/auth.py
@@ -266,7 +266,8 @@ class AuthManager:
        renamed_sessions = 0
        with self._sessions_lock:
            for sess in self._sessions.values():
-                if (sess or {}).get("username") == old_username:
+                sess_user = str((sess or {}).get("username") or "").strip().lower()
+                if sess_user == old_username:
                    sess["username"] = new_username
                    renamed_sessions += 1
        if renamed_sessions:
@@ -375,7 +376,10 @@ class AuthManager:
            return True  # 2FA not enabled, always pass
        secret = user.get("totp_secret")
        if not secret:
-            return True
+            # 2FA is enabled but no secret is stored (corrupt/partially-written
+            # auth.json). Fail closed — returning True here bypassed the second
+            # factor entirely.
+            return False
        # Check backup codes first
        backup = user.get("totp_backup_codes", [])
        if code in backup:
--- a/core/database.py
+++ b/core/database.py
@@ -1,7 +1,9 @@
 import os
 import logging
+import sqlite3
 from datetime import datetime
-from sqlalchemy import create_engine, Column, String, Text, Boolean, DateTime, Integer, ForeignKey, JSON, Index, func, text
+from sqlalchemy import event, create_engine, Column, String, Text, Boolean, DateTime, Integer, ForeignKey, JSON, Index, func, text
+from sqlalchemy.engine import Engine
 from sqlalchemy.types import TypeDecorator
 from sqlalchemy.ext.declarative import declarative_base, declared_attr
 from sqlalchemy.orm import relationship, sessionmaker, backref
@@ -34,6 +36,18 @@ engine = create_engine(
 SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)


+@event.listens_for(Engine, "connect")
+def set_sqlite_pragma(dbapi_connection, connection_record):
+    """Turn on foreign_keys for every new SQLite connection. Hooked on the Engine class so
+    it fires for all engines in the process; a no-op for non-SQLite DBAPI connections."""
+    if isinstance(dbapi_connection, sqlite3.Connection):
+        cursor = dbapi_connection.cursor()
+        try:
+            cursor.execute("PRAGMA foreign_keys=ON")
+        finally:
+            cursor.close()
+
+
 class EncryptedText(TypeDecorator):
    """Text column transparently encrypted at rest via src.secret_storage.

@@ -298,6 +312,7 @@ class EmailAccount(TimestampMixin, Base):
    # SMTP (sending)
    smtp_host      = Column(String, default="")
    smtp_port      = Column(Integer, default=465)
+    smtp_security  = Column(String, default="ssl")  # ssl | starttls | none
    smtp_user      = Column(String, default="")
    smtp_password  = Column(String, default="")

@@ -1483,6 +1498,10 @@ def _migrate_seed_email_account():
        logging.getLogger(__name__).warning(f"seed email account migration: {e}")


+# WARNING: Foreign-key enforcement is enabled globally for all SQLite connections.
+# Any future migrations or schema changes that temporarily violate foreign-key
+# constraints will fail. To perform such operations, foreign_keys must be
+# temporarily disabled around the migration workflow.
 def init_db():
    """
    Initialize the database by creating all tables.
@@ -1517,6 +1536,7 @@ def init_db():
    _migrate_drop_ping_notes_tasks()
    _migrate_add_crew_member_id()
    _migrate_add_assistant_columns()
+    _migrate_add_email_smtp_security()
    _migrate_seed_email_account()
    _migrate_add_calendar_metadata()
    _migrate_add_calendar_is_utc()
@@ -1525,6 +1545,32 @@ def init_db():
    _migrate_encrypt_endpoint_keys()


+def _migrate_add_email_smtp_security():
+    """Add the smtp_security column (Proton Bridge/custom local SMTP); best-effort, idempotent."""
+    import sqlite3
+    db_path = DATABASE_URL.replace("sqlite:///", "")
+    if not os.path.exists(db_path):
+        return
+    conn = None
+    try:
+        conn = sqlite3.connect(db_path)
+        columns = [row[1] for row in conn.execute("PRAGMA table_info(email_accounts)")]
+        if columns and "smtp_security" not in columns:
+            conn.execute("ALTER TABLE email_accounts ADD COLUMN smtp_security TEXT DEFAULT 'ssl'")
+            # ADD COLUMN ... DEFAULT 'ssl' makes every existing row read back as
+            # 'ssl', so a "WHERE smtp_security IS NULL" back-fill matches nothing.
+            # Upgrade by port instead; safe because the column was just created.
+            conn.execute("UPDATE email_accounts SET smtp_security = 'starttls' "
+                         "WHERE COALESCE(smtp_port, 465) = 587")
+            conn.commit()
+            logging.getLogger(__name__).info("Migrated: added smtp_security column to email_accounts")
+    except Exception as e:
+        logging.getLogger(__name__).warning(f"smtp_security migration skipped: {e}")
+    finally:
+        if conn is not None:
+            conn.close()
+
+
 def _migrate_encrypt_endpoint_keys():
    """Encrypt any plaintext provider API keys in model_endpoints. Idempotent;
    raw SQL so the EncryptedText decorator isn't applied twice."""
--- a/core/platform_compat.py
+++ b/core/platform_compat.py
@@ -14,6 +14,7 @@ Design rules:
 from __future__ import annotations

 import os
+import ntpath
 import shutil
 import subprocess
 from pathlib import Path
@@ -134,11 +135,40 @@ _BASH_CACHE: Optional[str] = None
 _BASH_PROBED = False

 # Common Git-for-Windows install locations to probe when bash isn't on PATH.
-_WINDOWS_BASH_FALLBACKS = (
-    r"C:\Program Files\Git\bin\bash.exe",
-    r"C:\Program Files\Git\usr\bin\bash.exe",
-    r"C:\Program Files (x86)\Git\bin\bash.exe",
+_WINDOWS_BASH_ROOT_ENV_VARS = (
+    "ProgramFiles",
+    "ProgramW6432",
+    "ProgramFiles(x86)",
+    "LocalAppData",
 )
+_WINDOWS_BASH_DEFAULT_ROOTS = (
+    r"C:\Program Files\Git",
+    r"C:\Program Files (x86)\Git",
+)
+_WINDOWS_BASH_RELATIVE_PATHS = (
+    ("bin", "bash.exe"),
+    ("usr", "bin", "bash.exe"),
+)
+
+
+def _windows_bash_fallbacks() -> List[str]:
+    """Return candidate ``bash.exe`` paths under the likely Git install roots.
+
+    Roots built from the environment come first, then the hard-coded defaults;
+    case-insensitive duplicates are dropped, keeping the first occurrence.
+    """
+    env_bases = (os.environ.get(name) for name in _WINDOWS_BASH_ROOT_ENV_VARS)
+    git_roots = [ntpath.join(base, "Git") for base in env_bases if base]
+    git_roots.extend(_WINDOWS_BASH_DEFAULT_ROOTS)
+
+    # Dicts keep insertion order, so setdefault() on the lower-cased path both
+    # de-duplicates and preserves the first-seen spelling and position.
+    unique = {}
+    for git_root in git_roots:
+        for parts in _WINDOWS_BASH_RELATIVE_PATHS:
+            candidate = ntpath.join(git_root, *parts)
+            unique.setdefault(candidate.lower(), candidate)
+    return list(unique.values())


 def find_bash() -> Optional[str]:
@@ -153,9 +183,9 @@ def find_bash() -> Optional[str]:
    if _BASH_PROBED:
        return _BASH_CACHE
    _BASH_PROBED = True
-    found = shutil.which("bash")
+    found = which_tool("bash")
    if not found and IS_WINDOWS:
-        for cand in _WINDOWS_BASH_FALLBACKS:
+        for cand in _windows_bash_fallbacks():
            if os.path.exists(cand):
                found = cand
                break
--- a/core/session_manager.py
+++ b/core/session_manager.py
@@ -29,6 +29,21 @@ def _message_timestamp_iso(value: Optional[datetime]) -> Optional[str]:
    return value.isoformat().replace("+00:00", "Z")


+def _parse_msg_content(raw):
+    """Restore multimodal content (a list of parts) persisted as a JSON string.
+
+    Only strings starting with '[{' and containing '"type"' are decoded; all
+    other values, and anything that fails to decode, are returned unchanged.
+    """
+    if not (isinstance(raw, str) and raw.startswith('[{') and '"type"' in raw):
+        return raw
+    try:
+        parts = json.loads(raw)
+    except ValueError:
+        return raw
+    return parts if isinstance(parts, list) and all(isinstance(p, dict) for p in parts) else raw
+
+
 class SessionManager:
    """
    Manages chat sessions with database persistence.
@@ -119,7 +134,7 @@ class SessionManager:
                meta.setdefault('timestamp', _message_timestamp_iso(db_msg.timestamp))
                history.append(ChatMessage(
                    role=db_msg.role,
-                    content=db_msg.content,
+                    content=_parse_msg_content(db_msg.content),
                    metadata=meta,
                ))
        else:
@@ -134,7 +149,7 @@ class SessionManager:
                meta.setdefault('timestamp', _message_timestamp_iso(db_msg.timestamp))
                history.append(ChatMessage(
                    role=db_msg.role,
-                    content=db_msg.content,
+                    content=_parse_msg_content(db_msg.content),
                    metadata=meta,
                ))

@@ -187,30 +202,43 @@ class SessionManager:
        """Persist a single message to the database."""
        db = SessionLocal()
        try:
+            db_session = db.query(DbSession).filter(DbSession.id == session_id).first()
+            if db_session is None:
+                # A stream/tool callback can outlive a session delete. Do not
+                # create a chat_messages row with no parent session; also drop
+                # any stale cached session so later writes fail closed too.
+                self.sessions.pop(session_id, None)
+                logger.warning("Dropping message for deleted session %s", session_id)
+                return
+
            msg_id = str(uuid.uuid4())
            msg_time = datetime.utcnow()
            if message.metadata is None:
                message.metadata = {}
            message.metadata.setdefault('timestamp', _message_timestamp_iso(msg_time))
+            # Multimodal content (image/audio attachments) is a list — serialize
+            # to JSON so the Text column can store it.  On reload, _db_to_session
+            # detects the JSON-array prefix and parses it back.
+            _content = message.content
+            if isinstance(_content, list):
+                _content = json.dumps(_content)
            db_message = DbChatMessage(
                id=msg_id,
                session_id=session_id,
                role=message.role,
-                content=message.content,
+                content=_content,
                meta_data=json.dumps(message.metadata) if message.metadata else None,
                timestamp=msg_time,
            )
            db.add(db_message)

-            db_session = db.query(DbSession).filter(DbSession.id == session_id).first()
-            if db_session:
-                db_session.message_count = len(self.sessions.get(session_id, {}).history) if session_id in self.sessions else 0
-                _now = datetime.now(timezone.utc)
-                db_session.last_accessed = _now
-                # Clean "last conversation" timestamp — only bumped here on a
-                # real message persist, so it powers an accurate "Last active"
-                # sort that ignores renames / model swaps / mere opens.
-                db_session.last_message_at = _now
+            db_session.message_count = len(self.sessions.get(session_id, {}).history) if session_id in self.sessions else 0
+            _now = datetime.now(timezone.utc)
+            db_session.last_accessed = _now
+            # Clean "last conversation" timestamp — only bumped here on a
+            # real message persist, so it powers an accurate "Last active"
+            # sort that ignores renames / model swaps / mere opens.
+            db_session.last_message_at = _now

            db.commit()

@@ -276,7 +304,15 @@ class SessionManager:
                    id=msg_id,
                    session_id=session_id,
                    role=message.role,
-                    content=message.content,
+                    # Multimodal content (image/audio attachments) is a list;
+                    # serialize to JSON so the Text column round-trips via
+                    # _parse_msg_content. Storing the raw list let SQLAlchemy
+                    # bind its single-quoted repr, which _parse_msg_content
+                    # cannot parse (it looks for double-quoted "type"), so the
+                    # attachment was destroyed on reload. Mirrors _persist_message.
+                    content=(json.dumps(message.content)
+                             if isinstance(message.content, list)
+                             else message.content),
                    meta_data=json.dumps(message.metadata) if message.metadata else None,
                    timestamp=now + timedelta(microseconds=i),
                )
@@ -466,11 +502,17 @@ class SessionManager:
            db_session = db.query(DbSession).filter(DbSession.id == session_id).first()
            if db_session:
                db.delete(db_session)
+
+            # Drop the in-memory copy even when there is no DB row. A "ghost"
+            # session lives only here (never persisted, or its row was removed
+            # out-of-band); without this it can never be cleared and keeps
+            # 404ing on every operation (issue #1044).
+            removed_in_memory = self.sessions.pop(session_id, None) is not None
+
+            if db_session or removed_in_memory:
+                # Commit the document-detach / message-delete above (a no-op when
+                # the ghost had no rows) together with the session delete.
                db.commit()
-
-                if session_id in self.sessions:
-                    del self.sessions[session_id]
-
                logger.info(f"Deleted session {session_id}")
                return True
            return False
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -4,28 +4,53 @@ services:
    ports:
      - "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000"
    volumes:
-      - ./data:/app/data
-      - ./logs:/app/logs
+      - ./data:/app/data:z
+      - ./logs:/app/logs:z
      # Cookbook remote-server SSH identity. Odysseus can generate a key here;
      # add the shown public key to each remote server's authorized_keys.
-      - ./data/ssh:/app/.ssh
+      - ./data/ssh:/app/.ssh:z
      # Cookbook local model cache. Inside Docker, "Local" means the Odysseus
      # container, so persist its HuggingFace cache under ./data/huggingface.
-      - ./data/huggingface:/app/.cache/huggingface
+      - ./data/huggingface:/app/.cache/huggingface:z
      # Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.)
      # land under /app/.local for the odysseus user. Persist them so a
      # container recreate does not silently remove installed serve engines.
-      - ./data/local:/app/.local
+      - ./data/local:/app/.local:z
    extra_hosts:
      # Lets the container reach local services on the Docker host, including
      # Ollama at http://host.docker.internal:11434.
      - "host.docker.internal:host-gateway"
-    env_file:
-      - .env
    environment:
+      - LLM_HOST=${LLM_HOST:-localhost}
+      - LLM_HOSTS=${LLM_HOSTS:-}
+      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
+      - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-}
+      - RESEARCH_LLM_ENDPOINT=${RESEARCH_LLM_ENDPOINT:-}
+      - HF_TOKEN=${HF_TOKEN:-}
+      - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN:-}
      - SEARXNG_INSTANCE=http://searxng:8080
      - CHROMADB_HOST=chromadb
      - CHROMADB_PORT=8000
+      - DATABASE_URL=${DATABASE_URL:-sqlite:///./data/app.db}
+      - AUTH_ENABLED=${AUTH_ENABLED:-true}
+      - LOCALHOST_BYPASS=${LOCALHOST_BYPASS:-false}
+      - ODYSSEUS_ADMIN_USER=${ODYSSEUS_ADMIN_USER:-admin}
+      - ODYSSEUS_ADMIN_PASSWORD=${ODYSSEUS_ADMIN_PASSWORD:-}
+      - ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-http://localhost,http://127.0.0.1}
+      - SECURE_COOKIES=${SECURE_COOKIES:-false}
+      - EMBEDDING_URL=${EMBEDDING_URL:-}
+      - EMBEDDING_MODEL=${EMBEDDING_MODEL:-}
+      - FASTEMBED_MODEL=${FASTEMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2}
+      - FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-}
+      - CLEANUP_INTERVAL_HOURS=${CLEANUP_INTERVAL_HOURS:-24}
+      - ODYSSEUS_INPROCESS_POLLERS=${ODYSSEUS_INPROCESS_POLLERS:-1}
+      - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1}
+      - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost}
+      - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-}
+      - GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
+      - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-}
+      - TAVILY_API_KEY=${TAVILY_API_KEY:-}
+      - SERPER_API_KEY=${SERPER_API_KEY:-}
      # PUID / PGID — the user/group the container drops to before
      # running uvicorn (entrypoint also chowns /app/data + /app/logs
      # to match, so bind-mounted files stay editable from the host).
@@ -54,7 +79,12 @@ services:
    restart: unless-stopped

  searxng:
-    image: docker.io/searxng/searxng:latest
+    # Pinned, not :latest — odysseus waits on searxng's healthcheck
+    # (depends_on: condition: service_healthy), so a broken upstream `latest`
+    # tag blocks the whole app from starting. 2026.6.2 crashes on boot with
+    # `KeyError: 'default_doi_resolver'`, failing the healthcheck (issue #1414).
+    # Bump this deliberately after verifying a newer tag boots clean.
+    image: docker.io/searxng/searxng:2026.5.31-7159b8aed
    entrypoint:
      - /bin/sh
      - -c
@@ -72,10 +102,24 @@ services:
      - "127.0.0.1:8080:8080"
    volumes:
      - searxng-data:/etc/searxng
-      - ./config/searxng/settings.yml:/tmp/searxng-settings.yml.template:ro
+      - ./config/searxng/settings.yml:/tmp/searxng-settings.yml.template:ro,z
    environment:
      - SEARXNG_BASE_URL=http://localhost:8080/
      - SEARXNG_SECRET=${SEARXNG_SECRET:-}
+    # The official searxng image runs as the non-root `searxng` user, but its
+    # entrypoint still needs to chown /etc/searxng on first boot, drop privs via
+    # su-exec, and (with our wrapper above) write settings.yml into the named
+    # volume. Without these capabilities the wrapper aborts at the redirection
+    # with EACCES and the container fails its healthcheck with permission
+    # errors during setup. Mirrors the cap set recommended by the upstream
+    # searxng-docker compose file. See issue #721.
+    cap_drop:
+      - ALL
+    cap_add:
+      - CHOWN
+      - SETGID
+      - SETUID
+      - DAC_OVERRIDE
    healthcheck:
      test: ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://localhost:8080/', timeout=5).read(1)\""]
      interval: 5s
--- a/docker/entrypoint.sh
+++ b/docker/entrypoint.sh
@@ -76,6 +76,15 @@ done
 # nvcc" even when the GPU itself is fully visible to the container.
 export VLLM_USE_FLASHINFER_SAMPLER="${VLLM_USE_FLASHINFER_SAMPLER:-0}"

+# Make Cookbook-installed Python CLIs visible after `pip install --user`.
+# vLLM and helper scripts land here because /app is the non-root user's HOME.
+export PATH="/app/.local/bin:$PATH"
+
+# Run first-time setup as the app user so data/ files get the right ownership.
+# setup.py is idempotent — skips auth.json / .env if they already exist.
+# || true so a setup failure never prevents the container from starting.
+gosu "$PUID:$PGID" python /app/setup.py || true
+
 # Drop root and run the actual app. `gosu` is preferred over `su` /
 # `sudo` because it cleans up the process tree (no extra shell layer)
 # so signals (SIGTERM from `docker stop`) reach uvicorn directly.
--- a/docker/gpu.amd.yml
+++ b/docker/gpu.amd.yml
@@ -1,5 +1,6 @@
 # AMD ROCm GPU overlay. Enable by setting COMPOSE_FILE in .env:
 #   COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml
+#   RENDER_GID=<numeric output of: getent group render | cut -d: -f3>
 #
 # Requires ROCm drivers on the host (kfd + DRI devices). The host user
 # running Docker must be in the `video` and `render` groups.
--- a/docker/gpu.nvidia.yml
+++ b/docker/gpu.nvidia.yml
@@ -1,6 +1,11 @@
 # NVIDIA GPU overlay. Enable by setting COMPOSE_FILE in .env:
 #   COMPOSE_FILE=docker-compose.yml:docker/gpu.nvidia.yml
 #
+# Use scripts/check-docker-gpu.sh to diagnose GPU passthrough, optionally
+# install the NVIDIA Container Toolkit (Ubuntu/Debian), and write COMPOSE_FILE
+# to .env. The script is read-only by default — it installs nothing and never
+# edits .env unless explicitly asked.
+#
 # Requires the NVIDIA Container Toolkit on the host.
 #   Arch:    sudo pacman -S nvidia-container-toolkit
 #   Debian:  sudo apt install nvidia-container-toolkit
--- a/docs/a11y/focus-after.png
+++ b/docs/a11y/focus-after.png
--- a/docs/a11y/focus-before.png
+++ b/docs/a11y/focus-before.png
--- a/docs/a11y/login-after.png
+++ b/docs/a11y/login-after.png
--- a/docs/a11y/login-before.png
+++ b/docs/a11y/login-before.png
--- a/docs/gallery-314-desktop.png
+++ b/docs/gallery-314-desktop.png
--- a/docs/gallery-314-mobile.png
+++ b/docs/gallery-314-mobile.png
--- a/docs/index.html
+++ b/docs/index.html
@@ -25,7 +25,7 @@
    --radius: 8px;
  }
  * { box-sizing: border-box; }
-  html { scroll-behavior: smooth; scroll-snap-type: y mandatory; scroll-padding-top: 60px; }
+  html { scroll-behavior: smooth; scroll-snap-type: y proximity; scroll-padding-top: 60px; }
  /* Each section is a full-viewport "page" with its content centered, so only
     one shows at a time and the snap is obvious. */
  .hero, section {
--- a/launch-windows.ps1
+++ b/launch-windows.ps1
@@ -30,23 +30,80 @@ function Fail($msg) {
    exit 1
 }

-# 1. Locate a Python interpreter (3.11+ recommended)
+function Find-GitBash {
+    $onPath = Get-Command bash -ErrorAction SilentlyContinue
+    if ($onPath) { return $onPath.Source }
+    # Not on PATH: probe <root>\Git under each install root that is set, then the defaults.
+    $gitRoots = @()
+    foreach ($envVar in @("ProgramFiles", "ProgramW6432", "ProgramFiles(x86)", "LocalAppData")) {
+        $installRoot = [Environment]::GetEnvironmentVariable($envVar)
+        if ($installRoot) { $gitRoots += (Join-Path $installRoot "Git") }
+    }
+    $gitRoots += @("C:\Program Files\Git", "C:\Program Files (x86)\Git")
+    # Return the first bash.exe that exists; $null when none of the candidates do.
+    foreach ($gitRoot in ($gitRoots | Select-Object -Unique)) {
+        foreach ($tail in @("bin\bash.exe", "usr\bin\bash.exe")) {
+            $exePath = Join-Path $gitRoot $tail
+            if (Test-Path $exePath) { return $exePath }
+        }
+    }
+    return $null
+}
+
+# 1. Locate a Python interpreter (3.11+ required)
 Write-Step "Checking for Python"
+function Get-PythonVersionText($launcher, $launcherArgs) {
+    # Run the interpreter and return the dotted first three fields of its
+    # sys.version_info, trimmed; stderr is discarded. Returns $null when
+    # anything inside the try block throws.
+    $probe = "import sys; print('.'.join(map(str, sys.version_info[:3])))"
+    try { return (& $launcher @launcherArgs -c $probe 2>$null).Trim() } catch { return $null }
+}
+
 $pyExe = $null
-foreach ($c in @("python", "py")) {
-    $cmd = Get-Command $c -ErrorAction SilentlyContinue
-    if ($cmd) { $pyExe = $cmd.Source; break }
+$pyArgs = @()
+$pyVersion = $null
+
+$pyLauncher = Get-Command py -ErrorAction SilentlyContinue
+if ($pyLauncher) {
+    foreach ($v in @("-3.13", "-3.12", "-3.11")) {
+        $ver = Get-PythonVersionText $pyLauncher.Source @($v)
+        if ($ver) {
+            $pyExe = $pyLauncher.Source
+            $pyArgs = @($v)
+            $pyVersion = $ver
+            break
+        }
+    }
 }
+
 if (-not $pyExe) {
-    Fail "Python not found on PATH. Install Python 3.11+ from https://www.python.org/downloads/ (check 'Add to PATH'), then re-run this script."
+    $pythonCmd = Get-Command python -ErrorAction SilentlyContinue
+    if ($pythonCmd) {
+        $ver = Get-PythonVersionText $pythonCmd.Source @()
+        if ($ver) {
+            $versionParts = $ver.Split('.')
+            $major = [int]$versionParts[0]
+            $minor = [int]$versionParts[1]
+            if ($major -gt 3 -or ($major -eq 3 -and $minor -ge 11)) {
+                $pyExe = $pythonCmd.Source
+                $pyVersion = $ver
+            }
+        }
+    }
 }
-Write-Host ("Using Python: " + $pyExe)
+
+if (-not $pyExe) {
+    Fail "Couldn't find Python 3.11+ for Windows setup. Install Python 3.11+ (or open the Python launcher with 'py -3.11') from https://www.python.org/downloads/, then re-run this script."
+}
+$pythonLabel = ("Using Python {0}: {1} {2}" -f $pyVersion, $pyExe, ($pyArgs -join ' ')).TrimEnd()
+Write-Host $pythonLabel

 # 2. Create the virtualenv if missing
 $venvPy = Join-Path $PSScriptRoot "venv\Scripts\python.exe"
 if (-not (Test-Path $venvPy)) {
    Write-Step "Creating virtual environment (venv)"
-    & $pyExe -m venv venv
+    & $pyExe @pyArgs -m venv venv
    if ($LASTEXITCODE -ne 0 -or -not (Test-Path $venvPy)) { Fail "Failed to create the virtual environment." }
 } else {
    Write-Host "venv already exists - skipping creation."
@@ -64,7 +121,7 @@ Write-Step "Running first-time setup"
 if ($LASTEXITCODE -ne 0) { Fail "setup.py failed." }

 # 5. Friendly note about Git Bash (full Cookbook / agent-shell parity)
-if (-not (Get-Command bash -ErrorAction SilentlyContinue)) {
+if (-not (Find-GitBash)) {
    Write-Host ""
    Write-Host "NOTE: Git Bash (bash.exe) was not found on PATH." -ForegroundColor Yellow
    Write-Host "      The core app works without it. For full Cookbook background" -ForegroundColor Yellow
--- a/mcp_servers/_common.py
+++ b/mcp_servers/_common.py
@@ -13,6 +13,10 @@ SEARCH_TIMEOUT = 30

 def truncate(text: str, limit: int = MAX_OUTPUT_CHARS) -> str:
    """Truncate text to *limit* characters with a suffix note."""
+    if not isinstance(text, str):
+        # Tool output is occasionally None or a non-string; len(None) would
+        # raise. Coerce so this shared helper never crashes a tool response.
+        text = "" if text is None else str(text)
    if len(text) > limit:
        return text[:limit] + f"\n... (truncated, {len(text)} chars total)"
    return text
--- a/mcp_servers/email_server.py
+++ b/mcp_servers/email_server.py
@@ -70,10 +70,12 @@ def _list_accounts_raw() -> list:
    try:
        conn = sqlite3.connect(str(path))
        conn.row_factory = sqlite3.Row
-        rows = conn.execute("""
+        columns = {r[1] for r in conn.execute("PRAGMA table_info(email_accounts)").fetchall()}
+        smtp_security_select = "smtp_security" if "smtp_security" in columns else "'' AS smtp_security"
+        rows = conn.execute(f"""
            SELECT id, name, is_default, enabled,
                   imap_host, imap_port, imap_user, imap_password, imap_starttls,
-                   smtp_host, smtp_port, smtp_user, smtp_password, from_address
+                   smtp_host, smtp_port, {smtp_security_select}, smtp_user, smtp_password, from_address
            FROM email_accounts WHERE enabled = 1
            ORDER BY is_default DESC, created_at ASC
        """).fetchall()
@@ -145,6 +147,7 @@ def _load_config(account: str | None = None) -> dict:
        "imap_starttls": os.environ.get("IMAP_STARTTLS", "true").lower() == "true",
        "smtp_host": os.environ.get("SMTP_HOST", ""),
        "smtp_port": int(os.environ.get("SMTP_PORT", "465")),
+        "smtp_security": os.environ.get("SMTP_SECURITY", ""),
        "smtp_user": os.environ.get("SMTP_USER", ""),
        "smtp_password": os.environ.get("SMTP_PASSWORD", ""),
        "smtp_starttls": os.environ.get("SMTP_STARTTLS", "false").lower() == "true",
@@ -189,6 +192,7 @@ def _load_config(account: str | None = None) -> dict:
        cfg["imap_ssl"] = int(cfg["imap_port"]) == 993 and not cfg["imap_starttls"]
        cfg["smtp_host"] = row["smtp_host"] or cfg["smtp_host"]
        cfg["smtp_port"] = int(row["smtp_port"] or cfg["smtp_port"])
+        cfg["smtp_security"] = row["smtp_security"] or cfg["smtp_security"] or ("starttls" if int(cfg["smtp_port"]) == 587 else "ssl")
        cfg["smtp_user"] = row["smtp_user"] or cfg["smtp_user"]
        cfg["smtp_password"] = _decrypt(row["smtp_password"]) if row["smtp_password"] else cfg["smtp_password"]
        cfg["from_address"] = row["from_address"] or row["imap_user"] or cfg["from_address"]
@@ -333,14 +337,25 @@ def _decode_header(raw):
    """Decode MIME encoded header."""
    if not raw:
        return ""
-    parts = email.header.decode_header(raw)
-    decoded = []
-    for data, charset in parts:
-        if isinstance(data, bytes):
-            decoded.append(data.decode(charset or "utf-8", errors="replace"))
-        else:
-            decoded.append(data)
-    return " ".join(decoded)
+    try:
+        parts = email.header.decode_header(raw)
+    except Exception:
+        return str(raw)  # decode_header rejects e.g. bad base64: keep raw text
+    try:
+        # make_header joins per RFC 2047: no spurious space around encoded-words
+        # (the old " ".join produced "Re:  Jose" style double spaces).
+        return str(email.header.make_header(parts))
+    except Exception:
+        decoded = []  # unknown charset etc.: lossy per-part decode
+        for data, charset in parts:
+            if isinstance(data, bytes):
+                try:
+                    decoded.append(data.decode(charset or "utf-8", errors="replace"))
+                except LookupError:
+                    decoded.append(data.decode("utf-8", errors="replace"))
+            else:
+                decoded.append(data)
+        return "".join(decoded)


 def _extract_text(msg):
@@ -413,6 +428,11 @@ def _list_emails(folder="INBOX", max_results=20, unresponded_only=False,
        status, data = conn.uid("SEARCH", None, "(UNSEEN UNANSWERED)")
    elif unread_only:
        status, data = conn.uid("SEARCH", None, "(UNSEEN)")
+    elif unresponded_only:
+        # Was missing — unresponded_only=True (without unread_only) fell through
+        # to "ALL" and returned answered mail too, despite the documented
+        # "emails without replies" behaviour.
+        status, data = conn.uid("SEARCH", None, "(UNANSWERED)")
    else:
        # Include read too — IMAP search "ALL" returns the entire folder
        status, data = conn.uid("SEARCH", None, "ALL")
@@ -739,17 +759,17 @@ def _smtp_connect(account=None, cfg=None):
    if not _smtp_ready(cfg):
        raise ValueError(f"Email account {cfg.get('account_name') or account or 'default'} has no SMTP configured")
    port = int(cfg.get("smtp_port") or 465)
-    # Account rows only store host/port, not the legacy env-level smtp_ssl
-    # toggle. Infer the conventional TLS mode from the port so MCP tools match
-    # the web send path: 465 = implicit SSL, 587 = STARTTLS.
-    if port == 587:
+    security = str(cfg.get("smtp_security") or "").strip().lower()
+    if security not in {"ssl", "starttls", "none"}:
+        security = "starttls" if port == 587 else "ssl"
+    if security == "starttls":
        conn = smtplib.SMTP(
            cfg["smtp_host"],
            port,
            timeout=EMAIL_SOCKET_TIMEOUT,
        )
        conn.starttls()
-    elif cfg.get("smtp_ssl", True):
+    elif security == "ssl":
        conn = smtplib.SMTP_SSL(
            cfg["smtp_host"],
            port,
@@ -761,8 +781,6 @@ def _smtp_connect(account=None, cfg=None):
            port,
            timeout=EMAIL_SOCKET_TIMEOUT,
        )
-        if cfg["smtp_starttls"]:
-            conn.starttls()
    if cfg["smtp_user"] and cfg["smtp_password"]:
        conn.login(cfg["smtp_user"], cfg["smtp_password"])
    return conn
--- a/mcp_servers/memory_server.py
+++ b/mcp_servers/memory_server.py
@@ -161,10 +161,9 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
                deleted_text = m.get("text", "")
                deleted_category = m.get("category", "")
                break
-        original_len = len(memories)
-        memories = [m for m in memories if not m.get("id", "").startswith(memory_id)]
-        if len(memories) == original_len:
+        if not full_id:
            return [TextContent(type="text", text=f"Error: Memory '{memory_id}' not found")]
+        memories = [m for m in memories if m.get("id") != full_id]
        _memory_manager.save(memories)
        if _memory_vector and _memory_vector.healthy and full_id:
            try:
--- a/mcp_servers/rag_server.py
+++ b/mcp_servers/rag_server.py
@@ -101,10 +101,13 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
            return [TextContent(type="text", text=f"Error: {e}")]

    elif action == "add_directory":
-        directory = arguments.get("directory", "").strip()
+        _dir = arguments.get("directory")
+        directory = _dir.strip() if isinstance(_dir, str) else ""
        if not directory:
            return [TextContent(type="text", text="Error: add_directory needs a directory path")]
-        directory = os.path.expanduser(directory)
+        # Store an absolute path so indexed `source` metadata is absolute and
+        # remove_directory (which abspath-normalizes) can match it later (#1660).
+        directory = os.path.abspath(os.path.expanduser(directory))
        if not os.path.isdir(directory):
            return [TextContent(type="text", text=f"Error: Directory not found: {directory}")]
        if not _rag_manager:
@@ -112,14 +115,27 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
        try:
            result = _rag_manager.index_personal_documents(directory)
            indexed = result.get("indexed_count", 0) if isinstance(result, dict) else 0
+            # Record the directory so `list` and `remove_directory` can see it.
+            # Indexing was just done above, so pass index=False to avoid a second
+            # (ownerless) pass. Without this the directory was indexed but never
+            # tracked in indexed_directories, so it was invisible/unremovable.
+            if _personal_docs_manager and hasattr(_personal_docs_manager, "add_directory"):
+                try:
+                    _personal_docs_manager.add_directory(directory, index=False)
+                except Exception:
+                    pass
            return [TextContent(type="text", text=f"Directory '{directory}' added to RAG index ({indexed} chunks indexed)")]
        except Exception as e:
            return [TextContent(type="text", text=f"Error: Failed to index directory: {e}")]

    elif action == "remove_directory":
-        directory = arguments.get("directory", "").strip()
+        _dir = arguments.get("directory")
+        directory = _dir.strip() if isinstance(_dir, str) else ""
        if not directory:
            return [TextContent(type="text", text="Error: remove_directory needs a directory path")]
+        # Expand ~ to match add_directory, which indexes the expanded path.
+        # Without this, removing "~/docs" never matches the stored absolute path.
+        directory = os.path.expanduser(directory)
        if not _personal_docs_manager:
            return [TextContent(type="text", text="Error: Personal docs manager not available")]
        try:
--- a/odysseus-ui.service
+++ b/odysseus-ui.service
@@ -9,7 +9,7 @@ Type=simple
 # CHANGE THESE to match your user and install path:
 User=YOURUSER
 WorkingDirectory=/home/YOURUSER/odysseus-ui
-ExecStart=/home/YOURUSER/odysseus-ui/venv/bin/uvicorn app:app --port 8000 --host 0.0.0.0
+ExecStart=/home/YOURUSER/odysseus-ui/venv/bin/uvicorn app:app --port 7000 --host 0.0.0.0
 Restart=always
 RestartSec=3
 EnvironmentFile=-/home/YOURUSER/odysseus-ui/.env
--- a/requirements-optional.txt
+++ b/requirements-optional.txt
@@ -4,6 +4,14 @@
 # Note: chromadb-client + fastembed moved to requirements.txt — RAG, semantic
 # memory, and tool selection are core paths, so they ship by default now.

+# Local speech-to-text (microphone -> text) via faster-whisper, for the
+# "local" STT provider. Runs on CPU out of the box (CTranslate2 backend, no
+# torch needed). Install if you want to dictate/transcribe with the mic
+# without sending audio to an external endpoint.
+# Optional extra: install `torch` too if you have a CUDA GPU and want
+# GPU-accelerated transcription — it's auto-detected, CPU is used otherwise.
+faster-whisper
+
 # DuckDuckGo as a search provider option.
 # Install if you want DDG in the search-provider dropdown.
 # Alternatives: SearXNG, Brave, Tavily, Serper, Google PSE.
@@ -15,3 +23,14 @@ duckduckgo-search
 # network-served app — see ACKNOWLEDGMENTS.md. The MIT core (PDF *text*
 # extraction via pypdf) works without it; this only unlocks form-filling.
 PyMuPDF
+
+# Office / EPUB document text extraction (chat attachments + the personal-docs
+# RAG index). markitdown (MIT, Microsoft) converts .docx/.xlsx/.pptx/.xls/.epub
+# to Markdown — more token-efficient and model-legible than a raw dump. Optional
+# and lazy-imported via src/markitdown_runtime.py; without it those formats fall
+# back to a friendly "install to extract" banner and the core stays pure-MIT.
+# Extras pull mammoth/lxml/python-pptx/pandas/openpyxl/xlrd; the base also pulls
+# magika (onnxruntime), already a core dep via fastembed. We avoid the
+# [all]/Azure/audio extras (cloud + heavy). Pinned to a release >30 days old per
+# the dependency-age discussion in issue #485.
+markitdown[docx,pptx,xlsx,xls]==0.1.5
--- a/routes/admin_wipe_routes.py
+++ b/routes/admin_wipe_routes.py
@@ -27,6 +27,7 @@ from core.database import (
    Document,
    DocumentVersion,
    GalleryImage,
+    GalleryAlbum,
    CalendarEvent,
    CalendarCal,
 )
@@ -145,8 +146,9 @@ def setup_admin_wipe_routes(session_manager):
                return {"status": "deleted", "kind": kind, "count": count}

            if kind == "gallery":
-                count = db.query(GalleryImage).count()
+                count = db.query(GalleryImage).count() + db.query(GalleryAlbum).count()
                db.query(GalleryImage).delete()
+                db.query(GalleryAlbum).delete()
                db.commit()
                # Also drop the upload dir so disk doesn't keep orphans.
                _rmtree_quiet(os.path.join(DATA_DIR, "gallery"))
--- a/routes/auth_routes.py
+++ b/routes/auth_routes.py
@@ -67,6 +67,8 @@ class DeleteUserRequest(BaseModel):
 class RenameUserRequest(BaseModel):
    username: str

+class SetOpenRegistrationRequest(BaseModel):
+    enabled: bool  # desired open-registration state (explicit value, not a toggle)

 SESSION_COOKIE = "odysseus_session"

@@ -295,6 +297,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
        # owner-scoped DB rows before changing auth so the account keeps
        # access to its sessions, docs, email accounts, tasks, etc.
        try:
+            from sqlalchemy import func
            from core.database import Base, SessionLocal
            db = SessionLocal()
            try:
@@ -304,7 +307,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
                        continue
                    (
                        db.query(model)
-                        .filter(model.owner == old_username)
+                        .filter(func.lower(model.owner) == old_username)
                        .update({"owner": new_username}, synchronize_session=False)
                    )
                db.commit()
@@ -322,9 +325,15 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
            from routes.prefs_routes import _load as _load_prefs, _save as _save_prefs
            prefs = _load_prefs()
            users = prefs.get("_users") if isinstance(prefs, dict) else None
-            if isinstance(users, dict) and old_username in users and new_username not in users:
-                users[new_username] = users.pop(old_username)
-                _save_prefs(prefs)
+            if isinstance(users, dict):
+                prefs_key = next(
+                    (k for k in users if str(k).strip().lower() == old_username),
+                    None,
+                )
+                new_taken = any(str(k).strip().lower() == new_username for k in users)
+                if prefs_key is not None and not new_taken:
+                    users[new_username] = users.pop(prefs_key)
+                    _save_prefs(prefs)
        except Exception as e:
            logger.warning("Failed to rename user prefs %s -> %s: %s", old_username, new_username, e)

@@ -333,15 +342,31 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
            raise HTTPException(400, "Cannot rename user")
        return {"ok": True, "username": new_username, "renamed_self": old_username == user}

-    @router.post("/signup-toggle")
+    @router.post("/signup-toggle", deprecated=True)
    async def toggle_signup(request: Request):
-        """Toggle open registration on/off. Admin only."""
+        """
+        Toggle open registration on/off. Admin only.
+
+        DEPRECATED: This endpoint uses toggle semantics which can lead to unsafe state changes.
+        Use PUT /open-signup instead.
+
+        This endpoint is kept for backward compatibility and may be removed in future versions.
+        """
        user = _get_current_user(request)
        if not user or not auth_manager.is_admin(user):
            raise HTTPException(403, "Admin only")
        auth_manager.signup_enabled = not auth_manager.signup_enabled
        return {"ok": True, "signup_enabled": auth_manager.signup_enabled}

+    @router.put("/open-signup")
+    async def set_signup_enabled(body: SetOpenRegistrationRequest, request: Request):
+        """Set open registration to the explicit ``body.enabled`` value. Admin only."""
+        user = _get_current_user(request)
+        if not user or not auth_manager.is_admin(user):
+            raise HTTPException(403, "Admin only")
+        auth_manager.signup_enabled = body.enabled
+        return {"ok": True, "signup_enabled": auth_manager.signup_enabled}
+
    @router.delete("/users")
    async def admin_delete_user(body: DeleteUserRequest, request: Request):
        user = _get_current_user(request)
--- a/routes/backup_routes.py
+++ b/routes/backup_routes.py
@@ -77,7 +77,12 @@ def setup_backup_routes(memory_manager, preset_manager, skills_manager) -> APIRo
        # ── Memories ──
        if "memories" in body and isinstance(body["memories"], list):
            existing = memory_manager.load_all()
-            existing_texts = {e.get("text", "").strip().lower() for e in existing}
+            # Dedup against THIS user's own memories only. Using every tenant's
+            # rows (load_all) meant a memory whose text matched any other
+            # user's was silently skipped, so the importing user lost their own
+            # data. The full store is still saved back below.
+            existing_texts = {e.get("text", "").strip().lower()
+                              for e in existing if e.get("owner") == user}
            added = 0
            for mem in body["memories"]:
                if not isinstance(mem, dict) or not mem.get("text"):
--- a/routes/calendar_routes.py
+++ b/routes/calendar_routes.py
@@ -12,10 +12,27 @@ from dateutil.rrule import rrulestr, rruleset
 from dateutil.rrule import DAILY, WEEKLY, MONTHLY, YEARLY

 from core.database import SessionLocal, CalendarCal, CalendarEvent
-from src.auth_helpers import get_current_user
+from src.auth_helpers import get_current_user, require_user

 logger = logging.getLogger(__name__)

+
+def _ics_naive_dtstart(dt):
+    """Return *dt* in the naive form import_ics persists as CalendarEvent.dtstart.
+
+    Storage keeps tz-aware datetimes as UTC with tzinfo removed, turns all-day
+    dates into midnight datetimes and leaves naive datetimes untouched; the ICS
+    dedup lookup has to produce the identical value or a re-import never
+    matches the row that is already stored.
+    """
+    if not isinstance(dt, datetime):
+        # Plain date (all-day) -> midnight; anything else passes through.
+        return datetime(dt.year, dt.month, dt.day) if isinstance(dt, date) else dt
+    if dt.tzinfo is None:
+        return dt
+    from datetime import timezone as _tz
+    return dt.astimezone(_tz.utc).replace(tzinfo=None)
+
 # Single-user fallback identity. Used only when:
 #   1. The app is configured for single-user (no auth middleware), AND
 #   2. The request didn't resolve to an authenticated user.
@@ -28,16 +45,17 @@ _SINGLE_USER_MODE = _os.environ.get("ODYSSEUS_SINGLE_USER", "1") != "0"


 def _require_user(request: Request) -> str:
-    """Return the authenticated user. In multi-user mode an unauthenticated
-    request raises 401; in single-user mode it falls through to
-    FALLBACK_OWNER. Prevents the silent cross-user data write that would
-    happen if a request slipped past auth middleware in a real deployment."""
-    u = get_current_user(request)
-    if u:
-        return u
-    if _SINGLE_USER_MODE:
-        return FALLBACK_OWNER
-    raise HTTPException(401, "Authentication required")
+    """Return the authenticated user. Uses require_user so AUTH_ENABLED=false
+    and single-user mode both work: require_user returns "" when auth is
+    disabled or unconfigured, and only raises 401 when auth is configured but
+    the caller is unauthenticated. Falls back to FALLBACK_OWNER for calendar
+    writes so data isn't stored under an empty owner in single-user mode."""
+    user = require_user(request)
+    if user:
+        return user
+    # require_user returned "" — auth is off or unconfigured (single-user).
+    # Use FALLBACK_OWNER so calendar rows have a stable owner for filtering.
+    return FALLBACK_OWNER


 def _get_or_404_calendar(db, cal_id: str, owner: str) -> CalendarCal:
@@ -64,6 +82,24 @@ def _get_or_404_event(db, uid: str, owner: str) -> CalendarEvent:
    return ev


+def _ics_escape(text: str) -> str:
+    """Return *text* escaped for an iCalendar TEXT value (RFC 5545 §3.3.11).
+
+    Backslash, semicolon and comma are structural inside TEXT and get a
+    backslash prefix; every line break (CRLF, LF or lone CR) is written as a
+    literal ``\\n``. A falsy *text* yields an empty string.
+    """
+    escaped = text or ""
+    # Order matters: backslash goes first so the escapes added afterwards are
+    # not escaped a second time, and CRLF goes before its LF / CR halves.
+    for raw, repl in (
+        ("\\", "\\\\"), (";", "\\;"), (",", "\\,"),
+        ("\r\n", "\\n"), ("\n", "\\n"), ("\r", "\\n"),
+    ):
+        escaped = escaped.replace(raw, repl)
+    return escaped
+
+
 def _resolve_base_uid(uid: str) -> str:
    """Extract the base series UID from a compound occurrence UID.

@@ -319,8 +355,8 @@ def _parse_dt(s: str) -> datetime:
            return None
        return h, mn

-    # today/tomorrow/yesterday [at] TIME
-    m = _re.match(r'^(today|tomorrow|tmrw|yesterday)(?:\s+at)?\s*(.*)$', lower)
+    # today/tonight/tomorrow/yesterday [at] TIME
+    m = _re.match(r'^(today|tonight|tomorrow|tmrw|yesterday)(?:\s+at)?\s*(.*)$', lower)
    if m:
        word, rest = m.group(1), m.group(2).strip()
        base = today
@@ -434,8 +470,21 @@ def _expand_rrule(
        return [d]

    # Parse the rrule, applying it to the base dtstart.
+    rrule_str = ev.rrule
+    if ev.dtstart is not None and getattr(ev.dtstart, "tzinfo", None) is None:
+        # Events are stored with a naive (UTC) dtstart, but standard .ics
+        # exporters (Google/Apple/Outlook/Fastmail) write the bound as an
+        # absolute UTC value, e.g. UNTIL=20240105T090000Z. dateutil refuses to
+        # mix a tz-aware UNTIL with a naive DTSTART ("RRULE UNTIL values must be
+        # specified in UTC when DTSTART is timezone-aware"), so the except branch
+        # below would silently collapse the whole series to a single event.
+        # Drop the trailing Z so UNTIL matches the naive DTSTART.
+        import re as _re
+        rrule_str = _re.sub(
+            r"(UNTIL=\d{8}(?:T\d{6})?)Z", r"\1", rrule_str, flags=_re.IGNORECASE
+        )
    try:
-        rule = rrulestr(ev.rrule, dtstart=ev.dtstart)
+        rule = rrulestr(rrule_str, dtstart=ev.dtstart)
    except Exception as ex:
        logger.warning(
            "Failed to parse rrule=%r for event %s: %s", ev.rrule, ev.uid, ex
@@ -509,13 +558,20 @@ def setup_calendar_routes() -> APIRouter:
        owner = _require_user(request)
        from routes.prefs_routes import _load_for_user
        cfg = (_load_for_user(owner) or {}).get("caldav", {}) or {}
+        caldav_password = cfg.get("password") or ""
+        if caldav_password:
+            try:
+                from src.secret_storage import decrypt
+                caldav_password = decrypt(caldav_password)
+            except Exception:
+                pass
        # Surface url+username but never hand the password back to the
        # client — saved-state UI shouldn't leak the credential.
        return {
            "url": cfg.get("url", "") or "",
            "username": cfg.get("username", "") or "",
            "password": "",
-            "has_password": bool(cfg.get("password")),
+            "has_password": bool(caldav_password),
            "local": not bool(cfg.get("url")),
        }

@@ -534,12 +590,20 @@ def setup_calendar_routes() -> APIRouter:
            prefs.pop("caldav", None)
            _save_for_user(owner, prefs)
            return {"ok": True, "cleared": True}
-        cfg["url"] = body.get("url", "").strip()
+        from src.caldav_sync import validate_caldav_url
+        try:
+            cfg["url"] = validate_caldav_url(body.get("url", ""))
+        except ValueError as e:
+            raise HTTPException(400, str(e))
        cfg["username"] = (body.get("username") or "").strip()
        # Preserve the stored password when the client sends an empty
        # one (edit form re-submitted without re-typing the password).
        if body.get("password"):
-            cfg["password"] = body["password"]
+            from src.secret_storage import encrypt
+            cfg["password"] = encrypt(body["password"])
+        elif cfg.get("password"):
+            from src.secret_storage import encrypt
+            cfg["password"] = encrypt(cfg["password"])
        prefs["caldav"] = cfg
        _save_for_user(owner, prefs)
        return {"ok": True}
@@ -566,9 +630,21 @@ def setup_calendar_routes() -> APIRouter:
            cfg = (_load_for_user(owner) or {}).get("caldav", {}) or {}
            url = url or (cfg.get("url") or "")
            user = user or (cfg.get("username") or "")
-            pw = pw or (cfg.get("password") or "")
+            if not pw:
+                pw = cfg.get("password") or ""
+                if pw:
+                    try:
+                        from src.secret_storage import decrypt
+                        pw = decrypt(pw)
+                    except Exception:
+                        pass
        if not (url and user and pw):
            return {"ok": False, "error": "Missing URL, username, or password"}
+        from src.caldav_sync import validate_caldav_url
+        try:
+            url = validate_caldav_url(url)
+        except ValueError as e:
+            return {"ok": False, "error": str(e)}
        import httpx
        propfind_body = (
            '<?xml version="1.0" encoding="UTF-8"?>\n'
@@ -576,13 +652,25 @@ def setup_calendar_routes() -> APIRouter:
            '</d:prop></d:propfind>'
        )
        try:
-            async with httpx.AsyncClient(timeout=8.0, follow_redirects=True) as cx:
+            async with httpx.AsyncClient(timeout=8.0, follow_redirects=False, trust_env=False) as cx:
                r = await cx.request(
                    "PROPFIND", url,
                    auth=(user, pw),
                    headers={"Depth": "0", "Content-Type": "application/xml"},
                    content=propfind_body,
                )
+                # If the server demands Digest (Baïkal default, SabreDAV-based
+                # servers, Radicale with htdigest), the Basic attempt above
+                # 401s. Retry once with httpx.DigestAuth so this test matches
+                # what the real sync does via caldav.DAVClient in
+                # src/caldav_sync.py (which negotiates the scheme).
+                if r.status_code == 401 and "digest" in r.headers.get("www-authenticate", "").lower():
+                    r = await cx.request(
+                        "PROPFIND", url,
+                        auth=httpx.DigestAuth(user, pw),
+                        headers={"Depth": "0", "Content-Type": "application/xml"},
+                        content=propfind_body,
+                    )
            # 207 = Multi-Status — standard CalDAV success. 200 also
            # acceptable. Anything else (401/403/404/5xx) means trouble.
            if r.status_code in (200, 207):
@@ -593,6 +681,8 @@ def setup_calendar_routes() -> APIRouter:
                return {"ok": False, "error": "Forbidden — user can't access that URL"}
            if r.status_code == 404:
                return {"ok": False, "error": "Not found — check the URL path"}
+            if 300 <= r.status_code < 400:
+                return {"ok": False, "error": "Redirects are not followed for CalDAV safety; use the final URL"}
            return {"ok": False, "error": f"HTTP {r.status_code}"}
        except httpx.ConnectError as e:
            return {"ok": False, "error": f"Connection refused: {e}"[:200]}
@@ -739,6 +829,16 @@ def setup_calendar_routes() -> APIRouter:
            )
            db.add(ev)
            db.commit()
+            if cal.source == "caldav":
+                # Push the new event to the remote so it appears on the user's
+                # other devices — the sync is otherwise pull-only (#800).
+                from src.caldav_writeback import writeback_event
+                await writeback_event(owner, cal.source, cal.id, {
+                    "uid": uid, "summary": data.summary, "description": data.description,
+                    "location": data.location, "dtstart": dtstart, "dtend": dtend,
+                    "all_day": data.all_day, "is_utc": _is_utc and not data.all_day,
+                    "rrule": data.rrule or "",
+                })
            return {"ok": True, "uid": uid}
        except HTTPException:
            raise
@@ -785,6 +885,14 @@ def setup_calendar_routes() -> APIRouter:
            if data.color is not None:
                ev.color = data.color if data.color else None
            db.commit()
+            cal = db.query(CalendarCal).filter(CalendarCal.id == ev.calendar_id).first()
+            if cal and cal.source == "caldav":
+                from src.caldav_writeback import writeback_event
+                await writeback_event(owner, cal.source, cal.id, {
+                    "uid": ev.uid, "summary": ev.summary, "description": ev.description,
+                    "location": ev.location, "dtstart": ev.dtstart, "dtend": ev.dtend,
+                    "all_day": ev.all_day, "is_utc": ev.is_utc, "rrule": ev.rrule or "",
+                })
            return {"ok": True}
        except HTTPException:
            raise
@@ -805,8 +913,15 @@ def setup_calendar_routes() -> APIRouter:
        db = SessionLocal()
        try:
            ev = _get_or_404_event(db, base_uid, owner)
+            # Capture what the remote push needs BEFORE the row is gone.
+            _cal = db.query(CalendarCal).filter(CalendarCal.id == ev.calendar_id).first()
+            _is_caldav = bool(_cal and _cal.source == "caldav")
+            _cal_id, _ev_uid = ev.calendar_id, ev.uid
            db.delete(ev)
            db.commit()
+            if _is_caldav:
+                from src.caldav_writeback import writeback_event
+                await writeback_event(owner, "caldav", _cal_id, {"uid": _ev_uid}, delete=True)
            return {"ok": True}
        except HTTPException:
            raise
@@ -938,7 +1053,12 @@ def setup_calendar_routes() -> APIRouter:
                source_uid = str(comp.get("uid", "")) or None
                if source_uid:
                    src_dtstart = dtstart.dt
-                    naive_src = src_dtstart.replace(tzinfo=None) if hasattr(src_dtstart, 'tzinfo') and src_dtstart.tzinfo else src_dtstart
+                    # Normalize to the SAME naive form import_ics stores, so a
+                    # re-import of a tz-aware event matches the existing row.
+                    # The old code stripped tzinfo WITHOUT converting to UTC
+                    # (wall clock), while storage converts to UTC first, so
+                    # every re-import of a TZID event created a duplicate.
+                    naive_src = _ics_naive_dtstart(src_dtstart)
                    existing = (
                        db.query(CalendarEvent)
                        .filter(
@@ -1032,23 +1152,23 @@ def setup_calendar_routes() -> APIRouter:
                "BEGIN:VCALENDAR",
                "VERSION:2.0",
                "PRODID:-//Odysseus//Calendar//EN",
-                f"X-WR-CALNAME:{cal.name}",
+                f"X-WR-CALNAME:{_ics_escape(cal.name)}",
            ]
            for ev in events:
                lines.append("BEGIN:VEVENT")
                lines.append(f"UID:{ev.uid}")
-                lines.append(f"SUMMARY:{ev.summary or ''}")
+                lines.append(f"SUMMARY:{_ics_escape(ev.summary or '')}")
                if ev.all_day:
                    lines.append(f"DTSTART;VALUE=DATE:{ev.dtstart.strftime('%Y%m%d')}")
                    lines.append(f"DTEND;VALUE=DATE:{ev.dtend.strftime('%Y%m%d')}")
                else:
-                    lines.append(f"DTSTART:{ev.dtstart.strftime('%Y%m%dT%H%M%S')}")
-                    lines.append(f"DTEND:{ev.dtend.strftime('%Y%m%dT%H%M%S')}")
+                    _dt_suffix = "Z" if getattr(ev, "is_utc", False) else ""
+                    lines.append(f"DTSTART:{ev.dtstart.strftime('%Y%m%dT%H%M%S')}{_dt_suffix}")
+                    lines.append(f"DTEND:{ev.dtend.strftime('%Y%m%dT%H%M%S')}{_dt_suffix}")
                if ev.description:
-                    desc = ev.description.replace(chr(10), '\\n')
-                    lines.append(f"DESCRIPTION:{desc}")
+                    lines.append(f"DESCRIPTION:{_ics_escape(ev.description)}")
                if ev.location:
-                    lines.append(f"LOCATION:{ev.location}")
+                    lines.append(f"LOCATION:{_ics_escape(ev.location)}")
                if ev.rrule:
                    lines.append(f"RRULE:{ev.rrule}")
                lines.append("END:VEVENT")
--- a/routes/chat_helpers.py
+++ b/routes/chat_helpers.py
@@ -3,6 +3,7 @@
 import asyncio
 import json
 import logging
+import os
 import re
 from dataclasses import dataclass, field
 from typing import Any, Optional
@@ -11,6 +12,7 @@ from core.models import ChatMessage
 from core.database import SessionLocal
 from core.database import Session as DBSession, ModelEndpoint
 from src.llm_core import normalize_model_id
+from src.endpoint_resolver import normalize_base
 from src.context_compactor import maybe_compact, trim_for_context
 from src.auth_helpers import get_current_user
 from src.prompt_security import untrusted_context_message
@@ -119,7 +121,7 @@ def needs_auto_name(name: str) -> bool:
    if name.startswith("Chat:") or name == "Chat":
        return True
    # Default frontend name: "modelname HH:MM:SS AM/PM"
-    if re.match(r'^.+ \d{1,2}:\d{2}:\d{2}\s*(AM|PM)$', name):
+    if re.match(r"^.+ \d{1,2}:\d{2}:\d{2}(\s*(AM|PM))?$", name, re.IGNORECASE):
        return True
    return False

@@ -146,9 +148,13 @@ async def auto_name_session(session_manager, sess):
        if not first_msg:
            return

+        owner = getattr(sess, "owner", None)
        t_url, t_model, t_headers = resolve_task_endpoint(
-            sess.endpoint_url, sess.model, sess.headers,
+            sess.endpoint_url, sess.model, sess.headers, owner=owner,
        )
+        if not t_model:
+            logger.debug("[auto-name] No model provided, skipping")
+            return

        # max_tokens big enough that reasoning models (Minimax M2,
        # DeepSeek R1, QwQ, etc.) have headroom for <think>…</think>
@@ -306,7 +312,24 @@ def fire_message_event(request, webhook_manager, session_id: str, sess, message:
    fire_event("message_sent", user)


-def resolve_session_auth(sess, session_id: str):
+def _session_url_matches_endpoint(session_url: str, endpoint_base: str) -> bool:
+    """Return True when ``session_url`` is ``endpoint_base`` or one of its chat URLs."""
+    if not (session_url and endpoint_base):
+        return False
+    try:
+        from src.endpoint_resolver import build_chat_url, normalize_base
+
+        root = normalize_base(endpoint_base).rstrip("/")
+        # Compare ignoring trailing slashes: the bare base, base + /chat/completions,
+        # and whatever chat URL the resolver builds for that base.
+        accepted = {root, root + "/chat/completions", build_chat_url(root).rstrip("/")}
+        return session_url.rstrip("/") in accepted
+    except Exception:
+        # Any resolver/import failure is reported as "no match".
+        return False
+
+
+def resolve_session_auth(sess, session_id: str, owner: Optional[str] = None):
    """Ensure session has auth headers — resolve from endpoint DB if missing."""
    has_auth = sess.headers and isinstance(sess.headers, dict) and any(
        k.lower() in ('authorization', 'x-api-key') for k in sess.headers
@@ -315,25 +338,96 @@ def resolve_session_auth(sess, session_id: str):
        return

    try:
-        from src.endpoint_resolver import build_headers
+        from src.endpoint_resolver import build_headers, normalize_base
        db = SessionLocal()
        try:
-            domain = sess.endpoint_url.split("//")[1].split("/")[0] if "//" in sess.endpoint_url else ""
-            if domain:
-                ep = db.query(ModelEndpoint).filter(ModelEndpoint.base_url.contains(domain)).first()
-                if ep and ep.api_key:
-                    sess.headers = build_headers(ep.api_key, ep.base_url)
-                    db.query(DBSession).filter(DBSession.id == session_id).update(
-                        {"headers": json.dumps(sess.headers)}
-                    )
-                    db.commit()
-                    logger.info(f"Resolved and persisted auth headers for session {session_id} from endpoint {ep.name}")
+            target_url = getattr(sess, "endpoint_url", "") or ""
+            if not target_url:
+                return
+            q = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)
+            if owner:
+                # Missing headers usually means "recover from the saved endpoint".
+                # Scope that lookup to the session owner, otherwise two users
+                # with similar endpoint URLs can borrow each other's API key.
+                from src.auth_helpers import owner_filter
+                q = owner_filter(q, ModelEndpoint, owner)
+            for ep in q.all():
+                if not _session_url_matches_endpoint(target_url, ep.base_url or ""):
+                    continue
+                if not ep.api_key:
+                    return
+                base = normalize_base(ep.base_url or "")
+                sess.headers = build_headers(ep.api_key, base)
+                update_q = db.query(DBSession).filter(DBSession.id == session_id)
+                if owner:
+                    update_q = update_q.filter(DBSession.owner == owner)
+                update_q.update({"headers": sess.headers})
+                db.commit()
+                logger.info(f"Resolved and persisted auth headers for session {session_id} from endpoint {ep.name}")
+                return
        finally:
            db.close()
    except Exception as e:
        logger.warning(f"Failed to resolve session headers: {e}")


+def _match_cached_model_id(requested: str, models) -> Optional[str]:
+    """Return the cached ID equal to ``requested``, else the first sharing its basename."""
+    if not (requested and models):
+        return None
+    candidates = [str(entry) for entry in models if entry]
+    if requested in candidates:
+        return requested
+
+    # Fall back to comparing the final path component, ignoring trailing slashes.
+    wanted = os.path.basename(requested.rstrip("/"))
+    by_tail = (cid for cid in candidates if os.path.basename(cid.rstrip("/")) == wanted)
+    return next(by_tail, None)
+
+
+def _normalize_model_id_from_cache(sess) -> Optional[str]:
+    """Match ``sess.model`` against cached endpoint model IDs; ``None`` if no hit.
+
+    Only enabled endpoints whose normalized base URL equals the session's count.
+    """
+    url = getattr(sess, "endpoint_url", "") or ""
+    wanted = getattr(sess, "model", "") or ""
+    if not (url and wanted):
+        return None
+
+    try:
+        target_base = normalize_base(url)
+    except Exception:
+        target_base = url.rstrip("/")  # resolver failed: compare the raw URL
+    if not target_base:
+        return None
+
+    db = SessionLocal()
+    try:
+        for row in db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True).all():
+            try:
+                if normalize_base(getattr(row, "base_url", "") or "") != target_base:
+                    continue
+            except Exception:
+                continue
+            cached = getattr(row, "cached_models", None)
+            if not cached:
+                continue
+            try:
+                models = json.loads(cached) if isinstance(cached, str) else cached
+            except Exception:
+                continue
+            hit = _match_cached_model_id(wanted, models)
+            if hit:
+                return hit
+    except Exception as e:
+        logger.debug("Cached model normalization skipped: %s", e)
+    finally:
+        db.close()
+
+    return None
+
+
 async def build_chat_context(
    sess,
    request,
@@ -434,8 +528,9 @@ async def build_chat_context(
    for transcript in preprocessed.youtube_transcripts:
        preface.append(untrusted_context_message("youtube transcript", transcript))

-    # Normalize model ID
-    norm = normalize_model_id(sess.endpoint_url, sess.model)
+    # Normalize model ID. Prefer cached endpoint models so group chat does not
+    # re-hit slow local /models endpoints on every participant turn.
+    norm = _normalize_model_id_from_cache(sess) or normalize_model_id(sess.endpoint_url, sess.model)
    if norm:
        sess.model = norm

@@ -743,7 +838,7 @@ def run_post_response_tasks(
        from services.memory.memory_extractor import extract_and_store
        from src.task_endpoint import resolve_task_endpoint
        t_url, t_model, t_headers = resolve_task_endpoint(
-            sess.endpoint_url, sess.model, sess.headers,
+            sess.endpoint_url, sess.model, sess.headers, owner=owner,
        )
        asyncio.create_task(extract_and_store(
            sess, memory_manager, memory_vector,
@@ -780,7 +875,7 @@ def run_post_response_tasks(
            from services.memory.skill_extractor import maybe_extract_skill
            from src.task_endpoint import resolve_task_endpoint
            s_url, s_model, s_headers = resolve_task_endpoint(
-                sess.endpoint_url, sess.model, sess.headers,
+                sess.endpoint_url, sess.model, sess.headers, owner=owner,
            )
            logger.debug("[skill-extract] dispatching extractor (model=%s)", s_model)
            asyncio.create_task(maybe_extract_skill(
--- a/routes/chat_routes.py
+++ b/routes/chat_routes.py
@@ -23,10 +23,12 @@ from src.prompt_security import untrusted_context_message
 from core.exceptions import SessionNotFoundError
 from src.auth_helpers import get_current_user
 from routes.session_routes import _verify_session_owner
+from routes.document_helpers import _owner_session_filter
 from core.database import SessionLocal, get_session_mode, set_session_mode
 from core.database import Session as DBSession, ChatMessage as DBChatMessage
 from core.database import Document as DBDocument, ModelEndpoint
 from routes.research_routes import _resolve_research_endpoint
+from routes.model_routes import _visible_models
 from routes.chat_helpers import (
    resolve_session_auth,
    build_chat_context,
@@ -41,6 +43,7 @@ logger = logging.getLogger(__name__)

 # Track active streams for partial-save safety net
 _active_streams: Dict[str, dict] = {}
+_IMAGE_MODEL_PREFIXES = ("gpt-image", "dall-e", "chatgpt-image")


 def _stream_set(session_id: str, **fields) -> None:
@@ -69,13 +72,17 @@ def _session_url_matches_endpoint(session_url: str, endpoint_base: str) -> bool:
    return sess in variants or sess.startswith(base + "/")


-def _clear_orphaned_session_endpoint(sess) -> bool:
+def _clear_orphaned_session_endpoint(sess, owner: str | None = None) -> bool:
    """Clear a session model if its endpoint was deleted from ModelEndpoint."""
    if not getattr(sess, "endpoint_url", ""):
        return False
    db = SessionLocal()
    try:
-        endpoints = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True).all()
+        q = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)
+        if owner:
+            from src.auth_helpers import owner_filter
+            q = owner_filter(q, ModelEndpoint, owner)
+        endpoints = q.all()
        for ep in endpoints:
            if _session_url_matches_endpoint(sess.endpoint_url or "", ep.base_url or ""):
                return False
@@ -96,6 +103,132 @@ def _clear_orphaned_session_endpoint(sess) -> bool:
        db.close()


+def _endpoint_cache_contains_model(endpoint, model: str) -> bool:
+    """Check ``model`` against the endpoint's cached model list.
+
+    A missing, unparsable, non-list, or empty cache yields True: the cache is
+    only allowed to veto a model when it actually holds entries.
+    """
+    cache = getattr(endpoint, "cached_models", None)
+    if isinstance(cache, str) and cache:
+        try:
+            cache = json.loads(cache)
+        except Exception:
+            return True
+    if not isinstance(cache, list) or not cache:
+        return True
+    target = (model or "").strip()
+    # Entries are stringified and stripped before comparison.
+    return any(str(entry).strip() == target for entry in cache)
+
+
+def _is_image_generation_session(sess, owner: str | None = None) -> bool:
+    """Decide whether this session is routed to image generation instead of chat.
+
+    True when the model name starts with one of ``_IMAGE_MODEL_PREFIXES``
+    (case-insensitive), or when an enabled endpoint of ``model_type == "image"``
+    matches the session URL and ``_endpoint_cache_contains_model`` accepts the
+    model. With ``owner`` set, only endpoints passing ``owner_filter`` are
+    considered. Any error raised during the endpoint lookup yields False.
+    """
+    model = (getattr(sess, "model", "") or "").strip()
+    if model.lower().startswith(_IMAGE_MODEL_PREFIXES):
+        return True
+
+    session_url = (getattr(sess, "endpoint_url", "") or "").strip()
+    if not session_url:
+        return False
+
+    db = SessionLocal()
+    try:
+        query = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)
+        if owner:
+            from src.auth_helpers import owner_filter
+            query = owner_filter(query, ModelEndpoint, owner)
+        for candidate in query.all():
+            is_image = (getattr(candidate, "model_type", None) or "llm") == "image"
+            if (
+                is_image
+                and _session_url_matches_endpoint(session_url, getattr(candidate, "base_url", "") or "")
+                and _endpoint_cache_contains_model(candidate, model)
+            ):
+                return True
+        return False
+    except Exception:
+        return False
+    finally:
+        db.close()
+
+
+def _recover_empty_session_model(sess, session_id: str, owner: str | None = None) -> bool:
+    """Re-populate an empty ``sess.model`` from the matching endpoint's cached models.
+
+    Picks the first visible cached model of the first enabled endpoint (scoped
+    to ``owner`` when given) whose base URL matches ``sess.endpoint_url``, then
+    writes it to the DB session row (when that row exists) and to ``sess``
+    (Issue #587: the picker lists the endpoint cache, so the row could be unset).
+
+    Returns True iff ``sess.model`` was repaired; False if it was already set,
+    nothing usable was found, or the lookup raised (logged and rolled back).
+    """
+    if getattr(sess, "model", None):  # already has a model: nothing to repair
+        return False
+    db = SessionLocal()
+    try:
+        # Prefer the endpoint whose base URL matches the session — we know the
+        # user already pointed this session at that endpoint, so its first
+        # cached model is the most defensible default.
+        ep = None
+        if getattr(sess, "endpoint_url", ""):
+            q = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)
+            if owner:
+                from src.auth_helpers import owner_filter
+                q = owner_filter(q, ModelEndpoint, owner)
+            endpoints = q.all()
+            for cand in endpoints:
+                if _session_url_matches_endpoint(sess.endpoint_url or "", cand.base_url or ""):
+                    ep = cand
+                    break
+        if not ep:  # session has no endpoint_url, or no enabled endpoint matches it
+            return False
+        try:
+            cached = json.loads(ep.cached_models) if isinstance(ep.cached_models, str) else (ep.cached_models or [])
+        except Exception:
+            cached = []  # unparsable cache is treated like an empty one
+        if not cached:
+            return False
+        try:
+            visible = _visible_models(cached, getattr(ep, "hidden_models", None))
+        except Exception:
+            visible = cached  # hidden-model filtering failed: fall back to the full cache
+        if not visible:
+            return False
+        model = visible[0]
+        if not isinstance(model, str) or not model.strip():
+            return False
+        model = model.strip()
+        # Persist so the next request, websocket reconnect, or page reload
+        # picks up the same model (we'd otherwise re-pick on every send
+        # and silently switch on the user if the cached order shifts).
+        db_session = db.query(DBSession).filter(DBSession.id == session_id).first()
+        if db_session:
+            db_session.model = model
+            db_session.updated_at = datetime.utcnow()
+            db.commit()
+        sess.model = model  # in-memory session is updated even when no DB row was found
+        logger.info(
+            "Recovered empty session model for %s — picked %r from endpoint %s",
+            session_id, model, ep.id,
+        )
+        return True
+    except Exception as e:
+        db.rollback()
+        logger.warning("Failed to recover empty session model for %s: %s", session_id, e)
+        return False
+    finally:
+        db.close()
+
+
 def setup_chat_routes(
    session_manager,
    chat_handler,
@@ -130,9 +263,20 @@ def setup_chat_routes(
            sess = session_manager.get_session(session)
        except KeyError:
            raise HTTPException(404, f"Session '{session}' not found")
-        if _clear_orphaned_session_endpoint(sess):
+        owner = get_current_user(request)
+        if _clear_orphaned_session_endpoint(sess, owner=owner):
            raise HTTPException(400, "Selected model endpoint was removed. Pick another model in Settings.")

+        # Empty model + live endpoint = setup race (Issue #587). Repair from
+        # the endpoint's cached model list before privilege checks, which
+        # otherwise see "" and behave inconsistently with the allowlist.
+        _recover_empty_session_model(sess, session, owner=owner)
+        if not getattr(sess, "model", "").strip():
+            raise HTTPException(
+                400,
+                "No model selected for this chat. Open the model picker and choose one before sending.",
+            )
+
        # Same allowed_models + daily-cap gate as chat_stream (mirror so the
        # non-streaming path can't be used to bypass).
        _enforce_chat_privileges(request, sess)
@@ -270,8 +414,21 @@ def setup_chat_routes(
            # but BEFORE loading. Prevents cross-user session hijack.
            _verify_session_owner(request, session)
            sess = session_manager.get_session(session)
-            if _clear_orphaned_session_endpoint(sess):
+            owner = get_current_user(request)
+            if _clear_orphaned_session_endpoint(sess, owner=owner):
                raise HTTPException(400, "Selected model endpoint was removed. Pick another model in Settings.")
+            # Issue #587: picker shows a model from the endpoint cache but
+            # s.model never made it onto the DB row (first-send race after
+            # endpoint setup, or a previous endpoint delete/recreate). Pull
+            # the first cached model off the matching endpoint so the
+            # upstream isn't called with model="" (which surfaces as a
+            # generic 401/503).
+            _recover_empty_session_model(sess, session, owner=owner)
+            if not getattr(sess, "model", "").strip():
+                raise HTTPException(
+                    400,
+                    "No model selected for this chat. Open the model picker and choose one before sending.",
+                )
        except SessionNotFoundError as e:
            raise HTTPException(404, str(e))
        except (ValueError, ValidationError):
@@ -288,7 +445,7 @@ def setup_chat_routes(
        _enforce_chat_privileges(request, sess)

        # Ensure session has auth headers
-        resolve_session_auth(sess, session)
+        resolve_session_auth(sess, session, owner=get_current_user(request))

        # Check for research_pending BEFORE mode persist overwrites it
        do_research = str(use_research).lower() == "true"
@@ -343,18 +500,22 @@ def setup_chat_routes(
        try:
            if active_doc_id:
                logger.info(f"[doc-inject] active_doc_id from frontend: {active_doc_id}")
-                active_doc = _doc_db.query(DBDocument).filter(
-                    DBDocument.id == active_doc_id,
-                ).first()
+                # Scope to the caller's documents. The session and in-memory
+                # fallbacks below are already owner/session-bound; this
+                # explicit-id path looked up by id alone, so a user could
+                # inject another user's document by passing its id.
+                _doc_q = _doc_db.query(DBDocument).filter(DBDocument.id == active_doc_id)
+                active_doc = _owner_session_filter(_doc_q, ctx.user).first()
                if active_doc:
                    logger.info(f"[doc-inject] found by ID: title={active_doc.title!r}, lang={active_doc.language!r}, is_active={active_doc.is_active}, content_len={len(active_doc.current_content or '')}")
                else:
                    logger.warning(f"[doc-inject] NOT FOUND by ID {active_doc_id}")
            if not active_doc:
-                active_doc = _doc_db.query(DBDocument).filter(
+                _session_doc_q = _doc_db.query(DBDocument).filter(
                    DBDocument.session_id == session,
                    DBDocument.is_active == True
-                ).order_by(DBDocument.updated_at.desc()).first()
+                )
+                active_doc = _owner_session_filter(_session_doc_q, ctx.user).order_by(DBDocument.updated_at.desc()).first()
                if active_doc:
                    logger.info(f"[doc-inject] found by session fallback: title={active_doc.title!r}")
            # Last resort: the document the agent itself just created/edited
@@ -368,7 +529,8 @@ def setup_chat_routes(
                    from src.tool_implementations import get_active_document
                    _mem_id = get_active_document()
                    if _mem_id:
-                        cand = _doc_db.query(DBDocument).filter(DBDocument.id == _mem_id).first()
+                        _mem_q = _doc_db.query(DBDocument).filter(DBDocument.id == _mem_id)
+                        cand = _owner_session_filter(_mem_q, ctx.user).first()
                        if cand and (not cand.session_id or cand.session_id == session):
                            active_doc = cand
                            logger.info(f"[doc-inject] found by in-memory active id: title={active_doc.title!r} (session_id={cand.session_id!r})")
@@ -563,6 +725,7 @@ def setup_chat_routes(
                        prior_findings=_prior_findings,
                        prior_urls=_prior_urls,
                        on_complete=_on_research_done,
+                        owner=_user,
                    )

                    _heartbeat_counter = 0
@@ -619,7 +782,7 @@ def setup_chat_routes(
            # output. Resolved once per request.
            try:
                from src.endpoint_resolver import resolve_chat_fallback_candidates
-                _fallback_candidates = resolve_chat_fallback_candidates()
+                _fallback_candidates = resolve_chat_fallback_candidates(owner=_user)
            except Exception:
                _fallback_candidates = []

@@ -632,28 +795,7 @@ def setup_chat_routes(
                _model_info["character_name"] = ctx.preset.character_name
            yield f'data: {json.dumps(_model_info)}\n\n'

-            # Detect image models and route directly to image generation
-            _IMAGE_MODEL_PREFIXES = ("gpt-image", "dall-e", "chatgpt-image")
-            _is_image_model = any(sess.model.lower().startswith(p) for p in _IMAGE_MODEL_PREFIXES)
-
-            # Also check if the endpoint is registered as an image-type endpoint
-            if not _is_image_model:
-                try:
-                    from src.endpoint_resolver import normalize_base as _nb
-                    _ep_base = _nb(sess.endpoint_url)
-                    _db = SessionLocal()
-                    try:
-                        _is_image_model = _db.query(ModelEndpoint).filter(
-                            ModelEndpoint.model_type == "image",
-                            ModelEndpoint.is_enabled == True,
-                            ModelEndpoint.base_url.contains(_ep_base.split("://")[-1].split("/")[0]),
-                        ).first() is not None
-                    finally:
-                        _db.close()
-                except Exception:
-                    pass
-
-            if _is_image_model:
+            if _is_image_generation_session(sess, owner=_user):
                from src.settings import get_setting
                if not get_setting("image_gen_enabled", True):
                    yield f'data: {json.dumps({"delta": "Image generation is disabled by the administrator."})}\n\n'
@@ -664,7 +806,7 @@ def setup_chat_routes(
                _user_msg = message or ""
                yield f'data: {json.dumps({"type": "tool_start", "tool": "generate_image", "command": _user_msg[:100]})}\n\n'
                yield ": heartbeat\n\n"
-                _img_result = await do_generate_image(f"{_user_msg}\n{sess.model}", session)
+                _img_result = await do_generate_image(f"{_user_msg}\n{sess.model}", session, owner=_user)
                _img_output = _img_result.get("results", _img_result.get("error", ""))
                _img_tool_data = {"type": "tool_output", "tool": "generate_image", "command": _user_msg[:100], "output": _img_output, "exit_code": 0 if "error" not in _img_result else 1}
                for _k in ("image_url", "image_id", "image_prompt", "image_model", "image_size", "image_quality"):
@@ -688,6 +830,7 @@ def setup_chat_routes(
                return
            elif chat_mode == "chat":
                _chat_start = time.time()
+                _answered_by = None  # set if the selected model failed and a fallback answered
                # ── Chat mode: call stream_llm directly, NO tools, NO document access ──
                try:
                    _chat_candidates = [(sess.endpoint_url, sess.model, sess.headers)] + _fallback_candidates
@@ -708,16 +851,35 @@ def setup_chat_routes(
                            try:
                                data = json.loads(chunk[6:])
                                if "delta" in data:
-                                    full_response += data["delta"]
-                                    _stream_set(session, partial=full_response)
+                                    # Reasoning tokens arrive flagged thinking:true.
+                                    # Forward them so the client can show a thinking
+                                    # indicator, but don't fold them into the saved
+                                    # reply (mirrors the rewrite path below).
+                                    if not data.get("thinking"):
+                                        full_response += data["delta"]
+                                        _stream_set(session, partial=full_response)
+                                    yield chunk
+                                elif data.get("type") == "fallback":
+                                    # Selected model failed; a fallback answered.
+                                    # Forward the notice and remember the real model.
+                                    _answered_by = data.get("answered_by") or _answered_by
                                    yield chunk
                                elif data.get("type") == "usage":
                                    last_metrics = data.get("data", {})
-                                    last_metrics["model"] = sess.model
+                                    last_metrics["model"] = _answered_by or sess.model
                                    if ctx.context_length and last_metrics.get("input_tokens"):
                                        pct = min(round((last_metrics["input_tokens"] / ctx.context_length) * 100, 1), 100.0)
                                        last_metrics["context_percent"] = pct
                                        last_metrics["context_length"] = ctx.context_length
+                                    # The frontend reads `tokens_per_second`; the raw usage event
+                                    # carries the backend's true gen speed as `gen_tps` (llama.cpp
+                                    # timings). Map it through so this direct-chat path shows real
+                                    # t/s instead of "n/a" → falling back to a bare token count.
+                                    if last_metrics.get("gen_tps") and not last_metrics.get("tokens_per_second"):
+                                        last_metrics["tokens_per_second"] = last_metrics["gen_tps"]
+                                        last_metrics["tps_source"] = "backend"
+                                    # Wall-clock response time for the stats popup ("Time").
+                                    last_metrics.setdefault("response_time", round(time.time() - _chat_start, 2))
                                    yield f'data: {json.dumps({"type": "metrics", "data": last_metrics})}\n\n'
                            except json.JSONDecodeError:
                                yield chunk
@@ -781,6 +943,7 @@ def setup_chat_routes(
                # ── Agent mode: full agent loop with tools ──
                _agent_rounds = 0
                _agent_tool_calls = 0
+                _answered_by = None  # set if the selected model failed and a fallback answered
                try:
                    from src.settings import get_setting
                    _tool_budget = int(get_setting("agent_max_tool_calls", 0))
@@ -805,8 +968,12 @@ def setup_chat_routes(
                            try:
                                data = json.loads(chunk[6:])
                                if "delta" in data:
-                                    full_response += data["delta"]
-                                    _stream_set(session, partial=full_response)
+                                    # Reasoning tokens arrive flagged thinking:true.
+                                    # Forward them for the live indicator, but keep
+                                    # them out of the saved reply (same as chat mode).
+                                    if not data.get("thinking"):
+                                        full_response += data["delta"]
+                                        _stream_set(session, partial=full_response)
                                    yield chunk
                                elif data.get("type") == "web_sources":
                                    web_sources = data.get("data", [])
@@ -821,9 +988,16 @@ def setup_chat_routes(
                                    elif data.get("type") == "tool_start":
                                        _agent_tool_calls += 1
                                    yield chunk
+                                elif data.get("type") == "fallback":
+                                    # Selected model failed; a fallback answered.
+                                    # Forward the notice and remember the real
+                                    # model so metrics reflect it, not the masked
+                                    # selected model.
+                                    _answered_by = data.get("answered_by") or _answered_by
+                                    yield chunk
                                elif data.get("type") == "metrics":
                                    last_metrics = data.get("data", {})
-                                    last_metrics["model"] = sess.model
+                                    last_metrics["model"] = _answered_by or sess.model
                                    yield f'data: {json.dumps({"type": "metrics", "data": last_metrics})}\n\n'
                            except json.JSONDecodeError:
                                yield chunk
@@ -920,11 +1094,15 @@ def setup_chat_routes(
        _verify_session_owner(request, session_id)
        # A detached run can still be going even if _active_streams was popped;
        # report it as active so the client knows to reconnect via /resume.
-        if session_id not in _active_streams:
+        # Read once via .get() to avoid a KeyError race between the membership
+        # check and the indexed read if a sibling stream's finally pops the
+        # entry in between (same pattern _stream_set already uses).
+        rec = _active_streams.get(session_id)
+        if rec is None:
            if agent_runs.is_active(session_id):
                return {"status": "streaming", "detached": True}
            raise HTTPException(404, "No active stream for this session")
-        return _active_streams[session_id]
+        return rec

    # ------------------------------------------------------------------ #
    # POST /api/inject_context
@@ -1088,7 +1266,7 @@ def setup_chat_routes(
                                db_msg = (
                                    db.query(DBChatMessage)
                                    .filter(DBChatMessage.session_id == session_id, DBChatMessage.role == 'assistant')
-                                    .order_by(DBChatMessage.created_at.desc())
+                                    .order_by(DBChatMessage.timestamp.desc())
                                    .first()
                                )
                                if db_msg:
--- a/routes/contacts_routes.py
+++ b/routes/contacts_routes.py
@@ -130,21 +130,28 @@ def _parse_vcards(text: str) -> List[Dict]:
        contact = {"name": "", "emails": [], "phones": [], "uid": ""}
        for line in block.split("\n"):
            line = line.strip()
-            if line.startswith("FN:") or line.startswith("FN;"):
-                contact["name"] = _vunesc(line.split(":", 1)[1]) if ":" in line else ""
-            elif line.startswith("EMAIL"):
+            # Strip an optional RFC 6350 group prefix (e.g. "item1.EMAIL;...")
+            # that Apple Contacts / iCloud / many CardDAV servers emit by
+            # default — without this the property-name checks below miss those
+            # lines and silently drop the email / phone. The group token only
+            # precedes the property name, so it is safe to strip for matching
+            # and value extraction, and a no-op for non-grouped lines.
+            name_part = re.sub(r"^[A-Za-z0-9-]+\.", "", line, count=1)
+            if name_part.startswith("FN:") or name_part.startswith("FN;"):
+                contact["name"] = _vunesc(name_part.split(":", 1)[1]) if ":" in name_part else ""
+            elif name_part.startswith("EMAIL"):
                # Handle EMAIL:foo@bar OR EMAIL;TYPE=...:foo@bar OR EMAIL;PREF=1:foo@bar
-                if ":" in line:
-                    email_addr = _vunesc(line.split(":", 1)[1])
+                if ":" in name_part:
+                    email_addr = _vunesc(name_part.split(":", 1)[1])
                    if email_addr and email_addr not in contact["emails"]:
                        contact["emails"].append(email_addr)
-            elif line.startswith("TEL"):
-                if ":" in line:
-                    phone = _vunesc(line.split(":", 1)[1])
+            elif name_part.startswith("TEL"):
+                if ":" in name_part:
+                    phone = _vunesc(name_part.split(":", 1)[1])
                    if phone and phone not in contact["phones"]:
                        contact["phones"].append(phone)
-            elif line.startswith("UID:"):
-                contact["uid"] = _vunesc(line[4:])
+            elif name_part.startswith("UID:"):
+                contact["uid"] = _vunesc(name_part[4:])
        if contact["name"] or contact["emails"]:
            contacts.append(contact)
    return contacts
@@ -676,8 +683,8 @@ def setup_contacts_routes():
    @router.post("/add")
    async def add_contact(data: dict, _admin: str = Depends(require_admin)):
        """Add a new contact."""
-        name = data.get("name", "").strip()
-        email = data.get("email", "").strip()
+        name = (data.get("name") or "").strip()
+        email = (data.get("email") or "").strip()
        if not email:
            return {"success": False, "error": "Email required"}
        # Check if already exists
--- a/routes/cookbook_helpers.py
+++ b/routes/cookbook_helpers.py
@@ -148,6 +148,108 @@ def _local_tooling_path_export(executable: str) -> str:
    return f'export PATH="{esc}:$PATH"'


+def _pip_install_no_cache(cmd: str) -> str:
+    """Return *cmd* with ``--no-cache-dir`` added to its first ``pip install``.
+
+    Large Cookbook wheels (vLLM, llama-cpp-python, …) are built through pip,
+    whose default cache sits under ``$HOME/.cache/pip``; on a small home
+    filesystem that ends in ``[Errno 28] No space left on device`` (issue #1219)
+    and a dependency that looks installed but cannot be used (#1459). Skipping
+    the cache keeps these one-off installs off the home disk. Empty commands,
+    commands without ``pip install`` and commands that already carry the flag
+    are returned unchanged, so applying this twice is harmless."""
+    needle = "pip install"
+    leave_alone = not cmd or needle not in cmd or "--no-cache-dir" in cmd
+    return cmd if leave_alone else cmd.replace(needle, "pip install --no-cache-dir", 1)
+
+
+def _pip_install_attempt(pip_cmd: str) -> str:
+    """Wrap one pip install command so its exit status survives the fallback
+    chain and the tail of its output reaches the tmux log.
+
+    `pip … 2>&1 | tail -5` returns ``tail``'s exit code (0), which hides pip's
+    real failure and stops the next fallback from running. The snippet built
+    here captures all output in a temp file, prints its last 5 lines, removes
+    the file and exits with pip's own status. The script is passed through
+    ``shlex.quote`` because *pip_cmd* may itself contain single quotes (e.g. a
+    quoted ``'pkg[extra]'`` spec) that would otherwise end the ``bash -c``
+    argument early and leave the spec unquoted.
+    """
+    script = (
+        f'_out=$(mktemp) && {pip_cmd} >"$_out" 2>&1; _rc=$?; '
+        'tail -5 "$_out"; rm -f "$_out"; exit $_rc'
+    )
+    return f"bash -c {shlex.quote(script)}"
+
+
+def _pip_install_fallback_chain(package: str, *, python_cmd: str = "python3 -m pip", upgrade: bool = False) -> str:
+    """Return a bash fallback chain that installs *package* and surfaces errors.
+
+    The active interpreter/environment is tried first. ``--user`` is invalid
+    inside many venvs, so the ``--user`` fallback is only attempted when the
+    probed interpreter is NOT inside a venv.
+
+    *python_cmd* is the pip invocation placed before ``install`` (for example
+    ``python3 -m pip`` or ``pip``); *upgrade* adds ``-U`` to both install
+    attempts.
+
+    Both attempts are wrapped by :func:`_pip_install_attempt`, so pip's real
+    exit code is kept (no ``| tail`` masking) and the last 5 lines of pip
+    output show up in the Cookbook log.
+    """
+    # Interpreter for the venv probe. It must be the one pip belongs to: a
+    # hardcoded python3 breaks when pip lives in a venv that only has "python".
+    # Bare ``pip`` maps to ``python``; every other form maps to ``python3``.
+    if " -m pip" in python_cmd:
+        python_exe = python_cmd.replace(" -m pip", "")
+    else:
+        python_exe = "python" if python_cmd.strip() == "pip" else "python3"
+    in_venv_probe = f'{python_exe} -c "import sys; sys.exit(0 if sys.prefix != sys.base_prefix else 1)"'
+    # An extras spec like ``llama-cpp-python[server]`` contains brackets that
+    # bash would treat as a glob, so the spec is shell-quoted before embedding.
+    # ``shlex.quote`` returns plain names (e.g. ``huggingface_hub``) unchanged.
+    spec = shlex.quote(package)
+    flags = "-q -U" if upgrade else "-q"
+    first_try = _pip_install_attempt(f"{python_cmd} install {flags} {spec}")
+    user_try = _pip_install_attempt(
+        f"{python_cmd} install --user --break-system-packages {flags} {spec}"
+    )
+    # `! probe` succeeds (exit 0) outside a venv, so `&&` moves on to --user.
+    # Inside a venv `! probe` fails, `&&` skips --user and the group exits
+    # non-zero, so the failed first attempt is propagated instead of being
+    # masked as success (the `|| { venv_check || … }` shape from #903 swallowed
+    # the exit code because venv_check's exit-0 became the group's result).
+    return f"{first_try} || {{ ! {in_venv_probe} && {user_try}; }}"
+
+
+def _venv_safe_local_pip_install_cmd(cmd: str, *, local: bool, in_venv: bool) -> str:
+    """Drop pip user-install flags that are invalid for local venv installs.
+
+    Cookbook dependency installs run through the model-serve task path so users
+    can watch progress in the same log UI. For local POSIX runs, that task
+    prepends Odysseus' own interpreter directory to PATH. If Odysseus itself is
+    running from a venv, `python3` resolves to the venv Python and pip rejects
+    `--user` with "User site-packages are not visible in this virtualenv".
+
+    Keep remote and non-venv installs unchanged: remotes may intentionally use
+    system Python, and Docker/non-venv installs still need user-site fallback.
+
+    Only the standalone ``--user`` / ``--break-system-packages`` words are cut
+    out; the rest of *cmd* is left byte-for-byte. Rebuilding the command with
+    ``shlex.join`` would quote shell syntax (``||``, ``2>&1``, ``~``, ``$HOME``)
+    into literal arguments. Commands ``shlex.split`` cannot parse are returned
+    as-is.
+    """
+    if not local or not in_venv or "pip install" not in (cmd or ""):
+        return cmd
+    try:
+        shlex.split(cmd)  # parse check only: unbalanced quotes -> leave cmd alone
+    except ValueError:
+        return cmd
+    flag_word = r"(?<!\S)--(?:user|break-system-packages)(?!\S)[ \t]*"
+    return re.sub(flag_word, "", cmd).strip()
+
+
 def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
    """Build the standalone Python scanner used by /api/model/cached."""
    lines = [
@@ -166,6 +268,38 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
        "    for root, dirs, fns in os.walk(top, followlinks=False):",
        "        dirs[:] = [d for d in dirs if not os.path.islink(os.path.join(root, d)) and safe_path(os.path.join(root, d))]",
        "        yield root, dirs, fns",
+        "def gguf_role(name):",
+        "    n = name.lower()",
+        "    if n.startswith('mmproj') or 'mmproj' in n: return 'projector'",
+        "    return 'model'",
+        "def gguf_quant(name):",
+        "    m = re.search(r'(?i)(UD-)?(IQ[0-9]_[A-Z0-9_]+|Q[0-9](?:_[A-Z0-9]+)+|BF16|F16|FP16|F32|Q8_0)', name)",
+        "    return m.group(0).upper() if m else ''",
+        "def collect_ggufs(base):",
+        "    files = []",
+        "    split_groups = {}",
+        "    if not os.path.isdir(base) or not safe_path(base): return files",
+        "    for root, dirs, fns in safe_walk(base):",
+        "        for fn in sorted(fns):",
+        "            if not fn.lower().endswith('.gguf'): continue",
+        "            fp = os.path.join(root, fn)",
+        "            try: size = os.path.getsize(fp)",
+        "            except Exception: size = 0",
+        "            try: rel = os.path.relpath(fp, base).replace(os.sep, '/')",
+        "            except Exception: rel = fn",
+        "            sm = re.match(r'(?i)^(.+)-(\\d+)-of-(\\d+)\\.gguf$', fn)",
+        "            if sm:",
+        "                prefix, part_s, total_s = sm.group(1), sm.group(2), sm.group(3)",
+        "                key = (root, prefix, total_s)",
+        "                g = split_groups.setdefault(key, {'name':fn,'rel_path':rel,'size_bytes':0,'role':gguf_role(fn),'quant':gguf_quant(fn),'parts':int(total_s),'split':True})",
+        "                g['size_bytes'] += size",
+        "                if int(part_s) == 1:",
+        "                    g.update({'name':fn,'rel_path':rel,'role':gguf_role(fn),'quant':gguf_quant(fn)})",
+        "                continue",
+        "            files.append({'name':fn,'rel_path':rel,'size_bytes':size,'role':gguf_role(fn),'quant':gguf_quant(fn)})",
+        "    files.extend(split_groups.values())",
+        "    files.sort(key=lambda f: (f.get('role') != 'model', f.get('rel_path', '')))",
+        "    return files",
        "def scan_hf(cache):",
        "    if not os.path.isdir(cache): return",
        "    for d in sorted(os.listdir(cache)):",
@@ -180,16 +314,14 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
        "                if f.is_file(): nf += 1; sz += f.stat().st_size",
        "                if f.name.endswith('.incomplete'): ic = True",
        "        snap = os.path.join(cache, d, 'snapshots')",
-        "        is_diffusion = False; is_gguf = False",
+        "        is_diffusion = False; gguf_files = []",
        "        if os.path.isdir(snap):",
        "            for sd in os.listdir(snap):",
        "                sf = os.path.join(snap, sd)",
        "                if not os.path.isdir(sf): continue",
        "                if os.path.exists(os.path.join(sf, 'model_index.json')): is_diffusion = True",
-        "                try:",
-        "                    if any(x.endswith('.gguf') for x in os.listdir(sf)): is_gguf = True",
-        "                except Exception: pass",
-        "        models.append({'repo_id':rid,'size_bytes':sz,'nb_files':nf,'has_incomplete':ic,'path':cache,'is_diffusion':is_diffusion,'is_gguf':is_gguf})",
+        "                for f in collect_ggufs(sf): f['rel_path'] = sd + '/' + f['rel_path']; gguf_files.append(f)",
+        "        models.append({'repo_id':rid,'size_bytes':sz,'nb_files':nf,'has_incomplete':ic,'path':cache,'is_diffusion':is_diffusion,'is_gguf':bool(gguf_files),'gguf_files':gguf_files})",
        "def scan_dir(p):",
        "    if not os.path.isdir(p) or not safe_path(p): return",
        "    for d in sorted(os.listdir(p)):",
@@ -198,13 +330,14 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
        "        fp = os.path.join(p, d)",
        "        if not os.path.isdir(fp) or os.path.islink(fp) or not safe_path(fp): continue",
        "        if d in seen: continue",
-        "        is_model = False; is_gguf = False",
+        "        is_model = False; gguf_files = []",
        "        for root, dirs, fns in safe_walk(fp):",
        "            for fn in fns:",
-        "                if fn.endswith('.gguf'): is_gguf = True; is_model = True",
+        "                if fn.lower().endswith('.gguf'): is_model = True",
        "                elif fn == 'config.json' or fn.endswith('.safetensors') or fn.endswith('.bin'): is_model = True",
        "            if is_model: break",
        "        if not is_model: continue",
+        "        gguf_files = collect_ggufs(fp)",
        "        seen.add(d)",
        "        sz, nf = 0, 0",
        "        for dp, _, fns in safe_walk(fp):",
@@ -212,7 +345,7 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
        "                try: nf += 1; sz += os.path.getsize(os.path.join(dp, fn))",
        "                except Exception: pass",
        "        is_diff = os.path.exists(os.path.join(fp, 'model_index.json'))",
-        "        models.append({'repo_id':d,'size_bytes':sz,'nb_files':nf,'has_incomplete':False,'path':p,'is_local_dir':True,'is_diffusion':is_diff,'is_gguf':is_gguf})",
+        "        models.append({'repo_id':d,'size_bytes':sz,'nb_files':nf,'has_incomplete':False,'path':p,'is_local_dir':True,'is_diffusion':is_diff,'is_gguf':bool(gguf_files),'gguf_files':gguf_files})",
        "def parse_size(num, unit):",
        "    try: n = float(num)",
        "    except Exception: return 0",
@@ -293,6 +426,38 @@ _SERVE_CMD_ALLOWLIST = {
 _GGUF_PRELUDE_RE = re.compile(
    r'^MODEL_FILE=\$\([^\n]*?\)\s*&&\s*\{[^{}]*\}\s*\|\|\s*\{[^{}]*\}\s*&&\s*'
 )
+_OLLAMA_HOST_ASSIGNMENT_RE = re.compile(r"(?:^|\s)OLLAMA_HOST=([^\s]+)")  # OLLAMA_HOST=<value> at start or after whitespace; group 1 is the value
+_OLLAMA_BIND_RE = re.compile(r"^\[([^\]]+)\]:(\d+)$|^([^:]+):(\d+)$")  # "[host]:port" (groups 1, 2) or colon-free "host:port" (groups 3, 4)
+_OLLAMA_BIND_HOST_RE = re.compile(r"^[A-Za-z0-9._:-]+$")  # host may only contain letters, digits, ".", "_", ":" and "-"
+
+
+def _ollama_bind_from_cmd(cmd: str | None, *, default_host: str = "127.0.0.1") -> tuple[str, str]:
+    """Return the ``(host, port)`` an Ollama serve command asks to bind to.
+
+    Without an ``OLLAMA_HOST=`` assignment this is *default_host* and port
+    11434; remote callers can pass a wider default so Odysseus can reach the
+    API. ``host:port``, ``[v6]:port`` and port-less ``host`` / ``[v6]`` values
+    are honoured; anything malformed falls back to loopback ``127.0.0.1:11434``.
+    """
+    fallback = ("127.0.0.1", "11434")
+    match = _OLLAMA_HOST_ASSIGNMENT_RE.search(cmd) if cmd else None
+    if not match:
+        return default_host, "11434"
+    value = match.group(1).strip("'\"")
+    bind_match = _OLLAMA_BIND_RE.match(value)
+    if bind_match:
+        bracketed = bind_match.group(1) is not None
+        host = bind_match.group(1) or bind_match.group(3)
+        port = bind_match.group(2) or bind_match.group(4)
+    else:
+        # A bare host such as OLLAMA_HOST=0.0.0.0 means "this host, default port".
+        bracketed = value.startswith("[") and value.endswith("]")
+        host, port = (value[1:-1] if bracketed else value), "11434"
+        if not bracketed and ":" in host:
+            return fallback
+    if not host or not _OLLAMA_BIND_HOST_RE.match(host) or not 1 <= int(port, 10) <= 65535:
+        return fallback
+    return (f"[{host}]" if bracketed else host), port


 def _check_serve_binary(seg: str) -> None:
@@ -370,6 +535,83 @@ def _append_serve_exit_code_lines(runner_lines: list[str], *, keep_shell_open: b
        runner_lines.append('echo ""; echo "=== Process exited with code $ODYSSEUS_CMD_EXIT ==="; exec "${SHELL:-/bin/bash}"')
    else:
        runner_lines.append('echo ""; echo "=== Process exited with code $ODYSSEUS_CMD_EXIT ==="')
+        runner_lines.append('exit "$ODYSSEUS_CMD_EXIT"')
+
+
+def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
+    """Append the Linux llama.cpp build step, preferring ROCm/HIP over CUDA.
+
+    The bootstrap used to hard-wire CUDA on Linux, so ROCm hosts ran a CUDA
+    configure and failed with "CUDA Toolkit not found". The appended script
+    builds with HIP, then CUDA (nvcc plus libcudart), else CPU only.
+    """
+    runner_lines.extend([
+        # Put a pip-installed nvcc (vLLM / nvidia CUDA wheels) on PATH so cmake's
+        # CUDA configure can find it. The HIP branch below is still tested first,
+        # so an AMD host with a stray nvcc wheel keeps its native HIP toolchain.
+        '    for _cudir in ~/.local/lib/python*/site-packages/nvidia/cu13 ~/.local/lib/python*/site-packages/nvidia/cu12 ~/.local/lib/python*/site-packages/nvidia/cuda_nvcc; do',
+        '      [ -x "$_cudir/bin/nvcc" ] && export CUDA_HOME="$_cudir" && export PATH="$_cudir/bin:$PATH" && break',
+        '    done',
+        # Drop build/ so a CMakeCache.txt poisoned by an earlier failed CUDA or
+        # HIP configure is not reused by the next one.
+        '    cd ~/llama.cpp && rm -rf build',
+        '    if command -v hipconfig &>/dev/null || [ -d /opt/rocm ] || [ -n "$ROCM_PATH" ] || [ -n "$HIP_PATH" ]; then',
+        '      if command -v hipconfig &>/dev/null; then',
+        '        export HIPCXX="${HIPCXX:-$(hipconfig -l)/clang}"',
+        '        export HIP_PATH="${HIP_PATH:-$(hipconfig -R)}"',
+        '      fi',
+        '      echo "[odysseus] ROCm/HIP detected — building llama-server with HIP support..."',
+        '      cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_HIP=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server',
+        '    elif command -v nvcc &>/dev/null; then',
+        # nvcc alone is not enough: pip CUDA wheels or partial tooling can ship
+        # nvcc without libcudart and cmake then fails mid-build with "CUDA runtime
+        # library not found", so cudart is probed through a small shell helper.
+        '      _odysseus_has_cudart() {',
+        '        ldconfig -p 2>/dev/null | grep -q \'libcudart\\.so\' && return 0',
+        '        local _cuh="${CUDA_HOME:-/usr/local/cuda}"',
+        '        ls "$_cuh/lib64/libcudart.so"* &>/dev/null && return 0',
+        '        ls "$_cuh/lib/libcudart.so"* &>/dev/null && return 0',
+        '        ls /usr/local/cuda/lib64/libcudart.so* &>/dev/null && return 0',
+        '        ls /usr/local/cuda/lib/libcudart.so* &>/dev/null && return 0',
+        '        ls "${_cuh%/cuda_nvcc}/cuda_runtime/lib/libcudart.so"* &>/dev/null && return 0',
+        '        return 1',
+        '      }',
+        '      if _odysseus_has_cudart; then',
+        '        echo "[odysseus] CUDA nvcc + cudart found — building llama-server with CUDA (GPU) support..."',
+        '        cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server',
+        '      else',
+        '        echo "[odysseus] WARNING: nvcc found but CUDA runtime (libcudart.so) is not visible — building llama-server for CPU only."',
+        '        echo "[odysseus]   GPU inference will not be available for this llama.cpp build."',
+        '        echo "[odysseus]   Ensure libcudart is installed (e.g. cuda-runtime package) and visible via ldconfig or CUDA_HOME."',
+        '        cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server',
+        '      fi',
+        '    else',
+        '      echo "[odysseus] WARNING: no HIP/CUDA toolchain found — building llama-server for CPU only."',
+        '      echo "[odysseus]   GPU inference will not be available for this llama.cpp build."',
+        '      echo "[odysseus]   Install ROCm for AMD GPUs or vLLM/CUDA tooling for NVIDIA, then re-launch this serve task."',
+        '      cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server',
+        '    fi',
+    ])
+
+
+def _llama_cpp_rebuild_cmd() -> str:
+    """Return the shell command that wipes the Cookbook-managed llama.cpp build.
+
+    It deletes the cached ``llama-server`` symlink and ``~/llama.cpp/build``.
+    The serve bootstrap only compiles when ``llama-server`` is missing from
+    PATH, so an existing CPU-only build would otherwise be reused forever; once
+    cleared, the next llama.cpp serve rebuilds from source and can pick up a
+    CUDA or HIP toolchain. Nothing is installed or downloaded here.
+    """
+    steps = (
+        'mkdir -p "$HOME/bin"',
+        'rm -f "$HOME/bin/llama-server"',
+        'rm -rf "$HOME/llama.cpp/build"',
+        'echo "[odysseus] Cleared the cached llama.cpp build. '
+        'Re-launch the serve task to rebuild llama-server from source '
+        '(CUDA or HIP will be used if a toolchain is now available)."',
+    )
+    return " && ".join(steps)


 class ModelDownloadRequest(BaseModel):
--- a/routes/cookbook_routes.py
+++ b/routes/cookbook_routes.py
@@ -37,7 +37,8 @@ from routes.cookbook_helpers import (
    _validate_local_dir, _validate_ssh_port, _validate_gpus, _shell_path,
    _ps_squote, _bash_squote, _validate_serve_cmd, _parse_serve_phase,
    _safe_env_prefix, _local_tooling_path_export, _append_serve_preflight_exit_lines,
-    _append_serve_exit_code_lines, _cached_model_scan_script,
+    _append_serve_exit_code_lines, _append_llama_cpp_linux_accel_build_lines, _cached_model_scan_script,
+    _ollama_bind_from_cmd, _pip_install_fallback_chain, _pip_install_no_cache, _venv_safe_local_pip_install_cmd,
    ModelDownloadRequest, ServeRequest,
 )

@@ -148,6 +149,15 @@ def setup_cookbook_routes() -> APIRouter:
                "No GPUs are visible to the serve process.",
                [{"label": "clear Cookbook GPU selection or choose available GPUs", "op": "settings", "field": "gpus", "value": ""}],
            ),
+            (
+                r"Failed to infer device type|NVML Shared Library Not Found|No module named 'amdsmi'|platform is not available",
+                "vLLM could not find a supported GPU (CUDA or ROCm). "
+                "This machine may have integrated or unsupported graphics only.",
+                [
+                    {"label": "switch to llama.cpp (CPU/Metal, works without a discrete GPU)", "op": "manual"},
+                    {"label": "switch to Ollama (CPU/Metal, works without a discrete GPU)", "op": "manual"},
+                ],
+            ),
            (
                r"vllm.*command not found|No module named vllm|ERROR: vLLM is not installed",
                "vLLM is not installed or not in PATH on this server.",
@@ -163,6 +173,11 @@ def setup_cookbook_routes() -> APIRouter:
                "llama.cpp / llama-cpp-python dependencies are missing.",
                [{"label": "install llama.cpp dependencies or llama-cpp-python[server]", "op": "dependency", "package": "llama-cpp-python[server]"}],
            ),
+            (
+                r"No GGUF found on this host|no \.gguf file|No GGUF file found",
+                "No GGUF file found for this model on this host. The llama.cpp backend needs a .gguf file.",
+                [{"label": "download a GGUF build of this model (repo name usually ends in -GGUF, file like Q4_K_M.gguf)", "op": "manual"}],
+            ),
            (
                r"No module named 'torch'|No module named torch|No module named 'diffusers'|No module named diffusers",
                "Diffusion serving requires PyTorch and diffusers.",
@@ -368,11 +383,15 @@ def setup_cookbook_routes() -> APIRouter:
                encoding="utf-8",
            )
            argv = [os.environ.get("ComSpec", "cmd.exe"), "/c", str(script_path)]
+        env = os.environ.copy()
+        env["PYTHONUTF8"] = "1"
+        env["PYTHONIOENCODING"] = "utf-8"
        proc = subprocess.Popen(
            argv,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            stdin=subprocess.DEVNULL,
+            env=env,
            **detached_popen_kwargs(),
        )
        pid_path.write_text(str(proc.pid), encoding="utf-8")
@@ -432,12 +451,12 @@ def setup_cookbook_routes() -> APIRouter:
        # throughput. Retries set disable_hf_transfer to fall back to the plain,
        # slower-but-reliable downloader (resumes cleanly from the .incomplete files).
        # Use `python3 -m pip` not `pip` — macOS has no bare `pip` command.
-        lines.append("command -v hf >/dev/null 2>&1 || python3 -m pip install --user --break-system-packages -q -U huggingface_hub 2>/dev/null || python3 -m pip install -q -U huggingface_hub 2>/dev/null")
+        lines.append(f"command -v hf >/dev/null 2>&1 || {_pip_install_fallback_chain('huggingface_hub', upgrade=True)}")
        if req.disable_hf_transfer:
            lines.append("export HF_HUB_ENABLE_HF_TRANSFER=0")
            lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=4")
        else:
-            lines.append("python3 -c 'import hf_transfer' 2>/dev/null || python3 -m pip install --user --break-system-packages -q hf_transfer 2>/dev/null || python3 -m pip install -q hf_transfer 2>/dev/null")
+            lines.append(f"python3 -c 'import hf_transfer' 2>/dev/null || {_pip_install_fallback_chain('hf_transfer')}")
            lines.append("python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1")
            lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=8")

@@ -531,12 +550,18 @@ def setup_cookbook_routes() -> APIRouter:
                )
            # Ensure pip-user scripts (e.g. hf CLI installed via --user) are on PATH
            runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
-            # Install hf CLI + hf_transfer best-effort so future runs get the fast path.
+            # Install hf CLI + optional hf_transfer best-effort. Retries disable
+            # hf_transfer because the Rust parallel path is fast but has been
+            # flaky near the end of very large multi-file downloads.
            # Use --break-system-packages on PEP-668 systems (Arch, newer Debian) so it doesn't bail.
-            runner_lines.append("command -v hf >/dev/null 2>&1 || pip install --user --break-system-packages -q -U huggingface_hub 2>/dev/null || pip install -q -U huggingface_hub 2>/dev/null")
-            runner_lines.append("python3 -c 'import hf_transfer' 2>/dev/null || pip install --user --break-system-packages -q hf_transfer 2>/dev/null || pip install -q hf_transfer 2>/dev/null")
-            runner_lines.append("python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1")
-            runner_lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=8")
+            runner_lines.append(f"command -v hf >/dev/null 2>&1 || {_pip_install_fallback_chain('huggingface_hub', python_cmd='pip', upgrade=True)}")
+            if req.disable_hf_transfer:
+                runner_lines.append("export HF_HUB_ENABLE_HF_TRANSFER=0")
+                runner_lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=4")
+            else:
+                runner_lines.append(f"python3 -c 'import hf_transfer' 2>/dev/null || {_pip_install_fallback_chain('hf_transfer', python_cmd='pip')}")
+                runner_lines.append("python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1")
+                runner_lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=8")
            # Surface whether the HF token actually reached THIS server, so a gated
            # download's "not authorized" failure can be told apart from a missing
            # token (the token is masked — we only print applied / not-set).
@@ -547,15 +572,19 @@ def setup_cookbook_routes() -> APIRouter:
            runner_lines.append(f'  {hf_cmd} < /dev/null')
            runner_lines.append('elif python3 -c "import huggingface_hub" 2>/dev/null; then')
            runner_lines.append('  echo "hf CLI not found, using Python huggingface_hub..."')
-            runner_lines.append(f'  python3 -c "import os; from huggingface_hub import snapshot_download; snapshot_download(\'{req.repo_id}\'{_dl_pyarg}, max_workers=8)"')
+            runner_lines.append(f'  python3 -c "import os; from huggingface_hub import snapshot_download; snapshot_download(\'{req.repo_id}\'{_dl_pyarg}, max_workers={4 if req.disable_hf_transfer else 8})"')
            runner_lines.append('else')
            runner_lines.append('  echo "Installing huggingface-hub and dependencies..."')
            runner_lines.append('  pip install --no-deps -q huggingface-hub 2>/dev/null')
-            runner_lines.append('  pip install -q filelock fsspec packaging pyyaml tqdm typer httpx requests hf_transfer 2>/dev/null')
-            runner_lines.append("  python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1")
-            runner_lines.append(f'  python3 -c "import os; from huggingface_hub import snapshot_download; snapshot_download(\'{req.repo_id}\'{_dl_pyarg}, max_workers=8)"')
+            if req.disable_hf_transfer:
+                runner_lines.append('  pip install -q filelock fsspec packaging pyyaml tqdm typer httpx requests 2>/dev/null')
+                runner_lines.append('  export HF_HUB_ENABLE_HF_TRANSFER=0')
+            else:
+                runner_lines.append('  pip install -q filelock fsspec packaging pyyaml tqdm typer httpx requests hf_transfer 2>/dev/null')
+                runner_lines.append("  python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1")
+            runner_lines.append(f'  python3 -c "import os; from huggingface_hub import snapshot_download; snapshot_download(\'{req.repo_id}\'{_dl_pyarg}, max_workers={4 if req.disable_hf_transfer else 8})"')
            runner_lines.append('fi')
-            runner_lines.append('if [ $? -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $?)"; fi')
+            runner_lines.append('_ec=$?; if [ $_ec -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $_ec)"; fi')
            runner_lines.append(f"rm -f {remote_runner}")
            runner_lines.append('exec "${SHELL:-/bin/bash}"')
            runner_path = TMUX_LOG_DIR / f"{session_id}_run.sh"
@@ -586,11 +615,11 @@ def setup_cookbook_routes() -> APIRouter:
                # Detached path: no controlling TTY, so skip `< /dev/null`
                # (handled by Popen stdin=DEVNULL) and don't keep a shell open.
                lines.append(hf_cmd)
-                lines.append('if [ $? -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $?)"; fi')
+                lines.append('_ec=$?; if [ $_ec -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $_ec)"; fi')
            else:
                # < /dev/null suppresses interactive "update available? [Y/n]" prompt
                lines.append(f"{hf_cmd} < /dev/null")
-                lines.append('if [ $? -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $?)"; fi')
+                lines.append('_ec=$?; if [ $_ec -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $_ec)"; fi')
                lines.append(f"rm -f '{wrapper_script}'")
                lines.append('exec "${SHELL:-/bin/bash}"')
                wrapper_script.write_text("\n".join(lines) + "\n", encoding="utf-8")
@@ -672,11 +701,14 @@ def setup_cookbook_routes() -> APIRouter:
                cwd=str(Path.home()),
            )
        else:
-            # LOCAL scan: run the interpreter directly. `python3` isn't a thing on
-            # Windows (it's `python`/`py`), and shell single-quoting of the path
-            # doesn't survive cmd.exe — so resolve the interpreter and exec it
-            # with the script path as an argv element (no shell quoting needed).
-            local_py = (
+            # LOCAL scan: use sys.executable (the venv Python Odysseus is already
+            # running under) — it's guaranteed real Python on all platforms.
+            # Falling back to which_tool on Windows risks hitting the Microsoft
+            # Store stub alias for "python3"/"python", which prints
+            # "Python was not found; run without arguments to install from the
+            # Microsoft Store" and exits 9009, producing empty stdout and a
+            # JSON parse error. sys.executable bypasses PATH entirely.
+            local_py = sys.executable or (
                which_tool("python3") or which_tool("python")
                or which_tool("py") or "python"
            )
@@ -714,6 +746,8 @@ def setup_cookbook_routes() -> APIRouter:
                    entry["backend"] = m.get("backend")
                if m.get("is_ollama"):
                    entry["is_ollama"] = True
+                if isinstance(m.get("gguf_files"), list):
+                    entry["gguf_files"] = m["gguf_files"]
                models.append(entry)
        except Exception as e:
            logger.warning(f"Failed to parse cached models: {e}")
@@ -775,6 +809,80 @@ def setup_cookbook_routes() -> APIRouter:
        finally:
            db.close()

+    def _auto_register_llm_endpoint(req: ServeRequest, remote: str | None) -> str | None:
+        """Register a freshly-served LLM as a model endpoint so it appears in the
+        model picker without a manual /setup step — the text-model sibling of
+        _auto_register_image_endpoint.
+
+        Cookbook serve commands launch an OpenAI-compatible server (llama.cpp's
+        llama-server, vLLM, SGLang, or Ollama) on a known port. We point an
+        endpoint at that server's /v1; the picker auto-discovers the model id by
+        probing /v1/models and dims the endpoint until the server is reachable,
+        so registering immediately (before the server finishes loading) is safe.
+
+        Args:
+            req: serve request; only ``cmd`` and ``repo_id`` are read.
+            remote: SSH target (``user@host`` or ``host``); falsy for a local serve.
+
+        Returns:
+            The reused or newly created endpoint id, or None if the DB write failed.
+        """
+        import re
+        from core.database import SessionLocal, ModelEndpoint
+
+        cmd = req.cmd or ""
+        # Port: an explicit --port wins, in either "--port N" or "--port=N" form.
+        # Otherwise fall back by backend (Ollama's default, else llama-server's).
+        port_match = re.search(r'--port(?:=|\s+)(\d+)', cmd)
+        if port_match:
+            port = int(port_match.group(1))
+        elif "ollama" in cmd:
+            port = 11434
+        else:
+            port = 8080  # llama.cpp's llama-server default — the Apple Silicon path
+
+        # Host mirrors the image path: the SSH alias (minus any "user@" prefix)
+        # for remote serves, localhost otherwise.
+        host = remote.split("@")[-1] if remote else "localhost"
+        base_url = f"http://{host}:{port}/v1"
+
+        # Last path segment of the repo id; a missing/empty repo_id gets a generic name.
+        display_name = (req.repo_id or "").split("/")[-1] or "Local model"
+
+        # If the serve command opts models into OpenAI tool-calling, record it so
+        # agent_loop trusts emitted tool_calls instead of the name heuristic.
+        supports_tools = True if "--enable-auto-tool-choice" in cmd else None
+
+        db = SessionLocal()
+        try:
+            # Reuse an endpoint already pointed at this URL instead of duplicating.
+            existing = db.query(ModelEndpoint).filter(ModelEndpoint.base_url == base_url).first()
+            if existing:
+                existing.is_enabled = True
+                existing.model_type = "llm"
+                existing.name = display_name
+                if supports_tools is not None:
+                    existing.supports_tools = supports_tools
+                db.commit()
+                logger.info(f"Updated existing local model endpoint: {base_url}")
+                return existing.id
+
+            ep_id = f"local-{uuid.uuid4().hex[:8]}"
+            ep = ModelEndpoint(
+                id=ep_id, name=display_name, base_url=base_url, api_key=None,
+                is_enabled=True, model_type="llm", supports_tools=supports_tools,
+            )
+            db.add(ep)
+            db.commit()
+            logger.info(f"Auto-registered local model endpoint: {display_name} @ {base_url}")
+            return ep_id
+        except Exception as e:
+            logger.error(f"Failed to auto-register local model endpoint: {e}")
+            db.rollback()
+            return None
+        finally:
+            db.close()
+
    @router.post("/api/model/serve")
    async def model_serve(request: Request, req: ServeRequest):
        """Launch a model server in a tmux session (or PowerShell background process on Windows).
@@ -800,8 +908,17 @@ def setup_cookbook_routes() -> APIRouter:
        # many downstream `"engine" in req.cmd` membership checks can't hit
        # `TypeError: argument of type 'NoneType'` (a 500 instead of a clean 400).
        req.cmd = _validate_serve_cmd(req.cmd) or ""
+        req.cmd = _venv_safe_local_pip_install_cmd(
+            req.cmd,
+            local=not bool(req.remote_host),
+            in_venv=sys.prefix != sys.base_prefix,
+        )
        is_pip_install = bool(req.cmd and "pip install" in req.cmd)
        if is_pip_install:
+            # Keep big dependency wheel builds (vLLM, …) off the home filesystem's
+            # pip cache so they don't fail mid-build with "No space left" (#1219)
+            # and leave the dep installed-but-unusable (#1459).
+            req.cmd = _pip_install_no_cache(req.cmd)
            # PEP-508-style package spec — letters, digits, `.-_` for the
            # name; `[` `]` for extras; `<>=!~,` for version specifiers.
            # v2 review HIGH-14: tightened from the previous regex which
@@ -922,7 +1039,7 @@ def setup_cookbook_routes() -> APIRouter:
                runner_lines.append('  if ! python3 -c "import llama_cpp" 2>/dev/null; then')
                runner_lines.append('    pkg install -y cmake 2>/dev/null')
                runner_lines.append('    pip install numpy diskcache jinja2 2>/dev/null')
-                runner_lines.append('    CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_LLAMAFILE=OFF" pip install llama-cpp-python --no-build-isolation --no-cache-dir 2>&1 || true')
+                runner_lines.append('    CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_LLAMAFILE=OFF" pip install \'llama-cpp-python[server]\' --no-build-isolation --no-cache-dir 2>&1 || true')
                runner_lines.append('  fi')
                runner_lines.append('elif ! command -v llama-server &>/dev/null; then')
                runner_lines.append('  echo "Native llama-server not found — building from source (one-time, may take a few minutes)..."')
@@ -944,61 +1061,45 @@ def setup_cookbook_routes() -> APIRouter:
                runner_lines.append('      && cmake --build build -j"$NPROC" --target llama-server \\')
                runner_lines.append('      && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
                runner_lines.append('  else')
-                # Detect pip-installed nvcc (from vLLM/nvidia CUDA wheels) and put
-                # it on PATH so cmake's CUDA configure can find it.  We check the
-                # same three layouts as entrypoint.sh:
-                #   nvidia/cu13       — nvidia-nvcc-cu13
-                #   nvidia/cu12       — nvidia-nvcc-cu12
-                #   nvidia/cuda_nvcc  — nvidia-cuda-nvcc-cu12 (sub-package style)
-                runner_lines.append('    for _cudir in ~/.local/lib/python*/site-packages/nvidia/cu13 ~/.local/lib/python*/site-packages/nvidia/cu12 ~/.local/lib/python*/site-packages/nvidia/cuda_nvcc; do')
-                runner_lines.append('      [ -x "$_cudir/bin/nvcc" ] && export CUDA_HOME="$_cudir" && export PATH="$_cudir/bin:$PATH" && break')
-                runner_lines.append('    done')
-                # rm -rf build so a prior poisoned CMakeCache.txt (e.g. from a
-                # failed CUDA attempt) doesn't cause the next configure to reuse
-                # stale settings and silently produce a CPU-only binary.
-                runner_lines.append('    cd ~/llama.cpp && rm -rf build')
-                runner_lines.append('    if command -v nvcc &>/dev/null; then')
-                runner_lines.append('      echo "[odysseus] CUDA nvcc found — building llama-server with CUDA (GPU) support..."')
-                runner_lines.append('      cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON \\')
-                runner_lines.append('        && cmake --build build -j"$NPROC" --target llama-server \\')
-                runner_lines.append('        && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
-                runner_lines.append('    else')
-                runner_lines.append('      echo "[odysseus] WARNING: nvcc not found — building llama-server for CPU only."')
-                runner_lines.append('      echo "[odysseus]   GPU inference will not be available for this llama.cpp build."')
-                runner_lines.append('      echo "[odysseus]   To get a GPU build, first install vLLM via Cookbook -> Dependencies"')
-                runner_lines.append('      echo "[odysseus]   (its CUDA wheels include nvcc), then re-launch this serve task."')
-                runner_lines.append('      cmake -B build -DCMAKE_BUILD_TYPE=Release \\')
-                runner_lines.append('        && cmake --build build -j"$NPROC" --target llama-server \\')
-                runner_lines.append('        && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
-                runner_lines.append('    fi')
+                _append_llama_cpp_linux_accel_build_lines(runner_lines)
                runner_lines.append('  fi')
                runner_lines.append('  # If the native build failed, fall back to the Python bindings.')
                runner_lines.append('  if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then')
                runner_lines.append('    echo "llama-server build failed — installing Python bindings as fallback..."')
-                runner_lines.append('    pip install --user --break-system-packages -q llama-cpp-python 2>/dev/null || pip install -q llama-cpp-python 2>/dev/null || true')
+                runner_lines.append(f"    {_pip_install_fallback_chain('llama-cpp-python[server]', python_cmd='pip')} || true")
+                runner_lines.append('  fi')
+                runner_lines.append('  if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then')
+                runner_lines.append('    echo "ERROR: llama.cpp serving is not available after install/build attempts."')
+                runner_lines.append('    ODYSSEUS_PREFLIGHT_EXIT=127')
                runner_lines.append('  fi')
                runner_lines.append('fi')
            elif "ollama" in req.cmd:
                handled_ollama_serve = True
-                _ollama_port = "11434"
-                _ollama_match = re.search(r"OLLAMA_HOST=[^\s:]+:(\d+)", req.cmd)
-                if _ollama_match:
-                    _ollama_port = _ollama_match.group(1)
+                _ollama_default_host = "0.0.0.0" if remote else "127.0.0.1"
+                _ollama_host, _ollama_port = _ollama_bind_from_cmd(
+                    req.cmd,
+                    default_host=_ollama_default_host,
+                )
                # Ollama can be a host binary, a system service, or a Docker
                # container. If the HTTP API is already reachable, the model is
                # already served and we should not require a host `ollama` CLI.
+                runner_lines.append(f'ODYSSEUS_OLLAMA_HOST={_bash_squote(_ollama_host)}')
                runner_lines.append(f'ODYSSEUS_OLLAMA_PORT="{_ollama_port}"')
                runner_lines.append('ODYSSEUS_OLLAMA_URL=""')
-                runner_lines.append('for _ody_ollama_port in "$ODYSSEUS_OLLAMA_PORT" 11434; do')
-                runner_lines.append('  [ -z "$_ody_ollama_port" ] && continue')
-                runner_lines.append('  for _ody_ollama_host in 127.0.0.1 localhost host.docker.internal; do')
-                runner_lines.append('    _ody_ollama_url="http://${_ody_ollama_host}:${_ody_ollama_port}"')
-                runner_lines.append('    if curl -sf "$_ody_ollama_url/api/tags" >/dev/null 2>&1; then')
-                runner_lines.append('      ODYSSEUS_OLLAMA_URL="$_ody_ollama_url"')
-                runner_lines.append('      ODYSSEUS_OLLAMA_PORT="$_ody_ollama_port"')
-                runner_lines.append('      break 2')
-                runner_lines.append('    fi')
+                runner_lines.append('for _ody_ollama_try in $(seq 1 20); do')
+                runner_lines.append('  for _ody_ollama_port in "$ODYSSEUS_OLLAMA_PORT" 11434; do')
+                runner_lines.append('    [ -z "$_ody_ollama_port" ] && continue')
+                runner_lines.append('    for _ody_ollama_host in 127.0.0.1 localhost host.docker.internal; do')
+                runner_lines.append('      _ody_ollama_url="http://${_ody_ollama_host}:${_ody_ollama_port}"')
+                runner_lines.append('      if curl -sf "$_ody_ollama_url/api/tags" >/dev/null 2>&1; then')
+                runner_lines.append('        ODYSSEUS_OLLAMA_URL="$_ody_ollama_url"')
+                runner_lines.append('        ODYSSEUS_OLLAMA_PORT="$_ody_ollama_port"')
+                runner_lines.append('        break 3')
+                runner_lines.append('      fi')
+                runner_lines.append('    done')
                runner_lines.append('  done')
+                runner_lines.append('  [ "$_ody_ollama_try" -eq 1 ] && echo "[odysseus] Waiting for an existing Ollama API on ports ${ODYSSEUS_OLLAMA_PORT}/11434..."')
+                runner_lines.append('  sleep 1')
                runner_lines.append('done')
                runner_lines.append('if [ -n "$ODYSSEUS_OLLAMA_URL" ]; then')
                runner_lines.append('  if [ "$ODYSSEUS_OLLAMA_PORT" != "' + _ollama_port + '" ]; then')
@@ -1015,8 +1116,12 @@ def setup_cookbook_routes() -> APIRouter:
                runner_lines.append('  echo "=== Process exited with code 127 ==="')
                runner_lines.append('  exec bash -i')
                runner_lines.append('fi')
-                runner_lines.append('echo "Starting ollama server on 0.0.0.0:${ODYSSEUS_OLLAMA_PORT}..."')
-                runner_lines.append('OLLAMA_HOST="0.0.0.0:${ODYSSEUS_OLLAMA_PORT}" ollama serve')
+                runner_lines.append('ODYSSEUS_OLLAMA_URL="http://${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT}"')
+                if remote and _ollama_host in ("0.0.0.0", "::"):
+                    runner_lines.append('echo "[odysseus] WARNING: remote Ollama will bind to ${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT} so Odysseus can reach it from this host."')
+                    runner_lines.append('echo "[odysseus] Ollama has no built-in authentication; expose this only on a trusted LAN/VPN or provide an explicit OLLAMA_HOST with your own access controls."')
+                runner_lines.append('echo "Starting ollama server on ${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT}..."')
+                runner_lines.append('OLLAMA_HOST="${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT}" ollama serve')
                runner_lines.append('_ody_exit=$?')
                runner_lines.append('echo')
                runner_lines.append('echo "=== Process exited with code ${_ody_exit} ==="')
@@ -1032,19 +1137,24 @@ def setup_cookbook_routes() -> APIRouter:
                # find the `vllm` CLI ("command not found"). Mirrors llama.cpp above.
                runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
                runner_lines.append('if ! command -v vllm &>/dev/null; then')
-                runner_lines.append('  echo "ERROR: vLLM is not installed. Open Cookbook -> Dependencies and install vllm on this server, then launch again."')
+                runner_lines.append('  echo "ERROR: vLLM is not installed."')
                runner_lines.append('  ODYSSEUS_PREFLIGHT_EXIT=127')
                runner_lines.append('fi')
            elif "sglang.launch_server" in req.cmd:
                runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
-                runner_lines.append('if ! python3 -c "import sglang" 2>/dev/null; then')
-                runner_lines.append('  echo "ERROR: SGLang is not installed. Open Cookbook -> Dependencies and install sglang on this server, then launch again."')
+                runner_lines.append('if ! command -v sglang &>/dev/null; then')
+                runner_lines.append('  echo "ERROR: SGLang is not installed."')
+                runner_lines.append('  ODYSSEUS_PREFLIGHT_EXIT=127')
+                runner_lines.append('elif ! ODYSSEUS_SGLANG_IMPORT_ERROR="$(python3 -c "import sglang" 2>&1)"; then')
+                runner_lines.append('  echo "ERROR: SGLang is installed but failed to import."')
+                runner_lines.append('  printf "%s\\n" "$ODYSSEUS_SGLANG_IMPORT_ERROR"')
                runner_lines.append('  ODYSSEUS_PREFLIGHT_EXIT=127')
                runner_lines.append('fi')
            elif "scripts/diffusion_server.py" in req.cmd or ".diffusion_server.py" in req.cmd:
                runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
-                runner_lines.append('if ! python3 -c "import torch, diffusers" 2>/dev/null; then')
-                runner_lines.append('  echo "ERROR: Diffusion serving requires PyTorch + diffusers. Open Cookbook -> Dependencies and install diffusers on this server, then launch again."')
+                runner_lines.append('if ! ODYSSEUS_DIFFUSION_IMPORT_ERROR="$(python3 -c "import torch, diffusers" 2>&1)"; then')
+                runner_lines.append('  echo "ERROR: Diffusion serving requires PyTorch + diffusers."')
+                runner_lines.append('  printf "%s\\n" "$ODYSSEUS_DIFFUSION_IMPORT_ERROR"')
                runner_lines.append('  ODYSSEUS_PREFLIGHT_EXIT=127')
                runner_lines.append('fi')

@@ -1116,11 +1226,16 @@ def setup_cookbook_routes() -> APIRouter:
                stderr = (await proc.stderr.read()).decode(errors="replace")
                return {"ok": False, "error": stderr, "session_id": session_id}

-        # Auto-register as model endpoint if serving a diffusion model
+        # Auto-register a model endpoint so the served model shows up in the model
+        # picker with no manual /setup step. Diffusion models get an image
+        # endpoint; any other real model serve (i.e. not a pip-install task) gets
+        # a local LLM endpoint pointed at its /v1.
        endpoint_id = None
        is_diffusion = "diffusion_server.py" in req.cmd
        if is_diffusion:
            endpoint_id = _auto_register_image_endpoint(req, remote)
+        elif not is_pip_install:
+            endpoint_id = _auto_register_llm_endpoint(req, remote)

        # Log to assistant
        try:
@@ -1357,9 +1472,16 @@ def setup_cookbook_routes() -> APIRouter:
            total_mb = max(0, int(total_bytes / (1024 * 1024)))
            used_mb = max(0, min(total_mb, int(used_bytes / (1024 * 1024))))
            free_mb = max(0, total_mb - used_mb)
+            # GTT = the system-RAM pool the GPU pages into when VRAM is full.
+            # On a discrete card a large gtt_used means the model spilled past
+            # VRAM into RAM over PCIe — much slower. Surface it so the UI can
+            # warn "spilling to RAM" instead of the user wondering why it's slow.
+            gtt_used_raw = await _gpu_read_file(f"{base}/mem_info_gtt_used", host, ssh_port)
+            gtt_used_mb = max(0, int(int(gtt_used_raw) / (1024 * 1024))) if (gtt_used_raw and gtt_used_raw.isdigit()) else 0
            gpus.append({
                "index": len(gpus), "name": name, "uuid": entry,
                "free_mb": free_mb, "total_mb": total_mb, "used_mb": used_mb,
+                "gtt_used_mb": gtt_used_mb,
                "util_pct": 0, "busy": bool(total_mb and (free_mb / total_mb) < 0.85),
                "processes": [], "backend": "rocm", "source": "amd-sysfs",
                "unified_memory": unified,
@@ -1461,6 +1583,46 @@ def setup_cookbook_routes() -> APIRouter:
        if gpus:
            return {"ok": True, "gpus": gpus, "backend": "cuda", "source": "nvidia-smi"}

+        # Local Apple Silicon / Metal fallback. macOS has no nvidia-smi and no
+        # Linux /sys/class/drm tree, but services.hwfit.hardware already knows
+        # how to size the shared unified-memory GPU budget. Keep this route in
+        # sync so Cookbook's GPU picker doesn't show "nvidia-smi not found" on
+        # native Mac launches.
+        if not host and sys.platform == "darwin":
+            try:
+                from services.hwfit.hardware import detect_system
+                info = detect_system(fresh=True)
+                backend = str(info.get("backend") or "").lower()
+                if backend in {"metal", "mps", "apple"} and info.get("gpu_count", 0) > 0:
+                    total_mb = int(float(info.get("gpu_vram_gb") or info.get("total_ram_gb") or 0) * 1024)
+                    free_mb = int(float(info.get("available_ram_gb") or 0) * 1024)
+                    if total_mb and (free_mb <= 0 or free_mb > total_mb):
+                        free_mb = total_mb
+                    used_mb = max(0, total_mb - max(0, free_mb))
+                    return {
+                        "ok": True,
+                        "gpus": [{
+                            "index": 0,
+                            "name": info.get("gpu_name") or info.get("cpu_name") or "Apple Silicon GPU",
+                            "uuid": "apple-metal-0",
+                            "free_mb": max(0, free_mb),
+                            "total_mb": max(0, total_mb),
+                            "used_mb": used_mb,
+                            "util_pct": 0,
+                            "busy": bool(total_mb and (free_mb / total_mb) < 0.5),
+                            "processes": [],
+                            "backend": "metal",
+                            "source": "apple-metal",
+                            "unified_memory": True,
+                        }],
+                        "backend": "metal",
+                        "source": "apple-metal",
+                        "fallback_from": "nvidia-smi",
+                        "nvidia_error": nvidia_error,
+                    }
+            except Exception as e:
+                logger.warning("Apple Metal GPU fallback failed: %s", e)
+
        amd_gpus = await _probe_amd_sysfs(host, ssh_port)
        if amd_gpus:
            return {
@@ -1607,6 +1769,33 @@ def setup_cookbook_routes() -> APIRouter:

            disk_tasks = on_disk.get("tasks") or [] if isinstance(on_disk, dict) else []
            incoming_tasks = data.get("tasks") if isinstance(data.get("tasks"), list) else []
+            # Anti-poisoning guard: a stale browser tab can keep POSTing a
+            # download task as status='done' from before the strict-finish
+            # fix landed, undoing any server-side correction. For each
+            # incoming "done" download, override to "running" if the last
+            # shard pattern says N<total AND no DOWNLOAD_OK/DOWNLOAD_FAILED/
+            # /snapshots/ sentinel is in the output.
+            import re as _re_dl
+            for _it in incoming_tasks:
+                if (not isinstance(_it, dict)) or _it.get("type") != "download" or _it.get("status") != "done":
+                    continue
+                _out = _it.get("output") or ""
+                if ("DOWNLOAD_OK" in _out) or ("DOWNLOAD_FAILED" in _out) or ("/snapshots/" in _out):
+                    continue
+                _shards = _re_dl.findall(r"model-(\d+)-of-(\d+)\.safetensors", _out)
+                if _shards:
+                    _n, _tot = _shards[-1]
+                    if int(_n) < int(_tot):
+                        logger.info(f"cookbook state POST: rejecting stale done for {_it.get('sessionId')} "
+                                    f"(last shard {_n}/{_tot}, no DOWNLOAD_OK)")
+                        _it["status"] = "running"
+                else:
+                    _completed = _out.count("Download complete")
+                    _starts = _out.count("Downloading '")
+                    if _starts > _completed:
+                        logger.info(f"cookbook state POST: rejecting stale done for {_it.get('sessionId')} "
+                                    f"({_completed}/{_starts} files complete, no DOWNLOAD_OK)")
+                        _it["status"] = "running"
            incoming_ids = {t.get("sessionId") for t in incoming_tasks if isinstance(t, dict) and t.get("sessionId")}
            import time as _t
            now_ms = int(_t.time() * 1000)
@@ -1763,6 +1952,43 @@ def setup_cookbook_routes() -> APIRouter:
    def _cookbook_tasks_status_sync():
        import subprocess

+        def _download_cache_complete(repo_id: str, remote_host: str = "", ssh_port: str = "") -> bool:
+            """Best-effort check for a completed HF cache entry.
+
+            tmux output can stop at a stale progress line if the pane/session
+            disappears before Cookbook captures the final DOWNLOAD_OK marker.
+            In that case, trust the cache shape: a snapshot directory with files
+            and no *.incomplete blobs means HuggingFace finished materializing the
+            model. Runs over ssh when remote_host is set; any failure returns False.
+            """
+            if not repo_id or "/" not in repo_id:
+                return False
+            py = (
+                "import os,sys;"
+                "repo=sys.argv[1];"
+                "base=os.environ.get('HUGGINGFACE_HUB_CACHE') or os.path.join(os.environ.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub');"
+                "d=os.path.join(base,'models--'+repo.replace('/','--'));"
+                "snap=os.path.join(d,'snapshots');"
+                "ok=os.path.isdir(snap) and any(os.path.isdir(os.path.join(snap,x)) and os.listdir(os.path.join(snap,x)) for x in os.listdir(snap));"
+                "inc=False;"
+                "blobs=os.path.join(d,'blobs');"
+                "inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));"
+                "sys.exit(0 if ok and not inc else 1)"
+            )
+            try:
+                if remote_host:
+                    ssh_base = ["ssh"]
+                    if ssh_port and ssh_port != "22":
+                        ssh_base.extend(["-p", str(ssh_port)])
+                    shell_cmd = " ".join(shlex.quote(x) for x in ["python3", "-c", py, repo_id])
+                    proc = subprocess.run(ssh_base + [remote_host, shell_cmd], timeout=12, capture_output=True)
+                else:
+                    # Local: reuse the running interpreter; PATH "python3" can be the Windows Store stub.
+                    proc = subprocess.run([sys.executable or "python3", "-c", py, repo_id], timeout=12, capture_output=True)
+                return proc.returncode == 0
+            except Exception:
+                return False
+
        # Load saved tasks from cookbook state
        tasks = []
        if _cookbook_state_path.exists():
@@ -1902,14 +2128,21 @@ def setup_cookbook_routes() -> APIRouter:
            # persists after the process exits, so a finished download still has a
            # snapshot to classify (DOWNLOAD_OK / exit marker) — evaluate it even
            # when the PID is gone instead of blindly reporting "stopped".
+            download_zero_files = False
            status = "unknown"
            if is_alive or (local_win_task and full_snapshot):
                lower = full_snapshot.lower()
-                has_exit = "=== process exited with code" in lower
+                exit_match = re.search(r"=== process exited with code\s+(-?\d+)", full_snapshot, re.I)
+                has_exit = exit_match is not None
+                exit_code = int(exit_match.group(1)) if exit_match else None
                has_error = "error" in lower or "failed" in lower or "traceback" in lower
                if has_exit and task_type == "serve":
                    # Serve tasks that exit are always errors — they should run indefinitely
                    status = "error"
+                elif has_exit and task_type == "download":
+                    # Dependency installs are tracked as download tasks but only
+                    # emit the generic runner exit marker, not HF download markers.
+                    status = "completed" if exit_code == 0 else "error"
                elif has_exit and "unrecognized arguments" in lower:
                    status = "error"
                elif has_error and not ("application startup complete" in lower):
@@ -1918,7 +2151,11 @@ def setup_cookbook_routes() -> APIRouter:
                    # Only download tasks treat 100% as "completed".
                    # Serve tasks log 100%|██████| during inference progress
                    # (diffusion sampling, etc.) — that's "running", not done.
-                    status = "completed"
+                    if re.search(r"Fetching\s+0\s+files", full_snapshot, re.IGNORECASE):
+                        status = "error"
+                        download_zero_files = True
+                    else:
+                        status = "completed"
                elif "application startup complete" in lower:
                    status = "ready"
                elif not is_alive:
@@ -1928,7 +2165,14 @@ def setup_cookbook_routes() -> APIRouter:
                    status = "running"
            else:
                # Session is dead — check if it completed or crashed
-                status = "stopped"
+                if task_type == "download" and _download_cache_complete(_payload.get("repo_id") or model, remote, str(_tport or "")):
+                    status = "completed"
+                    if not progress_text:
+                        progress_text = "Download complete"
+                    if not full_snapshot:
+                        full_snapshot = "DOWNLOAD_OK"
+                else:
+                    status = "stopped"

            # Parse structured phase info — single source of truth for the UI
            phase_info = _parse_serve_phase(full_snapshot, task_type) if (task_type == "serve" and status == "running" and full_snapshot) else {}
@@ -1938,6 +2182,8 @@ def setup_cookbook_routes() -> APIRouter:
            diagnosis = _diagnose_serve_output(full_snapshot) if task_type == "serve" and full_snapshot else None
            if diagnosis and status in {"running", "unknown", "stopped"}:
                status = "error"
+            if download_zero_files:
+                diagnosis = {"message": "No matching files were downloaded. The model repo or filename/quant pattern may be wrong (for example a ':Q4_K_M' tag that does not exist in the repo). Check the repo and the include/quant pattern."}
            output_tail = "\n".join(full_snapshot.splitlines()[-12:]) if full_snapshot else ""

            results.append({
--- a/routes/document_helpers.py
+++ b/routes/document_helpers.py
@@ -152,7 +152,7 @@ def _resolve_user_upload_path(
        owner=owner,
        auth_manager=auth_manager,
    )
-    if not resolved:
+    if not isinstance(resolved, dict) or not resolved:
        return None
    path = resolved.get("path")
    upload_dir = getattr(upload_handler, "upload_dir", None)
@@ -203,6 +203,8 @@ def _assert_pdf_marker_upload_owned(
 def _derive_title(content: str) -> str:
    """Derive a title from document content."""
    import re
+    if not isinstance(content, str):
+        return "Untitled"
    text = content.strip()
    if not text:
        return "Untitled"
--- a/routes/document_routes.py
+++ b/routes/document_routes.py
@@ -15,6 +15,21 @@ from src.auth_helpers import get_current_user
 logger = logging.getLogger(__name__)


+def _aggregate_language_facets(lang_rows):
+    """Build the ``{language: document count}`` mapping for the library facet.
+
+    Rows with a NULL/empty language land in the same "text" bucket as rows
+    explicitly tagged "text" (the language filter treats them as one), so the
+    counts are accumulated. Keying a dict comprehension on ``lang or "text"``
+    let one group overwrite the other and undercounted the facet.
+    """
+    totals = {}
+    for language, count in lang_rows:
+        bucket = language or "text"
+        totals[bucket] = totals.get(bucket, 0) + count
+    return totals
+
+

 from routes.document_helpers import (
    DocumentCreate, DocumentUpdate, DocumentPatch,
@@ -145,7 +160,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
            create_form_markdown_document,
            create_plain_pdf_document,
        )
-        from src.document_processor import _process_pdf
+        from src.document_processor import _process_pdf, strip_pdf_content_marker
        import os

        from src.auth_helpers import require_privilege
@@ -184,7 +199,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:

        title = os.path.splitext(meta.get("original_name") or meta.get("name") or upload_id)[0]
        try:
-            body_text = _process_pdf(pdf_path).lstrip("\n[PDF content]:").strip()
+            body_text = strip_pdf_content_marker(_process_pdf(pdf_path))
        except Exception:
            body_text = None

@@ -258,7 +273,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
            )
            lang_q = _owner_session_filter(lang_q, user)
            lang_rows = lang_q.group_by(Document.language).all()
-            languages = {lang or "text": cnt for lang, cnt in lang_rows}
+            languages = _aggregate_language_facets(lang_rows)

            # Session count (owner-filtered)
            sc_q = (
@@ -402,7 +417,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
        text extraction was wired, plus for scanned/image-only PDFs where the
        VL model picks up text the basic pypdf path missed."""
        import re
-        from src.document_processor import _process_pdf
+        from src.document_processor import _process_pdf, strip_pdf_content_marker
        from src.pdf_form_doc import find_source_upload_id

        user = get_current_user(request)
@@ -423,7 +438,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
                raise HTTPException(404, "Source PDF could not be located")

            try:
-                body_text = _process_pdf(pdf_path).lstrip("\n[PDF content]:").strip()
+                body_text = strip_pdf_content_marker(_process_pdf(pdf_path))
            except Exception as e:
                logger.error(f"extract_pdf_text failed for {pdf_path}: {e}")
                raise HTTPException(500, f"Extraction failed: {e}")
@@ -593,6 +608,15 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
            if req.session_id is not None:
                # Empty string = unlink from session
                doc.session_id = req.session_id if req.session_id else None
+                if not req.session_id:
+                    # Tab closed / doc detached from its session — drop the
+                    # in-memory active-doc pointer so the last-resort injection
+                    # path doesn't re-surface this doc in a later chat (#1160).
+                    try:
+                        from src.tool_implementations import clear_active_document
+                        clear_active_document(doc_id)
+                    except Exception:
+                        pass
            db.commit()
            db.refresh(doc)
            return _doc_to_dict(doc)
@@ -615,6 +639,13 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
                raise HTTPException(404, "Document not found")
            _verify_doc_owner(db, doc, user)
            doc.is_active = False
+            # Closed/deleted — drop the in-memory active-doc pointer so it isn't
+            # re-injected into a later, unrelated chat (#1160).
+            try:
+                from src.tool_implementations import clear_active_document
+                clear_active_document(doc_id)
+            except Exception:
+                pass
            db.commit()
            return {"status": "deleted", "id": doc_id}
        except HTTPException:
@@ -885,7 +916,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
            for i, doc in enumerate(batch):
                if i >= len(verdicts):
                    break
-                verdict = verdicts[i].lower().strip()
+                verdict = str(verdicts[i] or "").lower().strip()
                if verdict == "junk":
                    doc.tidy_verdict = "junk"
                    db.delete(doc)
--- a/routes/editor_draft_routes.py
+++ b/routes/editor_draft_routes.py
@@ -67,6 +67,14 @@ def _summary(d: EditorDraft) -> Dict[str, Any]:
    }


+def _load_payload(raw: Optional[str]) -> Dict[str, Any]:  # decode a stored draft payload; never raises
+    try:
+        payload = json.loads(raw) if raw else {}  # None / "" is treated as an empty payload
+    except Exception:
+        return {}  # unparseable input degrades to an empty payload
+    return payload if isinstance(payload, dict) else {}  # JSON that is not an object is discarded
+
+
 def setup_editor_draft_routes() -> APIRouter:
    router = APIRouter(tags=["editor-drafts"])

@@ -93,13 +101,9 @@ def setup_editor_draft_routes() -> APIRouter:
            ).first()
            if not d or not _owns(d, user):
                raise HTTPException(404, "Draft not found")
-            try:
-                payload = json.loads(d.payload) if d.payload else {}
-            except Exception:
-                payload = {}
            return {
                **_summary(d),
-                "payload": payload,
+                "payload": _load_payload(d.payload),
            }
        finally:
            db.close()
--- a/routes/email_helpers.py
+++ b/routes/email_helpers.py
@@ -15,7 +15,6 @@ and `email_pollers.py` (the background loops):
 import os
 import imaplib
 import smtplib
-import ssl
 import email as email_mod
 import email.header
 import email.utils
@@ -33,47 +32,43 @@ from fastapi import Query, HTTPException, Request
 from pydantic import BaseModel
 from typing import Optional, List

-from src.auth_helpers import get_current_user
+from src.auth_helpers import _auth_disabled, get_current_user
 from src.secret_storage import decrypt as _decrypt

 logger = logging.getLogger(__name__)


-def _send_smtp_message(cfg: dict, from_addr: str, recipients: list[str], message: str | bytes, timeout: int = 30) -> None:
-    """Send through SMTP using the conventional TLS mode for the configured port.
+def _smtp_security_mode(cfg: dict) -> str:
+    """Return the SMTP transport mode for cfg: "ssl", "starttls" or "none"."""
+    explicit = str(cfg.get("smtp_security") or "").strip().lower()
+    if explicit in {"ssl", "starttls", "none"}:
+        return explicit
+    # No valid explicit mode: infer it from the port. 587 is upgraded with
+    # STARTTLS; every other port (465 when unset) is treated as implicit TLS.
+    return "starttls" if int(cfg.get("smtp_port") or 465) == 587 else "ssl"

-    Account settings only store host/port today. Port 465 is implicit TLS
-    (SMTP_SSL); port 587 is plain SMTP upgraded with STARTTLS. Using SSL
-    directly against 587 raises the classic "[SSL: WRONG_VERSION_NUMBER]"
-    error even when credentials are correct.
-    """
+
+def _send_smtp_message(cfg: dict, from_addr: str, recipients: list[str], message: str | bytes, timeout: int = 30) -> None:
+    """Send through SMTP using the configured transport security mode."""
    host = cfg["smtp_host"]
    port = int(cfg.get("smtp_port") or 465)
    user = cfg.get("smtp_user") or ""
    password = cfg.get("smtp_password") or ""
-    def _send_starttls(starttls_port: int = 587) -> None:
-        with smtplib.SMTP(host, starttls_port, timeout=timeout) as smtp:
-            smtp.starttls()
-            if user and password:
-                smtp.login(user, password)
-            smtp.sendmail(from_addr, recipients, message)
+    security = _smtp_security_mode(cfg)

-    if port == 587:
-        _send_starttls(587)
-        return
-
-    try:
+    if security == "ssl":
        with smtplib.SMTP_SSL(host, port, timeout=timeout) as smtp:
            if user and password:
                smtp.login(user, password)
            smtp.sendmail(from_addr, recipients, message)
        return
-    except (TimeoutError, ssl.SSLError) as e:
-        if port == 465:
-            logger.warning("SMTP implicit TLS on %s:465 failed (%s); retrying STARTTLS on 587", host, e)
-            _send_starttls(587)
-            return
-        raise
+
+    with smtplib.SMTP(host, port, timeout=timeout) as smtp:
+        if security == "starttls":
+            smtp.starttls()
+        if user and password:
+            smtp.login(user, password)
+        smtp.sendmail(from_addr, recipients, message)


 def _strip_think(text: str) -> str:
@@ -152,6 +147,8 @@ def _require_auth(request: Request) -> str:
    u = get_current_user(request)
    if u:
        return u
+    if _auth_disabled():
+        return ""
    auth_mgr = getattr(request.app.state, "auth_manager", None)
    if auth_mgr is not None and getattr(auth_mgr, "is_configured", False):
        raise HTTPException(401, "Not authenticated")
@@ -300,7 +297,8 @@ def _init_scheduled_db():
            send_at TEXT NOT NULL,
            created_at TEXT NOT NULL,
            status TEXT NOT NULL DEFAULT 'pending',
-            error TEXT
+            error TEXT,
+            owner TEXT DEFAULT ''
        )
    """)
    # Email summary cache (keyed by Message-ID)
@@ -438,6 +436,35 @@ def _init_scheduled_db():
            conn.execute("ALTER TABLE scheduled_emails ADD COLUMN account_id TEXT")
        if "odysseus_kind" not in cols:
            conn.execute("ALTER TABLE scheduled_emails ADD COLUMN odysseus_kind TEXT")
+        if "owner" not in cols:
+            conn.execute("ALTER TABLE scheduled_emails ADD COLUMN owner TEXT DEFAULT ''")
+        conn.execute("CREATE INDEX IF NOT EXISTS ix_scheduled_emails_owner_status ON scheduled_emails(owner, status)")
+        # Backfill owner on legacy rows from the owning email account so the
+        # owner-scoped list/cancel routes surface pre-migration scheduled
+        # sends to the right user (the poller already resolves these by
+        # account at send time; this aligns the UI with that).
+        legacy_accounts = conn.execute(
+            "SELECT DISTINCT account_id FROM scheduled_emails "
+            "WHERE (owner IS NULL OR owner = '') AND account_id IS NOT NULL AND account_id != ''"
+        ).fetchall()
+        if legacy_accounts:
+            try:
+                from core.database import SessionLocal as _SL, EmailAccount as _EA
+                _db = _SL()
+                try:
+                    for (acct_id,) in legacy_accounts:
+                        row = _db.query(_EA.owner).filter(_EA.id == acct_id).first()
+                        acct_owner = (row[0] or "") if row else ""
+                        if acct_owner:
+                            conn.execute(
+                                "UPDATE scheduled_emails SET owner = ? "
+                                "WHERE account_id = ? AND (owner IS NULL OR owner = '')",
+                                (acct_owner, acct_id),
+                            )
+                finally:
+                    _db.close()
+            except Exception:
+                pass
    except Exception:
        pass
    # Lazy migration: add turns_json to email_boundaries for server-side
@@ -541,6 +568,7 @@ def _get_email_config(account_id: str | None = None, owner: str = "") -> dict:
                    "account_name": row.name,
                    "smtp_host": row.smtp_host or "",
                    "smtp_port": int(row.smtp_port or 465),
+                    "smtp_security": _smtp_security_mode({"smtp_security": getattr(row, "smtp_security", ""), "smtp_port": row.smtp_port}),
                    "smtp_user": row.smtp_user or "",
                    "smtp_password": _decrypt(row.smtp_password or ""),
                    "imap_host": row.imap_host or "",
@@ -567,6 +595,10 @@ def _get_email_config(account_id: str | None = None, owner: str = "") -> dict:
        "account_name": "legacy",
        "smtp_host": settings.get("smtp_host", os.environ.get("SMTP_HOST", "")),
        "smtp_port": int(settings.get("smtp_port", os.environ.get("SMTP_PORT", "465")) or 465),
+        "smtp_security": _smtp_security_mode({
+            "smtp_security": settings.get("smtp_security", os.environ.get("SMTP_SECURITY", "")),
+            "smtp_port": settings.get("smtp_port", os.environ.get("SMTP_PORT", "465")),
+        }),
        "smtp_user": settings.get("smtp_user", os.environ.get("SMTP_USER", "")),
        "smtp_password": settings.get("smtp_password", os.environ.get("SMTP_PASSWORD", "")),
        "imap_host": settings.get("imap_host", os.environ.get("IMAP_HOST", "")),
@@ -606,7 +638,32 @@ def _list_email_accounts() -> list[dict]:

 # ── IMAP helpers ──

-_IMAP_TIMEOUT_SECONDS = 15
+def _coerce_imap_timeout_seconds(raw: str | None) -> int:
+    """Parse the timeout override; default 30s, clamped to 5..300 seconds."""
+    try:
+        return max(5, min(int(raw or "30"), 300))
+    except (TypeError, ValueError):
+        return 30
+
+
+_IMAP_TIMEOUT_SECONDS = _coerce_imap_timeout_seconds(os.environ.get("ODYSSEUS_IMAP_TIMEOUT_SECONDS"))
+
+
+def _open_imap_connection(host: str, port: int, *, starttls: bool, timeout: int = _IMAP_TIMEOUT_SECONDS):
+    """Open an IMAP connection: STARTTLS if requested, implicit TLS on 993, else plaintext."""
+    port = int(port or 993)
+    conn = (imaplib.IMAP4 if starttls or port != 993 else imaplib.IMAP4_SSL)(host, port, timeout=timeout)
+    try:
+        if starttls:
+            conn.starttls()
+    except Exception:
+        conn.shutdown()  # failed TLS upgrade: close the socket instead of leaking it
+        raise
+    try:
+        conn.sock.settimeout(timeout)
+    except Exception:
+        pass  # best effort; keep the connection even if the timeout cannot be re-applied
+    return conn

 def _imap_connect(account_id: str | None = None, owner: str = ""):
    # SECURITY: passing `owner` scopes the fallback config lookup so a brand
@@ -620,17 +677,12 @@ def _imap_connect(account_id: str | None = None, owner: str = ""):
    # The last branch is critical: previously this fell into IMAP4_SSL
    # for any non-STARTTLS port, which would fail the TLS handshake on
    # plain local servers (Dovecot on 31143, etc.).
-    if cfg.get("imap_starttls"):
-        conn = imaplib.IMAP4(cfg["imap_host"], cfg["imap_port"], timeout=_IMAP_TIMEOUT_SECONDS)
-        conn.starttls()
-    elif int(cfg.get("imap_port") or 993) == 993:
-        conn = imaplib.IMAP4_SSL(cfg["imap_host"], cfg["imap_port"], timeout=_IMAP_TIMEOUT_SECONDS)
-    else:
-        conn = imaplib.IMAP4(cfg["imap_host"], cfg["imap_port"], timeout=_IMAP_TIMEOUT_SECONDS)
-    try:
-        conn.sock.settimeout(_IMAP_TIMEOUT_SECONDS)
-    except Exception:
-        pass
+    conn = _open_imap_connection(
+        cfg["imap_host"],
+        cfg["imap_port"],
+        starttls=bool(cfg.get("imap_starttls")),
+        timeout=_IMAP_TIMEOUT_SECONDS,
+    )
    conn.login(cfg["imap_user"], cfg["imap_password"])
    return conn

@@ -699,7 +751,13 @@ def _decode_header(raw):
    decoded = []
    for data, charset in parts:
        if isinstance(data, bytes):
-            decoded.append(data.decode(charset or "utf-8", errors="replace"))
+            try:
+                decoded.append(data.decode(charset or "utf-8", errors="replace"))
+            except (LookupError, ValueError):
+                # Unknown/invalid MIME charset (e.g. a malformed or spam header
+                # like =?x-unknown-charset?B?...?=). errors="replace" only covers
+                # byte-decode errors, not codec lookup, so fall back to utf-8.
+                decoded.append(data.decode("utf-8", errors="replace"))
        else:
            decoded.append(data)
    return " ".join(decoded)
@@ -793,22 +851,27 @@ def _detect_spam_folder(conn):
        return None


-def _imap_move(uid, dest, src="INBOX"):
+def _imap_move(uid, dest, src="INBOX", account_id: str | None = None, owner: str = ""):
    """Move a single IMAP UID from src folder to dest. Returns True on success."""
+    c = None
    try:
-        c = _imap_connect()
+        c = _imap_connect(account_id, owner=owner)
        c.select(_q(src))
        status, _ = c.copy(uid, _q(dest))
        if status != "OK":
-            c.logout()
            return False
        c.store(uid, "+FLAGS", "\\Deleted")
        c.expunge()
-        c.logout()
        return True
    except Exception as e:
        logger.warning(f"IMAP move {uid} → {dest} failed: {e}")
        return False
+    finally:
+        if c:
+            try:
+                c.logout()
+            except Exception:
+                pass


 def _extract_attachment_text(msg, max_chars: int = 6000) -> str:
@@ -999,7 +1062,9 @@ def _fetch_sender_thread_context(sender_addr: str,
                                 exclude_folder: str = "INBOX",
                                 limit: int = 3,
                                 max_chars_per_email: int = 1500,
-                                 max_attachment_chars: int = 4000) -> str:
+                                 max_attachment_chars: int = 4000,
+                                 account_id: str | None = None,
+                                 owner: str = "") -> str:
    """Pull the last N emails from `sender_addr` (across common folders),
    extract their body snippets + attachment text, and return one formatted
    block ready to be glued into an LLM system prompt as "REFERENCED MATERIAL".
@@ -1021,7 +1086,7 @@ def _fetch_sender_thread_context(sender_addr: str,
        seen_uids.add((exclude_folder or "INBOX", str(exclude_uid)))

    try:
-        conn = _imap_connect()
+        conn = _imap_connect(account_id, owner=owner)
    except Exception as e:
        logger.warning(f"sender-thread-context: imap connect failed: {e}")
        return ""
@@ -1104,7 +1169,12 @@ def _fetch_sender_thread_context(sender_addr: str,
    return "\n\n=====\n\n".join(blocks)


-def _pre_retrieve_context(body: str, sender: str) -> tuple:
+def _pre_retrieve_context(
+    body: str,
+    sender: str,
+    account_id: str | None = None,
+    owner: str = "",
+) -> tuple:
    """Extract key terms from an incoming email and search past emails + contacts.

    Returns (context_snippets, terms_list). Best-effort; never raises.
@@ -1128,18 +1198,37 @@ def _pre_retrieve_context(body: str, sender: str) -> tuple:
        # ── Known-sender check: only retrieve context for senders we already
        # have a relationship with. New / cold senders get an empty context.
        sender_addr = email.utils.parseaddr(sender or "")[1].lower()
-        is_known = False
+        # The CardDAV address book is global admin data backed by a single
+        # Radicale instance, so only fold it into reply context for an admin /
+        # single-user owner. Non-admin owners still get their own (owner-scoped)
+        # IMAP history below, just not the shared contacts.
        try:
-            from routes.contacts_routes import _fetch_contacts
-            for c in _fetch_contacts() or []:
-                if (c.get("email") or "").lower() == sender_addr:
-                    is_known = True
-                    break
+            from src.tool_security import owner_is_admin_or_single_user
+            contacts_allowed = owner_is_admin_or_single_user(owner or None)
        except Exception:
-            pass
+            contacts_allowed = not bool(owner)
+        is_known = False
+        if contacts_allowed:
+            try:
+                from routes.contacts_routes import _fetch_contacts
+                for c in _fetch_contacts() or []:
+                    # Contacts are normalized to plural `emails` lists, but
+                    # keep the legacy singular key fallback for older data.
+                    contact_emails = []
+                    raw_emails = c.get("emails")
+                    if isinstance(raw_emails, list):
+                        contact_emails.extend(str(e or "") for e in raw_emails)
+                    legacy_email = c.get("email")
+                    if legacy_email:
+                        contact_emails.append(str(legacy_email))
+                    if any((addr or "").strip().lower() == sender_addr for addr in contact_emails):
+                        is_known = True
+                        break
+            except Exception:
+                pass
        if not is_known and sender_addr:
            try:
-                with _imap() as _ck:
+                with _imap(account_id, owner=owner) as _ck:
                    _ck.select("INBOX", readonly=True)
                    st_known, dk = _ck.search(None, f'(FROM "{sender_addr}")')
                    if st_known == "OK" and dk and dk[0]:
@@ -1177,7 +1266,7 @@ def _pre_retrieve_context(body: str, sender: str) -> tuple:
            return context_snippets, terms_list

        try:
-            ctx_conn = _imap_connect()
+            ctx_conn = _imap_connect(account_id, owner=owner)
            for folder in ["INBOX", "Sent", "Archive", "Drafts"]:
                try:
                    st_sel, _sd = ctx_conn.select(_q(folder), readonly=True)
@@ -1221,18 +1310,18 @@ def _pre_retrieve_context(body: str, sender: str) -> tuple:

        try:
            from routes.contacts_routes import _fetch_contacts
-            all_contacts = _fetch_contacts()
+            all_contacts = _fetch_contacts() if contacts_allowed else []
            for term in terms_list:
                t_lower = term.lower()
                matches = [c for c in all_contacts
                           if t_lower in (c.get("name") or "").lower()
-                           or t_lower in (c.get("email") or "").lower()]
+                           or any(t_lower in (e or "").lower() for e in (c.get("emails") or []))]
                for c in matches[:2]:
                    parts = [f"Name: {c.get('name','')}"]
-                    if c.get("email"):
-                        parts.append(f"Email: {c['email']}")
-                    if c.get("phone"):
-                        parts.append(f"Phone: {c['phone']}")
+                    if c.get("emails"):
+                        parts.append(f"Email: {', '.join(c['emails'])}")
+                    if c.get("phones"):
+                        parts.append(f"Phone: {', '.join(c['phones'])}")
                    context_snippets.append(f"[Contact match for \"{term}\"] " + ", ".join(parts))
        except Exception:
            pass
--- a/routes/email_pollers.py
+++ b/routes/email_pollers.py
@@ -45,6 +45,21 @@ from routes.email_helpers import (
 logger = logging.getLogger(__name__)


+def _owner_for_email_account(account_id: str | None) -> str:  # owner of the account row, "" when unknown
+    if not account_id:
+        return ""  # no account given, so no lookup is attempted
+    try:
+        from core.database import SessionLocal as _SL, EmailAccount as _EA  # imported lazily, inside the try
+        db = _SL()
+        try:
+            row = db.query(_EA.owner).filter(_EA.id == account_id).first()
+            return (row[0] or "") if row else ""  # missing row and NULL owner both map to ""
+        finally:
+            db.close()  # always release the session, including on the return above
+    except Exception:
+        return ""  # best effort: import or query failures are reported as "no owner", never raised
+
+
 # ── Routes ──

 async def _emit_progress(progress_cb, message: str):
@@ -84,6 +99,36 @@ async def _run_auto_summarize_once(do_summary: bool = True, do_reply: bool = Tru
        _save_settings(s2)


+def _latest_inbox_fallback_uids(conn, reconnect):
+    """Return the latest INBOX UIDs via ``SEARCH ALL``, guarding the socket (#1613).
+
+    On a large Gmail mailbox the fallback ``SEARCH ALL`` can time out mid-reply
+    and leave its huge ``* SEARCH <uids…>`` line unread on the socket. The next
+    command (the downstream re-select / EXAMINE) would then consume those stale
+    bytes and fail with ``EXAMINE => unexpected response: b'325188 …'``. Dropping
+    the connection on failure lets the downstream command start on a clean socket.
+
+    Returns ``(uids, conn)``: ``("INBOX", uid)`` pairs plus the connection to keep
+    using — the same one unless the scan raised, then a fresh ``reconnect()`` one.
+    """
+    try:
+        conn.select("INBOX", readonly=True)
+        status, data = conn.uid("SEARCH", None, "ALL")
+        latest = []
+        if status == "OK" and data and data[0]:
+            # Keep only the last eight UIDs of the reply, in reverse order.
+            latest = [("INBOX", uid) for uid in reversed(data[0].split()[-8:])]
+            logger.info("Email task SINCE scan found no messages; fell back to latest INBOX messages")
+        return latest, conn
+    except Exception as _e:
+        logger.warning(f"Latest-INBOX fallback scan failed: {_e}")
+        try:
+            conn.logout()
+        except Exception:
+            pass
+        return [], reconnect()
+
+
 async def _auto_summarize_pass(days_back: int = 1, account_id: str | None = None, progress_cb=None) -> str:
    """Single pass of the auto-summarize/reply scan.

@@ -132,7 +177,7 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
    import sqlite3 as _sql3
    import requests as _req
    from src.endpoint_resolver import resolve_endpoint
-    from src.llm_core import _uses_max_completion_tokens
+    from src.llm_core import _uses_max_completion_tokens, _restricts_temperature

    settings = _load_settings()
    auto_sum = settings.get("email_auto_summarize", False)
@@ -143,25 +188,18 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
    if not auto_sum and not auto_reply and not auto_tag and not auto_spam and not auto_cal:
        return "Nothing to do"

-    # Owner of the account being processed. All calendar reads/writes below are
-    # scoped to this user: the multi-account fan-out runs every user's mailbox,
-    # so an unscoped pass would disclose and mutate other tenants' calendars.
-    _acct_owner = None
-    try:
-        from core.database import SessionLocal as _SLo, EmailAccount as _EAo
-        _dbo = _SLo()
-        try:
-            if account_id:
-                _arow = _dbo.query(_EAo).filter(_EAo.id == account_id).first()
-                _acct_owner = _arow.owner if _arow else None
-        finally:
-            _dbo.close()
-    except Exception:
-        _acct_owner = None
+    # Owner of the account being processed. All calendar + mailbox reads/writes
+    # below are scoped to this user: the multi-account fan-out runs every user's
+    # mailbox, so an unscoped pass would disclose/mutate other tenants' data.
+    # One resolution feeds both the mailbox path (account_owner) and upstream's
+    # calendar path (_acct_owner, which expects None rather than "").
+    account_owner = _owner_for_email_account(account_id)
+    _acct_owner = account_owner or None

+    conn = None
    try:
        await _emit_progress(progress_cb, "Connecting to mail…")
-        conn = _imap_connect(account_id)
+        conn = _imap_connect(account_id, owner=account_owner)
        from datetime import timedelta as _td
        since = (datetime.utcnow() - _td(days=max(1, days_back))).strftime("%d-%b-%Y")
        # uid_list carries real IMAP UIDs, matching the email UI/read routes.
@@ -193,26 +231,27 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
        # the latest visible inbox messages so Clear cache -> Run again can
        # actually repopulate AI reply/summary/tag caches.
        if not uid_list:
-            try:
-                conn.select("INBOX", readonly=True)
-                status, data = conn.uid("SEARCH", None, "ALL")
-                if status == "OK" and data and data[0]:
-                    for u in reversed(data[0].split()[-8:]):
-                        uid_list.append(("INBOX", u))
-                    logger.info("Email task SINCE scan found no messages; fell back to latest INBOX messages")
-            except Exception as _e:
-                logger.warning(f"Latest-INBOX fallback scan failed: {_e}")
-        # Re-select INBOX as default for downstream code
+            _fb_uids, conn = _latest_inbox_fallback_uids(
+                conn, lambda: _imap_connect(account_id, owner=account_owner)
+            )
+            uid_list.extend(_fb_uids)
+        # Re-select INBOX as default for downstream code (on a clean socket even
+        # if the SEARCH ALL fallback above failed — see #1613).
        conn.select("INBOX", readonly=True)
        if not uid_list:
-            conn.logout()
            return "No recent emails"
        await _emit_progress(progress_cb, f"Found {len(uid_list)} recent email(s); checking cache…")

        _c = _sql3.connect(SCHEDULED_DB)
        _sum_existing = {r[0] for r in _c.execute("SELECT message_id FROM email_summaries").fetchall()}
        _reply_existing = {r[0] for r in _c.execute("SELECT message_id FROM email_ai_replies").fetchall()}
-        _tag_existing = {r[0] for r in _c.execute("SELECT message_id FROM email_tags").fetchall()} if (auto_tag or auto_spam) else set()
+        if auto_tag or auto_spam:
+            if account_owner:
+                _tag_existing = {r[0] for r in _c.execute("SELECT message_id FROM email_tags WHERE owner=?", (account_owner,)).fetchall()}
+            else:
+                _tag_existing = {r[0] for r in _c.execute("SELECT message_id FROM email_tags WHERE owner='' OR owner IS NULL").fetchall()}
+        else:
+            _tag_existing = set()
        _cal_existing = {r[0] for r in _c.execute("SELECT message_id FROM email_calendar_extractions").fetchall()} if auto_cal else set()
        # Urgency is handled by the built-in `check_email_urgency` task. Keep
        # this legacy poller path disabled so users don't get two independent
@@ -225,7 +264,7 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
        # this per-iteration was making big inbox scans crawl. Used by the
        # urgency self-loop check below.
        try:
-            _self_self_addr = (_get_email_config(account_id).get("from_address") or "").strip().lower()
+            _self_self_addr = (_get_email_config(account_id, owner=account_owner).get("from_address") or "").strip().lower()
        except Exception:
            _self_self_addr = ""

@@ -233,11 +272,10 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
        if auto_spam and not spam_folder:
            logger.warning("Auto-spam enabled but no Junk/Spam folder detected — will classify but not move")

-        url, model, headers = resolve_endpoint("utility")
+        url, model, headers = resolve_endpoint("utility", owner=account_owner)
        if not url:
-            url, model, headers = resolve_endpoint("default")
+            url, model, headers = resolve_endpoint("default", owner=account_owner)
        if not url or not model:
-            conn.logout()
            return "No model configured"

        writing_style = settings.get("email_writing_style", "")
@@ -355,6 +393,9 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
                        "temperature": 0.3,
                        "stream": False,
                    }
+                    # Reasoning models (o1/o3/o4/gpt-5) reject an explicit temperature.
+                    if _restricts_temperature(model):
+                        payload.pop("temperature", None)
                    try:
                        # Use to_thread so this sync HTTP call doesn't freeze
                        # the entire event loop while the LLM thinks (240s).
@@ -392,8 +433,8 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
                    await _emit_progress(progress_cb, f"Drafting reply {processed + 1}/{_max_process} · checked {examined}/{len(uid_list)}")
                    # Background reply drafting should not make the whole app
                    # feel busy. Keep it lightweight: no extra IMAP context
-                    # mining here; manual AI Reply can still do that when the
-                    # user explicitly asks for a draft on one email.
+                    # mining here; manual AI Reply can still do that (owner-scoped)
+                    # when the user explicitly asks for a draft on one email.
                    context_snippets, _terms = [], []
                    sys_prompt = _EMAIL_REPLY_SYS_PROMPT_BASE
                    if att_text:
@@ -708,7 +749,7 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
                            # Send alert email immediately if critical or high
                            if urgency in ("critical", "high"):
                                try:
-                                    cfg = _get_email_config(account_id)
+                                    cfg = _get_email_config(account_id, owner=account_owner)
                                    to_addr = cfg["from_address"]  # self-email

                                    # Deep-link to open the original email in Odysseus (if public URL is configured).
@@ -716,8 +757,8 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
                                    from src.settings import load_settings as _ls
                                    _pub = (_ls().get("app_public_url") or "").rstrip("/")
                                    uid_str = uid.decode() if isinstance(uid, bytes) else str(uid)
-                                    from urllib.parse import quote as _q
-                                    open_url = f"{_pub}/#email={_q(_folder, safe='')}:{uid_str}" if _pub else ""
+                                    from urllib.parse import quote as _url_q
+                                    open_url = f"{_pub}/#email={_url_q(_folder, safe='')}:{uid_str}" if _pub else ""

                                    alert_subject = f"[{urgency.upper()}] {subject}"
                                    alert_body = (
@@ -806,12 +847,15 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
                            "temperature": 0.1,
                            "stream": False,
                        }
+                        # Reasoning models (o1/o3/o4/gpt-5) reject an explicit temperature.
+                        if _restricts_temperature(model):
+                            payload.pop("temperature", None)
                        # to_thread keeps the event loop responsive during the LLM call
                        resp = await asyncio.to_thread(
                            _req.post, url, json=payload, headers=req_headers, timeout=120
                        )
                        if not resp.ok:
-                            logger.warning(f"Auto-classify {uid.decode()} HTTP {resp.status_code}: {resp.text[:200]}")
+                            logger.warning(f"Auto-classify {uid.decode() if isinstance(uid, bytes) else str(uid)} HTTP {resp.status_code}: {resp.text[:200]}")
                        else:
                            rdata = resp.json()
                            m = (rdata.get("choices") or [{}])[0].get("message", {})
@@ -840,17 +884,17 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None

                                moved_to = ""
                                if is_spam and auto_spam and spam_folder:
-                                    if _imap_move(uid, spam_folder):
+                                    if _imap_move(uid, spam_folder, account_id=account_id, owner=account_owner):
                                        moved_to = spam_folder
                                        logger.info(f"Auto-spam moved uid={uid.decode()} to {spam_folder}: {spam_reason}")

                                _c = _sql3.connect(SCHEDULED_DB)
                                _c.execute("""
                                    INSERT OR REPLACE INTO email_tags
-                                    (message_id, uid, folder, subject, sender, tags, spam_verdict,
+                                    (message_id, owner, uid, folder, subject, sender, tags, spam_verdict,
                                     spam_reason, moved_to, model_used, created_at)
-                                    VALUES (?, ?, 'INBOX', ?, ?, ?, ?, ?, ?, ?, ?)
-                                """, (message_id, uid.decode(), subject, sender,
+                                    VALUES (?, ?, ?, 'INBOX', ?, ?, ?, ?, ?, ?, ?, ?)
+                                """, (message_id, account_owner or "", uid.decode(), subject, sender,
                                      json.dumps(tags), 1 if is_spam else 0,
                                      spam_reason, moved_to, model, datetime.utcnow().isoformat()))
                                _c.commit()
@@ -865,7 +909,6 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
                logger.warning(f"Auto-process {uid} failed: {e}")
                continue

-        conn.logout()
        await _emit_progress(progress_cb, "Finishing…")
        if processed > 0:
            logger.info(f"Auto-processed {processed} new email(s) for summary/reply/classify")
@@ -902,6 +945,12 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
    except Exception as e:
        logger.warning(f"Auto-summarize pass error: {e}")
        return f"Error: {e}"
+    finally:
+        if conn:
+            try:
+                conn.logout()
+            except Exception:
+                pass


 async def _auto_summarize_poller():
@@ -930,8 +979,9 @@ def _scheduled_poll_once() -> dict:
        conn = sqlite3.connect(SCHEDULED_DB)
        cols = [row[1] for row in conn.execute("PRAGMA table_info(scheduled_emails)").fetchall()]
        kind_expr = "odysseus_kind" if "odysseus_kind" in cols else "'scheduled' AS odysseus_kind"
+        owner_expr = "owner" if "owner" in cols else "'' AS owner"
        rows = conn.execute(f"""
-            SELECT id, to_addr, cc, bcc, subject, body, in_reply_to, references_hdr, attachments, account_id, {kind_expr}
+            SELECT id, to_addr, cc, bcc, subject, body, in_reply_to, references_hdr, attachments, account_id, {kind_expr}, {owner_expr}
            FROM scheduled_emails
            WHERE status = 'pending' AND send_at <= ?
        """, (now_iso,)).fetchall()
@@ -943,7 +993,8 @@ def _scheduled_poll_once() -> dict:
                attachments = json.loads(r[8] or "[]")
                row_account_id = r[9] if len(r) > 9 else None
                odysseus_kind = r[10] if len(r) > 10 else "scheduled"
-                cfg = _get_email_config(row_account_id)
+                row_owner = (r[11] if len(r) > 11 else "") or _owner_for_email_account(row_account_id)
+                cfg = _get_email_config(row_account_id, owner=row_owner)
                has_atts = bool(attachments)
                if has_atts:
                    outer = MIMEMultipart("mixed")
@@ -980,7 +1031,7 @@ def _scheduled_poll_once() -> dict:

                # Append to local Sent folder
                try:
-                    with _imap() as imap:
+                    with _imap(row_account_id, owner=row_owner) as imap:
                        sent_folder = _detect_sent_folder(imap)
                        imap.append(sent_folder, "\\Seen", None, outer.as_bytes())
                except Exception as e:
--- a/routes/email_routes.py
+++ b/routes/email_routes.py
@@ -17,7 +17,6 @@ import sqlite3 as _sql3
 import email as email_mod
 import email.header
 import email.utils
-import imaplib
 import smtplib
 import json
 import re
@@ -40,7 +39,8 @@ from routes.email_helpers import (
    _strip_think, _extract_reply, _apply_email_style_mechanics, require_owner, require_user, _assert_owns_account,
    _q, _attach_compose_uploads, _cleanup_compose_uploads,
    _load_settings, _save_settings, _get_email_config,
-    _send_smtp_message,
+    _send_smtp_message, _smtp_security_mode,
+    _IMAP_TIMEOUT_SECONDS, _open_imap_connection,
    _imap_connect, _imap, _decode_header, _detect_sent_folder, _detect_drafts_folder,
    _extract_attachment_text, _list_attachments_from_msg,
    _extract_attachment_to_disk, _extract_html, _extract_text,
@@ -90,6 +90,16 @@ def _email_tag_owner_aliases(account_id: str | None, owner: str = "") -> list[st
    return out or [""]


+def _email_tag_owner_clause(account_id: str | None, owner: str = "") -> tuple[str, list[str]]:
+    """Build the owner-scoping SQL fragment for email_tags and its bind values.
+
+    With a non-empty owner (configured multi-user mode) legacy NULL-owner rows
+    are not matched; without one (single-user/unconfigured) they still are."""
+    owner_params = _email_tag_owner_aliases(account_id, owner)
+    in_list = f"owner IN ({','.join('?' * len(owner_params))})"
+    return (in_list if owner else f"({in_list} OR owner IS NULL)"), owner_params
+
+
 def _record_email_received_events(owner: str, account_id: str | None, folder: str, emails: list[dict]):
    """Baseline inbox messages, then fire `email_received` for new arrivals."""
    if not owner or (folder or "INBOX").upper() != "INBOX" or not emails:
@@ -312,6 +322,20 @@ def _apply_odysseus_headers(msg, kind: str | None = None, ref_id: str | None = N
        msg["X-Odysseus-Ref"] = re.sub(r"[^A-Za-z0-9_.:-]", "-", ref_id)[:128]


+def _envelope_recipients(*fields: str) -> list:
+    """Collect the bare SMTP envelope addresses from To/Cc/Bcc header strings.
+
+    Empty or missing fields are skipped. Parsing is delegated to
+    email.utils.getaddresses rather than `field.split(",")`, because a comma
+    inside a quoted display name (e.g. `"Smith, John" <john@corp.com>`) would
+    otherwise be split into two broken recipients. Addresses are returned in
+    header order, whitespace-stripped, with empty results dropped."""
+    headers = [field for field in fields if field]
+    parsed = email.utils.getaddresses(headers)
+    stripped = ((address or "").strip() for _display, address in parsed)
+    return [address for address in stripped if address]
+
+
 def _md_to_email_html(text: str) -> str:
    """Render the compose markdown body to a SAFE HTML fragment for the email's
    text/html part. Everything is HTML-escaped FIRST (so a pasted <script> /
@@ -457,7 +481,7 @@ def setup_email_routes():
    _IMAP_POOL = {}   # account_id → (conn, last_used_at)
    _IMAP_IDLE_MAX = 60.0
    _WARMING_READS = set()
-    _WARM_READ_LIMIT = 3
+    _WARM_READ_LIMIT = 1
    _WARM_MAX_BYTES = 128 * 1024
    _WARM_RECENT_SECONDS = 7 * 24 * 60 * 60
    _pool_lock = _threading.Lock()
@@ -591,11 +615,11 @@ def setup_email_routes():
        SECURITY: `owner` is propagated so when `account_id` is missing,
        the fallback config lookup is scoped to this user's accounts only.
        """
+        conn = None
        try:
            conn = _imap_connect(account_id, owner=owner)
            select_status, _ = conn.select(_q(folder), readonly=True)
            if select_status != "OK":
-                conn.logout()
                return {"emails": [], "total": 0, "folder": folder, "error": f"Folder not found: {folder}"}

            from_clause = ""
@@ -645,8 +669,7 @@ def setup_email_routes():
                try:
                    import sqlite3 as _sql3t
                    _ct = _sql3t.connect(SCHEDULED_DB)
-                    _owner_aliases = _email_tag_owner_aliases(account_id, owner)
-                    _owner_ph = ",".join("?" * len(_owner_aliases))
+                    _owner_clause, _owner_params = _email_tag_owner_clause(account_id, owner)
                    # SECURITY: owner-scope the lookup (review C2/H8). Without
                    # this, user A's `tag:urgent` filter would surface UIDs
                    # written by user B and IMAP would return whatever
@@ -658,8 +681,8 @@ def setup_email_routes():
                        rows_t = _ct.execute(
                            "SELECT message_id, uid FROM email_tags "
                            "WHERE folder=? AND spam_verdict=1 "
-                            f"AND (owner IN ({_owner_ph}) OR owner IS NULL)",
-                            (folder, *_owner_aliases),
+                            f"AND {_owner_clause}",
+                            (folder, *_owner_params),
                        ).fetchall()
                        for mid, uid in rows_t:
                            if mid:
@@ -670,8 +693,8 @@ def setup_email_routes():
                        rows_t = _ct.execute(
                            "SELECT message_id, uid, tags FROM email_tags "
                            "WHERE folder=? AND tags IS NOT NULL AND tags != '' "
-                            f"AND (owner IN ({_owner_ph}) OR owner IS NULL)",
-                            (folder, *_owner_aliases),
+                            f"AND {_owner_clause}",
+                            (folder, *_owner_params),
                        ).fetchall()
                        for r in rows_t:
                            try:
@@ -743,12 +766,11 @@ def setup_email_routes():
                _uid_strs = [u.decode() for u in uid_list]
                if _uid_strs:
                    placeholders = ",".join("?" * len(_uid_strs))
-                    _owner_aliases = _email_tag_owner_aliases(account_id, owner)
-                    _owner_ph = ",".join("?" * len(_owner_aliases))
+                    _owner_clause, _owner_params = _email_tag_owner_clause(account_id, owner)
                    rows = _c.execute(
                        f"SELECT uid, tags, spam_verdict FROM email_tags "
-                        f"WHERE folder=? AND (owner IN ({_owner_ph}) OR owner IS NULL) AND uid IN ({placeholders})",
-                        [folder, *_owner_aliases, *_uid_strs],
+                        f"WHERE folder=? AND {_owner_clause} AND uid IN ({placeholders})",
+                        [folder, *_owner_params, *_uid_strs],
                    ).fetchall()
                    for r in rows:
                        try:
@@ -805,14 +827,13 @@ def setup_email_routes():
                    if header_ids:
                        import sqlite3 as _sql3m
                        _cm = _sql3m.connect(SCHEDULED_DB)
-                        _owner_aliases_m = _email_tag_owner_aliases(account_id, owner)
-                        _owner_ph_m = ",".join("?" * len(_owner_aliases_m))
+                        _owner_clause_m, _owner_params_m = _email_tag_owner_clause(account_id, owner)
                        _mid_ph = ",".join("?" * len(header_ids))
                        rows_m = _cm.execute(
                            f"SELECT message_id, tags, spam_verdict FROM email_tags "
-                            f"WHERE folder=? AND (owner IN ({_owner_ph_m}) OR owner IS NULL) "
+                            f"WHERE folder=? AND {_owner_clause_m} "
                            f"AND message_id IN ({_mid_ph})",
-                            [folder, *_owner_aliases_m, *header_ids],
+                            [folder, *_owner_params_m, *header_ids],
                        ).fetchall()
                        _cm.close()
                        for mid, tags_raw, spam_raw in rows_m:
@@ -924,12 +945,17 @@ def setup_email_routes():
            except Exception as _summary_err:
                logger.debug(f"Bulk summary attach skipped: {_summary_err}")

-            conn.logout()
            return {"emails": emails, "total": total, "folder": folder, "offset": offset}
        except Exception as e:
            logger.error(f"Failed to list emails: {e}")
            detail = str(e).strip()
            return {"emails": [], "total": 0, "error": f"Mail operation failed: {detail[:180]}" if detail else "Mail operation failed"}
+        finally:
+            if conn:
+                try:
+                    conn.logout()
+                except Exception:
+                    pass

    @router.get("/list")
    async def list_emails(
@@ -971,10 +997,11 @@ def setup_email_routes():
    async def unflag_spam(uid: str, owner: str = Depends(require_owner)):
        """User override — mark email as not spam."""
        try:
+            owner_clause, owner_params = _email_tag_owner_clause(None, owner)
            _c = _sql3.connect(SCHEDULED_DB)
            _c.execute(
-                "UPDATE email_tags SET spam_verdict=0, spam_reason='' WHERE uid=?",
-                (uid,),
+                f"UPDATE email_tags SET spam_verdict=0, spam_reason='' WHERE uid=? AND {owner_clause}",
+                [uid, *owner_params],
            )
            _c.commit()
            _c.close()
@@ -997,8 +1024,10 @@ def setup_email_routes():
        ql = (q or "").strip().lower()
        try:
            conn = _sql3.connect(SCHEDULED_DB)
+            owner_clause, owner_params = _email_tag_owner_clause(None, owner)
            rows = conn.execute(
-                "SELECT sender FROM email_tags WHERE sender IS NOT NULL AND sender != ''"
+                f"SELECT sender FROM email_tags WHERE sender IS NOT NULL AND sender != '' AND {owner_clause}",
+                owner_params,
            ).fetchall()
            conn.close()
            seen = {}
@@ -1046,7 +1075,7 @@ def setup_email_routes():

                # Escape backslash and quote for the IMAP-SEARCH quoted-string.
                q_escaped = q.replace('\\', '\\\\').replace('"', '\\"')
-                search_cmd = f'(OR FROM "{q_escaped}" TEXT "{q_escaped}")'
+                search_cmd = f'(OR OR FROM "{q_escaped}" SUBJECT "{q_escaped}" TEXT "{q_escaped}")'

                status, data = _imap_uid_search(conn, search_cmd)
                if status != "OK" or not data[0]:
@@ -1928,11 +1957,7 @@ def setup_email_routes():
            outer.attach(body_container)
            _attach_compose_uploads(outer, attachments)

-        recipients = [r.strip() for r in to.split(",") if r.strip()]
-        if cc:
-            recipients.extend([r.strip() for r in cc.split(",") if r.strip()])
-        if bcc:
-            recipients.extend([r.strip() for r in bcc.split(",") if r.strip()])
+        recipients = _envelope_recipients(to, cc, bcc)

        _send_smtp_message(cfg, cfg["from_address"], recipients, outer.as_string())

@@ -1964,13 +1989,22 @@ def setup_email_routes():
            # minute doesn't trip the past-time guard.
            if parsed_at < now_utc:
                return {"success": False, "error": "send_at must be in the future"}
+            # Normalize to naive UTC before storing: the poller selects due
+            # rows with a lexicographic string compare against a naive
+            # datetime.utcnow().isoformat(), so storing the raw client string
+            # makes "+02:00" schedules fire hours late, negative offsets fire
+            # hours early, and a "Z" suffix compares after the fractional
+            # seconds of the poller timestamp.
+            if parsed_at.tzinfo:
+                parsed_at = parsed_at.astimezone(_tz.utc).replace(tzinfo=None)
+            send_at = parsed_at.isoformat()

            sid = _uuid.uuid4().hex[:16]
            conn = sqlite3.connect(SCHEDULED_DB)
            conn.execute("""
                INSERT INTO scheduled_emails
-                (id, to_addr, cc, bcc, subject, body, in_reply_to, references_hdr, attachments, send_at, created_at, status, account_id, odysseus_kind)
-                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'pending', ?, ?)
+                (id, to_addr, cc, bcc, subject, body, in_reply_to, references_hdr, attachments, send_at, created_at, status, account_id, odysseus_kind, owner)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'pending', ?, ?, ?)
            """, (
                sid,
                req.get("to", ""),
@@ -1985,6 +2019,7 @@ def setup_email_routes():
                datetime.utcnow().isoformat(),
                req.get("account_id") or None,
                req.get("odysseus_kind") or "scheduled",
+                owner or "",
            ))
            conn.commit()
            conn.close()
@@ -2003,9 +2038,9 @@ def setup_email_routes():
            rows = conn.execute("""
                SELECT id, to_addr, cc, subject, send_at, created_at, status, error
                FROM scheduled_emails
-                WHERE status IN ('pending', 'failed')
+                WHERE status IN ('pending', 'failed') AND owner = ?
                ORDER BY send_at ASC
-            """).fetchall()
+            """, (owner or "",)).fetchall()
            conn.close()
            return {"scheduled": [
                {
@@ -2023,7 +2058,10 @@ def setup_email_routes():
        import sqlite3
        try:
            conn = sqlite3.connect(SCHEDULED_DB)
-            conn.execute("DELETE FROM scheduled_emails WHERE id = ? AND status = 'pending'", (sid,))
+            conn.execute(
+                "DELETE FROM scheduled_emails WHERE id = ? AND status = 'pending' AND owner = ?",
+                (sid, owner or ""),
+            )
            conn.commit()
            conn.close()
            return {"success": True}
@@ -2035,7 +2073,7 @@ def setup_email_routes():
    async def resolve_contact(name: str = Query(..., description="Name to search for"), owner: str = Depends(require_owner)):
        """Search Sent folder for a contact by name. Returns matching email addresses."""
        try:
-            with _imap() as conn:
+            with _imap(owner=owner) as conn:
                matches = {}
                for folder in ["Sent", "INBOX", "Drafts"]:
                    try:
@@ -2133,12 +2171,9 @@ def setup_email_routes():
            outer.attach(body_container)
            _attach_compose_uploads(outer, req.attachments)

-        # Build recipient list
-        recipients = [r.strip() for r in req.to.split(",") if r.strip()]
-        if req.cc:
-            recipients.extend([r.strip() for r in req.cc.split(",") if r.strip()])
-        if req.bcc:
-            recipients.extend([r.strip() for r in req.bcc.split(",") if r.strip()])
+        # Build recipient list (parse the address grammar so display names with
+        # commas don't get split into broken envelope addresses)
+        recipients = _envelope_recipients(req.to, req.cc, req.bcc)

        # Serialize what the background task needs so the request object can be GC'd
        outer_bytes = outer.as_bytes()
@@ -2146,6 +2181,7 @@ def setup_email_routes():
        _from = cfg["from_address"]
        _smtp_host = cfg["smtp_host"]
        _smtp_port = cfg["smtp_port"]
+        _smtp_security = cfg.get("smtp_security")
        _smtp_user = cfg["smtp_user"]
        _smtp_pw = cfg["smtp_password"]
        _recipients = list(recipients)
@@ -2163,6 +2199,7 @@ def setup_email_routes():
                    {
                        "smtp_host": _smtp_host,
                        "smtp_port": _smtp_port,
+                        "smtp_security": _smtp_security,
                        "smtp_user": _smtp_user,
                        "smtp_password": _smtp_pw,
                    },
@@ -2417,7 +2454,7 @@ def setup_email_routes():
        """Generate a quick AI summary of an email body."""
        try:
            from src.endpoint_resolver import resolve_endpoint
-            from src.llm_core import _uses_max_completion_tokens
+            from src.llm_core import _uses_max_completion_tokens, _restricts_temperature
            import requests as _req

            body = data.get("body", "")
@@ -2474,6 +2511,9 @@ def setup_email_routes():
                "temperature": 0.3,
                "stream": False,
            }
+            # Reasoning models (o1/o3/o4/gpt-5) reject an explicit temperature.
+            if _restricts_temperature(model):
+                payload.pop("temperature", None)
            resp = await asyncio.to_thread(
                _req.post, url, json=payload, headers=req_headers, timeout=180
            )
@@ -2585,7 +2625,7 @@ def setup_email_routes():
                    # `api_key` field.
                    from core.database import SessionLocal as _SL, Session as _CS
                    _db = _SL()
-                    sess = _db.query(_CS).filter(_CS.id == session_id).first()
+                    sess = _db.query(_CS).filter(_CS.id == session_id, _CS.owner == owner).first()
                    if sess and sess.endpoint_url:
                        url = sess.endpoint_url
                        # Some sessions stored headers double-encoded (a JSON
@@ -2644,9 +2684,10 @@ def setup_email_routes():
            # Manual AI Reply should feel immediate. The heavier context mining
            # can involve multiple IMAP folder searches and attachment parsing;
            # reserve that for callers that explicitly opt out of fast mode.
+            # Owner-scoped so pre-retrieval never crosses tenants.
            context_snippets, _terms = ([], [])
            if not fast_reply:
-                context_snippets, _terms = _pre_retrieve_context(original_body, to)
+                context_snippets, _terms = _pre_retrieve_context(original_body, to, owner=owner)

            # NEW: also pull the last few emails from the original sender +
            # their attachments. The "to" field on this endpoint is the
@@ -2662,6 +2703,7 @@ def setup_email_routes():
                        exclude_uid=source_uid,
                        exclude_folder=source_folder,
                        limit=3,
+                        owner=owner,
                    )
                except Exception as _e:
                    logger.warning(f"sender-thread-context failed: {_e}")
@@ -2723,7 +2765,7 @@ def setup_email_routes():
            # Configured fallback chains last.
            for cand in resolve_utility_fallback_candidates(owner=owner) or []:
                _add(*cand)
-            for cand in resolve_chat_fallback_candidates() or []:
+            for cand in resolve_chat_fallback_candidates(owner=owner) or []:
                _add(*cand)
            try:
                reply = await llm_call_async_with_fallback(
@@ -2814,13 +2856,16 @@ def setup_email_routes():
        import uuid as _uuid
        db = SessionLocal()
        try:
-            row = db.query(EmailAccount).filter(EmailAccount.is_default == True).first()  # noqa: E712
+            q = db.query(EmailAccount).filter(EmailAccount.is_default == True)  # noqa: E712
+            if owner:
+                q = q.filter(EmailAccount.owner == owner)
+            row = q.first()
            if row is None:
-                row = EmailAccount(id=_uuid.uuid4().hex, name="Default", is_default=True, enabled=True)
+                row = EmailAccount(id=_uuid.uuid4().hex, owner=owner, name="Default", is_default=True, enabled=True)
                db.add(row)
            field_map = {
                "smtp_host": "smtp_host", "smtp_port": "smtp_port", "smtp_user": "smtp_user",
-                "imap_host": "imap_host", "imap_port": "imap_port", "imap_user": "imap_user",
+                "smtp_security": "smtp_security", "imap_host": "imap_host", "imap_port": "imap_port", "imap_user": "imap_user",
                "imap_starttls": "imap_starttls", "email_from": "from_address",
            }
            for in_key, col_name in field_map.items():
@@ -2838,6 +2883,10 @@ def setup_email_routes():
                row.imap_password = _enc(data["imap_password"])
            if data.get("smtp_password"):
                row.smtp_password = _enc(data["smtp_password"])
+            clear_q = db.query(EmailAccount).filter(EmailAccount.id != row.id)
+            if owner:
+                clear_q = clear_q.filter(EmailAccount.owner == owner)
+            clear_q.update({EmailAccount.is_default: False})
            db.commit()
        finally:
            db.close()
@@ -2902,6 +2951,7 @@ def setup_email_routes():
                    "imap_starttls": bool(r.imap_starttls),
                    "smtp_host": r.smtp_host or "",
                    "smtp_port": int(r.smtp_port or 465),
+                    "smtp_security": _smtp_security_mode({"smtp_security": getattr(r, "smtp_security", ""), "smtp_port": r.smtp_port}),
                    "smtp_user": r.smtp_user or "",
                    "from_address": r.from_address or "",
                    "has_imap_password": bool(r.imap_password),
@@ -2934,6 +2984,7 @@ def setup_email_routes():
                imap_starttls=bool(data.get("imap_starttls", True)),
                smtp_host=(data.get("smtp_host") or "").strip(),
                smtp_port=int(data.get("smtp_port") or 465),
+                smtp_security=_smtp_security_mode({"smtp_security": data.get("smtp_security"), "smtp_port": data.get("smtp_port") or 465}),
                smtp_user=(data.get("smtp_user") or "").strip(),
                smtp_password=_enc(data.get("smtp_password") or ""),
                from_address=(data.get("from_address") or "").strip(),
@@ -2977,6 +3028,8 @@ def setup_email_routes():
            for key in ("imap_port", "smtp_port"):
                if data.get(key) not in (None, ""):
                    setattr(row, key, int(data[key]))
+            if "smtp_security" in data:
+                row.smtp_security = _smtp_security_mode({"smtp_security": data.get("smtp_security"), "smtp_port": data.get("smtp_port") or row.smtp_port})
            for key in ("imap_starttls", "enabled"):
                if key in data:
                    setattr(row, key, bool(data[key]))
@@ -3061,6 +3114,7 @@ def setup_email_routes():
                    "imap_starttls": bool(row.imap_starttls),
                    "smtp_host": row.smtp_host or "",
                    "smtp_port": row.smtp_port or 465,
+                    "smtp_security": _smtp_security_mode({"smtp_security": getattr(row, "smtp_security", ""), "smtp_port": row.smtp_port}),
                    "smtp_user": row.smtp_user or "",
                    "smtp_password": _decrypt(row.smtp_password or ""),
                }
@@ -3093,13 +3147,12 @@ def setup_email_routes():
            # port (Dovecot on 31143, etc.) would always fail the SSL
            # handshake because they're not actually wrapped in TLS.
            try:
-                if imap_starttls:
-                    conn = imaplib.IMAP4(imap_host, imap_port, timeout=10)
-                    conn.starttls()
-                elif imap_port == 993:
-                    conn = imaplib.IMAP4_SSL(imap_host, imap_port, timeout=10)
-                else:
-                    conn = imaplib.IMAP4(imap_host, imap_port, timeout=10)
+                conn = _open_imap_connection(
+                    imap_host,
+                    imap_port,
+                    starttls=imap_starttls,
+                    timeout=_IMAP_TIMEOUT_SECONDS,
+                )
                try:
                    conn.login(imap_user, imap_pass)
                    imap_result = {"ok": True}
@@ -3112,14 +3165,16 @@ def setup_email_routes():
        smtp_host = (body.get("smtp_host") or "").strip()
        if smtp_host:
            smtp_port = int(body.get("smtp_port") or 465)
+            smtp_security = _smtp_security_mode({"smtp_security": body.get("smtp_security"), "smtp_port": smtp_port})
            smtp_user = (body.get("smtp_user") or imap_user).strip()
            smtp_pass = body.get("smtp_password") or imap_pass
            try:
-                if smtp_port == 587:
-                    smtp = smtplib.SMTP(smtp_host, smtp_port, timeout=10)
-                    smtp.starttls()
-                else:
+                if smtp_security == "ssl":
                    smtp = smtplib.SMTP_SSL(smtp_host, smtp_port, timeout=10)
+                else:
+                    smtp = smtplib.SMTP(smtp_host, smtp_port, timeout=10)
+                    if smtp_security == "starttls":
+                        smtp.starttls()
                try:
                    smtp.login(smtp_user, smtp_pass)
                    smtp_result = {"ok": True}
--- a/routes/embedding_routes.py
+++ b/routes/embedding_routes.py
@@ -86,7 +86,8 @@ def _load_custom_endpoint() -> dict:
    """Load the saved custom embedding endpoint, if any."""
    try:
        if os.path.exists(_ENDPOINT_FILE):
-            return json.loads(Path(_ENDPOINT_FILE).read_text(encoding="utf-8"))
+            data = json.loads(Path(_ENDPOINT_FILE).read_text(encoding="utf-8"))
+            return data if isinstance(data, dict) else {}
    except Exception:
        pass
    return {}
@@ -160,7 +161,7 @@ def setup_embedding_routes():
        _downloading[model_name] = True
        try:
            # Run in thread to not block the event loop
-            loop = asyncio.get_event_loop()
+            loop = asyncio.get_running_loop()
            cache = _cache_dir()
            await loop.run_in_executor(
                None,
@@ -242,6 +243,18 @@ def setup_embedding_routes():
        if not url:
            raise HTTPException(400, "URL is required")

+        # SSRF hardening: validate the user-supplied URL before any outbound
+        # request. Local-first means loopback/LAN endpoints are allowed by
+        # default; non-HTTP(S) schemes and the cloud metadata range are always
+        # rejected. Set EMBEDDING_BLOCK_PRIVATE_IPS=true for full lockdown.
+        from src.url_safety import check_outbound_url
+        ok, reason = check_outbound_url(
+            url,
+            block_private=os.getenv("EMBEDDING_BLOCK_PRIVATE_IPS", "false").lower() == "true",
+        )
+        if not ok:
+            raise HTTPException(400, f"Rejected endpoint URL: {reason}")
+
        # Quick health check
        try:
            import httpx
--- a/routes/font_routes.py
+++ b/routes/font_routes.py
@@ -5,6 +5,15 @@ from fastapi import APIRouter

 CUSTOM_FONTS_DIR = os.path.join("static", "fonts", "custom")
 FONT_EXTENSIONS = {".ttf", ".otf", ".woff", ".woff2"}
+FAMILY_SUFFIX_WORDS = ("Display", "Rounded", "Serif", "Sans", "Mono", "Code", "Text")
+
+
+def _split_family_token(token):
+    """Split every trailing suffix word off a compact family name, keeping the brand whole."""
+    stacked = re.fullmatch(rf"(.+?)((?:{'|'.join(FAMILY_SUFFIX_WORDS)})+)", token)
+    if stacked:  # "NotoSansMono" -> brand "Noto" + suffix run "SansMono" -> "Noto Sans Mono"
+        return f"{stacked.group(1)} " + re.sub(r'(?<=[a-z])(?=[A-Z])', ' ', stacked.group(2))
+    return re.sub(r'(?<=[a-z])(?=[A-Z])', ' ', token)


 def _derive_family(filename):
@@ -15,10 +24,9 @@ def _derive_family(filename):
        r'[-_ ]?(Thin|ExtraLight|UltraLight|Light|Regular|Medium|SemiBold|DemiBold|Bold|ExtraBold|UltraBold|Black|Heavy|Italic|Oblique|Variable|VF)$',
        '', name, flags=re.IGNORECASE
    )
-    # Insert spaces before uppercase runs: "JetBrainsMono" → "Jet Brains Mono"
-    name = re.sub(r'(?<=[a-z])(?=[A-Z])', ' ', name)
    # Replace dashes/underscores with spaces
    name = re.sub(r'[-_]+', ' ', name).strip()
+    name = " ".join(_split_family_token(part) for part in name.split())
    return name or filename


--- a/routes/gallery_helpers.py
+++ b/routes/gallery_helpers.py
@@ -32,10 +32,21 @@ def _extract_exif(content: bytes) -> dict:
        from PIL import Image
        from io import BytesIO
        img = Image.open(BytesIO(content))
+        # Read the raw EXIF before any transpose: exif_transpose strips the
+        # orientation tag and with it the parsed EXIF view.
+        exif = img._getexif() if hasattr(img, '_getexif') else None
+
+        # Record DISPLAY dimensions (EXIF-rotated), matching upload_handler.
+        # A phone photo with Orientation 6/8 is stored landscape but shown
+        # portrait, so the raw width/height swap the aspect ratio.
+        try:
+            from PIL import ImageOps
+            img = ImageOps.exif_transpose(img) or img
+        except Exception:
+            pass
        result["width"] = img.width
        result["height"] = img.height

-        exif = img._getexif() if hasattr(img, '_getexif') else None
        if not exif:
            return result

@@ -110,9 +121,17 @@ def _image_to_dict(img: GalleryImage, session_name: str = None) -> Dict[str, Any


 def _owner_filter(q, user):
-    """Apply owner filtering to a gallery query."""
+    """Apply owner filtering to a gallery query.
+
+    When auth is disabled (single-user mode) get_current_user returns None
+    and there is no per-user scoping. The main library list and stats already
+    treat None as "show everything" (`if user is not None`), so this helper
+    must too — otherwise the tag/model filter sidebars come back empty and the
+    tag-cleanup endpoints (clear-user-tags, clear-ai-tags, dedupe-tags)
+    silently affect zero rows in the most common self-hosted deployment.
+    """
    if user is None:
-        return q.filter(False)
+        return q
    return q.filter(GalleryImage.owner == user)


--- a/routes/gallery_routes.py
+++ b/routes/gallery_routes.py
@@ -3,6 +3,9 @@
 import os
 import hashlib
 import logging
+import re
+import uuid
+from pathlib import Path
 from typing import Dict, Any, Optional

 from fastapi import APIRouter, HTTPException, Query, Request
@@ -17,6 +20,14 @@ from routes.gallery_helpers import (

 logger = logging.getLogger(__name__)

+
+def _sanitize_gallery_filename(filename: str) -> str:
+    """Reduce *filename* to a bare, ASCII-safe basename that can be joined under generated_images."""
+    basename = Path(filename or "").name
+    cleaned = re.sub(r"[^A-Za-z0-9._-]", "_", basename)[:128]
+    unusable = cleaned in {"", ".", ".."}
+    return uuid.uuid4().hex[:12] if unusable else cleaned
+
 def setup_gallery_routes() -> APIRouter:
    router = APIRouter(tags=["gallery"])

@@ -122,7 +133,7 @@ def setup_gallery_routes() -> APIRouter:
            content = await file.read()
            img_dir = Path("data/generated_images")
            img_dir.mkdir(parents=True, exist_ok=True)
-            img_path = img_dir / img.filename
+            img_path = img_dir / _sanitize_gallery_filename(img.filename)
            img_path.write_bytes(content)

            # Refresh dimensions in case the editor resized the canvas.
@@ -912,6 +923,16 @@ def setup_gallery_routes() -> APIRouter:
        body = await request.json()
        # Use endpoint from request body (editor dropdown) or fall back to DB lookup
        base = (body.pop("_endpoint", "") or "").rstrip("/")
+        # SSRF hardening: validate a client-supplied endpoint before any
+        # outbound request (mirrors routes/embedding_routes.py).
+        if base:
+            from src.url_safety import check_outbound_url
+            ok, reason = check_outbound_url(
+                base,
+                block_private=os.getenv("IMAGE_BLOCK_PRIVATE_IPS", "false").lower() == "true",
+            )
+            if not ok:
+                raise HTTPException(400, f"Rejected endpoint URL: {reason}")
        chosen_model = (body.pop("_model", "") or "").strip()
        api_key = None
        if not base:
@@ -1104,6 +1125,18 @@ def setup_gallery_routes() -> APIRouter:
            raise HTTPException(400, "No image provided")

        endpoint = (body.get("_endpoint") or "").rstrip("/")
+        # SSRF hardening: a client-supplied endpoint is fetched server-side
+        # below, so validate it first (mirrors routes/embedding_routes.py).
+        # Local-first means loopback/LAN is allowed by default; the cloud
+        # metadata range and non-HTTP(S) schemes are always rejected.
+        if endpoint:
+            from src.url_safety import check_outbound_url
+            ok, reason = check_outbound_url(
+                endpoint,
+                block_private=os.getenv("IMAGE_BLOCK_PRIVATE_IPS", "false").lower() == "true",
+            )
+            if not ok:
+                raise HTTPException(400, f"Rejected endpoint URL: {reason}")
        model = (body.get("_model") or "").strip()

        base = endpoint
@@ -1125,7 +1158,7 @@ def setup_gallery_routes() -> APIRouter:
            db = SessionLocal()
            try:
                for ep in db.query(ModelEndpoint).all():
-                    if ep.base_url.rstrip("/").rstrip("/v1") == base.rstrip("/v1"):
+                    if ep.base_url.rstrip("/").removesuffix("/v1").rstrip("/") == base.rstrip("/").removesuffix("/v1").rstrip("/"):
                        api_key = ep.api_key
                        break
            finally:
@@ -1696,7 +1729,7 @@ def setup_gallery_routes() -> APIRouter:
                return {"error": "No vision-capable endpoint configured"}

            # Call vision model — format differs between Anthropic and OpenAI
-            from src.llm_core import _detect_provider
+            from src.llm_core import _detect_provider, _restricts_temperature, _uses_max_completion_tokens
            provider = _detect_provider(chat_url)
            tag_prompt = (
                "Analyze this photo. Return ONLY a comma-separated list of tags. "
@@ -1721,6 +1754,7 @@ def setup_gallery_routes() -> APIRouter:
                    }],
                }
            else:
+                _tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model_name) else "max_tokens"
                payload = {
                    "model": model_name,
                    "messages": [{
@@ -1730,9 +1764,12 @@ def setup_gallery_routes() -> APIRouter:
                            {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}},
                        ],
                    }],
-                    "max_tokens": 200,
+                    _tok_key: 200,
                    "temperature": 0.3,
                }
+                # Reasoning models (o1/o3/o4/gpt-5) reject an explicit temperature.
+                if _restricts_temperature(model_name):
+                    payload.pop("temperature", None)

            h = {"Content-Type": "application/json"}
            if headers:
--- a/routes/history_routes.py
+++ b/routes/history_routes.py
@@ -58,7 +58,7 @@ def setup_history_routes(session_manager) -> APIRouter:
                    .all()
                )
                import json as _json
-                history_dict = []
+                db_history = []
                for m in db_messages:
                    entry = {"role": m.role, "content": m.content}
                    meta = {}
@@ -71,12 +71,19 @@ def setup_history_routes(session_manager) -> APIRouter:
                        meta["timestamp"] = m.timestamp.isoformat() + "Z"
                    if meta:
                        entry["metadata"] = meta
-                    history_dict.append(entry)
-                if history_dict:
+                    db_history.append(entry)
+                if db_history:
+                    # Rebuild in-memory history from the full set so hidden
+                    # messages (e.g. compaction summaries) are kept for AI context.
                    session.history = [
                        ChatMessage(role=m["role"], content=m["content"], metadata=m.get("metadata"))
-                        for m in history_dict
+                        for m in db_history
                    ]
+                # Response excludes hidden messages, matching the in-memory path.
+                history_dict = [
+                    m for m in db_history
+                    if not (m.get("metadata") or {}).get("hidden")
+                ]
            except Exception as e:
                logger.error(f"DB fallback failed for {session_id}: {e}")
            finally:
@@ -265,7 +272,7 @@ def setup_history_routes(session_manager) -> APIRouter:
                db_messages = (
                    db.query(DbChatMessage)
                    .filter(DbChatMessage.session_id == session_id, DbChatMessage.role == 'assistant')
-                    .order_by(DbChatMessage.created_at.desc())
+                    .order_by(DbChatMessage.timestamp.desc())
                    .first()
                )
                if db_messages:
@@ -320,7 +327,7 @@ def setup_history_routes(session_manager) -> APIRouter:
                db_msg = (
                    db.query(DbChatMessage)
                    .filter(DbChatMessage.session_id == session_id, DbChatMessage.role == 'assistant')
-                    .order_by(DbChatMessage.created_at.desc())
+                    .order_by(DbChatMessage.timestamp.desc())
                    .first()
                )
                if db_msg:
@@ -401,7 +408,7 @@ def setup_history_routes(session_manager) -> APIRouter:
                db_messages = (
                    db.query(DbChatMessage)
                    .filter(DbChatMessage.session_id == session_id)
-                    .order_by(DbChatMessage.created_at)
+                    .order_by(DbChatMessage.timestamp)
                    .all()
                )
                # Find last two assistant messages in DB
@@ -477,10 +484,10 @@ def setup_history_routes(session_manager) -> APIRouter:

    @router.get("/api/conversations/topics")
    async def get_conversation_topics(request: Request) -> Dict[str, Any]:
-        from src.auth_helpers import get_current_user
-        user = get_current_user(request)
+        from src.auth_helpers import require_user
+        user = require_user(request)
        try:
-            return analyze_topics(session_manager, owner=user)
+            return analyze_topics(session_manager, owner=user or None)
        except Exception as e:
            raise HTTPException(500, f"Topic analysis failed: {e}")

--- a/routes/hwfit_routes.py
+++ b/routes/hwfit_routes.py
@@ -1,87 +1,105 @@
+import re
 from copy import deepcopy

 from fastapi import APIRouter


+# Backends the manual hardware simulator accepts. Must stay a subset of what
+# services.hwfit.fit understands so a simulated box ranks like a real one:
+# "metal" routes through the Apple-Silicon path (GGUF-only, llama.cpp/Ollama),
+# the CPU backends through the RAM/offload path, cuda/rocm through vLLM.
+_MANUAL_BACKENDS = {"cuda", "rocm", "metal", "cpu_x86", "cpu_arm"}
+
+
+def _apply_manual_hardware(system, manual_mode="", manual_gpu_count="", manual_vram_gb="", manual_ram_gb="", manual_backend=""):
+    """Overwrite `system` in place with a simulated setup and return it.
+
+    Manual hardware is a "what if I had this setup" simulator — it
+    REPLACES the detected hardware instead of adding to it. (Adding
+    averaged the manual VRAM across base + manual GPUs, so "1× 400 GB"
+    on top of "2× 70 GB" only lifted the per-GPU cap to 180 GB.)
+
+    Modes other than "gpu"/"ram" return `system` untouched. Blank,
+    unparsable, NaN or infinite numbers fall back to the defaults: a
+    non-finite float would make the JSON response fail to serialize.
+    """
+    import math
+
+    def _finite(raw, default):
+        # float() accepts "inf"/"nan"/"1e999"; treat them as bad input.
+        try:
+            value = float(raw) if raw else default
+        except ValueError:
+            return default
+        return value if math.isfinite(value) else default
+
+    manual_mode = (manual_mode or "").lower()
+    if manual_mode not in {"gpu", "ram"}:
+        return system
+
+    # Replace RAM, don't add: the field is the TOTAL system memory to
+    # simulate. Zero or blank keeps the detected RAM figures.
+    override_ram_gb = max(0.0, _finite(manual_ram_gb, 0.0))
+    if override_ram_gb:
+        system["available_ram_gb"] = round(override_ram_gb, 1)
+        system["total_ram_gb"] = round(override_ram_gb, 1)
+    system["manual_hardware"] = True
+
+    if manual_mode == "ram":
+        # RAM-only simulation — wipe GPU entirely so the ranker uses
+        # CPU/RAM paths.
+        system["has_gpu"] = False
+        system["gpu_name"] = None
+        system["gpu_vram_gb"] = 0
+        system["gpu_count"] = 0
+        system["gpus"] = []
+        system["gpu_groups"] = []
+        system["backend"] = "cpu_x86"
+        system.pop("unified_memory", None)
+        return system
+
+    try:
+        count = int(manual_gpu_count) if manual_gpu_count else 1
+    except ValueError:
+        count = 1
+    count = max(1, min(count, 16))
+    vram_each = max(1.0, _finite(manual_vram_gb, 8.0))
+    backend = (manual_backend or system.get("backend") or "cuda").lower()
+    if backend not in _MANUAL_BACKENDS:
+        backend = "cuda"
+    total_vram = round(vram_each * count, 1)
+    gpu_name = f"Simulated {backend.upper()} GPU" + (f" × {count}" if count > 1 else "")
+    system["has_gpu"] = True
+    system["gpu_name"] = gpu_name
+    system["gpu_vram_gb"] = total_vram
+    system["gpu_count"] = count
+    system["gpus"] = [{"index": i, "name": gpu_name, "vram_gb": vram_each} for i in range(count)]
+    # Single homogeneous pool — vram_each here is the ACTUAL per-GPU
+    # VRAM the user entered, not an average, so raising it lifts the
+    # per-GPU cap (GGUF, tensor-parallel math) all the way up.
+    system["gpu_groups"] = [{
+        "name": gpu_name,
+        "vram_each": vram_each,
+        "count": count,
+        "indices": list(range(count)),
+        "vram_total": total_vram,
+    }]
+    system["homogeneous"] = True
+    system["backend"] = backend
+    # Apple Silicon shares one unified memory pool with the GPU; flag it so
+    # the API/UI report it the way real Metal detection does. Discrete GPUs
+    # (cuda/rocm) and the CPU backends carry separate VRAM, so clear any
+    # stale flag a previous detection left on the dict.
+    if backend == "metal":
+        system["unified_memory"] = True
+    else:
+        system.pop("unified_memory", None)
+    return system
+
+
 def setup_hwfit_routes():
    router = APIRouter(prefix="/api/hwfit", tags=["hwfit"])

-    def _apply_manual_hardware(system, manual_mode="", manual_gpu_count="", manual_vram_gb="", manual_ram_gb="", manual_backend=""):
-        """Manual hardware is a "what if I had this setup" simulator —
-        REPLACES the detected hardware entirely instead of adding to it.
-
-        The previous additive behavior averaged the manual VRAM across
-        all GPUs (base + manual), which meant adding "1× 400 GB" on top
-        of "2× 70 GB" only nudged the per-GPU cap from 70 to 180 GB
-        (= 540 / 3), so GGUF models bigger than that still didn't surface
-        — exactly the "cap stuck at detected level" bug the user hit.
-        """
-        manual_mode = (manual_mode or "").lower()
-        if manual_mode not in {"gpu", "ram"}:
-            return system
-
-        try:
-            override_ram_gb = float(manual_ram_gb) if manual_ram_gb else 0
-        except ValueError:
-            override_ram_gb = 0
-        override_ram_gb = max(0.0, override_ram_gb)
-        if override_ram_gb:
-            # Replace RAM, don't add. The number in the field is the
-            # TOTAL system memory the user wants to simulate.
-            system["available_ram_gb"] = round(override_ram_gb, 1)
-            system["total_ram_gb"] = round(override_ram_gb, 1)
-        system["manual_hardware"] = True
-
-        if manual_mode == "ram":
-            # RAM-only simulation — wipe GPU entirely so the ranker uses
-            # CPU/RAM paths.
-            system["has_gpu"] = False
-            system["gpu_name"] = None
-            system["gpu_vram_gb"] = 0
-            system["gpu_count"] = 0
-            system["gpus"] = []
-            system["gpu_groups"] = []
-            system["backend"] = "cpu_x86"
-            return system
-
-        try:
-            count = int(manual_gpu_count) if manual_gpu_count else 1
-        except ValueError:
-            count = 1
-        try:
-            vram_each = float(manual_vram_gb) if manual_vram_gb else 8.0
-        except ValueError:
-            vram_each = 8.0
-        count = max(1, min(count, 16))
-        vram_each = max(1.0, vram_each)
-        backend = (manual_backend or system.get("backend") or "cuda").lower()
-        if backend not in {"cuda", "rocm", "cpu_x86", "cpu_arm"}:
-            backend = "cuda"
-        total_vram = round(vram_each * count, 1)
-        gpu_name = f"Simulated {backend.upper()} GPU" + (f" × {count}" if count > 1 else "")
-        system["has_gpu"] = True
-        system["gpu_name"] = gpu_name
-        system["gpu_vram_gb"] = total_vram
-        system["gpu_count"] = count
-        system["gpus"] = [
-            {"index": i, "name": gpu_name, "vram_gb": vram_each}
-            for i in range(count)
-        ]
-        # Single homogeneous pool — vram_each here is the ACTUAL per-GPU
-        # VRAM the user entered, not an average. That's the whole point:
-        # raising vram_each lifts the per-GPU cap (GGUF, tensor-parallel
-        # math) all the way up, not just by a small fraction.
-        system["gpu_groups"] = [{
-            "name": gpu_name,
-            "vram_each": vram_each,
-            "count": count,
-            "indices": list(range(count)),
-            "vram_total": total_vram,
-        }]
-        system["homogeneous"] = True
-        system["backend"] = backend
-        return system
-
    @router.get("/system")
    def get_system(host: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False):
        """Detect and return current system hardware info. Pass host=user@server for remote.
@@ -181,6 +199,64 @@ def setup_hwfit_routes():
        results = rank_models(system, use_case=use_case or None, limit=limit, search=search or None, sort=sort, quant=quant or None, target_context=target_context, fit_only=fit_only)
        return {"system": system, "models": results}

+    @router.get("/profiles")
+    def get_serve_profiles(model: str = "", host: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, serve_weights_gb: float = 0.0, serve_quant: str = ""):
+        """Compute llama.cpp serve profiles (Quality/Balanced/Speed) for `model`
+        against the detected hardware on `host` (or local). Returns concrete
+        flags (n_gpu_layers, n_cpu_moe, cache_type, ctx) the serve UI can apply.
+
+        `model` is matched against the catalog by name (exactly, then by
+        normalized suffix). A model that is not in the catalog (e.g. an ad-hoc
+        HF repo) yields no profiles and the UI keeps its manual flags.
+        """
+        from services.hwfit.hardware import detect_system
+        from services.hwfit.models import get_models
+        from services.hwfit.profiles import compute_serve_profiles
+        system = detect_system(host=host, ssh_port=ssh_port, platform=platform, fresh=fresh)
+        if system.get("error"):
+            return {"system": system, "profiles": [], "error": system["error"]}
+        catalog = {m.get("name"): m for m in (get_models() or [])}
+
+        def _norm(s):
+            # Lowercase, then drop the org/ prefix, a trailing gguf marker and
+            # a trailing quant tag, so local folder "DeepSeek-Coder-V2-Lite-Instruct-GGUF"
+            # matches catalog entry "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct".
+            s = (s or "").lower().strip()
+            s = s.split("/")[-1]                     # drop org prefix
+            s = re.sub(r"[-_.]?gguf$", "", s)        # drop trailing gguf marker
+            s = re.sub(r"[-_.](q\d[^/]*|iq\d[^/]*|fp8|bf16|f16|awq[^/]*|gptq[^/]*)$", "", s)
+            return s
+
+        m = catalog.get(model)
+        if m is None and model:
+            want = _norm(model)
+            for name, entry in catalog.items():
+                nn = _norm(name)
+                # `want` can normalize to "" ("/", "gguf"); "" would suffix-match every entry.
+                if nn and want and (nn == want or want.endswith(nn) or nn.endswith(want)):
+                    m = entry
+                    break
+        if m is None:
+            return {"system": system, "profiles": [], "error": "model not in catalog"}
+        # Surface the model's trained context limit so the serve UI can clamp a
+        # user-typed context down to it (asking for ctx > n_ctx_train overflows
+        # and, with a quantized KV cache, can crash the GPU).
+        model_ctx_max = 0
+        for k in ("context_length", "max_position_embeddings", "n_ctx_train", "context"):
+            v = m.get(k)
+            if isinstance(v, (int, float)) and v > 0:
+                model_ctx_max = int(v)
+                break
+        return {
+            "system": system,
+            "profiles": compute_serve_profiles(
+                system, m,
+                serve_weights_gb=(serve_weights_gb or None),
+                serve_quant=(serve_quant or None),
+            ),
+            "model_ctx_max": model_ctx_max,
+        }
+
    @router.get("/image-models")
    def get_image_models(sort: str = "fit", search: str = "", host: str = "", gpu_count: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, manual_mode: str = "", manual_gpu_count: str = "", manual_vram_gb: str = "", manual_ram_gb: str = "", manual_backend: str = "", ignore_detected_gpu: bool = False, ignore_detected_ram: bool = False):
        """Rank image generation models against detected hardware."""
--- a/routes/memory_routes.py
+++ b/routes/memory_routes.py
@@ -27,7 +27,7 @@ from src.request_models import MemoryAddRequest
 from core.database import SessionLocal
 from src.llm_core import llm_call_async
 from services.memory.memory_extractor import audit_memories
-from src.auth_helpers import get_current_user
+from src.auth_helpers import get_current_user, require_user
 from src.endpoint_resolver import resolve_endpoint

 logger = logging.getLogger(__name__)
@@ -191,8 +191,7 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
    @router.post("/extract")
    async def extract_memory(request: Request, session: str = Form(...)) -> Dict[str, List[str]]:
        """Analyze a session's chat history and return memory suggestions."""
-        if not get_current_user(request):
-            raise HTTPException(401, "Not authenticated")
+        require_user(request)
        try:
            sess = session_manager.get_session(session)
        except KeyError:
--- a/routes/model_routes.py
+++ b/routes/model_routes.py
@@ -1,73 +1,213 @@
 # routes/model_routes.py
 """Routes for model and provider management."""
+import os
 import re
 import uuid
 import json
+import socket
 import time as _time
 import logging
 import httpx
 from datetime import datetime
 from typing import List, Dict, Any, Optional
-from urllib.parse import urlparse
+from urllib.parse import urlparse, urlunparse
 from fastapi import APIRouter, HTTPException, Form, Query, Body, Request
 from pydantic import BaseModel
 from fastapi.responses import StreamingResponse
 from core.database import SessionLocal, ModelEndpoint, Session as DbSession
 from core.middleware import require_admin
-from src.llm_core import _detect_provider, ANTHROPIC_MODELS
+from src.llm_core import _detect_provider, _host_match, ANTHROPIC_MODELS
 from src.settings import load_settings as _load_settings, save_settings as _save_settings
-from src.endpoint_resolver import normalize_base as _normalize_base, build_chat_url
-from src.auth_helpers import owner_filter
+from src.endpoint_resolver import (
+    normalize_base as _normalize_base,
+    build_chat_url,
+    build_models_url,
+    build_headers,
+)
+from src.auth_helpers import _auth_disabled, owner_filter

 logger = logging.getLogger(__name__)

+_SPEECH_ENDPOINT_SETTINGS = (
+    ("tts_provider", "tts_model", "tts-1", "Text to Speech"),
+    ("stt_provider", "stt_model", "base", "Speech to Text"),
+)

-def _anthropic_api_root(base: str) -> str:
-    """Return Anthropic's API root without duplicating /v1."""
-    base = (base or "").strip().rstrip("/")
-    host = urlparse(base).hostname or ""
-    if host.endswith("anthropic.com") and base.endswith("/v1"):
-        return base[:-3].rstrip("/")
-    return base
+_ENDPOINT_SETTING_FIELDS = {
+    "default_endpoint_id":  ("default_model",  "Default Model"),
+    "utility_endpoint_id":  ("utility_model",   "Utility Model"),
+    "research_endpoint_id": ("research_model",  "Deep Research"),
+    "task_endpoint_id":     ("task_model",       "Background Tasks"),
+}
+
+_ENDPOINT_FALLBACK_FIELDS = {
+    "default_model_fallbacks": "Default Model Fallbacks",
+    "utility_model_fallbacks": "Utility Model Fallbacks",
+    "vision_model_fallbacks":  "Vision Model Fallbacks",
+}


-def _ollama_api_root(base: str) -> str:
-    """Return Ollama's native API root without depending on deferred imports."""
-    base = (base or "").strip().rstrip("/")
-    parsed = urlparse(base)
-    host = parsed.hostname or ""
-    path = (parsed.path or "").rstrip("/")
-    if path.endswith("/api"):
-        return base
-    if host.endswith("ollama.com"):
-        root = f"{parsed.scheme}://{parsed.netloc}" if parsed.scheme and parsed.netloc else "https://ollama.com"
-        return root.rstrip("/") + "/api"
-    return base
+def _speech_settings_using_endpoint(settings: dict, ep_id: str) -> list:
+    """List the labels of the speech settings whose provider is endpoint *ep_id*."""
+    wanted = f"endpoint:{ep_id}"
+    labels = []
+    for provider_key, _model_key, _default_model, label in _SPEECH_ENDPOINT_SETTINGS:
+        if (settings.get(provider_key) or "") == wanted:
+            labels.append(label)
+    return labels


-def _models_url(base: str) -> str:
-    """Return provider-specific model-list URL for route-local probing."""
-    provider = _detect_provider(base)
-    host = urlparse(base).hostname or ""
-    if provider == "anthropic" or host.endswith("anthropic.com"):
-        return _anthropic_api_root(base) + "/v1/models"
-    if provider == "ollama" or host.endswith("ollama.com"):
-        return _ollama_api_root(base) + "/tags"
-    return base.rstrip("/") + "/models"
+def _clear_speech_settings_for_endpoint(settings: dict, ep_id: str) -> list:
+    """Disable every speech setting bound to endpoint *ep_id*; return the affected labels."""
+    wanted = f"endpoint:{ep_id}"
+    reset_labels = []
+    for provider_key, model_key, fallback_model, label in _SPEECH_ENDPOINT_SETTINGS:
+        if (settings.get(provider_key) or "") != wanted:
+            continue
+        settings[provider_key], settings[model_key] = "disabled", fallback_model
+        reset_labels.append(label)
+    return reset_labels


-def _provider_headers(api_key: Optional[str], base: str) -> Dict[str, str]:
-    """Build provider auth headers without depending on import-time stubs."""
-    if not api_key:
-        return {}
-    provider = _detect_provider(base)
-    host = urlparse(base).hostname or ""
-    if provider == "anthropic" or host.endswith("anthropic.com"):
-        return {
-            "x-api-key": api_key,
-            "anthropic-version": "2023-06-01",
-        }
-    return {"Authorization": f"Bearer {api_key}"}
+def _endpoint_settings_using_endpoint(settings: dict, ep_id: str, *, include_speech: bool = False) -> list:
+    """Return labels for settings and fallback chains that reference an endpoint."""
+    affected = []
+    for ep_key, (_, label) in _ENDPOINT_SETTING_FIELDS.items():
+        if (settings.get(ep_key) or "") == ep_id:
+            affected.append(label)
+    for fallback_key, label in _ENDPOINT_FALLBACK_FIELDS.items():
+        chain = settings.get(fallback_key)  # a malformed (non-list) chain holds no references
+        if isinstance(chain, list) and any(isinstance(e, dict) and (e.get("endpoint_id") or "") == ep_id for e in chain):
+            affected.append(label)
+    if include_speech:
+        affected.extend(_speech_settings_using_endpoint(settings, ep_id))
+    return affected
+
+
+def _clear_endpoint_settings_for_endpoint(settings: dict, ep_id: str, *, include_speech: bool = False) -> list:
+    """Strip endpoint *ep_id* out of *settings* in place; return the labels that changed."""
+    def _refers(entry) -> bool:
+        # Only dict entries carrying a matching "endpoint_id" count as references.
+        return isinstance(entry, dict) and (entry.get("endpoint_id") or "") == ep_id
+    touched = []
+    for ep_key, (model_key, label) in _ENDPOINT_SETTING_FIELDS.items():
+        if (settings.get(ep_key) or "") != ep_id:
+            continue
+        settings[ep_key] = settings[model_key] = ""
+        touched.append(label)
+    for fallback_key, label in _ENDPOINT_FALLBACK_FIELDS.items():
+        chain = settings.get(fallback_key)
+        if not isinstance(chain, list):
+            continue
+        survivors = [entry for entry in chain if not _refers(entry)]
+        if len(survivors) < len(chain):
+            settings[fallback_key] = survivors
+            touched.append(label)
+    if include_speech:
+        touched.extend(_clear_speech_settings_for_endpoint(settings, ep_id))
+    return touched
+
+
+def _clear_user_pref_endpoint_refs(all_prefs: dict, ep_id: str) -> int:
+    """Purge ``ep_id`` from each user's prefs (the ``_users`` map, or the flat legacy dict); return sets changed."""
+    if not isinstance(all_prefs, dict):
+        return 0
+    scoped = all_prefs.get("_users")
+    if isinstance(scoped, dict):
+        candidates = list(scoped.values())
+    else:
+        candidates = [all_prefs]
+    dict_sets = (p for p in candidates if isinstance(p, dict))
+    return sum(1 for p in dict_sets if _clear_endpoint_settings_for_endpoint(p, ep_id))
+
+
+# Loopback hosts a user might type for a local model server (LM Studio,
+# llama.cpp, vLLM, …). Inside Docker these point at the *container*, not the
+# host the server actually runs on.
+_ANY_BIND_HOSTS = {"0.0.0.0", "::"}  # wildcard bind addresses (IPv4 / IPv6) copied from a server's listen setting
+_LOOPBACK_HOSTS = {"localhost", "127.0.0.1", "::1", *_ANY_BIND_HOSTS}  # true loopback names plus the wildcards above
+
+
+def _docker_host_gateway_reachable() -> bool:
+    """True only when running in a container whose host resolves as
+    ``host.docker.internal`` (compose maps it to ``host-gateway``); False on native
+    installs and unmapped containers, so the loopback rewrite is a no-op there."""
+    in_container = os.path.exists("/.dockerenv")
+    if not in_container:
+        try:
+            with open("/proc/1/cgroup", encoding="utf-8") as fh:
+                cgroup_text = fh.read()  # read once: a repeated read() at EOF returns "" and hides later markers
+                in_container = any(t in cgroup_text for t in ("docker", "containerd", "kubepods"))
+        except OSError:
+            in_container = False
+    if not in_container:
+        return False
+    try:
+        socket.getaddrinfo("host.docker.internal", None)
+        return True
+    except OSError:
+        return False
+
+def _container_loopback_reachable(base_url: str, timeout: float = 0.2) -> bool:
+    """True when the requested loopback host:port is already reachable from
+    inside the current container; malformed URLs (bad port included) are not.
+
+    This distinguishes "a model server running alongside Odysseus in the same
+    container" from "a model server running on the Docker host". Only the
+    latter should be rewritten to host.docker.internal.
+    """
+    try:
+        parsed = urlparse(base_url)
+        host = (parsed.hostname or "").lower()
+        port = parsed.port  # raises ValueError for a non-numeric or out-of-range port
+    except (ValueError, TypeError, AttributeError):
+        return False
+    if host not in _LOOPBACK_HOSTS or not port:
+        return False
+    probe_host = "::1" if host == "::1" else "127.0.0.1"
+    family = socket.AF_INET6 if probe_host == "::1" else socket.AF_INET
+    try:
+        with socket.socket(family, socket.SOCK_STREAM) as sock:
+            sock.settimeout(timeout)
+            sock.connect((probe_host, port))
+        return True
+    except OSError:
+        return False
+
+
+def _rewrite_loopback_for_docker(base_url: str, *, container_local: bool = False) -> str:
+    """Rewrite a loopback model-endpoint URL to ``host.docker.internal`` when
+    running in Docker. A URL like ``http://localhost:1234/v1`` (the LM Studio
+    default) otherwise targets the Odysseus container itself, so the probe gets
+    a connection error and the endpoint is rejected with a misleading "No
+    models found for that provider/key".
+
+    Cookbook local serves are the opposite case: Odysseus started the model
+    server inside the same container/process environment, so the saved endpoint
+    must remain container-local. In that mode, normalize a bind address such as
+    0.0.0.0 to a connectable loopback host, but do not jump to the Docker host.
+
+    URLs that cannot be parsed (including an invalid port) and non-loopback
+    URLs are returned unchanged.
+    """
+    try:
+        parsed = urlparse(base_url)
+        host = (parsed.hostname or "").lower()
+        port_part = f":{parsed.port}" if parsed.port else ""  # .port raises ValueError on a bad port
+    except (ValueError, TypeError, AttributeError):
+        return base_url
+    if host not in _LOOPBACK_HOSTS:
+        return base_url
+    # 0.0.0.0 / :: are bind addresses; 127.0.0.1 is their connectable equivalent.
+    local_url = urlunparse(parsed._replace(netloc="127.0.0.1" + port_part)) if host in _ANY_BIND_HOSTS else base_url
+    if container_local:
+        return local_url
+    # Check the gateway once, and first: without it no rewrite is possible, so the socket probe is skipped.
+    if not _docker_host_gateway_reachable():
+        return local_url
+    if _container_loopback_reachable(base_url):
+        return base_url
+    return urlunparse(parsed._replace(netloc="host.docker.internal" + port_part))


 # ── Curated model lists per provider ──
@@ -84,10 +224,13 @@ _PROVIDER_CURATED = {
        "claude-sonnet-4-5", "claude-haiku-3-5",
    ],
    "zai": [
-        "glm-5", "glm-4.7", "glm-4.7-flash",
+        "glm-5", "glm-5.1", "glm-5v-turbo", "glm-4.7", "glm-4.7-flash",
        "glm-4.6", "glm-4.6v",
        "glm-4.5", "glm-4.5v", "glm-4.5-air", "glm-4.5-flash",
    ],
+    "zai-coding": [
+        "glm-5.1", "glm-5v-turbo", "glm-5-turbo", "glm-4.7", "glm-4.5-air",
+    ],
    "deepseek": [
        "deepseek-chat", "deepseek-reasoner",
    ],
@@ -122,31 +265,40 @@ _PROVIDER_CURATED = {
    ],
 }

-# Map URL substrings → curated-list keys for providers whose _detect_provider()
+# Map hostnames → curated-list keys for providers whose _detect_provider()
 # returns a generic value (e.g. "openai") but deserve their own curated list.
 # "openrouter" is a sentinel meaning "no curation — show all models as curated".
-_URL_TO_CURATED = {
-    "z.ai": "zai",
-    "api.deepseek.com": "deepseek",
-    "api.groq.com": "groq",
-    "api.mistral.ai": "mistral",
-    "api.together.xyz": "together",
-    "api.fireworks.ai": "fireworks",
-    "generativelanguage.googleapis.com": "google",
-    "api.x.ai": "xai",
-    "openrouter.ai": "openrouter",
-    "ollama.com": "ollama",
-}
+# Entries are matched by hostname equality or subdomain suffix (via _host_match),
+# so e.g. "deepseek.com" covers api.deepseek.com without matching the substring
+# inside an unrelated URL.
+_HOST_TO_CURATED = (
+    ("z.ai", "zai"),
+    ("deepseek.com", "deepseek"),
+    ("groq.com", "groq"),
+    ("mistral.ai", "mistral"),
+    ("together.xyz", "together"),
+    ("together.ai", "together"),
+    ("fireworks.ai", "fireworks"),
+    ("googleapis.com", "google"),
+    ("x.ai", "xai"),
+    ("openrouter.ai", "openrouter"),
+    ("ollama.com", "ollama"),
+)


 def _match_provider_curated(base_url: str, provider: str) -> str:
    """Return the curated-list key for a given endpoint.

-    Checks the base URL against _URL_TO_CURATED first, then falls back
-    to the raw provider string from _detect_provider().
+    Checks path-based overrides first (for hosts serving multiple plans),
+    then matches the base URL's hostname against known providers, and
+    finally falls back to the raw provider string from _detect_provider().
    """
-    for substring, key in _URL_TO_CURATED.items():
-        if substring in (base_url or ""):
+    # Path-based overrides for hosts that serve multiple curated lists.
+    parsed = urlparse(base_url)
+    if _host_match(base_url, "z.ai") and "/api/coding" in (parsed.path or ""):
+        return "zai-coding"
+    for domain, key in _HOST_TO_CURATED:
+        if _host_match(base_url, domain):
            return key
    return provider

@@ -235,16 +387,20 @@ def _probe_single_model(base: str, api_key: str, model_id: str, timeout: int = 1
    elif provider == "ollama":
        from src.llm_core import _build_ollama_payload
        target_url = build_chat_url(base)
-        h = _provider_headers(api_key, base)
+        h = build_headers(api_key, base)
        h["Content-Type"] = "application/json"
        payload = _build_ollama_payload(model_id, messages, 0.0, 5, stream=False, tools=_test_tools)
    else:
        target_url = build_chat_url(base)
-        h = _provider_headers(api_key, base)
+        h = build_headers(api_key, base)
        h["Content-Type"] = "application/json"
-        from src.llm_core import _uses_max_completion_tokens
+        from src.llm_core import _uses_max_completion_tokens, _restricts_temperature
        _max_key = "max_completion_tokens" if _uses_max_completion_tokens(model_id) else "max_tokens"
-        payload = {"model": model_id, "messages": messages, _max_key: 5, "temperature": 0.0}
+        payload = {"model": model_id, "messages": messages, _max_key: 5}
+        # Reasoning models (o1/o3/o4/gpt-5) reject an explicit temperature, so a
+        # probe that hardcodes one falsely reports a working endpoint as failing.
+        if not _restricts_temperature(model_id):
+            payload["temperature"] = 0.0
        if _test_tools:
            payload["tools"] = _test_tools

@@ -308,7 +464,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
    base = resolve_url(_normalize_base(base_url))
    if _detect_provider(base) == "anthropic":
        # Try Anthropic's /v1/models endpoint first
-        url = _anthropic_api_root(base) + "/v1/models"
+        url = build_models_url(base)
        headers = {"anthropic-version": "2023-06-01"}
        if api_key:
            headers["x-api-key"] = api_key
@@ -331,8 +487,8 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
                return []
            logger.warning(f"Anthropic /v1/models failed, using hardcoded list: {e}")
        return list(ANTHROPIC_MODELS)
-    url = _models_url(base)
-    headers = _provider_headers(api_key, base)
+    url = build_models_url(base)
+    headers = build_headers(api_key, base)
    try:
        r = httpx.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()
@@ -343,6 +499,13 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
        if not models:
            models = [m.get("name") or m.get("model") for m in (data.get("models") or []) if m.get("name") or m.get("model")]
        if models:
+            # Z.AI coding plan omits some working models from /models;
+            # append curated-only entries for that endpoint only.
+            if _host_match(base, "z.ai") and "/api/coding" in (urlparse(base).path or ""):
+                _ck = _match_provider_curated(base, None)
+                for _e in _PROVIDER_CURATED.get(_ck, []):
+                    if _e not in set(models) and not any(m.startswith(_e) for m in models):
+                        models.append(_e)
            return models
    except httpx.HTTPStatusError as e:
        if api_key:
@@ -387,7 +550,24 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"

+    # Ollama exposes /v1/models (OpenAI-compatible) AND native /api/version,
+    # /api/tags. The OpenAI-style GET base + "/models" returns 404 when the
+    # base is the host root or the native /api root (e.g. http://localhost:11434,
+    # http://localhost:11434/api) because /models lives under /v1 there. Treat
+    # 4xx on a port-11434 / Ollama-named base as "try the native paths" rather
+    # than as a definitive offline verdict — Ollama is reachable, it just
+    # doesn't speak OpenAI on that prefix. Without this gate the quickstart
+    # marks an alive Ollama as offline whenever cached_models is empty (issue
+    # #1025): _probe_endpoint() falls through to /api/tags on the same 404, but
+    # _ping_endpoint() was returning before that fallback could run.
+    parsed_base = urlparse(base)
+    looks_like_ollama = (
+        parsed_base.port == 11434
+        or "ollama" in (parsed_base.hostname or "").lower()
+    )
+
    url = base + "/models"
+    last_error: Optional[str] = None
    try:
        r = httpx.get(url, headers=headers, timeout=timeout)
        if 300 <= r.status_code < 400:
@@ -399,17 +579,21 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
                    "error": "That is Odysseus, not a model server. Use the Ollama URL, usually http://host.docker.internal:11434/v1 in Docker.",
                }
            return {"reachable": False, "status_code": r.status_code, "error": f"HTTP {r.status_code} redirect"}
-        if r.status_code < 500:
-            return {"reachable": r.status_code < 400, "status_code": r.status_code, "error": None if r.status_code < 400 else f"HTTP {r.status_code}"}
+        if r.status_code < 400:
+            return {"reachable": True, "status_code": r.status_code, "error": None}
+        if r.status_code < 500 and not looks_like_ollama:
+            return {"reachable": False, "status_code": r.status_code, "error": f"HTTP {r.status_code}"}
+        last_error = f"HTTP {r.status_code}"
    except Exception as e:
        last_error = str(e)[:120]
-    else:
-        last_error = f"HTTP {r.status_code}"

    try:
-        parsed = urlparse(base)
-        if parsed.port == 11434 or "ollama" in (parsed.hostname or "").lower():
-            root = base[:-3].rstrip("/") if base.endswith("/v1") else base
+        if looks_like_ollama:
+            root = base
+            for suffix in ("/v1", "/api"):
+                if root.endswith(suffix):
+                    root = root[: -len(suffix)].rstrip("/")
+                    break
            for path in ("/api/version", "/api/tags"):
                try:
                    r = httpx.get(root + path, timeout=timeout)
@@ -449,6 +633,15 @@ def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) ->
    return "No models found for that provider/key."


+def _visible_models(cached_models, hidden_models):
+    """Return cached model IDs minus hidden ones; each input is a list or its JSON text (null/empty → [])."""
+    def _as_list(raw):  # decode JSON text when given a string; None, "" and JSON null all mean "no entries"
+        return (json.loads(raw) if isinstance(raw, str) and raw else raw) or []
+    all_models = _as_list(cached_models)
+    hidden = set(_as_list(hidden_models))
+    return [m for m in all_models if m not in hidden] if hidden else all_models
+
+
 def setup_model_routes(model_discovery):
    router = APIRouter(prefix="/api")

@@ -625,7 +818,7 @@ def setup_model_routes(model_discovery):
        # list to unauthenticated callers.
        try:
            auth_mgr = getattr(request.app.state, "auth_manager", None)
-            if not owner and auth_mgr is not None and getattr(auth_mgr, "is_configured", False):
+            if not owner and not _auth_disabled() and auth_mgr is not None and getattr(auth_mgr, "is_configured", False):
                raise HTTPException(401, "Not authenticated")
        except HTTPException:
            raise
@@ -746,8 +939,8 @@ def setup_model_routes(model_discovery):
                    entry["error"] = str(e)
                    entry["model_count"] = 0
            else:
-                url = _models_url(base)
-                headers = _provider_headers(ep.api_key, base)
+                url = build_models_url(base)
+                headers = build_headers(ep.api_key, base)
                try:
                    t0 = _time.time()
                    r = httpx.get(url, headers=headers, timeout=5)
@@ -965,23 +1158,23 @@ def setup_model_routes(model_discovery):
        require_models: str = Form("false"),
        model_type: str = Form("llm"),
        supports_tools: str = Form(""),  # "true"/"false"/"" (unknown)
+        container_local: str = Form("false"),
        # Default `shared=true` → endpoints are visible to all users (the
        # app's historical behaviour). Admins can pass `shared=false` to
        # scope a new endpoint to their own account only.
        shared: str = Form("true"),
    ):
        require_admin(request)
-        base_url = base_url.strip().rstrip("/")
-        # Normalize: strip trailing /models, /chat/completions, /v1/messages etc to get clean base
-        for suffix in ["/models", "/chat/completions", "/completions", "/v1/messages"]:
-            if base_url.endswith(suffix):
-                base_url = base_url[:-len(suffix)].rstrip("/")
        base_url = _normalize_base(base_url)
        if not base_url:
            raise HTTPException(400, "Base URL is required")
        # Resolve hostname via Tailscale if DNS fails
        from src.endpoint_resolver import resolve_url
        base_url = resolve_url(base_url)
+        # In Docker, manually added loopback URLs usually point at a host-local
+        # server. Cookbook local serves are launched inside Odysseus itself, so
+        # keep those container-local when the frontend marks them as such.
+        base_url = _rewrite_loopback_for_docker(base_url, container_local=_truthy(container_local))

        # Auto-generate name from URL if not provided
        if not name.strip():
@@ -1052,11 +1245,15 @@ def setup_model_routes(model_discovery):
            )
            db.add(ep)
            db.commit()
-            # Auto-set as default chat endpoint if none configured yet
+            # Auto-set as default chat endpoint if none configured yet. Seed
+            # the first CHAT model (not raw model_ids[0]) so we don't pin the
+            # global default to an embedding/tts/etc. entry a provider happens
+            # to list first.
            settings = _load_settings()
            if not settings.get("default_endpoint_id"):
+                from src.endpoint_resolver import _first_chat_model
                settings["default_endpoint_id"] = ep.id
-                settings["default_model"] = model_ids[0] if model_ids else ""
+                settings["default_model"] = _first_chat_model(model_ids) or ""
                _save_settings(settings)
            _invalidate_models_cache()
            _local_probe_cache["data"] = None
@@ -1081,14 +1278,12 @@ def setup_model_routes(model_discovery):
        api_key: str = Form(""),
    ):
        require_admin(request)
-        base_url = base_url.strip().rstrip("/")
-        for suffix in ["/models", "/chat/completions", "/completions", "/v1/messages"]:
-            if base_url.endswith(suffix):
-                base_url = base_url[:-len(suffix)].rstrip("/")
+        base_url = _normalize_base(base_url)
        if not base_url:
            raise HTTPException(400, "Base URL is required")
        from src.endpoint_resolver import resolve_url
        base_url = resolve_url(base_url)
+        base_url = _rewrite_loopback_for_docker(base_url)
        probe_timeout = 3 if (":11434" in base_url or "ollama" in base_url.lower()) else 2
        models = _probe_endpoint(base_url, api_key.strip() or None, timeout=probe_timeout)
        ping = {"reachable": True, "error": None} if models else _ping_endpoint(base_url, api_key.strip() or None, timeout=probe_timeout)
@@ -1301,9 +1496,9 @@ def setup_model_routes(model_discovery):
            chat_url = build_chat_url(base)
            if not model and getattr(ep, "cached_models", None):
                try:
-                    models = _json.loads(ep.cached_models) if isinstance(ep.cached_models, str) else ep.cached_models
-                    if models:
-                        model = models[0]
+                    visible = _visible_models(ep.cached_models, getattr(ep, "hidden_models", None))
+                    if visible:
+                        model = visible[0]
                except Exception:
                    pass
            return {"endpoint_id": ep.id, "endpoint_url": chat_url, "model": model}
@@ -1337,58 +1532,63 @@ def setup_model_routes(model_discovery):
                    ep.name = body["name"].strip() or ep.name
                if "model_type" in body and isinstance(body["model_type"], str):
                    ep.model_type = body["model_type"].strip() or ep.model_type
+                # Rotating an API key used to require DELETE+POST, which wiped
+                # endpoint_url/model from every session referencing the old base
+                # URL. Allow in-place updates so the admin can change the key
+                # (or correct a typo'd base URL) without nuking session state.
+                if "api_key" in body and isinstance(body["api_key"], str):
+                    _new_key = body["api_key"].strip()
+                    # Empty string means "clear it" (e.g. local Ollama no longer needs a key).
+                    ep.api_key = _new_key or None
+                if "base_url" in body and isinstance(body["base_url"], str):
+                    _new_base = body["base_url"].strip().rstrip("/")
+                    for _suffix in ("/models", "/chat/completions", "/completions", "/v1/messages"):
+                        if _new_base.endswith(_suffix):
+                            _new_base = _new_base[: -len(_suffix)].rstrip("/")
+                    _new_base = _normalize_base(_new_base)
+                    if _new_base:
+                        ep.base_url = _new_base
            else:
                ep.is_enabled = not ep.is_enabled
            db.commit()
            _invalidate_models_cache()
+            _local_probe_cache["data"] = None
            return {
                "id": ep.id,
                "is_enabled": ep.is_enabled,
                "supports_tools": ep.supports_tools,
                "name": ep.name,
                "model_type": ep.model_type,
+                "base_url": ep.base_url,
            }
        finally:
            db.close()

-    # ── Settings fields that store an endpoint ID ──
-    _EP_SETTING_FIELDS = {
-        "default_endpoint_id":  ("default_model",  "Default Model"),
-        "utility_endpoint_id":  ("utility_model",   "Utility Model"),
-        "research_endpoint_id": ("research_model",  "Deep Research"),
-        "task_endpoint_id":     ("task_model",       "Background Tasks"),
-    }
-
    def _settings_using_endpoint(ep_id: str) -> list:
        """Return human-readable labels for settings that reference this endpoint."""
-        settings = _load_settings()
-        affected = []
-        for ep_key, (_, label) in _EP_SETTING_FIELDS.items():
-            if (settings.get(ep_key) or "") == ep_id:
-                affected.append(label)
-        tts_prov = settings.get("tts_provider") or ""
-        if tts_prov == f"endpoint:{ep_id}":
-            affected.append("Text to Speech")
-        return affected
+        return _endpoint_settings_using_endpoint(_load_settings(), ep_id, include_speech=True)

    def _clear_settings_for_endpoint(ep_id: str) -> list:
        """Clear all settings that reference this endpoint. Returns list of cleared labels."""
        settings = _load_settings()
-        cleared = []
-        for ep_key, (model_key, label) in _EP_SETTING_FIELDS.items():
-            if (settings.get(ep_key) or "") == ep_id:
-                settings[ep_key] = ""
-                settings[model_key] = ""
-                cleared.append(label)
-        tts_prov = settings.get("tts_provider") or ""
-        if tts_prov == f"endpoint:{ep_id}":
-            settings["tts_provider"] = "disabled"
-            settings["tts_model"] = "tts-1"
-            cleared.append("Text to Speech")
+        cleared = _clear_endpoint_settings_for_endpoint(settings, ep_id, include_speech=True)
        if cleared:
            _save_settings(settings)
        return cleared

+    def _clear_user_prefs_for_endpoint(ep_id: str) -> int:
+        """Strip the endpoint from every user's saved prefs; returns how many pref sets changed (0 on failure)."""
+        try:
+            from routes.prefs_routes import _load as _load_prefs, _save as _save_prefs
+            stored_prefs = _load_prefs()
+            touched = _clear_user_pref_endpoint_refs(stored_prefs, ep_id)
+            if touched:
+                _save_prefs(stored_prefs)
+        except Exception as e:
+            logger.warning("Failed to clear user prefs for endpoint %s: %s", ep_id, e)
+            return 0
+        return touched
+
    def _session_uses_endpoint_url(session_url: str, base_url: str) -> bool:
        if not session_url or not base_url:
            return False
@@ -1402,12 +1602,18 @@ def setup_model_routes(model_discovery):
        return sess in variants or sess.startswith(base + "/")

    def _clear_sessions_for_endpoint(db, base_url: str) -> int:
+        """Drop stored auth for sessions using an endpoint being deleted.
+
+        Keep the session's endpoint URL and model intact. If the admin is
+        replacing an endpoint with the same URL, clearing those fields leaves
+        the UI looking selected while chat requests arrive with an empty model.
+        The chat-time orphan guard still clears truly dead endpoints when no
+        matching enabled endpoint exists.
+        """
        cleared = 0
        rows = db.query(DbSession).filter(DbSession.endpoint_url.isnot(None)).all()
        for row in rows:
            if _session_uses_endpoint_url(row.endpoint_url or "", base_url):
-                row.endpoint_url = ""
-                row.model = ""
                row.headers = {}
                row.updated_at = datetime.utcnow()
                cleared += 1
@@ -1425,8 +1631,6 @@ def setup_model_routes(model_discovery):
        try:
            for sess in list(getattr(manager, "sessions", {}).values()):
                if _session_uses_endpoint_url(getattr(sess, "endpoint_url", "") or "", base_url):
-                    sess.endpoint_url = ""
-                    sess.model = ""
                    sess.headers = {}
                    cleared += 1
        except Exception:
@@ -1449,6 +1653,7 @@ def setup_model_routes(model_discovery):
                raise HTTPException(404, "Endpoint not found")
            # Clean up any settings that reference this endpoint
            cleared = _clear_settings_for_endpoint(ep_id)
+            cleared_user_preferences = _clear_user_prefs_for_endpoint(ep_id)
            cleared_sessions = _clear_sessions_for_endpoint(db, ep.base_url)
            cleared_loaded_sessions = _clear_loaded_sessions_for_endpoint(ep.base_url)
            db.delete(ep)
@@ -1458,6 +1663,7 @@ def setup_model_routes(model_discovery):
            return {
                "deleted": True,
                "cleared_settings": cleared,
+                "cleared_user_preferences": cleared_user_preferences,
                "cleared_sessions": cleared_sessions,
                "cleared_loaded_sessions": cleared_loaded_sessions,
            }
--- a/routes/note_routes.py
+++ b/routes/note_routes.py
@@ -683,9 +683,8 @@ def setup_note_routes(task_scheduler=None):
        Returns {synthesis, email_sent}.
        """
        # Gate against anonymous callers — LLM synthesis can burn tokens.
-        from src.auth_helpers import get_current_user as _gcu
-        if not _gcu(request):
-            raise HTTPException(401, "Not authenticated")
+        from src.auth_helpers import require_user as _ru
+        _ru(request)
        body = await request.json()
        note_id = body.get("note_id")
        title = (body.get("title") or "").strip()
@@ -697,7 +696,7 @@ def setup_note_routes(task_scheduler=None):
        # the same dispatch without an HTTP roundtrip + auth cookie.
        return await dispatch_reminder(
            title=title, note_body=note_body, note_id=note_id,
-            owner=_gcu(request) or "",
+            owner=_owner(request) or "",
            queue_browser=False,
        )

--- a/routes/personal_routes.py
+++ b/routes/personal_routes.py
@@ -69,9 +69,12 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
        if not directory:
            raise HTTPException(400, "Directory path is required")

-        base_abs = os.path.abspath(PERSONAL_DIR)
+        # realpath (not abspath) so a symlink inside PERSONAL_DIR that points
+        # outside it is resolved before the commonpath confinement check below;
+        # abspath only normalises `..` and would let such a symlink escape.
+        base_abs = os.path.realpath(PERSONAL_DIR)
        candidate = directory if os.path.isabs(directory) else os.path.join(base_abs, directory)
-        resolved = os.path.abspath(candidate)
+        resolved = os.path.realpath(candidate)
        try:
            in_base = os.path.commonpath([resolved, base_abs]) == base_abs
        except ValueError:
--- a/routes/prefs_routes.py
+++ b/routes/prefs_routes.py
@@ -12,7 +12,8 @@ def _load():
    """Load the raw prefs file (internal use only)."""
    try:
        with open(PREFS_FILE, "r", encoding="utf-8") as f:
-            return json.load(f)
+            data = json.load(f)
+            return data if isinstance(data, dict) else {}
    except (FileNotFoundError, json.JSONDecodeError):
        return {}

@@ -40,7 +41,18 @@ def _save_for_user(user: Optional[str], prefs: dict):
    """Save preferences for a specific user."""
    all_prefs = _load()
    if user is None:
-        # Auth disabled — save flat
+        # Auth disabled. If the store is already multi-user (e.g. auth was
+        # turned off on a deployment that previously ran multi-user), writing
+        # `prefs` flat would overwrite the whole `_users` map and destroy every
+        # other user's preferences. Instead write back into the same (first)
+        # slot _load_for_user(None) reads from, preserving the others.
+        if "_users" in all_prefs:
+            users = all_prefs["_users"]
+            first_key = next(iter(users), None)
+            if first_key is not None:
+                users[first_key] = prefs
+                _save(all_prefs)
+                return
        _save(prefs)
        return
    if "_users" not in all_prefs:
--- a/routes/research_routes.py
+++ b/routes/research_routes.py
@@ -3,6 +3,7 @@
 import asyncio
 import json
 import logging
+import re
 import uuid
 from datetime import datetime
 from pathlib import Path
@@ -12,7 +13,9 @@ from fastapi import APIRouter, HTTPException, Query, Request
 from fastapi.responses import HTMLResponse, StreamingResponse
 from pydantic import BaseModel, Field
 from src.endpoint_resolver import resolve_endpoint
-from src.auth_helpers import get_current_user
+from src.auth_helpers import _auth_disabled, get_current_user
+
+_SESSION_ID_RE = re.compile(r"^[a-zA-Z0-9-]{1,128}$")

 logger = logging.getLogger(__name__)

@@ -55,9 +58,15 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
        verify the session belongs to this user."""
        user = get_current_user(request)
        if not user:
+            if _auth_disabled():
+                return ""
            raise HTTPException(401, "Not authenticated")
        return user

+    def _validate_session_id(session_id: str) -> None:
+        if not _SESSION_ID_RE.fullmatch(session_id):  # only 1-128 chars of [a-zA-Z0-9-] pass; anything else is a 400
+            raise HTTPException(400, "Invalid session ID format")
+
    def _owns_in_memory(session_id: str, user: str) -> bool:
        """Ownership check for an in-flight (in-memory) research task.
        Falls back to the on-disk JSON if the task has already finished."""
@@ -95,6 +104,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
    @router.get("/api/research/status/{session_id}")
    async def research_status(session_id: str, request: Request):
        user = _require_user(request)
+        _validate_session_id(session_id)
        if not _owns_in_memory(session_id, user):
            raise HTTPException(404, "No research found for this session")
        status = research_handler.get_status(session_id)
@@ -105,6 +115,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
    @router.post("/api/research/cancel/{session_id}")
    async def research_cancel(session_id: str, request: Request):
        user = _require_user(request)
+        _validate_session_id(session_id)
        if not _owns_in_memory(session_id, user):
            raise HTTPException(404, "No research found for this session")
        cancelled = research_handler.cancel_research(session_id)
@@ -113,6 +124,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
    @router.post("/api/research/result/{session_id}")
    async def research_result(session_id: str, request: Request):
        user = _require_user(request)
+        _validate_session_id(session_id)
        if not _owns_in_memory(session_id, user):
            raise HTTPException(404, "No research result available")
        result = research_handler.get_result(session_id)
@@ -140,6 +152,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
    async def research_report(session_id: str, request: Request):
        """Serve the visual HTML report for a completed research session."""
        user = _require_user(request)
+        _validate_session_id(session_id)
        _assert_owns_research(session_id, user)
        logger.info(f"Visual report requested for session {session_id}")
        try:
@@ -160,6 +173,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
        """Mark an image URL as hidden for this research's visual report.
        Persisted to the research JSON so subsequent /report renders skip it."""
        user = _require_user(request)
+        _validate_session_id(session_id)
        _assert_owns_research(session_id, user)
        ok = research_handler.hide_image(session_id, body.url)
        if not ok:
@@ -170,6 +184,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
    async def research_unhide_images(session_id: str, request: Request):
        """Clear the hidden-images list for a research session."""
        user = _require_user(request)
+        _validate_session_id(session_id)
        _assert_owns_research(session_id, user)
        ok = research_handler.unhide_all_images(session_id)
        if not ok:
@@ -235,6 +250,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
        """Return the full JSON for a single research result — sources,
        summary, stats — used by the Library preview panel."""
        user = _require_user(request)
+        _validate_session_id(session_id)
        path = Path("data/deep_research") / f"{session_id}.json"
        if not path.exists():
            raise HTTPException(404, "Research not found")
@@ -251,6 +267,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
    async def research_archive(session_id: str, request: Request, archived: bool = Query(True)):
        """Soft-archive / restore a research report (sets `archived` in its JSON)."""
        user = _require_user(request)
+        _validate_session_id(session_id)
        path = Path("data/deep_research") / f"{session_id}.json"
        if not path.exists():
            raise HTTPException(404, "Research not found")
@@ -270,6 +287,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
    async def research_delete(session_id: str, request: Request):
        """Delete a research result from disk."""
        user = _require_user(request)
+        _validate_session_id(session_id)
        data_dir = Path("data/deep_research")
        json_path = data_dir / f"{session_id}.json"
        deleted = False
@@ -299,7 +317,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
        endpoint_id: Optional[str] = None
        model: Optional[str] = None
        max_time: int = Field(default=300, ge=60, le=1800)
-        extraction_timeout: Optional[int] = Field(default=None, ge=15, le=600)
+        extraction_timeout: Optional[int] = Field(default=None, ge=15, le=3600)
        extraction_concurrency: Optional[int] = Field(default=None, ge=1, le=12)
        category: Optional[str] = None

@@ -413,6 +431,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
    async def research_stream(session_id: str, request: Request):
        """SSE stream of research progress events."""
        user = _require_user(request)
+        _validate_session_id(session_id)
        if not _owns_in_memory(session_id, user):
            raise HTTPException(404, "No research found for this session")
        async def _generate():
@@ -446,6 +465,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
    async def research_result_peek(session_id: str, request: Request):
        """Get research result without clearing it (for panel use)."""
        user = _require_user(request)
+        _validate_session_id(session_id)
        if not _owns_in_memory(session_id, user):
            raise HTTPException(404, "No research found for this session")
        result = research_handler.get_result(session_id)
@@ -474,7 +494,14 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
        injects a single system message containing the report and sources so
        the user can ask follow-up questions in a clean conversation.
        """
-        _require_user(request)
+        user = _require_user(request)
+        _validate_session_id(session_id)
+        # SECURITY: gate on ownership before reading the persisted research —
+        # otherwise any authenticated user could spin off (and thereby read)
+        # another user's report by guessing its session ID. Mirrors every other
+        # endpoint in this file (see result_peek above).
+        if not _owns_in_memory(session_id, user):
+            raise HTTPException(404, "No research found for this session")
        if session_manager is None:
            raise HTTPException(500, "session_manager not configured")

@@ -555,7 +582,6 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:

        # Create new session
        new_sid = str(uuid.uuid4())
-        user = get_current_user(request)

        title_query = (query or "research").strip()
        if len(title_query) > 60:
--- a/routes/session_routes.py
+++ b/routes/session_routes.py
@@ -11,45 +11,118 @@ from core.session_manager import SessionManager
 from core.models import ChatMessage
 from src.request_models import SessionResponse
 from core.database import Session as DbSession, SessionLocal, Document, GalleryImage
-from src.auth_helpers import get_current_user
+from src.auth_helpers import get_current_user, effective_user


-def _verify_session_owner(request: Request, session_id: str):
-    """Verify the current user owns the session. Raises 404 if not."""
-    user = get_current_user(request)
+def _sanitize_export_filename(name: str) -> str:
+    """Reduce ``name`` to a conservative filename safe for Content-Disposition.
+
+    Non-string input is treated as an empty name. Every character outside
+    ``A-Z a-z 0-9 . _ -`` is replaced with an underscore, and the result is
+    capped at 128 characters.
+    """
+    raw = name if isinstance(name, str) else ""
+    return re.sub(r"[^A-Za-z0-9._-]", "_", raw)[:128]
+
+
+def _verify_session_owner(request: Request, session_id: str, session_manager=None):
+    """Verify the current user owns the session. Raises 404 if not.
+
+    Ownership is checked against the DB row when one exists (unchanged). If
+    there is no DB row but the caller owns an in-memory "ghost" session — one
+    that lives only in ``session_manager`` because it was never persisted, or
+    its DB row was removed out-of-band — fall back to the in-memory owner so the
+    user can still manage and delete it. Without this fallback such sessions are
+    listed by ``/api/sessions`` (they come from the in-memory manager) yet every
+    per-session operation 404s, making them impossible to delete (issue #1044).
+
+    ``session_manager`` is optional and defaults to ``None`` so existing callers
+    that only care about persisted sessions keep their exact prior behavior.
+
+    Raises:
+        HTTPException: 403 when no effective user is resolved for the request;
+            404 when the session is missing or is owned by someone else.
+    """
+    user = effective_user(request)
    if not user:
        raise HTTPException(403, "Authentication required")
    db = SessionLocal()
    try:
        row = db.query(DbSession.owner).filter(DbSession.id == session_id).first()
-        if not row:
-            raise HTTPException(404, f"Session {session_id} not found")
-        if row.owner != user:
-            raise HTTPException(404, f"Session {session_id} not found")
    finally:
        db.close()
+    # A persisted row is authoritative: an owner mismatch gets the same 404 as a
+    # missing session, and the in-memory fallback below is never consulted.
+    if row is not None:
+        if row.owner != user:
+            raise HTTPException(404, f"Session {session_id} not found")
+        return
+    # No DB row — allow the caller to act on an in-memory ghost they own.
+    if session_manager is not None:
+        # The getattr defaults tolerate a manager without a ``sessions`` mapping
+        # and a session object without an ``owner`` attribute.
+        ghost = getattr(session_manager, "sessions", {}).get(session_id)
+        if ghost is not None and getattr(ghost, "owner", None) == user:
+            return
+    raise HTTPException(404, f"Session {session_id} not found")

 logger = logging.getLogger(__name__)

 router = APIRouter(prefix="/api", tags=["sessions"])

-def _pick_endpoint_for_sort():
+
+def _current_user_is_admin(request: Request, user: str | None) -> bool:
+    """Report whether ``user`` is an admin according to the app's auth manager.
+
+    Fails closed: a missing user, a missing or non-callable
+    ``request.app.state.auth_manager.is_admin``, or an exception raised by that
+    check all yield ``False``.
+    """
+    if not user:
+        return False
+    manager = getattr(request.app.state, "auth_manager", None)
+    checker = getattr(manager, "is_admin", None)
+    if callable(checker):
+        try:
+            return bool(checker(user))
+        except Exception:
+            # An auth backend failure must never grant admin rights.
+            return False
+    return False
+
+
+def _reject_raw_endpoint_url_for_non_admin(
+    request: Request,
+    user: str | None,
+    endpoint_id: str | None,
+    endpoint_url: str | None,
+) -> None:
+    """Require registered endpoints for signed-in non-admin session changes.
+
+    The request passes untouched when it names a registered endpoint (a
+    non-blank ``endpoint_id``), carries no ``endpoint_url``, or has no
+    signed-in user.
+
+    Raises:
+        HTTPException: 403 when a signed-in non-admin supplies only a raw URL.
+    """
+    names_registered_endpoint = bool(endpoint_id and endpoint_id.strip())
+    if names_registered_endpoint or not endpoint_url or not user:
+        return
+    # Raw URLs make the server dial whatever host the request supplies. For
+    # non-admin users, require a saved endpoint row so normal owner scoping and
+    # endpoint validation have already happened.
+    if not _current_user_is_admin(request, user):
+        raise HTTPException(403, "Choose a registered model endpoint")
+
+
+def _persist_session_headers(session_id: str, headers: dict | None) -> None:
+    """Persist endpoint auth headers for DB-backed session metadata.
+
+    A falsy ``headers`` value is stored as an empty dict, and ``updated_at`` is
+    refreshed alongside it. When no DB row exists for ``session_id`` nothing is
+    written. Any exception rolls the transaction back and is re-raised; the DB
+    handle is closed in every case.
+    """
+    db = SessionLocal()
+    try:
+        record = db.query(DbSession).filter(DbSession.id == session_id).first()
+        if not record:
+            return
+        record.headers = headers or {}
+        record.updated_at = datetime.utcnow()
+        db.commit()
+    except Exception:
+        db.rollback()
+        raise
+    finally:
+        db.close()
+
+
+def _pick_endpoint_for_sort(owner=None):
    """Pick model endpoint for auto-sort LLM call — uses utility endpoint setting, falls back to default."""
+    # ``owner`` is forwarded unchanged to every resolver call below.
+    # NOTE(review): the default of None presumably means "no owner scoping" —
+    # confirm against resolve_endpoint / resolve_task_endpoint.
    from src.endpoint_resolver import resolve_endpoint
    # Try utility endpoint first (what the user configured for background tasks)
-    url, model, headers = resolve_endpoint("utility")
+    url, model, headers = resolve_endpoint("utility", owner=owner)
    if url and model:
        return url, model, headers
    # Fall back to task endpoint
    try:
        from src.task_endpoint import resolve_task_endpoint
-        url, model, headers = resolve_task_endpoint()
+        url, model, headers = resolve_task_endpoint(owner=owner)
        if url and model:
            return url, model, headers
    except Exception:
+        # Best-effort: an import or resolver failure here simply moves on to
+        # the default-endpoint lookup below.
        pass
    # Fall back to default
-    url, model, headers = resolve_endpoint("default")
+    url, model, headers = resolve_endpoint("default", owner=owner)
    if url and model:
        return url, model, headers
+    # Every source came up empty: callers receive an all-None triple.
    return None, None, None
@@ -63,7 +136,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
    
    @router.get("/sessions")
    def list_sessions(request: Request):
-        user = get_current_user(request)
+        user = effective_user(request)
        # Lazy purge: incognito sessions are ephemeral by design — wipe leftovers
        # from the DB and session_manager so they vanish on the next page refresh.
        # BUT: skip sessions that were created within the last 10 minutes.
@@ -172,11 +245,41 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
        endpoint_id: str = Form(""),
    ):
        skip_val = str(skip_validation).lower() == "true"
+        user = get_current_user(request)
+        endpoint_api_key = ""
+        endpoint_base_url = ""
+        _reject_raw_endpoint_url_for_non_admin(request, user, endpoint_id, endpoint_url)
+        if endpoint_id and endpoint_id.strip():
+            from core.database import ModelEndpoint
+            from src.auth_helpers import owner_filter
+            from src.endpoint_resolver import build_chat_url, normalize_base
+            _db = SessionLocal()
+            try:
+                q = _db.query(ModelEndpoint).filter(
+                    ModelEndpoint.id == endpoint_id.strip(),
+                    ModelEndpoint.is_enabled == True,
+                )
+                if user:
+                    q = owner_filter(q, ModelEndpoint, user)
+                endpoint_row = q.first()
+                if not endpoint_row:
+                    raise HTTPException(400, "Model endpoint no longer exists")
+                endpoint_base_url = endpoint_row.base_url or ""
+                endpoint_api_key = endpoint_row.api_key or ""
+                endpoint_url = build_chat_url(normalize_base(endpoint_base_url))
+            finally:
+                _db.close()

        if not endpoint_url and not skip_val:
            raise HTTPException(400, "endpoint_url is required (choose from /api/models)")

        model_to_use = model
+        request_api_key = api_key.strip() if api_key else ""
+        effective_api_key = request_api_key or endpoint_api_key
+        validation_headers = None
+        if effective_api_key:
+            from src.endpoint_resolver import build_headers
+            validation_headers = build_headers(effective_api_key, endpoint_base_url or endpoint_url)

        if skip_val:
            # skip_validation = trust the caller and do NOT probe /v1/models.
@@ -187,7 +290,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
        elif not model_to_use:
            from src.llm_core import list_model_ids
            ids = list_model_ids(endpoint_url, timeout=REQUEST_TIMEOUT,
-                                 headers={"Authorization": f"Bearer {api_key}"} if api_key.strip() else None)
+                                 headers=validation_headers)
            if not ids:
                raise HTTPException(400, "Cannot reach /v1/models")
            # Default to the first CHAT model — endpoints often list embedding/
@@ -202,7 +305,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
            import os as _os
            req_base = _os.path.basename(model_to_use.rstrip("/"))
            avail = list_model_ids(endpoint_url, timeout=REQUEST_TIMEOUT,
-                                   headers={"Authorization": f"Bearer {api_key}"} if api_key.strip() else None)
+                                   headers=validation_headers)
            if not avail:
                raise HTTPException(400, "Cannot reach /v1/models")
            if model_to_use not in avail:
@@ -217,7 +320,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
                model_to_use = found
        
        sid = str(uuid.uuid4())
-        user = get_current_user(request)
+        user = effective_user(request)
        session = session_manager.create_session(
            session_id=sid,
            name=name or "",
@@ -227,22 +330,15 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
            owner=user,
        )
        # Set auth headers for custom API-key endpoints
-        resolved_key = api_key.strip() if api_key else ""
+        resolved_key = request_api_key
        resolved_base = endpoint_url
-        if not resolved_key and endpoint_id and endpoint_id.strip():
-            from core.database import ModelEndpoint
-            _db = SessionLocal()
-            try:
-                ep = _db.query(ModelEndpoint).filter(ModelEndpoint.id == endpoint_id.strip()).first()
-                if ep and ep.api_key:
-                    resolved_key = ep.api_key
-                    resolved_base = ep.base_url
-            finally:
-                _db.close()
+        if not resolved_key and endpoint_api_key:
+            resolved_key = endpoint_api_key
+            resolved_base = endpoint_base_url
        if resolved_key:
            from src.endpoint_resolver import build_headers
            session.headers = build_headers(resolved_key, resolved_base)
-            session_manager.save_sessions()
+            _persist_session_headers(sid, session.headers)
        # Fire webhook (sync-safe)
        if webhook_manager:
            webhook_manager.fire_and_forget("session.created", {
@@ -288,27 +384,38 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
                db.close()
        # Switch model/endpoint mid-session
        if model is not None and endpoint_url is not None:
+            user = get_current_user(request)
+            _reject_raw_endpoint_url_for_non_admin(request, user, endpoint_id, endpoint_url)
+            endpoint_api_key = ""
+            endpoint_base_url = ""
            if endpoint_id:
                from core.database import ModelEndpoint
+                from src.auth_helpers import owner_filter
+                from src.endpoint_resolver import build_chat_url, normalize_base
                _db = SessionLocal()
                try:
-                    ep = _db.query(ModelEndpoint).filter(ModelEndpoint.id == endpoint_id).first()
+                    q = _db.query(ModelEndpoint).filter(
+                        ModelEndpoint.id == endpoint_id,
+                        ModelEndpoint.is_enabled == True,
+                    )
+                    if user:
+                        q = owner_filter(q, ModelEndpoint, user)
+                    ep = q.first()
                    if not ep:
                        raise HTTPException(400, "Model endpoint no longer exists")
+                    endpoint_base_url = ep.base_url or ""
+                    endpoint_api_key = ep.api_key or ""
+                    endpoint_url = build_chat_url(normalize_base(endpoint_base_url))
                finally:
                    _db.close()
            session.model = model
            session.endpoint_url = endpoint_url
            # Update auth headers from the endpoint's stored API key
-            if endpoint_id:
-                _db = SessionLocal()
-                try:
-                    ep = _db.query(ModelEndpoint).filter(ModelEndpoint.id == endpoint_id).first()
-                    if ep and ep.api_key:
-                        from src.endpoint_resolver import build_headers
-                        session.headers = build_headers(ep.api_key, ep.base_url)
-                finally:
-                    _db.close()
+            if endpoint_api_key:
+                from src.endpoint_resolver import build_headers
+                session.headers = build_headers(endpoint_api_key, endpoint_base_url)
+            else:
+                session.headers = {}
            # Persist to DB
            db = SessionLocal()
            try:
@@ -316,6 +423,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
                if db_session:
                    db_session.model = model
                    db_session.endpoint_url = endpoint_url
+                    db_session.headers = session.headers or {}
                    db_session.updated_at = datetime.utcnow()
                    db.commit()
            finally:
@@ -356,7 +464,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
            ids = []
        for sid in ids:
            try:
-                _verify_session_owner(request, sid)
+                _verify_session_owner(request, sid, session_manager)
                session_manager.delete_session(sid)
                db = SessionLocal()
                try:
@@ -374,7 +482,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
    @router.delete("/session/{sid}")
    def delete_session(request: Request, sid: str):
        """Permanently delete a session and all its messages."""
-        _verify_session_owner(request, sid)
+        _verify_session_owner(request, sid, session_manager)
        try:
            # Block deletion of starred/favorited sessions
            db = SessionLocal()
@@ -499,7 +607,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
    @router.get("/sessions/archived")
    def list_archived_sessions(request: Request, search: str = "", offset: int = 0, limit: int = 20, sort: str = "recent", model: str = ""):
        """List archived sessions for the archive browser."""
-        user = get_current_user(request)
+        user = effective_user(request)
        db = SessionLocal()
        try:
            q = db.query(DbSession).filter(DbSession.archived == True)
@@ -510,7 +618,12 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
                safe_search = search.replace('%', r'\%').replace('_', r'\_')
                q = q.filter(DbSession.name.ilike(f"%{safe_search}%", escape='\\'))
            if model:
-                q = q.filter(DbSession.model.ilike(f"%{model}"))
+                # Contains match (mirrors the name filter above). The old
+                # f"%{model}" was a SUFFIX-only match, so filtering by "gpt-4"
+                # dropped "gpt-4o" and over-matched on shared suffixes; it also
+                # left LIKE wildcards in the user value unescaped.
+                safe_model = model.replace('%', r'\%').replace('_', r'\_')
+                q = q.filter(DbSession.model.ilike(f"%{safe_model}%", escape='\\'))
            total = q.count()
            sort_map = {
                "recent": DbSession.updated_at.desc(),
@@ -558,6 +671,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_

        safe_name = re.sub(r'[^\w\-_]', '_', session.name)
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+        filename = _sanitize_export_filename(filename)

        if fmt == "json":
            import json as _json
@@ -635,7 +749,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
    
    @router.post("/sessions/save")
    def sessions_save_now(request: Request):
-        user = get_current_user(request)
+        user = effective_user(request)
        if not user:
            raise HTTPException(401, "Not authenticated")
        session_manager.save_sessions()
@@ -651,7 +765,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
        if not OPENAI_API_KEY:
            raise HTTPException(400, "Server missing OPENAI_API_KEY")
        sid = str(uuid.uuid4())
-        user = get_current_user(request)
+        user = effective_user(request)
        session = session_manager.create_session(
            session_id=sid,
            name="",
@@ -728,7 +842,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
        from src.endpoint_resolver import resolve_endpoint
        from src.llm_core import llm_call_async

-        url, model, headers = resolve_endpoint("utility")
+        url, model, headers = resolve_endpoint("utility", owner=get_current_user(request))
        if not url or not model:
            url, model, headers = session.endpoint_url, session.model, session.headers
        if not url or not model:
@@ -791,7 +905,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
        users can clean junk without spending tokens.
        """
        from src.llm_core import llm_call
-        user = get_current_user(request)
+        user = effective_user(request)
        user_sessions = session_manager.get_sessions_for_user(user)

        # Delete empty and throwaway sessions before sorting
@@ -928,9 +1042,9 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_

        # Pick an endpoint — prefer admin-configured task endpoint
        from src.task_endpoint import resolve_task_endpoint
-        url, model, headers = resolve_task_endpoint()
+        url, model, headers = resolve_task_endpoint(owner=user)
        if not url:
-            url, model, headers = _pick_endpoint_for_sort()
+            url, model, headers = _pick_endpoint_for_sort(owner=user)
        if not url:
            raise HTTPException(503, "No available model endpoint for auto-sort")

--- a/routes/shell_routes.py
+++ b/routes/shell_routes.py
@@ -118,6 +118,7 @@ def _running_in_container(dockerenv_path="/.dockerenv", cgroup_path="/proc/1/cgr


 DockerRowStatus = namedtuple("DockerRowStatus", ["applicable", "install_hint"])
+PackageUpdateStatus = namedtuple("PackageUpdateStatus", ["available", "note"])


 def _docker_row_status(*, on_remote, in_container, installed, default_hint):
@@ -127,6 +128,24 @@ def _docker_row_status(*, on_remote, in_container, installed, default_hint):
    return DockerRowStatus(applicable=True, install_hint=default_hint)


+def _pip_dist_name(pkg: dict) -> str:
+    """Distribution name for importlib.metadata lookups.
+
+    The Cookbook package catalog carries both the import name (``name``, e.g.
+    ``llama_cpp``) and the pip spec (``pip``, e.g. ``llama-cpp-python[server]``).
+    The distribution is NOT always the import name with underscores swapped for
+    dashes — ``llama_cpp`` ships in the ``llama-cpp-python`` distribution — so
+    derive it from the pip spec (stripping any ``[extras]``, version markers,
+    and ``@ url`` direct references) and fall back to the munged import name
+    only when no pip spec is declared.
+
+    Args:
+        pkg: Catalog entry; only its ``pip`` and ``name`` keys are read.
+
+    Returns:
+        The leading name of the pip spec, or ``name`` with underscores replaced
+        by dashes when the spec is missing or yields no name.
+    """
+    pip = (pkg.get("pip") or "").strip()
+    if pip:
+        # Cut at the first character that cannot belong to a distribution name:
+        # extras, version operators, environment markers, whitespace, or the
+        # "@" of a direct reference (PEP 508 allows "name@url" with no space).
+        base = re.split(r"[\[<>=!~;@\s]", pip, maxsplit=1)[0].strip()
+        if base:
+            return base
+    return (pkg.get("name") or "").replace("_", "-")
+
+
 def _package_installed_from_probe(name: str, probe: dict) -> bool:
    """Return whether an optional dependency is usable by Cookbook.

@@ -162,7 +181,10 @@ def _package_status_note(name: str, probe: dict) -> str:
    locations = module.get("locations") or []
    if name == "vllm":
        if binaries.get("vllm"):
-            return f"vLLM CLI: {binaries['vllm']}"
+            parts = [f"vLLM CLI: {binaries['vllm']}"]
+            if dists.get("vllm"):
+                parts.append(f"python package: vllm {dists['vllm']}")
+            return "; ".join(parts)
        if module.get("found") and not dists.get("vllm"):
            loc = locations[0] if locations else module.get("origin") or "unknown path"
            return f"Python sees a vllm namespace at {loc}, but no vLLM CLI is on PATH."
@@ -183,13 +205,70 @@ def _package_status_note(name: str, probe: dict) -> str:
    return ""


+def _package_pip_update_status(pkg: dict, probe: dict | None = None) -> PackageUpdateStatus:
+    """Decide whether the Dependencies UI should offer a generic pip update.
+
+    "Installed" only means Cookbook can use the dependency; it does not imply
+    the dependency is a Python package that pip should update. A native
+    llama-server can come from a package manager or source build, and a CLI may
+    be on PATH with no matching Python package metadata.
+
+    Args:
+        pkg: Catalog entry; its ``kind``, ``pip`` and ``name`` keys are read.
+        probe: Optional probe result; only dict-valued ``binaries`` and
+            ``dists`` entries are used, anything else counts as empty.
+
+    Returns:
+        ``PackageUpdateStatus(available, note)``.
+    """
+    if pkg.get("kind") == "system" or not pkg.get("pip"):
+        return PackageUpdateStatus(False, "Update this system dependency outside Odysseus.")
+
+    def _section(key: str) -> dict:
+        # Tolerate a missing probe as well as non-dict sections.
+        value = probe.get(key) if isinstance(probe, dict) else None
+        return value if isinstance(value, dict) else {}
+
+    binaries = _section("binaries")
+    dists = _section("dists")
+    name = pkg.get("name")
+
+    if name == "llama_cpp" and binaries.get("llama-server"):
+        note = "Using native llama-server on PATH; update it with its package manager or source checkout."
+        return PackageUpdateStatus(False, note)
+    if name == "vllm" and binaries.get("vllm") and not dists.get("vllm"):
+        note = "Using a vLLM CLI on PATH without Python package metadata; update it outside Odysseus."
+        return PackageUpdateStatus(False, note)
+
+    return PackageUpdateStatus(True, "Update uses pip in the selected Python environment.")
+
+
+def _prepend_user_install_bins_to_path() -> None:
+    """Make pip --user console scripts visible to dependency probes.
+
+    Docker Cookbook installs vLLM with `python -m pip install --user`, which
+    drops the `vllm` CLI in /app/.local/bin. The running app process does not
+    inherit that PATH update, so `shutil.which("vllm")` can report missing even
+    after a successful install.
+
+    Prepends ``<user base>/bin`` and ``~/.local/bin`` to ``os.environ["PATH"]``
+    when they are not already listed. Existing entries keep their order, and a
+    repeated call changes nothing.
+    """
+    import site
+
+    candidates = []
+    try:
+        # site.USER_BASE may still be None when nothing has initialised it;
+        # getuserbase() computes it on demand and honours PYTHONUSERBASE.
+        user_base = site.getuserbase()
+    except Exception:
+        # Best-effort: fall back to the conventional ~/.local/bin only.
+        user_base = None
+    if user_base:
+        candidates.append(os.path.join(user_base, "bin"))
+    candidates.append(os.path.expanduser("~/.local/bin"))
+
+    current = os.environ.get("PATH", "")
+    parts = current.split(os.pathsep) if current else []
+    changed = False
+    # Walk in reverse so that inserting at index 0 preserves candidate order.
+    for path in reversed([p for p in candidates if p]):
+        if path not in parts:
+            parts.insert(0, path)
+            changed = True
+    if changed:
+        os.environ["PATH"] = os.pathsep.join(parts)
+
+
 def _package_probe_script(names: list[str]) -> str:
    names_lit = ",".join(repr(n) for n in names)
    return f"""
 import importlib.util
 import importlib.metadata as md
 import json
+import os
 import shutil
+import site

 names=[{names_lit}]
 dist_names={{
@@ -204,6 +283,24 @@ bin_names={{
    'llama_cpp':['llama-server'],
 }}

+def add_user_install_bins_to_path():
+    candidates = []
+    try:
+        candidates.append(os.path.join(site.USER_BASE, 'bin'))
+    except Exception:
+        pass
+    candidates.append(os.path.expanduser('~/.local/bin'))
+    parts = os.environ.get('PATH', '').split(os.pathsep) if os.environ.get('PATH') else []
+    changed = False
+    for path in reversed([p for p in candidates if p]):
+        if path not in parts:
+            parts.insert(0, path)
+            changed = True
+    if changed:
+        os.environ['PATH'] = os.pathsep.join(parts)
+
+add_user_install_bins_to_path()
+
 def mod_status(n):
    spec = importlib.util.find_spec(n)
    loader = getattr(spec, 'loader', None) if spec else None
@@ -317,7 +414,7 @@ async def _generate_pty(cmd: str, timeout: int, request: Request):
        yield f"data: {json.dumps({'exit_code': -1, 'error': PTY_UNSUPPORTED_ERROR})}\n\n"
        return

-    loop = asyncio.get_event_loop()
+    loop = asyncio.get_running_loop()
    master_fd, slave_fd = pty.openpty()

    # Set master to non-blocking
@@ -469,7 +566,8 @@ async def _generate_tmux(cmd: str, request: Request):
        f"EC=${{PIPESTATUS[0]}}\n"
        f"echo ':::EXIT_CODE:::'$EC >> '{log_path}'\n"
        f"rm -f '{script_path}'\n"
-        f"exit $EC\n"
+        f"exit $EC\n",
+        encoding="utf-8",
    )
    script_path.chmod(0o755)
    logger.info("tmux wrapper script created: session=%s path=%s", session_id, script_path)
@@ -504,7 +602,7 @@ async def _generate_tmux(cmd: str, request: Request):
        # Read new lines from log
        try:
            if log_path.exists():
-                lines = log_path.read_text(errors="replace").splitlines()
+                lines = log_path.read_text(encoding="utf-8", errors="replace").splitlines()
                new_lines = lines[lines_sent:]
                for line in new_lines:
                    if line.startswith(":::EXIT_CODE:::"):
@@ -532,7 +630,7 @@ async def _generate_tmux(cmd: str, request: Request):
            # Session ended — do one final read
            await asyncio.sleep(0.5)
            if log_path.exists():
-                lines = log_path.read_text(errors="replace").splitlines()
+                lines = log_path.read_text(encoding="utf-8", errors="replace").splitlines()
                for line in lines[lines_sent:]:
                    if line.startswith(":::EXIT_CODE:::"):
                        try:
@@ -735,10 +833,11 @@ def setup_shell_routes() -> APIRouter:
                ]

                finished = 0
-                deadline = (asyncio.get_event_loop().time() + timeout) if timeout else None
+                loop = asyncio.get_running_loop()
+                deadline = (loop.time() + timeout) if timeout else None
                while finished < 2:
                    if deadline:
-                        remaining = deadline - asyncio.get_event_loop().time()
+                        remaining = deadline - loop.time()
                        if remaining <= 0:
                            raise asyncio.TimeoutError()
                        wait = min(remaining, 2.0)
@@ -791,7 +890,15 @@ def setup_shell_routes() -> APIRouter:
        """
        _require_admin(request)
        _reject_cross_site(request)
-        import importlib, importlib.metadata as importlib_metadata, shlex, json as _json
+        import importlib, importlib.metadata as importlib_metadata, shlex, json as _json, site, sys
+        _prepend_user_install_bins_to_path()
+        importlib.invalidate_caches()
+        try:
+            user_site = site.getusersitepackages()
+            if user_site and os.path.isdir(user_site) and user_site not in sys.path:
+                sys.path.append(user_site)
+        except Exception:
+            pass
        if ssh_port and str(ssh_port).strip() not in ("", "22"):
            _port = str(ssh_port).strip()
            if not _SSH_PORT_RE.match(_port) or not (1 <= int(_port) <= 65535):
@@ -870,6 +977,7 @@ def setup_shell_routes() -> APIRouter:

        for pkg in packages:
            on_remote = bool(host and pkg.get("target") == "remote")
+            probe = None
            if on_remote:
                pkg["installed"] = bool(remote_status.get(pkg["name"], False))
                probe = remote_details.get(pkg["name"])
@@ -883,19 +991,36 @@ def setup_shell_routes() -> APIRouter:
            elif pkg["name"] == "llama_cpp" and shutil.which("llama-server"):
                pkg["installed"] = True
                pkg["status_note"] = f"native llama-server: {shutil.which('llama-server')}"
+                probe = {"binaries": {"llama-server": shutil.which("llama-server")}, "dists": {}}
+            elif pkg["name"] == "vllm":
+                _vllm_cli = shutil.which("vllm")
+                pkg["installed"] = _vllm_cli is not None
+                if pkg["installed"]:
+                    try:
+                        _vllm_version = importlib_metadata.version(_pip_dist_name(pkg))
+                    except importlib_metadata.PackageNotFoundError:
+                        _vllm_version = None
+                    probe = {
+                        "binaries": {"vllm": _vllm_cli},
+                        "dists": {"vllm": _vllm_version} if _vllm_version else {},
+                    }
+                    pkg["status_note"] = _package_status_note("vllm", probe)
            else:
                try:
                    importlib.import_module(pkg["name"])
-                    if pkg["name"] == "vllm":
-                        pkg["installed"] = shutil.which("vllm") is not None
-                    else:
-                        importlib_metadata.version(pkg["name"].replace("_", "-"))
-                        pkg["installed"] = True
+                    importlib_metadata.version(_pip_dist_name(pkg))
+                    pkg["installed"] = True
                except ImportError:
                    pkg["installed"] = False
                except importlib_metadata.PackageNotFoundError:
                    pkg["installed"] = False

+            if pkg.get("installed"):
+                update_status = _package_pip_update_status(pkg, probe)
+                pkg["pip_update_available"] = update_status.available
+                if update_status.note:
+                    pkg["update_note"] = update_status.note
+
            if pkg["name"] == "docker":
                status = _docker_row_status(
                    on_remote=on_remote,
@@ -933,4 +1058,64 @@ def setup_shell_routes() -> APIRouter:
            return {"ok": True, "output": stdout.decode()[-200:]}
        return {"ok": False, "error": stderr.decode()[-300:]}

+    @router.post("/api/cookbook/rebuild-engine")
+    async def rebuild_engine(request: Request):
+        """Clear the cached llama.cpp build so the next serve recompiles.
+
+        Admin only — this removes the Cookbook-managed ``~/bin/llama-server``
+        symlink and ``~/llama.cpp/build`` directory, locally or on the selected
+        remote server. It installs and downloads nothing; the next llama.cpp
+        serve rebuilds from source and picks up CUDA/HIP if a toolchain is now
+        present. This is the missing "force a fresh GPU build" lever for hosts
+        stuck on a CPU-only llama-server.
+
+        Request JSON object (all keys optional): ``engine`` (only
+        ``"llamacpp"`` is accepted), ``remote_host`` and ``ssh_port``.
+
+        Returns ``{"ok": True, "output": ...}`` when the command exits 0,
+        otherwise ``{"ok": False, "error": ...}``. Raises HTTPException(400)
+        for a malformed body or an SSH target rejected by ``_ssh_base_argv``.
+        """
+        _require_admin(request)
+        # NOTE(review): a sibling handler in this file also calls
+        # _reject_cross_site(request) here — confirm whether this POST should.
+        from routes.cookbook_helpers import _llama_cpp_rebuild_cmd
+        try:
+            body = await request.json()
+        except ValueError:
+            raise HTTPException(400, "Request body must be valid JSON")
+        if not isinstance(body, dict):
+            raise HTTPException(400, "Request body must be a JSON object")
+        engine = str(body.get("engine") or "llamacpp").strip()
+        if engine != "llamacpp":
+            return {"ok": False, "error": f"Unsupported engine: {engine}"}
+        host = str(body.get("remote_host") or "").strip()
+        ssh_port = body.get("ssh_port")
+        cmd = _llama_cpp_rebuild_cmd()
+        try:
+            argv = (_ssh_base_argv(host, ssh_port) + [cmd]) if host else ["bash", "-lc", cmd]
+        except ValueError as e:
+            raise HTTPException(400, str(e))
+        try:
+            proc = await asyncio.create_subprocess_exec(
+                *argv, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
+            )
+        except OSError as e:
+            # bash/ssh missing or not executable: report it instead of a 500.
+            return {"ok": False, "error": f"Could not start rebuild-engine command: {e}"}
+        try:
+            out, err = await asyncio.wait_for(proc.communicate(), timeout=30)
+        except asyncio.TimeoutError:
+            # wait_for() only cancels communicate(); the child keeps running
+            # (and is never reaped) unless it is killed and awaited here.
+            try:
+                proc.kill()
+            except ProcessLookupError:
+                pass
+            await proc.wait()
+            return {"ok": False, "error": "Rebuild-engine command timed out."}
+        if proc.returncode == 0:
+            return {"ok": True, "output": out.decode("utf-8", errors="replace")[-400:]}
+        return {"ok": False, "error": err.decode("utf-8", errors="replace")[-400:]}
+
    return router
--- a/routes/skills_routes.py
+++ b/routes/skills_routes.py
@@ -79,6 +79,8 @@ def _skill_test_task(skill: dict) -> str:
    an email); if we just hand over the 'when to use' text the agent has nothing
    to work on and stalls asking for input. So we tell it to create its own
    realistic fixture first, then apply the skill end-to-end."""
+    if not isinstance(skill, dict):
+        skill = {}
    ctx = (skill.get("when_to_use") or skill.get("description") or skill.get("name") or "").strip()
    return (
        "Test this skill end-to-end. FIRST, set up a small realistic scenario it "
@@ -310,6 +312,8 @@ def _should_check_retrieval_precision(skill: dict) -> bool:
        "installation", "install", "system", "ssh", "document", "documents",
        "search", "email", "calendar", "gpu", "server", "python",
    }
+    if not isinstance(skill, dict):
+        return False
    tags = {str(t or "").strip().lower() for t in (skill.get("tags") or [])}
    if tags & broad:
        return True
@@ -463,13 +467,13 @@ async def _run_skill_test_job(key, name, md, task, url, model, headers, owner, s
    if skills_manager is not None:
        v = (job["verdict"] or {}).get("verdict") or "unknown"
        try:
-            skills_manager.set_audit(name, v, by_teacher=False, worker_model=model)
+            skills_manager.set_audit(name, v, by_teacher=False, worker_model=model, owner=owner)
        except Exception:
            pass
        conf = {"pass": 0.95, "needs_work": 0.6, "fail": 0.4}.get(v)
        if conf is not None:
            try:
-                skills_manager.update_skill(name, {"confidence": conf})
+                skills_manager.update_skill(name, {"confidence": conf}, owner=owner)
            except Exception:
                pass
    job["status"] = "done"
@@ -563,6 +567,7 @@ def _skill_duplicate_blocker(skills_manager, name: str, owner) -> Optional[str]:
                False,
                [keeper_name],
                f"Lower-priority duplicate of {keeper_name}",
+                owner=owner,
            )
        except Exception:
            pass
@@ -629,7 +634,7 @@ def _audit_finalize_status(skills_manager, name: str, owner, verdict: str,
    if generic_reason:
        necessary = False
        try:
-            skills_manager.set_necessity(name, False, [], generic_reason)
+            skills_manager.set_necessity(name, False, [], generic_reason, owner=owner)
        except Exception:
            pass
    duplicate_of = _skill_duplicate_blocker(skills_manager, name, owner) if verdict == "pass" else None
@@ -638,7 +643,7 @@ def _audit_finalize_status(skills_manager, name: str, owner, verdict: str,
    c = float(confidence or 0.0)
    status = "published" if (auto_publish and necessary and verdict == "pass" and c >= min_conf) else "draft"
    try:
-        skills_manager.update_skill(name, {"status": status})
+        skills_manager.update_skill(name, {"status": status}, owner=owner)
    except Exception:
        pass
    return status
@@ -662,7 +667,7 @@ def _apply_skill_md(skills_manager, name: str, md: str, owner) -> bool:
            "teacher_model": sk.teacher_model, "owner": sk.owner or owner,
            "when_to_use": sk.when_to_use, "procedure": sk.procedure,
            "pitfalls": sk.pitfalls, "verification": sk.verification, "body_extra": sk.body_extra,
-        }))
+        }, owner=owner))
    except Exception as e:
        logger.warning(f"Audit: could not save edited skill {name}: {e}")
        return False
@@ -762,11 +767,11 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,
    # earns a bit less; a skill that still fails is marked low.
    def _set_conf(c):
        try:
-            skills_manager.update_skill(name, {"confidence": c})
+            skills_manager.update_skill(name, {"confidence": c}, owner=owner)
        except Exception:
            pass

-    md = skills_manager.read_skill_md(name)
+    md = skills_manager.read_skill_md(name, owner=owner)
    if not md:
        log(f"{name}: no source — skipped")
        return {"skill": name, "result": "skipped"}
@@ -788,7 +793,8 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,
        nec = await _eval_skill_necessity(md, others, url, model, headers)
        if nec is not None:
            skills_manager.set_necessity(name, nec.get("necessary", True),
-                                         nec.get("redundant_with"), nec.get("reason"))
+                                         nec.get("redundant_with"), nec.get("reason"),
+                                         owner=owner)
            if not nec.get("necessary", True):
                log(f"{name}: possibly unnecessary — {nec.get('reason', '')[:80]}")
    except Exception as e:
@@ -799,12 +805,12 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,
    if generic_reason or duplicate_of or (isinstance(nec, dict) and nec.get("necessary") is False):
        reason = generic_reason or (f"Lower-priority duplicate of {duplicate_of}" if duplicate_of else str((nec or {}).get("reason") or "Unnecessary skill"))
        try:
-            skills_manager.update_skill(name, {"status": "draft", "confidence": 0.35})
-            skills_manager.set_audit(name, "skipped", by_teacher=False, worker_model=model)
+            skills_manager.update_skill(name, {"status": "draft", "confidence": 0.35}, owner=owner)
+            skills_manager.set_audit(name, "skipped", by_teacher=False, worker_model=model, owner=owner)
            if duplicate_of:
-                skills_manager.set_necessity(name, False, [duplicate_of], reason)
+                skills_manager.set_necessity(name, False, [duplicate_of], reason, owner=owner)
            else:
-                skills_manager.set_necessity(name, False, [], reason)
+                skills_manager.set_necessity(name, False, [], reason, owner=owner)
        except Exception:
            pass
        log(f"{name}: draft — skipped functional test ({reason[:100]})")
@@ -848,13 +854,13 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,
            if fixed and fixed.strip() != md.strip():
                _apply_skill_md(skills_manager, name, fixed, owner)
        _set_conf(0.95)
-        skills_manager.set_audit(name, "pass", by_teacher=False, worker_model=model)
+        skills_manager.set_audit(name, "pass", by_teacher=False, worker_model=model, owner=owner)
        refreshed = next((s for s in skills_manager.load(owner=owner) if s.get("name") == name), None)
        status = _audit_finalize_status(skills_manager, name, owner, "pass", 0.95, (refreshed or {}).get("necessity"), verdict)
        log(f"{name}: {status} — confidence 95%")
        return {"skill": name, "result": "pass", "verdict": verdict, "confidence": 0.95, "status": status}
    if v in ("unknown", "inconclusive"):
-        skills_manager.set_audit(name, "inconclusive", by_teacher=False, worker_model=model)
+        skills_manager.set_audit(name, "inconclusive", by_teacher=False, worker_model=model, owner=owner)
        status = _audit_finalize_status(skills_manager, name, owner, "inconclusive", skill.get("confidence") or 0.0, skill.get("necessity"))
        log(f"{name}: {status} — inconclusive")
        return {"skill": name, "result": "inconclusive", "verdict": verdict, "status": status}
@@ -869,7 +875,7 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,
        log(f"{name}: retry (self) = {v}")
        if v == "pass":
            _set_conf(0.85)
-            skills_manager.set_audit(name, "pass", by_teacher=False, worker_model=model)
+            skills_manager.set_audit(name, "pass", by_teacher=False, worker_model=model, owner=owner)
            refreshed = next((s for s in skills_manager.load(owner=owner) if s.get("name") == name), None)
            status = _audit_finalize_status(skills_manager, name, owner, "pass", 0.85, (refreshed or {}).get("necessity"), verdict)
            log(f"{name}: {status} — confidence 85% after self-edit")
@@ -893,7 +899,9 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,
        log(f"{name}: retry on student after teacher rewrite = {v}")
        if v == "pass":
            _set_conf(0.8)
-            skills_manager.set_audit(name, "pass", by_teacher=True, worker_model=model, teacher_model=t_model)
+            skills_manager.set_audit(
+                name, "pass", by_teacher=True, worker_model=model, teacher_model=t_model, owner=owner
+            )
            refreshed = next((s for s in skills_manager.load(owner=owner) if s.get("name") == name), None)
            status = _audit_finalize_status(skills_manager, name, owner, "pass", 0.8, (refreshed or {}).get("necessity"), verdict)
            log(f"{name}: {status} — confidence 80% after teacher rewrite")
@@ -901,13 +909,14 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,

    # Still failing → demote to draft + low confidence + flag (do NOT delete).
    try:
-        skills_manager.update_skill(name, {"status": "draft", "confidence": 0.35})
+        skills_manager.update_skill(name, {"status": "draft", "confidence": 0.35}, owner=owner)
    except Exception:
        pass
    skills_manager.set_audit(
        name, v or "fail", by_teacher=teacher_ran,
        worker_model=model,
        teacher_model=(teacher[1] if teacher_ran and teacher else ""),
+        owner=owner,
    )
    log(f"{name}: flagged — confidence lowered, kept as draft for manual review")
    return {"skill": name, "result": "flagged", "verdict": verdict, "confidence": 0.35}
@@ -976,7 +985,7 @@ async def _run_audit_all_job(key, skills_manager, names, url, model, headers, te
        job.pop("task", None)


-def _resolve_audit_models():
+def _resolve_audit_models(owner=None):
    """Resolve (url, model, headers, teacher) for an audit run from Settings.

    Worker = Utility model (falling back to Default, normalized to a served
@@ -985,7 +994,7 @@ def _resolve_audit_models():
    ValueError if no worker model.
    """
    from src.endpoint_resolver import resolve_endpoint
-    url, model, headers = resolve_endpoint("utility")
+    url, model, headers = resolve_endpoint("utility", owner=owner)
    if not url or not model:
        raise ValueError("No model configured — set a Default or Utility model in Settings.")
    try:
@@ -1029,7 +1038,7 @@ async def run_scheduled_skill_audit(skills_manager: SkillsManager,
        return {"status": "running", "skipped": True}

    try:
-        url, model, headers, teacher = _resolve_audit_models()
+        url, model, headers, teacher = _resolve_audit_models(owner=owner)
    except ValueError as e:
        logger.info(f"Scheduled skill audit skipped — {e}")
        return {"status": "skipped", "reason": str(e)}
@@ -1246,7 +1255,7 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
        if not match:
            raise HTTPException(404, "Skill not found")
        _verify_owner(match, user)
-        md = skills_manager.read_skill_md(match.get("name"))
+        md = skills_manager.read_skill_md(match.get("name"), owner=user)
        if md is None:
            raise HTTPException(404, "Skill source unavailable (legacy entry?)")
        return {"name": match.get("name"), "markdown": md}
@@ -1273,14 +1282,14 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
            raise HTTPException(404, "Skill not found")
        _verify_owner(match, user)
        name = match.get("name")
-        md = skills_manager.read_skill_md(name) or ""
+        md = skills_manager.read_skill_md(name, owner=user) or ""

        if not task:
            task = _skill_test_task(match)

        # Prefer the configured DEFAULT (→ Utility) model — not the current chat
        # session's model. Fall back to the caller's session model only if unset.
-        url, model, headers = resolve_endpoint("default")
+        url, model, headers = resolve_endpoint("default", owner=user)
        if not url or not model:
            url = url or ((body.get("endpoint_url") or "").strip() or None)
            model = model or ((body.get("model") or "").strip() or None)
@@ -1360,7 +1369,7 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:

        # Worker model (Default, normalized) + optional teacher — shared resolver.
        try:
-            url, model, headers, teacher = _resolve_audit_models()
+            url, model, headers, teacher = _resolve_audit_models(owner=user)
        except ValueError as e:
            raise HTTPException(400, str(e))

@@ -1437,7 +1446,7 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
    @router.post("/{skill_id}/markdown")
    async def save_skill_markdown(request: Request, skill_id: str):
        """Replace SKILL.md with new raw content. Parses + validates first."""
-        from services.memory.skill_format import Skill, slugify
+        from services.memory.skill_format import Skill
        user = _owner(request)
        body = await request.json()
        new_content = body.get("markdown")
@@ -1452,7 +1461,10 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
            sk = Skill.from_markdown(new_content)
        except Exception as e:
            raise HTTPException(400, f"Could not parse SKILL.md: {e}")
-        sk.name = slugify(sk.name or match.get("name"))
+        # Never rename on save: a changed `name` in the markdown would move
+        # the skill dir (update_skill) and orphan the original id, so a later
+        # delete 404s (#1333). Pin to the stored name, like _apply_skill_md.
+        sk.name = match.get("name")
        if not sk.owner:
            sk.owner = match.get("owner") or user
        ok = skills_manager.update_skill(match.get("name"), {
@@ -1474,7 +1486,7 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
            "pitfalls": sk.pitfalls,
            "verification": sk.verification,
            "body_extra": sk.body_extra,
-        })
+        }, owner=user)
        if not ok:
            raise HTTPException(500, "Update failed")
        # Manual markdown edits can create or substantially rewrite a draft
@@ -1496,7 +1508,7 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
        updates = body.dict(exclude_none=True)
        if not updates:
            return {"ok": True}
-        ok = skills_manager.update_skill(match.get("name"), updates)
+        ok = skills_manager.update_skill(match.get("name"), updates, owner=user)
        if not ok:
            raise HTTPException(404, "Skill not found")
        if not match.get("audit_verdict"):
@@ -1511,7 +1523,7 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
        if not match:
            raise HTTPException(404, "Skill not found")
        _verify_owner(match, user)
-        ok = skills_manager.delete_skill(match.get("name"))
+        ok = skills_manager.delete_skill(match.get("name"), owner=user)
        if not ok:
            raise HTTPException(404, "Skill not found")
        return {"ok": True}
--- a/routes/upload_routes.py
+++ b/routes/upload_routes.py
@@ -8,6 +8,7 @@ from typing import List
 import logging
 from core.middleware import require_admin
 from src.auth_helpers import get_current_user
+from src.upload_handler import count_recent_uploads

 logger = logging.getLogger(__name__)

@@ -24,15 +25,18 @@ def setup_upload_routes(upload_handler):
            
        client_ip = request.client.host if request.client else "unknown"
        out = []
-        
-        # Limit concurrent uploads per IP
-        ip_upload_count = sum(
-            1 for f in files 
-            if client_ip in upload_handler.upload_rate_log and 
-            any(now > time.time() - 10 for now in upload_handler.upload_rate_log[client_ip][-len(files):])
+
+        # Limit concurrent uploads per IP. Count genuine recent upload events —
+        # NOT the number of files in this batch. The previous check summed over
+        # `files`, so a single multi-file request counted itself as N concurrent
+        # uploads and tripped the limit (issue #1346: "attach more than one file
+        # → the model doesn't even see them"). save_upload still enforces the
+        # per-minute sliding-window rate limit per file.
+        recent_uploads = count_recent_uploads(
+            upload_handler.upload_rate_log.get(client_ip, []), time.time()
        )
-        
-        if ip_upload_count >= upload_handler.max_concurrent_uploads:
+
+        if recent_uploads >= upload_handler.max_concurrent_uploads:
            raise HTTPException(
                status_code=429,
                detail=f"Maximum concurrent uploads ({upload_handler.max_concurrent_uploads}) exceeded"
@@ -107,7 +111,7 @@ def setup_upload_routes(upload_handler):
        if os.path.exists(uploads_db):
            with open(uploads_db, encoding="utf-8") as f:
                db = json.load(f)
-            info = next((fi for fi in db.values() if fi["id"] == file_id), None)
+            info = next((fi for fi in db.values() if fi.get("id") == file_id), None)
            if info:
                original_name = info.get("name", file_id)
        auth_mgr = getattr(request.app.state, "auth_manager", None)
@@ -155,7 +159,7 @@ def setup_upload_routes(upload_handler):
        if os.path.exists(uploads_db):
            with open(uploads_db, encoding="utf-8") as f:
                db = json.load(f)
-            info = next((fi for fi in db.values() if fi["id"] == file_id), None)
+            info = next((fi for fi in db.values() if fi.get("id") == file_id), None)
        return info

    def _vision_cache_path(file_id: str) -> str:
--- a/routes/vault_routes.py
+++ b/routes/vault_routes.py
@@ -61,7 +61,8 @@ def _find_bw() -> str:
 def _load_config() -> dict:
    if VAULT_FILE.exists():
        try:
-            return json.loads(VAULT_FILE.read_text(encoding="utf-8"))
+            data = json.loads(VAULT_FILE.read_text(encoding="utf-8"))
+            return data if isinstance(data, dict) else {}
        except Exception:
            pass
    return {}
@@ -75,11 +76,18 @@ def _save_config(cfg: dict):
    safe_chmod(str(VAULT_FILE), 0o600)


-async def _run_bw(args: list, session: str = None, input_text: str = None) -> tuple:
+async def _run_bw(args: list, session: str = None, input_text: str = None,
+                  bw_password: str = None) -> tuple:
    env = {}
    env.update(os.environ)
    if session:
        env["BW_SESSION"] = session
+    # Secrets must never be passed as argv — process arguments are world-readable
+    # via `ps` / `/proc/<pid>/cmdline` to any local user. Keep --passwordenv
+    # support for bw commands that need it; unlock/login callers should prefer
+    # stdin so the master password is not left in the child environment either.
+    if bw_password is not None:
+        env["BW_PASSWORD"] = bw_password
    bw_path = _find_bw()
    try:
        proc = await asyncio.create_subprocess_exec(
@@ -175,8 +183,12 @@ def setup_vault_routes():
    async def unlock(req: VaultUnlockRequest, request: Request):
        """Unlock the vault and save the session key."""
        require_admin(request)
+        # Pass the master password on stdin, not argv. argv is visible through
+        # `ps` / /proc/<pid>/cmdline; stdin also avoids leaving the secret in
+        # the child process environment.
        stdout, stderr, rc = await _run_bw(
-            ["unlock", req.master_password, "--raw"],
+            ["unlock", "--raw"],
+            input_text=req.master_password + "\n",
        )
        if rc != 0:
            return {"ok": False, "error": f"Unlock failed: {stderr[:300]}"}
--- a/routes/webhook_routes.py
+++ b/routes/webhook_routes.py
@@ -26,6 +26,44 @@ MAX_MESSAGE_LEN = 32_000
 from core.middleware import require_admin as _require_admin


+def _first_enabled_endpoint(db, owner):
+    """Return the first enabled ModelEndpoint row that `owner` may use.
+
+    The query is narrowed with ``owner_filter`` (the scoping also used in
+    routes/model_routes.py and companion/routes.py), so the row is one of the
+    caller's own or a legacy null-owner ("shared") one. ModelEndpoint rows are
+    per-user, and the sync-chat fallback uses the row's decrypted `api_key`:
+    an unscoped ``.first()`` would let a chat-scoped token (e.g. a paired
+    mobile device) fall back onto ANOTHER user's private endpoint, spending
+    that owner's API key / quota and reaching their configured base_url.
+    A null/empty owner is a no-op for the filter (single-user / legacy mode),
+    which preserves the original behaviour.
+    """
+    from core.database import ModelEndpoint
+    from src.auth_helpers import owner_filter
+    enabled_rows = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)  # noqa: E712
+    return owner_filter(enabled_rows, ModelEndpoint, owner).first()
+
+
+def _caller_owns_session(sess_owner, caller) -> bool:
+    """Decide whether `caller` may resume a session owned by `sess_owner`.
+
+    Strict ownership gate for the token-authenticated sync-chat endpoint
+    (`POST /api/v1/chat`), mirroring ``_verify_session_owner`` in
+    session_routes.py and the null-owner gates in notes/calendar/gallery.
+
+    The session is resumable only when its owner equals the caller exactly.
+    A null/empty session owner (legacy or migrated rows) therefore never
+    matches: the old ``sess_owner and sess_owner != caller`` form skipped the
+    check whenever ``sess_owner`` was falsy, so any chat-scoped token (e.g. a
+    paired mobile device) could resume such a session, inject a message, read
+    back its history and reuse the owner's endpoint credentials.
+
+    Fails closed: a falsy (unresolvable) caller always yields False.
+    """
+    return bool(caller) and sess_owner == caller
+
+
 def setup_webhook_routes(
    webhook_manager: WebhookManager,
    auth_manager,
@@ -159,6 +197,7 @@ def setup_webhook_routes(
        "openrouter": "https://openrouter.ai/api/v1",
        "ollama": "https://ollama.com/api",
        "fireworks": "https://api.fireworks.ai/inference/v1",
+        "venice": "https://api.venice.ai/api/v1",
    }

    # Model prefix → provider mapping for auto-detection
@@ -203,7 +242,6 @@ def setup_webhook_routes(

        from core.models import ChatMessage
        from src.llm_core import llm_call_async
-        from core.database import ModelEndpoint
        from src.endpoint_resolver import build_chat_url, build_headers, build_models_url, normalize_base

        message = body.message.strip()
@@ -228,8 +266,11 @@ def setup_webhook_routes(
                _tok_user = token_owner or getattr(request.state, "user", None) or _gcu(request)
            except Exception:
                _tok_user = None
+            # Strict ownership (see _caller_owns_session): fail closed so a
+            # null-owner / cross-owner session can't be resumed by an arbitrary
+            # chat-scoped token.
            _sess_owner = getattr(sess, "owner", None)
-            if _tok_user and _sess_owner and _sess_owner != _tok_user:
+            if not _caller_owns_session(_sess_owner, _tok_user):
                raise HTTPException(404, "Session not found")

        # --- Case 2: Direct API key + model (no pre-configured endpoint needed) ---
@@ -265,7 +306,9 @@ def setup_webhook_routes(
        if not sess:
            db = SessionLocal()
            try:
-                ep = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True).first()
+                # Owner-scoped: only THIS token owner's endpoints + legacy
+                # shared rows, never another user's private endpoint/api_key.
+                ep = _first_enabled_endpoint(db, token_owner)
            finally:
                db.close()

--- a/scripts/add_hwfit_models.py
+++ b/scripts/add_hwfit_models.py
@@ -9,7 +9,9 @@ Adds:

 Metadata is taken from the HF Hub `list_models(full=True)` response plus the
 repo name (which encodes the param size, e.g. "Qwen3.6-35B-A3B"). Param-less
-names fall back to a single per-repo model_info() call to read safetensors.
+names fall back, in order, to the parent `base_model:` tag, the repo's
+`config.json` (computed from `hidden_size` / `num_hidden_layers` / MoE
+fields), and finally a per-repo `model_info()` call to read safetensors.

 Re-runnable: merges by `name`, leaving existing entries untouched unless
 --overwrite is passed. Writes a .bak first.
@@ -23,7 +25,8 @@ import re
 import sys
 from datetime import datetime

-from huggingface_hub import HfApi
+from huggingface_hub import HfApi, hf_hub_download
+from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError

 DATA_PATH = os.path.join(os.path.dirname(__file__), "..", "services", "hwfit", "data", "hf_models.json")
 DATA_PATH = os.path.abspath(DATA_PATH)
@@ -43,7 +46,8 @@ _GENERIC_TAGS = {
    "transformers", "safetensors", "conversational", "text-generation",
    "image-text-to-text", "text-generation-inference", "endpoints_compatible",
    "autotrain_compatible", "compressed-tensors", "gguf", "mlx", "vllm", "4-bit",
-    "8-bit", "awq", "gptq", "fp8", "quantized", "chat",
+    "8-bit", "awq", "gptq", "fp8", "fp4", "nvfp4", "mxfp4", "nf4",
+    "quantized", "chat",
 }

 api = HfApi()
@@ -69,6 +73,128 @@ def _parse_params(name):
    return total, active


+def _params_from_config(cfg):
+    """Estimate (total, active) parameter counts from a HF config.json dict.
+
+    Returns (None, None) when the architecture fields aren't usable. Covers:
+      * explicit ``num_parameters`` / ``n_params`` / ``total_params`` (authoritative)
+      * dense transformers (LLaMA / Qwen / Mistral / GLM-dense / etc.) via
+        embeddings + per-layer attention + MLP
+      * MoE (Qwen3-MoE, GLM-4-MoE, DeepSeek-style, Mixtral) using ``num_experts``,
+        ``n_routed_experts`` or ``num_local_experts`` (+ ``n_shared_experts``).
+        Active count = ``num_experts_per_tok`` routed experts + shared experts.
+
+    The estimate is intentionally coarse — within ~5-10% of the true count for
+    standard decoder-only architectures — which is fine for the downstream
+    ``min_vram_gb`` heuristic (it already buckets via ``parameter_count`` to
+    one decimal place of "B").
+    """
+    if not isinstance(cfg, dict):
+        return None, None
+
+    # Authoritative fields first (bools and inf/NaN are not usable counts).
+    for key in ("num_parameters", "n_params", "total_params"):
+        v = cfg.get(key)
+        usable = isinstance(v, (int, float)) and not isinstance(v, bool)
+        if usable and 0 < v < float("inf"):
+            return int(v), None
+
+    def _i(key, default=None):
+        v = cfg.get(key, default)
+        try:
+            return int(v) if v is not None else None
+        except (TypeError, ValueError, OverflowError):  # "auto", [], NaN, inf
+            return None
+
+    h = _i("hidden_size")
+    L = _i("num_hidden_layers")
+    if not h or not L or h < 0 or L < 0:
+        return None, None
+
+    vocab = _i("vocab_size") or 0
+    ffn = _i("intermediate_size") or (4 * h)
+    n_heads = _i("num_attention_heads") or 0
+    n_kv = _i("num_key_value_heads") or n_heads
+    head_dim = _i("head_dim") or (h // n_heads if n_heads else h)
+
+    # Attention: Q is hidden_size wide, KV is grouped (GQA / MQA).
+    q_proj = h * (n_heads * head_dim if n_heads else h)
+    kv_proj = 2 * h * (n_kv * head_dim if n_kv else h)
+    o_proj = (n_heads * head_dim if n_heads else h) * h
+    per_layer_attn = q_proj + kv_proj + o_proj
+
+    # Dense MLP: gate + up + down (SwiGLU / GeGLU). Configs without a gate
+    # (plain GELU) are within the noise floor of this estimate.
+    per_layer_dense_mlp = 3 * h * ffn
+
+    # MoE routing. The expert-count key varies (Qwen/GLM, DeepSeek, Mixtral).
+    n_experts = _i("num_experts") or _i("n_routed_experts") or _i("num_local_experts") or 0
+    n_shared = _i("n_shared_experts") or 0
+    n_active = min(_i("num_experts_per_tok") or 0, n_experts)  # never more than exist
+    moe_ffn = _i("moe_intermediate_size") or ffn
+    # Some configs (GLM-4-MoE, DeepSeek-V3) keep the first K layers dense.
+    first_dense = max(0, _i("first_k_dense_replace") or 0)
+
+    if n_experts > 0 and n_active > 0:
+        moe_layers = max(0, L - first_dense)
+        dense_layers = L - moe_layers
+        per_expert = 3 * h * moe_ffn
+        total_mlp = (
+            dense_layers * per_layer_dense_mlp
+            + moe_layers * (n_experts + n_shared) * per_expert
+        )
+        active_mlp = (
+            dense_layers * per_layer_dense_mlp
+            + moe_layers * (n_active + n_shared) * per_expert
+        )
+    else:
+        total_mlp = L * per_layer_dense_mlp
+        active_mlp = total_mlp
+
+    embed = vocab * h
+    # Untied output head doubles the embedding contribution.
+    head = 0 if cfg.get("tie_word_embeddings", True) else vocab * h
+
+    total = embed + head + L * per_layer_attn + total_mlp
+    active = embed + head + L * per_layer_attn + active_mlp
+    if total <= 0:
+        return None, None
+    if active == total or n_experts == 0:
+        return int(total), None
+    return int(total), int(active)
+
+
+_CONFIG_CACHE = {}
+
+
+def _fetch_config_json(repo_id):
+    """Download, parse and memoize a repo's config.json.
+
+    Returns the parsed dict, or None when the file is missing, unreadable,
+    not valid JSON, or not a JSON object. Download failures of any kind are
+    swallowed so one bad repo cannot abort a bulk run; the outcome (including
+    None) is stored in _CONFIG_CACHE so each repo is tried once per process.
+    """
+    if repo_id in _CONFIG_CACHE:
+        return _CONFIG_CACHE[repo_id]
+    cfg = None
+    try:
+        path = hf_hub_download(repo_id=repo_id, filename="config.json")
+    except (EntryNotFoundError, RepositoryNotFoundError):
+        path = None  # expected: repo has no config.json or does not exist
+    except Exception:
+        path = None  # network hiccup, gated repo, etc. - keep the run alive
+    if path is not None:
+        try:
+            with open(path, encoding="utf-8") as f:
+                cfg = json.load(f)
+        except (OSError, ValueError):
+            cfg = None
+    # The docstring promises a mapping; discard top-level arrays/scalars.
+    _CONFIG_CACHE[repo_id] = cfg if isinstance(cfg, dict) else None
+    return _CONFIG_CACHE[repo_id]
+
+
 def _base_model_tag(tags):
    """Return the `base_model:...` repo id from tags, if any."""
    for t in (tags or []):
@@ -79,6 +205,20 @@ def _base_model_tag(tags):

 def _quant_from_name(name):
    n = name.lower()
+    if "nvfp4" in n:
+        return "NVFP4"
+    if "mxfp4" in n:
+        return "MXFP4"
+    if re.search(r"(^|[-_/])nf4($|[-_/])", n):
+        return "NF4"
+    if re.search(r"(^|[-_/])fp4($|[-_/])", n):
+        return "FP4"
+    if re.search(r"(^|[-_/])w4a16($|[-_/])", n):
+        return "W4A16"
+    if re.search(r"(^|[-_/])w8a8($|[-_/])", n):
+        return "W8A8"
+    if re.search(r"(^|[-_/])w8a16($|[-_/])", n):
+        return "W8A16"
    is8 = "8bit" in n or "8-bit" in n or "int8" in n
    if "awq" in n:
        return "AWQ-8bit" if is8 else "AWQ-4bit"
@@ -88,10 +228,14 @@ def _quant_from_name(name):
        if "6bit" in n:
            return "mlx-6bit"
        return "mlx-8bit" if is8 else "mlx-4bit"
+    if "nvfp4" in n:
+        return "NVFP4"
    if "fp8" in n:
        return "FP8"
    if "int4" in n or "4bit" in n or "4-bit" in n:
-        return "AWQ-4bit"
+        return "INT4"
+    if "int8" in n or "8bit" in n or "8-bit" in n:
+        return "INT8"
    return "Q4_K_M"


@@ -122,6 +266,27 @@ def _entry_from_modelinfo(mi, overrides):
                    active = ba
    # Determine quant first — we need it to unpack the safetensors fallback.
    quant = _quant_from_name(name)
+    # Next-to-last resort: parse config.json. This is robust against
+    # parameter-less repo names (e.g. "GLM-4.5" with no "9B" suffix) where
+    # both the regex and the base_model tag come up empty. We try this
+    # before safetensors so non-standard names still resolve without a
+    # per-repo manual override in EXTRA_REPOS. The repo itself is tried first
+    # (works for unquantized models), then its parent from the base_model: tag.
+    if total is None:
+        config_targets = [name]
+        bm = _base_model_tag(getattr(mi, "tags", None))
+        if bm and bm != name:
+            config_targets.append(bm)
+        for target in config_targets:
+            cfg = _fetch_config_json(target)
+            if not cfg:
+                continue
+            ct, ca = _params_from_config(cfg)
+            if ct:
+                total = ct
+                if ca and active is None:
+                    active = ca
+                break
    # Last resort: read safetensors element counts. For pre-quantized repos
    # (AWQ/GPTQ/MLX-Int4 etc.) the weights are packed: 8× 4-bit weights per
    # I32 element, 4× 8-bit weights per I32. The bare safetensors total
@@ -136,7 +301,7 @@ def _entry_from_modelinfo(mi, overrides):
                params_by_dtype = getattr(st, "parameters", None) or {}
                if quant.endswith("4bit") or quant.endswith("Int4"):
                    pack_factor = 8
-                elif quant.endswith("8bit") or quant.endswith("Int8") or quant == "FP8":
+                elif quant.endswith("8bit") or quant.endswith("Int8") or quant in ("FP8", "NVFP4"):
                    pack_factor = 4
                else:
                    pack_factor = 1
@@ -158,7 +323,10 @@ def _entry_from_modelinfo(mi, overrides):
    rel = created.strftime("%Y-%m-%d") if created else datetime.utcnow().strftime("%Y-%m-%d")
    # Rough RAM/VRAM hints (fit.py recomputes the real requirement from params+quant).
    _BPP = {"AWQ-4bit": 0.58, "GPTQ-Int4": 0.58, "mlx-4bit": 0.55, "mlx-6bit": 0.85,
-            "AWQ-8bit": 1.1, "GPTQ-Int8": 1.1, "mlx-8bit": 1.1, "FP8": 1.1, "Q4_K_M": 0.6}
+            "AWQ-8bit": 1.1, "GPTQ-Int8": 1.1, "mlx-8bit": 1.1, "FP8": 1.1,
+            "FP4": 0.58, "NVFP4": 0.58, "MXFP4": 0.58, "NF4": 0.58,
+            "INT4": 0.58, "INT8": 1.1, "W4A16": 0.58, "W8A8": 1.1, "W8A16": 1.1,
+            "Q4_K_M": 0.6}
    bpp = _BPP.get(quant, 0.6)
    vram = round(pb * bpp + 0.5, 1)
    entry = {
--- a/scripts/check-docker-amd-gpu.sh
+++ b/scripts/check-docker-amd-gpu.sh
@@ -0,0 +1,205 @@
+#!/usr/bin/env bash
+# check-docker-amd-gpu.sh - read-only AMD/ROCm Docker passthrough diagnostic.
+#
+# This script does not install packages, edit .env, or restart Docker. It only
+# checks host AMD device nodes, Docker access, and whether a small container can
+# see /dev/kfd and /dev/dri. The Odysseus slim image does not include ROCm tools
+# such as rocm-smi, so container verification checks devices instead.
+
+set -u
+
+PASS=0
+FAIL=0
+WARN=0
+RENDER_GID=""
+VIDEO_GID=""
+TEST_IMAGE="${ODYSSEUS_AMD_TEST_IMAGE:-alpine:3.20}"
+
+_pass() { printf '\033[32m[PASS]\033[0m %s\n' "$*"; PASS=$((PASS + 1)); }
+_fail() { printf '\033[31m[FAIL]\033[0m %s\n' "$*"; FAIL=$((FAIL + 1)); }
+_warn() { printf '\033[33m[WARN]\033[0m %s\n' "$*"; WARN=$((WARN + 1)); }
+_info() { printf '\033[34m[INFO]\033[0m %s\n' "$*"; }
+
+_usage() {
+    cat <<'USAGE'
+Usage: scripts/check-docker-amd-gpu.sh
+
+Read-only AMD/ROCm Docker GPU diagnostic. Installs nothing, edits nothing, and
+does not restart Docker.
+
+Checks:
+  - host /dev/kfd and /dev/dri/renderD* exist
+  - host render group GID for RENDER_GID in .env
+  - optional host rocminfo visibility
+  - Docker can pass AMD device nodes into a small container
+
+Environment:
+  ODYSSEUS_AMD_TEST_IMAGE   Docker image for the passthrough smoke
+                            (default: alpine:3.20)
+USAGE
+}
+
+for _arg in "$@"; do
+    case "${_arg}" in
+        --help|-h)
+            _usage
+            exit 0
+            ;;
+        *)
+            printf 'Unknown option: %s\n\n' "${_arg}" >&2
+            _usage >&2
+            exit 1
+            ;;
+    esac
+done
+
+_find_cmd() {
+    # Resolve tool $1 via PATH, else /opt/rocm/bin; print the hit, rc 1 if none.
+    local tool="$1"
+    if command -v "${tool}" >/dev/null 2>&1; then
+        command -v "${tool}"
+    elif [ -x "/opt/rocm/bin/${tool}" ]; then
+        printf '/opt/rocm/bin/%s\n' "${tool}"
+    else
+        return 1
+    fi
+}
+
+_check_host_devices() {
+    # Report whether the host exposes /dev/kfd and at least one render node.
+    local render_nodes
+    _info "Checking host AMD device nodes..."
+    if [ -e /dev/kfd ]; then
+        _pass "/dev/kfd exists"
+    else
+        _fail "/dev/kfd is missing - ROCm kernel driver access is not available."
+    fi
+    if [ ! -d /dev/dri ]; then
+        _fail "/dev/dri is missing - render devices are not available."
+        echo   # keep the blank separator that the normal exit path prints
+        return
+    fi
+    _pass "/dev/dri exists"
+    render_nodes="$(find /dev/dri -maxdepth 1 -type c -name 'renderD*' -print 2>/dev/null | sort)"
+    if [ -n "${render_nodes}" ]; then
+        _pass "Render nodes found:"
+        printf '%s\n' "${render_nodes}" | sed 's/^/        /'
+    else
+        _fail "No /dev/dri/renderD* node found."
+    fi
+    echo
+}
+
+_check_groups() {
+    # Look up the numeric render/video GIDs; each stays empty when unknown.
+    _info "Checking host render/video groups..."
+    RENDER_GID="$(getent group render | awk -F: '{print $3; exit}')"
+    VIDEO_GID="$(getent group video | awk -F: '{print $3; exit}')"
+
+    if [ -z "${RENDER_GID}" ]; then
+        _fail "render group not found - set RENDER_GID manually if your distro uses a different group."
+    else
+        _pass "render group GID: ${RENDER_GID}"
+    fi
+    if [ -z "${VIDEO_GID}" ]; then
+        _warn "video group not found. /dev/kfd and renderD* may still be enough on some hosts."
+    else
+        _pass "video group GID: ${VIDEO_GID}"
+    fi
+    echo
+}
+
+_check_host_rocm() {
+    local rocminfo_cmd rocminfo_out=""   # rocminfo runs once; output is reused below
+    _info "Checking host ROCm tools..."
+    rocminfo_cmd="$(_find_cmd rocminfo || true)"
+    [ -n "${rocminfo_cmd}" ] && rocminfo_out="$("${rocminfo_cmd}" 2>/dev/null || true)"
+    if [ -z "${rocminfo_cmd}" ]; then
+        _warn "rocminfo not found on PATH or /opt/rocm/bin. This does not block Docker passthrough, but host ROCm may be incomplete."
+    elif printf '%s\n' "${rocminfo_out}" | grep -Eq 'gfx[0-9a-f]+'; then
+        _pass "rocminfo works on the host: ${rocminfo_cmd}"
+        printf '%s\n' "${rocminfo_out}" \
+            | grep -E 'Marketing Name:|Name:[[:space:]]+gfx' \
+            | head -12 \
+            | sed 's/^/        /'
+    else
+        _warn "rocminfo exists but did not list a gfx target."
+    fi
+    echo
+}
+
+_check_docker() {
+    # Return 0 only when the docker CLI exists and `docker info` succeeds.
+    local rc=0
+    _info "Checking Docker..."
+    if ! command -v docker >/dev/null 2>&1; then
+        _fail "docker not found - install Docker first."
+        rc=1
+    elif ! docker info >/dev/null 2>&1; then
+        _fail "Docker daemon is not running or this user lacks Docker permission."
+        rc=1
+    else
+        _pass "Docker daemon is running."
+    fi
+    echo
+    return "${rc}"
+}
+
+_check_docker_passthrough() {
+    # Smoke test: run a throwaway container with the AMD device nodes and the
+    # host render/video GIDs, then check the nodes are visible inside it.
+    local smoke_out group_args
+    if [ -z "${RENDER_GID}" ]; then
+        _fail "Skipping Docker passthrough smoke because render GID is unknown."
+        echo
+        return
+    fi
+    _info "Testing AMD device passthrough with ${TEST_IMAGE} (may pull on first run)..."
+    group_args=(--group-add "${RENDER_GID}")
+    [ -n "${VIDEO_GID}" ] && group_args+=(--group-add "${VIDEO_GID}")
+    if smoke_out="$(docker run --rm \
+        --device=/dev/kfd \
+        --device=/dev/dri \
+        "${group_args[@]}" \
+        "${TEST_IMAGE}" \
+        sh -lc 'test -e /dev/kfd && test -d /dev/dri && ls /dev/dri/renderD* >/dev/null' 2>&1)"; then
+        _pass "Docker can pass /dev/kfd and /dev/dri render nodes into a container."
+    else
+        _fail "Docker AMD device passthrough failed."
+        # Show docker's own error (pull failure, missing device, ...) rather than hiding it.
+        [ -n "${smoke_out}" ] && printf '%s\n' "${smoke_out}" | tail -n 5 | sed 's/^/        /'
+        _info "Check that Docker can access /dev/kfd and /dev/dri, then retry."
+    fi
+    echo
+}
+
+_print_next_steps() {
+    # Print the .env lines to use (placeholder when the render GID is unknown)
+    # followed by the post-restart verification hint.
+    local gid_hint="${RENDER_GID:-<numeric render group id>}"
+    echo "=== Suggested .env values ==="
+    printf 'COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml\n'
+    printf 'RENDER_GID=%s\n' "${gid_hint}"
+    cat <<'NEXT'
+
+After restarting Odysseus, verify the slim app container sees devices:
+  docker compose exec odysseus sh -lc 'test -e /dev/kfd && test -d /dev/dri && ls -l /dev/kfd /dev/dri/renderD*'
+
+Note: rocm-smi/rocminfo are not expected inside the slim Odysseus image.
+Device passthrough is necessary but not sufficient for GPU serving; vLLM and
+llama.cpp still need ROCm-compatible builds or ROCm-specific Docker images.
+NEXT
+}
+
+echo "=== Odysseus AMD Docker GPU diagnostic ==="
+echo
+_check_host_devices
+_check_groups
+_check_host_rocm
+if _check_docker; then
+    _check_docker_passthrough
+fi
+_print_next_steps
+echo
+echo "=== Results: ${PASS} passed, ${WARN} warnings, ${FAIL} failed ==="
+[ "${FAIL}" -eq 0 ]
--- a/scripts/check-docker-gpu.sh
+++ b/scripts/check-docker-gpu.sh
@@ -0,0 +1,579 @@
+#!/usr/bin/env bash
+# check-docker-gpu.sh — Diagnostic and optional setup helper for NVIDIA Docker GPU access.
+#
+# Default mode is READ-ONLY — does not install packages, modify config, or restart Docker.
+# The Odysseus app never calls this script automatically.
+#
+# USAGE
+#   scripts/check-docker-gpu.sh                              # read-only diagnostics (default)
+#   scripts/check-docker-gpu.sh --enable-nvidia-overlay     # also write COMPOSE_FILE to .env
+#   scripts/check-docker-gpu.sh --print-install-commands    # show OS-specific commands, don't run
+#   scripts/check-docker-gpu.sh --install-nvidia-toolkit    # install toolkit (Ubuntu/Debian only)
+#   scripts/check-docker-gpu.sh --install-nvidia-toolkit --enable-nvidia-overlay
+#   scripts/check-docker-gpu.sh --install-nvidia-toolkit --enable-nvidia-overlay --yes
+#   scripts/check-docker-gpu.sh --help
+
+MODE="check"
+OPT_YES=0
+OPT_ENABLE_OVERLAY=0
+_GPU_PASSTHROUGH_OK=0
+
+# ─── output helpers ──────────────────────────────────────────────────────────
+
+PASS=0
+FAIL=0
+
+_pass() { printf '\033[32m[PASS]\033[0m %s\n' "$*"; PASS=$((PASS + 1)); }
+_fail() { printf '\033[31m[FAIL]\033[0m %s\n' "$*"; FAIL=$((FAIL + 1)); }
+_info() { printf '\033[34m[INFO]\033[0m %s\n' "$*"; }
+_warn() { printf '\033[33m[WARN]\033[0m %s\n' "$*"; }
+_step() { printf '\033[36m[STEP]\033[0m %s\n' "$*"; }
+
+_confirm() {
+    # Prompt "$1 [y/N]" and read one line from stdin.
+    # Succeeds only for y / yes in any letter case; everything else means no.
+    printf '%s [y/N] ' "$1"
+    read -r _ans
+    [[ "${_ans}" =~ ^([Yy]|[Yy][Ee][Ss])$ ]] && return 0
+    return 1
+}
+
+# ─── paths ───────────────────────────────────────────────────────────────────
+
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
+
+# ─── arg parsing ─────────────────────────────────────────────────────────────
+
+_usage() {
+    cat <<'USAGE'
+Usage: scripts/check-docker-gpu.sh [OPTIONS]
+
+Read-only diagnostic (default — safe to run at any time, installs nothing):
+  (no flags)                    Check host nvidia-smi, Docker daemon, and Docker
+                                GPU passthrough. Prints PASS/FAIL and next steps.
+
+Informational:
+  --print-install-commands      Detect the OS and print recommended NVIDIA
+                                Container Toolkit commands without running them.
+                                Inspect these before deciding to install.
+  --help                        Show this help.
+
+Opt-in .env update (requires .env or .env.example in the repo root):
+  --enable-nvidia-overlay       Write COMPOSE_FILE=docker-compose.yml:docker/gpu.nvidia.yml
+                                into .env. Creates a timestamped backup first.
+                                Blocked if GPU passthrough is not working — fix
+                                passthrough first, then re-run. --yes does not
+                                override this gate.
+                                Never edits .env unless this flag is passed.
+
+Opt-in install (Ubuntu/Debian only, requires sudo):
+  --install-nvidia-toolkit      Add NVIDIA's apt repository, install
+                                nvidia-container-toolkit, configure the Docker
+                                runtime, and optionally restart Docker.
+                                Shows all commands and prompts before any
+                                privileged action.
+  --yes                         Skip confirmation prompts (for use with
+                                --install-nvidia-toolkit and/or
+                                --enable-nvidia-overlay in automated setups).
+
+Examples:
+  # Diagnose GPU passthrough before enabling the NVIDIA compose overlay:
+  scripts/check-docker-gpu.sh
+
+  # See what install commands apply to this system without running them:
+  scripts/check-docker-gpu.sh --print-install-commands
+
+  # Diagnose and automatically update .env with the NVIDIA overlay:
+  scripts/check-docker-gpu.sh --enable-nvidia-overlay
+
+  # Install toolkit interactively, then enable the overlay if it works:
+  scripts/check-docker-gpu.sh --install-nvidia-toolkit --enable-nvidia-overlay
+
+  # Full assisted setup without prompts (automated/CI use):
+  scripts/check-docker-gpu.sh --install-nvidia-toolkit --enable-nvidia-overlay --yes
+
+After a successful setup, start Odysseus:
+  docker compose up -d --build
+
+Full guide: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
+USAGE
+}
+
+for _arg in "$@"; do
+    case "${_arg}" in
+        --help|-h)
+            _usage
+            exit 0
+            ;;
+        --print-install-commands)
+            MODE="print"
+            ;;
+        --install-nvidia-toolkit)
+            MODE="install"
+            ;;
+        --enable-nvidia-overlay)
+            OPT_ENABLE_OVERLAY=1
+            ;;
+        --yes|-y)
+            OPT_YES=1
+            ;;
+        *)
+            printf 'Unknown option: %s\n\n' "${_arg}" >&2
+            _usage >&2
+            exit 1
+            ;;
+    esac
+done
+
+# ─── OS/distro detection ─────────────────────────────────────────────────────
+
+DISTRO_ID=""
+DISTRO_LIKE=""
+DISTRO_VERSION=""
+DISTRO_ARCH="$(uname -m 2>/dev/null || echo unknown)"
+# os-release values may be wrapped in double OR single quotes; strip both kinds.
+if [ -f /etc/os-release ]; then
+    DISTRO_ID="$(grep '^ID=' /etc/os-release | cut -d= -f2 | tr -d "\"'")"
+    DISTRO_LIKE="$(grep '^ID_LIKE=' /etc/os-release | cut -d= -f2 | tr -d "\"'")"
+    DISTRO_VERSION="$(grep '^VERSION_ID=' /etc/os-release | cut -d= -f2 | tr -d "\"'")"
+fi
+
+_is_debian_family() {
+    # Succeeds when ID is a known Debian derivative, or when the
+    # space-separated ID_LIKE list (e.g. "ubuntu debian") names debian/ubuntu.
+    local padded_like=" ${DISTRO_LIKE} "
+    if [[ "${DISTRO_ID}" =~ ^(ubuntu|debian|linuxmint|pop|elementary)$ ]]; then
+        return 0
+    fi
+    [[ "${padded_like}" == *" debian "* || "${padded_like}" == *" ubuntu "* ]] && return 0
+    return 1
+}
+
+_distro_label() {
+    # Print "<id>[ <version>] (<arch>)" without a trailing newline, or
+    # "unknown Linux (<arch>)" when DISTRO_ID is empty.
+    local name="unknown Linux"
+    if [ -n "${DISTRO_ID}" ]; then
+        name="${DISTRO_ID}"
+        [ -n "${DISTRO_VERSION}" ] && name="${name} ${DISTRO_VERSION}"
+    fi
+    printf '%s (%s)' "${name}" "${DISTRO_ARCH}"
+}
+
+# ─── Ubuntu/Debian install command text ──────────────────────────────────────
+# Printed both by --print-install-commands and shown before --install runs.
+
+_debian_install_steps() {
+    cat <<'STEPS'
+
+  # 1. Install prerequisites
+  sudo apt-get update
+  sudo apt-get install -y curl gpg
+
+  # 2. Add NVIDIA's signing key
+  curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
+    | sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
+
+  # 3. Add NVIDIA's apt repository
+  curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
+    | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
+    | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
+
+  # 4. Install the toolkit
+  sudo apt-get update
+  sudo apt-get install -y nvidia-container-toolkit
+
+  # 5. Configure the Docker runtime
+  sudo nvidia-ctk runtime configure --runtime=docker
+
+  # 6. Restart Docker
+  sudo systemctl restart docker
+
+  # 7. Verify
+  docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi
+
+STEPS
+}
+
+# ─── read-only checks ────────────────────────────────────────────────────────
+
+_check_nvidia_smi() {
+    # Host-side check: is nvidia-smi on PATH, and does `nvidia-smi -L`
+    # list at least one GPU? Records one PASS or FAIL either way.
+    _info "Checking host nvidia-smi..."
+    if ! command -v nvidia-smi >/dev/null 2>&1; then
+        _fail "nvidia-smi not found — install the NVIDIA driver for your distribution."
+        _info "No NVIDIA GPU? Skip this script — the NVIDIA overlay is not needed for CPU-only use."
+    elif ! nvidia-smi -L 2>/dev/null | grep -q 'GPU '; then
+        _fail "nvidia-smi found but no GPUs listed — check your NVIDIA driver installation."
+    else
+        _pass "nvidia-smi is working. Detected GPUs:"
+        nvidia-smi -L 2>/dev/null | sed 's/^/        /'
+    fi
+    echo
+}
+
+# Verify the docker CLI exists and the daemon answers `docker info`.
+# Returns 1 when Docker is unusable so callers can skip the GPU checks.
+_check_docker() {
+    _info "Checking Docker..."
+    if ! command -v docker >/dev/null 2>&1; then
+        _fail "docker not found — install Docker: https://docs.docker.com/engine/install/"
+        echo "Cannot continue without Docker."
+        return 1
+    fi
+    if ! docker info >/dev/null 2>&1; then
+        _fail "Docker daemon is not running or current user lacks permission."
+        _info "Try: sudo systemctl start docker"
+        _info "Or add your user to the docker group: sudo usermod -aG docker \$USER"
+        echo "Cannot continue — GPU passthrough test requires a running Docker daemon."
+        return 1
+    fi
+    _pass "Docker daemon is running."
+    echo
+}
+
+_check_gpu_passthrough() {
+    _info "Testing GPU passthrough (may pull image on first run):"
+    _info "  docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi"
+    echo
+    if docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi 2>&1; then
+        echo
+        _GPU_PASSTHROUGH_OK=1
+        _pass "GPU passthrough is working — the NVIDIA compose overlay should work."
+        _info "Passthrough means Docker can see your GPU. It does NOT guarantee"
+        _info "llama.cpp will use CUDA. If Cookbook logs show:"
+        _info "  'Unable to find cudart library'"
+        _info "  'Could NOT find CUDAToolkit' / 'CUDA Toolkit not found'"
+        _info "  tensors or layers assigned to CPU"
+        _info "that is a Cookbook/llama.cpp CUDA build or runtime issue, not a"
+        _info "passthrough failure. Re-install the serve engine via"
+        _info "Cookbook -> Dependencies to get a CUDA-enabled build."
+        if [ "${OPT_ENABLE_OVERLAY}" -eq 0 ]; then
+            _info "Enable the overlay in .env with:"
+            _info "  scripts/check-docker-gpu.sh --enable-nvidia-overlay"
+        fi
+    else
+        echo
+        _fail "GPU passthrough failed. Check these steps in order:"
+        echo
+        echo "  1. Install NVIDIA Container Toolkit (if not already installed):"
+        echo "     Arch:    sudo pacman -S nvidia-container-toolkit"
+        echo "     Debian:  sudo apt install nvidia-container-toolkit"
+        echo "     Fedora:  sudo dnf install nvidia-container-toolkit"
+        echo "     Full guide: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html"
+        echo
+        echo "  2. Configure the Docker runtime:"
+        echo "     sudo nvidia-ctk runtime configure --runtime=docker"
+        echo
+        echo "  3. Restart Docker:"
+        echo "     sudo systemctl restart docker"
+        echo
+        echo "  Then re-run this script to confirm."
+        echo
+        _warn "Without GPU passthrough, Cookbook will detect the iGPU, another card, or"
+        _warn "CPU instead of your NVIDIA GPU — model recommendations will use the wrong VRAM."
+        _info "Run with --print-install-commands to see OS-specific commands."
+        _info "Run with --install-nvidia-toolkit to install on Ubuntu/Debian."
+    fi
+    echo
+}
+
+# ─── --enable-nvidia-overlay ─────────────────────────────────────────────────
+
+_enable_nvidia_overlay() {
+    # Add docker/gpu.nvidia.yml to COMPOSE_FILE in .env (backing it up first).
+    # Returns 0 on success or when already enabled, 1 on failure or refusal.
+    echo "=== Enabling NVIDIA compose overlay ==="
+    echo
+
+    local _env_file="${REPO_ROOT}/.env"
+    local _env_example="${REPO_ROOT}/.env.example"
+    local _overlay_fragment="docker/gpu.nvidia.yml"
+    local _backup_ts
+    _backup_ts="$(date +%Y%m%d-%H%M%S)"
+
+    # Ensure .env exists
+    if [ ! -f "${_env_file}" ]; then
+        if [ ! -f "${_env_example}" ]; then
+            _fail ".env not found and .env.example is missing."
+            _info "Create a .env file in the repo root, then re-run."
+            return 1
+        fi
+        _info ".env not found. .env.example is available."
+        if [ "${OPT_YES}" -ne 1 ] && ! _confirm "Copy .env.example to .env?"; then
+            _fail ".env is required to set COMPOSE_FILE — aborted."
+            return 1
+        fi
+        if ! cp "${_env_example}" "${_env_file}"; then
+            _fail "Failed to copy .env.example to .env."
+            return 1
+        fi
+        _pass "Copied .env.example to .env."
+    fi
+
+    # Read current active (uncommented) COMPOSE_FILE value, if any.
+    # CRs are dropped so a CRLF-edited .env does not leak "\r" into the value.
+    local _current_cf
+    _current_cf="$(grep '^COMPOSE_FILE=' "${_env_file}" | tail -1 | cut -d= -f2- | tr -d '\r')"
+
+    # Idempotency check
+    if echo "${_current_cf}" | grep -qF "${_overlay_fragment}"; then
+        _pass "COMPOSE_FILE already includes the NVIDIA overlay — nothing to change."
+        echo
+        _info "Start or restart Odysseus to apply:"
+        _info "  docker compose up -d --build"
+        return 0
+    fi
+
+    # Back up .env before any edit
+    local _backup="${_env_file}.bak.${_backup_ts}"
+    if ! cp "${_env_file}" "${_backup}"; then
+        _fail "Failed to create backup of .env — aborting to avoid data loss."
+        return 1
+    fi
+    _info "Backup created: .env.bak.${_backup_ts}"
+
+    local _new_cf=""
+    if [ -z "${_current_cf}" ]; then
+        # No active COMPOSE_FILE line — append one
+        _new_cf="docker-compose.yml:${_overlay_fragment}"
+        if ! printf '\nCOMPOSE_FILE=%s\n' "${_new_cf}" >> "${_env_file}"; then
+            _fail "Failed to write COMPOSE_FILE to .env."
+            return 1
+        fi
+    else
+        # Existing COMPOSE_FILE — append the overlay to the existing value
+        _new_cf="${_current_cf}:${_overlay_fragment}"
+        # Escape \, & and the | delimiter so sed inserts the value literally.
+        local _new_cf_sed
+        _new_cf_sed="$(printf '%s' "${_new_cf}" | sed 's/[\\&|]/\\&/g')"
+        # cp -p seeds the temp file so the rewritten .env keeps its mode/owner.
+        local _tmp="${_env_file}.tmp"
+        if ! { cp -p "${_env_file}" "${_tmp}" \
+            && sed "s|^COMPOSE_FILE=.*|COMPOSE_FILE=${_new_cf_sed}|" "${_env_file}" > "${_tmp}"; }; then
+            _fail "Failed to update COMPOSE_FILE in .env."
+            rm -f "${_tmp}"
+            return 1
+        fi
+        if ! mv "${_tmp}" "${_env_file}"; then
+            _fail "Failed to write updated .env."
+            rm -f "${_tmp}"
+            return 1
+        fi
+    fi
+
+    _pass "COMPOSE_FILE set to: ${_new_cf}"
+    echo
+    _info "Start or restart Odysseus with the NVIDIA overlay:"
+    _info "  docker compose up -d --build"
+    echo
+    _info "To undo, restore the backup:"
+    _info "  cp ${_backup} ${_env_file}"
+}
+
+# ─── mode: default read-only diagnostic ──────────────────────────────────────
+
+_mode_check() {
+    # Default mode: run every check, optionally edit .env, succeed only if nothing failed.
+    echo "=== Odysseus Docker GPU diagnostic ==="
+    echo
+    _check_nvidia_smi
+    if ! _check_docker; then
+        echo "=== Results: ${PASS} passed, ${FAIL} failed ==="
+        return 1
+    fi
+    _check_gpu_passthrough
+    if [ "${OPT_ENABLE_OVERLAY}" -eq 1 ] && [ "${_GPU_PASSTHROUGH_OK}" -ne 0 ]; then
+        _enable_nvidia_overlay
+    elif [ "${OPT_ENABLE_OVERLAY}" -eq 1 ]; then
+        # Hard gate: broken passthrough blocks .env edits regardless of --yes.
+        # Writing COMPOSE_FILE before passthrough works causes Odysseus to fail
+        # at startup, so this is not a prompt — it is a stop.
+        _fail "GPU passthrough is not working — .env will not be modified."
+        _info "Fix passthrough first, then re-run with --enable-nvidia-overlay:"
+        _info "  Ubuntu/Debian: scripts/check-docker-gpu.sh --install-nvidia-toolkit"
+        _info "  Other distros: scripts/check-docker-gpu.sh --print-install-commands"
+        echo
+    fi
+    echo "=== Results: ${PASS} passed, ${FAIL} failed ==="
+    [ "${FAIL}" -eq 0 ]
+}
+
+# ─── mode: --print-install-commands ──────────────────────────────────────────
+
+_mode_print() {
+    # Print (never execute) the commands that install the NVIDIA Container
+    # Toolkit for the detected distribution, then point the user back at the
+    # diagnostic. Debian-family systems reuse _debian_install_steps; other
+    # distros are selected by DISTRO_ID.
+    echo "=== NVIDIA Container Toolkit — install commands ==="
+    echo
+    _info "Detected system: $(_distro_label)"
+    echo
+
+    if _is_debian_family; then
+        _info "Ubuntu/Debian — recommended install commands:"
+        _debian_install_steps
+        _info "After running these, re-run the diagnostic to confirm:"
+        _info "  scripts/check-docker-gpu.sh"
+    else
+        # ${DISTRO_ID:-} keeps this safe when the variable is unset (the
+        # fallback branch below already treats it as optional).
+        case "${DISTRO_ID:-}" in
+            fedora|rhel|centos|rocky|almalinux)
+                _info "Fedora/RHEL — install commands:"
+                echo
+                # The package lives in NVIDIA's own repository, so the repo
+                # has to be configured before dnf can resolve it.
+                echo "  curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo \\"
+                echo "    | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo"
+                echo "  sudo dnf install -y nvidia-container-toolkit"
+                echo "  sudo nvidia-ctk runtime configure --runtime=docker"
+                echo "  sudo systemctl restart docker"
+                echo "  docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi"
+                ;;
+            opensuse*|sles)
+                _info "OpenSUSE/SLES — install commands:"
+                echo
+                # Same as above: add NVIDIA's repository first.
+                echo "  sudo zypper ar https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo"
+                echo "  sudo zypper install nvidia-container-toolkit"
+                echo "  sudo nvidia-ctk runtime configure --runtime=docker"
+                echo "  sudo systemctl restart docker"
+                echo "  docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi"
+                ;;
+            arch|manjaro|endeavouros)
+                _info "Arch Linux — install commands:"
+                echo
+                echo "  sudo pacman -S nvidia-container-toolkit"
+                echo "  sudo nvidia-ctk runtime configure --runtime=docker"
+                echo "  sudo systemctl restart docker"
+                echo "  docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi"
+                ;;
+            *)
+                _warn "Distro '${DISTRO_ID:-unknown}' is not specifically recognized."
+                echo
+                echo "  See the full guide for your distribution:"
+                echo "  https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html"
+                ;;
+        esac
+        echo
+        _info "Automated install (--install-nvidia-toolkit) supports Ubuntu/Debian only."
+        _info "For other distros, run the commands above manually, then re-run:"
+        _info "  scripts/check-docker-gpu.sh"
+    fi
+}
+
+# ─── mode: --install-nvidia-toolkit ──────────────────────────────────────────
+
+_mode_install() {
+    # Interactive installer for the NVIDIA Container Toolkit (Ubuntu/Debian).
+    #
+    # Runs the apt-based install with sudo, configures the Docker runtime,
+    # optionally restarts Docker, then re-runs the Docker/GPU checks.
+    # Reads OPT_YES (1 = skip confirmation prompts) and OPT_ENABLE_OVERLAY
+    # (1 = enable the NVIDIA overlay once passthrough is verified).
+    # Any failed install step exits 1; declining the first prompt exits 0.
+    # Otherwise the final status is 0 only when FAIL is 0.
+    echo "=== NVIDIA Container Toolkit — interactive installer ==="
+    echo
+
+    if [ "$(uname -s)" != "Linux" ]; then
+        _fail "Install mode is Linux-only. Detected: $(uname -s)"
+        exit 1
+    fi
+
+    if ! _is_debian_family; then
+        _fail "Automated install currently supports Ubuntu/Debian only."
+        _info "Detected: $(_distro_label)"
+        _info "Run --print-install-commands to see manual steps for your distro."
+        exit 1
+    fi
+
+    _info "Detected system: $(_distro_label)"
+    echo
+
+    echo "This will run the following commands with sudo:"
+    _debian_install_steps
+
+    if [ "${OPT_YES}" -eq 0 ]; then
+        if ! _confirm "Proceed with the above steps?"; then
+            echo "Aborted — nothing was changed."
+            exit 0
+        fi
+        echo
+    fi
+
+    # Step 1: prerequisites
+    _step "Updating package lists..."
+    sudo apt-get update -qq || { _fail "apt-get update failed."; exit 1; }
+    _step "Installing prerequisites (curl, gpg)..."
+    sudo apt-get install -y curl gpg || { _fail "Failed to install prerequisites."; exit 1; }
+    _pass "Prerequisites ready."
+    echo
+
+    # Step 2: signing key
+    # Download first and test curl's own exit status: in a pipeline only the
+    # last command's status is checked (unless pipefail is set), so a failed
+    # download could otherwise go unnoticed.
+    _step "Adding NVIDIA GPG signing key..."
+    local _gpg_key
+    _gpg_key="$(curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey)" \
+        || { _fail "Failed to add NVIDIA GPG key."; exit 1; }
+    printf '%s\n' "${_gpg_key}" \
+        | sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg \
+        || { _fail "Failed to add NVIDIA GPG key."; exit 1; }
+    _pass "Signing key added."
+    echo
+
+    # Step 3: apt repository
+    # -f makes curl fail on HTTP errors instead of handing an error page to
+    # tee, which would leave a corrupt file in sources.list.d.
+    _step "Adding NVIDIA apt repository..."
+    local _repo_list
+    _repo_list="$(curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list)" \
+        || { _fail "Failed to add NVIDIA apt repository."; exit 1; }
+    printf '%s\n' "${_repo_list}" \
+        | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
+        | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list > /dev/null \
+        || { _fail "Failed to add NVIDIA apt repository."; exit 1; }
+    _pass "apt repository added."
+    echo
+
+    # Step 4: install toolkit
+    _step "Installing nvidia-container-toolkit..."
+    sudo apt-get update -qq || { _fail "apt-get update failed after adding NVIDIA repo."; exit 1; }
+    sudo apt-get install -y nvidia-container-toolkit \
+        || { _fail "Failed to install nvidia-container-toolkit."; exit 1; }
+    _pass "nvidia-container-toolkit installed."
+    echo
+
+    # Step 5: configure Docker runtime
+    _step "Configuring Docker runtime..."
+    sudo nvidia-ctk runtime configure --runtime=docker \
+        || { _fail "nvidia-ctk runtime configure failed."; exit 1; }
+    _pass "Docker runtime configured."
+    echo
+
+    # Step 6: restart Docker (automatic with --yes, otherwise ask)
+    _step "A Docker restart is required for the runtime change to take effect."
+    local _do_restart=0
+    if [ "${OPT_YES}" -eq 1 ]; then
+        _do_restart=1
+    elif _confirm "Restart Docker now?"; then
+        _do_restart=1
+    else
+        _warn "Docker not restarted."
+        _warn "Run 'sudo systemctl restart docker' before testing GPU passthrough."
+    fi
+
+    if [ "${_do_restart}" -eq 1 ]; then
+        _step "Restarting Docker..."
+        if sudo systemctl restart docker; then
+            _pass "Docker restarted."
+        else
+            # Not fatal here: verification below still runs and reports.
+            _fail "Docker restart failed — run: sudo systemctl restart docker"
+        fi
+    fi
+    echo
+
+    # Step 7: verification
+    _info "Running GPU passthrough verification..."
+    echo
+    _check_docker || { echo "=== Results: ${PASS} passed, ${FAIL} failed ==="; exit 1; }
+    _check_gpu_passthrough
+
+    # Step 8: enable overlay (only if passthrough verified)
+    if [ "${OPT_ENABLE_OVERLAY}" -eq 1 ]; then
+        if [ "${_GPU_PASSTHROUGH_OK}" -eq 1 ]; then
+            _enable_nvidia_overlay
+        else
+            _warn "GPU passthrough verification failed — skipping overlay setup."
+            _warn "Fix the passthrough issue, then run:"
+            _warn "  scripts/check-docker-gpu.sh --enable-nvidia-overlay"
+            echo
+        fi
+    fi
+
+    echo "=== Results: ${PASS} passed, ${FAIL} failed ==="
+    [ "${FAIL}" -eq 0 ]
+}
+
+# ─── dispatch ────────────────────────────────────────────────────────────────
+
+# Run the selected mode; its return status becomes the script's exit status.
+case "${MODE}" in
+    check)   _mode_check ;;
+    print)   _mode_print ;;
+    install) _mode_install ;;
+    *)
+        # Without this branch an unrecognised MODE would do nothing and
+        # still exit 0, which looks like a successful run.
+        echo "check-docker-gpu.sh: internal error: unknown mode '${MODE}'" >&2
+        exit 2
+        ;;
+esac
--- a/scripts/claim_ownerless.py
+++ b/scripts/claim_ownerless.py
@@ -13,6 +13,18 @@ import json

 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

+
+def claim_json_entries(entries, owner):
+    """Assign ``owner`` to every dict entry that has no truthy ``"owner"``.
+
+    Entries are mutated in place. Items that are not dicts are skipped, and
+    a payload that is not a list at all (for example JSON ``null`` or an
+    object) is left untouched instead of raising.
+
+    Returns the number of entries that were claimed.
+    """
+    if not isinstance(entries, list):
+        return 0
+    count = 0
+    for entry in entries:
+        if not isinstance(entry, dict):
+            continue
+        if not entry.get("owner"):
+            entry["owner"] = owner
+            count += 1
+    return count
+
+
 def main():
    if len(sys.argv) < 2:
        print("Usage: python scripts/claim_ownerless.py <username>")
@@ -31,11 +43,7 @@ def main():
            continue
        with open(path, "r", encoding="utf-8") as f:
            entries = json.load(f)
-        count = 0
-        for e in entries:
-            if not e.get("owner"):
-                e["owner"] = owner
-                count += 1
+        count = claim_json_entries(entries, owner)
        if count:
            with open(path, "w", encoding="utf-8") as f:
                json.dump(entries, f, ensure_ascii=False, indent=2)
@@ -58,10 +66,12 @@ def main():
        count = db.query(Session).filter(Session.owner == None).update({"owner": owner})
        print(f"  sessions: claimed {count}")

-        # Documents
-        count = db.query(Document).filter(Document.session_id.in_(
-            db.query(Session.id).filter(Session.owner == owner)
-        )).update({"session_id": Document.session_id}, synchronize_session=False)
+        # Documents (have their own owner column; claim the ownerless ones,
+        # mirroring the sessions/gallery/comparisons blocks). The old query set
+        # session_id to itself — a no-op — and never set owner, so ownerless
+        # documents stayed ownerless and invisible in the user's Library.
+        count = db.query(Document).filter(Document.owner == None).update({"owner": owner})
+        print(f"  documents: claimed {count}")

        # Gallery
        if GalleryImage:
--- a/scripts/migrate_faiss_to_chroma.py
+++ b/scripts/migrate_faiss_to_chroma.py
@@ -26,6 +26,39 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(mess
 logger = logging.getLogger("migrate")


+def _load_json(path, default):
+    """Parse the UTF-8 JSON file at ``path``; return ``default`` on failure.
+
+    A missing/unreadable file, malformed JSON, or content that is not valid
+    UTF-8 all yield ``default``. The failure is logged so that a corrupt
+    source file is not mistaken for an empty one.
+    """
+    try:
+        with open(path, encoding="utf-8") as f:
+            return json.load(f)
+    except (OSError, ValueError) as exc:
+        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
+        logging.getLogger("migrate").warning(
+            "Could not load %s (%s); using default", path, exc
+        )
+        return default
+
+
+def _memory_map(rows):
+    """Index memory rows by their ``id``.
+
+    Anything other than a list yields an empty mapping. List items that are
+    not dicts, or whose ``id`` is missing or falsy, are skipped. When several
+    rows share an id, the last one wins.
+    """
+    if not isinstance(rows, list):
+        return {}
+    by_id = {}
+    for item in rows:
+        if isinstance(item, dict):
+            key = item.get("id", "")
+            if key:
+                by_id[key] = item
+    return by_id
+
+
+def _rag_docstore(data):
+    """Extract the parallel ``ids`` / ``documents`` / ``metadatas`` lists.
+
+    Returns three empty lists when ``data`` is not a dict or when any of the
+    three fields is not a list. Otherwise all three are truncated to the
+    length of the shortest so they stay aligned index by index.
+    """
+    if not isinstance(data, dict):
+        return [], [], []
+    columns = [data.get(key, []) for key in ("ids", "documents", "metadatas")]
+    if not all(isinstance(column, list) for column in columns):
+        return [], [], []
+    size = min(len(column) for column in columns)
+    ids, documents, metadatas = (column[:size] for column in columns)
+    return ids, documents, metadatas
+
+
 def migrate_memories():
    """Migrate memory vectors from FAISS to ChromaDB."""
    from src.chroma_client import get_chroma_client
@@ -39,7 +72,9 @@ def migrate_memories():
        logger.info("No memory FAISS index found, skipping memory migration")
        return

-    ids = json.loads(open(ids_path, encoding="utf-8").read())
+    ids = _load_json(ids_path, [])
+    if not isinstance(ids, list):
+        ids = []
    if not ids:
        logger.info("Memory FAISS index is empty, skipping")
        return
@@ -47,8 +82,7 @@ def migrate_memories():
    # Load memory texts
    memories = {}
    if os.path.exists(memory_path):
-        for mem in json.loads(open(memory_path, encoding="utf-8").read()):
-            memories[mem.get("id", "")] = mem
+        memories = _memory_map(_load_json(memory_path, []))

    embed = get_embedding_client()
    if not embed:
@@ -97,10 +131,7 @@ def migrate_rag():
        logger.info("No RAG DocStore found, skipping RAG migration")
        return

-    data = json.loads(open(docs_path, encoding="utf-8").read())
-    ids = data.get("ids", [])
-    documents = data.get("documents", [])
-    metadatas = data.get("metadatas", [])
+    ids, documents, metadatas = _rag_docstore(_load_json(docs_path, {}))

    if not ids:
        logger.info("RAG DocStore is empty, skipping")
--- a/scripts/odysseus
+++ b/scripts/odysseus
@@ -68,6 +68,10 @@ def _short_help(path: Path) -> str:
    return first


+def _is_runnable_subcommand(path: Path) -> bool:
+    """Return True when ``path`` is an existing regular file we may execute."""
+    if not path.exists() or not path.is_file():
+        return False
+    return os.access(path, os.X_OK)
+
+
 def _print_listing() -> None:
    """`odysseus` with no args (or `odysseus help`) — print the table."""
    sys.stdout.write(f"odysseus {VERSION} — every feature, on the shell.\n\n")
@@ -101,7 +105,7 @@ def main(argv: list[str] | None = None) -> int:
            _print_listing()
            return 0
        sub = SCRIPTS_DIR / f"odysseus-{argv[1]}"
-        if not sub.exists():
+        if not _is_runnable_subcommand(sub):
            sys.stderr.write(f"odysseus: unknown subcommand {argv[1]!r}\n")
            return 1
        return subprocess.call([str(sub), "--help"])
@@ -109,7 +113,7 @@ def main(argv: list[str] | None = None) -> int:
    # `odysseus foo ...` → exec `odysseus-foo ...` under the project venv.
    name = argv[0]
    sub = SCRIPTS_DIR / f"odysseus-{name}"
-    if not sub.exists():
+    if not _is_runnable_subcommand(sub):
        sys.stderr.write(
            f"odysseus: unknown subcommand {name!r}. "
            f"Try `odysseus help` to see available ones.\n"
--- a/scripts/odysseus-backup
+++ b/scripts/odysseus-backup
@@ -56,6 +56,16 @@ def _sqlite_safe_copy(src: Path, dst: Path) -> None:
        dst.write_bytes(src.read_bytes())


+def _reject_output_inside_data(out_path: Path) -> None:
+    """Abort via ``fail`` when ``out_path`` resolves to somewhere under data/.
+
+    Both paths are resolved first, so symlinks and ``..`` segments cannot
+    hide an output location inside ``_DATA_DIR``.
+    """
+    try:
+        out_path.resolve().relative_to(_DATA_DIR.resolve())
+    except ValueError:
+        # relative_to() raises when out_path is not under data/: acceptable.
+        return
+    fail("backup output path must be outside data/")
+
+
 def cmd_snapshot(args):
    """Write a tar.gz of the entire data/ directory.

@@ -68,6 +78,7 @@ def cmd_snapshot(args):
    out_path = Path(args.out) if args.out else (
        _BACKUP_DIR / f"odysseus-backup-{datetime.now().strftime('%Y%m%d-%H%M%S')}.tar.gz"
    )
+    _reject_output_inside_data(out_path)
    out_path.parent.mkdir(parents=True, exist_ok=True)

    sqlite_dbs = [p for p in _DATA_DIR.rglob("*.db") if p.is_file() and not p.is_symlink()]
--- a/scripts/odysseus-calendar
+++ b/scripts/odysseus-calendar
@@ -69,11 +69,17 @@ def _parse_dt(s: str) -> datetime:
    return datetime.fromisoformat(s.replace("Z", "+00:00"))


+def _calendar_name(ev: "CalendarEvent") -> str:
+    """Name of the event's calendar, or "" when it is absent or not a str."""
+    calendar = getattr(ev, "calendar", None)
+    if not calendar:
+        return ""
+    label = getattr(calendar, "name", "")
+    if isinstance(label, str):
+        return label
+    return ""
+
+
 def _serialize_event(ev: "CalendarEvent") -> dict:
    return {
        "uid": ev.uid,
        "calendar_id": ev.calendar_id,
-        "calendar_name": ev.calendar.name if ev.calendar else "",
+        "calendar_name": _calendar_name(ev),
        "summary": ev.summary,
        "description": ev.description or "",
        "location": ev.location or "",
--- a/scripts/odysseus-contacts
+++ b/scripts/odysseus-contacts
@@ -60,13 +60,17 @@ def fail(msg: str, code: int = 1) -> None:
    sys.exit(code)


+def _contact_rows(contacts):
+    """Keep only the dict items of ``contacts``; a falsy value yields []."""
+    rows = []
+    for candidate in contacts or []:
+        if isinstance(candidate, dict):
+            rows.append(candidate)
+    return rows
+
+
 # ─── list ────────────────────────────────────────────────────────────

 def cmd_list(args) -> None:
    cfg = _get_carddav_config()
    if not cfg["url"]:
        fail("CardDAV not configured. Set carddav_url/username/password in the web UI.")
-    contacts = _fetch_contacts(force=args.refresh)
+    contacts = _contact_rows(_fetch_contacts(force=args.refresh))
    emit(contacts, args)


@@ -77,7 +81,7 @@ def cmd_search(args) -> None:
    if not cfg["url"]:
        fail("CardDAV not configured.")
    q = args.query.lower()
-    contacts = _fetch_contacts()
+    contacts = _contact_rows(_fetch_contacts())
    matches = [
        c for c in contacts
        if q in (c.get("name") or "").lower() or q in (c.get("email") or "").lower()
--- a/scripts/odysseus-cookbook
+++ b/scripts/odysseus-cookbook
@@ -411,6 +411,8 @@ def cmd_state_set(args) -> None:
        obj = json.loads(data)
    except json.JSONDecodeError as e:
        fail(f"invalid JSON on stdin: {e}")
+    if not isinstance(obj, dict):
+        fail("invalid cookbook state: expected a JSON object")
    _STATE_PATH.parent.mkdir(parents=True, exist_ok=True)
    # Backup the existing state — undo button if a bad pipe clobbers it.
    if _STATE_PATH.exists():
--- a/scripts/odysseus-docs
+++ b/scripts/odysseus-docs
@@ -33,6 +33,10 @@ except ModuleNotFoundError as e:
    sys.exit(2)


+def _text_len(value) -> int:
+    """Length of ``value`` when it is a str; 0 for anything else."""
+    if isinstance(value, str):
+        return len(value)
+    return 0
+
+
 def _serialize(d: "Document", include_content: bool = False) -> dict:
    out = {
        "id": d.id,
@@ -42,7 +46,7 @@ def _serialize(d: "Document", include_content: bool = False) -> dict:
        "version_count": d.version_count or 1,
        "is_active": bool(d.is_active),
        "tidy_verdict": d.tidy_verdict or "",
-        "content_length": len(d.current_content or ""),
+        "content_length": _text_len(d.current_content),
        "created_at": d.created_at.isoformat() if d.created_at else "",
        "updated_at": d.updated_at.isoformat() if d.updated_at else "",
    }
@@ -90,7 +94,7 @@ def cmd_versions(args):
                "version_number": v.version_number,
                "summary": v.summary or "",
                "source": v.source or "ai",
-                "content_length": len(v.content or ""),
+                "content_length": _text_len(v.content),
            } for v in rows
        ], args)
    finally:
--- a/scripts/odysseus-gallery
+++ b/scripts/odysseus-gallery
@@ -30,11 +30,19 @@ except ModuleNotFoundError as e:
    sys.exit(2)


+def _preview_text(value, limit: int = 200) -> str:
+    """Return the first ``limit`` characters of ``value``.
+
+    Non-string values (for example a gallery row whose ``prompt`` is a
+    number or None) produce "" rather than a TypeError from slicing.
+    """
+    if not isinstance(value, str):
+        return ""
+    return value[:limit]
+
+
 def _serialize_image(i: "GalleryImage") -> dict:
    return {
        "id": i.id,
        "filename": i.filename,
-        "prompt": (i.prompt or "")[:200],
+        "prompt": _preview_text(i.prompt),
        "model": i.model or "",
        "size": i.size or "",
        "tags": i.tags or "",
@@ -51,6 +59,14 @@ def _serialize_image(i: "GalleryImage") -> dict:
    }


+def _album_image_count(album) -> int:
+    """Number of images on ``album``; 0 when missing, None, or unsized."""
+    members = getattr(album, "images", None)
+    if members is None:
+        return 0
+    try:
+        return len(members)
+    except TypeError:
+        # The attribute exists but has no len() (not a collection).
+        return 0
+
+
 def cmd_list(args):
    db = SessionLocal()
    try:
@@ -92,7 +108,7 @@ def cmd_albums(args):
    try:
        rows = db.query(GalleryAlbum).order_by(GalleryAlbum.name.asc()).all()
        emit([
-            {"id": a.id, "name": a.name, "image_count": len(a.images)}
+            {"id": a.id, "name": a.name, "image_count": _album_image_count(a)}
            for a in rows
        ], args)
    finally:
--- a/scripts/odysseus-logs
+++ b/scripts/odysseus-logs
@@ -58,6 +58,8 @@ def _resolve(name: str) -> Path | None:
    """Match a log by exact filename, basename-without-extension, or
    substring. Returns the most-recently-modified match if there are
    ties."""
+    if not isinstance(name, str):
+        return None
    candidates = []
    for base in (_APP_LOGS, _TMUX_LOGS):
        if not base.is_dir():
--- a/scripts/odysseus-mail
+++ b/scripts/odysseus-mail
@@ -107,6 +107,19 @@ def _q(name: str) -> str:
    return '"' + (name or "").replace("\\", "\\\\").replace('"', '\\"') + '"'


+def _split_recipients(value: str) -> list[str]:
+    """Split a comma-separated value into stripped, non-empty parts.
+
+    A falsy ``value`` (None or "") yields an empty list.
+    """
+    parts = []
+    for chunk in (value or "").split(","):
+        cleaned = chunk.strip()
+        if cleaned:
+            parts.append(cleaned)
+    return parts
+
+
+def _recipient_list(to: str, cc: str = "", bcc: str = "") -> list[str]:
+    """Collect recipients from the To, Cc and Bcc values, in that order.
+
+    Each value is split with ``_split_recipients``. Calls ``fail`` when the
+    combined list is empty.
+    """
+    recipients = [
+        address
+        for field in (to, cc, bcc)
+        for address in _split_recipients(field)
+    ]
+    if not recipients:
+        fail("at least one recipient is required")
+    return recipients
+
+
 # ─── list ────────────────────────────────────────────────────────────

 def cmd_list(args) -> None:
@@ -177,7 +190,7 @@ def cmd_read(args) -> None:
        if st != "OK":
            fail(f"select {args.folder!r} failed: {st}")
        st, msg_data = conn.fetch(args.uid.encode(), "(BODY.PEEK[])")
-        if st != "OK":
+        if st != "OK" or not msg_data or not msg_data[0]:
            fail(f"fetch UID {args.uid} failed: {st}")
        raw = msg_data[0][1]
        msg = email_mod.message_from_bytes(raw)
@@ -302,11 +315,7 @@ def cmd_send(args) -> None:
    outer["Date"] = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000")
    outer.attach(MIMEText(body, "plain", "utf-8"))

-    recipients = [r.strip() for r in args.to.split(",") if r.strip()]
-    if args.cc:
-        recipients.extend([r.strip() for r in args.cc.split(",") if r.strip()])
-    if args.bcc:
-        recipients.extend([r.strip() for r in args.bcc.split(",") if r.strip()])
+    recipients = _recipient_list(args.to, args.cc, args.bcc)

    if args.dry_run:
        emit({
--- a/scripts/odysseus-mcp
+++ b/scripts/odysseus-mcp
@@ -33,16 +33,26 @@ except ModuleNotFoundError as e:
    sys.exit(2)


+def _json_list(raw) -> list:
+    """Decode ``raw`` as JSON and return it only when it is a list.
+
+    Falsy input, input of an unsupported type, malformed JSON, undecodable
+    bytes, and any non-list JSON value all produce [].
+    """
+    if not raw:
+        return []
+    try:
+        value = json.loads(raw)
+    except (TypeError, ValueError):
+        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
+        # that json.loads raises for bytes it cannot decode.
+        return []
+    return value if isinstance(value, list) else []
+
+
+def _json_dict(raw) -> dict:
+    """Decode ``raw`` as JSON and return it only when it is an object.
+
+    Falsy input, input of an unsupported type, malformed JSON, undecodable
+    bytes, and any non-object JSON value all produce {}.
+    """
+    if not raw:
+        return {}
+    try:
+        value = json.loads(raw)
+    except (TypeError, ValueError):
+        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
+        # that json.loads raises for bytes it cannot decode.
+        return {}
+    return value if isinstance(value, dict) else {}
+
+
 def _serialize(s: "McpServer", redact_env: bool = True) -> dict:
-    try:
-        args_arr = json.loads(s.args) if s.args else []
-    except json.JSONDecodeError:
-        args_arr = []
-    try:
-        env_obj = json.loads(s.env) if s.env else {}
-    except json.JSONDecodeError:
-        env_obj = {}
-    if redact_env and env_obj:
+    args_arr = _json_list(s.args)
+    env_obj = _json_dict(s.env)
+    if redact_env and isinstance(env_obj, dict):
        env_obj = {k: ("***" if v else "") for k, v in env_obj.items()}
    return {
        "id": s.id,
--- a/scripts/odysseus-memory
+++ b/scripts/odysseus-memory
@@ -47,8 +47,12 @@ def _manager() -> MemoryManager:
    return _mgr


+def _memory_entries(entries):
+    """Return the dict items of ``entries``; a falsy value yields []."""
+    return list(filter(lambda item: isinstance(item, dict), entries or []))
+
+
 def cmd_list(args):
-    entries = _manager().load_all()
+    entries = _memory_entries(_manager().load_all())
    if args.category:
        entries = [e for e in entries if (e.get("category") or "fact") == args.category]
    if args.source:
@@ -62,14 +66,14 @@ def cmd_list(args):

 def cmd_search(args):
    q = args.query.lower()
-    entries = _manager().load_all()
+    entries = _memory_entries(_manager().load_all())
    matches = [e for e in entries if q in (e.get("text") or "").lower()]
    matches = sorted(matches, key=lambda e: e.get("timestamp", 0), reverse=True)
    emit(matches[: args.limit], args)


 def cmd_show(args):
-    for e in _manager().load_all():
+    for e in _memory_entries(_manager().load_all()):
        if e.get("id") == args.id:
            emit(e, args)
            return
@@ -93,7 +97,7 @@ def cmd_add(args):


 def cmd_delete(args):
-    entries = _manager().load_all()
+    entries = _memory_entries(_manager().load_all())
    target = next((e for e in entries if e.get("id") == args.id), None)
    if not target:
        fail(f"no memory with id {args.id!r}")
@@ -104,7 +108,7 @@ def cmd_delete(args):

 def cmd_categories(args):
    counts: dict[str, int] = {}
-    for e in _manager().load_all():
+    for e in _memory_entries(_manager().load_all()):
        cat = e.get("category") or "fact"
        counts[cat] = counts.get(cat, 0) + 1
    rows = sorted(counts.items(), key=lambda kv: -kv[1])
--- a/scripts/odysseus-notes
+++ b/scripts/odysseus-notes
@@ -29,12 +29,22 @@ except ModuleNotFoundError as e:
    sys.exit(2)


+def _load_items(raw) -> list:
+    """Decode a note's stored ``items`` JSON into a list.
+
+    Returns [] for falsy input, input of an unsupported type, malformed
+    JSON, undecodable bytes, or a JSON value that is not a list.
+    """
+    if not raw:
+        return []
+    try:
+        items = json.loads(raw)
+    except (TypeError, ValueError):
+        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
+        # that json.loads raises for bytes it cannot decode.
+        return []
+    return items if isinstance(items, list) else []
+
+
 def _serialize(n: "Note") -> dict:
    return {
        "id": n.id,
        "title": n.title or "",
        "content": n.content or "",
-        "items": json.loads(n.items) if n.items else [],
+        "items": _load_items(n.items),
        "note_type": n.note_type or "note",
        "color": n.color or "",
        "label": n.label or "",
--- a/scripts/odysseus-personal
+++ b/scripts/odysseus-personal
@@ -42,8 +42,12 @@ def _manager() -> PersonalDocsManager:
    return _mgr


+def _file_rows(files):
+    """Filter ``files`` down to its dict items; a falsy value yields []."""
+    kept = []
+    for record in files or []:
+        if not isinstance(record, dict):
+            continue
+        kept.append(record)
+    return kept
+
+
 def cmd_list(args):
-    files = getattr(_manager(), "index", []) or []
+    files = _file_rows(getattr(_manager(), "index", []) or [])
    out = [
        {"name": f.get("name"), "size": f.get("size"), "path": f.get("path", "")}
        for f in files
--- a/scripts/odysseus-preset
+++ b/scripts/odysseus-preset
@@ -28,9 +28,12 @@ def _load() -> dict:
    if not _PATH.exists():
        return {}
    try:
-        return json.loads(_PATH.read_text())
+        data = json.loads(_PATH.read_text())
    except json.JSONDecodeError as e:
        fail(f"presets.json corrupt: {e}")
+    if not isinstance(data, dict):
+        fail("presets.json corrupt: expected an object")
+    return data


 def _save(data: dict) -> None:
@@ -46,6 +49,15 @@ def _save(data: dict) -> None:
    tmp.replace(_PATH)


+def _entry_or_fail(presets: dict, name: str) -> dict:
+    """Return the preset stored under ``name``.
+
+    Calls ``fail`` when no such preset exists, or when the stored value is
+    not a dict.
+    """
+    if name not in presets:
+        fail(f"no preset named {name!r}")
+    preset = presets[name]
+    if not isinstance(preset, dict):
+        fail(f"preset {name!r} is corrupt: expected an object")
+    return preset
+
+
 def cmd_list(args):
    presets = _load()
    rows = []
@@ -63,9 +75,7 @@ def cmd_list(args):

 def cmd_get(args):
    presets = _load()
-    if args.name not in presets:
-        fail(f"no preset named {args.name!r}")
-    emit({"id": args.name, **presets[args.name]}, args)
+    emit({"id": args.name, **_entry_or_fail(presets, args.name)}, args)


 def cmd_set(args):
@@ -75,7 +85,8 @@ def cmd_set(args):
    if prompt is None and args.temperature is None:
        fail("nothing to set — pass --prompt, --prompt-file, or --temperature")
    presets = _load()
-    entry = dict(presets.get(args.name) or {})
+    current = presets.get(args.name)
+    entry = dict(current) if isinstance(current, dict) else {}
    entry.setdefault("name", args.name)
    if prompt is not None:
        entry["system_prompt"] = prompt
@@ -90,9 +101,8 @@ def cmd_set(args):

 def cmd_delete(args):
    presets = _load()
-    if args.name not in presets:
-        fail(f"no preset named {args.name!r}")
-    snap = presets.pop(args.name)
+    snap = _entry_or_fail(presets, args.name)
+    presets.pop(args.name)
    _save(presets)
    emit({"ok": True, "deleted": {"id": args.name, **snap}}, args)

--- a/scripts/odysseus-research
+++ b/scripts/odysseus-research
@@ -26,20 +26,33 @@ from pathlib import Path
 _DATA_DIR = _REPO_ROOT / "data" / "deep_research"


+def _load_path(path: Path) -> dict | None:
+    """Read one research record from ``path``.
+
+    Returns the parsed JSON object, or None when the file cannot be read,
+    cannot be decoded as text, is not valid JSON, or holds something other
+    than an object. One bad file must not abort listing or searching.
+    """
+    try:
+        data = json.loads(path.read_text())
+    except (OSError, ValueError):
+        # ValueError covers json.JSONDecodeError and the UnicodeDecodeError
+        # that read_text() raises for bytes it cannot decode.
+        return None
+    return data if isinstance(data, dict) else None
+
+
 def _load(rp_id: str) -> dict | None:
    path = _DATA_DIR / f"{rp_id}.json"
    if not path.exists():
        return None
-    try:
-        return json.loads(path.read_text())
-    except json.JSONDecodeError:
-        return None
+    return _load_path(path)
+
+
+def _preview_text(value, limit: int = 200) -> str:
+    """Return the first ``limit`` characters of ``value``.
+
+    A non-string value (for example a ``query`` that is a number in legacy
+    or corrupt JSON) produces "" rather than a TypeError from slicing.
+    """
+    if not isinstance(value, str):
+        return ""
+    return value[:limit]


 def _summarize(rp_id: str, data: dict) -> dict:
    return {
        "id": rp_id,
-        "query": (data.get("query") or "")[:200],
+        "query": _preview_text(data.get("query")),
        "category": data.get("category") or "",
        "status": data.get("status") or "",
        "started_at": data.get("started_at") or "",
@@ -56,9 +69,8 @@ def cmd_list(args):
    out = []
    for path in sorted(_DATA_DIR.glob("*.json")):
        rp_id = path.stem
-        try:
-            data = json.loads(path.read_text())
-        except Exception:
+        data = _load_path(path)
+        if data is None:
            continue
        if args.status and (data.get("status") or "") != args.status:
            continue
@@ -100,9 +112,8 @@ def cmd_search(args):
    out = []
    for path in _DATA_DIR.glob("*.json"):
        rp_id = path.stem
-        try:
-            data = json.loads(path.read_text())
-        except Exception:
+        data = _load_path(path)
+        if data is None:
            continue
        haystack = " ".join([
            (data.get("query") or "").lower(),
--- a/scripts/odysseus-sessions
+++ b/scripts/odysseus-sessions
@@ -27,6 +27,12 @@ except ModuleNotFoundError as e:


 def _serialize(s: "DbSession") -> dict:
+    def _int_or_zero(value) -> int:
+        """Coerce ``value`` to int; falsy or unconvertible values become 0."""
+        try:
+            return int(value or 0)
+        except (TypeError, ValueError, OverflowError):
+            # OverflowError: int() of an infinite float.
+            return 0
+
    return {
        "id": s.id,
        "name": s.name,
@@ -37,9 +43,9 @@ def _serialize(s: "DbSession") -> dict:
        "archived": bool(s.archived),
        "rag": bool(s.rag),
        "is_important": bool(s.is_important),
-        "message_count": s.message_count or 0,
-        "total_input_tokens": s.total_input_tokens or 0,
-        "total_output_tokens": s.total_output_tokens or 0,
+        "message_count": _int_or_zero(s.message_count),
+        "total_input_tokens": _int_or_zero(s.total_input_tokens),
+        "total_output_tokens": _int_or_zero(s.total_output_tokens),
        "last_accessed": s.last_accessed.isoformat() if s.last_accessed else "",
        "created_at": s.created_at.isoformat() if s.created_at else "",
    }
--- a/scripts/odysseus-signature
+++ b/scripts/odysseus-signature
@@ -29,6 +29,19 @@ except ModuleNotFoundError as e:
    sys.exit(2)


+def _decode_png_data(data_png: str) -> bytes:
+    """Decode a stored signature into raw PNG bytes.
+
+    Everything up to and including the first comma is dropped when a comma
+    is present (data-URL style prefix). The rest must be strict base64 and
+    must start with the PNG file signature; otherwise ``fail`` is called.
+    """
+    payload = data_png or ""
+    _, comma, remainder = payload.partition(",")
+    if comma:
+        payload = remainder
+    try:
+        png_bytes = base64.b64decode(payload, validate=True)
+    except Exception as e:
+        fail(f"data_png is not valid base64: {e}")
+    if not png_bytes.startswith(b"\x89PNG\r\n\x1a\n"):
+        fail("data_png is not a PNG image")
+    return png_bytes
+
+
 def cmd_list(args):
    """No `Signature` SQLAlchemy model is registered for the
    `signatures` table — query via raw SQL so we don't depend on it."""
@@ -85,13 +98,7 @@ def cmd_export(args):
        ), {"id": args.id}).mappings().first()
    if not row:
        fail(f"no signature with id {args.id!r}")
-    raw = row["data_png"] or ""
-    if "," in raw:
-        raw = raw.split(",", 1)[1]
-    try:
-        png_bytes = base64.b64decode(raw)
-    except Exception as e:
-        fail(f"data_png is not valid base64: {e}")
+    png_bytes = _decode_png_data(row["data_png"] or "")
    out = Path(args.png)
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_bytes(png_bytes)
--- a/scripts/odysseus-skills
+++ b/scripts/odysseus-skills
@@ -41,11 +41,26 @@ def _manager() -> SkillsManager:
    return _mgr


+def _preview_text(value, limit: int = 200) -> str:
+    """Return the first ``limit`` characters of a text field.
+
+    Non-string values (for example a ``description`` that is a number in a
+    hand-edited or legacy store) produce "" rather than a TypeError from
+    slicing.
+    """
+    if not isinstance(value, str):
+        return ""
+    return value[:limit]
+
+
+def _skill_entries(skills):
+    """Return the dict items of ``skills``; a falsy value yields []."""
+    usable = []
+    for skill in skills or []:
+        if isinstance(skill, dict):
+            usable.append(skill)
+    return usable
+
+
 def _summary(skill: dict) -> dict:
    return {
        "name": skill.get("name", ""),
        "category": skill.get("category", "general"),
-        "description": (skill.get("description") or "")[:200],
+        "description": _preview_text(skill.get("description")),
        "status": skill.get("status", ""),
        "uses": skill.get("uses", 0),
        "last_used": skill.get("last_used") or "",
@@ -54,7 +69,7 @@ def _summary(skill: dict) -> dict:


 def cmd_list(args):
-    out = _manager().load_all()
+    out = _skill_entries(_manager().load_all())
    if args.category:
        out = [s for s in out if (s.get("category") or "general") == args.category]
    out.sort(key=lambda s: (-int(s.get("uses") or 0), s.get("name", "")))
@@ -62,7 +77,7 @@ def cmd_list(args):


 def cmd_show(args):
-    for s in _manager().load_all():
+    for s in _skill_entries(_manager().load_all()):
        if s.get("name") == args.name:
            emit(s, args)
            return
@@ -71,7 +86,7 @@ def cmd_show(args):

 def cmd_categories(args):
    counts: dict[str, int] = {}
-    for s in _manager().load_all():
+    for s in _skill_entries(_manager().load_all()):
        c = s.get("category") or "general"
        counts[c] = counts.get(c, 0) + 1
    emit([{"category": c, "count": n} for c, n in sorted(counts.items())], args)
@@ -80,7 +95,7 @@ def cmd_categories(args):
 def cmd_delete(args):
    # Locate the skill's directory and rm -rf it.
    skills_root = Path(_DATA_DIR) / "skills"
-    for s in _manager().load_all():
+    for s in _skill_entries(_manager().load_all()):
        if s.get("name") != args.name:
            continue
        cat = s.get("category") or "general"
@@ -94,7 +109,7 @@ def cmd_delete(args):


 def cmd_export(args):
-    for s in _manager().load_all():
+    for s in _skill_entries(_manager().load_all()):
        if s.get("name") != args.name:
            continue
        cat = s.get("category") or "general"
--- a/scripts/odysseus-tasks
+++ b/scripts/odysseus-tasks
@@ -26,13 +26,18 @@ except ModuleNotFoundError as e:
    sys.exit(2)


+def _preview_text(value, limit: int = 200) -> str:
+    snippet = "" if not isinstance(value, str) else value  # non-strings preview as ""
+    return snippet[:limit] + ("" if len(snippet) <= limit else "…")
+
+
 def _serialize_task(t: "ScheduledTask") -> dict:
    return {
        "id": t.id,
        "name": t.name,
        "task_type": t.task_type,
        "action": t.action,
-        "prompt": (t.prompt or "")[:200] + ("…" if t.prompt and len(t.prompt) > 200 else ""),
+        "prompt": _preview_text(t.prompt),
        "schedule": t.schedule,
        "scheduled_time": t.scheduled_time,
        "next_run": t.next_run.isoformat() if t.next_run else "",
@@ -51,7 +56,7 @@ def _serialize_run(r: "TaskRun") -> dict:
        "started_at": r.started_at.isoformat() if r.started_at else "",
        "completed_at": r.completed_at.isoformat() if r.completed_at else "",
        "status": r.status,
-        "output_preview": (getattr(r, "output", "") or "")[:200],
+        "output_preview": _preview_text(getattr(r, "output", "")),
    }


--- a/scripts/odysseus-theme
+++ b/scripts/odysseus-theme
@@ -36,10 +36,14 @@ def _load_prefs() -> dict:
        return {"_users": {}}
    try:
        data = json.loads(_USER_PREFS_PATH.read_text())
-        data.setdefault("_users", {})
-        return data
    except json.JSONDecodeError as e:
        fail(f"user_prefs.json is corrupt: {e}")
+    if not isinstance(data, dict):
+        fail("user_prefs.json is corrupt: expected an object")
+    users = data.setdefault("_users", {})
+    if not isinstance(users, dict):
+        fail("user_prefs.json is corrupt: _users must be an object")
+    return data


 def _save_prefs(data: dict) -> None:
--- a/scripts/odysseus-webhook
+++ b/scripts/odysseus-webhook
@@ -30,6 +30,17 @@ except ModuleNotFoundError as e:
    sys.exit(2)


+def _mask_token(token: str, reveal: bool = False) -> str:
+    """Return ``token`` unchanged when ``reveal`` is set, otherwise a masked preview."""
+    secret = token or ""
+    if reveal or not secret:
+        return secret
+    # Tokens of 10 characters or fewer are hidden entirely: the 6+4 preview would show all of them.
+    if len(secret) <= 10:
+        return "***"
+    return "…".join((secret[:6], secret[-4:]))
+
+
 def _summary(t: "ScheduledTask", reveal: bool = False) -> dict:
    tok = t.webhook_token or ""
    return {
@@ -37,7 +48,7 @@ def _summary(t: "ScheduledTask", reveal: bool = False) -> dict:
        "name": t.name,
        "status": t.status,
        "task_type": t.task_type,
-        "webhook_token": tok if reveal else (tok[:6] + "…" + tok[-4:]) if tok else "",
+        "webhook_token": _mask_token(tok, reveal),
        "has_token": bool(tok),
    }

--- a/Show More
+++ b/Show More