Merge remote-tracking branch 'origin/main' into visual-pr-playground

# Conflicts:
#	routes/cookbook_routes.py
#	routes/hwfit_routes.py
#	services/hwfit/fit.py
#	services/hwfit/models.py
#	static/js/cookbook-diagnosis.js
#	static/js/cookbook-hwfit.js
#	static/js/cookbook.js
#	static/js/cookbookRunning.js
This commit is contained in:
pewdiepie-archdaemon
2026-06-03 16:49:10 +09:00
569 changed files with 35252 additions and 3489 deletions

View File

@@ -9,6 +9,7 @@ __pycache__/
dist/
build/
.env
.env.bak.*
/data/
/logs/
.git/

View File

@@ -16,6 +16,10 @@ LLM_HOST=localhost
# when started with OLLAMA_HOST=0.0.0.0:11434.
# OLLAMA_BASE_URL=http://host.docker.internal:11434/v1
# Optional LM Studio URL. In Docker, host LM Studio is reachable here
# when LM Studio is set to serve on all interfaces (0.0.0.0).
# LM_STUDIO_URL=http://host.docker.internal:1234
# OpenAI API key (only needed if using OpenAI models).
# Do not commit real keys. Keep this commented until needed.
# OPENAI_API_KEY=your_openai_api_key_here
@@ -59,6 +63,10 @@ SEARXNG_INSTANCE=http://localhost:8080
# Keep false for Docker, LAN, reverse proxy, and any shared deployment.
# LOCALHOST_BYPASS=false
# Mark session cookies Secure. Set true when Odysseus is served through HTTPS
# by a trusted reverse proxy or private access gateway.
# SECURE_COOKIES=true
# Optional: pre-seed the first admin password during setup.
# Do not commit a real password.
# ODYSSEUS_ADMIN_PASSWORD=change_me_before_first_boot
@@ -141,7 +149,8 @@ SEARXNG_INSTANCE=http://localhost:8080
#
# AMD ROCm (requires ROCm drivers on the host and the GID of the render group):
# COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml
# RENDER_GID=992
# Find the render GID with: getent group render | cut -d: -f3
# RENDER_GID=989
#
# These overlays only expose the GPU devices. The slim Odysseus image
# still needs CUDA/ROCm userspace via Cookbook -> Dependencies (vLLM,

103
.github/ISSUE_TEMPLATE/bug_report.yml vendored Normal file
View File

@@ -0,0 +1,103 @@
name: Bug Report
description: Report a reproducible bug in Odysseus.
labels: ["bug"]
body:
- type: markdown
attributes:
value: |
**Before submitting:** search [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues)
and [discussions](https://github.com/pewdiepie-archdaemon/odysseus/discussions) first.
Duplicate reports slow things down.
For security vulnerabilities, **do not open a public issue** —
use [GitHub Security Advisories](https://github.com/pewdiepie-archdaemon/odysseus/security/advisories/new)
and read [SECURITY.md](https://github.com/pewdiepie-archdaemon/odysseus/blob/main/SECURITY.md) first.
- type: checkboxes
id: prerequisites
attributes:
label: Prerequisites
options:
- label: I searched [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues?q=is%3Aissue+is%3Aopen) and [discussions](https://github.com/pewdiepie-archdaemon/odysseus/discussions) and did not find an existing report of this bug.
required: true
- label: This is **not** a security vulnerability. (Vulnerabilities go to [GitHub Security Advisories](https://github.com/pewdiepie-archdaemon/odysseus/security/advisories/new) — see [SECURITY.md](https://github.com/pewdiepie-archdaemon/odysseus/blob/main/SECURITY.md).)
required: true
- label: I am running the latest code from `main`.
required: true
- type: dropdown
id: install-method
attributes:
label: Install Method
options:
- Docker (docker compose up)
- Manual Python install (pip / venv)
- Windows native (launch-windows.ps1)
- macOS app (build-macos-app.sh / start-macos.sh)
- Other (describe in the reproduction steps below)
validations:
required: true
- type: dropdown
id: os
attributes:
label: Operating System
options:
- Linux
- macOS
- Windows
- Other
validations:
required: true
- type: textarea
id: steps
attributes:
label: Steps to Reproduce
description: Exact steps that reliably trigger the bug. The more specific, the faster this gets fixed.
placeholder: |
1. Go to ...
2. Click / type ...
3. Observe ...
validations:
required: true
- type: textarea
id: expected
attributes:
label: Expected Behaviour
description: What should have happened?
validations:
required: true
- type: textarea
id: actual
attributes:
label: Actual Behaviour
description: What actually happened? Include the full error message if there is one.
validations:
required: true
- type: textarea
id: logs
attributes:
label: Logs / Screenshots
description: Paste relevant terminal output or attach screenshots. Remove API keys, passwords, and personal data before pasting.
render: text
- type: input
id: model-backend
attributes:
label: Model / Backend (if relevant)
description: "e.g. Ollama + llama3.2:latest, vLLM + mistral-7b, OpenAI API, Anthropic API"
placeholder: "Ollama + llama3.2:latest"
- type: textarea
id: additional-info
attributes:
label: Additional Information
description: Anything else that might help — browser console errors, related issues, things you already tried, or environment quirks.
placeholder: |
- Any other context goes here.
- If you are willing to submit a PR that fixes this, mention it here.

13
.github/ISSUE_TEMPLATE/config.yml vendored Normal file
View File

@@ -0,0 +1,13 @@
blank_issues_enabled: false
contact_links:
- name: Question / Need Help
url: https://github.com/pewdiepie-archdaemon/odysseus/discussions/categories/q-a
about: Ask how-to questions, setup help, and model configuration questions here. Issues are for confirmed bugs and concrete proposals only.
- name: Idea or Suggestion
url: https://github.com/pewdiepie-archdaemon/odysseus/discussions/categories/ideas
about: Discuss ideas and gauge interest before opening a formal feature request. If there is already a discussion, link it in your feature request.
- name: Security Vulnerability
url: https://github.com/pewdiepie-archdaemon/odysseus/security/advisories/new
about: Report vulnerabilities privately via GitHub Security Advisories — never as a public issue. Read SECURITY.md before reporting.

View File

@@ -0,0 +1,90 @@
name: Feature Request
description: Propose a new feature or a concrete improvement to Odysseus.
labels: ["enhancement"]
body:
- type: markdown
attributes:
value: |
**Before submitting:** search [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues)
and [discussions](https://github.com/pewdiepie-archdaemon/odysseus/discussions) first.
Feature requests that duplicate [ROADMAP.md](https://github.com/pewdiepie-archdaemon/odysseus/blob/main/ROADMAP.md)
or an existing open issue will be closed as duplicates.
If your idea needs community input before it becomes a concrete proposal,
start a [discussion](https://github.com/pewdiepie-archdaemon/odysseus/discussions/categories/ideas) instead.
- type: checkboxes
id: prerequisites
attributes:
label: Prerequisites
options:
- label: I searched [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues?q=is%3Aissue+is%3Aopen) and this has not already been proposed.
required: true
- label: I searched [discussions](https://github.com/pewdiepie-archdaemon/odysseus/discussions) and this is not already being debated there.
required: true
- label: This is a concrete, actionable proposal — not a vague "it would be nice if..." request.
required: true
- type: dropdown
id: area
attributes:
label: Area
description: Which part of the application does this affect?
options:
- Chat / Agent
- Email
- Calendar
- Documents / RAG
- Memory
- Cookbook / Local Models / GPU
- Search
- Notes / Editor
- Auth / Security
- Docker / Deployment
- UI / Frontend
- API / Backend
- MCP
- Testing / CI
- Other
validations:
required: true
- type: textarea
id: problem
attributes:
label: Problem or Motivation
description: What problem does this solve, or what use case does it enable? Be specific — "it would be better" is not enough.
validations:
required: true
- type: textarea
id: solution
attributes:
label: Proposed Solution
description: Describe the behaviour or change you want to see. Include API shape, UI sketch, or code snippets if that helps make it concrete.
validations:
required: true
- type: textarea
id: alternatives
attributes:
label: Alternatives Considered
description: What other approaches did you consider and why did you rule them out? If there is an existing workaround, describe it.
- type: textarea
id: prior-art
attributes:
label: Prior Art / Related Issues
description: Link any related issues, discussions, or external references that informed this proposal.
- type: dropdown
id: willing_to_implement
attributes:
label: Are you willing to implement this?
options:
- "Yes — I can open a PR"
- "Partially — I can help but need guidance"
- "No — I am only filing the request"
validations:
required: true

53
.github/pull_request_template.md vendored Normal file
View File

@@ -0,0 +1,53 @@
## Summary
<!-- One paragraph: what changed and why. "Fixed bug" and "Added feature" are not summaries. -->
## Linked Issue
<!-- Every PR should be linked to an issue.
Use one of: Fixes #NNN | Part of #NNN | Closes #NNN -->
Fixes #
## Type of Change
- [ ] Bug fix (non-breaking — fixes a confirmed issue)
- [ ] New feature (non-breaking — adds new behaviour)
- [ ] Breaking change (changes or removes existing behaviour)
- [ ] Refactor / cleanup (behaviour unchanged)
- [ ] Documentation only
- [ ] CI / tooling / configuration
## Checklist
- [ ] I searched [open issues](https://github.com/pewdiepie-archdaemon/odysseus/issues) and [open PRs](https://github.com/pewdiepie-archdaemon/odysseus/pulls) — this is not a duplicate.
- [ ] This PR targets `main`
- [ ] My changes are limited to the scope described above — no unrelated refactors or whitespace changes mixed in.
- [ ] I actually ran the app (`docker compose up` or `uvicorn app:app`) and verified the change works end-to-end. Type-checks and unit tests are not enough.
## How to Test
<!-- Step-by-step instructions a reviewer can follow to verify this works.
Do not leave this empty — a PR without test steps will be sent back. -->
1.
2.
3.
## Visual / UI changes — REQUIRED if you touched anything that renders
**Anything that changes what the UI looks like — buttons, icons, padding, colors, fonts, spacing, layout, CSS, HTML, SVG, or any `static/js/` module that draws to the DOM — needs all of the following. PRs that change rendering without these WILL be closed.**
- [ ] **Screenshot or short clip** of the change in the running app, attached below. Mobile screenshot too if the change affects mobile.
- [ ] **Style match**: the change uses Odysseus's existing visual language. Specifically:
- Reuse existing CSS variables (`--red`, `--fg`, `--bg`, `--card`, `--border`, etc.) — do not introduce new color values, font sizes, or spacing units.
- Reuse existing button/input/card/border classes. Don't invent parallel styling.
- **No Unicode emoji in UI or code.** Use inline SVG (matching the monochrome icon style already in `static/index.html`) or plain text.
- Monospaced font (`Fira Code`) for primary UI text. Don't override.
- Dark theme is the default; any light-mode work must be wired through the existing theme system, not hard-coded.
- [ ] **No new component patterns.** If a similar widget already exists in the app, extend it instead of writing a parallel one.
- [ ] **I am not an LLM agent submitting a bulk PR.** If you are, please open an issue describing the problem first — bulk auto-generated PRs that don't match the project's visual style are closed on sight, even when the underlying fix is correct.
### Screenshots / clips
<!-- Drag and drop images or a screen recording here. Required for any UI/visual change. -->

1
.gitignore vendored
View File

@@ -12,6 +12,7 @@ venv/
# Environment
.env
.env.bak.*
!.env.example
# Data — all user data stays local

View File

@@ -33,8 +33,8 @@ The full license texts are kept in [`licenses/`](licenses/).
- **[Tongyi DeepResearch](https://github.com/Alibaba-NLP/DeepResearch)** by
**Alibaba-NLP / Tongyi Lab** — the multi-step deep-research agent pipeline.
Copyright © Alibaba-NLP / Tongyi Lab. **Apache-2.0.** Adapted for Odysseus's
Deep Research feature (`api/research_*.py`, `routes/research_routes.py`,
`services/search/`). Full text in
Deep Research feature (`services/research/`, `src/research_handler.py`,
`routes/research_routes.py`, `services/search/`). Full text in
[`licenses/DeepResearch-Apache-2.0.txt`](licenses/DeepResearch-Apache-2.0.txt).
---
@@ -47,7 +47,7 @@ just composed.
| Service | Image | Purpose | License |
|---|---|---|---|
| [SearXNG](https://github.com/searxng/searxng) | `searxng/searxng:latest` | Default metasearch backend | AGPL-3.0 |
| [SearXNG](https://github.com/searxng/searxng) | `searxng/searxng:2026.5.31-7159b8aed` (pinned tag; see compose) | Default metasearch backend | AGPL-3.0 |
| [ChromaDB](https://github.com/chroma-core/chroma) | `chromadb/chroma:latest` | Vector store for memory / RAG | Apache-2.0 |
| [ntfy](https://github.com/binwiederhier/ntfy) | `binwiederhier/ntfy` | Push notifications (self-hosted reminders) | Apache-2.0 / GPL-2.0 |
@@ -118,6 +118,7 @@ Core (`requirements.txt`) and optional (`requirements-optional.txt`):
| croniter | MIT |
| pytest / pytest-asyncio | MIT / Apache-2.0 |
| duckduckgo-search (optional) | MIT |
| markitdown (optional — Office/EPUB text extraction) | MIT |
| **PyMuPDF** *(optional — form-filling only)* | **AGPL-3.0** — see note below |
## Companion services (interoperated with, not bundled)
@@ -152,6 +153,9 @@ concerns from earlier are resolved:
deployment (Artifex also sells a commercial PyMuPDF license that lifts this).
- **`caldav`** (Python lib) is **dual-licensed GPL-3.0-or-later OR Apache-2.0**.
Odysseus uses it under **Apache-2.0**, which is permissive and MIT-compatible.
- **`markitdown`** (Microsoft) is **MIT** and used only as an *optional* dependency for Office/EPUB text
extraction (`src/markitdown_runtime.py`), lazy-imported with graceful fallback — the MIT core runs without
it. The cloud `az-doc-intel` extra is deliberately **not** installed, keeping extraction fully local.
---

View File

@@ -57,12 +57,32 @@ Good pull requests usually include:
- A short explanation of the bug or feature.
- The files or areas changed.
- Manual test steps or automated test results.
- Manual test steps or automated test results from running the actual app, not just the test suite.
- Screenshots or short recordings for UI changes.
- Links to related issues, for example `Fixes #123`.
Please keep PRs small. Large PRs that mix unrelated cleanup, formatting, refactors, and behavior changes are much harder to review.
> **Auto-generated PRs.** If you are running an LLM agent (Devin, Cursor, OpenHands, Claude Code, etc.) against this repo: please open an issue describing the problem first instead of opening a PR directly. Bulk agent-generated PRs that don't match the project's visual style or contribution format will be closed without review, even when the underlying fix is correct.
## Style and visual changes
Odysseus has an intentional visual style. PRs that ignore it will be closed without merge, no matter how correct the underlying code is.
Before submitting any change that affects what the app looks like — buttons, icons, fonts, colors, spacing, layout, CSS, HTML, SVG, or any `static/js/` module that draws to the DOM — please:
1. **Run the app locally** and view the change in a browser. Type-checks and unit tests are not enough.
2. **Attach a screenshot or short clip** of the change in the running app. Add a mobile screenshot too if the change affects mobile.
3. **Match the existing visual language.** Specifically:
- Reuse existing CSS variables (`--red`, `--fg`, `--bg`, `--card`, `--border`, …). Do not introduce new color values, font sizes, or spacing units.
- Reuse existing button, input, card, and border classes. Don't invent parallel styling for similar widgets.
- **No Unicode emoji in UI or code.** Use inline SVG (matching the monochrome icon style already in `static/index.html`) or plain text.
- Monospaced font (`Fira Code`) for primary UI text. Don't override.
- Dark theme is the default; any light-mode work goes through the existing theme system, not hard-coded.
4. **Don't add parallel components.** If a similar widget already exists in the app, extend it instead of writing a new one.
If you are unsure whether a change is "visual," it is. Default to attaching a screenshot.
## Issue Reports
For bugs, include:

176
README.md
View File

@@ -1,7 +1,10 @@
# Odysseus
```
───────────────────────────────────────────────
⊹ ࣪ ˖ ૮( ˶ᵔ ᵕ ᵔ˶ )っ Odysseus vers. 1.0
───────────────────────────────────────────────
```
![Odysseus](docs/odysseus.jpg)
@@ -77,8 +80,10 @@ python setup.py
python -m uvicorn app:app --host 127.0.0.1 --port 7000
```
Requirements: Python 3.11+. Cookbook also needs `tmux` for background model
downloads and serves. Use `--host 0.0.0.0` only when you intentionally want
LAN/reverse-proxy access.
downloads and serves. The app itself is lightweight; local model serving is the
heavy part and depends on the model, runtime, GPU, and VRAM, so small hosts can
connect to API or remote model servers instead. Use `--host 0.0.0.0` only when
you intentionally want LAN/reverse-proxy access.
### Apple Silicon
Docker on macOS cannot use the Metal GPU. For GPU-accelerated Cookbook on an
@@ -90,7 +95,18 @@ cd odysseus
./start-macos.sh
```
It launches at `http://127.0.0.1:7860`. To build a clickable app wrapper:
It launches at `http://127.0.0.1:7860`. To expose it to your phone over a trusted LAN/VPN such as Tailscale, bind all interfaces:
```bash
ODYSSEUS_HOST=0.0.0.0 ./start-macos.sh
# then open http://<tailscale-ip>:7860
```
The script also reads `.env` at startup, so `APP_BIND=0.0.0.0` and `APP_PORT`
set there are picked up automatically without a command-line override each run.
Keep `AUTH_ENABLED=true` (the default) before binding outside loopback. Do not
expose this port directly to the public internet. To build a clickable app wrapper:
```bash
./build-macos-app.sh
@@ -117,21 +133,82 @@ Odysseus SSH key and add the public key to the remote server's
ssh-copy-id -i data/ssh/id_ed25519.pub user@server
```
**NVIDIA / AMD Docker GPU overlays.** Install the host runtime first, then add
one of these to `.env`:
**Docker GPU overlays.** CPU-only users can skip this section. Cookbook can
only detect GPUs that Docker exposes to the container — if the host runtime or
device passthrough is not configured, Cookbook sees the iGPU, another card, or
CPU instead of your intended GPU.
For NVIDIA, `scripts/check-docker-gpu.sh` diagnoses GPU passthrough and can
optionally install the host runtime or update `.env`.
```bash
# Read-only diagnostic (default — installs nothing, never edits .env):
scripts/check-docker-gpu.sh
# Print OS-specific install commands without running them:
scripts/check-docker-gpu.sh --print-install-commands
# Install NVIDIA Container Toolkit on Ubuntu/Debian (requires sudo):
scripts/check-docker-gpu.sh --install-nvidia-toolkit
# Write COMPOSE_FILE to .env (only when GPU passthrough is confirmed working):
scripts/check-docker-gpu.sh --enable-nvidia-overlay
# Full assisted setup — install toolkit, then enable overlay if passthrough works:
scripts/check-docker-gpu.sh --install-nvidia-toolkit --enable-nvidia-overlay
```
Safety notes:
- The app never installs host GPU runtime automatically.
- The app never edits `.env` automatically.
- `.env` is only modified when `--enable-nvidia-overlay` is explicitly passed,
and only after GPU passthrough succeeds. `--yes` skips prompts but does not
bypass the passthrough gate.
- `.env.bak.*` backups created by `--enable-nvidia-overlay` are ignored by
Git and the Docker build context.
To enable manually without the script, add this to `.env`:
```bash
COMPOSE_FILE=docker-compose.yml:docker/gpu.nvidia.yml
COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml
```
Verify with:
**AMD / ROCm.** AMD setup is read-only diagnostic plus manual `.env` edit. Run:
```bash
docker compose exec odysseus nvidia-smi -L
docker compose exec odysseus rocm-smi
scripts/check-docker-amd-gpu.sh
```
Then add the reported values to `.env`, replacing the example `RENDER_GID` value
below with your host's numeric render group ID:
```bash
COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml
RENDER_GID=989
```
For NVIDIA/AMD GPU support, also read the comments in the selected overlay file: `docker/gpu.nvidia.yml` or `docker/gpu.amd.yml`.
Verify after enabling either overlay:
```bash
docker compose exec odysseus nvidia-smi -L # NVIDIA
docker compose exec odysseus sh -lc 'test -e /dev/kfd && test -d /dev/dri && ls -l /dev/kfd /dev/dri/renderD*' # AMD
```
> **GPU passthrough ≠ llama.cpp CUDA.** `nvidia-smi` passing inside the
> container confirms Docker GPU access, but llama.cpp also needs `cudart` and
> the CUDA Toolkit at runtime. If Cookbook logs show `Unable to find cudart
> library`, `Could NOT find CUDAToolkit`, `CUDA Toolkit not found`, or
> tensors/layers assigned to CPU, that is a Cookbook/llama.cpp build issue —
> not a Docker passthrough failure. Re-install the serve engine via
> **Cookbook → Dependencies** to get a CUDA-enabled build.
>
> The same split applies to AMD/ROCm: seeing `/dev/kfd` and `/dev/dri` inside
> the container confirms device passthrough, not ROCm userspace or a
> ROCm-enabled vLLM/llama.cpp build. `rocm-smi` and `rocminfo` are not expected
> inside the slim Odysseus image.
**Ollama with Docker.** If Ollama runs on the host, add this endpoint in
Settings:
@@ -145,6 +222,13 @@ Ollama must listen outside its own loopback interface:
OLLAMA_HOST=0.0.0.0:11434 ollama serve
```
This connects Odysseus in Docker to an Ollama server that is already running on
your host machine; it does not start Ollama inside the container.
`host.docker.internal` is Docker's hostname for the host machine from inside the
container. Cookbook **Serve** is a separate workflow for serving downloaded
models through Odysseus/llama.cpp, so Windows users with an existing Ollama
install usually only need to add the endpoint in Settings.
**Useful checks.**
```bash
@@ -176,13 +260,16 @@ Or do it by hand:
```powershell
git clone https://github.com/pewdiepie-archdaemon/odysseus.git
cd odysseus
python -m venv venv
py -3.11 -m venv venv
venv\Scripts\Activate.ps1
pip install -r requirements.txt
python setup.py
python -m uvicorn app:app --host 127.0.0.1 --port 7000
```
The `py -3.11` launcher is used because plain `python` may point at an older
interpreter. If you have a different 3.11+ version installed, use `py -3.12`
(or the matching version) for the venv step.
**Requirements:** Python 3.11+. The core app (chat, agent, memory, documents,
email, calendar, deep research) runs fully native. For full **Cookbook** background
model downloads and the agent shell tool, also install
@@ -194,31 +281,77 @@ Local GPU *serving* of vLLM/SGLang needs Linux/WSL2; for a local model on Window
Open `http://localhost:7000`, log in with the generated admin password,
and configure everything else inside **Settings**.
## Troubleshooting & Advanced Setup
### `chromadb-client` conflicts with embedded ChromaDB
If `chromadb-client` (the lightweight HTTP-only package) is installed alongside the full `chromadb` package, Odysseus starts but ChromaDB silently falls back to HTTP-only mode and fails.
**Fix:** uninstall `chromadb-client` and force-reinstall the full package:
```bash
./venv/bin/pip uninstall chromadb-client -y
./venv/bin/pip install --force-reinstall chromadb
```
### HTTPS + LAN/Tailscale exposure
To expose Odysseus on a local network or Tailscale with HTTPS:
1. Change the bind address to `0.0.0.0` in `.env` (`APP_BIND=0.0.0.0` or `ODYSSEUS_HOST=0.0.0.0`).
2. Generate a locally-trusted cert for your LAN/Tailscale IPs using [mkcert](https://github.com/FiloSottile/mkcert):
```bash
mkcert -install
mkcert -cert-file cert.pem -key-file key.pem 192.168.1.100 tailscale-ip
```
3. Run `uvicorn` with the generated certs:
```bash
python -m uvicorn app:app --host 0.0.0.0 --port 7000 --ssl-certfile=cert.pem --ssl-keyfile=key.pem
```
4. Install the `mkcert` CA on any other device you want to access Odysseus from (e.g., for iOS, email the `rootCA.pem` to yourself, install the profile, and trust it in Certificate Trust Settings).
### Optional Dependencies
`requirements-optional.txt` contains packages that unlock extra features. It is not installed by default.
| Package | Feature unlocked |
|---------|-----------------|
| `faster-whisper` | Local speech-to-text (microphone -> text) via the "local" STT provider. |
| `duckduckgo-search` | DuckDuckGo as a search provider option. |
| `PyMuPDF` | PDF page rendering in the side viewer panel and form-filling. (Note: AGPL-3.0) |
| `markitdown` | Office/EPUB document text extraction (converts .docx/.xlsx/.pptx/.xls/.epub to Markdown). |
## Security Notes
Odysseus is a self-hosted workspace with powerful local tools: shell access, file uploads, model downloads, web research, email/calendar integrations, and API tokens. Treat it like an admin console.
- Keep `AUTH_ENABLED=true` for any network-accessible deployment.
- Do not expose it directly to the public internet without HTTPS and a trusted reverse proxy.
- Keep `data/`, `.env`, logs, databases, and uploaded/generated media out of Git. They are ignored by default.
- Keep `LOCALHOST_BYPASS=false` outside local development.
- Use `SECURE_COOKIES=true` when Odysseus is served through HTTPS by a trusted reverse proxy or private access gateway.
- Do not expose it directly to the public internet without HTTPS and a trusted reverse proxy or private access layer.
- Keep `.env`, `data/`, `logs/`, databases, uploads, generated media, backups, auth/session files, API keys, and model/provider tokens out of Git and private shares. They are ignored by default.
- Review `data/auth.json` after first boot: disable open signup unless you intentionally want it, make only your own account admin, and keep demo/test accounts non-admin.
- Non-admin users do not get shell/Python/file read/write by default, and admin-only routes/tools such as MCP management, API tokens, webhooks, model/cookbook serving, backup/vault, and app settings are admin-gated. Other features are controlled by per-user privileges, so review each user's privileges before exposing a deployment.
- Rotate any API keys or tokens that were ever pasted into a shared chat, demo, screenshot, or log.
- If you enable API tokens or webhooks, create separate tokens per integration and delete unused ones.
- Prefer binding manual development runs to `127.0.0.1`; bind to `0.0.0.0` only when you intentionally want LAN/reverse-proxy access.
- Keep ChromaDB, SearXNG, ntfy, Ollama, vLLM, llama.cpp, databases, and raw model/provider APIs internal-only. Expose only the authenticated Odysseus web/API entrypoint through your trusted proxy or private access layer.
- Before publishing a fork, run `git status --short` and confirm no private files from `.env`, `data/`, `logs/`, uploads, backups, or local databases are staged.
### Putting it behind HTTPS
Odysseus serves plain HTTP on its port. That's fine for `localhost` and trusted LAN/VPN use, but browsers will warn ("Password fields present on an insecure page") and the login + API tokens travel in cleartext. For anything reachable outside your machine — including a Tailscale IP shared with other devices — put a TLS-terminating reverse proxy in front.
### Private or proxied deployments
Odysseus serves plain HTTP on its app port. Docker Compose binds Odysseus and the bundled services to `127.0.0.1` by default, so a typical production/private setup is:
Shortest path with [Caddy](https://caddyserver.com/) (auto-renews Let's Encrypt certs):
1. Keep Odysseus on localhost, for example `127.0.0.1:7000`.
2. Terminate HTTPS at a trusted reverse proxy or private access gateway.
3. Put the authenticated Odysseus web/API entrypoint behind that layer.
4. Keep raw service and model ports internal-only.
```caddy
odysseus.example.com {
reverse_proxy localhost:7000
}
```
Cloudflare Access, Tailscale, Caddy, nginx, and Traefik can all fit this pattern; none are required by Odysseus. If your access layer reaches Odysseus on the same host, proxy to `http://127.0.0.1:7000` and keep `AUTH_ENABLED=true`, `LOCALHOST_BYPASS=false`, and `SECURE_COOKIES=true`.
For a LAN-only Tailscale deployment, Caddy + [tailscale-cert](https://caddyserver.com/docs/caddyfile/options#auto-https) or the built-in MagicDNS HTTPS feature both work. nginx/Traefik configs are similar — proxy `localhost:7000`, terminate TLS at the proxy. Once that's in place, the browser warning goes away and your login is encrypted.
Common internal-only ports from the default docs/compose setup:
| Port | Service |
|---|---|
| `7000` | Odysseus raw app port |
| `8080` | SearXNG |
| `8091` | ntfy |
| `8100` | ChromaDB host port for manual/compose access |
| `11434` | Ollama |
| `8000-8020` | Common local model/provider APIs |
## Contributing
Help is welcome. The best entry points are fresh-install testing, provider setup
@@ -241,6 +374,7 @@ Key settings:
| `APP_PORT` | `7000` | Docker Compose host port for the web UI. |
| `AUTH_ENABLED` | `true` | Enable/disable login |
| `LOCALHOST_BYPASS` | `false` | Development-only auth bypass for loopback requests. Keep false for shared/network deployments. |
| `SECURE_COOKIES` | `false` | Set true when serving Odysseus through HTTPS at a trusted proxy or private access gateway. |
| `DATABASE_URL` | `sqlite:///./data/app.db` | Database connection string |
| `CHROMADB_HOST` | `localhost` | ChromaDB host for vector memory. Docker overrides this to `chromadb`. |
| `CHROMADB_PORT` | `8100` | ChromaDB port for manual host runs. Docker overrides this to `8000`. |

View File

@@ -1,6 +1,6 @@
# Roadmap / Help Wanted
Odysseus is on a voyage, but not home yet. It works great for me (lol), but this is ship is moving fast and feedback/help would be appreciated! (I dont know what I'm doing hlep).
Odysseus is on a voyage, but not home yet. It works great for me (lol), but this ship is moving fast and feedback/help would be appreciated! (I don't know what I'm doing, help).
If you see weird CSS, strange layout behavior, or a suspiciously murky corner of
the codebase, you are probably right to stay away.
@@ -8,25 +8,60 @@ the codebase, you are probably right to stay away.
## High Priority
- SQUASH BUGS
- Fresh Docker install smoke tests on Linux, macOS, and Windows!!
- Fresh install smoke tests on Linux, macOS, and Windows. Docker, native Python,
and WSL all need coverage.
- Integration audit: do integrations even work? Confirm what works, what needs setup docs, and what should be removed or hidden.
- Self-host troubleshooting cookbook. Document the weird 30-second fixes that otherwise become 30-minute searches: Dovecot cleartext auth for local stacks, ntfy Android Instant Delivery for non-ntfy.sh servers, clipboard limits on plain-HTTP Tailscale URLs, Radicale collection URLs, and similar traps.
- Cookbook reliability on other computers. This is probably the area most likely to need work across different machines, GPUs, drivers, shells, and Python environments.
- Tile/window management correctness. I had to brute-force my way a bit here, and I'm aware that popups, dropdowns, and fixed-position UI inside transformed modals can land in the wrong place.
- Esc key handling. It's small, but it's inconsistent: some windows close on Esc and a lot of them don't.
- Skill audit, how does your model respond to skill injection, does it follow? Does its parsing miss?
- Cookbook SGLang support across platforms. Make sure SGLang setup/serve works
predictably on Linux, Windows/WSL, macOS where possible, Docker, and common
NVIDIA/AMD hardware paths.
- Deep Research model presets by hardware. Recommend approved model/parameter
profiles for small, medium, and large local setups so people with different
hardware can use Deep Research without guessing. Surface this either in Deep
Research settings or as a Cookbook scan/dropdown suggestion.
- Cookbook model scan/download ranking. Prioritize newer architectures and
better hardware-fit models instead of scoring everything almost the same.
Ranking should account for architecture age, quant format, VRAM/RAM fit,
backend support, vision/mmproj requirements, and likely serve reliability.
- Cookbook error feedback and logging. Failed downloads, dependency installs,
preflights, and serve jobs should show the actual command/output/error in the
UI, with copyable logs and clear next steps instead of just "crashed".
- Agent prompt/context bloat. Agent mode is too heavy for smaller local models:
tool schemas, skills, memory, documents, and instructions can eat the context
before the user request really starts. We need slimmer prompts, better tool
selection, smaller default tool sets, and clearer guidance for models with
4k/8k/16k context windows.
- Skill/tool prompt-injection audit. User-editable skills, notes, documents,
fetched pages, and memories should be treated as untrusted data. Keep testing
whether models follow malicious instructions from those surfaces.
- Better degraded-state reporting for ChromaDB, SearXNG, email, ntfy, and provider probes.
- Email performance audit. Fetching, searching, opening, deleting, and sending
email can feel slow, especially over IMAP/SMTP providers with high latency.
Need someone who knows mail performance to profile the current flow, identify
whether the bottleneck is IMAP folder select/fetch, cache invalidation,
attachment/body loading, SMTP handshakes, or frontend refresh behavior, then
propose safer caching/prefetch/batching without breaking multi-account state.
- Provider setup/probing audit for Anthropic, Gemini, Groq, xAI, OpenRouter, OpenAI, and DeepSeek.
## Refactor Targets
- CSS cleanup. `static/style.css` is basically Calypso's island atm.
- Tour core helper. The onboarding tours have too much copy-pasted scaffolding; promote a shared `tour-core.js` helper before adding more tours.
- Modal/window positioning cleanup. Some window controls have improved, but the
underlying popup/dropdown/fixed-position behavior is still too fragile.
- Mobile media override discoverability. A lot of "CSS did not move" bugs are mobile `@media` overrides of the same selector; comments or linting around desktop/mobile paired rules would help.
- Dead code pass for old routes, stale feature flags, and unused UI states.
## Frontend
- Expand the Editor for quicker, more robust everyday use. Better file/document
handling, smoother window behavior, clearer save/export flows, stronger image
editing affordances, and fewer brittle edge cases.
- Better AI integration for Notes and Todos. Notes should be easier for the
agent to read, update, summarize, and turn into actions. Todos should be
assignable to an agent from the UI, possibly through a button, task action,
or dedicated skill/tool flow.
- Mobile gallery/editor polish. Make it easier to launch or download the inpaint model and any other missing pieces.
- Accessibility pass: keyboard navigation, focus states, contrast, reduced motion.
- Improve empty states and error messages on fresh installs.

View File

@@ -8,16 +8,20 @@ Security fixes are handled on the default branch until formal releases are cut.
## Deployment Guidance
- Keep `AUTH_ENABLED=true`.
- Keep `AUTH_ENABLED=true` for any network-accessible deployment.
- Keep `LOCALHOST_BYPASS=false` outside local development.
- Set `SECURE_COOKIES=true` when Odysseus is served through HTTPS by a trusted reverse proxy or private access gateway.
- Use HTTPS when exposing the app beyond localhost.
- Put the app behind a trusted reverse proxy or private network.
- Protect `.env`, `data/`, logs, uploaded files, generated media, and database files.
- Put the authenticated Odysseus web/API entrypoint behind a trusted reverse proxy or private access layer such as Cloudflare Access, Tailscale, or a VPN.
- Keep ChromaDB, SearXNG, ntfy, Ollama, vLLM, llama.cpp, databases, and raw model/provider APIs internal-only.
- Protect `.env`, `data/`, `logs/`, uploads, generated media, backups, auth/session files, database files, API keys, and model/provider tokens.
- Disable open signup unless you intentionally want new accounts.
- Keep demo/test users non-admin, and remove them entirely on serious deployments.
- Give admin accounts strong passwords and enable 2FA where possible.
- Leave high-risk agent tools restricted to admins: shell, Python, file read/write, email send/read, MCP, app API, task/skill/memory management, settings, tokens, and model serving.
- Rotate API keys, webhook secrets, and Odysseus API tokens if they appear in logs, screenshots, demos, or shared chats.
- Treat shell, model-serving, MCP, email, calendar, and vault features as privileged admin functionality.
- Common internal-only ports are Odysseus `7000`, SearXNG `8080`, ntfy `8091`, ChromaDB `8100`, Ollama `11434`, and local model/provider APIs such as `8000-8020`.
## Publishing A Fork
@@ -29,7 +33,7 @@ git check-ignore -v .env data/auth.json data/app.db logs/compound.log odysseus.d
git grep -n -I -E "(sk-[A-Za-z0-9_-]{20,}|xox[baprs]-|AIza[0-9A-Za-z_-]{20,}|Bearer [A-Za-z0-9._~+/-]{20,})" -- . ':!static/lib/**' ':!package-lock.json'
```
Only `.env.example`, docs, source, tests, and static assets should be committed. Never commit live `data/` contents, local databases, uploaded files, generated media, logs, backups, API keys, password hashes, or personal documents.
Only `.env.example`, docs, source, tests, and static assets should be committed. Never commit live `.env` values, `data/` contents, local databases, uploaded files, generated media, logs, backups, auth/session files, API keys, model/provider tokens, password hashes, or personal documents.
## Reporting

81
THREAT_MODEL.md Normal file
View File

@@ -0,0 +1,81 @@
# Threat Model
Odysseus is a **self-hosted AI workspace with privileged local access**. This document states the trust boundary so contributors can reason about security decisions without reading through the full auth and middleware stack.
## Trust Boundary
Odysseus is designed for **trusted users on a private network**, not public exposure. The README describes it as "treat it like an admin console" — that framing is accurate. A logged-in admin can execute shell commands, read and write files, send email, and control model serving. This is intentional. The threat model does not try to prevent admins from doing these things. It does try to prevent:
- Unauthenticated access
- Non-admins reaching admin-only capabilities
- The AI agent acting on instructions injected through untrusted content (web results, emails, fetched pages, memories)
- Internal services (ChromaDB, Ollama, SearXNG, etc.) being reachable from outside the host
## Roles and Capabilities
| Capability | Admin | Non-admin (default) |
|---|---|---|
| Chat with agent | ✓ | ✓ |
| Browser tool | ✓ | ✓ |
| Documents | ✓ | ✓ |
| Research mode | ✓ | ✓ |
| Image generation | ✓ | ✓ |
| Memory management | ✓ | ✓ |
| Shell / Python execution | ✓ | ✗ |
| File read / write | ✓ | ✗ |
| Email send / read | ✓ | ✗ |
| MCP tools | ✓ | ✗ |
| Calendar management | ✓ | ✗ |
| Token / webhook management | ✓ | ✗ |
| Model serving | ✓ | ✗ |
| Vault | ✓ | ✗ |
| Settings | ✓ | ✗ |
Non-admin defaults are in `core/auth.py:DEFAULT_PRIVILEGES`. Tool enforcement is in `src/tool_security.py:NON_ADMIN_BLOCKED_TOOLS`. Any tool whose name starts with `mcp__` is also blocked for non-admins. Admins always get full access regardless of stored privilege values.
## Authentication
- **Sessions:** bcrypt passwords, 7-day session tokens stored atomically in `data/sessions.json` via `core/atomic_io.py`.
- **2FA:** TOTP with 8 single-use backup codes. Verified after password check, before session issuance.
- **Reserved usernames:** `internal-tool`, `api`, `demo`, `system` cannot be registered or renamed into. Defined in `core/auth.py:RESERVED_USERNAMES`.
- `internal-tool` is security-critical: `core/middleware.py:require_admin` treats any request where `request.state.current_user == "internal-tool"` as the in-process tool loopback and grants admin unconditionally. A real account with that name would silently pass every `require_admin` check.
- **Orphan sessions:** `validate_token` re-checks that the user record still exists on every call. A deleted user's cookie is dropped on next request rather than continuing to authenticate.
## Internal Tool Loopback
Agent tool calls reach admin-gated HTTP routes over an in-process HTTP loopback. The mechanism:
1. At app startup, `core/middleware.py` generates a random `INTERNAL_TOOL_TOKEN` via `secrets.token_hex(32)`. It is never persisted and never sent to clients.
2. Loopback requests carry `X-Odysseus-Internal-Token: <token>` or have `request.state.current_user` already set to `"internal-tool"` by the auth middleware.
3. `require_admin` recognises either signal and grants access without checking the session user.
The agent may be running in a non-admin user's session, but tool dispatch first calls `src/tool_security.py:owner_is_admin_or_single_user` to verify the session owner is an admin before issuing any loopback call. Non-admin users cannot invoke admin tools even via the agent.
## Prompt-Injection Hardening
External content that reaches the LLM is treated as untrusted via `src/prompt_security.py`:
- `untrusted_context_message(label, content)` wraps the content in a `user`-role message with a header block instructing the model not to follow instructions inside it. Content goes in as data, not as a system instruction.
- `UNTRUSTED_CONTEXT_POLICY` is a system-prompt preamble that states the same policy at the top of every session where untrusted data may appear.
**Untrusted surfaces that must go through this wrapper:** web search results, fetched URLs, emails (read), saved memories, skill text, notes, and any tool output sourced from outside the server. Injecting untrusted content directly into the system role is a security bug.
## Security Headers
`core/middleware.py:SecurityHeadersMiddleware` sets headers on every response:
- `X-Frame-Options: DENY` + `frame-ancestors 'none'` on all routes except tool-render iframes (which are sandboxed at the HTML level).
- `X-Content-Type-Options: nosniff` and `Referrer-Policy: no-referrer` everywhere.
- **CSP:** nonce-based `script-src 'self' 'nonce-{nonce}' https://cdn.jsdelivr.net`. `style-src 'unsafe-inline'` is intentionally kept — `static/index.html` ships inline `<style>` blocks and JS modules set `style=""` attributes at runtime. Inline styles do not execute script so the risk is visual-only. Removing this requires templating the HTML files and auditing all JS-set style attributes.
## Known Gaps
These gaps are open and acknowledged; contributor help is welcome:
1. **No shell/filesystem sandbox.** The agent `bash` and `read_file`/`write_file` tools run as the app process user with no network egress filtering or filesystem confinement. A successful prompt-injection reaching a shell-enabled admin session can make outbound requests to internal services. See #1058 for the sandbox proposal.
2. **SSRF via `/api/v1/chat` `base_url` parameter.** A chat-scoped API token can supply an arbitrary `base_url`; the server forwards the LLM request to that host without validating the scheme or address. PR #1039 fixes this.
3. **`src/search/` partial consolidation.** `src.search.core` and `src.search.providers` correctly alias `services.search` via `sys.modules` replacement. `analytics`, `cache`, `content`, `query`, and `ranking` are still independent copies that can drift. The SSRF regression tests in `tests/test_webhook_ssrf_resilience.py` test `src.webhook_manager` directly (separate from search), so the safety net there is intact. See #1058.
4. **Token scopes are coarse.** There is no way to grant a session a subset of the owning user's privileges. Companion/mobile tokens carry either `chat` or `admin` scope with no per-capability granularity.

71
app.py
View File

@@ -1,6 +1,23 @@
# app.py — slim orchestrator
import mimetypes
import os
def register_static_mime_types() -> None:
    """Pin the MIME types reported for JavaScript static assets.

    Some native Windows setups inherit stale or incorrect registry mappings
    for ``.js``/``.mjs``; Starlette can then serve ES modules with a non-JS
    ``Content-Type`` and the UI loads but fails on click. Registering the
    mappings explicitly at startup keeps static assets consistent across
    platforms.
    """
    # (content type, file extension) pairs registered with the global
    # ``mimetypes`` table, in this order.
    js_overrides = (
        ("text/javascript", ".js"),
        ("application/javascript", ".mjs"),
    )
    for content_type, extension in js_overrides:
        mimetypes.add_type(content_type, extension)
register_static_mime_types()
# Windows: force HuggingFace/fastembed to COPY model files instead of symlinking.
# On a network-share/UNC data dir Windows can't follow HF's symlinks ([WinError
# 1463]), so the ONNX embedding model fails to load. huggingface_hub reads this
@@ -25,6 +42,7 @@ import secrets
from datetime import datetime
from typing import Dict
from contextlib import asynccontextmanager
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import JSONResponse, FileResponse, HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
@@ -57,6 +75,9 @@ logging.basicConfig(
logger = logging.getLogger(__name__)
# ========= APP =========
# Lifespan is defined below (after all helpers it references are in scope)
# and passed to FastAPI so we can use the modern context-manager lifecycle
# instead of the deprecated @app.on_event("startup"/"shutdown") decorators.
app = FastAPI(
title="AI Chat Application",
description="Comprehensive AI chat with memory, research, and multi-modal capabilities",
@@ -152,9 +173,25 @@ if AUTH_ENABLED:
"/login",
}
AUTH_EXEMPT_PREFIXES = ["/static"]
# Dynamic paths whose own handler proves identity via a path-embedded
# secret instead of the session/bearer auth. The route handler at
# routes/task_routes.py validates the per-task `webhook_token` itself
# and returns 404 on mismatch, so the path is the credential — the
# UI labels these URLs "no auth needed" precisely because external
# callers (Zapier, n8n, curl) can't supply a session cookie. Without
# this exemption AuthMiddleware rejects every POST with 401 before
# the token is ever checked.
import re as _re
AUTH_EXEMPT_PATTERNS = [
_re.compile(r"^/api/tasks/[^/]+/webhook/[^/]+/?$"),
]
def _is_auth_exempt(path: str) -> bool:
    """Return True when *path* may skip session/bearer authentication.

    A path is exempt when it is listed in ``AUTH_EXEMPT_EXACT``, starts with
    one of ``AUTH_EXEMPT_PREFIXES``, or matches one of the compiled regexes
    in ``AUTH_EXEMPT_PATTERNS``.
    """
    if path in AUTH_EXEMPT_EXACT:
        return True
    # ``str.startswith`` accepts a tuple of candidates; an empty tuple simply
    # yields False.
    if path.startswith(tuple(AUTH_EXEMPT_PREFIXES)):
        return True
    # Dynamic paths (such as the per-task webhook URLs) carry their own
    # credential in the path, so they are recognised by pattern. This check
    # must remain reachable: returning before it would leave those routes
    # rejected by the auth layer.
    return any(pattern.match(path) for pattern in AUTH_EXEMPT_PATTERNS)
# In-memory token cache: prefix → list[(token_id, token_hash, owner, scopes)]. The DB
# query was running on every API-bearer request and scanning bcrypt
@@ -662,6 +699,9 @@ app.include_router(setup_vault_routes())
from routes.contacts_routes import setup_contacts_routes
app.include_router(setup_contacts_routes())
from companion import setup_companion_routes
app.include_router(setup_companion_routes())
# ========= ROUTES (kept in app.py) =========
def _serve_html_with_nonce(request: Request, file_path: str) -> HTMLResponse:
@@ -736,6 +776,17 @@ async def get_version():
async def health_check() -> Dict[str, str]:
return {"status": "healthy", "timestamp": datetime.utcnow().isoformat()}
@app.get("/api/ready")
async def readiness_check() -> JSONResponse:
    """Readiness / integrity self-check — DB, data dir, local-first storage.

    Where /api/health only reports liveness, this endpoint answers 200 when
    the report from ``check_readiness()`` has a truthy ``"ready"`` entry and
    503 otherwise, so an orchestrator can gate traffic on real readiness.
    The full report is returned as the JSON body in both cases.
    """
    from src.readiness import check_readiness

    report = check_readiness()
    if report.get("ready"):
        http_status = 200
    else:
        http_status = 503
    return JSONResponse(status_code=http_status, content=report)
@app.get("/api/runtime")
async def runtime_info() -> Dict[str, object]:
in_docker = os.path.exists("/.dockerenv")
@@ -758,8 +809,19 @@ async def runtime_info() -> Dict[str, object]:
# ========= LIFECYCLE =========
@app.on_event("startup")
async def startup_event():
@asynccontextmanager
async def _lifespan(app):
    """Modern lifespan context manager replacing deprecated @app.on_event.

    Awaits ``_startup_event()`` before yielding control, then awaits
    ``_shutdown_event()`` once the context is resumed. The ``app`` argument
    is accepted but not used in the body.
    """
    # ── STARTUP ──
    await _startup_event()
    yield
    # ── SHUTDOWN ──
    # NOTE(review): there is no try/finally around the yield, so this line is
    # skipped if an exception is thrown into the generator at the yield —
    # confirm that is the intended behaviour.
    await _shutdown_event()
app.router.lifespan_context = _lifespan
async def _startup_event():
global upload_cleanup_task
logger.info("Application starting up...")
webhook_manager.set_loop(asyncio.get_running_loop())
@@ -983,8 +1045,7 @@ async def startup_event():
_startup_tasks.append(asyncio.create_task(_skill_audit_nightly_loop()))
logger.info("Application startup complete")
@app.on_event("shutdown")
async def shutdown_event():
async def _shutdown_event():
logger.info("Application shutting down...")
if upload_cleanup_task:
upload_cleanup_task.cancel()

View File

@@ -119,7 +119,11 @@ fi
notify "Starting…"
cd "$INSTALL_DIR" || die_gui "Install folder not found: $INSTALL_DIR"
"$UVICORN" app:app --host 127.0.0.1 --port "$PORT" >>"$LOG" 2>&1 &
if [ "$(uname -m)" = "arm64" ]; then
arch -arm64 "$UVICORN" app:app --host 127.0.0.1 --port "$PORT" >>"$LOG" 2>&1 &
else
"$UVICORN" app:app --host 127.0.0.1 --port "$PORT" >>"$LOG" 2>&1 &
fi
SERVER_PID=$!
# Quitting the app stops the server it started.

28
companion/README.md Normal file
View File

@@ -0,0 +1,28 @@
# Companion bridge
A thin, additive layer so a LAN client (e.g. a phone) can discover what an
Odysseus server offers and pair to it, without duplicating any LLM logic.
| Method | Path | Auth | Purpose |
|---|---|---|---|
| GET | `/api/companion/ping` | session or token | cheap, auth-validated health check |
| GET | `/api/companion/info` | session or token | server identity + capability flags |
| GET | `/api/companion/models` | session or token | the **caller's own** model endpoints |
| GET | `/api/companion/pair` | **admin cookie** | pairing page (a form; never mints) |
| POST | `/api/companion/pair` | **admin cookie** | mint a one-time pairing token (`?format=json` for an in-app screen) |
`/models` scopes to the caller's real owner plus legacy null-owner shared rows
(same rule as `owner_filter`) and never returns API-key material.
## Pairing CSRF posture
Minting happens **only on POST**. The session cookie is `SameSite=Lax`
(`routes/auth_routes.py`), so a browser will not send it on a cross-site POST —
the same protection `POST /api/tokens` relies on. A `GET` would be unsafe (Lax
cookies ride top-level GET navigations), so `GET /pair` only renders a form.
Minting invalidates the auth middleware's token cache, so a freshly minted token
works on the next request without a restart.
The pairing/scoping rules live in small, tested units (`token_owner`,
`owner_can_see`, `mint_pairing_token`, `pairing.*`) — see
`tests/test_companion_readonly.py` and `tests/test_companion_pairing.py`.

11
companion/__init__.py Normal file
View File

@@ -0,0 +1,11 @@
"""Odysseus companion bridge — additive LAN endpoints.
Read endpoints (/api/companion/ping, /info, owner-scoped /models) so a LAN
client can discover what a server offers, plus admin-only pairing
(/api/companion/pair) that mints a one-time chat-scoped token on POST. No new LLM
logic; auth is enforced by the existing AuthMiddleware. See companion/README.md.
"""
from companion.routes import setup_companion_routes
__all__ = ["setup_companion_routes"]

126
companion/pairing.py Normal file
View File

@@ -0,0 +1,126 @@
"""Shared pairing helpers for the companion bridge.
Token minting + LAN discovery + QR rendering, kept here as small, importable
units so the route layer stays thin and the logic is directly testable.
"""
from __future__ import annotations
import json
import os
import secrets
import socket
import uuid
import bcrypt
PAIRING_VERSION = 1
COMPANION_SCOPE = "chat"
def default_port() -> int:
    """Return the port taken from ``APP_PORT``, or 7000 as a fallback.

    This is only a best guess at the port the server is reachable on;
    callers that know the real request port should pass it explicitly.
    A missing variable defaults to ``"7000"`` and a value that ``int()``
    rejects also falls back to 7000.
    """
    configured = os.environ.get("APP_PORT", "7000")
    try:
        port = int(configured)
    except ValueError:
        port = 7000
    return port
def lan_ip_candidates() -> list[str]:
    """Likely LAN IPv4 addresses for this host, best candidate first.

    The UDP-connect trick reveals the egress interface the OS would use to
    reach the default gateway -- i.e. the address a phone on the same Wi-Fi
    should target. No packets are actually sent. Loopback is dropped.

    Both discovery steps are best-effort: any ``OSError`` (including a
    failure to create the probe socket) is swallowed and the addresses
    gathered so far are returned, so the result may be an empty list.
    """
    candidates: list[str] = []

    def _add(ip):
        # Keep first-seen order; skip empties, duplicates and 127.x loopback.
        if ip and ip not in candidates and not ip.startswith("127."):
            candidates.append(ip)

    try:
        # Creating the socket inside the try (and via a context manager)
        # means a socket() failure is handled like a connect() failure and
        # the descriptor is always closed.
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as probe:
            probe.connect(("8.8.8.8", 80))
            _add(probe.getsockname()[0])
    except OSError:
        # No usable route/socket: fall through to hostname resolution.
        pass

    try:
        for info in socket.getaddrinfo(socket.gethostname(), None, socket.AF_INET):
            _add(info[4][0])
    except OSError:
        # Hostname did not resolve; return whatever was collected above.
        pass
    return candidates
def find_admin_user() -> str | None:
"""Resolve an admin username from data/auth.json (schema uses is_admin),
falling back to the first user."""
auth_path = os.path.join("data", "auth.json")
try:
with open(auth_path, "r", encoding="utf-8") as f:
data = json.load(f)
except (OSError, json.JSONDecodeError):
return None
if not isinstance(data, dict):
return None
users = data.get("users") or {}
if not isinstance(users, dict):
return None
for uname, udata in users.items():
if isinstance(udata, dict) and udata.get("is_admin") is True:
return uname
return next(iter(users), None)
def mint_token(owner: str, name: str = "companion") -> tuple[str, str]:
    """Insert a chat-scoped API token row and hand back ``(token_id, raw_token)``.

    The raw token is returned ONCE: the row stores only its bcrypt hash and
    its first eight characters as a lookup prefix. Mirrors
    routes/api_token_routes.py so cookie- and companion-minted tokens are
    indistinguishable to the auth middleware.
    """
    from core.database import get_db_session, ApiToken

    secret = "ody_" + secrets.token_urlsafe(32)
    short_id = str(uuid.uuid4())[:8]
    row_fields = {
        "id": short_id,
        "owner": owner,
        "name": name,
        # Only the hash is persisted, never the secret itself.
        "token_hash": bcrypt.hashpw(secret.encode(), bcrypt.gensalt()).decode(),
        "token_prefix": secret[:8],
        "scopes": COMPANION_SCOPE,
        "is_active": True,
    }
    with get_db_session() as db:
        db.add(ApiToken(**row_fields))
    return short_id, secret
def pairing_payload(host: str, port: int, token: str) -> dict:
    """Build the exact JSON object a client scans / accepts.

    Keys are ``v`` (the pairing format version), ``host``, ``port`` and
    ``token``, in that order. Keep them stable.
    """
    return dict(v=PAIRING_VERSION, host=host, port=port, token=token)
def pairing_qr_png_data_uri(payload: dict) -> str | None:
"""Render the pairing payload as a QR `data:` URI for an <img>. Returns None
if the optional qrcode dep is unavailable."""
try:
import base64
import io
import qrcode
img = qrcode.make(json.dumps(payload, separators=(",", ":")))
buf = io.BytesIO()
img.save(buf, format="PNG")
return "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode()
except Exception:
return None

236
companion/routes.py Normal file
View File

@@ -0,0 +1,236 @@
"""Companion bridge — /api/companion/*.
A thin, additive layer so a LAN client (e.g. a phone) can discover what a server
offers and pair to it, without duplicating any LLM logic.
Auth is enforced globally by AuthMiddleware (app.py), so reaching a handler here
means the caller is authenticated by either a cookie session or a Bearer `ody_`
API token. The read endpoints (ping/info/models) accept either; the pairing
endpoints are admin-cookie only.
Pairing CSRF posture: minting happens ONLY on POST. The session cookie is
SameSite=Lax (routes/auth_routes.py), which a browser does not send on a
cross-site POST, so an admin's cookie can't be used by a malicious page to mint
a token -- the same protection the existing POST /api/tokens relies on. Minting
on a GET would be unsafe (Lax cookies ride top-level GET navigations), so GET
/pair only renders a form.
"""
import html
from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse
from core.middleware import require_admin
from src.auth_helpers import get_current_user
from companion import pairing as _pairing
def token_owner(request: Request) -> str | None:
    """Resolve the real owner a request should be attributed to for read-scoping.

    Bearer-token callers come through as the sandboxed pseudo-user "api";
    when ``request.state.api_token`` is truthy, the owner is read from
    ``request.state.api_token_owner`` (``None`` if that attribute is absent).
    Every other request is resolved through ``get_current_user``, i.e. the
    logged-in username of a cookie session.
    """
    state = request.state
    if not getattr(state, "api_token", False):
        return get_current_user(request)
    return getattr(state, "api_token_owner", None)
def owner_can_see(row_owner, owner) -> bool:
    """Decide whether a caller may read a row, by ownership.

    Legacy null-owner ("shared") rows are visible to everyone; any other row
    is visible only when its owner equals the caller. A caller must NEVER
    see another owner's row. This mirrors the `owner_filter` rule used
    elsewhere, written as a pure predicate so it can be tested directly and
    applied as a defensive in-Python check alongside the SQL filter.
    """
    if row_owner is None:
        return True
    return row_owner == owner
def mint_pairing_token(owner: str, invalidate=None) -> tuple[str, str]:
    """Mint a pairing token, then flush the auth middleware's token cache.

    Flushing the in-memory cache lets the new token be accepted on the very
    next request without a server restart. Returns ``(token_id, raw_token)``;
    the raw token is shown once.

    `invalidate` is the app's request.app.state.invalidate_token_cache
    callable, injected so this stays a pure, testable unit. Anything that is
    not callable (including the default ``None``) is ignored.
    """
    new_id, secret = _pairing.mint_token(owner)
    # Invalidate only after the token has been minted.
    if callable(invalidate):
        invalidate()
    return new_id, secret
def setup_companion_routes() -> APIRouter:
    """Build the ``/api/companion`` router and register its handlers.

    Registers ``GET /ping``, ``GET /info``, ``GET /models``, ``GET /pair`` and
    ``POST /pair`` on a fresh ``APIRouter`` and returns it. Both ``/pair``
    handlers call ``require_admin`` before doing anything else; the three
    read handlers do not.
    """
    router = APIRouter(prefix="/api/companion", tags=["companion"])

    @router.get("/ping")
    def ping(request: Request):
        """Cheap, auth-validated health check. A 200 with ok=true confirms the
        host/port and credential are valid; middleware returns 401 otherwise."""
        from core.constants import APP_VERSION
        return {
            "ok": True,
            "name": "odysseus",
            "version": APP_VERSION,
            # "token" when request.state.api_token is truthy, else "session".
            "auth": "token" if getattr(request.state, "api_token", False) else "session",
        }

    @router.get("/info")
    def info(request: Request):
        """Server identity + coarse capability flags. `owner` is the caller's own
        identity (the token's owner for bearer callers)."""
        from core.constants import APP_VERSION
        return {
            "name": "odysseus",
            "version": APP_VERSION,
            "owner": token_owner(request),
            # Capability flags are hard-coded here, not probed at runtime.
            "capabilities": {"chat": True, "streaming": True},
        }

    @router.get("/models")
    def models(request: Request):
        """LLM model endpoints the CALLER can use.

        The stock /api/models route scopes to get_current_user, which for a
        bearer token is the sandboxed pseudo-user "api" (owns nothing). Here we
        scope to the token's real owner instead, plus legacy null-owner shared
        rows -- the same rule as owner_filter. Read-only; never returns api_key
        material.
        """
        import json as _json
        from core.database import SessionLocal, ModelEndpoint
        from src.endpoint_resolver import build_chat_url
        owner = token_owner(request)
        out = []
        db = SessionLocal()
        try:
            # Enabled endpoints whose model_type is "llm" or unset (NULL).
            q = db.query(ModelEndpoint).filter(
                ModelEndpoint.is_enabled == True,  # noqa: E712
                (ModelEndpoint.model_type == "llm") | (ModelEndpoint.model_type == None),  # noqa: E711
            )
            # The SQL owner filter is applied only when an owner was resolved;
            # the owner_can_see check in the loop runs either way.
            if owner:
                q = q.filter((ModelEndpoint.owner == owner) | (ModelEndpoint.owner == None))  # noqa: E711
            for ep in q.all():
                # Defensive in-Python re-check of the ownership rule.
                if not owner_can_see(ep.owner, owner):
                    continue
                # cached_models / hidden_models are stored as JSON text; an
                # empty or unparsable value is treated as "no entries".
                try:
                    model_ids = _json.loads(ep.cached_models) if ep.cached_models else []
                except (ValueError, TypeError):
                    model_ids = []
                try:
                    hidden = set(_json.loads(ep.hidden_models)) if ep.hidden_models else set()
                except (ValueError, TypeError):
                    hidden = set()
                model_ids = [m for m in model_ids if m not in hidden]
                # Fall back to the raw base_url if the chat URL cannot be built.
                try:
                    chat_url = build_chat_url(ep.base_url)
                except Exception:
                    chat_url = ep.base_url
                # Only these fields are emitted for each endpoint.
                out.append({
                    "endpoint_id": ep.id,
                    "name": ep.name,
                    "endpoint_url": chat_url,
                    "models": model_ids,
                    "supports_tools": ep.supports_tools,
                })
        finally:
            # Always release the session, including on errors.
            db.close()
        return {"endpoints": out}

    @router.get("/pair")
    def pair_page(request: Request):
        """Admin-only pairing page. Renders a form that POSTs to mint a code.

        A GET never mints a credential: SameSite=Lax session cookies ride
        top-level GET navigations, so minting on GET would be triggerable by a
        link or <img> (CSRF). The actual mint is the POST handler below.
        """
        require_admin(request)
        # Static HTML: nothing request-derived is interpolated into this page.
        page = """<!doctype html>
<html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1">
<title>Pair a device</title>
<style>
body{font-family:-apple-system,system-ui,sans-serif;max-width:520px;margin:48px auto;padding:0 20px;color:#e8e8e8;background:#16161a}
.card{background:#1f1f25;border:1px solid #2c2c35;border-radius:14px;padding:28px;text-align:center}
button{background:#7c9cff;color:#0e0e12;border:none;border-radius:10px;padding:12px 20px;font-size:15px;font-weight:600;cursor:pointer}
</style></head>
<body><div class="card">
<h2>Pair a device</h2>
<p>Generate a one-time pairing code (a chat-scoped API token) for a LAN client.</p>
<form method="POST" action="/api/companion/pair">
<button type="submit">Generate pairing code</button>
</form>
<p style="color:#8a8a96;font-size:12px;margin-top:18px">Admin only. Each code mints a new token, shown once. Manage or revoke under Settings &rarr; API tokens.</p>
</div></body></html>"""
        return HTMLResponse(page)

    @router.post("/pair")
    def pair_create(request: Request):
        """Mint a pairing code. Admin-cookie only; CSRF-safe because the
        SameSite=Lax session cookie is not sent on a cross-site POST (same
        protection as POST /api/tokens). Minting invalidates the token cache so
        the code works immediately, no restart. `?format=json` returns the
        payload for an in-app pairing screen."""
        require_admin(request)
        owner = get_current_user(request)
        # May be None if the app never registered the hook; mint_pairing_token
        # ignores non-callables.
        invalidate = getattr(request.app.state, "invalidate_token_cache", None)
        token_id, raw_token = mint_pairing_token(owner, invalidate)
        hosts = _pairing.lan_ip_candidates()
        # Advertise the best LAN candidate, or loopback when none was found.
        host = hosts[0] if hosts else "127.0.0.1"
        # request.url.port is falsy when the URL carries no explicit port.
        port = request.url.port or _pairing.default_port()
        payload = _pairing.pairing_payload(host, port, raw_token)
        qr = _pairing.pairing_qr_png_data_uri(payload)
        # Accept the QR value only if it is a PNG data URI.
        qr_ok = bool(qr and qr.startswith("data:image/png;base64,"))
        if (request.query_params.get("format") or "").lower() == "json":
            return {
                "host": host,
                "port": port,
                "token": raw_token,
                "token_id": token_id,
                "hosts": hosts,
                "payload": payload,
                "qr": qr if qr_ok else None,
            }
        import json as _json
        payload_json = _json.dumps(payload, separators=(",", ":"))
        # Only ever emit a known PNG data-URI into the src; every other value is
        # html.escaped.
        qr_block = (
            f'<img src="{html.escape(qr)}" alt="Pairing QR" width="260" height="260">'
            if qr_ok else "<p><em>QR rendering unavailable -- enter the details manually.</em></p>"
        )
        # f-string template: literal CSS braces are doubled ({{ }}).
        page = f"""<!doctype html>
<html><head><meta charset="utf-8"><meta name="viewport" content="width=device-width,initial-scale=1">
<title>Pairing code</title>
<style>
body{{font-family:-apple-system,system-ui,sans-serif;max-width:520px;margin:40px auto;padding:0 20px;color:#e8e8e8;background:#16161a}}
.card{{background:#1f1f25;border:1px solid #2c2c35;border-radius:14px;padding:24px;text-align:center}}
code{{background:#0e0e12;padding:2px 6px;border-radius:6px;word-break:break-all}}
.row{{text-align:left;margin:10px 0;font-size:14px;color:#bdbdc7}}
.warn{{color:#e0a85e;font-size:13px;margin-top:18px}}
</style></head>
<body><div class="card">
<h2>Pairing code</h2>
{qr_block}
<div class="row"><strong>Host:</strong> <code>{html.escape(host)}</code></div>
<div class="row"><strong>Port:</strong> <code>{html.escape(str(port))}</code></div>
<div class="row"><strong>Token:</strong> <code>{html.escape(raw_token)}</code></div>
<div class="row"><strong>Payload:</strong> <code>{html.escape(payload_json)}</code></div>
<p class="warn">Shown once. This grants chat access to your Odysseus; revoke it
in Settings &rarr; API tokens (id <code>{html.escape(token_id)}</code>). The
device must be on the same network, and the server must bind to your LAN.</p>
</div></body></html>"""
        return HTMLResponse(page)

    return router

View File

@@ -266,7 +266,8 @@ class AuthManager:
renamed_sessions = 0
with self._sessions_lock:
for sess in self._sessions.values():
if (sess or {}).get("username") == old_username:
sess_user = str((sess or {}).get("username") or "").strip().lower()
if sess_user == old_username:
sess["username"] = new_username
renamed_sessions += 1
if renamed_sessions:
@@ -375,7 +376,10 @@ class AuthManager:
return True # 2FA not enabled, always pass
secret = user.get("totp_secret")
if not secret:
return True
# 2FA is enabled but no secret is stored (corrupt/partially-written
# auth.json). Fail closed — returning True here bypassed the second
# factor entirely.
return False
# Check backup codes first
backup = user.get("totp_backup_codes", [])
if code in backup:

View File

@@ -1,7 +1,9 @@
import os
import logging
import sqlite3
from datetime import datetime
from sqlalchemy import create_engine, Column, String, Text, Boolean, DateTime, Integer, ForeignKey, JSON, Index, func, text
from sqlalchemy import event, create_engine, Column, String, Text, Boolean, DateTime, Integer, ForeignKey, JSON, Index, func, text
from sqlalchemy.engine import Engine
from sqlalchemy.types import TypeDecorator
from sqlalchemy.ext.declarative import declarative_base, declared_attr
from sqlalchemy.orm import relationship, sessionmaker, backref
@@ -34,6 +36,18 @@ engine = create_engine(
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
# Listening on the Engine class ensures this listener fires for all Engine
# instances created within the process, not just the primary application engine.
# The isinstance(sqlite3.Connection) check ensures that this PRAGMA foreign_keys=ON
# configuration remains a no-op when using non-SQLite database backends.
@event.listens_for(Engine, "connect")
def set_sqlite_pragma(dbapi_connection, connection_record):
    """Switch on foreign-key enforcement for a newly opened SQLite connection.

    Connections that are not ``sqlite3.Connection`` instances are left
    untouched, so the listener does nothing for other database backends.
    """
    if not isinstance(dbapi_connection, sqlite3.Connection):
        return
    pragma_cursor = dbapi_connection.cursor()
    pragma_cursor.execute("PRAGMA foreign_keys=ON")
    pragma_cursor.close()
class EncryptedText(TypeDecorator):
"""Text column transparently encrypted at rest via src.secret_storage.
@@ -298,6 +312,7 @@ class EmailAccount(TimestampMixin, Base):
# SMTP (sending)
smtp_host = Column(String, default="")
smtp_port = Column(Integer, default=465)
smtp_security = Column(String, default="ssl") # ssl | starttls | none
smtp_user = Column(String, default="")
smtp_password = Column(String, default="")
@@ -1483,6 +1498,10 @@ def _migrate_seed_email_account():
logging.getLogger(__name__).warning(f"seed email account migration: {e}")
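# WARNING: Foreign-key enforcement is enabled for every SQLite connection that
# is opened through a SQLAlchemy Engine (see the set_sqlite_pragma "connect"
# listener in this module). Any future migration or schema change that runs on
# such a connection and temporarily violates foreign-key constraints will fail.
# To perform such operations, issue PRAGMA foreign_keys=OFF before the work and
# PRAGMA foreign_keys=ON afterwards, outside any open transaction -- SQLite
# ignores this pragma while a transaction is active. Connections created
# directly with sqlite3.connect() do not pass through the listener and keep
# SQLite's default setting.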
def init_db():
"""
Initialize the database by creating all tables.
@@ -1517,6 +1536,7 @@ def init_db():
_migrate_drop_ping_notes_tasks()
_migrate_add_crew_member_id()
_migrate_add_assistant_columns()
_migrate_add_email_smtp_security()
_migrate_seed_email_account()
_migrate_add_calendar_metadata()
_migrate_add_calendar_is_utc()
@@ -1525,6 +1545,32 @@ def init_db():
_migrate_encrypt_endpoint_keys()
def _migrate_add_email_smtp_security():
    """Add explicit SMTP security mode for Proton Bridge/custom local SMTP.

    Adds the ``smtp_security`` column (``ssl`` | ``starttls`` | ``none``) to
    ``email_accounts`` when it is missing, then backfills pre-existing rows
    from their port: 587 becomes ``starttls``, everything else stays ``ssl``.

    The function is a no-op when the database file does not exist, when the
    table does not exist, or when the column is already present, so values
    chosen after the migration are never overwritten. Any failure is logged
    as a warning and swallowed so startup is not blocked.
    """
    import sqlite3

    log = logging.getLogger(__name__)
    db_path = DATABASE_URL.replace("sqlite:///", "")
    if not os.path.exists(db_path):
        return
    try:
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.execute("PRAGMA table_info(email_accounts)")
            columns = [row[1] for row in cursor.fetchall()]
            if columns and "smtp_security" not in columns:
                conn.execute("ALTER TABLE email_accounts ADD COLUMN smtp_security TEXT DEFAULT 'ssl'")
                # ADD COLUMN ... DEFAULT makes every existing row read back
                # 'ssl', so a "WHERE smtp_security IS NULL OR = ''" filter
                # would match nothing. The column was created one statement
                # ago, so no row can hold a user-chosen value yet: backfill
                # the STARTTLS port unconditionally.
                conn.execute(
                    "UPDATE email_accounts SET smtp_security = 'starttls' "
                    "WHERE COALESCE(smtp_port, 465) = 587"
                )
                conn.commit()
                log.info("Migrated: added smtp_security column to email_accounts")
        finally:
            # Release the file handle even when a statement above raises.
            conn.close()
    except Exception as e:
        # Deliberate best-effort: a failed migration must not stop startup.
        log.warning(f"smtp_security migration skipped: {e}")
def _migrate_encrypt_endpoint_keys():
"""Encrypt any plaintext provider API keys in model_endpoints. Idempotent;
raw SQL so the EncryptedText decorator isn't applied twice."""

View File

@@ -14,6 +14,7 @@ Design rules:
from __future__ import annotations
import os
import ntpath
import shutil
import subprocess
from pathlib import Path
@@ -134,11 +135,40 @@ _BASH_CACHE: Optional[str] = None
_BASH_PROBED = False
# Common Git-for-Windows install locations to probe when bash isn't on PATH.
_WINDOWS_BASH_FALLBACKS = (
r"C:\Program Files\Git\bin\bash.exe",
r"C:\Program Files\Git\usr\bin\bash.exe",
r"C:\Program Files (x86)\Git\bin\bash.exe",
_WINDOWS_BASH_ROOT_ENV_VARS = (
"ProgramFiles",
"ProgramW6432",
"ProgramFiles(x86)",
"LocalAppData",
)
_WINDOWS_BASH_DEFAULT_ROOTS = (
r"C:\Program Files\Git",
r"C:\Program Files (x86)\Git",
)
_WINDOWS_BASH_RELATIVE_PATHS = (
("bin", "bash.exe"),
("usr", "bin", "bash.exe"),
)
def _windows_bash_fallbacks() -> List[str]:
    """Return candidate ``bash.exe`` paths for Git for Windows installs.

    Roots are collected in this order: ``<value>\\Git`` for every set
    environment variable named in ``_WINDOWS_BASH_ROOT_ENV_VARS``, then
    ``%LocalAppData%\\Programs\\Git`` (the default target of a per-user
    Git for Windows install), then ``_WINDOWS_BASH_DEFAULT_ROOTS``. Each
    root is combined with every entry of ``_WINDOWS_BASH_RELATIVE_PATHS``.

    Paths are built with ``ntpath`` so the result is the same on any host
    OS, and duplicates are dropped case-insensitively while keeping the
    first occurrence. Nothing is checked against the filesystem here.
    """
    roots: List[str] = []
    for env_name in _WINDOWS_BASH_ROOT_ENV_VARS:
        base = os.environ.get(env_name)
        if base:
            roots.append(ntpath.join(base, "Git"))

    # A non-admin Git for Windows install lands under
    # %LocalAppData%\Programs\Git, which "<LocalAppData>\Git" does not cover.
    local_app_data = os.environ.get("LocalAppData")
    if local_app_data:
        roots.append(ntpath.join(local_app_data, "Programs", "Git"))

    roots.extend(_WINDOWS_BASH_DEFAULT_ROOTS)

    paths: List[str] = []
    seen = set()
    for root in roots:
        for rel in _WINDOWS_BASH_RELATIVE_PATHS:
            path = ntpath.join(root, *rel)
            key = path.lower()  # Windows paths compare case-insensitively
            if key not in seen:
                seen.add(key)
                paths.append(path)
    return paths
def find_bash() -> Optional[str]:
@@ -153,9 +183,9 @@ def find_bash() -> Optional[str]:
if _BASH_PROBED:
return _BASH_CACHE
_BASH_PROBED = True
found = shutil.which("bash")
found = which_tool("bash")
if not found and IS_WINDOWS:
for cand in _WINDOWS_BASH_FALLBACKS:
for cand in _windows_bash_fallbacks():
if os.path.exists(cand):
found = cand
break

View File

@@ -29,6 +29,21 @@ def _message_timestamp_iso(value: Optional[datetime]) -> Optional[str]:
return value.isoformat().replace("+00:00", "Z")
def _parse_msg_content(raw):
    """Turn stored message content back into its in-memory form.

    Lists are returned unchanged. A string that starts with ``[{``,
    contains ``"type"`` and decodes as a JSON list made only of dicts is
    returned as that list (multimodal content with image/audio
    attachments). Every other value is returned exactly as given.
    """
    if isinstance(raw, list):
        return raw

    looks_serialized = (
        isinstance(raw, str) and raw.startswith('[{') and '"type"' in raw
    )
    if not looks_serialized:
        return raw

    try:
        decoded = json.loads(raw)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass; keep the raw text.
        return raw

    if isinstance(decoded, list) and all(isinstance(part, dict) for part in decoded):
        return decoded
    return raw
class SessionManager:
"""
Manages chat sessions with database persistence.
@@ -119,7 +134,7 @@ class SessionManager:
meta.setdefault('timestamp', _message_timestamp_iso(db_msg.timestamp))
history.append(ChatMessage(
role=db_msg.role,
content=db_msg.content,
content=_parse_msg_content(db_msg.content),
metadata=meta,
))
else:
@@ -134,7 +149,7 @@ class SessionManager:
meta.setdefault('timestamp', _message_timestamp_iso(db_msg.timestamp))
history.append(ChatMessage(
role=db_msg.role,
content=db_msg.content,
content=_parse_msg_content(db_msg.content),
metadata=meta,
))
@@ -187,30 +202,43 @@ class SessionManager:
"""Persist a single message to the database."""
db = SessionLocal()
try:
db_session = db.query(DbSession).filter(DbSession.id == session_id).first()
if db_session is None:
# A stream/tool callback can outlive a session delete. Do not
# create a chat_messages row with no parent session; also drop
# any stale cached session so later writes fail closed too.
self.sessions.pop(session_id, None)
logger.warning("Dropping message for deleted session %s", session_id)
return
msg_id = str(uuid.uuid4())
msg_time = datetime.utcnow()
if message.metadata is None:
message.metadata = {}
message.metadata.setdefault('timestamp', _message_timestamp_iso(msg_time))
# Multimodal content (image/audio attachments) is a list — serialize
# to JSON so the Text column can store it. On reload, _db_to_session
# detects the JSON-array prefix and parses it back.
_content = message.content
if isinstance(_content, list):
_content = json.dumps(_content)
db_message = DbChatMessage(
id=msg_id,
session_id=session_id,
role=message.role,
content=message.content,
content=_content,
meta_data=json.dumps(message.metadata) if message.metadata else None,
timestamp=msg_time,
)
db.add(db_message)
db_session = db.query(DbSession).filter(DbSession.id == session_id).first()
if db_session:
db_session.message_count = len(self.sessions.get(session_id, {}).history) if session_id in self.sessions else 0
_now = datetime.now(timezone.utc)
db_session.last_accessed = _now
# Clean "last conversation" timestamp — only bumped here on a
# real message persist, so it powers an accurate "Last active"
# sort that ignores renames / model swaps / mere opens.
db_session.last_message_at = _now
db_session.message_count = len(self.sessions.get(session_id, {}).history) if session_id in self.sessions else 0
_now = datetime.now(timezone.utc)
db_session.last_accessed = _now
# Clean "last conversation" timestamp — only bumped here on a
# real message persist, so it powers an accurate "Last active"
# sort that ignores renames / model swaps / mere opens.
db_session.last_message_at = _now
db.commit()
@@ -276,7 +304,15 @@ class SessionManager:
id=msg_id,
session_id=session_id,
role=message.role,
content=message.content,
# Multimodal content (image/audio attachments) is a list;
# serialize to JSON so the Text column round-trips via
# _parse_msg_content. Storing the raw list let SQLAlchemy
# bind its single-quoted repr, which _parse_msg_content
# cannot parse (it looks for double-quoted "type"), so the
# attachment was destroyed on reload. Mirrors _persist_message.
content=(json.dumps(message.content)
if isinstance(message.content, list)
else message.content),
meta_data=json.dumps(message.metadata) if message.metadata else None,
timestamp=now + timedelta(microseconds=i),
)
@@ -466,11 +502,17 @@ class SessionManager:
db_session = db.query(DbSession).filter(DbSession.id == session_id).first()
if db_session:
db.delete(db_session)
# Drop the in-memory copy even when there is no DB row. A "ghost"
# session lives only here (never persisted, or its row was removed
# out-of-band); without this it can never be cleared and keeps
# 404ing on every operation (issue #1044).
removed_in_memory = self.sessions.pop(session_id, None) is not None
if db_session or removed_in_memory:
# Commit the document-detach / message-delete above (a no-op when
# the ghost had no rows) together with the session delete.
db.commit()
if session_id in self.sessions:
del self.sessions[session_id]
logger.info(f"Deleted session {session_id}")
return True
return False

View File

@@ -4,28 +4,53 @@ services:
ports:
- "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000"
volumes:
- ./data:/app/data
- ./logs:/app/logs
- ./data:/app/data:z
- ./logs:/app/logs:z
# Cookbook remote-server SSH identity. Odysseus can generate a key here;
# add the shown public key to each remote server's authorized_keys.
- ./data/ssh:/app/.ssh
- ./data/ssh:/app/.ssh:z
# Cookbook local model cache. Inside Docker, "Local" means the Odysseus
# container, so persist its HuggingFace cache under ./data/huggingface.
- ./data/huggingface:/app/.cache/huggingface
- ./data/huggingface:/app/.cache/huggingface:z
# Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.)
# land under /app/.local for the odysseus user. Persist them so a
# container recreate does not silently remove installed serve engines.
- ./data/local:/app/.local
- ./data/local:/app/.local:z
extra_hosts:
# Lets the container reach local services on the Docker host, including
# Ollama at http://host.docker.internal:11434.
- "host.docker.internal:host-gateway"
env_file:
- .env
environment:
- LLM_HOST=${LLM_HOST:-localhost}
- LLM_HOSTS=${LLM_HOSTS:-}
- OPENAI_API_KEY=${OPENAI_API_KEY:-}
- OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-}
- RESEARCH_LLM_ENDPOINT=${RESEARCH_LLM_ENDPOINT:-}
- HF_TOKEN=${HF_TOKEN:-}
- HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN:-}
- SEARXNG_INSTANCE=http://searxng:8080
- CHROMADB_HOST=chromadb
- CHROMADB_PORT=8000
- DATABASE_URL=${DATABASE_URL:-sqlite:///./data/app.db}
- AUTH_ENABLED=${AUTH_ENABLED:-true}
- LOCALHOST_BYPASS=${LOCALHOST_BYPASS:-false}
- ODYSSEUS_ADMIN_USER=${ODYSSEUS_ADMIN_USER:-admin}
- ODYSSEUS_ADMIN_PASSWORD=${ODYSSEUS_ADMIN_PASSWORD:-}
- ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-http://localhost,http://127.0.0.1}
- SECURE_COOKIES=${SECURE_COOKIES:-false}
- EMBEDDING_URL=${EMBEDDING_URL:-}
- EMBEDDING_MODEL=${EMBEDDING_MODEL:-}
- FASTEMBED_MODEL=${FASTEMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2}
- FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-}
- CLEANUP_INTERVAL_HOURS=${CLEANUP_INTERVAL_HOURS:-24}
- ODYSSEUS_INPROCESS_POLLERS=${ODYSSEUS_INPROCESS_POLLERS:-1}
- ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1}
- ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost}
- DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-}
- GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
- GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-}
- TAVILY_API_KEY=${TAVILY_API_KEY:-}
- SERPER_API_KEY=${SERPER_API_KEY:-}
# PUID / PGID — the user/group the container drops to before
# running uvicorn (entrypoint also chowns /app/data + /app/logs
# to match, so bind-mounted files stay editable from the host).
@@ -54,7 +79,12 @@ services:
restart: unless-stopped
searxng:
image: docker.io/searxng/searxng:latest
# Pinned, not :latest — odysseus waits on searxng's healthcheck
# (depends_on: condition: service_healthy), so a broken upstream `latest`
# tag blocks the whole app from starting. 2026.6.2 crashes on boot with
# `KeyError: 'default_doi_resolver'`, failing the healthcheck (issue #1414).
# Bump this deliberately after verifying a newer tag boots clean.
image: docker.io/searxng/searxng:2026.5.31-7159b8aed
entrypoint:
- /bin/sh
- -c
@@ -72,10 +102,24 @@ services:
- "127.0.0.1:8080:8080"
volumes:
- searxng-data:/etc/searxng
- ./config/searxng/settings.yml:/tmp/searxng-settings.yml.template:ro
- ./config/searxng/settings.yml:/tmp/searxng-settings.yml.template:ro,z
environment:
- SEARXNG_BASE_URL=http://localhost:8080/
- SEARXNG_SECRET=${SEARXNG_SECRET:-}
# The official searxng image runs as the non-root `searxng` user, but its
# entrypoint still needs to chown /etc/searxng on first boot, drop privs via
# su-exec, and (with our wrapper above) write settings.yml into the named
# volume. Without these capabilities the wrapper aborts at the redirection
# with EACCES and the container fails its healthcheck with permission
# errors during setup. Mirrors the cap set recommended by the upstream
# searxng-docker compose file. See issue #721.
cap_drop:
- ALL
cap_add:
- CHOWN
- SETGID
- SETUID
- DAC_OVERRIDE
healthcheck:
test: ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://localhost:8080/', timeout=5).read(1)\""]
interval: 5s

View File

@@ -76,6 +76,15 @@ done
# nvcc" even when the GPU itself is fully visible to the container.
export VLLM_USE_FLASHINFER_SAMPLER="${VLLM_USE_FLASHINFER_SAMPLER:-0}"
# Make Cookbook-installed Python CLIs visible after `pip install --user`.
# vLLM and helper scripts land here because /app is the non-root user's HOME.
export PATH="/app/.local/bin:$PATH"
# Run first-time setup as the app user so data/ files get the right ownership.
# setup.py is idempotent — skips auth.json / .env if they already exist.
# || true so a setup failure never prevents the container from starting.
gosu "$PUID:$PGID" python /app/setup.py || true
# Drop root and run the actual app. `gosu` is preferred over `su` /
# `sudo` because it cleans up the process tree (no extra shell layer)
# so signals (SIGTERM from `docker stop`) reach uvicorn directly.

View File

@@ -1,5 +1,6 @@
# AMD ROCm GPU overlay. Enable by setting COMPOSE_FILE in .env:
# COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml
# RENDER_GID=<numeric output of: getent group render | cut -d: -f3>
#
# Requires ROCm drivers on the host (kfd + DRI devices). The host user
# running Docker must be in the `video` and `render` groups.

View File

@@ -1,6 +1,11 @@
# NVIDIA GPU overlay. Enable by setting COMPOSE_FILE in .env:
# COMPOSE_FILE=docker-compose.yml:docker/gpu.nvidia.yml
#
# Use scripts/check-docker-gpu.sh to diagnose GPU passthrough, optionally
# install the NVIDIA Container Toolkit (Ubuntu/Debian), and write COMPOSE_FILE
# to .env. The script is read-only by default — it installs nothing and never
# edits .env unless explicitly asked.
#
# Requires the NVIDIA Container Toolkit on the host.
# Arch: sudo pacman -S nvidia-container-toolkit
# Debian: sudo apt install nvidia-container-toolkit

Binary file not shown.

Before

Width:  |  Height:  |  Size: 52 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 51 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 125 KiB

View File

@@ -25,7 +25,7 @@
--radius: 8px;
}
* { box-sizing: border-box; }
html { scroll-behavior: smooth; scroll-snap-type: y mandatory; scroll-padding-top: 60px; }
html { scroll-behavior: smooth; scroll-snap-type: y proximity; scroll-padding-top: 60px; }
/* Each section is a full-viewport "page" with its content centered, so only
one shows at a time and the snap is obvious. */
.hero, section {

View File

@@ -30,23 +30,80 @@ function Fail($msg) {
exit 1
}
# 1. Locate a Python interpreter (3.11+ recommended)
# Locate a bash executable. Returns the path of `bash` when it is on PATH;
# otherwise the first existing bash.exe under a known Git for Windows install
# root; otherwise $null.
function Find-GitBash {
    $cmd = Get-Command bash -ErrorAction SilentlyContinue
    if ($cmd) { return $cmd.Source }

    $roots = @()
    foreach ($name in @("ProgramFiles", "ProgramW6432", "ProgramFiles(x86)", "LocalAppData")) {
        $base = [Environment]::GetEnvironmentVariable($name)
        if ($base) { $roots += (Join-Path $base "Git") }
    }
    # A per-user (non-admin) Git for Windows install defaults to
    # %LocalAppData%\Programs\Git, which "<LocalAppData>\Git" does not cover.
    $localAppData = [Environment]::GetEnvironmentVariable("LocalAppData")
    if ($localAppData) { $roots += (Join-Path $localAppData "Programs\Git") }
    $roots += @("C:\Program Files\Git", "C:\Program Files (x86)\Git")

    foreach ($root in ($roots | Select-Object -Unique)) {
        foreach ($relative in @("bin\bash.exe", "usr\bin\bash.exe")) {
            $candidate = Join-Path $root $relative
            if (Test-Path $candidate) { return $candidate }
        }
    }

    return $null
}
# 1. Locate a Python interpreter (3.11+ required)
Write-Step "Checking for Python"
# Run the given interpreter/launcher (plus optional launcher arguments) and
# return its "major.minor.micro" version string, or $null when the call fails
# or yields no output.
function Get-PythonVersionText($launcher, $launcherArgs) {
    $probe = "import sys; print('.'.join(map(str, sys.version_info[:3])))"
    try {
        $reported = & $launcher @launcherArgs -c $probe 2>$null
        # Trim() on an empty result throws and is handled by the catch below.
        return $reported.Trim()
    } catch {
        return $null
    }
}
$pyExe = $null
foreach ($c in @("python", "py")) {
$cmd = Get-Command $c -ErrorAction SilentlyContinue
if ($cmd) { $pyExe = $cmd.Source; break }
$pyArgs = @()
$pyVersion = $null
$pyLauncher = Get-Command py -ErrorAction SilentlyContinue
if ($pyLauncher) {
foreach ($v in @("-3.13", "-3.12", "-3.11")) {
$ver = Get-PythonVersionText $pyLauncher.Source @($v)
if ($ver) {
$pyExe = $pyLauncher.Source
$pyArgs = @($v)
$pyVersion = $ver
break
}
}
}
if (-not $pyExe) {
Fail "Python not found on PATH. Install Python 3.11+ from https://www.python.org/downloads/ (check 'Add to PATH'), then re-run this script."
$pythonCmd = Get-Command python -ErrorAction SilentlyContinue
if ($pythonCmd) {
$ver = Get-PythonVersionText $pythonCmd.Source @()
if ($ver) {
$versionParts = $ver.Split('.')
$major = [int]$versionParts[0]
$minor = [int]$versionParts[1]
if ($major -gt 3 -or ($major -eq 3 -and $minor -ge 11)) {
$pyExe = $pythonCmd.Source
$pyVersion = $ver
}
}
}
}
Write-Host ("Using Python: " + $pyExe)
if (-not $pyExe) {
Fail "Couldn't find Python 3.11+ for Windows setup. Install Python 3.11+ (or open the Python launcher with 'py -3.11') from https://www.python.org/downloads/, then re-run this script."
}
$pythonLabel = ("Using Python {0}: {1} {2}" -f $pyVersion, $pyExe, ($pyArgs -join ' ')).TrimEnd()
Write-Host $pythonLabel
# 2. Create the virtualenv if missing
$venvPy = Join-Path $PSScriptRoot "venv\Scripts\python.exe"
if (-not (Test-Path $venvPy)) {
Write-Step "Creating virtual environment (venv)"
& $pyExe -m venv venv
& $pyExe @pyArgs -m venv venv
if ($LASTEXITCODE -ne 0 -or -not (Test-Path $venvPy)) { Fail "Failed to create the virtual environment." }
} else {
Write-Host "venv already exists - skipping creation."
@@ -64,7 +121,7 @@ Write-Step "Running first-time setup"
if ($LASTEXITCODE -ne 0) { Fail "setup.py failed." }
# 5. Friendly note about Git Bash (full Cookbook / agent-shell parity)
if (-not (Get-Command bash -ErrorAction SilentlyContinue)) {
if (-not (Find-GitBash)) {
Write-Host ""
Write-Host "NOTE: Git Bash (bash.exe) was not found on PATH." -ForegroundColor Yellow
Write-Host " The core app works without it. For full Cookbook background" -ForegroundColor Yellow

View File

@@ -13,6 +13,10 @@ SEARCH_TIMEOUT = 30
def truncate(text: str, limit: int = MAX_OUTPUT_CHARS) -> str:
    """Cap *text* at *limit* characters, appending a note when it was cut.

    ``None`` is treated as an empty string and any other non-string value
    is converted with ``str()`` first, so this shared helper never fails
    on unexpected tool output. Text at or under the limit is returned
    unchanged.
    """
    if text is None:
        text = ""
    elif not isinstance(text, str):
        text = str(text)

    total = len(text)
    if total <= limit:
        return text
    return text[:limit] + f"\n... (truncated, {total} chars total)"

View File

@@ -70,10 +70,12 @@ def _list_accounts_raw() -> list:
try:
conn = sqlite3.connect(str(path))
conn.row_factory = sqlite3.Row
rows = conn.execute("""
columns = {r[1] for r in conn.execute("PRAGMA table_info(email_accounts)").fetchall()}
smtp_security_select = "smtp_security" if "smtp_security" in columns else "'' AS smtp_security"
rows = conn.execute(f"""
SELECT id, name, is_default, enabled,
imap_host, imap_port, imap_user, imap_password, imap_starttls,
smtp_host, smtp_port, smtp_user, smtp_password, from_address
smtp_host, smtp_port, {smtp_security_select}, smtp_user, smtp_password, from_address
FROM email_accounts WHERE enabled = 1
ORDER BY is_default DESC, created_at ASC
""").fetchall()
@@ -145,6 +147,7 @@ def _load_config(account: str | None = None) -> dict:
"imap_starttls": os.environ.get("IMAP_STARTTLS", "true").lower() == "true",
"smtp_host": os.environ.get("SMTP_HOST", ""),
"smtp_port": int(os.environ.get("SMTP_PORT", "465")),
"smtp_security": os.environ.get("SMTP_SECURITY", ""),
"smtp_user": os.environ.get("SMTP_USER", ""),
"smtp_password": os.environ.get("SMTP_PASSWORD", ""),
"smtp_starttls": os.environ.get("SMTP_STARTTLS", "false").lower() == "true",
@@ -189,6 +192,7 @@ def _load_config(account: str | None = None) -> dict:
cfg["imap_ssl"] = int(cfg["imap_port"]) == 993 and not cfg["imap_starttls"]
cfg["smtp_host"] = row["smtp_host"] or cfg["smtp_host"]
cfg["smtp_port"] = int(row["smtp_port"] or cfg["smtp_port"])
cfg["smtp_security"] = row["smtp_security"] or cfg["smtp_security"] or ("starttls" if int(cfg["smtp_port"]) == 587 else "ssl")
cfg["smtp_user"] = row["smtp_user"] or cfg["smtp_user"]
cfg["smtp_password"] = _decrypt(row["smtp_password"]) if row["smtp_password"] else cfg["smtp_password"]
cfg["from_address"] = row["from_address"] or row["imap_user"] or cfg["from_address"]
@@ -333,14 +337,25 @@ def _decode_header(raw):
"""Decode MIME encoded header."""
if not raw:
return ""
parts = email.header.decode_header(raw)
decoded = []
for data, charset in parts:
if isinstance(data, bytes):
decoded.append(data.decode(charset or "utf-8", errors="replace"))
else:
decoded.append(data)
return " ".join(decoded)
try:
# make_header concatenates per RFC 2047: no spurious space between an
# encoded-word and adjacent plain text (plain runs keep their own
# whitespace), and whitespace between two adjacent encoded-words is
# dropped. The old " ".join produced "Re: Jose" style double spaces
# on every non-ASCII subject or sender.
return str(email.header.make_header(email.header.decode_header(raw)))
except Exception:
# Malformed header or unknown charset: lossy per-part decode
decoded = []
for data, charset in email.header.decode_header(raw):
if isinstance(data, bytes):
try:
decoded.append(data.decode(charset or "utf-8", errors="replace"))
except LookupError:
decoded.append(data.decode("utf-8", errors="replace"))
else:
decoded.append(data)
return "".join(decoded)
def _extract_text(msg):
@@ -413,6 +428,11 @@ def _list_emails(folder="INBOX", max_results=20, unresponded_only=False,
status, data = conn.uid("SEARCH", None, "(UNSEEN UNANSWERED)")
elif unread_only:
status, data = conn.uid("SEARCH", None, "(UNSEEN)")
elif unresponded_only:
# Was missing — unresponded_only=True (without unread_only) fell through
# to "ALL" and returned answered mail too, despite the documented
# "emails without replies" behaviour.
status, data = conn.uid("SEARCH", None, "(UNANSWERED)")
else:
# Include read too — IMAP search "ALL" returns the entire folder
status, data = conn.uid("SEARCH", None, "ALL")
@@ -739,17 +759,17 @@ def _smtp_connect(account=None, cfg=None):
if not _smtp_ready(cfg):
raise ValueError(f"Email account {cfg.get('account_name') or account or 'default'} has no SMTP configured")
port = int(cfg.get("smtp_port") or 465)
# Account rows only store host/port, not the legacy env-level smtp_ssl
# toggle. Infer the conventional TLS mode from the port so MCP tools match
# the web send path: 465 = implicit SSL, 587 = STARTTLS.
if port == 587:
security = str(cfg.get("smtp_security") or "").strip().lower()
if security not in {"ssl", "starttls", "none"}:
security = "starttls" if port == 587 else "ssl"
if security == "starttls":
conn = smtplib.SMTP(
cfg["smtp_host"],
port,
timeout=EMAIL_SOCKET_TIMEOUT,
)
conn.starttls()
elif cfg.get("smtp_ssl", True):
elif security == "ssl":
conn = smtplib.SMTP_SSL(
cfg["smtp_host"],
port,
@@ -761,8 +781,6 @@ def _smtp_connect(account=None, cfg=None):
port,
timeout=EMAIL_SOCKET_TIMEOUT,
)
if cfg["smtp_starttls"]:
conn.starttls()
if cfg["smtp_user"] and cfg["smtp_password"]:
conn.login(cfg["smtp_user"], cfg["smtp_password"])
return conn

View File

@@ -161,10 +161,9 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
deleted_text = m.get("text", "")
deleted_category = m.get("category", "")
break
original_len = len(memories)
memories = [m for m in memories if not m.get("id", "").startswith(memory_id)]
if len(memories) == original_len:
if not full_id:
return [TextContent(type="text", text=f"Error: Memory '{memory_id}' not found")]
memories = [m for m in memories if m.get("id") != full_id]
_memory_manager.save(memories)
if _memory_vector and _memory_vector.healthy and full_id:
try:

View File

@@ -101,10 +101,13 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
return [TextContent(type="text", text=f"Error: {e}")]
elif action == "add_directory":
directory = arguments.get("directory", "").strip()
_dir = arguments.get("directory")
directory = _dir.strip() if isinstance(_dir, str) else ""
if not directory:
return [TextContent(type="text", text="Error: add_directory needs a directory path")]
directory = os.path.expanduser(directory)
# Store an absolute path so indexed `source` metadata is absolute and
# remove_directory (which abspath-normalizes) can match it later (#1660).
directory = os.path.abspath(os.path.expanduser(directory))
if not os.path.isdir(directory):
return [TextContent(type="text", text=f"Error: Directory not found: {directory}")]
if not _rag_manager:
@@ -112,14 +115,27 @@ async def call_tool(name: str, arguments: dict) -> list[TextContent]:
try:
result = _rag_manager.index_personal_documents(directory)
indexed = result.get("indexed_count", 0) if isinstance(result, dict) else 0
# Record the directory so `list` and `remove_directory` can see it.
# Indexing was just done above, so pass index=False to avoid a second
# (ownerless) pass. Without this the directory was indexed but never
# tracked in indexed_directories, so it was invisible/unremovable.
if _personal_docs_manager and hasattr(_personal_docs_manager, "add_directory"):
try:
_personal_docs_manager.add_directory(directory, index=False)
except Exception:
pass
return [TextContent(type="text", text=f"Directory '{directory}' added to RAG index ({indexed} chunks indexed)")]
except Exception as e:
return [TextContent(type="text", text=f"Error: Failed to index directory: {e}")]
elif action == "remove_directory":
directory = arguments.get("directory", "").strip()
_dir = arguments.get("directory")
directory = _dir.strip() if isinstance(_dir, str) else ""
if not directory:
return [TextContent(type="text", text="Error: remove_directory needs a directory path")]
# Expand ~ to match add_directory, which indexes the expanded path.
# Without this, removing "~/docs" never matches the stored absolute path.
directory = os.path.expanduser(directory)
if not _personal_docs_manager:
return [TextContent(type="text", text="Error: Personal docs manager not available")]
try:

View File

@@ -9,7 +9,7 @@ Type=simple
# CHANGE THESE to match your user and install path:
User=YOURUSER
WorkingDirectory=/home/YOURUSER/odysseus-ui
ExecStart=/home/YOURUSER/odysseus-ui/venv/bin/uvicorn app:app --port 8000 --host 0.0.0.0
ExecStart=/home/YOURUSER/odysseus-ui/venv/bin/uvicorn app:app --port 7000 --host 0.0.0.0
Restart=always
RestartSec=3
EnvironmentFile=-/home/YOURUSER/odysseus-ui/.env

View File

@@ -4,6 +4,14 @@
# Note: chromadb-client + fastembed moved to requirements.txt — RAG, semantic
# memory, and tool selection are core paths, so they ship by default now.
# Local speech-to-text (microphone -> text) via faster-whisper, for the
# "local" STT provider. Runs on CPU out of the box (CTranslate2 backend, no
# torch needed). Install if you want to dictate/transcribe with the mic
# without sending audio to an external endpoint.
# Optional extra: install `torch` too if you have a CUDA GPU and want
# GPU-accelerated transcription — it's auto-detected, CPU is used otherwise.
faster-whisper
# DuckDuckGo as a search provider option.
# Install if you want DDG in the search-provider dropdown.
# Alternatives: SearXNG, Brave, Tavily, Serper, Google PSE.
@@ -15,3 +23,14 @@ duckduckgo-search
# network-served app — see ACKNOWLEDGMENTS.md. The MIT core (PDF *text*
# extraction via pypdf) works without it; this only unlocks form-filling.
PyMuPDF
# Office / EPUB document text extraction (chat attachments + the personal-docs
# RAG index). markitdown (MIT, Microsoft) converts .docx/.xlsx/.pptx/.xls/.epub
# to Markdown — more token-efficient and model-legible than a raw dump. Optional
# and lazy-imported via src/markitdown_runtime.py; without it those formats fall
# back to a friendly "install to extract" banner and the core stays pure-MIT.
# Extras pull mammoth/lxml/python-pptx/pandas/openpyxl/xlrd; the base also pulls
# magika (onnxruntime), already a core dep via fastembed. We avoid the
# [all]/Azure/audio extras (cloud + heavy). Pinned to a release >30 days old per
# the dependency-age discussion in issue #485.
markitdown[docx,pptx,xlsx,xls]==0.1.5

View File

@@ -27,6 +27,7 @@ from core.database import (
Document,
DocumentVersion,
GalleryImage,
GalleryAlbum,
CalendarEvent,
CalendarCal,
)
@@ -145,8 +146,9 @@ def setup_admin_wipe_routes(session_manager):
return {"status": "deleted", "kind": kind, "count": count}
if kind == "gallery":
count = db.query(GalleryImage).count()
count = db.query(GalleryImage).count() + db.query(GalleryAlbum).count()
db.query(GalleryImage).delete()
db.query(GalleryAlbum).delete()
db.commit()
# Also drop the upload dir so disk doesn't keep orphans.
_rmtree_quiet(os.path.join(DATA_DIR, "gallery"))

View File

@@ -67,6 +67,8 @@ class DeleteUserRequest(BaseModel):
class RenameUserRequest(BaseModel):
username: str
class SetOpenRegistrationRequest(BaseModel):
    """Request body carrying the desired open-registration state."""

    # Target value for open signup: True turns it on, False turns it off.
    enabled: bool
SESSION_COOKIE = "odysseus_session"
@@ -295,6 +297,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
# owner-scoped DB rows before changing auth so the account keeps
# access to its sessions, docs, email accounts, tasks, etc.
try:
from sqlalchemy import func
from core.database import Base, SessionLocal
db = SessionLocal()
try:
@@ -304,7 +307,7 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
continue
(
db.query(model)
.filter(model.owner == old_username)
.filter(func.lower(model.owner) == old_username)
.update({"owner": new_username}, synchronize_session=False)
)
db.commit()
@@ -322,9 +325,15 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
from routes.prefs_routes import _load as _load_prefs, _save as _save_prefs
prefs = _load_prefs()
users = prefs.get("_users") if isinstance(prefs, dict) else None
if isinstance(users, dict) and old_username in users and new_username not in users:
users[new_username] = users.pop(old_username)
_save_prefs(prefs)
if isinstance(users, dict):
prefs_key = next(
(k for k in users if str(k).strip().lower() == old_username),
None,
)
new_taken = any(str(k).strip().lower() == new_username for k in users)
if prefs_key is not None and not new_taken:
users[new_username] = users.pop(prefs_key)
_save_prefs(prefs)
except Exception as e:
logger.warning("Failed to rename user prefs %s -> %s: %s", old_username, new_username, e)
@@ -333,15 +342,31 @@ def setup_auth_routes(auth_manager: AuthManager) -> APIRouter:
raise HTTPException(400, "Cannot rename user")
return {"ok": True, "username": new_username, "renamed_self": old_username == user}
@router.post("/signup-toggle")
@router.post("/signup-toggle", deprecated=True)
async def toggle_signup(request: Request):
"""Toggle open registration on/off. Admin only."""
"""
Toggle open registration on/off. Admin only.
DEPRECATED: This endpoint uses toggle semantics which can lead to unsafe state changes.
Use PUT /open-signup instead.
This endpoint is kept for backward compatibility and may be removed in future versions.
"""
user = _get_current_user(request)
if not user or not auth_manager.is_admin(user):
raise HTTPException(403, "Admin only")
auth_manager.signup_enabled = not auth_manager.signup_enabled
return {"ok": True, "signup_enabled": auth_manager.signup_enabled}
@router.put("/open-signup")
async def set_signup_enabled(body: SetOpenRegistrationRequest, request: Request):
    """Set the open-signup flag to an explicit value. Admin only.

    The caller states the desired state in ``body.enabled`` instead of
    flipping whatever the current value happens to be. Responds with 403
    unless the request resolves to an admin user.
    """
    caller = _get_current_user(request)
    if not (caller and auth_manager.is_admin(caller)):
        raise HTTPException(403, "Admin only")
    auth_manager.signup_enabled = body.enabled
    # Echo the value read back from the manager, not the request body.
    current = auth_manager.signup_enabled
    return {"ok": True, "signup_enabled": current}
@router.delete("/users")
async def admin_delete_user(body: DeleteUserRequest, request: Request):
user = _get_current_user(request)

View File

@@ -77,7 +77,12 @@ def setup_backup_routes(memory_manager, preset_manager, skills_manager) -> APIRo
# ── Memories ──
if "memories" in body and isinstance(body["memories"], list):
existing = memory_manager.load_all()
existing_texts = {e.get("text", "").strip().lower() for e in existing}
# Dedup against THIS user's own memories only. Using every tenant's
# rows (load_all) meant a memory whose text matched any other
# user's was silently skipped, so the importing user lost their own
# data. The full store is still saved back below.
existing_texts = {e.get("text", "").strip().lower()
for e in existing if e.get("owner") == user}
added = 0
for mem in body["memories"]:
if not isinstance(mem, dict) or not mem.get("text"):

View File

@@ -12,10 +12,27 @@ from dateutil.rrule import rrulestr, rruleset
from dateutil.rrule import DAILY, WEEKLY, MONTHLY, YEARLY
from core.database import SessionLocal, CalendarCal, CalendarEvent
from src.auth_helpers import get_current_user
from src.auth_helpers import get_current_user, require_user
logger = logging.getLogger(__name__)
def _ics_naive_dtstart(dt):
"""Naive value matching how import_ics STORES CalendarEvent.dtstart.
Timed tz-aware events are stored as UTC with tzinfo stripped, all-day
dates as midnight datetimes, naive datetimes unchanged. The ICS dedup
must compute the same value or a re-import never matches the stored row.
"""
if isinstance(dt, datetime):
if dt.tzinfo is not None:
from datetime import timezone as _tz
return dt.astimezone(_tz.utc).replace(tzinfo=None)
return dt
if isinstance(dt, date):
return datetime(dt.year, dt.month, dt.day)
return dt
# Single-user fallback identity. Used only when:
# 1. The app is configured for single-user (no auth middleware), AND
# 2. The request didn't resolve to an authenticated user.
@@ -28,16 +45,17 @@ _SINGLE_USER_MODE = _os.environ.get("ODYSSEUS_SINGLE_USER", "1") != "0"
def _require_user(request: Request) -> str:
"""Return the authenticated user. In multi-user mode an unauthenticated
request raises 401; in single-user mode it falls through to
FALLBACK_OWNER. Prevents the silent cross-user data write that would
happen if a request slipped past auth middleware in a real deployment."""
u = get_current_user(request)
if u:
return u
if _SINGLE_USER_MODE:
return FALLBACK_OWNER
raise HTTPException(401, "Authentication required")
"""Return the authenticated user. Uses require_user so AUTH_ENABLED=false
and single-user mode both work: require_user returns "" when auth is
disabled or unconfigured, and only raises 401 when auth is configured but
the caller is unauthenticated. Falls back to FALLBACK_OWNER for calendar
writes so data isn't stored under an empty owner in single-user mode."""
user = require_user(request)
if user:
return user
# require_user returned "" — auth is off or unconfigured (single-user).
# Use FALLBACK_OWNER so calendar rows have a stable owner for filtering.
return FALLBACK_OWNER
def _get_or_404_calendar(db, cal_id: str, owner: str) -> CalendarCal:
@@ -64,6 +82,24 @@ def _get_or_404_event(db, uid: str, owner: str) -> CalendarEvent:
return ev
def _ics_escape(text: str) -> str:
"""Escape a value for an iCalendar TEXT field (RFC 5545 §3.3.11).
Backslash, semicolon and comma are structural in TEXT values and must be
escaped, and newlines become a literal ``\\n``. Backslash is escaped first
so the escapes we add aren't re-escaped.
"""
return (
(text or "")
.replace("\\", "\\\\")
.replace(";", "\\;")
.replace(",", "\\,")
.replace("\r\n", "\\n")
.replace("\n", "\\n")
.replace("\r", "\\n")
)
def _resolve_base_uid(uid: str) -> str:
"""Extract the base series UID from a compound occurrence UID.
@@ -319,8 +355,8 @@ def _parse_dt(s: str) -> datetime:
return None
return h, mn
# today/tomorrow/yesterday [at] TIME
m = _re.match(r'^(today|tomorrow|tmrw|yesterday)(?:\s+at)?\s*(.*)$', lower)
# today/tonight/tomorrow/yesterday [at] TIME
m = _re.match(r'^(today|tonight|tomorrow|tmrw|yesterday)(?:\s+at)?\s*(.*)$', lower)
if m:
word, rest = m.group(1), m.group(2).strip()
base = today
@@ -434,8 +470,21 @@ def _expand_rrule(
return [d]
# Parse the rrule, applying it to the base dtstart.
rrule_str = ev.rrule
if ev.dtstart is not None and getattr(ev.dtstart, "tzinfo", None) is None:
# Events are stored with a naive (UTC) dtstart, but standard .ics
# exporters (Google/Apple/Outlook/Fastmail) write the bound as an
# absolute UTC value, e.g. UNTIL=20240105T090000Z. dateutil refuses to
# mix a tz-aware UNTIL with a naive DTSTART ("RRULE UNTIL values must be
# specified in UTC when DTSTART is timezone-aware"), so the except branch
# below would silently collapse the whole series to a single event.
# Drop the trailing Z so UNTIL matches the naive DTSTART.
import re as _re
rrule_str = _re.sub(
r"(UNTIL=\d{8}(?:T\d{6})?)Z", r"\1", rrule_str, flags=_re.IGNORECASE
)
try:
rule = rrulestr(ev.rrule, dtstart=ev.dtstart)
rule = rrulestr(rrule_str, dtstart=ev.dtstart)
except Exception as ex:
logger.warning(
"Failed to parse rrule=%r for event %s: %s", ev.rrule, ev.uid, ex
@@ -509,13 +558,20 @@ def setup_calendar_routes() -> APIRouter:
owner = _require_user(request)
from routes.prefs_routes import _load_for_user
cfg = (_load_for_user(owner) or {}).get("caldav", {}) or {}
caldav_password = cfg.get("password") or ""
if caldav_password:
try:
from src.secret_storage import decrypt
caldav_password = decrypt(caldav_password)
except Exception:
pass
# Surface url+username but never hand the password back to the
# client — saved-state UI shouldn't leak the credential.
return {
"url": cfg.get("url", "") or "",
"username": cfg.get("username", "") or "",
"password": "",
"has_password": bool(cfg.get("password")),
"has_password": bool(caldav_password),
"local": not bool(cfg.get("url")),
}
@@ -534,12 +590,20 @@ def setup_calendar_routes() -> APIRouter:
prefs.pop("caldav", None)
_save_for_user(owner, prefs)
return {"ok": True, "cleared": True}
cfg["url"] = body.get("url", "").strip()
from src.caldav_sync import validate_caldav_url
try:
cfg["url"] = validate_caldav_url(body.get("url", ""))
except ValueError as e:
raise HTTPException(400, str(e))
cfg["username"] = (body.get("username") or "").strip()
# Preserve the stored password when the client sends an empty
# one (edit form re-submitted without re-typing the password).
if body.get("password"):
cfg["password"] = body["password"]
from src.secret_storage import encrypt
cfg["password"] = encrypt(body["password"])
elif cfg.get("password"):
from src.secret_storage import encrypt
cfg["password"] = encrypt(cfg["password"])
prefs["caldav"] = cfg
_save_for_user(owner, prefs)
return {"ok": True}
@@ -566,9 +630,21 @@ def setup_calendar_routes() -> APIRouter:
cfg = (_load_for_user(owner) or {}).get("caldav", {}) or {}
url = url or (cfg.get("url") or "")
user = user or (cfg.get("username") or "")
pw = pw or (cfg.get("password") or "")
if not pw:
pw = cfg.get("password") or ""
if pw:
try:
from src.secret_storage import decrypt
pw = decrypt(pw)
except Exception:
pass
if not (url and user and pw):
return {"ok": False, "error": "Missing URL, username, or password"}
from src.caldav_sync import validate_caldav_url
try:
url = validate_caldav_url(url)
except ValueError as e:
return {"ok": False, "error": str(e)}
import httpx
propfind_body = (
'<?xml version="1.0" encoding="UTF-8"?>\n'
@@ -576,13 +652,25 @@ def setup_calendar_routes() -> APIRouter:
'</d:prop></d:propfind>'
)
try:
async with httpx.AsyncClient(timeout=8.0, follow_redirects=True) as cx:
async with httpx.AsyncClient(timeout=8.0, follow_redirects=False, trust_env=False) as cx:
r = await cx.request(
"PROPFIND", url,
auth=(user, pw),
headers={"Depth": "0", "Content-Type": "application/xml"},
content=propfind_body,
)
# If the server demands Digest (Baïkal default, SabreDAV-based
# servers, Radicale with htdigest), the Basic attempt above
# 401s. Retry once with httpx.DigestAuth so this test matches
# what the real sync does via caldav.DAVClient in
# src/caldav_sync.py (which negotiates the scheme).
if r.status_code == 401 and "digest" in r.headers.get("www-authenticate", "").lower():
r = await cx.request(
"PROPFIND", url,
auth=httpx.DigestAuth(user, pw),
headers={"Depth": "0", "Content-Type": "application/xml"},
content=propfind_body,
)
# 207 = Multi-Status — standard CalDAV success. 200 also
# acceptable. Anything else (401/403/404/5xx) means trouble.
if r.status_code in (200, 207):
@@ -593,6 +681,8 @@ def setup_calendar_routes() -> APIRouter:
return {"ok": False, "error": "Forbidden — user can't access that URL"}
if r.status_code == 404:
return {"ok": False, "error": "Not found — check the URL path"}
if 300 <= r.status_code < 400:
return {"ok": False, "error": "Redirects are not followed for CalDAV safety; use the final URL"}
return {"ok": False, "error": f"HTTP {r.status_code}"}
except httpx.ConnectError as e:
return {"ok": False, "error": f"Connection refused: {e}"[:200]}
@@ -739,6 +829,16 @@ def setup_calendar_routes() -> APIRouter:
)
db.add(ev)
db.commit()
if cal.source == "caldav":
# Push the new event to the remote so it appears on the user's
# other devices — the sync is otherwise pull-only (#800).
from src.caldav_writeback import writeback_event
await writeback_event(owner, cal.source, cal.id, {
"uid": uid, "summary": data.summary, "description": data.description,
"location": data.location, "dtstart": dtstart, "dtend": dtend,
"all_day": data.all_day, "is_utc": _is_utc and not data.all_day,
"rrule": data.rrule or "",
})
return {"ok": True, "uid": uid}
except HTTPException:
raise
@@ -785,6 +885,14 @@ def setup_calendar_routes() -> APIRouter:
if data.color is not None:
ev.color = data.color if data.color else None
db.commit()
cal = db.query(CalendarCal).filter(CalendarCal.id == ev.calendar_id).first()
if cal and cal.source == "caldav":
from src.caldav_writeback import writeback_event
await writeback_event(owner, cal.source, cal.id, {
"uid": ev.uid, "summary": ev.summary, "description": ev.description,
"location": ev.location, "dtstart": ev.dtstart, "dtend": ev.dtend,
"all_day": ev.all_day, "is_utc": ev.is_utc, "rrule": ev.rrule or "",
})
return {"ok": True}
except HTTPException:
raise
@@ -805,8 +913,15 @@ def setup_calendar_routes() -> APIRouter:
db = SessionLocal()
try:
ev = _get_or_404_event(db, base_uid, owner)
# Capture what the remote push needs BEFORE the row is gone.
_cal = db.query(CalendarCal).filter(CalendarCal.id == ev.calendar_id).first()
_is_caldav = bool(_cal and _cal.source == "caldav")
_cal_id, _ev_uid = ev.calendar_id, ev.uid
db.delete(ev)
db.commit()
if _is_caldav:
from src.caldav_writeback import writeback_event
await writeback_event(owner, "caldav", _cal_id, {"uid": _ev_uid}, delete=True)
return {"ok": True}
except HTTPException:
raise
@@ -938,7 +1053,12 @@ def setup_calendar_routes() -> APIRouter:
source_uid = str(comp.get("uid", "")) or None
if source_uid:
src_dtstart = dtstart.dt
naive_src = src_dtstart.replace(tzinfo=None) if hasattr(src_dtstart, 'tzinfo') and src_dtstart.tzinfo else src_dtstart
# Normalize to the SAME naive form import_ics stores, so a
# re-import of a tz-aware event matches the existing row.
# The old code stripped tzinfo WITHOUT converting to UTC
# (wall clock), while storage converts to UTC first, so
# every re-import of a TZID event created a duplicate.
naive_src = _ics_naive_dtstart(src_dtstart)
existing = (
db.query(CalendarEvent)
.filter(
@@ -1032,23 +1152,23 @@ def setup_calendar_routes() -> APIRouter:
"BEGIN:VCALENDAR",
"VERSION:2.0",
"PRODID:-//Odysseus//Calendar//EN",
f"X-WR-CALNAME:{cal.name}",
f"X-WR-CALNAME:{_ics_escape(cal.name)}",
]
for ev in events:
lines.append("BEGIN:VEVENT")
lines.append(f"UID:{ev.uid}")
lines.append(f"SUMMARY:{ev.summary or ''}")
lines.append(f"SUMMARY:{_ics_escape(ev.summary or '')}")
if ev.all_day:
lines.append(f"DTSTART;VALUE=DATE:{ev.dtstart.strftime('%Y%m%d')}")
lines.append(f"DTEND;VALUE=DATE:{ev.dtend.strftime('%Y%m%d')}")
else:
lines.append(f"DTSTART:{ev.dtstart.strftime('%Y%m%dT%H%M%S')}")
lines.append(f"DTEND:{ev.dtend.strftime('%Y%m%dT%H%M%S')}")
_dt_suffix = "Z" if getattr(ev, "is_utc", False) else ""
lines.append(f"DTSTART:{ev.dtstart.strftime('%Y%m%dT%H%M%S')}{_dt_suffix}")
lines.append(f"DTEND:{ev.dtend.strftime('%Y%m%dT%H%M%S')}{_dt_suffix}")
if ev.description:
desc = ev.description.replace(chr(10), '\\n')
lines.append(f"DESCRIPTION:{desc}")
lines.append(f"DESCRIPTION:{_ics_escape(ev.description)}")
if ev.location:
lines.append(f"LOCATION:{ev.location}")
lines.append(f"LOCATION:{_ics_escape(ev.location)}")
if ev.rrule:
lines.append(f"RRULE:{ev.rrule}")
lines.append("END:VEVENT")

View File

@@ -3,6 +3,7 @@
import asyncio
import json
import logging
import os
import re
from dataclasses import dataclass, field
from typing import Any, Optional
@@ -11,6 +12,7 @@ from core.models import ChatMessage
from core.database import SessionLocal
from core.database import Session as DBSession, ModelEndpoint
from src.llm_core import normalize_model_id
from src.endpoint_resolver import normalize_base
from src.context_compactor import maybe_compact, trim_for_context
from src.auth_helpers import get_current_user
from src.prompt_security import untrusted_context_message
@@ -119,7 +121,7 @@ def needs_auto_name(name: str) -> bool:
if name.startswith("Chat:") or name == "Chat":
return True
# Default frontend name: "modelname HH:MM:SS AM/PM"
if re.match(r'^.+ \d{1,2}:\d{2}:\d{2}\s*(AM|PM)$', name):
if re.match(r"^.+ \d{1,2}:\d{2}:\d{2}(\s*(AM|PM))?$", name, re.IGNORECASE):
return True
return False
@@ -146,9 +148,13 @@ async def auto_name_session(session_manager, sess):
if not first_msg:
return
owner = getattr(sess, "owner", None)
t_url, t_model, t_headers = resolve_task_endpoint(
sess.endpoint_url, sess.model, sess.headers,
sess.endpoint_url, sess.model, sess.headers, owner=owner,
)
if not t_model:
logger.debug("[auto-name] No model provided, skipping")
return
# max_tokens big enough that reasoning models (Minimax M2,
# DeepSeek R1, QwQ, etc.) have headroom for <think>…</think>
@@ -306,7 +312,24 @@ def fire_message_event(request, webhook_manager, session_id: str, sess, message:
fire_event("message_sent", user)
def resolve_session_auth(sess, session_id: str):
def _session_url_matches_endpoint(session_url: str, endpoint_base: str) -> bool:
if not session_url or not endpoint_base:
return False
try:
from src.endpoint_resolver import build_chat_url, normalize_base
sess_url = session_url.rstrip("/")
base = normalize_base(endpoint_base).rstrip("/")
return sess_url in {
base,
base + "/chat/completions",
build_chat_url(base).rstrip("/"),
}
except Exception:
return False
def resolve_session_auth(sess, session_id: str, owner: Optional[str] = None):
"""Ensure session has auth headers — resolve from endpoint DB if missing."""
has_auth = sess.headers and isinstance(sess.headers, dict) and any(
k.lower() in ('authorization', 'x-api-key') for k in sess.headers
@@ -315,25 +338,96 @@ def resolve_session_auth(sess, session_id: str):
return
try:
from src.endpoint_resolver import build_headers
from src.endpoint_resolver import build_headers, normalize_base
db = SessionLocal()
try:
domain = sess.endpoint_url.split("//")[1].split("/")[0] if "//" in sess.endpoint_url else ""
if domain:
ep = db.query(ModelEndpoint).filter(ModelEndpoint.base_url.contains(domain)).first()
if ep and ep.api_key:
sess.headers = build_headers(ep.api_key, ep.base_url)
db.query(DBSession).filter(DBSession.id == session_id).update(
{"headers": json.dumps(sess.headers)}
)
db.commit()
logger.info(f"Resolved and persisted auth headers for session {session_id} from endpoint {ep.name}")
target_url = getattr(sess, "endpoint_url", "") or ""
if not target_url:
return
q = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)
if owner:
# Missing headers usually means "recover from the saved endpoint".
# Scope that lookup to the session owner, otherwise two users
# with similar endpoint URLs can borrow each other's API key.
from src.auth_helpers import owner_filter
q = owner_filter(q, ModelEndpoint, owner)
for ep in q.all():
if not _session_url_matches_endpoint(target_url, ep.base_url or ""):
continue
if not ep.api_key:
return
base = normalize_base(ep.base_url or "")
sess.headers = build_headers(ep.api_key, base)
update_q = db.query(DBSession).filter(DBSession.id == session_id)
if owner:
update_q = update_q.filter(DBSession.owner == owner)
update_q.update({"headers": sess.headers})
db.commit()
logger.info(f"Resolved and persisted auth headers for session {session_id} from endpoint {ep.name}")
return
finally:
db.close()
except Exception as e:
logger.warning(f"Failed to resolve session headers: {e}")
def _match_cached_model_id(requested: str, models) -> Optional[str]:
if not requested or not models:
return None
model_ids = [str(m) for m in models if m]
if requested in model_ids:
return requested
req_base = os.path.basename(requested.rstrip("/"))
for model_id in model_ids:
if os.path.basename(model_id.rstrip("/")) == req_base:
return model_id
return None
def _normalize_model_id_from_cache(sess) -> Optional[str]:
"""Use stored endpoint model IDs before falling back to a live /models probe."""
endpoint_url = getattr(sess, "endpoint_url", "") or ""
requested = getattr(sess, "model", "") or ""
if not endpoint_url or not requested:
return None
try:
session_base = normalize_base(endpoint_url)
except Exception:
session_base = endpoint_url.rstrip("/")
if not session_base:
return None
db = SessionLocal()
try:
endpoints = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True).all()
for ep in endpoints:
try:
if normalize_base(getattr(ep, "base_url", "") or "") != session_base:
continue
except Exception:
continue
raw_models = getattr(ep, "cached_models", None)
if not raw_models:
continue
try:
models = json.loads(raw_models) if isinstance(raw_models, str) else raw_models
except Exception:
continue
matched = _match_cached_model_id(requested, models)
if matched:
return matched
except Exception as e:
logger.debug("Cached model normalization skipped: %s", e)
finally:
db.close()
return None
async def build_chat_context(
sess,
request,
@@ -434,8 +528,9 @@ async def build_chat_context(
for transcript in preprocessed.youtube_transcripts:
preface.append(untrusted_context_message("youtube transcript", transcript))
# Normalize model ID
norm = normalize_model_id(sess.endpoint_url, sess.model)
# Normalize model ID. Prefer cached endpoint models so group chat does not
# re-hit slow local /models endpoints on every participant turn.
norm = _normalize_model_id_from_cache(sess) or normalize_model_id(sess.endpoint_url, sess.model)
if norm:
sess.model = norm
@@ -743,7 +838,7 @@ def run_post_response_tasks(
from services.memory.memory_extractor import extract_and_store
from src.task_endpoint import resolve_task_endpoint
t_url, t_model, t_headers = resolve_task_endpoint(
sess.endpoint_url, sess.model, sess.headers,
sess.endpoint_url, sess.model, sess.headers, owner=owner,
)
asyncio.create_task(extract_and_store(
sess, memory_manager, memory_vector,
@@ -780,7 +875,7 @@ def run_post_response_tasks(
from services.memory.skill_extractor import maybe_extract_skill
from src.task_endpoint import resolve_task_endpoint
s_url, s_model, s_headers = resolve_task_endpoint(
sess.endpoint_url, sess.model, sess.headers,
sess.endpoint_url, sess.model, sess.headers, owner=owner,
)
logger.debug("[skill-extract] dispatching extractor (model=%s)", s_model)
asyncio.create_task(maybe_extract_skill(

View File

@@ -23,10 +23,12 @@ from src.prompt_security import untrusted_context_message
from core.exceptions import SessionNotFoundError
from src.auth_helpers import get_current_user
from routes.session_routes import _verify_session_owner
from routes.document_helpers import _owner_session_filter
from core.database import SessionLocal, get_session_mode, set_session_mode
from core.database import Session as DBSession, ChatMessage as DBChatMessage
from core.database import Document as DBDocument, ModelEndpoint
from routes.research_routes import _resolve_research_endpoint
from routes.model_routes import _visible_models
from routes.chat_helpers import (
resolve_session_auth,
build_chat_context,
@@ -41,6 +43,7 @@ logger = logging.getLogger(__name__)
# Track active streams for partial-save safety net
_active_streams: Dict[str, dict] = {}
_IMAGE_MODEL_PREFIXES = ("gpt-image", "dall-e", "chatgpt-image")
def _stream_set(session_id: str, **fields) -> None:
@@ -69,13 +72,17 @@ def _session_url_matches_endpoint(session_url: str, endpoint_base: str) -> bool:
return sess in variants or sess.startswith(base + "/")
def _clear_orphaned_session_endpoint(sess) -> bool:
def _clear_orphaned_session_endpoint(sess, owner: str | None = None) -> bool:
"""Clear a session model if its endpoint was deleted from ModelEndpoint."""
if not getattr(sess, "endpoint_url", ""):
return False
db = SessionLocal()
try:
endpoints = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True).all()
q = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)
if owner:
from src.auth_helpers import owner_filter
q = owner_filter(q, ModelEndpoint, owner)
endpoints = q.all()
for ep in endpoints:
if _session_url_matches_endpoint(sess.endpoint_url or "", ep.base_url or ""):
return False
@@ -96,6 +103,132 @@ def _clear_orphaned_session_endpoint(sess) -> bool:
db.close()
def _endpoint_cache_contains_model(endpoint, model: str) -> bool:
"""Return True when a populated endpoint model cache includes ``model``.
Empty/malformed caches are treated as unknown rather than a negative match
so older image endpoints without cached models still work.
"""
raw = getattr(endpoint, "cached_models", None)
if not raw:
return True
try:
models = json.loads(raw) if isinstance(raw, str) else raw
except Exception:
return True
if not isinstance(models, list) or not models:
return True
wanted = (model or "").strip()
return wanted in {str(item).strip() for item in models}
def _is_image_generation_session(sess, owner: str | None = None) -> bool:
    """Decide whether this chat session should bypass text chat and generate images.

    A model whose name starts (case-insensitively) with one of
    ``_IMAGE_MODEL_PREFIXES`` is always an image model. Otherwise the session
    counts as image generation only when an enabled endpoint of type
    ``image`` matches the session's endpoint URL and that endpoint's model
    cache, if populated, lists the selected model. This prevents an image
    endpoint on the same host from misrouting ordinary text models into the
    image-generation path.

    Args:
        sess: Session object; ``model`` and ``endpoint_url`` are read with
            ``getattr``, so missing attributes count as empty.
        owner: When truthy, only endpoints passing ``owner_filter`` for this
            owner are considered.

    Returns:
        True when the session should take the image-generation branch. A
        failure during the endpoint lookup is logged and reported as False.
    """
    model = (getattr(sess, "model", "") or "").strip()
    lowered = model.lower()
    if any(lowered.startswith(prefix) for prefix in _IMAGE_MODEL_PREFIXES):
        return True

    endpoint_url = (getattr(sess, "endpoint_url", "") or "").strip()
    if not endpoint_url:
        return False

    db = SessionLocal()
    try:
        q = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)
        if owner:
            from src.auth_helpers import owner_filter
            q = owner_filter(q, ModelEndpoint, owner)
        for endpoint in q.all():
            # Endpoints without an explicit type are treated as text ("llm").
            if (getattr(endpoint, "model_type", None) or "llm") != "image":
                continue
            if not _session_url_matches_endpoint(endpoint_url, getattr(endpoint, "base_url", "") or ""):
                continue
            if _endpoint_cache_contains_model(endpoint, model):
                return True
        return False
    except Exception as exc:
        # Stay best-effort (answer "not an image session"), but leave a trace:
        # a silent False here makes a misrouted image request impossible to
        # diagnose from the logs.
        logger.warning("Image-session detection failed for model %r: %s", model, exc)
        return False
    finally:
        db.close()
def _recover_empty_session_model(sess, session_id: str, owner: str | None = None) -> bool:
"""Re-populate sess.model from the matching endpoint's cached models.
Covers the window between endpoint setup and the first chat send: the
picker showed a model in the dropdown but the session record never got
written (Issue #587 — UI uses the cached endpoint list, not s.model).
Without this, we'd POST the upstream with model="" and get a generic
401/503 instead of using the model the user already picked.
Returns True iff sess.model was repaired.
"""
if getattr(sess, "model", None):
return False
db = SessionLocal()
try:
# Prefer the endpoint whose base URL matches the session — we know the
# user already pointed this session at that endpoint, so its first
# cached model is the most defensible default.
ep = None
if getattr(sess, "endpoint_url", ""):
q = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)
if owner:
from src.auth_helpers import owner_filter
q = owner_filter(q, ModelEndpoint, owner)
endpoints = q.all()
for cand in endpoints:
if _session_url_matches_endpoint(sess.endpoint_url or "", cand.base_url or ""):
ep = cand
break
if not ep:
return False
try:
cached = json.loads(ep.cached_models) if isinstance(ep.cached_models, str) else (ep.cached_models or [])
except Exception:
cached = []
if not cached:
return False
try:
visible = _visible_models(cached, getattr(ep, "hidden_models", None))
except Exception:
visible = cached
if not visible:
return False
model = visible[0]
if not isinstance(model, str) or not model.strip():
return False
model = model.strip()
# Persist so the next request, websocket reconnect, or page reload
# picks up the same model (we'd otherwise re-pick on every send
# and silently switch on the user if the cached order shifts).
db_session = db.query(DBSession).filter(DBSession.id == session_id).first()
if db_session:
db_session.model = model
db_session.updated_at = datetime.utcnow()
db.commit()
sess.model = model
logger.info(
"Recovered empty session model for %s — picked %r from endpoint %s",
session_id, model, ep.id,
)
return True
except Exception as e:
db.rollback()
logger.warning("Failed to recover empty session model for %s: %s", session_id, e)
return False
finally:
db.close()
def setup_chat_routes(
session_manager,
chat_handler,
@@ -130,9 +263,20 @@ def setup_chat_routes(
sess = session_manager.get_session(session)
except KeyError:
raise HTTPException(404, f"Session '{session}' not found")
if _clear_orphaned_session_endpoint(sess):
owner = get_current_user(request)
if _clear_orphaned_session_endpoint(sess, owner=owner):
raise HTTPException(400, "Selected model endpoint was removed. Pick another model in Settings.")
# Empty model + live endpoint = setup race (Issue #587). Repair from
# the endpoint's cached model list before privilege checks, which
# otherwise see "" and behave inconsistently with the allowlist.
_recover_empty_session_model(sess, session, owner=owner)
if not getattr(sess, "model", "").strip():
raise HTTPException(
400,
"No model selected for this chat. Open the model picker and choose one before sending.",
)
# Same allowed_models + daily-cap gate as chat_stream (mirror so the
# non-streaming path can't be used to bypass).
_enforce_chat_privileges(request, sess)
@@ -270,8 +414,21 @@ def setup_chat_routes(
# but BEFORE loading. Prevents cross-user session hijack.
_verify_session_owner(request, session)
sess = session_manager.get_session(session)
if _clear_orphaned_session_endpoint(sess):
owner = get_current_user(request)
if _clear_orphaned_session_endpoint(sess, owner=owner):
raise HTTPException(400, "Selected model endpoint was removed. Pick another model in Settings.")
# Issue #587: picker shows a model from the endpoint cache but
# s.model never made it onto the DB row (first-send race after
# endpoint setup, or a previous endpoint delete/recreate). Pull
# the first cached model off the matching endpoint so the
# upstream isn't called with model="" (which surfaces as a
# generic 401/503).
_recover_empty_session_model(sess, session, owner=owner)
if not getattr(sess, "model", "").strip():
raise HTTPException(
400,
"No model selected for this chat. Open the model picker and choose one before sending.",
)
except SessionNotFoundError as e:
raise HTTPException(404, str(e))
except (ValueError, ValidationError):
@@ -288,7 +445,7 @@ def setup_chat_routes(
_enforce_chat_privileges(request, sess)
# Ensure session has auth headers
resolve_session_auth(sess, session)
resolve_session_auth(sess, session, owner=get_current_user(request))
# Check for research_pending BEFORE mode persist overwrites it
do_research = str(use_research).lower() == "true"
@@ -343,18 +500,22 @@ def setup_chat_routes(
try:
if active_doc_id:
logger.info(f"[doc-inject] active_doc_id from frontend: {active_doc_id}")
active_doc = _doc_db.query(DBDocument).filter(
DBDocument.id == active_doc_id,
).first()
# Scope to the caller's documents. The session and in-memory
# fallbacks below are already owner/session-bound; this
# explicit-id path looked up by id alone, so a user could
# inject another user's document by passing its id.
_doc_q = _doc_db.query(DBDocument).filter(DBDocument.id == active_doc_id)
active_doc = _owner_session_filter(_doc_q, ctx.user).first()
if active_doc:
logger.info(f"[doc-inject] found by ID: title={active_doc.title!r}, lang={active_doc.language!r}, is_active={active_doc.is_active}, content_len={len(active_doc.current_content or '')}")
else:
logger.warning(f"[doc-inject] NOT FOUND by ID {active_doc_id}")
if not active_doc:
active_doc = _doc_db.query(DBDocument).filter(
_session_doc_q = _doc_db.query(DBDocument).filter(
DBDocument.session_id == session,
DBDocument.is_active == True
).order_by(DBDocument.updated_at.desc()).first()
)
active_doc = _owner_session_filter(_session_doc_q, ctx.user).order_by(DBDocument.updated_at.desc()).first()
if active_doc:
logger.info(f"[doc-inject] found by session fallback: title={active_doc.title!r}")
# Last resort: the document the agent itself just created/edited
@@ -368,7 +529,8 @@ def setup_chat_routes(
from src.tool_implementations import get_active_document
_mem_id = get_active_document()
if _mem_id:
cand = _doc_db.query(DBDocument).filter(DBDocument.id == _mem_id).first()
_mem_q = _doc_db.query(DBDocument).filter(DBDocument.id == _mem_id)
cand = _owner_session_filter(_mem_q, ctx.user).first()
if cand and (not cand.session_id or cand.session_id == session):
active_doc = cand
logger.info(f"[doc-inject] found by in-memory active id: title={active_doc.title!r} (session_id={cand.session_id!r})")
@@ -563,6 +725,7 @@ def setup_chat_routes(
prior_findings=_prior_findings,
prior_urls=_prior_urls,
on_complete=_on_research_done,
owner=_user,
)
_heartbeat_counter = 0
@@ -619,7 +782,7 @@ def setup_chat_routes(
# output. Resolved once per request.
try:
from src.endpoint_resolver import resolve_chat_fallback_candidates
_fallback_candidates = resolve_chat_fallback_candidates()
_fallback_candidates = resolve_chat_fallback_candidates(owner=_user)
except Exception:
_fallback_candidates = []
@@ -632,28 +795,7 @@ def setup_chat_routes(
_model_info["character_name"] = ctx.preset.character_name
yield f'data: {json.dumps(_model_info)}\n\n'
# Detect image models and route directly to image generation
_IMAGE_MODEL_PREFIXES = ("gpt-image", "dall-e", "chatgpt-image")
_is_image_model = any(sess.model.lower().startswith(p) for p in _IMAGE_MODEL_PREFIXES)
# Also check if the endpoint is registered as an image-type endpoint
if not _is_image_model:
try:
from src.endpoint_resolver import normalize_base as _nb
_ep_base = _nb(sess.endpoint_url)
_db = SessionLocal()
try:
_is_image_model = _db.query(ModelEndpoint).filter(
ModelEndpoint.model_type == "image",
ModelEndpoint.is_enabled == True,
ModelEndpoint.base_url.contains(_ep_base.split("://")[-1].split("/")[0]),
).first() is not None
finally:
_db.close()
except Exception:
pass
if _is_image_model:
if _is_image_generation_session(sess, owner=_user):
from src.settings import get_setting
if not get_setting("image_gen_enabled", True):
yield f'data: {json.dumps({"delta": "Image generation is disabled by the administrator."})}\n\n'
@@ -664,7 +806,7 @@ def setup_chat_routes(
_user_msg = message or ""
yield f'data: {json.dumps({"type": "tool_start", "tool": "generate_image", "command": _user_msg[:100]})}\n\n'
yield ": heartbeat\n\n"
_img_result = await do_generate_image(f"{_user_msg}\n{sess.model}", session)
_img_result = await do_generate_image(f"{_user_msg}\n{sess.model}", session, owner=_user)
_img_output = _img_result.get("results", _img_result.get("error", ""))
_img_tool_data = {"type": "tool_output", "tool": "generate_image", "command": _user_msg[:100], "output": _img_output, "exit_code": 0 if "error" not in _img_result else 1}
for _k in ("image_url", "image_id", "image_prompt", "image_model", "image_size", "image_quality"):
@@ -688,6 +830,7 @@ def setup_chat_routes(
return
elif chat_mode == "chat":
_chat_start = time.time()
_answered_by = None # set if the selected model failed and a fallback answered
# ── Chat mode: call stream_llm directly, NO tools, NO document access ──
try:
_chat_candidates = [(sess.endpoint_url, sess.model, sess.headers)] + _fallback_candidates
@@ -708,16 +851,35 @@ def setup_chat_routes(
try:
data = json.loads(chunk[6:])
if "delta" in data:
full_response += data["delta"]
_stream_set(session, partial=full_response)
# Reasoning tokens arrive flagged thinking:true.
# Forward them so the client can show a thinking
# indicator, but don't fold them into the saved
# reply (mirrors the rewrite path below).
if not data.get("thinking"):
full_response += data["delta"]
_stream_set(session, partial=full_response)
yield chunk
elif data.get("type") == "fallback":
# Selected model failed; a fallback answered.
# Forward the notice and remember the real model.
_answered_by = data.get("answered_by") or _answered_by
yield chunk
elif data.get("type") == "usage":
last_metrics = data.get("data", {})
last_metrics["model"] = sess.model
last_metrics["model"] = _answered_by or sess.model
if ctx.context_length and last_metrics.get("input_tokens"):
pct = min(round((last_metrics["input_tokens"] / ctx.context_length) * 100, 1), 100.0)
last_metrics["context_percent"] = pct
last_metrics["context_length"] = ctx.context_length
# The frontend reads `tokens_per_second`; the raw usage event
# carries the backend's true gen speed as `gen_tps` (llama.cpp
# timings). Map it through so this direct-chat path shows real
# t/s instead of "n/a" → falling back to a bare token count.
if last_metrics.get("gen_tps") and not last_metrics.get("tokens_per_second"):
last_metrics["tokens_per_second"] = last_metrics["gen_tps"]
last_metrics["tps_source"] = "backend"
# Wall-clock response time for the stats popup ("Time").
last_metrics.setdefault("response_time", round(time.time() - _chat_start, 2))
yield f'data: {json.dumps({"type": "metrics", "data": last_metrics})}\n\n'
except json.JSONDecodeError:
yield chunk
@@ -781,6 +943,7 @@ def setup_chat_routes(
# ── Agent mode: full agent loop with tools ──
_agent_rounds = 0
_agent_tool_calls = 0
_answered_by = None # set if the selected model failed and a fallback answered
try:
from src.settings import get_setting
_tool_budget = int(get_setting("agent_max_tool_calls", 0))
@@ -805,8 +968,12 @@ def setup_chat_routes(
try:
data = json.loads(chunk[6:])
if "delta" in data:
full_response += data["delta"]
_stream_set(session, partial=full_response)
# Reasoning tokens arrive flagged thinking:true.
# Forward them for the live indicator, but keep
# them out of the saved reply (same as chat mode).
if not data.get("thinking"):
full_response += data["delta"]
_stream_set(session, partial=full_response)
yield chunk
elif data.get("type") == "web_sources":
web_sources = data.get("data", [])
@@ -821,9 +988,16 @@ def setup_chat_routes(
elif data.get("type") == "tool_start":
_agent_tool_calls += 1
yield chunk
elif data.get("type") == "fallback":
# Selected model failed; a fallback answered.
# Forward the notice and remember the real
# model so metrics reflect it, not the masked
# selected model.
_answered_by = data.get("answered_by") or _answered_by
yield chunk
elif data.get("type") == "metrics":
last_metrics = data.get("data", {})
last_metrics["model"] = sess.model
last_metrics["model"] = _answered_by or sess.model
yield f'data: {json.dumps({"type": "metrics", "data": last_metrics})}\n\n'
except json.JSONDecodeError:
yield chunk
@@ -920,11 +1094,15 @@ def setup_chat_routes(
_verify_session_owner(request, session_id)
# A detached run can still be going even if _active_streams was popped;
# report it as active so the client knows to reconnect via /resume.
if session_id not in _active_streams:
# Read once via .get() to avoid a KeyError race between the membership
# check and the indexed read if a sibling stream's finally pops the
# entry in between (same pattern _stream_set already uses).
rec = _active_streams.get(session_id)
if rec is None:
if agent_runs.is_active(session_id):
return {"status": "streaming", "detached": True}
raise HTTPException(404, "No active stream for this session")
return _active_streams[session_id]
return rec
# ------------------------------------------------------------------ #
# POST /api/inject_context
@@ -1088,7 +1266,7 @@ def setup_chat_routes(
db_msg = (
db.query(DBChatMessage)
.filter(DBChatMessage.session_id == session_id, DBChatMessage.role == 'assistant')
.order_by(DBChatMessage.created_at.desc())
.order_by(DBChatMessage.timestamp.desc())
.first()
)
if db_msg:

View File

@@ -130,21 +130,28 @@ def _parse_vcards(text: str) -> List[Dict]:
contact = {"name": "", "emails": [], "phones": [], "uid": ""}
for line in block.split("\n"):
line = line.strip()
if line.startswith("FN:") or line.startswith("FN;"):
contact["name"] = _vunesc(line.split(":", 1)[1]) if ":" in line else ""
elif line.startswith("EMAIL"):
# Strip an optional RFC 6350 group prefix (e.g. "item1.EMAIL;...")
# that Apple Contacts / iCloud / many CardDAV servers emit by
# default — without this the property-name checks below miss those
# lines and silently drop the email / phone. The group token only
# precedes the property name, so it is safe to strip for matching
# and value extraction, and a no-op for non-grouped lines.
name_part = re.sub(r"^[A-Za-z0-9-]+\.", "", line, count=1)
if name_part.startswith("FN:") or name_part.startswith("FN;"):
contact["name"] = _vunesc(name_part.split(":", 1)[1]) if ":" in name_part else ""
elif name_part.startswith("EMAIL"):
# Handle EMAIL:foo@bar OR EMAIL;TYPE=...:foo@bar OR EMAIL;PREF=1:foo@bar
if ":" in line:
email_addr = _vunesc(line.split(":", 1)[1])
if ":" in name_part:
email_addr = _vunesc(name_part.split(":", 1)[1])
if email_addr and email_addr not in contact["emails"]:
contact["emails"].append(email_addr)
elif line.startswith("TEL"):
if ":" in line:
phone = _vunesc(line.split(":", 1)[1])
elif name_part.startswith("TEL"):
if ":" in name_part:
phone = _vunesc(name_part.split(":", 1)[1])
if phone and phone not in contact["phones"]:
contact["phones"].append(phone)
elif line.startswith("UID:"):
contact["uid"] = _vunesc(line[4:])
elif name_part.startswith("UID:"):
contact["uid"] = _vunesc(name_part[4:])
if contact["name"] or contact["emails"]:
contacts.append(contact)
return contacts
@@ -676,8 +683,8 @@ def setup_contacts_routes():
@router.post("/add")
async def add_contact(data: dict, _admin: str = Depends(require_admin)):
"""Add a new contact."""
name = data.get("name", "").strip()
email = data.get("email", "").strip()
name = (data.get("name") or "").strip()
email = (data.get("email") or "").strip()
if not email:
return {"success": False, "error": "Email required"}
# Check if already exists

View File

@@ -148,6 +148,108 @@ def _local_tooling_path_export(executable: str) -> str:
return f'export PATH="{esc}:$PATH"'
def _pip_install_no_cache(cmd: str) -> str:
    """Insert ``--no-cache-dir`` after the first ``pip install`` in *cmd*.

    Cookbook dependency installs (vLLM, llama-cpp-python, ...) build large
    wheels. pip's default cache lives under ``$HOME/.cache/pip``, and these
    builds can fill a small home filesystem with ``[Errno 28] No space left on
    device`` mid-build (issue #1219), leaving the dependency "installed" but
    unusable (#1459). Disabling the cache keeps these one-off installs off the
    home disk (the maintainer's suggested ``PIP_CACHE_DIR=`` workaround, made
    the default).

    The command is returned untouched when it is empty, when it already
    mentions ``--no-cache-dir`` anywhere, or when it contains no
    ``pip install``. Only the first ``pip install`` occurrence is rewritten.
    """
    needle = "pip install"
    flag = "--no-cache-dir"

    if not cmd:
        return cmd
    if flag in cmd:
        # Already opted out of the cache somewhere; keeps the call idempotent.
        return cmd

    before, found, after = cmd.partition(needle)
    if not found:
        # Not a pip install command at all.
        return cmd
    return f"{before}{needle} {flag}{after}"
def _pip_install_attempt(pip_cmd: str) -> str:
    """Wrap one pip install command so its real exit status is preserved.

    A plain ``pip ... 2>&1 | tail -5`` returns ``tail``'s exit code (0), which
    masks pip's failure and prevents the next fallback from running. The
    generated ``bash -c`` snippet instead:

    1. captures all of pip's output in a ``mktemp`` file,
    2. remembers the exit status in ``_rc``,
    3. prints the last 5 captured lines (whether or not pip succeeded) so the
       Cookbook log panel shows useful diagnostics,
    4. removes the temp file and exits with the remembered status.

    Args:
        pip_cmd: The shell text of a single pip invocation. It may itself
            contain shell quoting (for example a ``shlex.quote``d package spec
            such as ``'llama-cpp-python[server]'``).

    Returns:
        A ``bash -c <script>`` command string safe to chain with ``||``/``&&``.
    """
    script = (
        f'_out=$(mktemp) && {pip_cmd} >"$_out" 2>&1; _rc=$?; '
        'tail -5 "$_out"; rm -f "$_out"; exit $_rc'
    )
    # Quote the whole script rather than hand-wrapping it in '...': a single
    # quote inside pip_cmd would otherwise terminate the outer quoting early
    # and strip the inner quoting before bash ever parses the script. For a
    # script with no single quotes shlex.quote yields exactly '<script>', so
    # the output is unchanged for plain commands.
    return f"bash -c {shlex.quote(script)}"
def _pip_install_fallback_chain(package: str, *, python_cmd: str = "python3 -m pip", upgrade: bool = False) -> str:
    """Build a bash pip-install chain: plain install first, ``--user`` second.

    The active interpreter/environment is tried first. ``--user`` is invalid
    inside many venvs, so the ``--user --break-system-packages`` retry is only
    attempted when the interpreter reports it is NOT running in a venv.

    Every attempt goes through :func:`_pip_install_attempt`, so pip's real exit
    code is preserved (no ``| tail`` masking) and the tail of pip's output
    shows up in the Cookbook log.

    Args:
        package: Package spec to install; shell-quoted before use.
        python_cmd: The pip invocation prefix (``python3 -m pip``, ``pip``,
            ``pip3``, ...).
        upgrade: Add ``-U`` to both attempts when true.

    Returns:
        A shell snippet of the form ``<base> || { ! <venv_check> && <user>; }``.
    """
    # An extras spec such as ``llama-cpp-python[server]`` contains brackets
    # that bash would treat as a glob, so quote it; plain names such as
    # ``huggingface_hub`` come back from shlex.quote unchanged.
    quoted_pkg = shlex.quote(package)
    quiet_flags = "-q -U" if upgrade else "-q"

    base_attempt = _pip_install_attempt(
        f"{python_cmd} install {quiet_flags} {quoted_pkg}"
    )
    user_attempt = _pip_install_attempt(
        f"{python_cmd} install --user --break-system-packages {quiet_flags} {quoted_pkg}"
    )

    # The venv probe must run under the interpreter that pip belongs to;
    # hardcoding python3 breaks when pip lives in a venv that only ships
    # "python".
    if " -m pip" in python_cmd:
        interpreter = python_cmd.replace(" -m pip", "")
    else:
        interpreter = "python" if python_cmd.strip() == "pip" else "python3"
    venv_probe = (
        interpreter
        + ' -c "import sys; sys.exit(0 if sys.prefix != sys.base_prefix else 1)"'
    )

    # `! venv_probe` succeeds only outside a venv, so `&&` then runs the
    # --user attempt. Inside a venv the negation fails, --user is skipped and
    # the group exits non-zero, propagating the base-install failure instead
    # of masking it (the `|| { venv_check || ... }` shape from #903 turned the
    # probe's exit 0 into the group's result).
    return base_attempt + " || { ! " + venv_probe + " && " + user_attempt + "; }"
def _venv_safe_local_pip_install_cmd(cmd: str, *, local: bool, in_venv: bool) -> str:
    """Drop pip user-install flags that are invalid for local venv installs.

    Cookbook dependency installs run through the model-serve task path so users
    can watch progress in the same log UI. For local POSIX runs, that task
    prepends Odysseus' own interpreter directory to PATH. If Odysseus itself is
    running from a venv, ``python3`` resolves to the venv Python and pip
    rejects ``--user`` with "User site-packages are not visible in this
    virtualenv".

    Remote and non-venv installs are returned unchanged: remotes may
    intentionally use system Python, and Docker/non-venv installs still need
    the user-site fallback.

    Args:
        cmd: The install command line.
        local: Whether the command runs on the local host.
        in_venv: Whether the local interpreter is running inside a venv.

    Returns:
        ``cmd`` without standalone ``--user`` / ``--break-system-packages``
        tokens, or ``cmd`` itself when nothing needs to change (not local, not
        in a venv, no ``pip install``, unparseable quoting, or no such flag
        present as its own token).
    """
    if not local or not in_venv:
        return cmd
    if "pip install" not in (cmd or ""):
        return cmd
    try:
        parts = shlex.split(cmd)
    except ValueError:
        # Unbalanced quoting: leave the command for the shell to report on.
        return cmd

    user_flags = {"--user", "--break-system-packages"}
    kept = [part for part in parts if part not in user_flags]
    if len(kept) == len(parts):
        # Nothing was stripped, so hand back the original text. Re-joining
        # would quote shell operators ("&&", "||", "{", "|") into literal
        # arguments and break chained commands for no benefit.
        return cmd
    # NOTE: shlex.join quotes every token, so a command that carries the flags
    # AND relies on shell operators is still flattened into a single argv here.
    return shlex.join(kept)
def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
"""Build the standalone Python scanner used by /api/model/cached."""
lines = [
@@ -166,6 +268,38 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
" for root, dirs, fns in os.walk(top, followlinks=False):",
" dirs[:] = [d for d in dirs if not os.path.islink(os.path.join(root, d)) and safe_path(os.path.join(root, d))]",
" yield root, dirs, fns",
"def gguf_role(name):",
" n = name.lower()",
" if n.startswith('mmproj') or 'mmproj' in n: return 'projector'",
" return 'model'",
"def gguf_quant(name):",
" m = re.search(r'(?i)(UD-)?(IQ[0-9]_[A-Z0-9_]+|Q[0-9](?:_[A-Z0-9]+)+|BF16|F16|FP16|F32|Q8_0)', name)",
" return m.group(0).upper() if m else ''",
"def collect_ggufs(base):",
" files = []",
" split_groups = {}",
" if not os.path.isdir(base) or not safe_path(base): return files",
" for root, dirs, fns in safe_walk(base):",
" for fn in sorted(fns):",
" if not fn.lower().endswith('.gguf'): continue",
" fp = os.path.join(root, fn)",
" try: size = os.path.getsize(fp)",
" except Exception: size = 0",
" try: rel = os.path.relpath(fp, base).replace(os.sep, '/')",
" except Exception: rel = fn",
" sm = re.match(r'(?i)^(.+)-(\\d+)-of-(\\d+)\\.gguf$', fn)",
" if sm:",
" prefix, part_s, total_s = sm.group(1), sm.group(2), sm.group(3)",
" key = (root, prefix, total_s)",
" g = split_groups.setdefault(key, {'name':fn,'rel_path':rel,'size_bytes':0,'role':gguf_role(fn),'quant':gguf_quant(fn),'parts':int(total_s),'split':True})",
" g['size_bytes'] += size",
" if int(part_s) == 1:",
" g.update({'name':fn,'rel_path':rel,'role':gguf_role(fn),'quant':gguf_quant(fn)})",
" continue",
" files.append({'name':fn,'rel_path':rel,'size_bytes':size,'role':gguf_role(fn),'quant':gguf_quant(fn)})",
" files.extend(split_groups.values())",
" files.sort(key=lambda f: (f.get('role') != 'model', f.get('rel_path', '')))",
" return files",
"def scan_hf(cache):",
" if not os.path.isdir(cache): return",
" for d in sorted(os.listdir(cache)):",
@@ -180,16 +314,14 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
" if f.is_file(): nf += 1; sz += f.stat().st_size",
" if f.name.endswith('.incomplete'): ic = True",
" snap = os.path.join(cache, d, 'snapshots')",
" is_diffusion = False; is_gguf = False",
" is_diffusion = False; gguf_files = []",
" if os.path.isdir(snap):",
" for sd in os.listdir(snap):",
" sf = os.path.join(snap, sd)",
" if not os.path.isdir(sf): continue",
" if os.path.exists(os.path.join(sf, 'model_index.json')): is_diffusion = True",
" try:",
" if any(x.endswith('.gguf') for x in os.listdir(sf)): is_gguf = True",
" except Exception: pass",
" models.append({'repo_id':rid,'size_bytes':sz,'nb_files':nf,'has_incomplete':ic,'path':cache,'is_diffusion':is_diffusion,'is_gguf':is_gguf})",
" for f in collect_ggufs(sf): f['rel_path'] = sd + '/' + f['rel_path']; gguf_files.append(f)",
" models.append({'repo_id':rid,'size_bytes':sz,'nb_files':nf,'has_incomplete':ic,'path':cache,'is_diffusion':is_diffusion,'is_gguf':bool(gguf_files),'gguf_files':gguf_files})",
"def scan_dir(p):",
" if not os.path.isdir(p) or not safe_path(p): return",
" for d in sorted(os.listdir(p)):",
@@ -198,13 +330,14 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
" fp = os.path.join(p, d)",
" if not os.path.isdir(fp) or os.path.islink(fp) or not safe_path(fp): continue",
" if d in seen: continue",
" is_model = False; is_gguf = False",
" is_model = False; gguf_files = []",
" for root, dirs, fns in safe_walk(fp):",
" for fn in fns:",
" if fn.endswith('.gguf'): is_gguf = True; is_model = True",
" if fn.lower().endswith('.gguf'): is_model = True",
" elif fn == 'config.json' or fn.endswith('.safetensors') or fn.endswith('.bin'): is_model = True",
" if is_model: break",
" if not is_model: continue",
" gguf_files = collect_ggufs(fp)",
" seen.add(d)",
" sz, nf = 0, 0",
" for dp, _, fns in safe_walk(fp):",
@@ -212,7 +345,7 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
" try: nf += 1; sz += os.path.getsize(os.path.join(dp, fn))",
" except Exception: pass",
" is_diff = os.path.exists(os.path.join(fp, 'model_index.json'))",
" models.append({'repo_id':d,'size_bytes':sz,'nb_files':nf,'has_incomplete':False,'path':p,'is_local_dir':True,'is_diffusion':is_diff,'is_gguf':is_gguf})",
" models.append({'repo_id':d,'size_bytes':sz,'nb_files':nf,'has_incomplete':False,'path':p,'is_local_dir':True,'is_diffusion':is_diff,'is_gguf':bool(gguf_files),'gguf_files':gguf_files})",
"def parse_size(num, unit):",
" try: n = float(num)",
" except Exception: return 0",
@@ -293,6 +426,38 @@ _SERVE_CMD_ALLOWLIST = {
# Matches a shell prelude anchored at the very start of a command string:
#     MODEL_FILE=$(<anything on one line>) && { ... } || { ... } &&
# The command substitution may not span a newline, neither brace group may
# contain nested braces, and trailing whitespace after the final `&&` is
# consumed as part of the match.
# NOTE(review): presumably used to recognise/strip the GGUF file-resolution
# prelude before the serve command proper is validated — confirm at call sites.
_GGUF_PRELUDE_RE = re.compile(
    r'^MODEL_FILE=\$\([^\n]*?\)\s*&&\s*\{[^{}]*\}\s*\|\|\s*\{[^{}]*\}\s*&&\s*'
)
_OLLAMA_HOST_ASSIGNMENT_RE = re.compile(r"(?:^|\s)OLLAMA_HOST=([^\s]+)")
_OLLAMA_BIND_RE = re.compile(r"^\[([^\]]+)\]:(\d+)$|^([^:]+):(\d+)$")
_OLLAMA_BIND_HOST_RE = re.compile(r"^[A-Za-z0-9._:-]+$")
def _ollama_bind_from_cmd(cmd: str | None, *, default_host: str = "127.0.0.1") -> tuple[str, str]:
"""Return the Ollama bind host/port requested by a serve command.
Plain local `ollama serve` defaults to loopback. Remote callers can pass a
wider default host so the resulting API is reachable by Odysseus.
"""
if not cmd:
return default_host, "11434"
match = _OLLAMA_HOST_ASSIGNMENT_RE.search(cmd)
if not match:
return default_host, "11434"
value = match.group(1).strip("'\"")
bind_match = _OLLAMA_BIND_RE.match(value)
if not bind_match:
return "127.0.0.1", "11434"
bracketed_host = bind_match.group(1)
host = bracketed_host or bind_match.group(3) or "127.0.0.1"
port = bind_match.group(2) or bind_match.group(4) or "11434"
if not _OLLAMA_BIND_HOST_RE.match(host):
return "127.0.0.1", "11434"
try:
port_num = int(port, 10)
except ValueError:
return "127.0.0.1", "11434"
if port_num < 1 or port_num > 65535:
return "127.0.0.1", "11434"
return f"[{host}]" if bracketed_host else host, port
def _check_serve_binary(seg: str) -> None:
@@ -370,6 +535,83 @@ def _append_serve_exit_code_lines(runner_lines: list[str], *, keep_shell_open: b
runner_lines.append('echo ""; echo "=== Process exited with code $ODYSSEUS_CMD_EXIT ==="; exec "${SHELL:-/bin/bash}"')
else:
runner_lines.append('echo ""; echo "=== Process exited with code $ODYSSEUS_CMD_EXIT ==="')
runner_lines.append('exit "$ODYSSEUS_CMD_EXIT"')
def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
    """Append the Linux llama.cpp build script lines, preferring ROCm/HIP.

    Cookbook already detects AMD GPUs elsewhere, but the llama.cpp bootstrap
    used to hard-wire CUDA on Linux. That made ROCm hosts attempt a CUDA
    configure and fail with "CUDA Toolkit not found" instead of building with
    HIP.

    The emitted shell picks one of three builds, in this order: HIP when a
    ROCm install is detected, CUDA when ``nvcc`` and ``libcudart`` are both
    visible, otherwise CPU only (with a warning). The lines reference
    ``$NPROC`` and ``~/bin`` without defining them here.

    Args:
        runner_lines: Script line list that is extended in place.
    """

    def _configure_build_link(backend_flag: str = "") -> str:
        # Configure, compile only the llama-server target, then symlink the
        # binary into ~/bin. Identical for every backend except the cmake flag.
        return (
            f' cmake -B build -DCMAKE_BUILD_TYPE=Release{backend_flag}'
            ' && cmake --build build -j"$NPROC" --target llama-server'
            ' && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server'
        )

    cpu_build = _configure_build_link()

    runner_lines.extend([
        # Put a pip-installed nvcc (from vLLM/nvidia CUDA wheels) on PATH so
        # cmake's CUDA configure can find it. This probe runs first, but the
        # HIP branch below is tested before the nvcc branch, so a machine with
        # both stacks still honors the native HIP toolchain instead of a stray
        # nvcc wheel.
        ' for _cudir in ~/.local/lib/python*/site-packages/nvidia/cu13 ~/.local/lib/python*/site-packages/nvidia/cu12 ~/.local/lib/python*/site-packages/nvidia/cuda_nvcc; do',
        ' [ -x "$_cudir/bin/nvcc" ] && export CUDA_HOME="$_cudir" && export PATH="$_cudir/bin:$PATH" && break',
        ' done',
        # Wipe build/ so a poisoned CMakeCache.txt from an earlier failed CUDA
        # or HIP attempt cannot leak stale settings into this configure.
        ' cd ~/llama.cpp && rm -rf build',
        # --- ROCm / HIP ---
        ' if command -v hipconfig &>/dev/null || [ -d /opt/rocm ] || [ -n "$ROCM_PATH" ] || [ -n "$HIP_PATH" ]; then',
        ' if command -v hipconfig &>/dev/null; then',
        ' export HIPCXX="${HIPCXX:-$(hipconfig -l)/clang}"',
        ' export HIP_PATH="${HIP_PATH:-$(hipconfig -R)}"',
        ' fi',
        ' echo "[odysseus] ROCm/HIP detected — building llama-server with HIP support..."',
        _configure_build_link(' -DGGML_HIP=ON'),
        # --- CUDA ---
        ' elif command -v nvcc &>/dev/null; then',
        # nvcc alone is not enough: pip-installed CUDA wheels or incomplete
        # tooling can expose nvcc without libcudart, and cmake then fails
        # mid-build with "CUDA runtime library not found". The helper checks
        # for cudart explicitly and keeps the guard readable.
        ' _odysseus_has_cudart() {',
        " ldconfig -p 2>/dev/null | grep -q 'libcudart\\.so' && return 0",
        ' local _cuh="${CUDA_HOME:-/usr/local/cuda}"',
        ' ls "$_cuh/lib64/libcudart.so"* &>/dev/null && return 0',
        ' ls "$_cuh/lib/libcudart.so"* &>/dev/null && return 0',
        ' ls /usr/local/cuda/lib64/libcudart.so* &>/dev/null && return 0',
        ' ls /usr/local/cuda/lib/libcudart.so* &>/dev/null && return 0',
        ' ls "${_cuh%/cuda_nvcc}/cuda_runtime/lib/libcudart.so"* &>/dev/null && return 0',
        ' return 1',
        ' }',
        ' if _odysseus_has_cudart; then',
        ' echo "[odysseus] CUDA nvcc + cudart found — building llama-server with CUDA (GPU) support..."',
        _configure_build_link(' -DGGML_CUDA=ON'),
        ' else',
        ' echo "[odysseus] WARNING: nvcc found but CUDA runtime (libcudart.so) is not visible — building llama-server for CPU only."',
        ' echo "[odysseus] GPU inference will not be available for this llama.cpp build."',
        ' echo "[odysseus] Ensure libcudart is installed (e.g. cuda-runtime package) and visible via ldconfig or CUDA_HOME."',
        cpu_build,
        ' fi',
        # --- CPU only ---
        ' else',
        ' echo "[odysseus] WARNING: no HIP/CUDA toolchain found — building llama-server for CPU only."',
        ' echo "[odysseus] GPU inference will not be available for this llama.cpp build."',
        ' echo "[odysseus] Install ROCm for AMD GPUs or vLLM/CUDA tooling for NVIDIA, then re-launch this serve task."',
        cpu_build,
        ' fi',
    ])
def _llama_cpp_rebuild_cmd() -> str:
    """Return the shell command that clears the Cookbook-managed llama.cpp build.

    The command removes the cached ``llama-server`` symlink and the
    ``~/llama.cpp/build`` directory so the next llama.cpp serve recompiles from
    source, picking up a CUDA or HIP toolchain if one is now available. The
    serve bootstrap only builds when ``llama-server`` is missing from PATH, so
    without this an existing CPU-only build is reused forever. It deliberately
    installs and downloads nothing; the rebuild itself happens on the next
    serve.
    """
    notice = (
        "[odysseus] Cleared the cached llama.cpp build. "
        "Re-launch the serve task to rebuild llama-server from source "
        "(CUDA or HIP will be used if a toolchain is now available)."
    )
    steps = [
        'mkdir -p "$HOME/bin"',
        'rm -f "$HOME/bin/llama-server"',
        'rm -rf "$HOME/llama.cpp/build"',
        f'echo "{notice}"',
    ]
    # Chain with && so a failed cleanup step stops before the success notice.
    return " && ".join(steps)
class ModelDownloadRequest(BaseModel):

View File

@@ -37,7 +37,8 @@ from routes.cookbook_helpers import (
_validate_local_dir, _validate_ssh_port, _validate_gpus, _shell_path,
_ps_squote, _bash_squote, _validate_serve_cmd, _parse_serve_phase,
_safe_env_prefix, _local_tooling_path_export, _append_serve_preflight_exit_lines,
_append_serve_exit_code_lines, _cached_model_scan_script,
_append_serve_exit_code_lines, _append_llama_cpp_linux_accel_build_lines, _cached_model_scan_script,
_ollama_bind_from_cmd, _pip_install_fallback_chain, _pip_install_no_cache, _venv_safe_local_pip_install_cmd,
ModelDownloadRequest, ServeRequest,
)
@@ -148,6 +149,15 @@ def setup_cookbook_routes() -> APIRouter:
"No GPUs are visible to the serve process.",
[{"label": "clear Cookbook GPU selection or choose available GPUs", "op": "settings", "field": "gpus", "value": ""}],
),
(
r"Failed to infer device type|NVML Shared Library Not Found|No module named 'amdsmi'|platform is not available",
"vLLM could not find a supported GPU (CUDA or ROCm). "
"This machine may have integrated or unsupported graphics only.",
[
{"label": "switch to llama.cpp (CPU/Metal, works without a discrete GPU)", "op": "manual"},
{"label": "switch to Ollama (CPU/Metal, works without a discrete GPU)", "op": "manual"},
],
),
(
r"vllm.*command not found|No module named vllm|ERROR: vLLM is not installed",
"vLLM is not installed or not in PATH on this server.",
@@ -163,6 +173,11 @@ def setup_cookbook_routes() -> APIRouter:
"llama.cpp / llama-cpp-python dependencies are missing.",
[{"label": "install llama.cpp dependencies or llama-cpp-python[server]", "op": "dependency", "package": "llama-cpp-python[server]"}],
),
(
r"No GGUF found on this host|no \.gguf file|No GGUF file found",
"No GGUF file found for this model on this host. The llama.cpp backend needs a .gguf file.",
[{"label": "download a GGUF build of this model (repo name usually ends in -GGUF, file like Q4_K_M.gguf)", "op": "manual"}],
),
(
r"No module named 'torch'|No module named torch|No module named 'diffusers'|No module named diffusers",
"Diffusion serving requires PyTorch and diffusers.",
@@ -368,11 +383,15 @@ def setup_cookbook_routes() -> APIRouter:
encoding="utf-8",
)
argv = [os.environ.get("ComSpec", "cmd.exe"), "/c", str(script_path)]
env = os.environ.copy()
env["PYTHONUTF8"] = "1"
env["PYTHONIOENCODING"] = "utf-8"
proc = subprocess.Popen(
argv,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
stdin=subprocess.DEVNULL,
env=env,
**detached_popen_kwargs(),
)
pid_path.write_text(str(proc.pid), encoding="utf-8")
@@ -432,12 +451,12 @@ def setup_cookbook_routes() -> APIRouter:
# throughput. Retries set disable_hf_transfer to fall back to the plain,
# slower-but-reliable downloader (resumes cleanly from the .incomplete files).
# Use `python3 -m pip` not `pip` — macOS has no bare `pip` command.
lines.append("command -v hf >/dev/null 2>&1 || python3 -m pip install --user --break-system-packages -q -U huggingface_hub 2>/dev/null || python3 -m pip install -q -U huggingface_hub 2>/dev/null")
lines.append(f"command -v hf >/dev/null 2>&1 || {_pip_install_fallback_chain('huggingface_hub', upgrade=True)}")
if req.disable_hf_transfer:
lines.append("export HF_HUB_ENABLE_HF_TRANSFER=0")
lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=4")
else:
lines.append("python3 -c 'import hf_transfer' 2>/dev/null || python3 -m pip install --user --break-system-packages -q hf_transfer 2>/dev/null || python3 -m pip install -q hf_transfer 2>/dev/null")
lines.append(f"python3 -c 'import hf_transfer' 2>/dev/null || {_pip_install_fallback_chain('hf_transfer')}")
lines.append("python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1")
lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=8")
@@ -531,12 +550,18 @@ def setup_cookbook_routes() -> APIRouter:
)
# Ensure pip-user scripts (e.g. hf CLI installed via --user) are on PATH
runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
# Install hf CLI + hf_transfer best-effort so future runs get the fast path.
# Install hf CLI + optional hf_transfer best-effort. Retries disable
# hf_transfer because the Rust parallel path is fast but has been
# flaky near the end of very large multi-file downloads.
# Use --break-system-packages on PEP-668 systems (Arch, newer Debian) so it doesn't bail.
runner_lines.append("command -v hf >/dev/null 2>&1 || pip install --user --break-system-packages -q -U huggingface_hub 2>/dev/null || pip install -q -U huggingface_hub 2>/dev/null")
runner_lines.append("python3 -c 'import hf_transfer' 2>/dev/null || pip install --user --break-system-packages -q hf_transfer 2>/dev/null || pip install -q hf_transfer 2>/dev/null")
runner_lines.append("python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1")
runner_lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=8")
runner_lines.append(f"command -v hf >/dev/null 2>&1 || {_pip_install_fallback_chain('huggingface_hub', python_cmd='pip', upgrade=True)}")
if req.disable_hf_transfer:
runner_lines.append("export HF_HUB_ENABLE_HF_TRANSFER=0")
runner_lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=4")
else:
runner_lines.append(f"python3 -c 'import hf_transfer' 2>/dev/null || {_pip_install_fallback_chain('hf_transfer', python_cmd='pip')}")
runner_lines.append("python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1")
runner_lines.append("export HF_HUB_DOWNLOAD_MAX_WORKERS=8")
# Surface whether the HF token actually reached THIS server, so a gated
# download's "not authorized" failure can be told apart from a missing
# token (the token is masked — we only print applied / not-set).
@@ -547,15 +572,19 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines.append(f' {hf_cmd} < /dev/null')
runner_lines.append('elif python3 -c "import huggingface_hub" 2>/dev/null; then')
runner_lines.append(' echo "hf CLI not found, using Python huggingface_hub..."')
runner_lines.append(f' python3 -c "import os; from huggingface_hub import snapshot_download; snapshot_download(\'{req.repo_id}\'{_dl_pyarg}, max_workers=8)"')
runner_lines.append(f' python3 -c "import os; from huggingface_hub import snapshot_download; snapshot_download(\'{req.repo_id}\'{_dl_pyarg}, max_workers={4 if req.disable_hf_transfer else 8})"')
runner_lines.append('else')
runner_lines.append(' echo "Installing huggingface-hub and dependencies..."')
runner_lines.append(' pip install --no-deps -q huggingface-hub 2>/dev/null')
runner_lines.append(' pip install -q filelock fsspec packaging pyyaml tqdm typer httpx requests hf_transfer 2>/dev/null')
runner_lines.append(" python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1")
runner_lines.append(f' python3 -c "import os; from huggingface_hub import snapshot_download; snapshot_download(\'{req.repo_id}\'{_dl_pyarg}, max_workers=8)"')
if req.disable_hf_transfer:
runner_lines.append(' pip install -q filelock fsspec packaging pyyaml tqdm typer httpx requests 2>/dev/null')
runner_lines.append(' export HF_HUB_ENABLE_HF_TRANSFER=0')
else:
runner_lines.append(' pip install -q filelock fsspec packaging pyyaml tqdm typer httpx requests hf_transfer 2>/dev/null')
runner_lines.append(" python3 -c 'import hf_transfer' 2>/dev/null && export HF_HUB_ENABLE_HF_TRANSFER=1")
runner_lines.append(f' python3 -c "import os; from huggingface_hub import snapshot_download; snapshot_download(\'{req.repo_id}\'{_dl_pyarg}, max_workers={4 if req.disable_hf_transfer else 8})"')
runner_lines.append('fi')
runner_lines.append('if [ $? -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $?)"; fi')
runner_lines.append('_ec=$?; if [ $_ec -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $_ec)"; fi')
runner_lines.append(f"rm -f {remote_runner}")
runner_lines.append('exec "${SHELL:-/bin/bash}"')
runner_path = TMUX_LOG_DIR / f"{session_id}_run.sh"
@@ -586,11 +615,11 @@ def setup_cookbook_routes() -> APIRouter:
# Detached path: no controlling TTY, so skip `< /dev/null`
# (handled by Popen stdin=DEVNULL) and don't keep a shell open.
lines.append(hf_cmd)
lines.append('if [ $? -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $?)"; fi')
lines.append('_ec=$?; if [ $_ec -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $_ec)"; fi')
else:
# < /dev/null suppresses interactive "update available? [Y/n]" prompt
lines.append(f"{hf_cmd} < /dev/null")
lines.append('if [ $? -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $?)"; fi')
lines.append('_ec=$?; if [ $_ec -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $_ec)"; fi')
lines.append(f"rm -f '{wrapper_script}'")
lines.append('exec "${SHELL:-/bin/bash}"')
wrapper_script.write_text("\n".join(lines) + "\n", encoding="utf-8")
@@ -672,11 +701,14 @@ def setup_cookbook_routes() -> APIRouter:
cwd=str(Path.home()),
)
else:
# LOCAL scan: run the interpreter directly. `python3` isn't a thing on
# Windows (it's `python`/`py`), and shell single-quoting of the path
# doesn't survive cmd.exe — so resolve the interpreter and exec it
# with the script path as an argv element (no shell quoting needed).
local_py = (
# LOCAL scan: use sys.executable (the venv Python Odysseus is already
# running under) — it's guaranteed real Python on all platforms.
# Falling back to which_tool on Windows risks hitting the Microsoft
# Store stub alias for "python3"/"python", which prints
# "Python was not found; run without arguments to install from the
# Microsoft Store" and exits 9009, producing empty stdout and a
# JSON parse error. sys.executable bypasses PATH entirely.
local_py = sys.executable or (
which_tool("python3") or which_tool("python")
or which_tool("py") or "python"
)
@@ -714,6 +746,8 @@ def setup_cookbook_routes() -> APIRouter:
entry["backend"] = m.get("backend")
if m.get("is_ollama"):
entry["is_ollama"] = True
if isinstance(m.get("gguf_files"), list):
entry["gguf_files"] = m["gguf_files"]
models.append(entry)
except Exception as e:
logger.warning(f"Failed to parse cached models: {e}")
@@ -775,6 +809,80 @@ def setup_cookbook_routes() -> APIRouter:
finally:
db.close()
def _auto_register_llm_endpoint(req: ServeRequest, remote: str | None) -> str | None:
    """Register a freshly-served LLM as a model endpoint.

    This makes the model appear in the model picker without a manual /setup
    step — the text-model sibling of _auto_register_image_endpoint.

    Cookbook serve commands launch an OpenAI-compatible server (llama.cpp's
    llama-server, vLLM, SGLang, or Ollama) on a known port. We point an
    endpoint at that server's /v1; the picker auto-discovers the model id by
    probing /v1/models and dims the endpoint until the server is reachable,
    so registering immediately (before the server finishes loading) is safe.

    Args:
        req: The serve request; ``req.cmd`` is scanned for the port and the
            tool-calling flag, ``req.repo_id`` supplies the display name.
        remote: SSH target (``user@host`` or a bare host/alias) for remote
            serves, or a falsy value for a local serve.

    Returns:
        The id of the created or reused endpoint, or ``None`` if the database
        write failed (the failure is logged and rolled back, never raised).
    """
    import re
    from core.database import SessionLocal, ModelEndpoint

    # Port: an explicit --port wins. Accept both "--port 8000" and the
    # argparse-equivalent "--port=8000"; matching only the space form would
    # silently register the 8080 default for a server on another port.
    # Otherwise fall back by backend — Ollama is the only server in our
    # generated commands that omits --port.
    port_match = re.search(r'--port(?:=|\s+)(\d+)', req.cmd)
    if port_match:
        port = int(port_match.group(1))
    elif "ollama" in req.cmd:
        port = 11434
    else:
        port = 8080  # llama.cpp's llama-server default — the Apple Silicon path

    # Determine host (mirrors the image path: SSH alias for remote serves).
    # Strip any "user@" prefix so only the host part lands in the URL.
    if remote:
        host = remote.split("@")[-1] if "@" in remote else remote
    else:
        host = "localhost"

    base_url = f"http://{host}:{port}/v1"
    short_name = req.repo_id.split("/")[-1] if "/" in req.repo_id else req.repo_id
    display_name = short_name or "Local model"

    # If the serve command opts models into OpenAI tool-calling, record it so
    # agent_loop trusts emitted tool_calls instead of the name heuristic.
    # None means "unknown": an existing endpoint's setting is left untouched.
    supports_tools = True if "--enable-auto-tool-choice" in req.cmd else None

    db = SessionLocal()
    try:
        # Reuse an endpoint already pointed at this URL instead of duplicating.
        existing = db.query(ModelEndpoint).filter(ModelEndpoint.base_url == base_url).first()
        if existing:
            existing.is_enabled = True
            existing.model_type = "llm"
            existing.name = display_name
            if supports_tools is not None:
                existing.supports_tools = supports_tools
            db.commit()
            logger.info(f"Updated existing local model endpoint: {base_url}")
            return existing.id

        ep_id = f"local-{uuid.uuid4().hex[:8]}"
        ep = ModelEndpoint(
            id=ep_id,
            name=display_name,
            base_url=base_url,
            api_key=None,
            is_enabled=True,
            model_type="llm",
            supports_tools=supports_tools,
        )
        db.add(ep)
        db.commit()
        logger.info(f"Auto-registered local model endpoint: {display_name} @ {base_url}")
        return ep_id
    except Exception as e:
        # Best-effort: a failed registration must not fail the serve launch.
        logger.error(f"Failed to auto-register local model endpoint: {e}")
        db.rollback()
        return None
    finally:
        db.close()
@router.post("/api/model/serve")
async def model_serve(request: Request, req: ServeRequest):
"""Launch a model server in a tmux session (or PowerShell background process on Windows).
@@ -800,8 +908,17 @@ def setup_cookbook_routes() -> APIRouter:
# many downstream `"engine" in req.cmd` membership checks can't hit
# `TypeError: argument of type 'NoneType'` (a 500 instead of a clean 400).
req.cmd = _validate_serve_cmd(req.cmd) or ""
req.cmd = _venv_safe_local_pip_install_cmd(
req.cmd,
local=not bool(req.remote_host),
in_venv=sys.prefix != sys.base_prefix,
)
is_pip_install = bool(req.cmd and "pip install" in req.cmd)
if is_pip_install:
# Keep big dependency wheel builds (vLLM, …) off the home filesystem's
# pip cache so they don't fail mid-build with "No space left" (#1219)
# and leave the dep installed-but-unusable (#1459).
req.cmd = _pip_install_no_cache(req.cmd)
# PEP-508-style package spec — letters, digits, `.-_` for the
# name; `[` `]` for extras; `<>=!~,` for version specifiers.
# v2 review HIGH-14: tightened from the previous regex which
@@ -922,7 +1039,7 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines.append(' if ! python3 -c "import llama_cpp" 2>/dev/null; then')
runner_lines.append(' pkg install -y cmake 2>/dev/null')
runner_lines.append(' pip install numpy diskcache jinja2 2>/dev/null')
runner_lines.append(' CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_LLAMAFILE=OFF" pip install llama-cpp-python --no-build-isolation --no-cache-dir 2>&1 || true')
runner_lines.append(' CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_LLAMAFILE=OFF" pip install \'llama-cpp-python[server]\' --no-build-isolation --no-cache-dir 2>&1 || true')
runner_lines.append(' fi')
runner_lines.append('elif ! command -v llama-server &>/dev/null; then')
runner_lines.append(' echo "Native llama-server not found — building from source (one-time, may take a few minutes)..."')
@@ -944,61 +1061,45 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines.append(' && cmake --build build -j"$NPROC" --target llama-server \\')
runner_lines.append(' && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
runner_lines.append(' else')
# Detect pip-installed nvcc (from vLLM/nvidia CUDA wheels) and put
# it on PATH so cmake's CUDA configure can find it. We check the
# same three layouts as entrypoint.sh:
# nvidia/cu13 — nvidia-nvcc-cu13
# nvidia/cu12 — nvidia-nvcc-cu12
# nvidia/cuda_nvcc — nvidia-cuda-nvcc-cu12 (sub-package style)
runner_lines.append(' for _cudir in ~/.local/lib/python*/site-packages/nvidia/cu13 ~/.local/lib/python*/site-packages/nvidia/cu12 ~/.local/lib/python*/site-packages/nvidia/cuda_nvcc; do')
runner_lines.append(' [ -x "$_cudir/bin/nvcc" ] && export CUDA_HOME="$_cudir" && export PATH="$_cudir/bin:$PATH" && break')
runner_lines.append(' done')
# rm -rf build so a prior poisoned CMakeCache.txt (e.g. from a
# failed CUDA attempt) doesn't cause the next configure to reuse
# stale settings and silently produce a CPU-only binary.
runner_lines.append(' cd ~/llama.cpp && rm -rf build')
runner_lines.append(' if command -v nvcc &>/dev/null; then')
runner_lines.append(' echo "[odysseus] CUDA nvcc found — building llama-server with CUDA (GPU) support..."')
runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON \\')
runner_lines.append(' && cmake --build build -j"$NPROC" --target llama-server \\')
runner_lines.append(' && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
runner_lines.append(' else')
runner_lines.append(' echo "[odysseus] WARNING: nvcc not found — building llama-server for CPU only."')
runner_lines.append(' echo "[odysseus] GPU inference will not be available for this llama.cpp build."')
runner_lines.append(' echo "[odysseus] To get a GPU build, first install vLLM via Cookbook -> Dependencies"')
runner_lines.append(' echo "[odysseus] (its CUDA wheels include nvcc), then re-launch this serve task."')
runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release \\')
runner_lines.append(' && cmake --build build -j"$NPROC" --target llama-server \\')
runner_lines.append(' && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
runner_lines.append(' fi')
_append_llama_cpp_linux_accel_build_lines(runner_lines)
runner_lines.append(' fi')
runner_lines.append(' # If the native build failed, fall back to the Python bindings.')
runner_lines.append(' if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then')
runner_lines.append(' echo "llama-server build failed — installing Python bindings as fallback..."')
runner_lines.append(' pip install --user --break-system-packages -q llama-cpp-python 2>/dev/null || pip install -q llama-cpp-python 2>/dev/null || true')
runner_lines.append(f" {_pip_install_fallback_chain('llama-cpp-python[server]', python_cmd='pip')} || true")
runner_lines.append(' fi')
runner_lines.append(' if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then')
runner_lines.append(' echo "ERROR: llama.cpp serving is not available after install/build attempts."')
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
runner_lines.append(' fi')
runner_lines.append('fi')
elif "ollama" in req.cmd:
handled_ollama_serve = True
_ollama_port = "11434"
_ollama_match = re.search(r"OLLAMA_HOST=[^\s:]+:(\d+)", req.cmd)
if _ollama_match:
_ollama_port = _ollama_match.group(1)
_ollama_default_host = "0.0.0.0" if remote else "127.0.0.1"
_ollama_host, _ollama_port = _ollama_bind_from_cmd(
req.cmd,
default_host=_ollama_default_host,
)
# Ollama can be a host binary, a system service, or a Docker
# container. If the HTTP API is already reachable, the model is
# already served and we should not require a host `ollama` CLI.
runner_lines.append(f'ODYSSEUS_OLLAMA_HOST={_bash_squote(_ollama_host)}')
runner_lines.append(f'ODYSSEUS_OLLAMA_PORT="{_ollama_port}"')
runner_lines.append('ODYSSEUS_OLLAMA_URL=""')
runner_lines.append('for _ody_ollama_port in "$ODYSSEUS_OLLAMA_PORT" 11434; do')
runner_lines.append(' [ -z "$_ody_ollama_port" ] && continue')
runner_lines.append(' for _ody_ollama_host in 127.0.0.1 localhost host.docker.internal; do')
runner_lines.append(' _ody_ollama_url="http://${_ody_ollama_host}:${_ody_ollama_port}"')
runner_lines.append(' if curl -sf "$_ody_ollama_url/api/tags" >/dev/null 2>&1; then')
runner_lines.append(' ODYSSEUS_OLLAMA_URL="$_ody_ollama_url"')
runner_lines.append(' ODYSSEUS_OLLAMA_PORT="$_ody_ollama_port"')
runner_lines.append(' break 2')
runner_lines.append(' fi')
runner_lines.append('for _ody_ollama_try in $(seq 1 20); do')
runner_lines.append(' for _ody_ollama_port in "$ODYSSEUS_OLLAMA_PORT" 11434; do')
runner_lines.append(' [ -z "$_ody_ollama_port" ] && continue')
runner_lines.append(' for _ody_ollama_host in 127.0.0.1 localhost host.docker.internal; do')
runner_lines.append(' _ody_ollama_url="http://${_ody_ollama_host}:${_ody_ollama_port}"')
runner_lines.append(' if curl -sf "$_ody_ollama_url/api/tags" >/dev/null 2>&1; then')
runner_lines.append(' ODYSSEUS_OLLAMA_URL="$_ody_ollama_url"')
runner_lines.append(' ODYSSEUS_OLLAMA_PORT="$_ody_ollama_port"')
runner_lines.append(' break 3')
runner_lines.append(' fi')
runner_lines.append(' done')
runner_lines.append(' done')
runner_lines.append(' [ "$_ody_ollama_try" -eq 1 ] && echo "[odysseus] Waiting for an existing Ollama API on ports ${ODYSSEUS_OLLAMA_PORT}/11434..."')
runner_lines.append(' sleep 1')
runner_lines.append('done')
runner_lines.append('if [ -n "$ODYSSEUS_OLLAMA_URL" ]; then')
runner_lines.append(' if [ "$ODYSSEUS_OLLAMA_PORT" != "' + _ollama_port + '" ]; then')
@@ -1015,8 +1116,12 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines.append(' echo "=== Process exited with code 127 ==="')
runner_lines.append(' exec bash -i')
runner_lines.append('fi')
runner_lines.append('echo "Starting ollama server on 0.0.0.0:${ODYSSEUS_OLLAMA_PORT}..."')
runner_lines.append('OLLAMA_HOST="0.0.0.0:${ODYSSEUS_OLLAMA_PORT}" ollama serve')
runner_lines.append('ODYSSEUS_OLLAMA_URL="http://${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT}"')
if remote and _ollama_host in ("0.0.0.0", "::"):
runner_lines.append('echo "[odysseus] WARNING: remote Ollama will bind to ${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT} so Odysseus can reach it from this host."')
runner_lines.append('echo "[odysseus] Ollama has no built-in authentication; expose this only on a trusted LAN/VPN or provide an explicit OLLAMA_HOST with your own access controls."')
runner_lines.append('echo "Starting ollama server on ${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT}..."')
runner_lines.append('OLLAMA_HOST="${ODYSSEUS_OLLAMA_HOST}:${ODYSSEUS_OLLAMA_PORT}" ollama serve')
runner_lines.append('_ody_exit=$?')
runner_lines.append('echo')
runner_lines.append('echo "=== Process exited with code ${_ody_exit} ==="')
@@ -1032,19 +1137,24 @@ def setup_cookbook_routes() -> APIRouter:
# find the `vllm` CLI ("command not found"). Mirrors llama.cpp above.
runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
runner_lines.append('if ! command -v vllm &>/dev/null; then')
runner_lines.append(' echo "ERROR: vLLM is not installed. Open Cookbook -> Dependencies and install vllm on this server, then launch again."')
runner_lines.append(' echo "ERROR: vLLM is not installed."')
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
runner_lines.append('fi')
elif "sglang.launch_server" in req.cmd:
runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
runner_lines.append('if ! python3 -c "import sglang" 2>/dev/null; then')
runner_lines.append(' echo "ERROR: SGLang is not installed. Open Cookbook -> Dependencies and install sglang on this server, then launch again."')
runner_lines.append('if ! command -v sglang &>/dev/null; then')
runner_lines.append(' echo "ERROR: SGLang is not installed."')
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
runner_lines.append('elif ! ODYSSEUS_SGLANG_IMPORT_ERROR="$(python3 -c "import sglang" 2>&1)"; then')
runner_lines.append(' echo "ERROR: SGLang is installed but failed to import."')
runner_lines.append(' printf "%s\\n" "$ODYSSEUS_SGLANG_IMPORT_ERROR"')
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
runner_lines.append('fi')
elif "scripts/diffusion_server.py" in req.cmd or ".diffusion_server.py" in req.cmd:
runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
runner_lines.append('if ! python3 -c "import torch, diffusers" 2>/dev/null; then')
runner_lines.append(' echo "ERROR: Diffusion serving requires PyTorch + diffusers. Open Cookbook -> Dependencies and install diffusers on this server, then launch again."')
runner_lines.append('if ! ODYSSEUS_DIFFUSION_IMPORT_ERROR="$(python3 -c "import torch, diffusers" 2>&1)"; then')
runner_lines.append(' echo "ERROR: Diffusion serving requires PyTorch + diffusers."')
runner_lines.append(' printf "%s\\n" "$ODYSSEUS_DIFFUSION_IMPORT_ERROR"')
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
runner_lines.append('fi')
@@ -1116,11 +1226,16 @@ def setup_cookbook_routes() -> APIRouter:
stderr = (await proc.stderr.read()).decode(errors="replace")
return {"ok": False, "error": stderr, "session_id": session_id}
# Auto-register as model endpoint if serving a diffusion model
# Auto-register a model endpoint so the served model shows up in the model
# picker with no manual /setup step. Diffusion models get an image
# endpoint; any other real model serve (i.e. not a pip-install task) gets
# a local LLM endpoint pointed at its /v1.
endpoint_id = None
is_diffusion = "diffusion_server.py" in req.cmd
if is_diffusion:
endpoint_id = _auto_register_image_endpoint(req, remote)
elif not is_pip_install:
endpoint_id = _auto_register_llm_endpoint(req, remote)
# Log to assistant
try:
@@ -1357,9 +1472,16 @@ def setup_cookbook_routes() -> APIRouter:
total_mb = max(0, int(total_bytes / (1024 * 1024)))
used_mb = max(0, min(total_mb, int(used_bytes / (1024 * 1024))))
free_mb = max(0, total_mb - used_mb)
# GTT = the system-RAM pool the GPU pages into when VRAM is full.
# On a discrete card a large gtt_used means the model spilled past
# VRAM into RAM over PCIe — much slower. Surface it so the UI can
# warn "spilling to RAM" instead of the user wondering why it's slow.
gtt_used_raw = await _gpu_read_file(f"{base}/mem_info_gtt_used", host, ssh_port)
gtt_used_mb = max(0, int(int(gtt_used_raw) / (1024 * 1024))) if (gtt_used_raw and gtt_used_raw.isdigit()) else 0
gpus.append({
"index": len(gpus), "name": name, "uuid": entry,
"free_mb": free_mb, "total_mb": total_mb, "used_mb": used_mb,
"gtt_used_mb": gtt_used_mb,
"util_pct": 0, "busy": bool(total_mb and (free_mb / total_mb) < 0.85),
"processes": [], "backend": "rocm", "source": "amd-sysfs",
"unified_memory": unified,
@@ -1461,6 +1583,46 @@ def setup_cookbook_routes() -> APIRouter:
if gpus:
return {"ok": True, "gpus": gpus, "backend": "cuda", "source": "nvidia-smi"}
# Local Apple Silicon / Metal fallback. macOS has no nvidia-smi and no
# Linux /sys/class/drm tree, but services.hwfit.hardware already knows
# how to size the shared unified-memory GPU budget. Keep this route in
# sync so Cookbook's GPU picker doesn't show "nvidia-smi not found" on
# native Mac launches.
if not host and sys.platform == "darwin":
try:
from services.hwfit.hardware import detect_system
info = detect_system(fresh=True)
backend = str(info.get("backend") or "").lower()
if backend in {"metal", "mps", "apple"} and info.get("gpu_count", 0) > 0:
total_mb = int(float(info.get("gpu_vram_gb") or info.get("total_ram_gb") or 0) * 1024)
free_mb = int(float(info.get("available_ram_gb") or 0) * 1024)
if total_mb and (free_mb <= 0 or free_mb > total_mb):
free_mb = total_mb
used_mb = max(0, total_mb - max(0, free_mb))
return {
"ok": True,
"gpus": [{
"index": 0,
"name": info.get("gpu_name") or info.get("cpu_name") or "Apple Silicon GPU",
"uuid": "apple-metal-0",
"free_mb": max(0, free_mb),
"total_mb": max(0, total_mb),
"used_mb": used_mb,
"util_pct": 0,
"busy": bool(total_mb and (free_mb / total_mb) < 0.5),
"processes": [],
"backend": "metal",
"source": "apple-metal",
"unified_memory": True,
}],
"backend": "metal",
"source": "apple-metal",
"fallback_from": "nvidia-smi",
"nvidia_error": nvidia_error,
}
except Exception as e:
logger.warning("Apple Metal GPU fallback failed: %s", e)
amd_gpus = await _probe_amd_sysfs(host, ssh_port)
if amd_gpus:
return {
@@ -1607,6 +1769,33 @@ def setup_cookbook_routes() -> APIRouter:
disk_tasks = on_disk.get("tasks") or [] if isinstance(on_disk, dict) else []
incoming_tasks = data.get("tasks") if isinstance(data.get("tasks"), list) else []
# Anti-poisoning guard: a stale browser tab can keep POSTing a
# download task as status='done' from before the strict-finish
# fix landed, undoing any server-side correction. For each
# incoming "done" download, override to "running" if the last
# shard pattern says N<total AND no DOWNLOAD_OK/DOWNLOAD_FAILED/
# /snapshots/ sentinel is in the output.
import re as _re_dl
for _it in incoming_tasks:
if (not isinstance(_it, dict)) or _it.get("type") != "download" or _it.get("status") != "done":
continue
_out = _it.get("output") or ""
if ("DOWNLOAD_OK" in _out) or ("DOWNLOAD_FAILED" in _out) or ("/snapshots/" in _out):
continue
_shards = _re_dl.findall(r"model-(\d+)-of-(\d+)\.safetensors", _out)
if _shards:
_n, _tot = _shards[-1]
if int(_n) < int(_tot):
logger.info(f"cookbook state POST: rejecting stale done for {_it.get('sessionId')} "
f"(last shard {_n}/{_tot}, no DOWNLOAD_OK)")
_it["status"] = "running"
else:
_completed = _out.count("Download complete")
_starts = _out.count("Downloading '")
if _starts > _completed:
logger.info(f"cookbook state POST: rejecting stale done for {_it.get('sessionId')} "
f"({_completed}/{_starts} files complete, no DOWNLOAD_OK)")
_it["status"] = "running"
incoming_ids = {t.get("sessionId") for t in incoming_tasks if isinstance(t, dict) and t.get("sessionId")}
import time as _t
now_ms = int(_t.time() * 1000)
@@ -1763,6 +1952,43 @@ def setup_cookbook_routes() -> APIRouter:
def _cookbook_tasks_status_sync():
import subprocess
def _download_cache_complete(repo_id: str, remote_host: str = "", ssh_port: str = "") -> bool:
    """Best-effort check for a completed HF cache entry.

    tmux output can stop at a stale progress line if the pane/session
    disappears before Cookbook captures the final DOWNLOAD_OK marker.
    In that case, trust the cache shape: a snapshot directory with files
    and no *.incomplete blobs means HuggingFace finished materializing the
    model.

    Args:
        repo_id: Hub repo id in ``org/name`` form; anything without a ``/``
            is rejected without probing.
        remote_host: SSH target to probe; empty means probe this machine.
        ssh_port: SSH port; only passed to ssh when set and not "22".

    Returns:
        True only when the probe exits 0. Any failure (timeout, missing
        interpreter, ssh error) is reported as False rather than raised.
    """
    import sys

    if not repo_id or "/" not in repo_id:
        return False

    # Runs as a one-liner so the same text can be shipped over ssh.
    # Cache root resolution: HF_HUB_CACHE is the current variable name,
    # HUGGINGFACE_HUB_CACHE the legacy one, then $HF_HOME/hub.
    probe = (
        "import os,sys;"
        "repo=sys.argv[1];"
        "env=os.environ;"
        "base=env.get('HF_HUB_CACHE') or env.get('HUGGINGFACE_HUB_CACHE') "
        "or os.path.join(env.get('HF_HOME', os.path.expanduser('~/.cache/huggingface')), 'hub');"
        "d=os.path.join(base,'models--'+repo.replace('/','--'));"
        "snap=os.path.join(d,'snapshots');"
        "ok=os.path.isdir(snap) and any(os.path.isdir(os.path.join(snap,x)) and os.listdir(os.path.join(snap,x)) for x in os.listdir(snap));"
        "blobs=os.path.join(d,'blobs');"
        "inc=os.path.isdir(blobs) and any(x.endswith('.incomplete') for x in os.listdir(blobs));"
        "sys.exit(0 if ok and not inc else 1)"
    )
    try:
        if remote_host:
            ssh_base = ["ssh"]
            if ssh_port and ssh_port != "22":
                ssh_base.extend(["-p", str(ssh_port)])
            # The remote side re-parses the command through a shell, so every
            # argv element is quoted individually.
            shell_cmd = " ".join(shlex.quote(x) for x in ("python3", "-c", probe, repo_id))
            argv = ssh_base + [remote_host, shell_cmd]
        else:
            # Local probe: use the interpreter we are already running under.
            # A bare "python3" is not guaranteed to exist on PATH (Windows).
            argv = [sys.executable or "python3", "-c", probe, repo_id]
        proc = subprocess.run(argv, timeout=12, capture_output=True)
        return proc.returncode == 0
    except Exception:
        # Deliberately broad: this is an opportunistic status hint only.
        return False
# Load saved tasks from cookbook state
tasks = []
if _cookbook_state_path.exists():
@@ -1902,14 +2128,21 @@ def setup_cookbook_routes() -> APIRouter:
# persists after the process exits, so a finished download still has a
# snapshot to classify (DOWNLOAD_OK / exit marker) — evaluate it even
# when the PID is gone instead of blindly reporting "stopped".
download_zero_files = False
status = "unknown"
if is_alive or (local_win_task and full_snapshot):
lower = full_snapshot.lower()
has_exit = "=== process exited with code" in lower
exit_match = re.search(r"=== process exited with code\s+(-?\d+)", full_snapshot, re.I)
has_exit = exit_match is not None
exit_code = int(exit_match.group(1)) if exit_match else None
has_error = "error" in lower or "failed" in lower or "traceback" in lower
if has_exit and task_type == "serve":
# Serve tasks that exit are always errors — they should run indefinitely
status = "error"
elif has_exit and task_type == "download":
# Dependency installs are tracked as download tasks but only
# emit the generic runner exit marker, not HF download markers.
status = "completed" if exit_code == 0 else "error"
elif has_exit and "unrecognized arguments" in lower:
status = "error"
elif has_error and not ("application startup complete" in lower):
@@ -1918,7 +2151,11 @@ def setup_cookbook_routes() -> APIRouter:
# Only download tasks treat 100% as "completed".
# Serve tasks log 100%|██████| during inference progress
# (diffusion sampling, etc.) — that's "running", not done.
status = "completed"
if re.search(r"Fetching\s+0\s+files", full_snapshot, re.IGNORECASE):
status = "error"
download_zero_files = True
else:
status = "completed"
elif "application startup complete" in lower:
status = "ready"
elif not is_alive:
@@ -1928,7 +2165,14 @@ def setup_cookbook_routes() -> APIRouter:
status = "running"
else:
# Session is dead — check if it completed or crashed
status = "stopped"
if task_type == "download" and _download_cache_complete(_payload.get("repo_id") or model, remote, str(_tport or "")):
status = "completed"
if not progress_text:
progress_text = "Download complete"
if not full_snapshot:
full_snapshot = "DOWNLOAD_OK"
else:
status = "stopped"
# Parse structured phase info — single source of truth for the UI
phase_info = _parse_serve_phase(full_snapshot, task_type) if (task_type == "serve" and status == "running" and full_snapshot) else {}
@@ -1938,6 +2182,8 @@ def setup_cookbook_routes() -> APIRouter:
diagnosis = _diagnose_serve_output(full_snapshot) if task_type == "serve" and full_snapshot else None
if diagnosis and status in {"running", "unknown", "stopped"}:
status = "error"
if download_zero_files:
diagnosis = {"message": "No matching files were downloaded. The model repo or filename/quant pattern may be wrong (for example a ':Q4_K_M' tag that does not exist in the repo). Check the repo and the include/quant pattern."}
output_tail = "\n".join(full_snapshot.splitlines()[-12:]) if full_snapshot else ""
results.append({

View File

@@ -152,7 +152,7 @@ def _resolve_user_upload_path(
owner=owner,
auth_manager=auth_manager,
)
if not resolved:
if not isinstance(resolved, dict) or not resolved:
return None
path = resolved.get("path")
upload_dir = getattr(upload_handler, "upload_dir", None)
@@ -203,6 +203,8 @@ def _assert_pdf_marker_upload_owned(
def _derive_title(content: str) -> str:
"""Derive a title from document content."""
import re
if not isinstance(content, str):
return "Untitled"
text = content.strip()
if not text:
return "Untitled"

View File

@@ -15,6 +15,21 @@ from src.auth_helpers import get_current_user
logger = logging.getLogger(__name__)
def _aggregate_language_facets(lang_rows):
    """Total the document counts per display language for the library facet.

    Rows with a falsy language (NULL or empty) are folded into the same
    "text" bucket as rows explicitly labelled "text", because the language
    filter treats them as one group. Their counts are therefore ADDED: keying
    both to "text" in a dict comprehension would let one row overwrite the
    other and make the facet undercount what the filter returns.

    Args:
        lang_rows: Iterable of ``(language, count)`` pairs.

    Returns:
        Dict mapping display language to its summed count, in first-seen order.
    """
    totals = {}
    for row in lang_rows:
        language, count = row
        bucket = "text" if not language else language
        running = totals[bucket] if bucket in totals else 0
        totals[bucket] = running + count
    return totals
from routes.document_helpers import (
DocumentCreate, DocumentUpdate, DocumentPatch,
@@ -145,7 +160,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
create_form_markdown_document,
create_plain_pdf_document,
)
from src.document_processor import _process_pdf
from src.document_processor import _process_pdf, strip_pdf_content_marker
import os
from src.auth_helpers import require_privilege
@@ -184,7 +199,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
title = os.path.splitext(meta.get("original_name") or meta.get("name") or upload_id)[0]
try:
body_text = _process_pdf(pdf_path).lstrip("\n[PDF content]:").strip()
body_text = strip_pdf_content_marker(_process_pdf(pdf_path))
except Exception:
body_text = None
@@ -258,7 +273,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
)
lang_q = _owner_session_filter(lang_q, user)
lang_rows = lang_q.group_by(Document.language).all()
languages = {lang or "text": cnt for lang, cnt in lang_rows}
languages = _aggregate_language_facets(lang_rows)
# Session count (owner-filtered)
sc_q = (
@@ -402,7 +417,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
text extraction was wired, plus for scanned/image-only PDFs where the
VL model picks up text the basic pypdf path missed."""
import re
from src.document_processor import _process_pdf
from src.document_processor import _process_pdf, strip_pdf_content_marker
from src.pdf_form_doc import find_source_upload_id
user = get_current_user(request)
@@ -423,7 +438,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
raise HTTPException(404, "Source PDF could not be located")
try:
body_text = _process_pdf(pdf_path).lstrip("\n[PDF content]:").strip()
body_text = strip_pdf_content_marker(_process_pdf(pdf_path))
except Exception as e:
logger.error(f"extract_pdf_text failed for {pdf_path}: {e}")
raise HTTPException(500, f"Extraction failed: {e}")
@@ -593,6 +608,15 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
if req.session_id is not None:
# Empty string = unlink from session
doc.session_id = req.session_id if req.session_id else None
if not req.session_id:
# Tab closed / doc detached from its session — drop the
# in-memory active-doc pointer so the last-resort injection
# path doesn't re-surface this doc in a later chat (#1160).
try:
from src.tool_implementations import clear_active_document
clear_active_document(doc_id)
except Exception:
pass
db.commit()
db.refresh(doc)
return _doc_to_dict(doc)
@@ -615,6 +639,13 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
raise HTTPException(404, "Document not found")
_verify_doc_owner(db, doc, user)
doc.is_active = False
# Closed/deleted — drop the in-memory active-doc pointer so it isn't
# re-injected into a later, unrelated chat (#1160).
try:
from src.tool_implementations import clear_active_document
clear_active_document(doc_id)
except Exception:
pass
db.commit()
return {"status": "deleted", "id": doc_id}
except HTTPException:
@@ -885,7 +916,7 @@ def setup_document_routes(session_manager, upload_handler=None) -> APIRouter:
for i, doc in enumerate(batch):
if i >= len(verdicts):
break
verdict = verdicts[i].lower().strip()
verdict = str(verdicts[i] or "").lower().strip()
if verdict == "junk":
doc.tidy_verdict = "junk"
db.delete(doc)

View File

@@ -67,6 +67,14 @@ def _summary(d: EditorDraft) -> Dict[str, Any]:
}
def _load_payload(raw: Optional[str]) -> Dict[str, Any]:
try:
payload = json.loads(raw) if raw else {}
except Exception:
return {}
return payload if isinstance(payload, dict) else {}
def setup_editor_draft_routes() -> APIRouter:
router = APIRouter(tags=["editor-drafts"])
@@ -93,13 +101,9 @@ def setup_editor_draft_routes() -> APIRouter:
).first()
if not d or not _owns(d, user):
raise HTTPException(404, "Draft not found")
try:
payload = json.loads(d.payload) if d.payload else {}
except Exception:
payload = {}
return {
**_summary(d),
"payload": payload,
"payload": _load_payload(d.payload),
}
finally:
db.close()

View File

@@ -15,7 +15,6 @@ and `email_pollers.py` (the background loops):
import os
import imaplib
import smtplib
import ssl
import email as email_mod
import email.header
import email.utils
@@ -33,47 +32,43 @@ from fastapi import Query, HTTPException, Request
from pydantic import BaseModel
from typing import Optional, List
from src.auth_helpers import get_current_user
from src.auth_helpers import _auth_disabled, get_current_user
from src.secret_storage import decrypt as _decrypt
logger = logging.getLogger(__name__)
def _send_smtp_message(cfg: dict, from_addr: str, recipients: list[str], message: str | bytes, timeout: int = 30) -> None:
"""Send through SMTP using the conventional TLS mode for the configured port.
def _smtp_security_mode(cfg: dict) -> str:
raw = str(cfg.get("smtp_security") or "").strip().lower()
if raw in {"ssl", "starttls", "none"}:
return raw
port = int(cfg.get("smtp_port") or 465)
if port == 587:
return "starttls"
return "ssl"
Account settings only store host/port today. Port 465 is implicit TLS
(SMTP_SSL); port 587 is plain SMTP upgraded with STARTTLS. Using SSL
directly against 587 raises the classic "[SSL: WRONG_VERSION_NUMBER]"
error even when credentials are correct.
"""
def _send_smtp_message(cfg: dict, from_addr: str, recipients: list[str], message: str | bytes, timeout: int = 30) -> None:
"""Send through SMTP using the configured transport security mode."""
host = cfg["smtp_host"]
port = int(cfg.get("smtp_port") or 465)
user = cfg.get("smtp_user") or ""
password = cfg.get("smtp_password") or ""
def _send_starttls(starttls_port: int = 587) -> None:
with smtplib.SMTP(host, starttls_port, timeout=timeout) as smtp:
smtp.starttls()
if user and password:
smtp.login(user, password)
smtp.sendmail(from_addr, recipients, message)
security = _smtp_security_mode(cfg)
if port == 587:
_send_starttls(587)
return
try:
if security == "ssl":
with smtplib.SMTP_SSL(host, port, timeout=timeout) as smtp:
if user and password:
smtp.login(user, password)
smtp.sendmail(from_addr, recipients, message)
return
except (TimeoutError, ssl.SSLError) as e:
if port == 465:
logger.warning("SMTP implicit TLS on %s:465 failed (%s); retrying STARTTLS on 587", host, e)
_send_starttls(587)
return
raise
with smtplib.SMTP(host, port, timeout=timeout) as smtp:
if security == "starttls":
smtp.starttls()
if user and password:
smtp.login(user, password)
smtp.sendmail(from_addr, recipients, message)
def _strip_think(text: str) -> str:
@@ -152,6 +147,8 @@ def _require_auth(request: Request) -> str:
u = get_current_user(request)
if u:
return u
if _auth_disabled():
return ""
auth_mgr = getattr(request.app.state, "auth_manager", None)
if auth_mgr is not None and getattr(auth_mgr, "is_configured", False):
raise HTTPException(401, "Not authenticated")
@@ -300,7 +297,8 @@ def _init_scheduled_db():
send_at TEXT NOT NULL,
created_at TEXT NOT NULL,
status TEXT NOT NULL DEFAULT 'pending',
error TEXT
error TEXT,
owner TEXT DEFAULT ''
)
""")
# Email summary cache (keyed by Message-ID)
@@ -438,6 +436,35 @@ def _init_scheduled_db():
conn.execute("ALTER TABLE scheduled_emails ADD COLUMN account_id TEXT")
if "odysseus_kind" not in cols:
conn.execute("ALTER TABLE scheduled_emails ADD COLUMN odysseus_kind TEXT")
if "owner" not in cols:
conn.execute("ALTER TABLE scheduled_emails ADD COLUMN owner TEXT DEFAULT ''")
conn.execute("CREATE INDEX IF NOT EXISTS ix_scheduled_emails_owner_status ON scheduled_emails(owner, status)")
# Backfill owner on legacy rows from the owning email account so the
# owner-scoped list/cancel routes surface pre-migration scheduled
# sends to the right user (the poller already resolves these by
# account at send time; this aligns the UI with that).
legacy_accounts = conn.execute(
"SELECT DISTINCT account_id FROM scheduled_emails "
"WHERE (owner IS NULL OR owner = '') AND account_id IS NOT NULL AND account_id != ''"
).fetchall()
if legacy_accounts:
try:
from core.database import SessionLocal as _SL, EmailAccount as _EA
_db = _SL()
try:
for (acct_id,) in legacy_accounts:
row = _db.query(_EA.owner).filter(_EA.id == acct_id).first()
acct_owner = (row[0] or "") if row else ""
if acct_owner:
conn.execute(
"UPDATE scheduled_emails SET owner = ? "
"WHERE account_id = ? AND (owner IS NULL OR owner = '')",
(acct_owner, acct_id),
)
finally:
_db.close()
except Exception:
pass
except Exception:
pass
# Lazy migration: add turns_json to email_boundaries for server-side
@@ -541,6 +568,7 @@ def _get_email_config(account_id: str | None = None, owner: str = "") -> dict:
"account_name": row.name,
"smtp_host": row.smtp_host or "",
"smtp_port": int(row.smtp_port or 465),
"smtp_security": _smtp_security_mode({"smtp_security": getattr(row, "smtp_security", ""), "smtp_port": row.smtp_port}),
"smtp_user": row.smtp_user or "",
"smtp_password": _decrypt(row.smtp_password or ""),
"imap_host": row.imap_host or "",
@@ -567,6 +595,10 @@ def _get_email_config(account_id: str | None = None, owner: str = "") -> dict:
"account_name": "legacy",
"smtp_host": settings.get("smtp_host", os.environ.get("SMTP_HOST", "")),
"smtp_port": int(settings.get("smtp_port", os.environ.get("SMTP_PORT", "465")) or 465),
"smtp_security": _smtp_security_mode({
"smtp_security": settings.get("smtp_security", os.environ.get("SMTP_SECURITY", "")),
"smtp_port": settings.get("smtp_port", os.environ.get("SMTP_PORT", "465")),
}),
"smtp_user": settings.get("smtp_user", os.environ.get("SMTP_USER", "")),
"smtp_password": settings.get("smtp_password", os.environ.get("SMTP_PASSWORD", "")),
"imap_host": settings.get("imap_host", os.environ.get("IMAP_HOST", "")),
@@ -606,7 +638,32 @@ def _list_email_accounts() -> list[dict]:
# ── IMAP helpers ──
_IMAP_TIMEOUT_SECONDS = 15
def _coerce_imap_timeout_seconds(raw: str | None) -> int:
try:
value = int(raw or "30")
except (TypeError, ValueError):
value = 30
return max(5, min(value, 300))
_IMAP_TIMEOUT_SECONDS = _coerce_imap_timeout_seconds(os.environ.get("ODYSSEUS_IMAP_TIMEOUT_SECONDS"))
def _open_imap_connection(host: str, port: int, *, starttls: bool, timeout: int = _IMAP_TIMEOUT_SECONDS):
    """Open an IMAP connection using the configured security mode.

    The connection is returned without logging in.

    Args:
        host: IMAP server hostname.
        port: IMAP server port; a falsy value is treated as 993.
        starttls: When true, connect in plaintext and upgrade with STARTTLS.
        timeout: Socket timeout in seconds, used for the connect and then
            applied to the underlying socket.

    Returns:
        An ``imaplib.IMAP4`` connection (``IMAP4_SSL`` when ``starttls`` is
        false and the port is 993).

    Raises:
        Whatever ``imaplib`` raises on connect or STARTTLS failure. If the
        STARTTLS upgrade fails, the plaintext connection is shut down before
        the error is re-raised, so the socket is not leaked.
    """
    port = int(port or 993)
    if starttls:
        conn = imaplib.IMAP4(host, port, timeout=timeout)
        try:
            conn.starttls()
        except Exception:
            # The caller never receives `conn` on this path, so nobody else
            # can close it: release the socket here, then re-raise.
            try:
                conn.shutdown()
            except Exception:
                pass
            raise
    elif port == 993:
        # 993 is the implicit-TLS port.
        conn = imaplib.IMAP4_SSL(host, port, timeout=timeout)
    else:
        # Any other port without STARTTLS is a plain connection.
        conn = imaplib.IMAP4(host, port, timeout=timeout)
    try:
        # Best effort: apply the timeout to the socket for later reads too.
        conn.sock.settimeout(timeout)
    except Exception:
        pass
    return conn
def _imap_connect(account_id: str | None = None, owner: str = ""):
# SECURITY: passing `owner` scopes the fallback config lookup so a brand
@@ -620,17 +677,12 @@ def _imap_connect(account_id: str | None = None, owner: str = ""):
# The last branch is critical: previously this fell into IMAP4_SSL
# for any non-STARTTLS port, which would fail the TLS handshake on
# plain local servers (Dovecot on 31143, etc.).
if cfg.get("imap_starttls"):
conn = imaplib.IMAP4(cfg["imap_host"], cfg["imap_port"], timeout=_IMAP_TIMEOUT_SECONDS)
conn.starttls()
elif int(cfg.get("imap_port") or 993) == 993:
conn = imaplib.IMAP4_SSL(cfg["imap_host"], cfg["imap_port"], timeout=_IMAP_TIMEOUT_SECONDS)
else:
conn = imaplib.IMAP4(cfg["imap_host"], cfg["imap_port"], timeout=_IMAP_TIMEOUT_SECONDS)
try:
conn.sock.settimeout(_IMAP_TIMEOUT_SECONDS)
except Exception:
pass
conn = _open_imap_connection(
cfg["imap_host"],
cfg["imap_port"],
starttls=bool(cfg.get("imap_starttls")),
timeout=_IMAP_TIMEOUT_SECONDS,
)
conn.login(cfg["imap_user"], cfg["imap_password"])
return conn
@@ -699,7 +751,13 @@ def _decode_header(raw):
decoded = []
for data, charset in parts:
if isinstance(data, bytes):
decoded.append(data.decode(charset or "utf-8", errors="replace"))
try:
decoded.append(data.decode(charset or "utf-8", errors="replace"))
except (LookupError, ValueError):
# Unknown/invalid MIME charset (e.g. a malformed or spam header
# like =?x-unknown-charset?B?...?=). errors="replace" only covers
# byte-decode errors, not codec lookup, so fall back to utf-8.
decoded.append(data.decode("utf-8", errors="replace"))
else:
decoded.append(data)
return " ".join(decoded)
@@ -793,22 +851,27 @@ def _detect_spam_folder(conn):
return None
def _imap_move(uid, dest, src="INBOX"):
def _imap_move(uid, dest, src="INBOX", account_id: str | None = None, owner: str = ""):
"""Move a single IMAP UID from src folder to dest. Returns True on success."""
c = None
try:
c = _imap_connect()
c = _imap_connect(account_id, owner=owner)
c.select(_q(src))
status, _ = c.copy(uid, _q(dest))
if status != "OK":
c.logout()
return False
c.store(uid, "+FLAGS", "\\Deleted")
c.expunge()
c.logout()
return True
except Exception as e:
logger.warning(f"IMAP move {uid}{dest} failed: {e}")
return False
finally:
if c:
try:
c.logout()
except Exception:
pass
def _extract_attachment_text(msg, max_chars: int = 6000) -> str:
@@ -999,7 +1062,9 @@ def _fetch_sender_thread_context(sender_addr: str,
exclude_folder: str = "INBOX",
limit: int = 3,
max_chars_per_email: int = 1500,
max_attachment_chars: int = 4000) -> str:
max_attachment_chars: int = 4000,
account_id: str | None = None,
owner: str = "") -> str:
"""Pull the last N emails from `sender_addr` (across common folders),
extract their body snippets + attachment text, and return one formatted
block ready to be glued into an LLM system prompt as "REFERENCED MATERIAL".
@@ -1021,7 +1086,7 @@ def _fetch_sender_thread_context(sender_addr: str,
seen_uids.add((exclude_folder or "INBOX", str(exclude_uid)))
try:
conn = _imap_connect()
conn = _imap_connect(account_id, owner=owner)
except Exception as e:
logger.warning(f"sender-thread-context: imap connect failed: {e}")
return ""
@@ -1104,7 +1169,12 @@ def _fetch_sender_thread_context(sender_addr: str,
return "\n\n=====\n\n".join(blocks)
def _pre_retrieve_context(body: str, sender: str) -> tuple:
def _pre_retrieve_context(
body: str,
sender: str,
account_id: str | None = None,
owner: str = "",
) -> tuple:
"""Extract key terms from an incoming email and search past emails + contacts.
Returns (context_snippets, terms_list). Best-effort; never raises.
@@ -1128,18 +1198,37 @@ def _pre_retrieve_context(body: str, sender: str) -> tuple:
# ── Known-sender check: only retrieve context for senders we already
# have a relationship with. New / cold senders get an empty context.
sender_addr = email.utils.parseaddr(sender or "")[1].lower()
is_known = False
# The CardDAV address book is global admin data backed by a single
# Radicale instance, so only fold it into reply context for an admin /
# single-user owner. Non-admin owners still get their own (owner-scoped)
# IMAP history below, just not the shared contacts.
try:
from routes.contacts_routes import _fetch_contacts
for c in _fetch_contacts() or []:
if (c.get("email") or "").lower() == sender_addr:
is_known = True
break
from src.tool_security import owner_is_admin_or_single_user
contacts_allowed = owner_is_admin_or_single_user(owner or None)
except Exception:
pass
contacts_allowed = not bool(owner)
is_known = False
if contacts_allowed:
try:
from routes.contacts_routes import _fetch_contacts
for c in _fetch_contacts() or []:
# Contacts are normalized to plural `emails` lists, but
# keep the legacy singular key fallback for older data.
contact_emails = []
raw_emails = c.get("emails")
if isinstance(raw_emails, list):
contact_emails.extend(str(e or "") for e in raw_emails)
legacy_email = c.get("email")
if legacy_email:
contact_emails.append(str(legacy_email))
if any((addr or "").strip().lower() == sender_addr for addr in contact_emails):
is_known = True
break
except Exception:
pass
if not is_known and sender_addr:
try:
with _imap() as _ck:
with _imap(account_id, owner=owner) as _ck:
_ck.select("INBOX", readonly=True)
st_known, dk = _ck.search(None, f'(FROM "{sender_addr}")')
if st_known == "OK" and dk and dk[0]:
@@ -1177,7 +1266,7 @@ def _pre_retrieve_context(body: str, sender: str) -> tuple:
return context_snippets, terms_list
try:
ctx_conn = _imap_connect()
ctx_conn = _imap_connect(account_id, owner=owner)
for folder in ["INBOX", "Sent", "Archive", "Drafts"]:
try:
st_sel, _sd = ctx_conn.select(_q(folder), readonly=True)
@@ -1221,18 +1310,18 @@ def _pre_retrieve_context(body: str, sender: str) -> tuple:
try:
from routes.contacts_routes import _fetch_contacts
all_contacts = _fetch_contacts()
all_contacts = _fetch_contacts() if contacts_allowed else []
for term in terms_list:
t_lower = term.lower()
matches = [c for c in all_contacts
if t_lower in (c.get("name") or "").lower()
or t_lower in (c.get("email") or "").lower()]
or any(t_lower in (e or "").lower() for e in (c.get("emails") or []))]
for c in matches[:2]:
parts = [f"Name: {c.get('name','')}"]
if c.get("email"):
parts.append(f"Email: {c['email']}")
if c.get("phone"):
parts.append(f"Phone: {c['phone']}")
if c.get("emails"):
parts.append(f"Email: {', '.join(c['emails'])}")
if c.get("phones"):
parts.append(f"Phone: {', '.join(c['phones'])}")
context_snippets.append(f"[Contact match for \"{term}\"] " + ", ".join(parts))
except Exception:
pass

View File

@@ -45,6 +45,21 @@ from routes.email_helpers import (
logger = logging.getLogger(__name__)
def _owner_for_email_account(account_id: str | None) -> str:
if not account_id:
return ""
try:
from core.database import SessionLocal as _SL, EmailAccount as _EA
db = _SL()
try:
row = db.query(_EA.owner).filter(_EA.id == account_id).first()
return (row[0] or "") if row else ""
finally:
db.close()
except Exception:
return ""
# ── Routes ──
async def _emit_progress(progress_cb, message: str):
@@ -84,6 +99,36 @@ async def _run_auto_summarize_once(do_summary: bool = True, do_reply: bool = Tru
_save_settings(s2)
def _latest_inbox_fallback_uids(conn, reconnect):
    """Latest INBOX UIDs via ``SEARCH ALL``, with a poisoned-socket guard (#1613).

    On a large Gmail mailbox the fallback ``SEARCH ALL`` can time out mid-reply,
    leaving its enormous ``* SEARCH <uids…>`` line unread on the socket. The
    next command (the downstream re-select / EXAMINE) then reads those leftover
    bytes and fails with ``EXAMINE => unexpected response: b'325188 …'``.
    Reconnecting on failure guarantees the downstream command starts from a
    clean socket.

    Args:
        conn: Live IMAP connection to run the scan on.
        reconnect: Zero-argument callable returning a fresh connection; only
            called when the scan raises.

    Returns:
        ``(uids, conn)``. ``uids`` is a list of ``("INBOX", uid)`` pairs for up
        to the last eight UIDs in the search result, newest first. ``conn`` is
        the connection to keep using: the same one on success, or the result
        of ``reconnect()`` (with an empty ``uids``) if the scan failed.
    """
    try:
        conn.select("INBOX", readonly=True)
        status, data = conn.uid("SEARCH", None, "ALL")
        found = []
        if status == "OK" and data and data[0]:
            tail = data[0].split()[-8:]
            found = [("INBOX", uid) for uid in reversed(tail)]
        logger.info("Email task SINCE scan found no messages; fell back to latest INBOX messages")
        return found, conn
    except Exception as exc:
        logger.warning(f"Latest-INBOX fallback scan failed: {exc}")
        # The socket may still hold unread reply bytes; drop this connection
        # and hand the caller a fresh one.
        try:
            conn.logout()
        except Exception:
            pass
        return [], reconnect()
async def _auto_summarize_pass(days_back: int = 1, account_id: str | None = None, progress_cb=None) -> str:
"""Single pass of the auto-summarize/reply scan.
@@ -132,7 +177,7 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
import sqlite3 as _sql3
import requests as _req
from src.endpoint_resolver import resolve_endpoint
from src.llm_core import _uses_max_completion_tokens
from src.llm_core import _uses_max_completion_tokens, _restricts_temperature
settings = _load_settings()
auto_sum = settings.get("email_auto_summarize", False)
@@ -143,25 +188,18 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
if not auto_sum and not auto_reply and not auto_tag and not auto_spam and not auto_cal:
return "Nothing to do"
# Owner of the account being processed. All calendar reads/writes below are
# scoped to this user: the multi-account fan-out runs every user's mailbox,
# so an unscoped pass would disclose and mutate other tenants' calendars.
_acct_owner = None
try:
from core.database import SessionLocal as _SLo, EmailAccount as _EAo
_dbo = _SLo()
try:
if account_id:
_arow = _dbo.query(_EAo).filter(_EAo.id == account_id).first()
_acct_owner = _arow.owner if _arow else None
finally:
_dbo.close()
except Exception:
_acct_owner = None
# Owner of the account being processed. All calendar + mailbox reads/writes
# below are scoped to this user: the multi-account fan-out runs every user's
# mailbox, so an unscoped pass would disclose/mutate other tenants' data.
# One resolution feeds both the mailbox path (account_owner) and upstream's
# calendar path (_acct_owner, which expects None rather than "").
account_owner = _owner_for_email_account(account_id)
_acct_owner = account_owner or None
conn = None
try:
await _emit_progress(progress_cb, "Connecting to mail…")
conn = _imap_connect(account_id)
conn = _imap_connect(account_id, owner=account_owner)
from datetime import timedelta as _td
since = (datetime.utcnow() - _td(days=max(1, days_back))).strftime("%d-%b-%Y")
# uid_list carries real IMAP UIDs, matching the email UI/read routes.
@@ -193,26 +231,27 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
# the latest visible inbox messages so Clear cache -> Run again can
# actually repopulate AI reply/summary/tag caches.
if not uid_list:
try:
conn.select("INBOX", readonly=True)
status, data = conn.uid("SEARCH", None, "ALL")
if status == "OK" and data and data[0]:
for u in reversed(data[0].split()[-8:]):
uid_list.append(("INBOX", u))
logger.info("Email task SINCE scan found no messages; fell back to latest INBOX messages")
except Exception as _e:
logger.warning(f"Latest-INBOX fallback scan failed: {_e}")
# Re-select INBOX as default for downstream code
_fb_uids, conn = _latest_inbox_fallback_uids(
conn, lambda: _imap_connect(account_id, owner=account_owner)
)
uid_list.extend(_fb_uids)
# Re-select INBOX as default for downstream code (on a clean socket even
# if the SEARCH ALL fallback above failed — see #1613).
conn.select("INBOX", readonly=True)
if not uid_list:
conn.logout()
return "No recent emails"
await _emit_progress(progress_cb, f"Found {len(uid_list)} recent email(s); checking cache…")
_c = _sql3.connect(SCHEDULED_DB)
_sum_existing = {r[0] for r in _c.execute("SELECT message_id FROM email_summaries").fetchall()}
_reply_existing = {r[0] for r in _c.execute("SELECT message_id FROM email_ai_replies").fetchall()}
_tag_existing = {r[0] for r in _c.execute("SELECT message_id FROM email_tags").fetchall()} if (auto_tag or auto_spam) else set()
if auto_tag or auto_spam:
if account_owner:
_tag_existing = {r[0] for r in _c.execute("SELECT message_id FROM email_tags WHERE owner=?", (account_owner,)).fetchall()}
else:
_tag_existing = {r[0] for r in _c.execute("SELECT message_id FROM email_tags WHERE owner='' OR owner IS NULL").fetchall()}
else:
_tag_existing = set()
_cal_existing = {r[0] for r in _c.execute("SELECT message_id FROM email_calendar_extractions").fetchall()} if auto_cal else set()
# Urgency is handled by the built-in `check_email_urgency` task. Keep
# this legacy poller path disabled so users don't get two independent
@@ -225,7 +264,7 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
# this per-iteration was making big inbox scans crawl. Used by the
# urgency self-loop check below.
try:
_self_self_addr = (_get_email_config(account_id).get("from_address") or "").strip().lower()
_self_self_addr = (_get_email_config(account_id, owner=account_owner).get("from_address") or "").strip().lower()
except Exception:
_self_self_addr = ""
@@ -233,11 +272,10 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
if auto_spam and not spam_folder:
logger.warning("Auto-spam enabled but no Junk/Spam folder detected — will classify but not move")
url, model, headers = resolve_endpoint("utility")
url, model, headers = resolve_endpoint("utility", owner=account_owner)
if not url:
url, model, headers = resolve_endpoint("default")
url, model, headers = resolve_endpoint("default", owner=account_owner)
if not url or not model:
conn.logout()
return "No model configured"
writing_style = settings.get("email_writing_style", "")
@@ -355,6 +393,9 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
"temperature": 0.3,
"stream": False,
}
# Reasoning models (o1/o3/o4/gpt-5) reject an explicit temperature.
if _restricts_temperature(model):
payload.pop("temperature", None)
try:
# Use to_thread so this sync HTTP call doesn't freeze
# the entire event loop while the LLM thinks (240s).
@@ -392,8 +433,8 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
await _emit_progress(progress_cb, f"Drafting reply {processed + 1}/{_max_process} · checked {examined}/{len(uid_list)}")
# Background reply drafting should not make the whole app
# feel busy. Keep it lightweight: no extra IMAP context
# mining here; manual AI Reply can still do that when the
# user explicitly asks for a draft on one email.
# mining here; manual AI Reply can still do that (owner-scoped)
# when the user explicitly asks for a draft on one email.
context_snippets, _terms = [], []
sys_prompt = _EMAIL_REPLY_SYS_PROMPT_BASE
if att_text:
@@ -708,7 +749,7 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
# Send alert email immediately if critical or high
if urgency in ("critical", "high"):
try:
cfg = _get_email_config(account_id)
cfg = _get_email_config(account_id, owner=account_owner)
to_addr = cfg["from_address"] # self-email
# Deep-link to open the original email in Odysseus (if public URL is configured).
@@ -716,8 +757,8 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
from src.settings import load_settings as _ls
_pub = (_ls().get("app_public_url") or "").rstrip("/")
uid_str = uid.decode() if isinstance(uid, bytes) else str(uid)
from urllib.parse import quote as _q
open_url = f"{_pub}/#email={_q(_folder, safe='')}:{uid_str}" if _pub else ""
from urllib.parse import quote as _url_q
open_url = f"{_pub}/#email={_url_q(_folder, safe='')}:{uid_str}" if _pub else ""
alert_subject = f"[{urgency.upper()}] {subject}"
alert_body = (
@@ -806,12 +847,15 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
"temperature": 0.1,
"stream": False,
}
# Reasoning models (o1/o3/o4/gpt-5) reject an explicit temperature.
if _restricts_temperature(model):
payload.pop("temperature", None)
# to_thread keeps the event loop responsive during the LLM call
resp = await asyncio.to_thread(
_req.post, url, json=payload, headers=req_headers, timeout=120
)
if not resp.ok:
logger.warning(f"Auto-classify {uid.decode()} HTTP {resp.status_code}: {resp.text[:200]}")
logger.warning(f"Auto-classify {uid.decode() if isinstance(uid, bytes) else str(uid)} HTTP {resp.status_code}: {resp.text[:200]}")
else:
rdata = resp.json()
m = (rdata.get("choices") or [{}])[0].get("message", {})
@@ -840,17 +884,17 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
moved_to = ""
if is_spam and auto_spam and spam_folder:
if _imap_move(uid, spam_folder):
if _imap_move(uid, spam_folder, account_id=account_id, owner=account_owner):
moved_to = spam_folder
logger.info(f"Auto-spam moved uid={uid.decode()} to {spam_folder}: {spam_reason}")
_c = _sql3.connect(SCHEDULED_DB)
_c.execute("""
INSERT OR REPLACE INTO email_tags
(message_id, uid, folder, subject, sender, tags, spam_verdict,
(message_id, owner, uid, folder, subject, sender, tags, spam_verdict,
spam_reason, moved_to, model_used, created_at)
VALUES (?, ?, 'INBOX', ?, ?, ?, ?, ?, ?, ?, ?)
""", (message_id, uid.decode(), subject, sender,
VALUES (?, ?, ?, 'INBOX', ?, ?, ?, ?, ?, ?, ?, ?)
""", (message_id, account_owner or "", uid.decode(), subject, sender,
json.dumps(tags), 1 if is_spam else 0,
spam_reason, moved_to, model, datetime.utcnow().isoformat()))
_c.commit()
@@ -865,7 +909,6 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
logger.warning(f"Auto-process {uid} failed: {e}")
continue
conn.logout()
await _emit_progress(progress_cb, "Finishing…")
if processed > 0:
logger.info(f"Auto-processed {processed} new email(s) for summary/reply/classify")
@@ -902,6 +945,12 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
except Exception as e:
logger.warning(f"Auto-summarize pass error: {e}")
return f"Error: {e}"
finally:
if conn:
try:
conn.logout()
except Exception:
pass
async def _auto_summarize_poller():
@@ -930,8 +979,9 @@ def _scheduled_poll_once() -> dict:
conn = sqlite3.connect(SCHEDULED_DB)
cols = [row[1] for row in conn.execute("PRAGMA table_info(scheduled_emails)").fetchall()]
kind_expr = "odysseus_kind" if "odysseus_kind" in cols else "'scheduled' AS odysseus_kind"
owner_expr = "owner" if "owner" in cols else "'' AS owner"
rows = conn.execute(f"""
SELECT id, to_addr, cc, bcc, subject, body, in_reply_to, references_hdr, attachments, account_id, {kind_expr}
SELECT id, to_addr, cc, bcc, subject, body, in_reply_to, references_hdr, attachments, account_id, {kind_expr}, {owner_expr}
FROM scheduled_emails
WHERE status = 'pending' AND send_at <= ?
""", (now_iso,)).fetchall()
@@ -943,7 +993,8 @@ def _scheduled_poll_once() -> dict:
attachments = json.loads(r[8] or "[]")
row_account_id = r[9] if len(r) > 9 else None
odysseus_kind = r[10] if len(r) > 10 else "scheduled"
cfg = _get_email_config(row_account_id)
row_owner = (r[11] if len(r) > 11 else "") or _owner_for_email_account(row_account_id)
cfg = _get_email_config(row_account_id, owner=row_owner)
has_atts = bool(attachments)
if has_atts:
outer = MIMEMultipart("mixed")
@@ -980,7 +1031,7 @@ def _scheduled_poll_once() -> dict:
# Append to local Sent folder
try:
with _imap() as imap:
with _imap(row_account_id, owner=row_owner) as imap:
sent_folder = _detect_sent_folder(imap)
imap.append(sent_folder, "\\Seen", None, outer.as_bytes())
except Exception as e:

View File

@@ -17,7 +17,6 @@ import sqlite3 as _sql3
import email as email_mod
import email.header
import email.utils
import imaplib
import smtplib
import json
import re
@@ -40,7 +39,8 @@ from routes.email_helpers import (
_strip_think, _extract_reply, _apply_email_style_mechanics, require_owner, require_user, _assert_owns_account,
_q, _attach_compose_uploads, _cleanup_compose_uploads,
_load_settings, _save_settings, _get_email_config,
_send_smtp_message,
_send_smtp_message, _smtp_security_mode,
_IMAP_TIMEOUT_SECONDS, _open_imap_connection,
_imap_connect, _imap, _decode_header, _detect_sent_folder, _detect_drafts_folder,
_extract_attachment_text, _list_attachments_from_msg,
_extract_attachment_to_disk, _extract_html, _extract_text,
@@ -90,6 +90,16 @@ def _email_tag_owner_aliases(account_id: str | None, owner: str = "") -> list[st
return out or [""]
def _email_tag_owner_clause(account_id: str | None, owner: str = "") -> tuple[str, list[str]]:
    """Build the owner-scoping SQL fragment for ``email_tags`` queries.

    Returns ``(clause, params)``: ``clause`` is a parameterized SQL condition
    on the ``owner`` column and ``params`` is the alias list from
    ``_email_tag_owner_aliases`` to bind to its placeholders, in order.
    """
    params = _email_tag_owner_aliases(account_id, owner)
    marks = ",".join(["?"] * len(params))
    in_list = f"owner IN ({marks})"
    # In configured multi-user mode, do not treat legacy owner='' rows as
    # visible to everyone. Single-user/unconfigured mode keeps legacy rows.
    if not owner:
        return f"({in_list} OR owner IS NULL)", params
    return in_list, params
def _record_email_received_events(owner: str, account_id: str | None, folder: str, emails: list[dict]):
"""Baseline inbox messages, then fire `email_received` for new arrivals."""
if not owner or (folder or "INBOX").upper() != "INBOX" or not emails:
@@ -312,6 +322,20 @@ def _apply_odysseus_headers(msg, kind: str | None = None, ref_id: str | None = N
msg["X-Odysseus-Ref"] = re.sub(r"[^A-Za-z0-9_.:-]", "-", ref_id)[:128]
def _envelope_recipients(*fields: str) -> list:
"""Extract bare SMTP envelope addresses from one or more To/Cc/Bcc header
strings. A naive `field.split(",")` corrupts display names that contain a
comma (e.g. `"Smith, John" <john@corp.com>`, the canonical Outlook form):
it splits into `"Smith` and `John" <john@corp.com>`, breaking delivery.
email.utils.getaddresses parses the address grammar correctly."""
out = []
for _name, addr in email.utils.getaddresses([f for f in fields if f]):
addr = (addr or "").strip()
if addr:
out.append(addr)
return out
def _md_to_email_html(text: str) -> str:
"""Render the compose markdown body to a SAFE HTML fragment for the email's
text/html part. Everything is HTML-escaped FIRST (so a pasted <script> /
@@ -457,7 +481,7 @@ def setup_email_routes():
_IMAP_POOL = {} # account_id → (conn, last_used_at)
_IMAP_IDLE_MAX = 60.0
_WARMING_READS = set()
_WARM_READ_LIMIT = 3
_WARM_READ_LIMIT = 1
_WARM_MAX_BYTES = 128 * 1024
_WARM_RECENT_SECONDS = 7 * 24 * 60 * 60
_pool_lock = _threading.Lock()
@@ -591,11 +615,11 @@ def setup_email_routes():
SECURITY: `owner` is propagated so when `account_id` is missing,
the fallback config lookup is scoped to this user's accounts only.
"""
conn = None
try:
conn = _imap_connect(account_id, owner=owner)
select_status, _ = conn.select(_q(folder), readonly=True)
if select_status != "OK":
conn.logout()
return {"emails": [], "total": 0, "folder": folder, "error": f"Folder not found: {folder}"}
from_clause = ""
@@ -645,8 +669,7 @@ def setup_email_routes():
try:
import sqlite3 as _sql3t
_ct = _sql3t.connect(SCHEDULED_DB)
_owner_aliases = _email_tag_owner_aliases(account_id, owner)
_owner_ph = ",".join("?" * len(_owner_aliases))
_owner_clause, _owner_params = _email_tag_owner_clause(account_id, owner)
# SECURITY: owner-scope the lookup (review C2/H8). Without
# this, user A's `tag:urgent` filter would surface UIDs
# written by user B and IMAP would return whatever
@@ -658,8 +681,8 @@ def setup_email_routes():
rows_t = _ct.execute(
"SELECT message_id, uid FROM email_tags "
"WHERE folder=? AND spam_verdict=1 "
f"AND (owner IN ({_owner_ph}) OR owner IS NULL)",
(folder, *_owner_aliases),
f"AND {_owner_clause}",
(folder, *_owner_params),
).fetchall()
for mid, uid in rows_t:
if mid:
@@ -670,8 +693,8 @@ def setup_email_routes():
rows_t = _ct.execute(
"SELECT message_id, uid, tags FROM email_tags "
"WHERE folder=? AND tags IS NOT NULL AND tags != '' "
f"AND (owner IN ({_owner_ph}) OR owner IS NULL)",
(folder, *_owner_aliases),
f"AND {_owner_clause}",
(folder, *_owner_params),
).fetchall()
for r in rows_t:
try:
@@ -743,12 +766,11 @@ def setup_email_routes():
_uid_strs = [u.decode() for u in uid_list]
if _uid_strs:
placeholders = ",".join("?" * len(_uid_strs))
_owner_aliases = _email_tag_owner_aliases(account_id, owner)
_owner_ph = ",".join("?" * len(_owner_aliases))
_owner_clause, _owner_params = _email_tag_owner_clause(account_id, owner)
rows = _c.execute(
f"SELECT uid, tags, spam_verdict FROM email_tags "
f"WHERE folder=? AND (owner IN ({_owner_ph}) OR owner IS NULL) AND uid IN ({placeholders})",
[folder, *_owner_aliases, *_uid_strs],
f"WHERE folder=? AND {_owner_clause} AND uid IN ({placeholders})",
[folder, *_owner_params, *_uid_strs],
).fetchall()
for r in rows:
try:
@@ -805,14 +827,13 @@ def setup_email_routes():
if header_ids:
import sqlite3 as _sql3m
_cm = _sql3m.connect(SCHEDULED_DB)
_owner_aliases_m = _email_tag_owner_aliases(account_id, owner)
_owner_ph_m = ",".join("?" * len(_owner_aliases_m))
_owner_clause_m, _owner_params_m = _email_tag_owner_clause(account_id, owner)
_mid_ph = ",".join("?" * len(header_ids))
rows_m = _cm.execute(
f"SELECT message_id, tags, spam_verdict FROM email_tags "
f"WHERE folder=? AND (owner IN ({_owner_ph_m}) OR owner IS NULL) "
f"WHERE folder=? AND {_owner_clause_m} "
f"AND message_id IN ({_mid_ph})",
[folder, *_owner_aliases_m, *header_ids],
[folder, *_owner_params_m, *header_ids],
).fetchall()
_cm.close()
for mid, tags_raw, spam_raw in rows_m:
@@ -924,12 +945,17 @@ def setup_email_routes():
except Exception as _summary_err:
logger.debug(f"Bulk summary attach skipped: {_summary_err}")
conn.logout()
return {"emails": emails, "total": total, "folder": folder, "offset": offset}
except Exception as e:
logger.error(f"Failed to list emails: {e}")
detail = str(e).strip()
return {"emails": [], "total": 0, "error": f"Mail operation failed: {detail[:180]}" if detail else "Mail operation failed"}
finally:
if conn:
try:
conn.logout()
except Exception:
pass
@router.get("/list")
async def list_emails(
@@ -971,10 +997,11 @@ def setup_email_routes():
async def unflag_spam(uid: str, owner: str = Depends(require_owner)):
"""User override — mark email as not spam."""
try:
owner_clause, owner_params = _email_tag_owner_clause(None, owner)
_c = _sql3.connect(SCHEDULED_DB)
_c.execute(
"UPDATE email_tags SET spam_verdict=0, spam_reason='' WHERE uid=?",
(uid,),
f"UPDATE email_tags SET spam_verdict=0, spam_reason='' WHERE uid=? AND {owner_clause}",
[uid, *owner_params],
)
_c.commit()
_c.close()
@@ -997,8 +1024,10 @@ def setup_email_routes():
ql = (q or "").strip().lower()
try:
conn = _sql3.connect(SCHEDULED_DB)
owner_clause, owner_params = _email_tag_owner_clause(None, owner)
rows = conn.execute(
"SELECT sender FROM email_tags WHERE sender IS NOT NULL AND sender != ''"
f"SELECT sender FROM email_tags WHERE sender IS NOT NULL AND sender != '' AND {owner_clause}",
owner_params,
).fetchall()
conn.close()
seen = {}
@@ -1046,7 +1075,7 @@ def setup_email_routes():
# Escape backslash and quote for the IMAP-SEARCH quoted-string.
q_escaped = q.replace('\\', '\\\\').replace('"', '\\"')
search_cmd = f'(OR FROM "{q_escaped}" TEXT "{q_escaped}")'
search_cmd = f'(OR OR FROM "{q_escaped}" SUBJECT "{q_escaped}" TEXT "{q_escaped}")'
status, data = _imap_uid_search(conn, search_cmd)
if status != "OK" or not data[0]:
@@ -1928,11 +1957,7 @@ def setup_email_routes():
outer.attach(body_container)
_attach_compose_uploads(outer, attachments)
recipients = [r.strip() for r in to.split(",") if r.strip()]
if cc:
recipients.extend([r.strip() for r in cc.split(",") if r.strip()])
if bcc:
recipients.extend([r.strip() for r in bcc.split(",") if r.strip()])
recipients = _envelope_recipients(to, cc, bcc)
_send_smtp_message(cfg, cfg["from_address"], recipients, outer.as_string())
@@ -1964,13 +1989,22 @@ def setup_email_routes():
# minute doesn't trip the past-time guard.
if parsed_at < now_utc:
return {"success": False, "error": "send_at must be in the future"}
# Normalize to naive UTC before storing: the poller selects due
# rows with a lexicographic string compare against a naive
# datetime.utcnow().isoformat(), so storing the raw client string
# makes "+02:00" schedules fire hours late, negative offsets fire
# hours early, and a "Z" suffix compares after the fractional
# seconds of the poller timestamp.
if parsed_at.tzinfo:
parsed_at = parsed_at.astimezone(_tz.utc).replace(tzinfo=None)
send_at = parsed_at.isoformat()
sid = _uuid.uuid4().hex[:16]
conn = sqlite3.connect(SCHEDULED_DB)
conn.execute("""
INSERT INTO scheduled_emails
(id, to_addr, cc, bcc, subject, body, in_reply_to, references_hdr, attachments, send_at, created_at, status, account_id, odysseus_kind)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'pending', ?, ?)
(id, to_addr, cc, bcc, subject, body, in_reply_to, references_hdr, attachments, send_at, created_at, status, account_id, odysseus_kind, owner)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'pending', ?, ?, ?)
""", (
sid,
req.get("to", ""),
@@ -1985,6 +2019,7 @@ def setup_email_routes():
datetime.utcnow().isoformat(),
req.get("account_id") or None,
req.get("odysseus_kind") or "scheduled",
owner or "",
))
conn.commit()
conn.close()
@@ -2003,9 +2038,9 @@ def setup_email_routes():
rows = conn.execute("""
SELECT id, to_addr, cc, subject, send_at, created_at, status, error
FROM scheduled_emails
WHERE status IN ('pending', 'failed')
WHERE status IN ('pending', 'failed') AND owner = ?
ORDER BY send_at ASC
""").fetchall()
""", (owner or "",)).fetchall()
conn.close()
return {"scheduled": [
{
@@ -2023,7 +2058,10 @@ def setup_email_routes():
import sqlite3
try:
conn = sqlite3.connect(SCHEDULED_DB)
conn.execute("DELETE FROM scheduled_emails WHERE id = ? AND status = 'pending'", (sid,))
conn.execute(
"DELETE FROM scheduled_emails WHERE id = ? AND status = 'pending' AND owner = ?",
(sid, owner or ""),
)
conn.commit()
conn.close()
return {"success": True}
@@ -2035,7 +2073,7 @@ def setup_email_routes():
async def resolve_contact(name: str = Query(..., description="Name to search for"), owner: str = Depends(require_owner)):
"""Search Sent folder for a contact by name. Returns matching email addresses."""
try:
with _imap() as conn:
with _imap(owner=owner) as conn:
matches = {}
for folder in ["Sent", "INBOX", "Drafts"]:
try:
@@ -2133,12 +2171,9 @@ def setup_email_routes():
outer.attach(body_container)
_attach_compose_uploads(outer, req.attachments)
# Build recipient list
recipients = [r.strip() for r in req.to.split(",") if r.strip()]
if req.cc:
recipients.extend([r.strip() for r in req.cc.split(",") if r.strip()])
if req.bcc:
recipients.extend([r.strip() for r in req.bcc.split(",") if r.strip()])
# Build recipient list (parse the address grammar so display names with
# commas don't get split into broken envelope addresses)
recipients = _envelope_recipients(req.to, req.cc, req.bcc)
# Serialize what the background task needs so the request object can be GC'd
outer_bytes = outer.as_bytes()
@@ -2146,6 +2181,7 @@ def setup_email_routes():
_from = cfg["from_address"]
_smtp_host = cfg["smtp_host"]
_smtp_port = cfg["smtp_port"]
_smtp_security = cfg.get("smtp_security")
_smtp_user = cfg["smtp_user"]
_smtp_pw = cfg["smtp_password"]
_recipients = list(recipients)
@@ -2163,6 +2199,7 @@ def setup_email_routes():
{
"smtp_host": _smtp_host,
"smtp_port": _smtp_port,
"smtp_security": _smtp_security,
"smtp_user": _smtp_user,
"smtp_password": _smtp_pw,
},
@@ -2417,7 +2454,7 @@ def setup_email_routes():
"""Generate a quick AI summary of an email body."""
try:
from src.endpoint_resolver import resolve_endpoint
from src.llm_core import _uses_max_completion_tokens
from src.llm_core import _uses_max_completion_tokens, _restricts_temperature
import requests as _req
body = data.get("body", "")
@@ -2474,6 +2511,9 @@ def setup_email_routes():
"temperature": 0.3,
"stream": False,
}
# Reasoning models (o1/o3/o4/gpt-5) reject an explicit temperature.
if _restricts_temperature(model):
payload.pop("temperature", None)
resp = await asyncio.to_thread(
_req.post, url, json=payload, headers=req_headers, timeout=180
)
@@ -2585,7 +2625,7 @@ def setup_email_routes():
# `api_key` field.
from core.database import SessionLocal as _SL, Session as _CS
_db = _SL()
sess = _db.query(_CS).filter(_CS.id == session_id).first()
sess = _db.query(_CS).filter(_CS.id == session_id, _CS.owner == owner).first()
if sess and sess.endpoint_url:
url = sess.endpoint_url
# Some sessions stored headers double-encoded (a JSON
@@ -2644,9 +2684,10 @@ def setup_email_routes():
# Manual AI Reply should feel immediate. The heavier context mining
# can involve multiple IMAP folder searches and attachment parsing;
# reserve that for callers that explicitly opt out of fast mode.
# Owner-scoped so pre-retrieval never crosses tenants.
context_snippets, _terms = ([], [])
if not fast_reply:
context_snippets, _terms = _pre_retrieve_context(original_body, to)
context_snippets, _terms = _pre_retrieve_context(original_body, to, owner=owner)
# NEW: also pull the last few emails from the original sender +
# their attachments. The "to" field on this endpoint is the
@@ -2662,6 +2703,7 @@ def setup_email_routes():
exclude_uid=source_uid,
exclude_folder=source_folder,
limit=3,
owner=owner,
)
except Exception as _e:
logger.warning(f"sender-thread-context failed: {_e}")
@@ -2723,7 +2765,7 @@ def setup_email_routes():
# Configured fallback chains last.
for cand in resolve_utility_fallback_candidates(owner=owner) or []:
_add(*cand)
for cand in resolve_chat_fallback_candidates() or []:
for cand in resolve_chat_fallback_candidates(owner=owner) or []:
_add(*cand)
try:
reply = await llm_call_async_with_fallback(
@@ -2814,13 +2856,16 @@ def setup_email_routes():
import uuid as _uuid
db = SessionLocal()
try:
row = db.query(EmailAccount).filter(EmailAccount.is_default == True).first() # noqa: E712
q = db.query(EmailAccount).filter(EmailAccount.is_default == True) # noqa: E712
if owner:
q = q.filter(EmailAccount.owner == owner)
row = q.first()
if row is None:
row = EmailAccount(id=_uuid.uuid4().hex, name="Default", is_default=True, enabled=True)
row = EmailAccount(id=_uuid.uuid4().hex, owner=owner, name="Default", is_default=True, enabled=True)
db.add(row)
field_map = {
"smtp_host": "smtp_host", "smtp_port": "smtp_port", "smtp_user": "smtp_user",
"imap_host": "imap_host", "imap_port": "imap_port", "imap_user": "imap_user",
"smtp_security": "smtp_security", "imap_host": "imap_host", "imap_port": "imap_port", "imap_user": "imap_user",
"imap_starttls": "imap_starttls", "email_from": "from_address",
}
for in_key, col_name in field_map.items():
@@ -2838,6 +2883,10 @@ def setup_email_routes():
row.imap_password = _enc(data["imap_password"])
if data.get("smtp_password"):
row.smtp_password = _enc(data["smtp_password"])
clear_q = db.query(EmailAccount).filter(EmailAccount.id != row.id)
if owner:
clear_q = clear_q.filter(EmailAccount.owner == owner)
clear_q.update({EmailAccount.is_default: False})
db.commit()
finally:
db.close()
@@ -2902,6 +2951,7 @@ def setup_email_routes():
"imap_starttls": bool(r.imap_starttls),
"smtp_host": r.smtp_host or "",
"smtp_port": int(r.smtp_port or 465),
"smtp_security": _smtp_security_mode({"smtp_security": getattr(r, "smtp_security", ""), "smtp_port": r.smtp_port}),
"smtp_user": r.smtp_user or "",
"from_address": r.from_address or "",
"has_imap_password": bool(r.imap_password),
@@ -2934,6 +2984,7 @@ def setup_email_routes():
imap_starttls=bool(data.get("imap_starttls", True)),
smtp_host=(data.get("smtp_host") or "").strip(),
smtp_port=int(data.get("smtp_port") or 465),
smtp_security=_smtp_security_mode({"smtp_security": data.get("smtp_security"), "smtp_port": data.get("smtp_port") or 465}),
smtp_user=(data.get("smtp_user") or "").strip(),
smtp_password=_enc(data.get("smtp_password") or ""),
from_address=(data.get("from_address") or "").strip(),
@@ -2977,6 +3028,8 @@ def setup_email_routes():
for key in ("imap_port", "smtp_port"):
if data.get(key) not in (None, ""):
setattr(row, key, int(data[key]))
if "smtp_security" in data:
row.smtp_security = _smtp_security_mode({"smtp_security": data.get("smtp_security"), "smtp_port": data.get("smtp_port") or row.smtp_port})
for key in ("imap_starttls", "enabled"):
if key in data:
setattr(row, key, bool(data[key]))
@@ -3061,6 +3114,7 @@ def setup_email_routes():
"imap_starttls": bool(row.imap_starttls),
"smtp_host": row.smtp_host or "",
"smtp_port": row.smtp_port or 465,
"smtp_security": _smtp_security_mode({"smtp_security": getattr(row, "smtp_security", ""), "smtp_port": row.smtp_port}),
"smtp_user": row.smtp_user or "",
"smtp_password": _decrypt(row.smtp_password or ""),
}
@@ -3093,13 +3147,12 @@ def setup_email_routes():
# port (Dovecot on 31143, etc.) would always fail the SSL
# handshake because they're not actually wrapped in TLS.
try:
if imap_starttls:
conn = imaplib.IMAP4(imap_host, imap_port, timeout=10)
conn.starttls()
elif imap_port == 993:
conn = imaplib.IMAP4_SSL(imap_host, imap_port, timeout=10)
else:
conn = imaplib.IMAP4(imap_host, imap_port, timeout=10)
conn = _open_imap_connection(
imap_host,
imap_port,
starttls=imap_starttls,
timeout=_IMAP_TIMEOUT_SECONDS,
)
try:
conn.login(imap_user, imap_pass)
imap_result = {"ok": True}
@@ -3112,14 +3165,16 @@ def setup_email_routes():
smtp_host = (body.get("smtp_host") or "").strip()
if smtp_host:
smtp_port = int(body.get("smtp_port") or 465)
smtp_security = _smtp_security_mode({"smtp_security": body.get("smtp_security"), "smtp_port": smtp_port})
smtp_user = (body.get("smtp_user") or imap_user).strip()
smtp_pass = body.get("smtp_password") or imap_pass
try:
if smtp_port == 587:
smtp = smtplib.SMTP(smtp_host, smtp_port, timeout=10)
smtp.starttls()
else:
if smtp_security == "ssl":
smtp = smtplib.SMTP_SSL(smtp_host, smtp_port, timeout=10)
else:
smtp = smtplib.SMTP(smtp_host, smtp_port, timeout=10)
if smtp_security == "starttls":
smtp.starttls()
try:
smtp.login(smtp_user, smtp_pass)
smtp_result = {"ok": True}

View File

@@ -86,7 +86,8 @@ def _load_custom_endpoint() -> dict:
"""Load the saved custom embedding endpoint, if any."""
try:
if os.path.exists(_ENDPOINT_FILE):
return json.loads(Path(_ENDPOINT_FILE).read_text(encoding="utf-8"))
data = json.loads(Path(_ENDPOINT_FILE).read_text(encoding="utf-8"))
return data if isinstance(data, dict) else {}
except Exception:
pass
return {}
@@ -160,7 +161,7 @@ def setup_embedding_routes():
_downloading[model_name] = True
try:
# Run in thread to not block the event loop
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
cache = _cache_dir()
await loop.run_in_executor(
None,
@@ -242,6 +243,18 @@ def setup_embedding_routes():
if not url:
raise HTTPException(400, "URL is required")
# SSRF hardening: validate the user-supplied URL before any outbound
# request. Local-first means loopback/LAN endpoints are allowed by
# default; non-HTTP(S) schemes and the cloud metadata range are always
# rejected. Set EMBEDDING_BLOCK_PRIVATE_IPS=true for full lockdown.
from src.url_safety import check_outbound_url
ok, reason = check_outbound_url(
url,
block_private=os.getenv("EMBEDDING_BLOCK_PRIVATE_IPS", "false").lower() == "true",
)
if not ok:
raise HTTPException(400, f"Rejected endpoint URL: {reason}")
# Quick health check
try:
import httpx

View File

@@ -5,6 +5,15 @@ from fastapi import APIRouter
CUSTOM_FONTS_DIR = os.path.join("static", "fonts", "custom")
FONT_EXTENSIONS = {".ttf", ".otf", ".woff", ".woff2"}
FAMILY_SUFFIX_WORDS = ("Display", "Rounded", "Serif", "Sans", "Mono", "Code", "Text")
def _split_family_token(token):
    """Separate a trailing family word (e.g. "Mono", "Sans") from a compact token.

    The first entry of FAMILY_SUFFIX_WORDS that ends the token, without being
    the whole token, is split off with a single space; the leading part is
    left as-is so brand names stay in one piece. A token with no such suffix
    gets a space at every lowercase-to-uppercase boundary instead.
    """
    matched = next(
        (
            word
            for word in FAMILY_SUFFIX_WORDS
            if len(token) > len(word) and token.endswith(word)
        ),
        None,
    )
    if matched is None:
        return re.sub(r'(?<=[a-z])(?=[A-Z])', ' ', token)
    stem = token[:-len(matched)]
    return f"{stem} {matched}"
def _derive_family(filename):
@@ -15,10 +24,9 @@ def _derive_family(filename):
r'[-_ ]?(Thin|ExtraLight|UltraLight|Light|Regular|Medium|SemiBold|DemiBold|Bold|ExtraBold|UltraBold|Black|Heavy|Italic|Oblique|Variable|VF)$',
'', name, flags=re.IGNORECASE
)
# Insert spaces before uppercase runs: "JetBrainsMono" → "Jet Brains Mono"
name = re.sub(r'(?<=[a-z])(?=[A-Z])', ' ', name)
# Replace dashes/underscores with spaces
name = re.sub(r'[-_]+', ' ', name).strip()
name = " ".join(_split_family_token(part) for part in name.split())
return name or filename

View File

@@ -32,10 +32,21 @@ def _extract_exif(content: bytes) -> dict:
from PIL import Image
from io import BytesIO
img = Image.open(BytesIO(content))
# Read the raw EXIF before any transpose: exif_transpose strips the
# orientation tag and with it the parsed EXIF view.
exif = img._getexif() if hasattr(img, '_getexif') else None
# Record DISPLAY dimensions (EXIF-rotated), matching upload_handler.
# A phone photo with Orientation 6/8 is stored landscape but shown
# portrait, so the raw width/height swap the aspect ratio.
try:
from PIL import ImageOps
img = ImageOps.exif_transpose(img) or img
except Exception:
pass
result["width"] = img.width
result["height"] = img.height
exif = img._getexif() if hasattr(img, '_getexif') else None
if not exif:
return result
@@ -110,9 +121,17 @@ def _image_to_dict(img: GalleryImage, session_name: str = None) -> Dict[str, Any
def _owner_filter(q, user):
"""Apply owner filtering to a gallery query."""
"""Apply owner filtering to a gallery query.
When auth is disabled (single-user mode) get_current_user returns None
and there is no per-user scoping. The main library list and stats already
treat None as "show everything" (`if user is not None`), so this helper
must too — otherwise the tag/model filter sidebars come back empty and the
tag-cleanup endpoints (clear-user-tags, clear-ai-tags, dedupe-tags)
silently affect zero rows in the most common self-hosted deployment.
"""
if user is None:
return q.filter(False)
return q
return q.filter(GalleryImage.owner == user)

View File

@@ -3,6 +3,9 @@
import os
import hashlib
import logging
import re
import uuid
from pathlib import Path
from typing import Dict, Any, Optional
from fastapi import APIRouter, HTTPException, Query, Request
@@ -17,6 +20,14 @@ from routes.gallery_helpers import (
logger = logging.getLogger(__name__)
def _sanitize_gallery_filename(filename: str) -> str:
"""Return a local filename safe to join under generated_images."""
safe_name = re.sub(r"[^A-Za-z0-9._-]", "_", Path(filename or "").name)[:128]
if not safe_name or safe_name in {".", ".."}:
safe_name = uuid.uuid4().hex[:12]
return safe_name
def setup_gallery_routes() -> APIRouter:
router = APIRouter(tags=["gallery"])
@@ -122,7 +133,7 @@ def setup_gallery_routes() -> APIRouter:
content = await file.read()
img_dir = Path("data/generated_images")
img_dir.mkdir(parents=True, exist_ok=True)
img_path = img_dir / img.filename
img_path = img_dir / _sanitize_gallery_filename(img.filename)
img_path.write_bytes(content)
# Refresh dimensions in case the editor resized the canvas.
@@ -912,6 +923,16 @@ def setup_gallery_routes() -> APIRouter:
body = await request.json()
# Use endpoint from request body (editor dropdown) or fall back to DB lookup
base = (body.pop("_endpoint", "") or "").rstrip("/")
# SSRF hardening: validate a client-supplied endpoint before any
# outbound request (mirrors routes/embedding_routes.py).
if base:
from src.url_safety import check_outbound_url
ok, reason = check_outbound_url(
base,
block_private=os.getenv("IMAGE_BLOCK_PRIVATE_IPS", "false").lower() == "true",
)
if not ok:
raise HTTPException(400, f"Rejected endpoint URL: {reason}")
chosen_model = (body.pop("_model", "") or "").strip()
api_key = None
if not base:
@@ -1104,6 +1125,18 @@ def setup_gallery_routes() -> APIRouter:
raise HTTPException(400, "No image provided")
endpoint = (body.get("_endpoint") or "").rstrip("/")
# SSRF hardening: a client-supplied endpoint is fetched server-side
# below, so validate it first (mirrors routes/embedding_routes.py).
# Local-first means loopback/LAN is allowed by default; the cloud
# metadata range and non-HTTP(S) schemes are always rejected.
if endpoint:
from src.url_safety import check_outbound_url
ok, reason = check_outbound_url(
endpoint,
block_private=os.getenv("IMAGE_BLOCK_PRIVATE_IPS", "false").lower() == "true",
)
if not ok:
raise HTTPException(400, f"Rejected endpoint URL: {reason}")
model = (body.get("_model") or "").strip()
base = endpoint
@@ -1125,7 +1158,7 @@ def setup_gallery_routes() -> APIRouter:
db = SessionLocal()
try:
for ep in db.query(ModelEndpoint).all():
if ep.base_url.rstrip("/").rstrip("/v1") == base.rstrip("/v1"):
if ep.base_url.rstrip("/").removesuffix("/v1").rstrip("/") == base.rstrip("/").removesuffix("/v1").rstrip("/"):
api_key = ep.api_key
break
finally:
@@ -1696,7 +1729,7 @@ def setup_gallery_routes() -> APIRouter:
return {"error": "No vision-capable endpoint configured"}
# Call vision model — format differs between Anthropic and OpenAI
from src.llm_core import _detect_provider
from src.llm_core import _detect_provider, _restricts_temperature, _uses_max_completion_tokens
provider = _detect_provider(chat_url)
tag_prompt = (
"Analyze this photo. Return ONLY a comma-separated list of tags. "
@@ -1721,6 +1754,7 @@ def setup_gallery_routes() -> APIRouter:
}],
}
else:
_tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model_name) else "max_tokens"
payload = {
"model": model_name,
"messages": [{
@@ -1730,9 +1764,12 @@ def setup_gallery_routes() -> APIRouter:
{"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}},
],
}],
"max_tokens": 200,
_tok_key: 200,
"temperature": 0.3,
}
# Reasoning models (o1/o3/o4/gpt-5) reject an explicit temperature.
if _restricts_temperature(model_name):
payload.pop("temperature", None)
h = {"Content-Type": "application/json"}
if headers:

View File

@@ -58,7 +58,7 @@ def setup_history_routes(session_manager) -> APIRouter:
.all()
)
import json as _json
history_dict = []
db_history = []
for m in db_messages:
entry = {"role": m.role, "content": m.content}
meta = {}
@@ -71,12 +71,19 @@ def setup_history_routes(session_manager) -> APIRouter:
meta["timestamp"] = m.timestamp.isoformat() + "Z"
if meta:
entry["metadata"] = meta
history_dict.append(entry)
if history_dict:
db_history.append(entry)
if db_history:
# Rebuild in-memory history from the full set so hidden
# messages (e.g. compaction summaries) are kept for AI context.
session.history = [
ChatMessage(role=m["role"], content=m["content"], metadata=m.get("metadata"))
for m in history_dict
for m in db_history
]
# Response excludes hidden messages, matching the in-memory path.
history_dict = [
m for m in db_history
if not (m.get("metadata") or {}).get("hidden")
]
except Exception as e:
logger.error(f"DB fallback failed for {session_id}: {e}")
finally:
@@ -265,7 +272,7 @@ def setup_history_routes(session_manager) -> APIRouter:
db_messages = (
db.query(DbChatMessage)
.filter(DbChatMessage.session_id == session_id, DbChatMessage.role == 'assistant')
.order_by(DbChatMessage.created_at.desc())
.order_by(DbChatMessage.timestamp.desc())
.first()
)
if db_messages:
@@ -320,7 +327,7 @@ def setup_history_routes(session_manager) -> APIRouter:
db_msg = (
db.query(DbChatMessage)
.filter(DbChatMessage.session_id == session_id, DbChatMessage.role == 'assistant')
.order_by(DbChatMessage.created_at.desc())
.order_by(DbChatMessage.timestamp.desc())
.first()
)
if db_msg:
@@ -401,7 +408,7 @@ def setup_history_routes(session_manager) -> APIRouter:
db_messages = (
db.query(DbChatMessage)
.filter(DbChatMessage.session_id == session_id)
.order_by(DbChatMessage.created_at)
.order_by(DbChatMessage.timestamp)
.all()
)
# Find last two assistant messages in DB
@@ -477,10 +484,10 @@ def setup_history_routes(session_manager) -> APIRouter:
@router.get("/api/conversations/topics")
async def get_conversation_topics(request: Request) -> Dict[str, Any]:
    """Return the topic analysis for the requesting user's conversations.

    The user is resolved once via ``require_user`` (presumably it rejects
    unauthenticated requests — confirm in src.auth_helpers). A falsy user
    value is passed to ``analyze_topics`` as ``owner=None``.

    Raises:
        HTTPException: 500 when ``analyze_topics`` fails; the original
            exception is chained as the cause.
    """
    from src.auth_helpers import require_user

    # Resolved outside the try block so an auth failure is not rewritten
    # into a 500 by the handler below.
    user = require_user(request)
    try:
        return analyze_topics(session_manager, owner=user or None)
    except Exception as e:
        raise HTTPException(500, f"Topic analysis failed: {e}") from e

View File

@@ -1,87 +1,105 @@
import re
from copy import deepcopy
from fastapi import APIRouter
# Backends the manual hardware simulator accepts. Must stay a subset of what
# services.hwfit.fit understands so a simulated box ranks like a real one:
# "metal" routes through the Apple-Silicon path (GGUF-only, llama.cpp/Ollama),
# the CPU backends through the RAM/offload path, cuda/rocm through vLLM.
_MANUAL_BACKENDS = {"cuda", "rocm", "metal", "cpu_x86", "cpu_arm"}
def _apply_manual_hardware(system, manual_mode="", manual_gpu_count="", manual_vram_gb="", manual_ram_gb="", manual_backend=""):
"""Manual hardware is a "what if I had this setup" simulator —
REPLACES the detected hardware entirely instead of adding to it.
The previous additive behavior averaged the manual VRAM across
all GPUs (base + manual), which meant adding "1× 400 GB" on top
of "2× 70 GB" only nudged the per-GPU cap from 70 to 180 GB
(= 540 / 3), so GGUF models bigger than that still didn't surface
— exactly the "cap stuck at detected level" bug the user hit.
"""
manual_mode = (manual_mode or "").lower()
if manual_mode not in {"gpu", "ram"}:
return system
try:
override_ram_gb = float(manual_ram_gb) if manual_ram_gb else 0
except ValueError:
override_ram_gb = 0
override_ram_gb = max(0.0, override_ram_gb)
if override_ram_gb:
# Replace RAM, don't add. The number in the field is the
# TOTAL system memory the user wants to simulate.
system["available_ram_gb"] = round(override_ram_gb, 1)
system["total_ram_gb"] = round(override_ram_gb, 1)
system["manual_hardware"] = True
if manual_mode == "ram":
# RAM-only simulation — wipe GPU entirely so the ranker uses
# CPU/RAM paths.
system["has_gpu"] = False
system["gpu_name"] = None
system["gpu_vram_gb"] = 0
system["gpu_count"] = 0
system["gpus"] = []
system["gpu_groups"] = []
system["backend"] = "cpu_x86"
system.pop("unified_memory", None)
return system
try:
count = int(manual_gpu_count) if manual_gpu_count else 1
except ValueError:
count = 1
try:
vram_each = float(manual_vram_gb) if manual_vram_gb else 8.0
except ValueError:
vram_each = 8.0
count = max(1, min(count, 16))
vram_each = max(1.0, vram_each)
backend = (manual_backend or system.get("backend") or "cuda").lower()
if backend not in _MANUAL_BACKENDS:
backend = "cuda"
total_vram = round(vram_each * count, 1)
gpu_name = f"Simulated {backend.upper()} GPU" + (f" × {count}" if count > 1 else "")
system["has_gpu"] = True
system["gpu_name"] = gpu_name
system["gpu_vram_gb"] = total_vram
system["gpu_count"] = count
system["gpus"] = [
{"index": i, "name": gpu_name, "vram_gb": vram_each}
for i in range(count)
]
# Single homogeneous pool — vram_each here is the ACTUAL per-GPU
# VRAM the user entered, not an average. That's the whole point:
# raising vram_each lifts the per-GPU cap (GGUF, tensor-parallel
# math) all the way up, not just by a small fraction.
system["gpu_groups"] = [{
"name": gpu_name,
"vram_each": vram_each,
"count": count,
"indices": list(range(count)),
"vram_total": total_vram,
}]
system["homogeneous"] = True
system["backend"] = backend
# Apple Silicon shares one unified memory pool with the GPU; flag it so
# the API/UI report it the way real Metal detection does. Discrete GPUs
# (cuda/rocm) and the CPU backends carry separate VRAM, so clear any
# stale flag a previous detection left on the dict.
if backend == "metal":
system["unified_memory"] = True
else:
system.pop("unified_memory", None)
return system
def setup_hwfit_routes():
router = APIRouter(prefix="/api/hwfit", tags=["hwfit"])
def _apply_manual_hardware(system, manual_mode="", manual_gpu_count="", manual_vram_gb="", manual_ram_gb="", manual_backend=""):
"""Manual hardware is a "what if I had this setup" simulator —
REPLACES the detected hardware entirely instead of adding to it.
The previous additive behavior averaged the manual VRAM across
all GPUs (base + manual), which meant adding "1× 400 GB" on top
of "2× 70 GB" only nudged the per-GPU cap from 70 to 180 GB
(= 540 / 3), so GGUF models bigger than that still didn't surface
— exactly the "cap stuck at detected level" bug the user hit.
"""
manual_mode = (manual_mode or "").lower()
if manual_mode not in {"gpu", "ram"}:
return system
try:
override_ram_gb = float(manual_ram_gb) if manual_ram_gb else 0
except ValueError:
override_ram_gb = 0
override_ram_gb = max(0.0, override_ram_gb)
if override_ram_gb:
# Replace RAM, don't add. The number in the field is the
# TOTAL system memory the user wants to simulate.
system["available_ram_gb"] = round(override_ram_gb, 1)
system["total_ram_gb"] = round(override_ram_gb, 1)
system["manual_hardware"] = True
if manual_mode == "ram":
# RAM-only simulation — wipe GPU entirely so the ranker uses
# CPU/RAM paths.
system["has_gpu"] = False
system["gpu_name"] = None
system["gpu_vram_gb"] = 0
system["gpu_count"] = 0
system["gpus"] = []
system["gpu_groups"] = []
system["backend"] = "cpu_x86"
return system
try:
count = int(manual_gpu_count) if manual_gpu_count else 1
except ValueError:
count = 1
try:
vram_each = float(manual_vram_gb) if manual_vram_gb else 8.0
except ValueError:
vram_each = 8.0
count = max(1, min(count, 16))
vram_each = max(1.0, vram_each)
backend = (manual_backend or system.get("backend") or "cuda").lower()
if backend not in {"cuda", "rocm", "cpu_x86", "cpu_arm"}:
backend = "cuda"
total_vram = round(vram_each * count, 1)
gpu_name = f"Simulated {backend.upper()} GPU" + (f" × {count}" if count > 1 else "")
system["has_gpu"] = True
system["gpu_name"] = gpu_name
system["gpu_vram_gb"] = total_vram
system["gpu_count"] = count
system["gpus"] = [
{"index": i, "name": gpu_name, "vram_gb": vram_each}
for i in range(count)
]
# Single homogeneous pool — vram_each here is the ACTUAL per-GPU
# VRAM the user entered, not an average. That's the whole point:
# raising vram_each lifts the per-GPU cap (GGUF, tensor-parallel
# math) all the way up, not just by a small fraction.
system["gpu_groups"] = [{
"name": gpu_name,
"vram_each": vram_each,
"count": count,
"indices": list(range(count)),
"vram_total": total_vram,
}]
system["homogeneous"] = True
system["backend"] = backend
return system
@router.get("/system")
def get_system(host: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False):
"""Detect and return current system hardware info. Pass host=user@server for remote.
@@ -181,6 +199,64 @@ def setup_hwfit_routes():
results = rank_models(system, use_case=use_case or None, limit=limit, search=search or None, sort=sort, quant=quant or None, target_context=target_context, fit_only=fit_only)
return {"system": system, "models": results}
@router.get("/profiles")
def get_serve_profiles(model: str = "", host: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, serve_weights_gb: float = 0.0, serve_quant: str = ""):
    """Compute llama.cpp serve profiles (Quality/Balanced/Speed) for `model`
    against the detected hardware on `host` (or local). Returns concrete
    flags (n_gpu_layers, n_cpu_moe, cache_type, ctx) the serve UI can apply.

    `model` is matched against the catalog by name: first verbatim, then by
    normalized name (exact match preferred over a suffix match). If the
    model is not in the catalog (e.g. an ad-hoc HF repo) there is no way to
    build a synthetic entry here, so the response carries an empty profile
    list and the UI keeps its manual flags.

    Returns:
        A dict with "system" and "profiles"; "error" is added when hardware
        detection fails or the model is not in the catalog, and
        "model_ctx_max" (0 when unknown) when profiles were computed.
    """
    from services.hwfit.hardware import detect_system
    from services.hwfit.models import get_models
    from services.hwfit.profiles import compute_serve_profiles

    system = detect_system(host=host, ssh_port=ssh_port, platform=platform, fresh=fresh)
    if system.get("error"):
        return {"system": system, "profiles": [], "error": system["error"]}

    catalog = {m.get("name"): m for m in (get_models() or [])}

    def _norm(s):
        # Normalize for matching: drop org/ prefix, a trailing -GGUF/-gguf
        # marker, and any quant tag, lowercase. So "DeepSeek-Coder-V2-Lite-
        # Instruct-GGUF" (a local folder name) matches catalog entry
        # "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct".
        s = (s or "").lower().strip()
        s = s.split("/")[-1]  # drop org prefix
        s = re.sub(r"[-_.]?gguf$", "", s)  # drop trailing gguf marker
        s = re.sub(r"[-_.](q\d[^/]*|iq\d[^/]*|fp8|bf16|f16|awq[^/]*|gptq[^/]*)$", "", s)
        return s

    m = catalog.get(model)
    if m is None and model:
        want = _norm(model)
        # A name that normalizes to "" (e.g. "org/" or "gguf") would be a
        # suffix of every catalog name, so it must not match anything.
        if want:
            normalized = [(_norm(name), entry) for name, entry in catalog.items()]
            # Exact normalized match wins, so a longer catalog name that
            # merely ends with the requested one cannot shadow it.
            m = next((entry for nn, entry in normalized if nn == want), None)
            if m is None:
                m = next(
                    (
                        entry
                        for nn, entry in normalized
                        if nn and (want.endswith(nn) or nn.endswith(want))
                    ),
                    None,
                )
    if m is None:
        return {"system": system, "profiles": [], "error": "model not in catalog"}

    # Surface the model's trained context limit so the serve UI can clamp a
    # user-typed context down to it (asking for ctx > n_ctx_train overflows
    # and, with a quantized KV cache, can crash the GPU).
    model_ctx_max = 0
    for k in ("context_length", "max_position_embeddings", "n_ctx_train", "context"):
        v = m.get(k)
        if isinstance(v, (int, float)) and v > 0:
            model_ctx_max = int(v)
            break

    return {
        "system": system,
        "profiles": compute_serve_profiles(
            system, m,
            serve_weights_gb=(serve_weights_gb or None),
            serve_quant=(serve_quant or None),
        ),
        "model_ctx_max": model_ctx_max,
    }
@router.get("/image-models")
def get_image_models(sort: str = "fit", search: str = "", host: str = "", gpu_count: str = "", ssh_port: str = "", platform: str = "", fresh: bool = False, manual_mode: str = "", manual_gpu_count: str = "", manual_vram_gb: str = "", manual_ram_gb: str = "", manual_backend: str = "", ignore_detected_gpu: bool = False, ignore_detected_ram: bool = False):
"""Rank image generation models against detected hardware."""

View File

@@ -27,7 +27,7 @@ from src.request_models import MemoryAddRequest
from core.database import SessionLocal
from src.llm_core import llm_call_async
from services.memory.memory_extractor import audit_memories
from src.auth_helpers import get_current_user
from src.auth_helpers import get_current_user, require_user
from src.endpoint_resolver import resolve_endpoint
logger = logging.getLogger(__name__)
@@ -191,8 +191,7 @@ def setup_memory_routes(memory_manager: MemoryManager, session_manager: SessionM
@router.post("/extract")
async def extract_memory(request: Request, session: str = Form(...)) -> Dict[str, List[str]]:
"""Analyze a session's chat history and return memory suggestions."""
if not get_current_user(request):
raise HTTPException(401, "Not authenticated")
require_user(request)
try:
sess = session_manager.get_session(session)
except KeyError:

View File

@@ -1,73 +1,213 @@
# routes/model_routes.py
"""Routes for model and provider management."""
import os
import re
import uuid
import json
import socket
import time as _time
import logging
import httpx
from datetime import datetime
from typing import List, Dict, Any, Optional
from urllib.parse import urlparse
from urllib.parse import urlparse, urlunparse
from fastapi import APIRouter, HTTPException, Form, Query, Body, Request
from pydantic import BaseModel
from fastapi.responses import StreamingResponse
from core.database import SessionLocal, ModelEndpoint, Session as DbSession
from core.middleware import require_admin
from src.llm_core import _detect_provider, ANTHROPIC_MODELS
from src.llm_core import _detect_provider, _host_match, ANTHROPIC_MODELS
from src.settings import load_settings as _load_settings, save_settings as _save_settings
from src.endpoint_resolver import normalize_base as _normalize_base, build_chat_url
from src.auth_helpers import owner_filter
from src.endpoint_resolver import (
normalize_base as _normalize_base,
build_chat_url,
build_models_url,
build_headers,
)
from src.auth_helpers import _auth_disabled, owner_filter
logger = logging.getLogger(__name__)
# Speech settings that can point at a model endpoint. Each row is
# (provider settings key, model settings key, model value restored when the
# endpoint reference is cleared, human-readable label). The provider setting
# references an endpoint as the string "endpoint:<endpoint id>".
_SPEECH_ENDPOINT_SETTINGS = (
("tts_provider", "tts_model", "tts-1", "Text to Speech"),
("stt_provider", "stt_model", "base", "Speech to Text"),
)
def _anthropic_api_root(base: str) -> str:
"""Return Anthropic's API root without duplicating /v1."""
base = (base or "").strip().rstrip("/")
host = urlparse(base).hostname or ""
if host.endswith("anthropic.com") and base.endswith("/v1"):
return base[:-3].rstrip("/")
return base
# Settings keys that store an endpoint ID, mapped to
# (companion settings key holding the model name, human-readable label).
_ENDPOINT_SETTING_FIELDS = {
"default_endpoint_id": ("default_model", "Default Model"),
"utility_endpoint_id": ("utility_model", "Utility Model"),
"research_endpoint_id": ("research_model", "Deep Research"),
"task_endpoint_id": ("task_model", "Background Tasks"),
}
# Settings keys that store a model fallback chain, mapped to their label.
# The helpers in this module read a chain as a list of dicts, each carrying
# an "endpoint_id" key.
_ENDPOINT_FALLBACK_FIELDS = {
"default_model_fallbacks": "Default Model Fallbacks",
"utility_model_fallbacks": "Utility Model Fallbacks",
"vision_model_fallbacks": "Vision Model Fallbacks",
}
def _ollama_api_root(base: str) -> str:
"""Return Ollama's native API root without depending on deferred imports."""
base = (base or "").strip().rstrip("/")
parsed = urlparse(base)
host = parsed.hostname or ""
path = (parsed.path or "").rstrip("/")
if path.endswith("/api"):
return base
if host.endswith("ollama.com"):
root = f"{parsed.scheme}://{parsed.netloc}" if parsed.scheme and parsed.netloc else "https://ollama.com"
return root.rstrip("/") + "/api"
return base
def _speech_settings_using_endpoint(settings: dict, ep_id: str) -> list:
    """List the labels of speech settings whose provider is this endpoint.

    A speech provider setting references an endpoint as
    ``"endpoint:<ep_id>"``; missing or falsy values never match.
    """
    wanted = f"endpoint:{ep_id}"
    labels = []
    for provider_key, _model_key, _default_model, label in _SPEECH_ENDPOINT_SETTINGS:
        current = settings.get(provider_key) or ""
        if current == wanted:
            labels.append(label)
    return labels
def _models_url(base: str) -> str:
    """Return the provider-specific model-list URL for route-local probing.

    Anthropic bases map to ``<api root>/v1/models``, Ollama bases to
    ``<api root>/tags``, and everything else to ``<base>/models``.

    Args:
        base: Normalised endpoint base URL.

    Returns:
        The URL to GET in order to list the endpoint's models.
    """
    provider = _detect_provider(base)
    host = urlparse(base).hostname or ""

    def _is_domain(domain: str) -> bool:
        # Exact host or a true subdomain. A bare ``endswith(domain)`` would
        # also match look-alike hosts such as ``notanthropic.com``.
        return host == domain or host.endswith("." + domain)

    if provider == "anthropic" or _is_domain("anthropic.com"):
        return _anthropic_api_root(base) + "/v1/models"
    if provider == "ollama" or _is_domain("ollama.com"):
        return _ollama_api_root(base) + "/tags"
    return base.rstrip("/") + "/models"
def _clear_speech_settings_for_endpoint(settings: dict, ep_id: str) -> list:
    """Disable every speech setting that points at the given endpoint.

    For each matching entry the provider is set to ``"disabled"`` and the
    model is put back to that entry's default. ``settings`` is mutated in
    place; the labels of the entries that were reset are returned.
    """
    wanted = f"endpoint:{ep_id}"
    reset_labels = []
    for provider_key, model_key, default_model, label in _SPEECH_ENDPOINT_SETTINGS:
        if (settings.get(provider_key) or "") != wanted:
            continue
        settings[provider_key] = "disabled"
        settings[model_key] = default_model
        reset_labels.append(label)
    return reset_labels
def _provider_headers(api_key: Optional[str], base: str) -> Dict[str, str]:
"""Build provider auth headers without depending on import-time stubs."""
if not api_key:
return {}
provider = _detect_provider(base)
host = urlparse(base).hostname or ""
if provider == "anthropic" or host.endswith("anthropic.com"):
return {
"x-api-key": api_key,
"anthropic-version": "2023-06-01",
}
return {"Authorization": f"Bearer {api_key}"}
def _endpoint_settings_using_endpoint(settings: dict, ep_id: str, *, include_speech: bool = False) -> list:
    """Return labels for settings and fallback chains that reference an endpoint.

    Args:
        settings: Settings mapping to inspect; it is not modified.
        ep_id: ID of the endpoint to look for.
        include_speech: Also report speech settings that reference the
            endpoint.

    Returns:
        Human-readable labels: direct endpoint settings first, then fallback
        chains, then (optionally) speech settings.
    """
    affected = []
    for ep_key, (_, label) in _ENDPOINT_SETTING_FIELDS.items():
        if (settings.get(ep_key) or "") == ep_id:
            affected.append(label)
    for fallback_key, label in _ENDPOINT_FALLBACK_FIELDS.items():
        chain = settings.get(fallback_key)
        # Only a list is a valid chain. This mirrors the guard in
        # _clear_endpoint_settings_for_endpoint and stops a malformed value
        # (e.g. an int) from raising TypeError during iteration.
        if not isinstance(chain, list):
            continue
        if any(isinstance(entry, dict) and (entry.get("endpoint_id") or "") == ep_id for entry in chain):
            affected.append(label)
    if include_speech:
        affected.extend(_speech_settings_using_endpoint(settings, ep_id))
    return affected
def _clear_endpoint_settings_for_endpoint(settings: dict, ep_id: str, *, include_speech: bool = False) -> list:
    """Strip every reference to an endpoint out of a settings mapping.

    Direct endpoint settings that match are blanked together with their
    companion model setting. Fallback chains that are lists lose the entries
    pointing at the endpoint. With ``include_speech`` the speech settings are
    reset as well. ``settings`` is mutated in place.

    Returns:
        Labels of everything that was changed.
    """
    def _points_at_endpoint(entry) -> bool:
        return isinstance(entry, dict) and (entry.get("endpoint_id") or "") == ep_id

    cleared = []
    for ep_key, (model_key, label) in _ENDPOINT_SETTING_FIELDS.items():
        if (settings.get(ep_key) or "") != ep_id:
            continue
        settings[ep_key] = ""
        settings[model_key] = ""
        cleared.append(label)

    for fallback_key, label in _ENDPOINT_FALLBACK_FIELDS.items():
        chain = settings.get(fallback_key)
        if not isinstance(chain, list):
            continue
        survivors = [entry for entry in chain if not _points_at_endpoint(entry)]
        # Write back only when something was actually removed.
        if len(survivors) < len(chain):
            settings[fallback_key] = survivors
            cleared.append(label)

    if include_speech:
        cleared += _clear_speech_settings_for_endpoint(settings, ep_id)
    return cleared
def _clear_user_pref_endpoint_refs(all_prefs: dict, ep_id: str) -> int:
"""Remove endpoint references from scoped or legacy-flat user preferences."""
if not isinstance(all_prefs, dict):
return 0
users = all_prefs.get("_users")
pref_sets = users.values() if isinstance(users, dict) else [all_prefs]
cleared_users = 0
for prefs in pref_sets:
if isinstance(prefs, dict) and _clear_endpoint_settings_for_endpoint(prefs, ep_id):
cleared_users += 1
return cleared_users
# Wildcard bind addresses. A server can listen on these, but they are not
# connectable targets, so the rewrite helper below normalises them to
# 127.0.0.1 before use.
_ANY_BIND_HOSTS = {"0.0.0.0", "::"}
# Loopback hosts a user might type for a local model server (LM Studio,
# llama.cpp, vLLM, …). Inside Docker these point at the *container*, not the
# host the server actually runs on. Wildcard bind addresses are included so
# they are handled by the same checks.
_LOOPBACK_HOSTS = {"localhost", "127.0.0.1", "::1", *_ANY_BIND_HOSTS}
def _docker_host_gateway_reachable() -> bool:
    """True when we run inside a container whose host is reachable via
    ``host.docker.internal`` (compose maps it to ``host-gateway``). Returns
    False on native installs and on container setups without the mapping, so
    the loopback rewrite below stays a no-op there.

    Container detection uses ``/.dockerenv`` first and falls back to looking
    for ``docker``, ``containerd`` or ``kubepods`` in ``/proc/1/cgroup``.
    """
    in_container = os.path.exists("/.dockerenv")
    if not in_container:
        try:
            with open("/proc/1/cgroup", encoding="utf-8") as fh:
                # Read the file exactly once. Calling read() per marker
                # exhausts the handle on the first call, so every later
                # marker would be tested against an empty string.
                cgroup = fh.read()
        except OSError:
            cgroup = ""
        in_container = any(marker in cgroup for marker in ("docker", "containerd", "kubepods"))
    if not in_container:
        return False
    try:
        # Resolution failure surfaces as socket.gaierror, an OSError subclass.
        socket.getaddrinfo("host.docker.internal", None)
    except OSError:
        return False
    return True
def _container_loopback_reachable(base_url: str, timeout: float = 0.2) -> bool:
"""True when the requested loopback host:port is already reachable from
inside the current container.
This distinguishes "a model server running alongside Odysseus in the same
container" from "a model server running on the Docker host". Only the
latter should be rewritten to host.docker.internal.
"""
try:
parsed = urlparse(base_url)
except Exception:
return False
host = (parsed.hostname or "").lower()
port = parsed.port
if host not in _LOOPBACK_HOSTS or not port:
return False
probe_host = "::1" if host == "::1" else "127.0.0.1"
family = socket.AF_INET6 if probe_host == "::1" else socket.AF_INET
try:
with socket.socket(family, socket.SOCK_STREAM) as sock:
sock.settimeout(timeout)
sock.connect((probe_host, port))
return True
except OSError:
return False
def _rewrite_loopback_for_docker(base_url: str, *, container_local: bool = False) -> str:
"""Rewrite a loopback model-endpoint URL to ``host.docker.internal`` when
running in Docker. A URL like ``http://localhost:1234/v1`` (the LM Studio
default) otherwise targets the Odysseus container itself, so the probe gets
a connection error and the endpoint is rejected with a misleading "No
models found for that provider/key".
Cookbook local serves are the opposite case: Odysseus started the model
server inside the same container/process environment, so the saved endpoint
must remain container-local. In that mode, normalize a bind address such as
0.0.0.0 to a connectable loopback host, but do not jump to the Docker host.
"""
try:
parsed = urlparse(base_url)
except Exception:
return base_url
host = (parsed.hostname or "").lower()
if host not in _LOOPBACK_HOSTS:
return base_url
if container_local:
if host in _ANY_BIND_HOSTS:
netloc = "127.0.0.1" + (f":{parsed.port}" if parsed.port else "")
return urlunparse(parsed._replace(netloc=netloc))
return base_url
if host in _ANY_BIND_HOSTS and not _docker_host_gateway_reachable():
netloc = "127.0.0.1" + (f":{parsed.port}" if parsed.port else "")
return urlunparse(parsed._replace(netloc=netloc))
if _container_loopback_reachable(base_url):
return base_url
if not _docker_host_gateway_reachable():
return base_url
netloc = "host.docker.internal" + (f":{parsed.port}" if parsed.port else "")
return urlunparse(parsed._replace(netloc=netloc))
# ── Curated model lists per provider ──
@@ -84,10 +224,13 @@ _PROVIDER_CURATED = {
"claude-sonnet-4-5", "claude-haiku-3-5",
],
"zai": [
"glm-5", "glm-4.7", "glm-4.7-flash",
"glm-5", "glm-5.1", "glm-5v-turbo", "glm-4.7", "glm-4.7-flash",
"glm-4.6", "glm-4.6v",
"glm-4.5", "glm-4.5v", "glm-4.5-air", "glm-4.5-flash",
],
"zai-coding": [
"glm-5.1", "glm-5v-turbo", "glm-5-turbo", "glm-4.7", "glm-4.5-air",
],
"deepseek": [
"deepseek-chat", "deepseek-reasoner",
],
@@ -122,31 +265,40 @@ _PROVIDER_CURATED = {
],
}
# Map URL substrings → curated-list keys for providers whose _detect_provider()
# Map hostnames → curated-list keys for providers whose _detect_provider()
# returns a generic value (e.g. "openai") but deserve their own curated list.
# "openrouter" is a sentinel meaning "no curation — show all models as curated".
_URL_TO_CURATED = {
"z.ai": "zai",
"api.deepseek.com": "deepseek",
"api.groq.com": "groq",
"api.mistral.ai": "mistral",
"api.together.xyz": "together",
"api.fireworks.ai": "fireworks",
"generativelanguage.googleapis.com": "google",
"api.x.ai": "xai",
"openrouter.ai": "openrouter",
"ollama.com": "ollama",
}
# Entries are matched by hostname equality or subdomain suffix (via _host_match),
# so e.g. "deepseek.com" covers api.deepseek.com without matching the substring
# inside an unrelated URL.
_HOST_TO_CURATED = (
("z.ai", "zai"),
("deepseek.com", "deepseek"),
("groq.com", "groq"),
("mistral.ai", "mistral"),
("together.xyz", "together"),
("together.ai", "together"),
("fireworks.ai", "fireworks"),
("googleapis.com", "google"),
("x.ai", "xai"),
("openrouter.ai", "openrouter"),
("ollama.com", "ollama"),
)
def _match_provider_curated(base_url: str, provider: str) -> str:
"""Return the curated-list key for a given endpoint.
Checks the base URL against _URL_TO_CURATED first, then falls back
to the raw provider string from _detect_provider().
Checks path-based overrides first (for hosts serving multiple plans),
then matches the base URL's hostname against known providers, and
finally falls back to the raw provider string from _detect_provider().
"""
for substring, key in _URL_TO_CURATED.items():
if substring in (base_url or ""):
# Path-based overrides for hosts that serve multiple curated lists.
parsed = urlparse(base_url)
if _host_match(base_url, "z.ai") and "/api/coding" in (parsed.path or ""):
return "zai-coding"
for domain, key in _HOST_TO_CURATED:
if _host_match(base_url, domain):
return key
return provider
@@ -235,16 +387,20 @@ def _probe_single_model(base: str, api_key: str, model_id: str, timeout: int = 1
elif provider == "ollama":
from src.llm_core import _build_ollama_payload
target_url = build_chat_url(base)
h = _provider_headers(api_key, base)
h = build_headers(api_key, base)
h["Content-Type"] = "application/json"
payload = _build_ollama_payload(model_id, messages, 0.0, 5, stream=False, tools=_test_tools)
else:
target_url = build_chat_url(base)
h = _provider_headers(api_key, base)
h = build_headers(api_key, base)
h["Content-Type"] = "application/json"
from src.llm_core import _uses_max_completion_tokens
from src.llm_core import _uses_max_completion_tokens, _restricts_temperature
_max_key = "max_completion_tokens" if _uses_max_completion_tokens(model_id) else "max_tokens"
payload = {"model": model_id, "messages": messages, _max_key: 5, "temperature": 0.0}
payload = {"model": model_id, "messages": messages, _max_key: 5}
# Reasoning models (o1/o3/o4/gpt-5) reject an explicit temperature, so a
# probe that hardcodes one falsely reports a working endpoint as failing.
if not _restricts_temperature(model_id):
payload["temperature"] = 0.0
if _test_tools:
payload["tools"] = _test_tools
@@ -308,7 +464,7 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
base = resolve_url(_normalize_base(base_url))
if _detect_provider(base) == "anthropic":
# Try Anthropic's /v1/models endpoint first
url = _anthropic_api_root(base) + "/v1/models"
url = build_models_url(base)
headers = {"anthropic-version": "2023-06-01"}
if api_key:
headers["x-api-key"] = api_key
@@ -331,8 +487,8 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
return []
logger.warning(f"Anthropic /v1/models failed, using hardcoded list: {e}")
return list(ANTHROPIC_MODELS)
url = _models_url(base)
headers = _provider_headers(api_key, base)
url = build_models_url(base)
headers = build_headers(api_key, base)
try:
r = httpx.get(url, headers=headers, timeout=timeout)
r.raise_for_status()
@@ -343,6 +499,13 @@ def _probe_endpoint(base_url: str, api_key: str = None, timeout: int = 5) -> Lis
if not models:
models = [m.get("name") or m.get("model") for m in (data.get("models") or []) if m.get("name") or m.get("model")]
if models:
# Z.AI coding plan omits some working models from /models;
# append curated-only entries for that endpoint only.
if _host_match(base, "z.ai") and "/api/coding" in (urlparse(base).path or ""):
_ck = _match_provider_curated(base, None)
for _e in _PROVIDER_CURATED.get(_ck, []):
if _e not in set(models) and not any(m.startswith(_e) for m in models):
models.append(_e)
return models
except httpx.HTTPStatusError as e:
if api_key:
@@ -387,7 +550,24 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
if api_key:
headers["Authorization"] = f"Bearer {api_key}"
# Ollama exposes /v1/models (OpenAI-compatible) AND native /api/version,
# /api/tags. The OpenAI-style GET base + "/models" returns 404 when the
# base is the host root or the native /api root (e.g. http://localhost:11434,
# http://localhost:11434/api) because /models lives under /v1 there. Treat
# 4xx on a port-11434 / Ollama-named base as "try the native paths" rather
# than as a definitive offline verdict — Ollama is reachable, it just
# doesn't speak OpenAI on that prefix. Without this gate the quickstart
# marks an alive Ollama as offline whenever cached_models is empty (issue
# #1025): _probe_endpoint() falls through to /api/tags on the same 404, but
# _ping_endpoint() was returning before that fallback could run.
parsed_base = urlparse(base)
looks_like_ollama = (
parsed_base.port == 11434
or "ollama" in (parsed_base.hostname or "").lower()
)
url = base + "/models"
last_error: Optional[str] = None
try:
r = httpx.get(url, headers=headers, timeout=timeout)
if 300 <= r.status_code < 400:
@@ -399,17 +579,21 @@ def _ping_endpoint(base_url: str, api_key: str = None, timeout: float = 1.5) ->
"error": "That is Odysseus, not a model server. Use the Ollama URL, usually http://host.docker.internal:11434/v1 in Docker.",
}
return {"reachable": False, "status_code": r.status_code, "error": f"HTTP {r.status_code} redirect"}
if r.status_code < 500:
return {"reachable": r.status_code < 400, "status_code": r.status_code, "error": None if r.status_code < 400 else f"HTTP {r.status_code}"}
if r.status_code < 400:
return {"reachable": True, "status_code": r.status_code, "error": None}
if r.status_code < 500 and not looks_like_ollama:
return {"reachable": False, "status_code": r.status_code, "error": f"HTTP {r.status_code}"}
last_error = f"HTTP {r.status_code}"
except Exception as e:
last_error = str(e)[:120]
else:
last_error = f"HTTP {r.status_code}"
try:
parsed = urlparse(base)
if parsed.port == 11434 or "ollama" in (parsed.hostname or "").lower():
root = base[:-3].rstrip("/") if base.endswith("/v1") else base
if looks_like_ollama:
root = base
for suffix in ("/v1", "/api"):
if root.endswith(suffix):
root = root[: -len(suffix)].rstrip("/")
break
for path in ("/api/version", "/api/tags"):
try:
r = httpx.get(root + path, timeout=timeout)
@@ -449,6 +633,15 @@ def _model_endpoint_error_message(base_url: str, ping: Dict[str, Any] = None) ->
return "No models found for that provider/key."
def _visible_models(cached_models, hidden_models):
"""Filter cached model IDs by hidden_models. Returns list of visible IDs."""
all_models = json.loads(cached_models) if isinstance(cached_models, str) else (cached_models or [])
if not hidden_models:
return all_models
hidden = set(json.loads(hidden_models) if isinstance(hidden_models, str) else (hidden_models or []))
return [m for m in all_models if m not in hidden]
def setup_model_routes(model_discovery):
router = APIRouter(prefix="/api")
@@ -625,7 +818,7 @@ def setup_model_routes(model_discovery):
# list to unauthenticated callers.
try:
auth_mgr = getattr(request.app.state, "auth_manager", None)
if not owner and auth_mgr is not None and getattr(auth_mgr, "is_configured", False):
if not owner and not _auth_disabled() and auth_mgr is not None and getattr(auth_mgr, "is_configured", False):
raise HTTPException(401, "Not authenticated")
except HTTPException:
raise
@@ -746,8 +939,8 @@ def setup_model_routes(model_discovery):
entry["error"] = str(e)
entry["model_count"] = 0
else:
url = _models_url(base)
headers = _provider_headers(ep.api_key, base)
url = build_models_url(base)
headers = build_headers(ep.api_key, base)
try:
t0 = _time.time()
r = httpx.get(url, headers=headers, timeout=5)
@@ -965,23 +1158,23 @@ def setup_model_routes(model_discovery):
require_models: str = Form("false"),
model_type: str = Form("llm"),
supports_tools: str = Form(""), # "true"/"false"/"" (unknown)
container_local: str = Form("false"),
# Default `shared=true` → endpoints are visible to all users (the
# app's historical behaviour). Admins can pass `shared=false` to
# scope a new endpoint to their own account only.
shared: str = Form("true"),
):
require_admin(request)
base_url = base_url.strip().rstrip("/")
# Normalize: strip trailing /models, /chat/completions, /v1/messages etc to get clean base
for suffix in ["/models", "/chat/completions", "/completions", "/v1/messages"]:
if base_url.endswith(suffix):
base_url = base_url[:-len(suffix)].rstrip("/")
base_url = _normalize_base(base_url)
if not base_url:
raise HTTPException(400, "Base URL is required")
# Resolve hostname via Tailscale if DNS fails
from src.endpoint_resolver import resolve_url
base_url = resolve_url(base_url)
# In Docker, manually added loopback URLs usually point at a host-local
# server. Cookbook local serves are launched inside Odysseus itself, so
# keep those container-local when the frontend marks them as such.
base_url = _rewrite_loopback_for_docker(base_url, container_local=_truthy(container_local))
# Auto-generate name from URL if not provided
if not name.strip():
@@ -1052,11 +1245,15 @@ def setup_model_routes(model_discovery):
)
db.add(ep)
db.commit()
# Auto-set as default chat endpoint if none configured yet
# Auto-set as default chat endpoint if none configured yet. Seed
# the first CHAT model (not raw model_ids[0]) so we don't pin the
# global default to an embedding/tts/etc. entry a provider happens
# to list first.
settings = _load_settings()
if not settings.get("default_endpoint_id"):
from src.endpoint_resolver import _first_chat_model
settings["default_endpoint_id"] = ep.id
settings["default_model"] = model_ids[0] if model_ids else ""
settings["default_model"] = _first_chat_model(model_ids) or ""
_save_settings(settings)
_invalidate_models_cache()
_local_probe_cache["data"] = None
@@ -1081,14 +1278,12 @@ def setup_model_routes(model_discovery):
api_key: str = Form(""),
):
require_admin(request)
base_url = base_url.strip().rstrip("/")
for suffix in ["/models", "/chat/completions", "/completions", "/v1/messages"]:
if base_url.endswith(suffix):
base_url = base_url[:-len(suffix)].rstrip("/")
base_url = _normalize_base(base_url)
if not base_url:
raise HTTPException(400, "Base URL is required")
from src.endpoint_resolver import resolve_url
base_url = resolve_url(base_url)
base_url = _rewrite_loopback_for_docker(base_url)
probe_timeout = 3 if (":11434" in base_url or "ollama" in base_url.lower()) else 2
models = _probe_endpoint(base_url, api_key.strip() or None, timeout=probe_timeout)
ping = {"reachable": True, "error": None} if models else _ping_endpoint(base_url, api_key.strip() or None, timeout=probe_timeout)
@@ -1301,9 +1496,9 @@ def setup_model_routes(model_discovery):
chat_url = build_chat_url(base)
if not model and getattr(ep, "cached_models", None):
try:
models = _json.loads(ep.cached_models) if isinstance(ep.cached_models, str) else ep.cached_models
if models:
model = models[0]
visible = _visible_models(ep.cached_models, getattr(ep, "hidden_models", None))
if visible:
model = visible[0]
except Exception:
pass
return {"endpoint_id": ep.id, "endpoint_url": chat_url, "model": model}
@@ -1337,58 +1532,63 @@ def setup_model_routes(model_discovery):
ep.name = body["name"].strip() or ep.name
if "model_type" in body and isinstance(body["model_type"], str):
ep.model_type = body["model_type"].strip() or ep.model_type
# Rotating an API key used to require DELETE+POST, which wiped
# endpoint_url/model from every session referencing the old base
# URL. Allow in-place updates so the admin can change the key
# (or correct a typo'd base URL) without nuking session state.
if "api_key" in body and isinstance(body["api_key"], str):
_new_key = body["api_key"].strip()
# Empty string means "clear it" (e.g. local Ollama no longer needs a key).
ep.api_key = _new_key or None
if "base_url" in body and isinstance(body["base_url"], str):
_new_base = body["base_url"].strip().rstrip("/")
for _suffix in ("/models", "/chat/completions", "/completions", "/v1/messages"):
if _new_base.endswith(_suffix):
_new_base = _new_base[: -len(_suffix)].rstrip("/")
_new_base = _normalize_base(_new_base)
if _new_base:
ep.base_url = _new_base
else:
ep.is_enabled = not ep.is_enabled
db.commit()
_invalidate_models_cache()
_local_probe_cache["data"] = None
return {
"id": ep.id,
"is_enabled": ep.is_enabled,
"supports_tools": ep.supports_tools,
"name": ep.name,
"model_type": ep.model_type,
"base_url": ep.base_url,
}
finally:
db.close()
# ── Settings fields that store an endpoint ID ──
_EP_SETTING_FIELDS = {
"default_endpoint_id": ("default_model", "Default Model"),
"utility_endpoint_id": ("utility_model", "Utility Model"),
"research_endpoint_id": ("research_model", "Deep Research"),
"task_endpoint_id": ("task_model", "Background Tasks"),
}
def _settings_using_endpoint(ep_id: str) -> list:
"""Return human-readable labels for settings that reference this endpoint."""
settings = _load_settings()
affected = []
for ep_key, (_, label) in _EP_SETTING_FIELDS.items():
if (settings.get(ep_key) or "") == ep_id:
affected.append(label)
tts_prov = settings.get("tts_provider") or ""
if tts_prov == f"endpoint:{ep_id}":
affected.append("Text to Speech")
return affected
return _endpoint_settings_using_endpoint(_load_settings(), ep_id, include_speech=True)
def _clear_settings_for_endpoint(ep_id: str) -> list:
"""Clear all settings that reference this endpoint. Returns list of cleared labels."""
settings = _load_settings()
cleared = []
for ep_key, (model_key, label) in _EP_SETTING_FIELDS.items():
if (settings.get(ep_key) or "") == ep_id:
settings[ep_key] = ""
settings[model_key] = ""
cleared.append(label)
tts_prov = settings.get("tts_provider") or ""
if tts_prov == f"endpoint:{ep_id}":
settings["tts_provider"] = "disabled"
settings["tts_model"] = "tts-1"
cleared.append("Text to Speech")
cleared = _clear_endpoint_settings_for_endpoint(settings, ep_id, include_speech=True)
if cleared:
_save_settings(settings)
return cleared
def _clear_user_prefs_for_endpoint(ep_id: str) -> int:
"""Clear per-user endpoint selections and fallback chains."""
try:
from routes.prefs_routes import _load as _load_prefs, _save as _save_prefs
all_prefs = _load_prefs()
cleared_users = _clear_user_pref_endpoint_refs(all_prefs, ep_id)
if cleared_users:
_save_prefs(all_prefs)
return cleared_users
except Exception as e:
logger.warning("Failed to clear user prefs for endpoint %s: %s", ep_id, e)
return 0
def _session_uses_endpoint_url(session_url: str, base_url: str) -> bool:
if not session_url or not base_url:
return False
@@ -1402,12 +1602,18 @@ def setup_model_routes(model_discovery):
return sess in variants or sess.startswith(base + "/")
def _clear_sessions_for_endpoint(db, base_url: str) -> int:
"""Drop stored auth for sessions using an endpoint being deleted.
Keep the session's endpoint URL and model intact. If the admin is
replacing an endpoint with the same URL, clearing those fields leaves
the UI looking selected while chat requests arrive with an empty model.
The chat-time orphan guard still clears truly dead endpoints when no
matching enabled endpoint exists.
"""
cleared = 0
rows = db.query(DbSession).filter(DbSession.endpoint_url.isnot(None)).all()
for row in rows:
if _session_uses_endpoint_url(row.endpoint_url or "", base_url):
row.endpoint_url = ""
row.model = ""
row.headers = {}
row.updated_at = datetime.utcnow()
cleared += 1
@@ -1425,8 +1631,6 @@ def setup_model_routes(model_discovery):
try:
for sess in list(getattr(manager, "sessions", {}).values()):
if _session_uses_endpoint_url(getattr(sess, "endpoint_url", "") or "", base_url):
sess.endpoint_url = ""
sess.model = ""
sess.headers = {}
cleared += 1
except Exception:
@@ -1449,6 +1653,7 @@ def setup_model_routes(model_discovery):
raise HTTPException(404, "Endpoint not found")
# Clean up any settings that reference this endpoint
cleared = _clear_settings_for_endpoint(ep_id)
cleared_user_preferences = _clear_user_prefs_for_endpoint(ep_id)
cleared_sessions = _clear_sessions_for_endpoint(db, ep.base_url)
cleared_loaded_sessions = _clear_loaded_sessions_for_endpoint(ep.base_url)
db.delete(ep)
@@ -1458,6 +1663,7 @@ def setup_model_routes(model_discovery):
return {
"deleted": True,
"cleared_settings": cleared,
"cleared_user_preferences": cleared_user_preferences,
"cleared_sessions": cleared_sessions,
"cleared_loaded_sessions": cleared_loaded_sessions,
}

View File

@@ -683,9 +683,8 @@ def setup_note_routes(task_scheduler=None):
Returns {synthesis, email_sent}.
"""
# Gate against anonymous callers — LLM synthesis can burn tokens.
from src.auth_helpers import get_current_user as _gcu
if not _gcu(request):
raise HTTPException(401, "Not authenticated")
from src.auth_helpers import require_user as _ru
_ru(request)
body = await request.json()
note_id = body.get("note_id")
title = (body.get("title") or "").strip()
@@ -697,7 +696,7 @@ def setup_note_routes(task_scheduler=None):
# the same dispatch without an HTTP roundtrip + auth cookie.
return await dispatch_reminder(
title=title, note_body=note_body, note_id=note_id,
owner=_gcu(request) or "",
owner=_owner(request) or "",
queue_browser=False,
)

View File

@@ -69,9 +69,12 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
if not directory:
raise HTTPException(400, "Directory path is required")
base_abs = os.path.abspath(PERSONAL_DIR)
# realpath (not abspath) so a symlink inside PERSONAL_DIR that points
# outside it is resolved before the commonpath confinement check below;
# abspath only normalises `..` and would let such a symlink escape.
base_abs = os.path.realpath(PERSONAL_DIR)
candidate = directory if os.path.isabs(directory) else os.path.join(base_abs, directory)
resolved = os.path.abspath(candidate)
resolved = os.path.realpath(candidate)
try:
in_base = os.path.commonpath([resolved, base_abs]) == base_abs
except ValueError:

View File

@@ -12,7 +12,8 @@ def _load():
"""Load the raw prefs file (internal use only)."""
try:
with open(PREFS_FILE, "r", encoding="utf-8") as f:
return json.load(f)
data = json.load(f)
return data if isinstance(data, dict) else {}
except (FileNotFoundError, json.JSONDecodeError):
return {}
@@ -40,7 +41,18 @@ def _save_for_user(user: Optional[str], prefs: dict):
"""Save preferences for a specific user."""
all_prefs = _load()
if user is None:
# Auth disabled — save flat
# Auth disabled. If the store is already multi-user (e.g. auth was
# turned off on a deployment that previously ran multi-user), writing
# `prefs` flat would overwrite the whole `_users` map and destroy every
# other user's preferences. Instead write back into the same (first)
# slot _load_for_user(None) reads from, preserving the others.
if "_users" in all_prefs:
users = all_prefs["_users"]
first_key = next(iter(users), None)
if first_key is not None:
users[first_key] = prefs
_save(all_prefs)
return
_save(prefs)
return
if "_users" not in all_prefs:

View File

@@ -3,6 +3,7 @@
import asyncio
import json
import logging
import re
import uuid
from datetime import datetime
from pathlib import Path
@@ -12,7 +13,9 @@ from fastapi import APIRouter, HTTPException, Query, Request
from fastapi.responses import HTMLResponse, StreamingResponse
from pydantic import BaseModel, Field
from src.endpoint_resolver import resolve_endpoint
from src.auth_helpers import get_current_user
from src.auth_helpers import _auth_disabled, get_current_user
_SESSION_ID_RE = re.compile(r"^[a-zA-Z0-9-]{1,128}$")
logger = logging.getLogger(__name__)
@@ -55,9 +58,15 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
verify the session belongs to this user."""
user = get_current_user(request)
if not user:
if _auth_disabled():
return ""
raise HTTPException(401, "Not authenticated")
return user
def _validate_session_id(session_id: str) -> None:
if not _SESSION_ID_RE.fullmatch(session_id):
raise HTTPException(400, "Invalid session ID format")
def _owns_in_memory(session_id: str, user: str) -> bool:
"""Ownership check for an in-flight (in-memory) research task.
Falls back to the on-disk JSON if the task has already finished."""
@@ -95,6 +104,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
@router.get("/api/research/status/{session_id}")
async def research_status(session_id: str, request: Request):
user = _require_user(request)
_validate_session_id(session_id)
if not _owns_in_memory(session_id, user):
raise HTTPException(404, "No research found for this session")
status = research_handler.get_status(session_id)
@@ -105,6 +115,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
@router.post("/api/research/cancel/{session_id}")
async def research_cancel(session_id: str, request: Request):
user = _require_user(request)
_validate_session_id(session_id)
if not _owns_in_memory(session_id, user):
raise HTTPException(404, "No research found for this session")
cancelled = research_handler.cancel_research(session_id)
@@ -113,6 +124,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
@router.post("/api/research/result/{session_id}")
async def research_result(session_id: str, request: Request):
user = _require_user(request)
_validate_session_id(session_id)
if not _owns_in_memory(session_id, user):
raise HTTPException(404, "No research result available")
result = research_handler.get_result(session_id)
@@ -140,6 +152,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
async def research_report(session_id: str, request: Request):
"""Serve the visual HTML report for a completed research session."""
user = _require_user(request)
_validate_session_id(session_id)
_assert_owns_research(session_id, user)
logger.info(f"Visual report requested for session {session_id}")
try:
@@ -160,6 +173,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
"""Mark an image URL as hidden for this research's visual report.
Persisted to the research JSON so subsequent /report renders skip it."""
user = _require_user(request)
_validate_session_id(session_id)
_assert_owns_research(session_id, user)
ok = research_handler.hide_image(session_id, body.url)
if not ok:
@@ -170,6 +184,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
async def research_unhide_images(session_id: str, request: Request):
"""Clear the hidden-images list for a research session."""
user = _require_user(request)
_validate_session_id(session_id)
_assert_owns_research(session_id, user)
ok = research_handler.unhide_all_images(session_id)
if not ok:
@@ -235,6 +250,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
"""Return the full JSON for a single research result — sources,
summary, stats — used by the Library preview panel."""
user = _require_user(request)
_validate_session_id(session_id)
path = Path("data/deep_research") / f"{session_id}.json"
if not path.exists():
raise HTTPException(404, "Research not found")
@@ -251,6 +267,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
async def research_archive(session_id: str, request: Request, archived: bool = Query(True)):
"""Soft-archive / restore a research report (sets `archived` in its JSON)."""
user = _require_user(request)
_validate_session_id(session_id)
path = Path("data/deep_research") / f"{session_id}.json"
if not path.exists():
raise HTTPException(404, "Research not found")
@@ -270,6 +287,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
async def research_delete(session_id: str, request: Request):
"""Delete a research result from disk."""
user = _require_user(request)
_validate_session_id(session_id)
data_dir = Path("data/deep_research")
json_path = data_dir / f"{session_id}.json"
deleted = False
@@ -299,7 +317,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
endpoint_id: Optional[str] = None
model: Optional[str] = None
max_time: int = Field(default=300, ge=60, le=1800)
extraction_timeout: Optional[int] = Field(default=None, ge=15, le=600)
extraction_timeout: Optional[int] = Field(default=None, ge=15, le=3600)
extraction_concurrency: Optional[int] = Field(default=None, ge=1, le=12)
category: Optional[str] = None
@@ -413,6 +431,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
async def research_stream(session_id: str, request: Request):
"""SSE stream of research progress events."""
user = _require_user(request)
_validate_session_id(session_id)
if not _owns_in_memory(session_id, user):
raise HTTPException(404, "No research found for this session")
async def _generate():
@@ -446,6 +465,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
async def research_result_peek(session_id: str, request: Request):
"""Get research result without clearing it (for panel use)."""
user = _require_user(request)
_validate_session_id(session_id)
if not _owns_in_memory(session_id, user):
raise HTTPException(404, "No research found for this session")
result = research_handler.get_result(session_id)
@@ -474,7 +494,14 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
injects a single system message containing the report and sources so
the user can ask follow-up questions in a clean conversation.
"""
_require_user(request)
user = _require_user(request)
_validate_session_id(session_id)
# SECURITY: gate on ownership before reading the persisted research —
# otherwise any authenticated user could spin off (and thereby read)
# another user's report by guessing its session ID. Mirrors every other
# endpoint in this file (see result_peek above).
if not _owns_in_memory(session_id, user):
raise HTTPException(404, "No research found for this session")
if session_manager is None:
raise HTTPException(500, "session_manager not configured")
@@ -555,7 +582,6 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
# Create new session
new_sid = str(uuid.uuid4())
user = get_current_user(request)
title_query = (query or "research").strip()
if len(title_query) > 60:

View File

@@ -11,45 +11,118 @@ from core.session_manager import SessionManager
from core.models import ChatMessage
from src.request_models import SessionResponse
from core.database import Session as DbSession, SessionLocal, Document, GalleryImage
from src.auth_helpers import get_current_user
from src.auth_helpers import get_current_user, effective_user
def _verify_session_owner(request: Request, session_id: str):
"""Verify the current user owns the session. Raises 404 if not."""
user = get_current_user(request)
def _sanitize_export_filename(name: str) -> str:
"""Return a conservative filename safe for Content-Disposition."""
name = name if isinstance(name, str) else ""
name = re.sub(r"[^A-Za-z0-9._-]", "_", name)
return name[:128]
def _verify_session_owner(request: Request, session_id: str, session_manager=None):
"""Verify the current user owns the session. Raises 404 if not.
Ownership is checked against the DB row when one exists (unchanged). If
there is no DB row but the caller owns an in-memory "ghost" session — one
that lives only in ``session_manager`` because it was never persisted, or
its DB row was removed out-of-band — fall back to the in-memory owner so the
user can still manage and delete it. Without this fallback such sessions are
listed by ``/api/sessions`` (they come from the in-memory manager) yet every
per-session operation 404s, making them impossible to delete (issue #1044).
``session_manager`` is optional and defaults to ``None`` so existing callers
that only care about persisted sessions keep their exact prior behavior.
"""
user = effective_user(request)
if not user:
raise HTTPException(403, "Authentication required")
db = SessionLocal()
try:
row = db.query(DbSession.owner).filter(DbSession.id == session_id).first()
if not row:
raise HTTPException(404, f"Session {session_id} not found")
if row.owner != user:
raise HTTPException(404, f"Session {session_id} not found")
finally:
db.close()
if row is not None:
if row.owner != user:
raise HTTPException(404, f"Session {session_id} not found")
return
# No DB row — allow the caller to act on an in-memory ghost they own.
if session_manager is not None:
ghost = getattr(session_manager, "sessions", {}).get(session_id)
if ghost is not None and getattr(ghost, "owner", None) == user:
return
raise HTTPException(404, f"Session {session_id} not found")
logger = logging.getLogger(__name__)
router = APIRouter(prefix="/api", tags=["sessions"])
def _pick_endpoint_for_sort():
def _current_user_is_admin(request: Request, user: str | None) -> bool:
    """Tell whether *user* is an admin per ``request.app.state.auth_manager``.

    Fails closed: returns ``False`` when there is no user, when the app has
    no auth manager with a callable ``is_admin``, or when that call raises.
    """
    if not user:
        return False
    manager = getattr(request.app.state, "auth_manager", None)
    checker = getattr(manager, "is_admin", None)
    if callable(checker):
        try:
            return bool(checker(user))
        except Exception:
            # A failing admin lookup must never grant admin rights.
            return False
    return False
def _reject_raw_endpoint_url_for_non_admin(
    request: Request,
    user: str | None,
    endpoint_id: str | None,
    endpoint_url: str | None,
) -> None:
    """Refuse a raw ``endpoint_url`` from a signed-in user who is not an admin.

    Nothing is rejected when a non-blank ``endpoint_id`` is supplied, when no
    ``endpoint_url`` is supplied, or when there is no user.

    Raises:
        HTTPException: 403 when a non-admin user passes only a raw URL.
    """
    has_registered_endpoint = bool(endpoint_id and endpoint_id.strip())
    if has_registered_endpoint or not endpoint_url:
        return
    if not user:
        return
    # A raw URL makes the server dial whatever host the request names. Signed-in
    # non-admins must instead pick a saved endpoint row, for which owner scoping
    # and endpoint validation have already happened.
    if _current_user_is_admin(request, user):
        return
    raise HTTPException(403, "Choose a registered model endpoint")
def _persist_session_headers(session_id: str, headers: dict | None) -> None:
    """Write endpoint auth headers onto the session's DB row, if one exists.

    A falsy ``headers`` is stored as ``{}``. ``updated_at`` is refreshed in the
    same commit. Any failure rolls the transaction back and is re-raised; the
    DB handle is always closed.
    """
    db = SessionLocal()
    try:
        record = db.query(DbSession).filter(DbSession.id == session_id).first()
        if not record:
            # No persisted row for this session: nothing to update.
            return
        record.headers = headers if headers else {}
        record.updated_at = datetime.utcnow()
        db.commit()
    except Exception:
        db.rollback()
        raise
    finally:
        db.close()
def _pick_endpoint_for_sort(owner=None):
"""Pick model endpoint for auto-sort LLM call — uses utility endpoint setting, falls back to default."""
from src.endpoint_resolver import resolve_endpoint
# Try utility endpoint first (what the user configured for background tasks)
url, model, headers = resolve_endpoint("utility")
url, model, headers = resolve_endpoint("utility", owner=owner)
if url and model:
return url, model, headers
# Fall back to task endpoint
try:
from src.task_endpoint import resolve_task_endpoint
url, model, headers = resolve_task_endpoint()
url, model, headers = resolve_task_endpoint(owner=owner)
if url and model:
return url, model, headers
except Exception:
pass
# Fall back to default
url, model, headers = resolve_endpoint("default")
url, model, headers = resolve_endpoint("default", owner=owner)
if url and model:
return url, model, headers
return None, None, None
@@ -63,7 +136,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
@router.get("/sessions")
def list_sessions(request: Request):
user = get_current_user(request)
user = effective_user(request)
# Lazy purge: incognito sessions are ephemeral by design — wipe leftovers
# from the DB and session_manager so they vanish on the next page refresh.
# BUT: skip sessions that were created within the last 10 minutes.
@@ -172,11 +245,41 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
endpoint_id: str = Form(""),
):
skip_val = str(skip_validation).lower() == "true"
user = get_current_user(request)
endpoint_api_key = ""
endpoint_base_url = ""
_reject_raw_endpoint_url_for_non_admin(request, user, endpoint_id, endpoint_url)
if endpoint_id and endpoint_id.strip():
from core.database import ModelEndpoint
from src.auth_helpers import owner_filter
from src.endpoint_resolver import build_chat_url, normalize_base
_db = SessionLocal()
try:
q = _db.query(ModelEndpoint).filter(
ModelEndpoint.id == endpoint_id.strip(),
ModelEndpoint.is_enabled == True,
)
if user:
q = owner_filter(q, ModelEndpoint, user)
endpoint_row = q.first()
if not endpoint_row:
raise HTTPException(400, "Model endpoint no longer exists")
endpoint_base_url = endpoint_row.base_url or ""
endpoint_api_key = endpoint_row.api_key or ""
endpoint_url = build_chat_url(normalize_base(endpoint_base_url))
finally:
_db.close()
if not endpoint_url and not skip_val:
raise HTTPException(400, "endpoint_url is required (choose from /api/models)")
model_to_use = model
request_api_key = api_key.strip() if api_key else ""
effective_api_key = request_api_key or endpoint_api_key
validation_headers = None
if effective_api_key:
from src.endpoint_resolver import build_headers
validation_headers = build_headers(effective_api_key, endpoint_base_url or endpoint_url)
if skip_val:
# skip_validation = trust the caller and do NOT probe /v1/models.
@@ -187,7 +290,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
elif not model_to_use:
from src.llm_core import list_model_ids
ids = list_model_ids(endpoint_url, timeout=REQUEST_TIMEOUT,
headers={"Authorization": f"Bearer {api_key}"} if api_key.strip() else None)
headers=validation_headers)
if not ids:
raise HTTPException(400, "Cannot reach /v1/models")
# Default to the first CHAT model — endpoints often list embedding/
@@ -202,7 +305,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
import os as _os
req_base = _os.path.basename(model_to_use.rstrip("/"))
avail = list_model_ids(endpoint_url, timeout=REQUEST_TIMEOUT,
headers={"Authorization": f"Bearer {api_key}"} if api_key.strip() else None)
headers=validation_headers)
if not avail:
raise HTTPException(400, "Cannot reach /v1/models")
if model_to_use not in avail:
@@ -217,7 +320,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
model_to_use = found
sid = str(uuid.uuid4())
user = get_current_user(request)
user = effective_user(request)
session = session_manager.create_session(
session_id=sid,
name=name or "",
@@ -227,22 +330,15 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
owner=user,
)
# Set auth headers for custom API-key endpoints
resolved_key = api_key.strip() if api_key else ""
resolved_key = request_api_key
resolved_base = endpoint_url
if not resolved_key and endpoint_id and endpoint_id.strip():
from core.database import ModelEndpoint
_db = SessionLocal()
try:
ep = _db.query(ModelEndpoint).filter(ModelEndpoint.id == endpoint_id.strip()).first()
if ep and ep.api_key:
resolved_key = ep.api_key
resolved_base = ep.base_url
finally:
_db.close()
if not resolved_key and endpoint_api_key:
resolved_key = endpoint_api_key
resolved_base = endpoint_base_url
if resolved_key:
from src.endpoint_resolver import build_headers
session.headers = build_headers(resolved_key, resolved_base)
session_manager.save_sessions()
_persist_session_headers(sid, session.headers)
# Fire webhook (sync-safe)
if webhook_manager:
webhook_manager.fire_and_forget("session.created", {
@@ -288,27 +384,38 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
db.close()
# Switch model/endpoint mid-session
if model is not None and endpoint_url is not None:
user = get_current_user(request)
_reject_raw_endpoint_url_for_non_admin(request, user, endpoint_id, endpoint_url)
endpoint_api_key = ""
endpoint_base_url = ""
if endpoint_id:
from core.database import ModelEndpoint
from src.auth_helpers import owner_filter
from src.endpoint_resolver import build_chat_url, normalize_base
_db = SessionLocal()
try:
ep = _db.query(ModelEndpoint).filter(ModelEndpoint.id == endpoint_id).first()
q = _db.query(ModelEndpoint).filter(
ModelEndpoint.id == endpoint_id,
ModelEndpoint.is_enabled == True,
)
if user:
q = owner_filter(q, ModelEndpoint, user)
ep = q.first()
if not ep:
raise HTTPException(400, "Model endpoint no longer exists")
endpoint_base_url = ep.base_url or ""
endpoint_api_key = ep.api_key or ""
endpoint_url = build_chat_url(normalize_base(endpoint_base_url))
finally:
_db.close()
session.model = model
session.endpoint_url = endpoint_url
# Update auth headers from the endpoint's stored API key
if endpoint_id:
_db = SessionLocal()
try:
ep = _db.query(ModelEndpoint).filter(ModelEndpoint.id == endpoint_id).first()
if ep and ep.api_key:
from src.endpoint_resolver import build_headers
session.headers = build_headers(ep.api_key, ep.base_url)
finally:
_db.close()
if endpoint_api_key:
from src.endpoint_resolver import build_headers
session.headers = build_headers(endpoint_api_key, endpoint_base_url)
else:
session.headers = {}
# Persist to DB
db = SessionLocal()
try:
@@ -316,6 +423,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
if db_session:
db_session.model = model
db_session.endpoint_url = endpoint_url
db_session.headers = session.headers or {}
db_session.updated_at = datetime.utcnow()
db.commit()
finally:
@@ -356,7 +464,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
ids = []
for sid in ids:
try:
_verify_session_owner(request, sid)
_verify_session_owner(request, sid, session_manager)
session_manager.delete_session(sid)
db = SessionLocal()
try:
@@ -374,7 +482,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
@router.delete("/session/{sid}")
def delete_session(request: Request, sid: str):
"""Permanently delete a session and all its messages."""
_verify_session_owner(request, sid)
_verify_session_owner(request, sid, session_manager)
try:
# Block deletion of starred/favorited sessions
db = SessionLocal()
@@ -499,7 +607,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
@router.get("/sessions/archived")
def list_archived_sessions(request: Request, search: str = "", offset: int = 0, limit: int = 20, sort: str = "recent", model: str = ""):
"""List archived sessions for the archive browser."""
user = get_current_user(request)
user = effective_user(request)
db = SessionLocal()
try:
q = db.query(DbSession).filter(DbSession.archived == True)
@@ -510,7 +618,12 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
safe_search = search.replace('%', r'\%').replace('_', r'\_')
q = q.filter(DbSession.name.ilike(f"%{safe_search}%", escape='\\'))
if model:
q = q.filter(DbSession.model.ilike(f"%{model}"))
# Contains match (mirrors the name filter above). The old
# f"%{model}" was a SUFFIX-only match, so filtering by "gpt-4"
# dropped "gpt-4o" and over-matched on shared suffixes; it also
# left LIKE wildcards in the user value unescaped.
safe_model = model.replace('%', r'\%').replace('_', r'\_')
q = q.filter(DbSession.model.ilike(f"%{safe_model}%", escape='\\'))
total = q.count()
sort_map = {
"recent": DbSession.updated_at.desc(),
@@ -558,6 +671,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
safe_name = re.sub(r'[^\w\-_]', '_', session.name)
timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
filename = _sanitize_export_filename(filename)
if fmt == "json":
import json as _json
@@ -635,7 +749,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
@router.post("/sessions/save")
def sessions_save_now(request: Request):
user = get_current_user(request)
user = effective_user(request)
if not user:
raise HTTPException(401, "Not authenticated")
session_manager.save_sessions()
@@ -651,7 +765,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
if not OPENAI_API_KEY:
raise HTTPException(400, "Server missing OPENAI_API_KEY")
sid = str(uuid.uuid4())
user = get_current_user(request)
user = effective_user(request)
session = session_manager.create_session(
session_id=sid,
name="",
@@ -728,7 +842,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
from src.endpoint_resolver import resolve_endpoint
from src.llm_core import llm_call_async
url, model, headers = resolve_endpoint("utility")
url, model, headers = resolve_endpoint("utility", owner=get_current_user(request))
if not url or not model:
url, model, headers = session.endpoint_url, session.model, session.headers
if not url or not model:
@@ -791,7 +905,7 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
users can clean junk without spending tokens.
"""
from src.llm_core import llm_call
user = get_current_user(request)
user = effective_user(request)
user_sessions = session_manager.get_sessions_for_user(user)
# Delete empty and throwaway sessions before sorting
@@ -928,9 +1042,9 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
# Pick an endpoint — prefer admin-configured task endpoint
from src.task_endpoint import resolve_task_endpoint
url, model, headers = resolve_task_endpoint()
url, model, headers = resolve_task_endpoint(owner=user)
if not url:
url, model, headers = _pick_endpoint_for_sort()
url, model, headers = _pick_endpoint_for_sort(owner=user)
if not url:
raise HTTPException(503, "No available model endpoint for auto-sort")

View File

@@ -118,6 +118,7 @@ def _running_in_container(dockerenv_path="/.dockerenv", cgroup_path="/proc/1/cgr
DockerRowStatus = namedtuple("DockerRowStatus", ["applicable", "install_hint"])
PackageUpdateStatus = namedtuple("PackageUpdateStatus", ["available", "note"])
def _docker_row_status(*, on_remote, in_container, installed, default_hint):
@@ -127,6 +128,24 @@ def _docker_row_status(*, on_remote, in_container, installed, default_hint):
return DockerRowStatus(applicable=True, install_hint=default_hint)
def _pip_dist_name(pkg: dict) -> str:
"""Distribution name for importlib.metadata lookups.
The Cookbook package catalog carries both the import name (``name``, e.g.
``llama_cpp``) and the pip spec (``pip``, e.g. ``llama-cpp-python[server]``).
The distribution is NOT always the import name with underscores swapped for
dashes — ``llama_cpp`` ships in the ``llama-cpp-python`` distribution — so
derive it from the pip spec (stripping any ``[extras]`` and version markers)
and fall back to the munged import name only when no pip spec is declared.
"""
pip = (pkg.get("pip") or "").strip()
if pip:
base = re.split(r"[\[<>=!~;\s]", pip, maxsplit=1)[0].strip()
if base:
return base
return (pkg.get("name") or "").replace("_", "-")
def _package_installed_from_probe(name: str, probe: dict) -> bool:
"""Return whether an optional dependency is usable by Cookbook.
@@ -162,7 +181,10 @@ def _package_status_note(name: str, probe: dict) -> str:
locations = module.get("locations") or []
if name == "vllm":
if binaries.get("vllm"):
return f"vLLM CLI: {binaries['vllm']}"
parts = [f"vLLM CLI: {binaries['vllm']}"]
if dists.get("vllm"):
parts.append(f"python package: vllm {dists['vllm']}")
return "; ".join(parts)
if module.get("found") and not dists.get("vllm"):
loc = locations[0] if locations else module.get("origin") or "unknown path"
return f"Python sees a vllm namespace at {loc}, but no vLLM CLI is on PATH."
@@ -183,13 +205,70 @@ def _package_status_note(name: str, probe: dict) -> str:
return ""
def _package_pip_update_status(pkg: dict, probe: dict | None = None) -> PackageUpdateStatus:
    """Decide whether the Dependencies UI may offer a generic pip update.

    A dependency counting as "installed" only means Cookbook can use it; it
    need not be something pip manages. A native llama-server may come from a
    package manager or source build, and a CLI can sit on PATH with no matching
    Python package metadata.

    Args:
        pkg: Catalog entry; ``kind``, ``pip`` and ``name`` are read.
        probe: Optional probe result; its ``binaries`` and ``dists`` mappings
            are consulted when they are dicts, and ignored otherwise.

    Returns:
        ``PackageUpdateStatus(available, note)``.
    """

    def _probe_section(key: str) -> dict:
        # Anything that is not a dict (including a missing probe) reads as empty.
        section = probe.get(key) if isinstance(probe, dict) else None
        return section if isinstance(section, dict) else {}

    is_system = pkg.get("kind") == "system"
    if is_system or not pkg.get("pip"):
        return PackageUpdateStatus(False, "Update this system dependency outside Odysseus.")

    found_binaries = _probe_section("binaries")
    found_dists = _probe_section("dists")
    pkg_name = pkg.get("name")

    if pkg_name == "llama_cpp" and found_binaries.get("llama-server"):
        return PackageUpdateStatus(
            False,
            "Using native llama-server on PATH; update it with its package manager or source checkout.",
        )

    vllm_cli_only = found_binaries.get("vllm") and not found_dists.get("vllm")
    if pkg_name == "vllm" and vllm_cli_only:
        return PackageUpdateStatus(
            False,
            "Using a vLLM CLI on PATH without Python package metadata; update it outside Odysseus.",
        )

    return PackageUpdateStatus(True, "Update uses pip in the selected Python environment.")
def _prepend_user_install_bins_to_path() -> None:
"""Make pip --user console scripts visible to dependency probes.
Docker Cookbook installs vLLM with `python -m pip install --user`, which
drops the `vllm` CLI in /app/.local/bin. The running app process does not
inherit that PATH update, so `shutil.which("vllm")` can report missing even
after a successful install.
"""
try:
import site
candidates = [os.path.join(site.USER_BASE, "bin")]
except Exception:
candidates = []
candidates.append(os.path.expanduser("~/.local/bin"))
parts = os.environ.get("PATH", "").split(os.pathsep) if os.environ.get("PATH") else []
changed = False
for path in reversed([p for p in candidates if p]):
if path not in parts:
parts.insert(0, path)
changed = True
if changed:
os.environ["PATH"] = os.pathsep.join(parts)
def _package_probe_script(names: list[str]) -> str:
names_lit = ",".join(repr(n) for n in names)
return f"""
import importlib.util
import importlib.metadata as md
import json
import os
import shutil
import site
names=[{names_lit}]
dist_names={{
@@ -204,6 +283,24 @@ bin_names={{
'llama_cpp':['llama-server'],
}}
def add_user_install_bins_to_path():
candidates = []
try:
candidates.append(os.path.join(site.USER_BASE, 'bin'))
except Exception:
pass
candidates.append(os.path.expanduser('~/.local/bin'))
parts = os.environ.get('PATH', '').split(os.pathsep) if os.environ.get('PATH') else []
changed = False
for path in reversed([p for p in candidates if p]):
if path not in parts:
parts.insert(0, path)
changed = True
if changed:
os.environ['PATH'] = os.pathsep.join(parts)
add_user_install_bins_to_path()
def mod_status(n):
spec = importlib.util.find_spec(n)
loader = getattr(spec, 'loader', None) if spec else None
@@ -317,7 +414,7 @@ async def _generate_pty(cmd: str, timeout: int, request: Request):
yield f"data: {json.dumps({'exit_code': -1, 'error': PTY_UNSUPPORTED_ERROR})}\n\n"
return
loop = asyncio.get_event_loop()
loop = asyncio.get_running_loop()
master_fd, slave_fd = pty.openpty()
# Set master to non-blocking
@@ -469,7 +566,8 @@ async def _generate_tmux(cmd: str, request: Request):
f"EC=${{PIPESTATUS[0]}}\n"
f"echo ':::EXIT_CODE:::'$EC >> '{log_path}'\n"
f"rm -f '{script_path}'\n"
f"exit $EC\n"
f"exit $EC\n",
encoding="utf-8",
)
script_path.chmod(0o755)
logger.info("tmux wrapper script created: session=%s path=%s", session_id, script_path)
@@ -504,7 +602,7 @@ async def _generate_tmux(cmd: str, request: Request):
# Read new lines from log
try:
if log_path.exists():
lines = log_path.read_text(errors="replace").splitlines()
lines = log_path.read_text(encoding="utf-8", errors="replace").splitlines()
new_lines = lines[lines_sent:]
for line in new_lines:
if line.startswith(":::EXIT_CODE:::"):
@@ -532,7 +630,7 @@ async def _generate_tmux(cmd: str, request: Request):
# Session ended — do one final read
await asyncio.sleep(0.5)
if log_path.exists():
lines = log_path.read_text(errors="replace").splitlines()
lines = log_path.read_text(encoding="utf-8", errors="replace").splitlines()
for line in lines[lines_sent:]:
if line.startswith(":::EXIT_CODE:::"):
try:
@@ -735,10 +833,11 @@ def setup_shell_routes() -> APIRouter:
]
finished = 0
deadline = (asyncio.get_event_loop().time() + timeout) if timeout else None
loop = asyncio.get_running_loop()
deadline = (loop.time() + timeout) if timeout else None
while finished < 2:
if deadline:
remaining = deadline - asyncio.get_event_loop().time()
remaining = deadline - loop.time()
if remaining <= 0:
raise asyncio.TimeoutError()
wait = min(remaining, 2.0)
@@ -791,7 +890,15 @@ def setup_shell_routes() -> APIRouter:
"""
_require_admin(request)
_reject_cross_site(request)
import importlib, importlib.metadata as importlib_metadata, shlex, json as _json
import importlib, importlib.metadata as importlib_metadata, shlex, json as _json, site, sys
_prepend_user_install_bins_to_path()
importlib.invalidate_caches()
try:
user_site = site.getusersitepackages()
if user_site and os.path.isdir(user_site) and user_site not in sys.path:
sys.path.append(user_site)
except Exception:
pass
if ssh_port and str(ssh_port).strip() not in ("", "22"):
_port = str(ssh_port).strip()
if not _SSH_PORT_RE.match(_port) or not (1 <= int(_port) <= 65535):
@@ -870,6 +977,7 @@ def setup_shell_routes() -> APIRouter:
for pkg in packages:
on_remote = bool(host and pkg.get("target") == "remote")
probe = None
if on_remote:
pkg["installed"] = bool(remote_status.get(pkg["name"], False))
probe = remote_details.get(pkg["name"])
@@ -883,19 +991,36 @@ def setup_shell_routes() -> APIRouter:
elif pkg["name"] == "llama_cpp" and shutil.which("llama-server"):
pkg["installed"] = True
pkg["status_note"] = f"native llama-server: {shutil.which('llama-server')}"
probe = {"binaries": {"llama-server": shutil.which("llama-server")}, "dists": {}}
elif pkg["name"] == "vllm":
_vllm_cli = shutil.which("vllm")
pkg["installed"] = _vllm_cli is not None
if pkg["installed"]:
try:
_vllm_version = importlib_metadata.version(_pip_dist_name(pkg))
except importlib_metadata.PackageNotFoundError:
_vllm_version = None
probe = {
"binaries": {"vllm": _vllm_cli},
"dists": {"vllm": _vllm_version} if _vllm_version else {},
}
pkg["status_note"] = _package_status_note("vllm", probe)
else:
try:
importlib.import_module(pkg["name"])
if pkg["name"] == "vllm":
pkg["installed"] = shutil.which("vllm") is not None
else:
importlib_metadata.version(pkg["name"].replace("_", "-"))
pkg["installed"] = True
importlib_metadata.version(_pip_dist_name(pkg))
pkg["installed"] = True
except ImportError:
pkg["installed"] = False
except importlib_metadata.PackageNotFoundError:
pkg["installed"] = False
if pkg.get("installed"):
update_status = _package_pip_update_status(pkg, probe)
pkg["pip_update_available"] = update_status.available
if update_status.note:
pkg["update_note"] = update_status.note
if pkg["name"] == "docker":
status = _docker_row_status(
on_remote=on_remote,
@@ -933,4 +1058,39 @@ def setup_shell_routes() -> APIRouter:
return {"ok": True, "output": stdout.decode()[-200:]}
return {"ok": False, "error": stderr.decode()[-300:]}
@router.post("/api/cookbook/rebuild-engine")
async def rebuild_engine(request: Request):
    """Clear the cached llama.cpp build so the next serve recompiles.

    Admin only — this removes the Cookbook-managed ``~/bin/llama-server``
    symlink and ``~/llama.cpp/build`` directory, locally or on the selected
    remote server. It installs and downloads nothing; the next llama.cpp
    serve rebuilds from source and picks up CUDA/HIP if a toolchain is now
    present. This is the missing "force a fresh GPU build" lever for hosts
    stuck on a CPU-only llama-server.

    Request JSON object (all keys optional): ``engine`` (only ``"llamacpp"``
    is supported), ``remote_host``, ``ssh_port``.

    Returns:
        ``{"ok": True, "output": ...}`` on exit code 0, otherwise
        ``{"ok": False, "error": ...}``.

    Raises:
        HTTPException: 400 for a body that is not a JSON object or for SSH
            arguments rejected by ``_ssh_base_argv``.
    """
    _require_admin(request)
    # This route deletes files and runs a shell command, so apply the same
    # cross-site guard the other admin-only routes on this router use.
    _reject_cross_site(request)
    from routes.cookbook_helpers import _llama_cpp_rebuild_cmd

    try:
        body = await request.json()
    except ValueError as e:
        # json.JSONDecodeError is a ValueError; covers empty and malformed bodies.
        raise HTTPException(400, "Request body must be valid JSON") from e
    if not isinstance(body, dict):
        raise HTTPException(400, "Request body must be a JSON object")

    engine = str(body.get("engine") or "llamacpp").strip()
    if engine != "llamacpp":
        return {"ok": False, "error": f"Unsupported engine: {engine}"}
    host = str(body.get("remote_host") or "").strip()
    ssh_port = body.get("ssh_port")
    cmd = _llama_cpp_rebuild_cmd()
    try:
        argv = (_ssh_base_argv(host, ssh_port) + [cmd]) if host else ["bash", "-lc", cmd]
    except ValueError as e:
        raise HTTPException(400, str(e)) from e

    proc = await asyncio.create_subprocess_exec(
        *argv, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )
    try:
        out, err = await asyncio.wait_for(proc.communicate(), timeout=30)
    except asyncio.TimeoutError:
        # wait_for only cancels communicate(); the child keeps running unless
        # it is killed and reaped explicitly.
        try:
            proc.kill()
        except ProcessLookupError:
            pass  # already exited between the timeout and the kill
        await proc.wait()
        return {"ok": False, "error": "Rebuild-engine command timed out."}
    if proc.returncode == 0:
        return {"ok": True, "output": out.decode("utf-8", errors="replace")[-400:]}
    return {"ok": False, "error": err.decode("utf-8", errors="replace")[-400:]}
return router

View File

@@ -79,6 +79,8 @@ def _skill_test_task(skill: dict) -> str:
an email); if we just hand over the 'when to use' text the agent has nothing
to work on and stalls asking for input. So we tell it to create its own
realistic fixture first, then apply the skill end-to-end."""
if not isinstance(skill, dict):
skill = {}
ctx = (skill.get("when_to_use") or skill.get("description") or skill.get("name") or "").strip()
return (
"Test this skill end-to-end. FIRST, set up a small realistic scenario it "
@@ -310,6 +312,8 @@ def _should_check_retrieval_precision(skill: dict) -> bool:
"installation", "install", "system", "ssh", "document", "documents",
"search", "email", "calendar", "gpu", "server", "python",
}
if not isinstance(skill, dict):
return False
tags = {str(t or "").strip().lower() for t in (skill.get("tags") or [])}
if tags & broad:
return True
@@ -463,13 +467,13 @@ async def _run_skill_test_job(key, name, md, task, url, model, headers, owner, s
if skills_manager is not None:
v = (job["verdict"] or {}).get("verdict") or "unknown"
try:
skills_manager.set_audit(name, v, by_teacher=False, worker_model=model)
skills_manager.set_audit(name, v, by_teacher=False, worker_model=model, owner=owner)
except Exception:
pass
conf = {"pass": 0.95, "needs_work": 0.6, "fail": 0.4}.get(v)
if conf is not None:
try:
skills_manager.update_skill(name, {"confidence": conf})
skills_manager.update_skill(name, {"confidence": conf}, owner=owner)
except Exception:
pass
job["status"] = "done"
@@ -563,6 +567,7 @@ def _skill_duplicate_blocker(skills_manager, name: str, owner) -> Optional[str]:
False,
[keeper_name],
f"Lower-priority duplicate of {keeper_name}",
owner=owner,
)
except Exception:
pass
@@ -629,7 +634,7 @@ def _audit_finalize_status(skills_manager, name: str, owner, verdict: str,
if generic_reason:
necessary = False
try:
skills_manager.set_necessity(name, False, [], generic_reason)
skills_manager.set_necessity(name, False, [], generic_reason, owner=owner)
except Exception:
pass
duplicate_of = _skill_duplicate_blocker(skills_manager, name, owner) if verdict == "pass" else None
@@ -638,7 +643,7 @@ def _audit_finalize_status(skills_manager, name: str, owner, verdict: str,
c = float(confidence or 0.0)
status = "published" if (auto_publish and necessary and verdict == "pass" and c >= min_conf) else "draft"
try:
skills_manager.update_skill(name, {"status": status})
skills_manager.update_skill(name, {"status": status}, owner=owner)
except Exception:
pass
return status
@@ -662,7 +667,7 @@ def _apply_skill_md(skills_manager, name: str, md: str, owner) -> bool:
"teacher_model": sk.teacher_model, "owner": sk.owner or owner,
"when_to_use": sk.when_to_use, "procedure": sk.procedure,
"pitfalls": sk.pitfalls, "verification": sk.verification, "body_extra": sk.body_extra,
}))
}, owner=owner))
except Exception as e:
logger.warning(f"Audit: could not save edited skill {name}: {e}")
return False
@@ -762,11 +767,11 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,
# earns a bit less; a skill that still fails is marked low.
def _set_conf(c):
try:
skills_manager.update_skill(name, {"confidence": c})
skills_manager.update_skill(name, {"confidence": c}, owner=owner)
except Exception:
pass
md = skills_manager.read_skill_md(name)
md = skills_manager.read_skill_md(name, owner=owner)
if not md:
log(f"{name}: no source — skipped")
return {"skill": name, "result": "skipped"}
@@ -788,7 +793,8 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,
nec = await _eval_skill_necessity(md, others, url, model, headers)
if nec is not None:
skills_manager.set_necessity(name, nec.get("necessary", True),
nec.get("redundant_with"), nec.get("reason"))
nec.get("redundant_with"), nec.get("reason"),
owner=owner)
if not nec.get("necessary", True):
log(f"{name}: possibly unnecessary — {nec.get('reason', '')[:80]}")
except Exception as e:
@@ -799,12 +805,12 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,
if generic_reason or duplicate_of or (isinstance(nec, dict) and nec.get("necessary") is False):
reason = generic_reason or (f"Lower-priority duplicate of {duplicate_of}" if duplicate_of else str((nec or {}).get("reason") or "Unnecessary skill"))
try:
skills_manager.update_skill(name, {"status": "draft", "confidence": 0.35})
skills_manager.set_audit(name, "skipped", by_teacher=False, worker_model=model)
skills_manager.update_skill(name, {"status": "draft", "confidence": 0.35}, owner=owner)
skills_manager.set_audit(name, "skipped", by_teacher=False, worker_model=model, owner=owner)
if duplicate_of:
skills_manager.set_necessity(name, False, [duplicate_of], reason)
skills_manager.set_necessity(name, False, [duplicate_of], reason, owner=owner)
else:
skills_manager.set_necessity(name, False, [], reason)
skills_manager.set_necessity(name, False, [], reason, owner=owner)
except Exception:
pass
log(f"{name}: draft — skipped functional test ({reason[:100]})")
@@ -848,13 +854,13 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,
if fixed and fixed.strip() != md.strip():
_apply_skill_md(skills_manager, name, fixed, owner)
_set_conf(0.95)
skills_manager.set_audit(name, "pass", by_teacher=False, worker_model=model)
skills_manager.set_audit(name, "pass", by_teacher=False, worker_model=model, owner=owner)
refreshed = next((s for s in skills_manager.load(owner=owner) if s.get("name") == name), None)
status = _audit_finalize_status(skills_manager, name, owner, "pass", 0.95, (refreshed or {}).get("necessity"), verdict)
log(f"{name}: {status} — confidence 95%")
return {"skill": name, "result": "pass", "verdict": verdict, "confidence": 0.95, "status": status}
if v in ("unknown", "inconclusive"):
skills_manager.set_audit(name, "inconclusive", by_teacher=False, worker_model=model)
skills_manager.set_audit(name, "inconclusive", by_teacher=False, worker_model=model, owner=owner)
status = _audit_finalize_status(skills_manager, name, owner, "inconclusive", skill.get("confidence") or 0.0, skill.get("necessity"))
log(f"{name}: {status} — inconclusive")
return {"skill": name, "result": "inconclusive", "verdict": verdict, "status": status}
@@ -869,7 +875,7 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,
log(f"{name}: retry (self) = {v}")
if v == "pass":
_set_conf(0.85)
skills_manager.set_audit(name, "pass", by_teacher=False, worker_model=model)
skills_manager.set_audit(name, "pass", by_teacher=False, worker_model=model, owner=owner)
refreshed = next((s for s in skills_manager.load(owner=owner) if s.get("name") == name), None)
status = _audit_finalize_status(skills_manager, name, owner, "pass", 0.85, (refreshed or {}).get("necessity"), verdict)
log(f"{name}: {status} — confidence 85% after self-edit")
@@ -893,7 +899,9 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,
log(f"{name}: retry on student after teacher rewrite = {v}")
if v == "pass":
_set_conf(0.8)
skills_manager.set_audit(name, "pass", by_teacher=True, worker_model=model, teacher_model=t_model)
skills_manager.set_audit(
name, "pass", by_teacher=True, worker_model=model, teacher_model=t_model, owner=owner
)
refreshed = next((s for s in skills_manager.load(owner=owner) if s.get("name") == name), None)
status = _audit_finalize_status(skills_manager, name, owner, "pass", 0.8, (refreshed or {}).get("necessity"), verdict)
log(f"{name}: {status} — confidence 80% after teacher rewrite")
@@ -901,13 +909,14 @@ async def _audit_one_skill(skills_manager, skill, url, model, headers,
# Still failing → demote to draft + low confidence + flag (do NOT delete).
try:
skills_manager.update_skill(name, {"status": "draft", "confidence": 0.35})
skills_manager.update_skill(name, {"status": "draft", "confidence": 0.35}, owner=owner)
except Exception:
pass
skills_manager.set_audit(
name, v or "fail", by_teacher=teacher_ran,
worker_model=model,
teacher_model=(teacher[1] if teacher_ran and teacher else ""),
owner=owner,
)
log(f"{name}: flagged — confidence lowered, kept as draft for manual review")
return {"skill": name, "result": "flagged", "verdict": verdict, "confidence": 0.35}
@@ -976,7 +985,7 @@ async def _run_audit_all_job(key, skills_manager, names, url, model, headers, te
job.pop("task", None)
def _resolve_audit_models():
def _resolve_audit_models(owner=None):
"""Resolve (url, model, headers, teacher) for an audit run from Settings.
Worker = Utility model (falling back to Default, normalized to a served
@@ -985,7 +994,7 @@ def _resolve_audit_models():
ValueError if no worker model.
"""
from src.endpoint_resolver import resolve_endpoint
url, model, headers = resolve_endpoint("utility")
url, model, headers = resolve_endpoint("utility", owner=owner)
if not url or not model:
raise ValueError("No model configured — set a Default or Utility model in Settings.")
try:
@@ -1029,7 +1038,7 @@ async def run_scheduled_skill_audit(skills_manager: SkillsManager,
return {"status": "running", "skipped": True}
try:
url, model, headers, teacher = _resolve_audit_models()
url, model, headers, teacher = _resolve_audit_models(owner=owner)
except ValueError as e:
logger.info(f"Scheduled skill audit skipped — {e}")
return {"status": "skipped", "reason": str(e)}
@@ -1246,7 +1255,7 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
if not match:
raise HTTPException(404, "Skill not found")
_verify_owner(match, user)
md = skills_manager.read_skill_md(match.get("name"))
md = skills_manager.read_skill_md(match.get("name"), owner=user)
if md is None:
raise HTTPException(404, "Skill source unavailable (legacy entry?)")
return {"name": match.get("name"), "markdown": md}
@@ -1273,14 +1282,14 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
raise HTTPException(404, "Skill not found")
_verify_owner(match, user)
name = match.get("name")
md = skills_manager.read_skill_md(name) or ""
md = skills_manager.read_skill_md(name, owner=user) or ""
if not task:
task = _skill_test_task(match)
# Prefer the configured DEFAULT (→ Utility) model — not the current chat
# session's model. Fall back to the caller's session model only if unset.
url, model, headers = resolve_endpoint("default")
url, model, headers = resolve_endpoint("default", owner=user)
if not url or not model:
url = url or ((body.get("endpoint_url") or "").strip() or None)
model = model or ((body.get("model") or "").strip() or None)
@@ -1360,7 +1369,7 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
# Worker model (Default, normalized) + optional teacher — shared resolver.
try:
url, model, headers, teacher = _resolve_audit_models()
url, model, headers, teacher = _resolve_audit_models(owner=user)
except ValueError as e:
raise HTTPException(400, str(e))
@@ -1437,7 +1446,7 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
@router.post("/{skill_id}/markdown")
async def save_skill_markdown(request: Request, skill_id: str):
"""Replace SKILL.md with new raw content. Parses + validates first."""
from services.memory.skill_format import Skill, slugify
from services.memory.skill_format import Skill
user = _owner(request)
body = await request.json()
new_content = body.get("markdown")
@@ -1452,7 +1461,10 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
sk = Skill.from_markdown(new_content)
except Exception as e:
raise HTTPException(400, f"Could not parse SKILL.md: {e}")
sk.name = slugify(sk.name or match.get("name"))
# Never rename on save: a changed `name` in the markdown would move
# the skill dir (update_skill) and orphan the original id, so a later
# delete 404s (#1333). Pin to the stored name, like _apply_skill_md.
sk.name = match.get("name")
if not sk.owner:
sk.owner = match.get("owner") or user
ok = skills_manager.update_skill(match.get("name"), {
@@ -1474,7 +1486,7 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
"pitfalls": sk.pitfalls,
"verification": sk.verification,
"body_extra": sk.body_extra,
})
}, owner=user)
if not ok:
raise HTTPException(500, "Update failed")
# Manual markdown edits can create or substantially rewrite a draft
@@ -1496,7 +1508,7 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
updates = body.dict(exclude_none=True)
if not updates:
return {"ok": True}
ok = skills_manager.update_skill(match.get("name"), updates)
ok = skills_manager.update_skill(match.get("name"), updates, owner=user)
if not ok:
raise HTTPException(404, "Skill not found")
if not match.get("audit_verdict"):
@@ -1511,7 +1523,7 @@ def setup_skills_routes(skills_manager: SkillsManager) -> APIRouter:
if not match:
raise HTTPException(404, "Skill not found")
_verify_owner(match, user)
ok = skills_manager.delete_skill(match.get("name"))
ok = skills_manager.delete_skill(match.get("name"), owner=user)
if not ok:
raise HTTPException(404, "Skill not found")
return {"ok": True}

View File

@@ -8,6 +8,7 @@ from typing import List
import logging
from core.middleware import require_admin
from src.auth_helpers import get_current_user
from src.upload_handler import count_recent_uploads
logger = logging.getLogger(__name__)
@@ -24,15 +25,18 @@ def setup_upload_routes(upload_handler):
client_ip = request.client.host if request.client else "unknown"
out = []
# Limit concurrent uploads per IP
ip_upload_count = sum(
1 for f in files
if client_ip in upload_handler.upload_rate_log and
any(now > time.time() - 10 for now in upload_handler.upload_rate_log[client_ip][-len(files):])
# Limit concurrent uploads per IP. Count genuine recent upload events —
# NOT the number of files in this batch. The previous check summed over
# `files`, so a single multi-file request counted itself as N concurrent
# uploads and tripped the limit (issue #1346: "attach more than one file
# → the model doesn't even see them"). save_upload still enforces the
# per-minute sliding-window rate limit per file.
recent_uploads = count_recent_uploads(
upload_handler.upload_rate_log.get(client_ip, []), time.time()
)
if ip_upload_count >= upload_handler.max_concurrent_uploads:
if recent_uploads >= upload_handler.max_concurrent_uploads:
raise HTTPException(
status_code=429,
detail=f"Maximum concurrent uploads ({upload_handler.max_concurrent_uploads}) exceeded"
@@ -107,7 +111,7 @@ def setup_upload_routes(upload_handler):
if os.path.exists(uploads_db):
with open(uploads_db, encoding="utf-8") as f:
db = json.load(f)
info = next((fi for fi in db.values() if fi["id"] == file_id), None)
info = next((fi for fi in db.values() if fi.get("id") == file_id), None)
if info:
original_name = info.get("name", file_id)
auth_mgr = getattr(request.app.state, "auth_manager", None)
@@ -155,7 +159,7 @@ def setup_upload_routes(upload_handler):
if os.path.exists(uploads_db):
with open(uploads_db, encoding="utf-8") as f:
db = json.load(f)
info = next((fi for fi in db.values() if fi["id"] == file_id), None)
info = next((fi for fi in db.values() if fi.get("id") == file_id), None)
return info
def _vision_cache_path(file_id: str) -> str:

View File

@@ -61,7 +61,8 @@ def _find_bw() -> str:
def _load_config() -> dict:
if VAULT_FILE.exists():
try:
return json.loads(VAULT_FILE.read_text(encoding="utf-8"))
data = json.loads(VAULT_FILE.read_text(encoding="utf-8"))
return data if isinstance(data, dict) else {}
except Exception:
pass
return {}
@@ -75,11 +76,18 @@ def _save_config(cfg: dict):
safe_chmod(str(VAULT_FILE), 0o600)
async def _run_bw(args: list, session: str = None, input_text: str = None) -> tuple:
async def _run_bw(args: list, session: str = None, input_text: str = None,
bw_password: str = None) -> tuple:
env = {}
env.update(os.environ)
if session:
env["BW_SESSION"] = session
# Secrets must never be passed as argv — process arguments are world-readable
# via `ps` / `/proc/<pid>/cmdline` to any local user. Keep --passwordenv
# support for bw commands that need it; unlock/login callers should prefer
# stdin so the master password is not left in the child environment either.
if bw_password is not None:
env["BW_PASSWORD"] = bw_password
bw_path = _find_bw()
try:
proc = await asyncio.create_subprocess_exec(
@@ -175,8 +183,12 @@ def setup_vault_routes():
async def unlock(req: VaultUnlockRequest, request: Request):
"""Unlock the vault and save the session key."""
require_admin(request)
# Pass the master password on stdin, not argv. argv is visible through
# `ps` / /proc/<pid>/cmdline; stdin also avoids leaving the secret in
# the child process environment.
stdout, stderr, rc = await _run_bw(
["unlock", req.master_password, "--raw"],
["unlock", "--raw"],
input_text=req.master_password + "\n",
)
if rc != 0:
return {"ok": False, "error": f"Unlock failed: {stderr[:300]}"}

View File

@@ -26,6 +26,44 @@ MAX_MESSAGE_LEN = 32_000
from core.middleware import require_admin as _require_admin
def _first_enabled_endpoint(db, owner):
    """Return the first enabled ModelEndpoint that `owner` is allowed to see.

    Visibility means the owner's own rows plus legacy null-owner ("shared")
    rows. The scoping is deliberate: ModelEndpoint is per-user
    (core/database.py — "when non-null, the model picker only shows the
    endpoint to that user"), and the sync-chat fallback uses the row's
    decrypted `api_key`. Without the owner filter, a chat-scoped token (e.g.
    a paired mobile device) could fall back onto ANOTHER user's private
    endpoint, silently spending that owner's API key / quota and reaching
    whatever internal base_url they configured. This mirrors the owner_filter
    scoping in routes/model_routes.py and companion/routes.py. A null/empty
    owner is a no-op (single-user / legacy mode), preserving the original
    behaviour.
    """
    from core.database import ModelEndpoint
    from src.auth_helpers import owner_filter

    enabled_only = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True)  # noqa: E712
    return owner_filter(enabled_only, ModelEndpoint, owner).first()
def _caller_owns_session(sess_owner, caller) -> bool:
"""Strict session-ownership gate for the token-authenticated sync-chat
endpoint (`POST /api/v1/chat`).
Mirrors ``_verify_session_owner`` in session_routes.py and the null-owner
gates in notes/calendar/gallery: a caller may resume a session ONLY when
its owner matches them exactly. A null/empty session owner (legacy or
migrated rows) is deliberately NOT resumable by an arbitrary token — the
old ``sess_owner and sess_owner != caller`` form skipped the check whenever
``sess_owner`` was falsy, so any chat-scoped token (e.g. a paired mobile
device) could resume such a session, inject a message, and read back its
history and reuse the owner's endpoint credentials. Fail closed: an
unresolvable caller also returns False.
"""
if not caller:
return False
return sess_owner == caller
def setup_webhook_routes(
webhook_manager: WebhookManager,
auth_manager,
@@ -159,6 +197,7 @@ def setup_webhook_routes(
"openrouter": "https://openrouter.ai/api/v1",
"ollama": "https://ollama.com/api",
"fireworks": "https://api.fireworks.ai/inference/v1",
"venice": "https://api.venice.ai/api/v1",
}
# Model prefix → provider mapping for auto-detection
@@ -203,7 +242,6 @@ def setup_webhook_routes(
from core.models import ChatMessage
from src.llm_core import llm_call_async
from core.database import ModelEndpoint
from src.endpoint_resolver import build_chat_url, build_headers, build_models_url, normalize_base
message = body.message.strip()
@@ -228,8 +266,11 @@ def setup_webhook_routes(
_tok_user = token_owner or getattr(request.state, "user", None) or _gcu(request)
except Exception:
_tok_user = None
# Strict ownership (see _caller_owns_session): fail closed so a
# null-owner / cross-owner session can't be resumed by an arbitrary
# chat-scoped token.
_sess_owner = getattr(sess, "owner", None)
if _tok_user and _sess_owner and _sess_owner != _tok_user:
if not _caller_owns_session(_sess_owner, _tok_user):
raise HTTPException(404, "Session not found")
# --- Case 2: Direct API key + model (no pre-configured endpoint needed) ---
@@ -265,7 +306,9 @@ def setup_webhook_routes(
if not sess:
db = SessionLocal()
try:
ep = db.query(ModelEndpoint).filter(ModelEndpoint.is_enabled == True).first()
# Owner-scoped: only THIS token owner's endpoints + legacy
# shared rows, never another user's private endpoint/api_key.
ep = _first_enabled_endpoint(db, token_owner)
finally:
db.close()

View File

@@ -9,7 +9,9 @@ Adds:
Metadata is taken from the HF Hub `list_models(full=True)` response plus the
repo name (which encodes the param size, e.g. "Qwen3.6-35B-A3B"). Param-less
names fall back to a single per-repo model_info() call to read safetensors.
names fall back, in order, to the parent `base_model:` tag, the repo's
`config.json` (computed from `hidden_size` / `num_hidden_layers` / MoE
fields), and finally a per-repo `model_info()` call to read safetensors.
Re-runnable: merges by `name`, leaving existing entries untouched unless
--overwrite is passed. Writes a .bak first.
@@ -23,7 +25,8 @@ import re
import sys
from datetime import datetime
from huggingface_hub import HfApi
from huggingface_hub import HfApi, hf_hub_download
from huggingface_hub.utils import EntryNotFoundError, RepositoryNotFoundError
DATA_PATH = os.path.join(os.path.dirname(__file__), "..", "services", "hwfit", "data", "hf_models.json")
DATA_PATH = os.path.abspath(DATA_PATH)
@@ -43,7 +46,8 @@ _GENERIC_TAGS = {
"transformers", "safetensors", "conversational", "text-generation",
"image-text-to-text", "text-generation-inference", "endpoints_compatible",
"autotrain_compatible", "compressed-tensors", "gguf", "mlx", "vllm", "4-bit",
"8-bit", "awq", "gptq", "fp8", "quantized", "chat",
"8-bit", "awq", "gptq", "fp8", "fp4", "nvfp4", "mxfp4", "nf4",
"quantized", "chat",
}
api = HfApi()
@@ -69,6 +73,128 @@ def _parse_params(name):
return total, active
def _params_from_config(cfg):
"""Estimate (total, active) parameter counts from a HF config.json dict.
Returns (None, None) when the architecture fields aren't usable. Covers:
* explicit ``num_parameters`` / ``n_params`` (rare but authoritative)
* dense transformers (LLaMA / Qwen / Mistral / GLM-dense / etc.) via
embeddings + per-layer attention + MLP
* MoE (Qwen3-MoE, GLM-4-MoE, DeepSeek-style) using ``num_experts`` or
``n_routed_experts`` (+ ``n_shared_experts``). Active count assumes
``num_experts_per_tok`` routed experts plus any shared experts.
The estimate is intentionally coarse — within ~5-10% of the true count for
standard decoder-only architectures — which is fine for the downstream
``min_vram_gb`` heuristic (it already buckets via ``parameter_count`` to
one decimal place of "B").
"""
if not isinstance(cfg, dict):
return None, None
# Authoritative fields first. Some custom configs embed the trained
# parameter count directly.
for key in ("num_parameters", "n_params", "total_params"):
v = cfg.get(key)
if isinstance(v, (int, float)) and v > 0:
return int(v), None
def _i(key, default=None):
v = cfg.get(key, default)
try:
return int(v) if v is not None else None
except (TypeError, ValueError):
return None
h = _i("hidden_size")
L = _i("num_hidden_layers")
if not h or not L:
return None, None
vocab = _i("vocab_size") or 0
ffn = _i("intermediate_size") or (4 * h)
n_heads = _i("num_attention_heads") or 0
n_kv = _i("num_key_value_heads") or n_heads
head_dim = _i("head_dim") or (h // n_heads if n_heads else h)
# Attention: Q is hidden_size wide, KV is grouped (GQA / MQA).
q_proj = h * (n_heads * head_dim if n_heads else h)
kv_proj = 2 * h * (n_kv * head_dim if n_kv else h)
o_proj = (n_heads * head_dim if n_heads else h) * h
per_layer_attn = q_proj + kv_proj + o_proj
# Dense MLP: gate + up + down (SwiGLU / GeGLU). Configs without a gate
# (plain GELU) are within the noise floor of this estimate.
per_layer_dense_mlp = 3 * h * ffn
# MoE routing. Both naming conventions are seen in the wild.
n_experts = _i("num_experts") or _i("n_routed_experts") or 0
n_shared = _i("n_shared_experts") or 0
n_active = _i("num_experts_per_tok") or 0
moe_ffn = _i("moe_intermediate_size") or ffn
# Some configs (GLM-4-MoE, DeepSeek-V3) keep the first K layers dense.
first_dense = _i("first_k_dense_replace") or 0
if n_experts > 0 and n_active > 0:
moe_layers = max(0, L - first_dense)
dense_layers = L - moe_layers
per_expert = 3 * h * moe_ffn
total_mlp = (
dense_layers * per_layer_dense_mlp
+ moe_layers * (n_experts + n_shared) * per_expert
)
active_mlp = (
dense_layers * per_layer_dense_mlp
+ moe_layers * (n_active + n_shared) * per_expert
)
else:
total_mlp = L * per_layer_dense_mlp
active_mlp = total_mlp
embed = vocab * h
# Untied output head doubles the embedding contribution.
head = 0 if cfg.get("tie_word_embeddings", True) else vocab * h
total = embed + head + L * per_layer_attn + total_mlp
active = embed + head + L * per_layer_attn + active_mlp
if total <= 0:
return None, None
if active == total or n_experts == 0:
return int(total), None
return int(total), int(active)
_CONFIG_CACHE = {}
def _fetch_config_json(repo_id):
"""Download and cache a repo's config.json. Returns a dict or None.
Network / 404 / private-repo failures are swallowed — the caller already
has a safetensors fallback below this. We rely on huggingface_hub's own
on-disk cache so repeated script runs don't re-hit the Hub.
"""
if repo_id in _CONFIG_CACHE:
return _CONFIG_CACHE[repo_id]
try:
path = hf_hub_download(repo_id=repo_id, filename="config.json")
except (EntryNotFoundError, RepositoryNotFoundError):
_CONFIG_CACHE[repo_id] = None
return None
except Exception:
# Network hiccup, gated repo, etc. — don't crash the bulk run.
_CONFIG_CACHE[repo_id] = None
return None
try:
with open(path, encoding="utf-8") as f:
cfg = json.load(f)
except (OSError, ValueError):
_CONFIG_CACHE[repo_id] = None
return None
_CONFIG_CACHE[repo_id] = cfg
return cfg
def _base_model_tag(tags):
"""Return the `base_model:...` repo id from tags, if any."""
for t in (tags or []):
@@ -79,6 +205,20 @@ def _base_model_tag(tags):
def _quant_from_name(name):
n = name.lower()
if "nvfp4" in n:
return "NVFP4"
if "mxfp4" in n:
return "MXFP4"
if re.search(r"(^|[-_/])nf4($|[-_/])", n):
return "NF4"
if re.search(r"(^|[-_/])fp4($|[-_/])", n):
return "FP4"
if re.search(r"(^|[-_/])w4a16($|[-_/])", n):
return "W4A16"
if re.search(r"(^|[-_/])w8a8($|[-_/])", n):
return "W8A8"
if re.search(r"(^|[-_/])w8a16($|[-_/])", n):
return "W8A16"
is8 = "8bit" in n or "8-bit" in n or "int8" in n
if "awq" in n:
return "AWQ-8bit" if is8 else "AWQ-4bit"
@@ -88,10 +228,14 @@ def _quant_from_name(name):
if "6bit" in n:
return "mlx-6bit"
return "mlx-8bit" if is8 else "mlx-4bit"
if "nvfp4" in n:
return "NVFP4"
if "fp8" in n:
return "FP8"
if "int4" in n or "4bit" in n or "4-bit" in n:
return "AWQ-4bit"
return "INT4"
if "int8" in n or "8bit" in n or "8-bit" in n:
return "INT8"
return "Q4_K_M"
@@ -122,6 +266,27 @@ def _entry_from_modelinfo(mi, overrides):
active = ba
# Determine quant first — we need it to unpack the safetensors fallback.
quant = _quant_from_name(name)
# Next-to-last resort: parse config.json. This is robust against
# parameter-less repo names (e.g. "GLM-4.5" with no "9B" suffix) where
# both the regex and the base_model tag come up empty. We try this
# before safetensors so non-standard names still resolve without a
# per-repo manual override in EXTRA_REPOS. Source repo first (works for
# unquantized models) then the quantized parent via base_model:.
if total is None:
config_targets = [name]
bm = _base_model_tag(getattr(mi, "tags", None))
if bm and bm != name:
config_targets.append(bm)
for target in config_targets:
cfg = _fetch_config_json(target)
if not cfg:
continue
ct, ca = _params_from_config(cfg)
if ct:
total = ct
if ca and active is None:
active = ca
break
# Last resort: read safetensors element counts. For pre-quantized repos
# (AWQ/GPTQ/MLX-Int4 etc.) the weights are packed: 8× 4-bit weights per
# I32 element, 4× 8-bit weights per I32. The bare safetensors total
@@ -136,7 +301,7 @@ def _entry_from_modelinfo(mi, overrides):
params_by_dtype = getattr(st, "parameters", None) or {}
if quant.endswith("4bit") or quant.endswith("Int4"):
pack_factor = 8
elif quant.endswith("8bit") or quant.endswith("Int8") or quant == "FP8":
elif quant.endswith("8bit") or quant.endswith("Int8") or quant in ("FP8", "NVFP4"):
pack_factor = 4
else:
pack_factor = 1
@@ -158,7 +323,10 @@ def _entry_from_modelinfo(mi, overrides):
rel = created.strftime("%Y-%m-%d") if created else datetime.utcnow().strftime("%Y-%m-%d")
# Rough RAM/VRAM hints (fit.py recomputes the real requirement from params+quant).
_BPP = {"AWQ-4bit": 0.58, "GPTQ-Int4": 0.58, "mlx-4bit": 0.55, "mlx-6bit": 0.85,
"AWQ-8bit": 1.1, "GPTQ-Int8": 1.1, "mlx-8bit": 1.1, "FP8": 1.1, "Q4_K_M": 0.6}
"AWQ-8bit": 1.1, "GPTQ-Int8": 1.1, "mlx-8bit": 1.1, "FP8": 1.1,
"FP4": 0.58, "NVFP4": 0.58, "MXFP4": 0.58, "NF4": 0.58,
"INT4": 0.58, "INT8": 1.1, "W4A16": 0.58, "W8A8": 1.1, "W8A16": 1.1,
"Q4_K_M": 0.6}
bpp = _BPP.get(quant, 0.6)
vram = round(pb * bpp + 0.5, 1)
entry = {

205
scripts/check-docker-amd-gpu.sh Executable file
View File

@@ -0,0 +1,205 @@
#!/usr/bin/env bash
# check-docker-amd-gpu.sh - read-only AMD/ROCm Docker passthrough diagnostic.
#
# This script does not install packages, edit .env, or restart Docker. It only
# checks host AMD device nodes, Docker access, and whether a small container can
# see /dev/kfd and /dev/dri. The Odysseus slim image does not include ROCm tools
# such as rocm-smi, so container verification checks devices instead.
# Treat unset variables as errors. There is no `set -e`: each check records
# its outcome in the counters below instead of aborting the script.
set -u
# Result counters, incremented by _pass / _fail / _warn.
PASS=0
FAIL=0
WARN=0
# Host group IDs; filled in by _check_groups.
RENDER_GID=""
VIDEO_GID=""
# Image used for the passthrough smoke test; override with ODYSSEUS_AMD_TEST_IMAGE.
TEST_IMAGE="${ODYSSEUS_AMD_TEST_IMAGE:-alpine:3.20}"
# Print a green [PASS] line and count it.
_pass() {
    printf '\033[32m[PASS]\033[0m %s\n' "$*"
    PASS=$((PASS + 1))
}

# Print a red [FAIL] line and count it.
_fail() {
    printf '\033[31m[FAIL]\033[0m %s\n' "$*"
    FAIL=$((FAIL + 1))
}

# Print a yellow [WARN] line and count it.
_warn() {
    printf '\033[33m[WARN]\033[0m %s\n' "$*"
    WARN=$((WARN + 1))
}

# Print a blue [INFO] line; informational only, not counted.
_info() {
    printf '\033[34m[INFO]\033[0m %s\n' "$*"
}
# Print the help text to stdout. The quoted heredoc delimiter ('USAGE')
# disables expansion, so the text is emitted literally.
_usage() {
cat <<'USAGE'
Usage: scripts/check-docker-amd-gpu.sh
Read-only AMD/ROCm Docker GPU diagnostic. Installs nothing, edits nothing, and
does not restart Docker.
Checks:
- host /dev/kfd and /dev/dri/renderD* exist
- host render group GID for RENDER_GID in .env
- optional host rocminfo visibility
- Docker can pass AMD device nodes into a small container
Environment:
ODYSSEUS_AMD_TEST_IMAGE Docker image for the passthrough smoke
(default: alpine:3.20)
USAGE
}
# Argument handling: --help/-h prints usage and exits 0; any other argument
# is a usage error (exit 1). With no arguments the loop body never runs.
for _arg in "$@"; do
    if [ "${_arg}" = "--help" ] || [ "${_arg}" = "-h" ]; then
        _usage
        exit 0
    fi
    printf 'Unknown option: %s\n\n' "${_arg}" >&2
    _usage >&2
    exit 1
done
# Locate a tool by name: first on PATH, then under /opt/rocm/bin.
# Prints the resolved path and returns 0, or prints nothing and returns 1.
_find_cmd() {
    if command -v "$1" >/dev/null 2>&1; then
        command -v "$1"
        return 0
    fi
    # Not on PATH — fall back to the /opt/rocm/bin location.
    [ -x "/opt/rocm/bin/$1" ] || return 1
    printf '/opt/rocm/bin/%s\n' "$1"
    return 0
}
# Verify the host exposes the AMD device nodes Docker must pass through:
# /dev/kfd, the /dev/dri directory, and at least one /dev/dri/renderD* node.
# Records each result via _pass/_fail and ends with a blank separator line.
_check_host_devices() {
    # Keep the scratch variable out of the global namespace.
    local render_nodes

    _info "Checking host AMD device nodes..."
    if [ -e /dev/kfd ]; then
        _pass "/dev/kfd exists"
    else
        _fail "/dev/kfd is missing - ROCm kernel driver access is not available."
    fi

    if [ -d /dev/dri ]; then
        _pass "/dev/dri exists"
    else
        _fail "/dev/dri is missing - render devices are not available."
        # Without /dev/dri there is nothing to enumerate. Still emit the
        # blank separator so the report stays aligned with the other checks.
        echo
        return
    fi

    # Character devices only, one level deep; sorted for stable output.
    render_nodes="$(find /dev/dri -maxdepth 1 -type c -name 'renderD*' -print 2>/dev/null | sort)"
    if [ -n "${render_nodes}" ]; then
        _pass "Render nodes found:"
        printf '%s\n' "${render_nodes}" | sed 's/^/ /'
    else
        _fail "No /dev/dri/renderD* node found."
    fi
    echo
}
# Look up the numeric GIDs of the host's render and video groups and store
# them in the RENDER_GID / VIDEO_GID globals (empty when a group is absent).
# A missing render group is a failure; a missing video group is a warning.
_check_groups() {
    _info "Checking host render/video groups..."

    # Third colon-separated field of the getent line is the GID.
    RENDER_GID="$(getent group render | awk -F: '{print $3; exit}')"
    VIDEO_GID="$(getent group video | awk -F: '{print $3; exit}')"

    if [ -z "${RENDER_GID}" ]; then
        _fail "render group not found - set RENDER_GID manually if your distro uses a different group."
    else
        _pass "render group GID: ${RENDER_GID}"
    fi

    if [ -z "${VIDEO_GID}" ]; then
        _warn "video group not found. /dev/kfd and renderD* may still be enough on some hosts."
    else
        _pass "video group GID: ${VIDEO_GID}"
    fi
    echo
}
# Optional host-side check: if rocminfo is available (PATH or /opt/rocm/bin),
# confirm it reports at least one gfx target and print a short summary.
# A missing or silent rocminfo is only a warning, never a failure.
_check_host_rocm() {
    # Keep scratch variables out of the global namespace.
    local rocminfo_cmd rocminfo_out

    _info "Checking host ROCm tools..."
    rocminfo_cmd="$(_find_cmd rocminfo || true)"
    if [ -n "${rocminfo_cmd}" ]; then
        # Run rocminfo once and reuse its output for both the probe and the
        # summary, so the two always agree and the tool is not run twice.
        rocminfo_out="$("${rocminfo_cmd}" 2>/dev/null)"
        if printf '%s\n' "${rocminfo_out}" | grep -Eq 'gfx[0-9a-f]+'; then
            _pass "rocminfo works on the host: ${rocminfo_cmd}"
            printf '%s\n' "${rocminfo_out}" \
                | grep -E 'Marketing Name:|Name:[[:space:]]+gfx' \
                | head -12 \
                | sed 's/^/ /'
        else
            _warn "rocminfo exists but did not list a gfx target."
        fi
    else
        _warn "rocminfo not found on PATH or /opt/rocm/bin. This does not block Docker passthrough, but host ROCm may be incomplete."
    fi
    echo
}
# Confirm the docker CLI exists and the daemon answers `docker info`.
# Returns 1 (after recording a failure) when either is missing, so the
# caller can skip the passthrough smoke test; returns 0 otherwise.
_check_docker() {
    local problem=""

    _info "Checking Docker..."
    if ! command -v docker >/dev/null 2>&1; then
        problem="docker not found - install Docker first."
    elif ! docker info >/dev/null 2>&1; then
        problem="Docker daemon is not running or this user lacks Docker permission."
    fi

    if [ -n "${problem}" ]; then
        _fail "${problem}"
        echo
        return 1
    fi

    _pass "Docker daemon is running."
    echo
}
# Smoke-test device passthrough: start a throwaway TEST_IMAGE container with
# /dev/kfd and /dev/dri attached (plus the render and, if known, video group)
# and check the nodes are visible inside it. Skipped when RENDER_GID is unknown.
_check_docker_passthrough() {
    if [ -z "${RENDER_GID}" ]; then
        _fail "Skipping Docker passthrough smoke because render GID is unknown."
        echo
        return
    fi

    _info "Testing AMD device passthrough with ${TEST_IMAGE} (may pull on first run)..."

    # Supplementary groups for the container process.
    group_args=(--group-add "${RENDER_GID}")
    [ -n "${VIDEO_GID}" ] && group_args+=(--group-add "${VIDEO_GID}")

    # The container's own output is discarded; only its exit status matters.
    if ! docker run --rm \
            --device=/dev/kfd \
            --device=/dev/dri \
            "${group_args[@]}" \
            "${TEST_IMAGE}" \
            sh -lc 'test -e /dev/kfd && test -d /dev/dri && ls /dev/dri/renderD* >/dev/null' \
            >/dev/null 2>&1; then
        _fail "Docker AMD device passthrough failed."
        _info "Check that Docker can access /dev/kfd and /dev/dri, then retry."
    else
        _pass "Docker can pass /dev/kfd and /dev/dri render nodes into a container."
    fi
    echo
}
# Print the .env lines to add for the AMD overlay (using the detected render
# GID, or a placeholder when it is unknown) and a follow-up verification hint.
_print_next_steps() {
    echo "=== Suggested .env values ==="
    # The overlay line is the same whether or not the GID was detected.
    printf 'COMPOSE_FILE=docker-compose.yml:docker/gpu.amd.yml\n'
    if [ -z "${RENDER_GID}" ]; then
        printf 'RENDER_GID=<numeric render group id>\n'
    else
        printf 'RENDER_GID=%s\n' "${RENDER_GID}"
    fi
    echo

    echo "After restarting Odysseus, verify the slim app container sees devices:"
    echo " docker compose exec odysseus sh -lc 'test -e /dev/kfd && test -d /dev/dri && ls -l /dev/kfd /dev/dri/renderD*'"
    echo

    echo "Note: rocm-smi/rocminfo are not expected inside the slim Odysseus image."
    echo "Device passthrough is necessary but not sufficient for GPU serving; vLLM and"
    echo "llama.cpp still need ROCm-compatible builds or ROCm-specific Docker images."
}
# Main sequence: run the host checks, then the Docker checks, then print the
# suggested configuration and a summary.
echo "=== Odysseus AMD Docker GPU diagnostic ==="
echo
_check_host_devices
_check_groups
_check_host_rocm
# The passthrough smoke test only makes sense when Docker itself is usable.
if _check_docker; then
_check_docker_passthrough
fi
_print_next_steps
echo
echo "=== Results: ${PASS} passed, ${WARN} warnings, ${FAIL} failed ==="
# Last command sets the script's exit status: 0 only when nothing failed.
[ "${FAIL}" -eq 0 ]

579
scripts/check-docker-gpu.sh Executable file
View File

@@ -0,0 +1,579 @@
#!/usr/bin/env bash
# check-docker-gpu.sh — Diagnostic and optional setup helper for NVIDIA Docker GPU access.
#
# Default mode is READ-ONLY — does not install packages, modify config, or restart Docker.
# The Odysseus app never calls this script automatically.
#
# USAGE
# scripts/check-docker-gpu.sh # read-only diagnostics (default)
# scripts/check-docker-gpu.sh --enable-nvidia-overlay # also write COMPOSE_FILE to .env
# scripts/check-docker-gpu.sh --print-install-commands # show OS-specific commands, don't run
# scripts/check-docker-gpu.sh --install-nvidia-toolkit # install toolkit (Ubuntu/Debian only)
# scripts/check-docker-gpu.sh --install-nvidia-toolkit --enable-nvidia-overlay
# scripts/check-docker-gpu.sh --install-nvidia-toolkit --enable-nvidia-overlay --yes
# scripts/check-docker-gpu.sh --help
# Run mode chosen by the flags: "check" (default), "print" or "install".
MODE="check"
# Set to 1 by --yes/-y; skips the confirmation prompts.
OPT_YES=0
# Set to 1 by --enable-nvidia-overlay; allows the .env edit.
OPT_ENABLE_OVERLAY=0
# Set to 1 by _check_gpu_passthrough when the container GPU test succeeds.
_GPU_PASSTHROUGH_OK=0
# ─── output helpers ──────────────────────────────────────────────────────────
# Counters reported on the final "Results" line.
PASS=0
FAIL=0

# Print a green [PASS] line and count it.
_pass() {
    printf '\033[32m[PASS]\033[0m %s\n' "$*"
    PASS=$((PASS + 1))
}

# Print a red [FAIL] line and count it.
_fail() {
    printf '\033[31m[FAIL]\033[0m %s\n' "$*"
    FAIL=$((FAIL + 1))
}

# Print a blue [INFO] line (not counted).
_info() {
    printf '\033[34m[INFO]\033[0m %s\n' "$*"
}

# Print a yellow [WARN] line (not counted).
_warn() {
    printf '\033[33m[WARN]\033[0m %s\n' "$*"
}

# Print a cyan [STEP] line (not counted).
_step() {
    printf '\033[36m[STEP]\033[0m %s\n' "$*"
}
# Ask a yes/no question on stdout and read the reply from stdin.
#   $1 — prompt text
# Returns 0 for y / yes (any letter case) and 1 for anything else,
# including an empty reply.
_confirm() {
    printf '%s [y/N] ' "$1"
    read -r _ans
    case "${_ans}" in
        [Yy] | [Yy][Ee][Ss]) return 0 ;;
    esac
    return 1
}
# ─── paths ───────────────────────────────────────────────────────────────────
# Absolute directory that contains this script, derived from $0.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
# Parent of SCRIPT_DIR; .env and .env.example are looked up here.
REPO_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
# ─── arg parsing ─────────────────────────────────────────────────────────────
# Print the help text to stdout. The heredoc delimiter is quoted, so the
# text is emitted verbatim with no variable or command expansion.
_usage() {
cat <<'USAGE'
Usage: scripts/check-docker-gpu.sh [OPTIONS]
Read-only diagnostic (default — safe to run at any time, installs nothing):
(no flags) Check host nvidia-smi, Docker daemon, and Docker
GPU passthrough. Prints PASS/FAIL and next steps.
Informational:
--print-install-commands Detect the OS and print recommended NVIDIA
Container Toolkit commands without running them.
Inspect these before deciding to install.
--help Show this help.
Opt-in .env update (requires .env or .env.example in the repo root):
--enable-nvidia-overlay Write COMPOSE_FILE=docker-compose.yml:docker/gpu.nvidia.yml
into .env. Creates a timestamped backup first.
Blocked if GPU passthrough is not working — fix
passthrough first, then re-run. --yes does not
override this gate.
Never edits .env unless this flag is passed.
Opt-in install (Ubuntu/Debian only, requires sudo):
--install-nvidia-toolkit Add NVIDIA's apt repository, install
nvidia-container-toolkit, configure the Docker
runtime, and optionally restart Docker.
Shows all commands and prompts before any
privileged action.
--yes Skip confirmation prompts (for use with
--install-nvidia-toolkit and/or
--enable-nvidia-overlay in automated setups).
Examples:
# Diagnose GPU passthrough before enabling the NVIDIA compose overlay:
scripts/check-docker-gpu.sh
# See what install commands apply to this system without running them:
scripts/check-docker-gpu.sh --print-install-commands
# Diagnose and automatically update .env with the NVIDIA overlay:
scripts/check-docker-gpu.sh --enable-nvidia-overlay
# Install toolkit interactively, then enable the overlay if it works:
scripts/check-docker-gpu.sh --install-nvidia-toolkit --enable-nvidia-overlay
# Full assisted setup without prompts (automated/CI use):
scripts/check-docker-gpu.sh --install-nvidia-toolkit --enable-nvidia-overlay --yes
After a successful setup, start Odysseus:
docker compose up -d --build
Full guide: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html
USAGE
}
# Walk the command-line flags once. --help exits immediately; the mode
# flags overwrite MODE (the last one given wins); the remaining flags set
# their option variable. Anything unrecognised prints the usage to stderr
# and exits with status 1.
for _opt in "$@"; do
    case "${_opt}" in
        --help | -h)
            _usage
            exit 0
            ;;
        --print-install-commands) MODE="print" ;;
        --install-nvidia-toolkit) MODE="install" ;;
        --enable-nvidia-overlay) OPT_ENABLE_OVERLAY=1 ;;
        --yes | -y) OPT_YES=1 ;;
        *)
            printf 'Unknown option: %s\n\n' "${_opt}" >&2
            _usage >&2
            exit 1
            ;;
    esac
done
# ─── OS/distro detection ─────────────────────────────────────────────────────
# Distro facts used to pick install instructions. They stay empty when
# /etc/os-release is missing; DISTRO_ARCH falls back to "unknown".
DISTRO_ID=""
DISTRO_LIKE=""
DISTRO_VERSION=""
DISTRO_ARCH="$(uname -m 2>/dev/null || echo unknown)"

# Print the value of one /etc/os-release key with its quoting removed.
#   $1 — key name, e.g. ID or VERSION_ID
# os-release values may be wrapped in double OR single quotes, so both are
# stripped. When a key is repeated the last assignment is used (the file
# is shell-compatible, so later assignments override earlier ones), and
# everything after the first "=" is kept as the value.
_os_release_value() {
    grep "^$1=" /etc/os-release | tail -n 1 | cut -d= -f2- | tr -d "\"'"
}

if [ -f /etc/os-release ]; then
    DISTRO_ID="$(_os_release_value ID)"
    DISTRO_LIKE="$(_os_release_value ID_LIKE)"
    DISTRO_VERSION="$(_os_release_value VERSION_ID)"
fi
# Succeed when the detected distro is Debian/Ubuntu or a derivative.
# DISTRO_ID is matched against a list of known IDs first; otherwise
# DISTRO_LIKE is searched for a "debian" or "ubuntu" word.
_is_debian_family() {
    case "${DISTRO_ID}" in
        ubuntu | debian | linuxmint | pop | elementary) return 0 ;;
    esac
    # ID_LIKE can be a space-separated list, e.g. "ubuntu debian"; padding
    # it with spaces lets each entry match as a whole word.
    case " ${DISTRO_LIKE} " in
        *" debian "* | *" ubuntu "*) return 0 ;;
        *) return 1 ;;
    esac
}
# Print a short label for the detected system without a trailing newline,
# e.g. "ubuntu 22.04 (x86_64)". The version part is omitted when unknown,
# and "unknown Linux (<arch>)" is printed when no distro ID was detected.
_distro_label() {
    if [ -z "${DISTRO_ID}" ]; then
        printf 'unknown Linux (%s)' "${DISTRO_ARCH}"
        return
    fi
    printf '%s%s (%s)' "${DISTRO_ID}" "${DISTRO_VERSION:+ ${DISTRO_VERSION}}" "${DISTRO_ARCH}"
}
# ─── Ubuntu/Debian install command text ──────────────────────────────────────
# Printed both by --print-install-commands and shown before --install runs.
# Print the Ubuntu/Debian install recipe as text; nothing is executed here.
_debian_install_steps() {
# Quoted delimiter: the commands below are printed verbatim, unexpanded.
cat <<'STEPS'
# 1. Install prerequisites
sudo apt-get update
sudo apt-get install -y curl gpg
# 2. Add NVIDIA's signing key
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
| sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
# 3. Add NVIDIA's apt repository
curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
| sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
| sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
# 4. Install the toolkit
sudo apt-get update
sudo apt-get install -y nvidia-container-toolkit
# 5. Configure the Docker runtime
sudo nvidia-ctk runtime configure --runtime=docker
# 6. Restart Docker
sudo systemctl restart docker
# 7. Verify
docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi
STEPS
}
# ─── read-only checks ────────────────────────────────────────────────────────
# Check that the host has a working nvidia-smi that lists at least one GPU.
# Records one PASS or one FAIL; on success the GPU list is printed.
_check_nvidia_smi() {
    _info "Checking host nvidia-smi..."
    if ! command -v nvidia-smi >/dev/null 2>&1; then
        _fail "nvidia-smi not found — install the NVIDIA driver for your distribution."
        _info "No NVIDIA GPU? Skip this script — the NVIDIA overlay is not needed for CPU-only use."
    elif nvidia-smi -L 2>/dev/null | grep -q 'GPU '; then
        _pass "nvidia-smi is working. Detected GPUs:"
        nvidia-smi -L 2>/dev/null | sed 's/^/ /'
    else
        _fail "nvidia-smi found but no GPUs listed — check your NVIDIA driver installation."
    fi
    echo
}
# Returns 1 if Docker is unavailable (callers should stop further GPU checks).
# Check that the docker CLI exists and that `docker info` succeeds.
# Returns 1 (after recording a FAIL) when either check fails, so callers
# can stop before the passthrough test; returns 0 otherwise.
_check_docker() {
    _info "Checking Docker..."

    if ! command -v docker >/dev/null 2>&1; then
        _fail "docker not found — install Docker: https://docs.docker.com/engine/install/"
        echo "Cannot continue without Docker."
        return 1
    fi

    if ! docker info >/dev/null 2>&1; then
        _fail "Docker daemon is not running or current user lacks permission."
        _info "Try: sudo systemctl start docker"
        _info "Or add your user to the docker group: sudo usermod -aG docker \$USER"
        echo "Cannot continue — GPU passthrough test requires a running Docker daemon."
        return 1
    fi

    _pass "Docker daemon is running."
    echo
}
# Run nvidia-smi inside a CUDA base container with `--gpus all`.
# On success sets _GPU_PASSTHROUGH_OK=1, records a PASS and prints caveats;
# on failure records a FAIL and prints the manual recovery steps.
_check_gpu_passthrough() {
    local _note

    _info "Testing GPU passthrough (may pull image on first run):"
    _info " docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi"
    echo

    if ! docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi 2>&1; then
        echo
        _fail "GPU passthrough failed. Check these steps in order:"
        # Quoted delimiter: the recovery steps are printed verbatim.
        cat <<'RECOVERY'

 1. Install NVIDIA Container Toolkit (if not already installed):
 Arch: sudo pacman -S nvidia-container-toolkit
 Debian: sudo apt install nvidia-container-toolkit
 Fedora: sudo dnf install nvidia-container-toolkit
 Full guide: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html

 2. Configure the Docker runtime:
 sudo nvidia-ctk runtime configure --runtime=docker

 3. Restart Docker:
 sudo systemctl restart docker

 Then re-run this script to confirm.

RECOVERY
        _warn "Without GPU passthrough, Cookbook will detect the iGPU, another card, or"
        _warn "CPU instead of your NVIDIA GPU — model recommendations will use the wrong VRAM."
        _info "Run with --print-install-commands to see OS-specific commands."
        _info "Run with --install-nvidia-toolkit to install on Ubuntu/Debian."
        echo
        return
    fi

    echo
    _GPU_PASSTHROUGH_OK=1
    _pass "GPU passthrough is working — the NVIDIA compose overlay should work."
    for _note in \
        "Passthrough means Docker can see your GPU. It does NOT guarantee" \
        "llama.cpp will use CUDA. If Cookbook logs show:" \
        " 'Unable to find cudart library'" \
        " 'Could NOT find CUDAToolkit' / 'CUDA Toolkit not found'" \
        " tensors or layers assigned to CPU" \
        "that is a Cookbook/llama.cpp CUDA build or runtime issue, not a" \
        "passthrough failure. Re-install the serve engine via" \
        "Cookbook -> Dependencies to get a CUDA-enabled build."; do
        _info "${_note}"
    done
    # Only advertise the overlay flag when the user did not already pass it.
    if [ "${OPT_ENABLE_OVERLAY}" -eq 0 ]; then
        _info "Enable the overlay in .env with:"
        _info " scripts/check-docker-gpu.sh --enable-nvidia-overlay"
    fi
    echo
}
# ─── --enable-nvidia-overlay ─────────────────────────────────────────────────
# Add docker/gpu.nvidia.yml to COMPOSE_FILE in ${REPO_ROOT}/.env.
#
# Steps: make sure .env exists (optionally copying .env.example), stop
# early when the overlay is already listed, take a timestamped backup,
# then either append a new COMPOSE_FILE line or extend the existing value.
#
# Returns 0 on success or when nothing needs to change, 1 on any failure
# (each failure is also recorded through _fail).
#
# Differences from a plain `sed > tmp && mv`:
#   * the temp file is seeded with `cp -p`, so the rewritten .env keeps the
#     mode/ownership of the original (a fresh redirect would widen a
#     chmod-600 secrets file to the umask default);
#   * the new value reaches awk through the environment, so characters such
#     as "&", "|" or "\" in the existing value cannot corrupt the rewrite;
#   * a value wrapped in quotes gets the overlay inserted inside the quotes.
_enable_nvidia_overlay() {
    echo "=== Enabling NVIDIA compose overlay ==="
    echo
    local _env_file="${REPO_ROOT}/.env"
    local _env_example="${REPO_ROOT}/.env.example"
    local _overlay_fragment="docker/gpu.nvidia.yml"
    local _backup_ts
    _backup_ts="$(date +%Y%m%d-%H%M%S)"

    # Ensure .env exists
    if [ ! -f "${_env_file}" ]; then
        if [ -f "${_env_example}" ]; then
            _info ".env not found. .env.example is available."
            local _do_copy=0
            if [ "${OPT_YES}" -eq 1 ]; then
                _do_copy=1
            elif _confirm "Copy .env.example to .env?"; then
                _do_copy=1
            fi
            if [ "${_do_copy}" -eq 1 ]; then
                if ! cp "${_env_example}" "${_env_file}"; then
                    _fail "Failed to copy .env.example to .env."
                    return 1
                fi
                _pass "Copied .env.example to .env."
            else
                _fail ".env is required to set COMPOSE_FILE — aborted."
                return 1
            fi
        else
            _fail ".env not found and .env.example is missing."
            _info "Create a .env file in the repo root, then re-run."
            return 1
        fi
    fi

    # Read current active (uncommented) COMPOSE_FILE value, if any
    local _current_cf
    _current_cf="$(grep '^COMPOSE_FILE=' "${_env_file}" | tail -1 | cut -d= -f2-)"

    # Idempotency check (quoted pattern part = literal substring match)
    case "${_current_cf}" in
        *"${_overlay_fragment}"*)
            _pass "COMPOSE_FILE already includes the NVIDIA overlay — nothing to change."
            echo
            _info "Start or restart Odysseus to apply:"
            _info " docker compose up -d --build"
            return 0
            ;;
    esac

    # Back up .env before any edit
    local _backup="${_env_file}.bak.${_backup_ts}"
    if ! cp "${_env_file}" "${_backup}"; then
        _fail "Failed to create backup of .env — aborting to avoid data loss."
        return 1
    fi
    _info "Backup created: .env.bak.${_backup_ts}"

    local _new_cf=""
    if [ -z "${_current_cf}" ]; then
        # No active COMPOSE_FILE line — append one
        _new_cf="docker-compose.yml:${_overlay_fragment}"
        if ! printf '\nCOMPOSE_FILE=%s\n' "${_new_cf}" >> "${_env_file}"; then
            _fail "Failed to write COMPOSE_FILE to .env."
            return 1
        fi
    else
        # Existing COMPOSE_FILE — append the overlay to the existing value.
        # Peel off one pair of surrounding quotes so the overlay lands
        # inside them instead of after the closing quote.
        local _quote=""
        local _value="${_current_cf}"
        case "${_current_cf}" in
            \"*\")
                _quote='"'
                _value="${_value#\"}"
                _value="${_value%\"}"
                ;;
            \'*\')
                _quote="'"
                _value="${_value#\'}"
                _value="${_value%\'}"
                ;;
        esac
        # An empty quoted value ("" or '') still needs the base compose file.
        [ -n "${_value}" ] || _value="docker-compose.yml"
        _new_cf="${_quote}${_value}:${_overlay_fragment}${_quote}"

        local _tmp="${_env_file}.tmp"
        # Seed the temp file from .env so it inherits mode and ownership;
        # the redirect below truncates it without changing either.
        if ! cp -p "${_env_file}" "${_tmp}"; then
            _fail "Failed to update COMPOSE_FILE in .env."
            rm -f "${_tmp}"
            return 1
        fi
        if ! ODYSSEUS_NEW_COMPOSE_FILE="${_new_cf}" awk '
            /^COMPOSE_FILE=/ { print "COMPOSE_FILE=" ENVIRON["ODYSSEUS_NEW_COMPOSE_FILE"]; next }
            { print }
        ' "${_env_file}" > "${_tmp}"; then
            _fail "Failed to update COMPOSE_FILE in .env."
            rm -f "${_tmp}"
            return 1
        fi
        if ! mv "${_tmp}" "${_env_file}"; then
            _fail "Failed to write updated .env."
            rm -f "${_tmp}"
            return 1
        fi
    fi

    _pass "COMPOSE_FILE set to: ${_new_cf}"
    echo
    _info "Start or restart Odysseus with the NVIDIA overlay:"
    _info " docker compose up -d --build"
    echo
    _info "To undo, restore the backup:"
    _info " cp ${_backup} ${_env_file}"
}
# ─── mode: default read-only diagnostic ──────────────────────────────────────
# Default mode: run the diagnostics and, when --enable-nvidia-overlay was
# given and passthrough works, update .env. Returns 0 only when no check
# recorded a failure.
_mode_check() {
    echo "=== Odysseus Docker GPU diagnostic ==="
    echo
    _check_nvidia_smi
    if ! _check_docker; then
        echo "=== Results: ${PASS} passed, ${FAIL} failed ==="
        return 1
    fi
    _check_gpu_passthrough

    if [ "${OPT_ENABLE_OVERLAY}" -eq 1 ] && [ "${_GPU_PASSTHROUGH_OK}" -ne 0 ]; then
        _enable_nvidia_overlay
    elif [ "${OPT_ENABLE_OVERLAY}" -eq 1 ]; then
        # Hard gate: broken passthrough blocks .env edits regardless of --yes.
        # Writing COMPOSE_FILE before passthrough works causes Odysseus to fail
        # at startup, so this is not a prompt — it is a stop.
        _fail "GPU passthrough is not working — .env will not be modified."
        _info "Fix passthrough first, then re-run with --enable-nvidia-overlay:"
        _info " Ubuntu/Debian: scripts/check-docker-gpu.sh --install-nvidia-toolkit"
        _info " Other distros: scripts/check-docker-gpu.sh --print-install-commands"
        echo
    fi

    echo "=== Results: ${PASS} passed, ${FAIL} failed ==="
    [ "${FAIL}" -eq 0 ]
}
# ─── mode: --print-install-commands ──────────────────────────────────────────
# Print the manual install recipe shared by the non-Debian distro families.
#   $1 — heading line, $2 — the distro-specific package install line.
# The configure / restart / verify lines are the same for every family.
_print_manual_steps() {
    _info "$1"
    echo
    echo "$2"
    echo " sudo nvidia-ctk runtime configure --runtime=docker"
    echo " sudo systemctl restart docker"
    echo " docker run --rm --gpus all nvidia/cuda:12.4.1-base-ubuntu22.04 nvidia-smi"
}

# --print-install-commands: show the install commands that fit the detected
# distro. Nothing is executed or modified.
_mode_print() {
    echo "=== NVIDIA Container Toolkit — install commands ==="
    echo
    _info "Detected system: $(_distro_label)"
    echo

    if _is_debian_family; then
        _info "Ubuntu/Debian — recommended install commands:"
        _debian_install_steps
        _info "After running these, re-run the diagnostic to confirm:"
        _info " scripts/check-docker-gpu.sh"
        return
    fi

    case "${DISTRO_ID}" in
        fedora | rhel | centos | rocky | almalinux)
            _print_manual_steps "Fedora/RHEL — install commands:" \
                " sudo dnf install -y nvidia-container-toolkit"
            ;;
        opensuse* | sles)
            _print_manual_steps "OpenSUSE/SLES — install commands:" \
                " sudo zypper install nvidia-container-toolkit"
            ;;
        arch | manjaro | endeavouros)
            _print_manual_steps "Arch Linux — install commands:" \
                " sudo pacman -S nvidia-container-toolkit"
            ;;
        *)
            _warn "Distro '${DISTRO_ID:-unknown}' is not specifically recognized."
            echo
            echo " See the full guide for your distribution:"
            echo " https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html"
            ;;
    esac
    echo
    _info "Automated install (--install-nvidia-toolkit) supports Ubuntu/Debian only."
    _info "For other distros, run the commands above manually, then re-run:"
    _info " scripts/check-docker-gpu.sh"
}
# ─── mode: --install-nvidia-toolkit ──────────────────────────────────────────
# --install-nvidia-toolkit: install and configure the NVIDIA Container
# Toolkit on Ubuntu/Debian, then verify GPU passthrough and (optionally)
# enable the compose overlay.
#
# Exits the script with status 1 on any failed install step and 0 when the
# user declines the first prompt. Otherwise returns 0 only when no check
# recorded a failure.
#
# Download handling: a pipeline normally reports only the status of its
# LAST command, so a failed curl would go unnoticed as long as gpg/tee
# succeeded. The key download therefore runs under `pipefail` in a
# subshell (keeping the option local), and the repository list is fetched
# into a variable first so a failed or empty download can never overwrite
# the apt sources file. curl uses -f so HTTP errors count as failures
# instead of writing an error page to disk.
_mode_install() {
    echo "=== NVIDIA Container Toolkit — interactive installer ==="
    echo
    if [ "$(uname -s)" != "Linux" ]; then
        _fail "Install mode is Linux-only. Detected: $(uname -s)"
        exit 1
    fi
    if ! _is_debian_family; then
        _fail "Automated install currently supports Ubuntu/Debian only."
        _info "Detected: $(_distro_label)"
        _info "Run --print-install-commands to see manual steps for your distro."
        exit 1
    fi
    _info "Detected system: $(_distro_label)"
    echo
    echo "This will run the following commands with sudo:"
    _debian_install_steps
    if [ "${OPT_YES}" -eq 0 ]; then
        if ! _confirm "Proceed with the above steps?"; then
            echo "Aborted — nothing was changed."
            exit 0
        fi
        echo
    fi

    # Step 1: prerequisites
    _step "Updating package lists..."
    sudo apt-get update -qq || { _fail "apt-get update failed."; exit 1; }
    _step "Installing prerequisites (curl, gpg)..."
    sudo apt-get install -y curl gpg || { _fail "Failed to install prerequisites."; exit 1; }
    _pass "Prerequisites ready."
    echo

    # Step 2: signing key (pipefail makes a failed download fail the step)
    _step "Adding NVIDIA GPG signing key..."
    (
        set -o pipefail
        curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
            | sudo gpg --batch --yes --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
    ) || { _fail "Failed to add NVIDIA GPG key."; exit 1; }
    _pass "Signing key added."
    echo

    # Step 3: apt repository (download first, write only when it succeeded)
    _step "Adding NVIDIA apt repository..."
    local _repo_list
    _repo_list="$(curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list)" \
        || { _fail "Failed to add NVIDIA apt repository."; exit 1; }
    [ -n "${_repo_list}" ] \
        || { _fail "Failed to add NVIDIA apt repository."; exit 1; }
    printf '%s\n' "${_repo_list}" \
        | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' \
        | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list > /dev/null \
        || { _fail "Failed to add NVIDIA apt repository."; exit 1; }
    _pass "apt repository added."
    echo

    # Step 4: install toolkit
    _step "Installing nvidia-container-toolkit..."
    sudo apt-get update -qq || { _fail "apt-get update failed after adding NVIDIA repo."; exit 1; }
    sudo apt-get install -y nvidia-container-toolkit \
        || { _fail "Failed to install nvidia-container-toolkit."; exit 1; }
    _pass "nvidia-container-toolkit installed."
    echo

    # Step 5: configure Docker runtime
    _step "Configuring Docker runtime..."
    sudo nvidia-ctk runtime configure --runtime=docker \
        || { _fail "nvidia-ctk runtime configure failed."; exit 1; }
    _pass "Docker runtime configured."
    echo

    # Step 6: restart Docker
    _step "A Docker restart is required for the runtime change to take effect."
    local _do_restart=0
    if [ "${OPT_YES}" -eq 1 ]; then
        _do_restart=1
    elif _confirm "Restart Docker now?"; then
        _do_restart=1
    else
        _warn "Docker not restarted."
        _warn "Run 'sudo systemctl restart docker' before testing GPU passthrough."
    fi
    if [ "${_do_restart}" -eq 1 ]; then
        _step "Restarting Docker..."
        if sudo systemctl restart docker; then
            _pass "Docker restarted."
        else
            _fail "Docker restart failed — run: sudo systemctl restart docker"
        fi
    fi
    echo

    # Step 7: verification
    _info "Running GPU passthrough verification..."
    echo
    _check_docker || { echo "=== Results: ${PASS} passed, ${FAIL} failed ==="; exit 1; }
    _check_gpu_passthrough

    # Step 8: enable overlay (only if passthrough verified)
    if [ "${OPT_ENABLE_OVERLAY}" -eq 1 ]; then
        if [ "${_GPU_PASSTHROUGH_OK}" -eq 1 ]; then
            _enable_nvidia_overlay
        else
            _warn "GPU passthrough verification failed — skipping overlay setup."
            _warn "Fix the passthrough issue, then run:"
            _warn " scripts/check-docker-gpu.sh --enable-nvidia-overlay"
            echo
        fi
    fi
    echo "=== Results: ${PASS} passed, ${FAIL} failed ==="
    [ "${FAIL}" -eq 0 ]
}
# ─── dispatch ────────────────────────────────────────────────────────────────
# Run the handler that matches the mode selected during argument parsing.
case "${MODE}" in
    install)
        _mode_install
        ;;
    print)
        _mode_print
        ;;
    check)
        _mode_check
        ;;
esac

View File

@@ -13,6 +13,18 @@ import json
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
def claim_json_entries(entries, owner):
    """Assign ``owner`` to every dict entry that has no owner yet.

    Entries are modified in place. Items that are not dicts are skipped, and
    an entry counts as ownerless when its ``"owner"`` value is missing or
    falsy.

    Args:
        entries: Parsed JSON payload, normally a list of dicts. A payload
            that cannot be iterated at all (``None``, a number, ...) is
            treated as containing nothing to claim instead of raising.
        owner: Username to store under ``"owner"``.

    Returns:
        The number of entries that were claimed.
    """
    try:
        candidates = iter(entries)
    except TypeError:
        # e.g. the file held `null` or a bare number rather than a list.
        return 0

    count = 0
    for entry in candidates:
        if isinstance(entry, dict) and not entry.get("owner"):
            entry["owner"] = owner
            count += 1
    return count
def main():
if len(sys.argv) < 2:
print("Usage: python scripts/claim_ownerless.py <username>")
@@ -31,11 +43,7 @@ def main():
continue
with open(path, "r", encoding="utf-8") as f:
entries = json.load(f)
count = 0
for e in entries:
if not e.get("owner"):
e["owner"] = owner
count += 1
count = claim_json_entries(entries, owner)
if count:
with open(path, "w", encoding="utf-8") as f:
json.dump(entries, f, ensure_ascii=False, indent=2)
@@ -58,10 +66,12 @@ def main():
count = db.query(Session).filter(Session.owner == None).update({"owner": owner})
print(f" sessions: claimed {count}")
# Documents
count = db.query(Document).filter(Document.session_id.in_(
db.query(Session.id).filter(Session.owner == owner)
)).update({"session_id": Document.session_id}, synchronize_session=False)
# Documents (have their own owner column; claim the ownerless ones,
# mirroring the sessions/gallery/comparisons blocks). The old query set
# session_id to itself — a no-op — and never set owner, so ownerless
# documents stayed ownerless and invisible in the user's Library.
count = db.query(Document).filter(Document.owner == None).update({"owner": owner})
print(f" documents: claimed {count}")
# Gallery
if GalleryImage:

View File

@@ -26,6 +26,39 @@ logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(mess
logger = logging.getLogger("migrate")
def _load_json(path, default):
try:
with open(path, encoding="utf-8") as f:
return json.load(f)
except (OSError, json.JSONDecodeError):
return default
def _memory_map(rows):
memories = {}
if not isinstance(rows, list):
return memories
for row in rows:
if not isinstance(row, dict):
continue
memory_id = row.get("id", "")
if memory_id:
memories[memory_id] = row
return memories
def _rag_docstore(data):
if not isinstance(data, dict):
return [], [], []
ids = data.get("ids", [])
documents = data.get("documents", [])
metadatas = data.get("metadatas", [])
if not isinstance(ids, list) or not isinstance(documents, list) or not isinstance(metadatas, list):
return [], [], []
count = min(len(ids), len(documents), len(metadatas))
return ids[:count], documents[:count], metadatas[:count]
def migrate_memories():
"""Migrate memory vectors from FAISS to ChromaDB."""
from src.chroma_client import get_chroma_client
@@ -39,7 +72,9 @@ def migrate_memories():
logger.info("No memory FAISS index found, skipping memory migration")
return
ids = json.loads(open(ids_path, encoding="utf-8").read())
ids = _load_json(ids_path, [])
if not isinstance(ids, list):
ids = []
if not ids:
logger.info("Memory FAISS index is empty, skipping")
return
@@ -47,8 +82,7 @@ def migrate_memories():
# Load memory texts
memories = {}
if os.path.exists(memory_path):
for mem in json.loads(open(memory_path, encoding="utf-8").read()):
memories[mem.get("id", "")] = mem
memories = _memory_map(_load_json(memory_path, []))
embed = get_embedding_client()
if not embed:
@@ -97,10 +131,7 @@ def migrate_rag():
logger.info("No RAG DocStore found, skipping RAG migration")
return
data = json.loads(open(docs_path, encoding="utf-8").read())
ids = data.get("ids", [])
documents = data.get("documents", [])
metadatas = data.get("metadatas", [])
ids, documents, metadatas = _rag_docstore(_load_json(docs_path, {}))
if not ids:
logger.info("RAG DocStore is empty, skipping")

View File

@@ -68,6 +68,10 @@ def _short_help(path: Path) -> str:
return first
def _is_runnable_subcommand(path: Path) -> bool:
return path.exists() and path.is_file() and os.access(path, os.X_OK)
def _print_listing() -> None:
"""`odysseus` with no args (or `odysseus help`) — print the table."""
sys.stdout.write(f"odysseus {VERSION} — every feature, on the shell.\n\n")
@@ -101,7 +105,7 @@ def main(argv: list[str] | None = None) -> int:
_print_listing()
return 0
sub = SCRIPTS_DIR / f"odysseus-{argv[1]}"
if not sub.exists():
if not _is_runnable_subcommand(sub):
sys.stderr.write(f"odysseus: unknown subcommand {argv[1]!r}\n")
return 1
return subprocess.call([str(sub), "--help"])
@@ -109,7 +113,7 @@ def main(argv: list[str] | None = None) -> int:
# `odysseus foo ...` → exec `odysseus-foo ...` under the project venv.
name = argv[0]
sub = SCRIPTS_DIR / f"odysseus-{name}"
if not sub.exists():
if not _is_runnable_subcommand(sub):
sys.stderr.write(
f"odysseus: unknown subcommand {name!r}. "
f"Try `odysseus help` to see available ones.\n"

View File

@@ -56,6 +56,16 @@ def _sqlite_safe_copy(src: Path, dst: Path) -> None:
dst.write_bytes(src.read_bytes())
def _reject_output_inside_data(out_path: Path) -> None:
    """Abort through ``fail`` when ``out_path`` resolves to a location inside
    the data directory; return normally otherwise."""
    try:
        # relative_to() raises ValueError when the path is not under data/.
        out_path.resolve().relative_to(_DATA_DIR.resolve())
    except ValueError:
        return
    fail("backup output path must be outside data/")
def cmd_snapshot(args):
"""Write a tar.gz of the entire data/ directory.
@@ -68,6 +78,7 @@ def cmd_snapshot(args):
out_path = Path(args.out) if args.out else (
_BACKUP_DIR / f"odysseus-backup-{datetime.now().strftime('%Y%m%d-%H%M%S')}.tar.gz"
)
_reject_output_inside_data(out_path)
out_path.parent.mkdir(parents=True, exist_ok=True)
sqlite_dbs = [p for p in _DATA_DIR.rglob("*.db") if p.is_file() and not p.is_symlink()]

View File

@@ -69,11 +69,17 @@ def _parse_dt(s: str) -> datetime:
return datetime.fromisoformat(s.replace("Z", "+00:00"))
def _calendar_name(ev: "CalendarEvent") -> str:
cal = getattr(ev, "calendar", None)
name = getattr(cal, "name", "") if cal else ""
return name if isinstance(name, str) else ""
def _serialize_event(ev: "CalendarEvent") -> dict:
return {
"uid": ev.uid,
"calendar_id": ev.calendar_id,
"calendar_name": ev.calendar.name if ev.calendar else "",
"calendar_name": _calendar_name(ev),
"summary": ev.summary,
"description": ev.description or "",
"location": ev.location or "",

View File

@@ -60,13 +60,17 @@ def fail(msg: str, code: int = 1) -> None:
sys.exit(code)
def _contact_rows(contacts):
return [c for c in contacts or [] if isinstance(c, dict)]
# ─── list ────────────────────────────────────────────────────────────
def cmd_list(args) -> None:
cfg = _get_carddav_config()
if not cfg["url"]:
fail("CardDAV not configured. Set carddav_url/username/password in the web UI.")
contacts = _fetch_contacts(force=args.refresh)
contacts = _contact_rows(_fetch_contacts(force=args.refresh))
emit(contacts, args)
@@ -77,7 +81,7 @@ def cmd_search(args) -> None:
if not cfg["url"]:
fail("CardDAV not configured.")
q = args.query.lower()
contacts = _fetch_contacts()
contacts = _contact_rows(_fetch_contacts())
matches = [
c for c in contacts
if q in (c.get("name") or "").lower() or q in (c.get("email") or "").lower()

View File

@@ -411,6 +411,8 @@ def cmd_state_set(args) -> None:
obj = json.loads(data)
except json.JSONDecodeError as e:
fail(f"invalid JSON on stdin: {e}")
if not isinstance(obj, dict):
fail("invalid cookbook state: expected a JSON object")
_STATE_PATH.parent.mkdir(parents=True, exist_ok=True)
# Backup the existing state — undo button if a bad pipe clobbers it.
if _STATE_PATH.exists():

View File

@@ -33,6 +33,10 @@ except ModuleNotFoundError as e:
sys.exit(2)
def _text_len(value) -> int:
return len(value) if isinstance(value, str) else 0
def _serialize(d: "Document", include_content: bool = False) -> dict:
out = {
"id": d.id,
@@ -42,7 +46,7 @@ def _serialize(d: "Document", include_content: bool = False) -> dict:
"version_count": d.version_count or 1,
"is_active": bool(d.is_active),
"tidy_verdict": d.tidy_verdict or "",
"content_length": len(d.current_content or ""),
"content_length": _text_len(d.current_content),
"created_at": d.created_at.isoformat() if d.created_at else "",
"updated_at": d.updated_at.isoformat() if d.updated_at else "",
}
@@ -90,7 +94,7 @@ def cmd_versions(args):
"version_number": v.version_number,
"summary": v.summary or "",
"source": v.source or "ai",
"content_length": len(v.content or ""),
"content_length": _text_len(v.content),
} for v in rows
], args)
finally:

View File

@@ -30,11 +30,19 @@ except ModuleNotFoundError as e:
sys.exit(2)
def _preview_text(value, limit: int = 200) -> str:
"""Truncated preview tolerant of non-string values. A gallery row whose
``prompt`` is a non-string would crash ``(value or "")[:200]`` with a
TypeError; coerce non-strings to ""."""
text = value if isinstance(value, str) else ""
return text[:limit]
def _serialize_image(i: "GalleryImage") -> dict:
return {
"id": i.id,
"filename": i.filename,
"prompt": (i.prompt or "")[:200],
"prompt": _preview_text(i.prompt),
"model": i.model or "",
"size": i.size or "",
"tags": i.tags or "",
@@ -51,6 +59,14 @@ def _serialize_image(i: "GalleryImage") -> dict:
}
def _album_image_count(album) -> int:
images = getattr(album, "images", None)
try:
return len(images) if images is not None else 0
except TypeError:
return 0
def cmd_list(args):
db = SessionLocal()
try:
@@ -92,7 +108,7 @@ def cmd_albums(args):
try:
rows = db.query(GalleryAlbum).order_by(GalleryAlbum.name.asc()).all()
emit([
{"id": a.id, "name": a.name, "image_count": len(a.images)}
{"id": a.id, "name": a.name, "image_count": _album_image_count(a)}
for a in rows
], args)
finally:

View File

@@ -58,6 +58,8 @@ def _resolve(name: str) -> Path | None:
"""Match a log by exact filename, basename-without-extension, or
substring. Returns the most-recently-modified match if there are
ties."""
if not isinstance(name, str):
return None
candidates = []
for base in (_APP_LOGS, _TMUX_LOGS):
if not base.is_dir():

View File

@@ -107,6 +107,19 @@ def _q(name: str) -> str:
return '"' + (name or "").replace("\\", "\\\\").replace('"', '\\"') + '"'
def _split_recipients(value: str) -> list[str]:
return [r.strip() for r in (value or "").split(",") if r.strip()]
def _recipient_list(to: str, cc: str = "", bcc: str = "") -> list[str]:
    """Collect every address from the To, Cc and Bcc fields, in that order.

    Calls ``fail`` when the three fields together contain no address.
    """
    recipients = [
        address
        for header in (to, cc, bcc)
        for address in _split_recipients(header)
    ]
    if not recipients:
        fail("at least one recipient is required")
    return recipients
# ─── list ────────────────────────────────────────────────────────────
def cmd_list(args) -> None:
@@ -177,7 +190,7 @@ def cmd_read(args) -> None:
if st != "OK":
fail(f"select {args.folder!r} failed: {st}")
st, msg_data = conn.fetch(args.uid.encode(), "(BODY.PEEK[])")
if st != "OK":
if st != "OK" or not msg_data or not msg_data[0]:
fail(f"fetch UID {args.uid} failed: {st}")
raw = msg_data[0][1]
msg = email_mod.message_from_bytes(raw)
@@ -302,11 +315,7 @@ def cmd_send(args) -> None:
outer["Date"] = datetime.utcnow().strftime("%a, %d %b %Y %H:%M:%S +0000")
outer.attach(MIMEText(body, "plain", "utf-8"))
recipients = [r.strip() for r in args.to.split(",") if r.strip()]
if args.cc:
recipients.extend([r.strip() for r in args.cc.split(",") if r.strip()])
if args.bcc:
recipients.extend([r.strip() for r in args.bcc.split(",") if r.strip()])
recipients = _recipient_list(args.to, args.cc, args.bcc)
if args.dry_run:
emit({

View File

@@ -33,16 +33,26 @@ except ModuleNotFoundError as e:
sys.exit(2)
def _json_list(raw) -> list:
try:
value = json.loads(raw) if raw else []
except (TypeError, json.JSONDecodeError):
return []
return value if isinstance(value, list) else []
def _json_dict(raw) -> dict:
try:
value = json.loads(raw) if raw else {}
except (TypeError, json.JSONDecodeError):
return {}
return value if isinstance(value, dict) else {}
def _serialize(s: "McpServer", redact_env: bool = True) -> dict:
try:
args_arr = json.loads(s.args) if s.args else []
except json.JSONDecodeError:
args_arr = []
try:
env_obj = json.loads(s.env) if s.env else {}
except json.JSONDecodeError:
env_obj = {}
if redact_env and env_obj:
args_arr = _json_list(s.args)
env_obj = _json_dict(s.env)
if redact_env and isinstance(env_obj, dict):
env_obj = {k: ("***" if v else "") for k, v in env_obj.items()}
return {
"id": s.id,

View File

@@ -47,8 +47,12 @@ def _manager() -> MemoryManager:
return _mgr
def _memory_entries(entries):
return [e for e in entries or [] if isinstance(e, dict)]
def cmd_list(args):
entries = _manager().load_all()
entries = _memory_entries(_manager().load_all())
if args.category:
entries = [e for e in entries if (e.get("category") or "fact") == args.category]
if args.source:
@@ -62,14 +66,14 @@ def cmd_list(args):
def cmd_search(args):
q = args.query.lower()
entries = _manager().load_all()
entries = _memory_entries(_manager().load_all())
matches = [e for e in entries if q in (e.get("text") or "").lower()]
matches = sorted(matches, key=lambda e: e.get("timestamp", 0), reverse=True)
emit(matches[: args.limit], args)
def cmd_show(args):
for e in _manager().load_all():
for e in _memory_entries(_manager().load_all()):
if e.get("id") == args.id:
emit(e, args)
return
@@ -93,7 +97,7 @@ def cmd_add(args):
def cmd_delete(args):
entries = _manager().load_all()
entries = _memory_entries(_manager().load_all())
target = next((e for e in entries if e.get("id") == args.id), None)
if not target:
fail(f"no memory with id {args.id!r}")
@@ -104,7 +108,7 @@ def cmd_delete(args):
def cmd_categories(args):
counts: dict[str, int] = {}
for e in _manager().load_all():
for e in _memory_entries(_manager().load_all()):
cat = e.get("category") or "fact"
counts[cat] = counts.get(cat, 0) + 1
rows = sorted(counts.items(), key=lambda kv: -kv[1])

View File

@@ -29,12 +29,22 @@ except ModuleNotFoundError as e:
sys.exit(2)
def _load_items(raw) -> list:
if not raw:
return []
try:
items = json.loads(raw)
except (TypeError, json.JSONDecodeError):
return []
return items if isinstance(items, list) else []
def _serialize(n: "Note") -> dict:
return {
"id": n.id,
"title": n.title or "",
"content": n.content or "",
"items": json.loads(n.items) if n.items else [],
"items": _load_items(n.items),
"note_type": n.note_type or "note",
"color": n.color or "",
"label": n.label or "",

View File

@@ -42,8 +42,12 @@ def _manager() -> PersonalDocsManager:
return _mgr
def _file_rows(files):
    """Return only the dict-shaped rows from a file index.

    Non-dict elements are dropped so callers can safely call ``.get()`` on
    every row. ``None``, any other falsy value, or a value that cannot be
    iterated at all yields an empty list.
    """
    try:
        candidates = iter(files or ())
    except TypeError:
        # The index is a scalar rather than a list: report no rows.
        return []
    return [row for row in candidates if isinstance(row, dict)]
def cmd_list(args):
files = getattr(_manager(), "index", []) or []
files = _file_rows(getattr(_manager(), "index", []) or [])
out = [
{"name": f.get("name"), "size": f.get("size"), "path": f.get("path", "")}
for f in files

View File

@@ -28,9 +28,12 @@ def _load() -> dict:
if not _PATH.exists():
return {}
try:
return json.loads(_PATH.read_text())
data = json.loads(_PATH.read_text())
except json.JSONDecodeError as e:
fail(f"presets.json corrupt: {e}")
if not isinstance(data, dict):
fail("presets.json corrupt: expected an object")
return data
def _save(data: dict) -> None:
@@ -46,6 +49,15 @@ def _save(data: dict) -> None:
tmp.replace(_PATH)
def _entry_or_fail(presets: dict, name: str) -> dict:
    """Return the preset stored under ``name``, validating its shape.

    Calls ``fail`` when ``name`` is not a key of ``presets`` or when the
    stored value is not a dict.

    NOTE(review): this relies on ``fail`` not returning (it is defined
    outside this block) -- confirm.
    """
    if name not in presets:
        fail(f"no preset named {name!r}")
    entry = presets[name]
    if not isinstance(entry, dict):
        fail(f"preset {name!r} is corrupt: expected an object")
    return entry
def cmd_list(args):
presets = _load()
rows = []
@@ -63,9 +75,7 @@ def cmd_list(args):
def cmd_get(args):
    """Emit the preset named ``args.name`` with the name added as ``id``.

    The lookup goes through ``_entry_or_fail``, so a missing or non-dict
    preset is reported via ``fail`` instead of breaking the ``**`` merge.
    """
    presets = _load()
    entry = _entry_or_fail(presets, args.name)
    # Emit exactly once; the earlier unvalidated emit duplicated the output.
    emit({"id": args.name, **entry}, args)
def cmd_set(args):
@@ -75,7 +85,8 @@ def cmd_set(args):
if prompt is None and args.temperature is None:
fail("nothing to set — pass --prompt, --prompt-file, or --temperature")
presets = _load()
entry = dict(presets.get(args.name) or {})
current = presets.get(args.name)
entry = dict(current) if isinstance(current, dict) else {}
entry.setdefault("name", args.name)
if prompt is not None:
entry["system_prompt"] = prompt
@@ -90,9 +101,8 @@ def cmd_set(args):
def cmd_delete(args):
    """Remove the preset named ``args.name`` and emit a snapshot of it.

    The entry is validated with ``_entry_or_fail`` before anything is
    removed, so a missing or corrupt preset is reported via ``fail`` and the
    presets file is left untouched.
    """
    presets = _load()
    # Validate first, then remove: popping before the lookup would make the
    # lookup fail for every existing preset.
    snap = _entry_or_fail(presets, args.name)
    presets.pop(args.name)
    _save(presets)
    emit({"ok": True, "deleted": {"id": args.name, **snap}}, args)

View File

@@ -26,20 +26,33 @@ from pathlib import Path
_DATA_DIR = _REPO_ROOT / "data" / "deep_research"
def _load_path(path: Path) -> dict | None:
try:
data = json.loads(path.read_text())
except (json.JSONDecodeError, OSError):
return None
return data if isinstance(data, dict) else None
def _load(rp_id: str) -> dict | None:
    """Load the record stored as ``<rp_id>.json`` under ``_DATA_DIR``.

    Returns ``None`` when the file does not exist; otherwise returns
    whatever ``_load_path`` returns for that file.
    """
    path = _DATA_DIR / f"{rp_id}.json"
    if not path.exists():
        return None
    # Delegate parsing to the shared loader; the earlier inline json.loads
    # returned first and left this call unreachable.
    return _load_path(path)
def _preview_text(value, limit: int = 200) -> str:
    """Return at most ``limit`` leading characters of ``value``.

    Non-string values (including ``None``) are treated as an empty string,
    so a non-string field cannot raise a TypeError when sliced. A negative
    ``limit`` is treated as 0 rather than slicing from the end of the text.
    """
    text = value if isinstance(value, str) else ""
    return text[: max(limit, 0)]
def _summarize(rp_id: str, data: dict) -> dict:
return {
"id": rp_id,
"query": (data.get("query") or "")[:200],
"query": _preview_text(data.get("query")),
"category": data.get("category") or "",
"status": data.get("status") or "",
"started_at": data.get("started_at") or "",
@@ -56,9 +69,8 @@ def cmd_list(args):
out = []
for path in sorted(_DATA_DIR.glob("*.json")):
rp_id = path.stem
try:
data = json.loads(path.read_text())
except Exception:
data = _load_path(path)
if data is None:
continue
if args.status and (data.get("status") or "") != args.status:
continue
@@ -100,9 +112,8 @@ def cmd_search(args):
out = []
for path in _DATA_DIR.glob("*.json"):
rp_id = path.stem
try:
data = json.loads(path.read_text())
except Exception:
data = _load_path(path)
if data is None:
continue
haystack = " ".join([
(data.get("query") or "").lower(),

View File

@@ -27,6 +27,12 @@ except ModuleNotFoundError as e:
def _serialize(s: "DbSession") -> dict:
def _int_or_zero(value) -> int:
    """Coerce ``value`` to an int, falling back to 0.

    Falsy values become 0. Values that ``int()`` cannot convert (wrong
    type, non-numeric text, NaN, or an infinite float) also become 0.
    """
    try:
        return int(value or 0)
    except (TypeError, ValueError, OverflowError):
        # OverflowError is what int() raises for float("inf").
        return 0
return {
"id": s.id,
"name": s.name,
@@ -37,9 +43,9 @@ def _serialize(s: "DbSession") -> dict:
"archived": bool(s.archived),
"rag": bool(s.rag),
"is_important": bool(s.is_important),
"message_count": s.message_count or 0,
"total_input_tokens": s.total_input_tokens or 0,
"total_output_tokens": s.total_output_tokens or 0,
"message_count": _int_or_zero(s.message_count),
"total_input_tokens": _int_or_zero(s.total_input_tokens),
"total_output_tokens": _int_or_zero(s.total_output_tokens),
"last_accessed": s.last_accessed.isoformat() if s.last_accessed else "",
"created_at": s.created_at.isoformat() if s.created_at else "",
}

View File

@@ -29,6 +29,19 @@ except ModuleNotFoundError as e:
sys.exit(2)
def _decode_png_data(data_png: str) -> bytes:
    """Decode a base64 PNG payload and return the raw image bytes.

    If the value contains a comma, only the part after the first comma is
    decoded (this drops a ``data:...;base64,`` style prefix). Decoding is
    strict (``validate=True``). ``fail`` is called when the payload is not
    valid base64 or the decoded bytes do not start with the PNG signature.

    NOTE(review): this relies on ``fail`` not returning (it is defined
    outside this block) -- confirm.
    """
    raw = data_png or ""
    _prefix, comma, payload = raw.partition(",")
    if comma:
        raw = payload
    try:
        decoded = base64.b64decode(raw, validate=True)
    except (ValueError, TypeError) as e:
        # binascii.Error is a ValueError subclass, so this covers malformed
        # base64 as well as non-ASCII text.
        fail(f"data_png is not valid base64: {e}")
    if not decoded.startswith(b"\x89PNG\r\n\x1a\n"):
        fail("data_png is not a PNG image")
    return decoded
def cmd_list(args):
"""No `Signature` SQLAlchemy model is registered for the
`signatures` table — query via raw SQL so we don't depend on it."""
@@ -85,13 +98,7 @@ def cmd_export(args):
), {"id": args.id}).mappings().first()
if not row:
fail(f"no signature with id {args.id!r}")
raw = row["data_png"] or ""
if "," in raw:
raw = raw.split(",", 1)[1]
try:
png_bytes = base64.b64decode(raw)
except Exception as e:
fail(f"data_png is not valid base64: {e}")
png_bytes = _decode_png_data(row["data_png"] or "")
out = Path(args.png)
out.parent.mkdir(parents=True, exist_ok=True)
out.write_bytes(png_bytes)

View File

@@ -41,11 +41,26 @@ def _manager() -> SkillsManager:
return _mgr
def _preview_text(value, limit: int = 200) -> str:
    """Return at most ``limit`` leading characters of a text field.

    Non-string values (for example a number, or ``None``) are treated as an
    empty string, so slicing cannot raise a TypeError. A negative ``limit``
    is treated as 0 rather than slicing from the end of the text.
    """
    text = value if isinstance(value, str) else ""
    return text[: max(limit, 0)]
def _skill_entries(skills):
    """Return only the dict-shaped records from a loaded skill collection.

    Non-dict elements are dropped so callers can safely call ``.get()`` on
    every returned skill. ``None``, any other falsy value, or a value that
    cannot be iterated at all yields an empty list.
    """
    try:
        candidates = iter(skills or ())
    except TypeError:
        # A scalar instead of a list: report no skills rather than crash.
        return []
    return [skill for skill in candidates if isinstance(skill, dict)]
def _summary(skill: dict) -> dict:
return {
"name": skill.get("name", ""),
"category": skill.get("category", "general"),
"description": (skill.get("description") or "")[:200],
"description": _preview_text(skill.get("description")),
"status": skill.get("status", ""),
"uses": skill.get("uses", 0),
"last_used": skill.get("last_used") or "",
@@ -54,7 +69,7 @@ def _summary(skill: dict) -> dict:
def cmd_list(args):
out = _manager().load_all()
out = _skill_entries(_manager().load_all())
if args.category:
out = [s for s in out if (s.get("category") or "general") == args.category]
out.sort(key=lambda s: (-int(s.get("uses") or 0), s.get("name", "")))
@@ -62,7 +77,7 @@ def cmd_list(args):
def cmd_show(args):
for s in _manager().load_all():
for s in _skill_entries(_manager().load_all()):
if s.get("name") == args.name:
emit(s, args)
return
@@ -71,7 +86,7 @@ def cmd_show(args):
def cmd_categories(args):
    """Emit one ``{"category", "count"}`` row per skill category.

    Skills with a missing or empty ``category`` are counted under
    ``"general"``. Rows are emitted sorted by category name.
    """
    counts: dict[str, int] = {}
    # Single pass over the dict-filtered entries; the leftover unfiltered
    # loop header beside it has been dropped.
    for skill in _skill_entries(_manager().load_all()):
        category = skill.get("category") or "general"
        counts[category] = counts.get(category, 0) + 1
    emit([{"category": c, "count": n} for c, n in sorted(counts.items())], args)
@@ -80,7 +95,7 @@ def cmd_categories(args):
def cmd_delete(args):
# Locate the skill's directory and rm -rf it.
skills_root = Path(_DATA_DIR) / "skills"
for s in _manager().load_all():
for s in _skill_entries(_manager().load_all()):
if s.get("name") != args.name:
continue
cat = s.get("category") or "general"
@@ -94,7 +109,7 @@ def cmd_delete(args):
def cmd_export(args):
for s in _manager().load_all():
for s in _skill_entries(_manager().load_all()):
if s.get("name") != args.name:
continue
cat = s.get("category") or "general"

View File

@@ -26,13 +26,18 @@ except ModuleNotFoundError as e:
sys.exit(2)
def _preview_text(value, limit: int = 200) -> str:
    """Return a preview of ``value`` cut to ``limit`` characters.

    An ellipsis ("…") is appended when the text was longer than ``limit``.
    Non-string values (including ``None``) are treated as an empty string.
    A negative ``limit`` is treated as 0.
    """
    text = value if isinstance(value, str) else ""
    limit = max(limit, 0)
    suffix = "…" if len(text) > limit else ""
    return text[:limit] + suffix
def _serialize_task(t: "ScheduledTask") -> dict:
return {
"id": t.id,
"name": t.name,
"task_type": t.task_type,
"action": t.action,
"prompt": (t.prompt or "")[:200] + ("…" if t.prompt and len(t.prompt) > 200 else ""),
"prompt": _preview_text(t.prompt),
"schedule": t.schedule,
"scheduled_time": t.scheduled_time,
"next_run": t.next_run.isoformat() if t.next_run else "",
@@ -51,7 +56,7 @@ def _serialize_run(r: "TaskRun") -> dict:
"started_at": r.started_at.isoformat() if r.started_at else "",
"completed_at": r.completed_at.isoformat() if r.completed_at else "",
"status": r.status,
"output_preview": (getattr(r, "output", "") or "")[:200],
"output_preview": _preview_text(getattr(r, "output", "")),
}

View File

@@ -36,10 +36,14 @@ def _load_prefs() -> dict:
return {"_users": {}}
try:
data = json.loads(_USER_PREFS_PATH.read_text())
data.setdefault("_users", {})
return data
except json.JSONDecodeError as e:
fail(f"user_prefs.json is corrupt: {e}")
if not isinstance(data, dict):
fail("user_prefs.json is corrupt: expected an object")
users = data.setdefault("_users", {})
if not isinstance(users, dict):
fail("user_prefs.json is corrupt: _users must be an object")
return data
def _save_prefs(data: dict) -> None:

View File

@@ -30,6 +30,17 @@ except ModuleNotFoundError as e:
sys.exit(2)
def _mask_token(token: str, reveal: bool = False) -> str:
    """Return ``token`` in a form suitable for display.

    With ``reveal`` set, the token is returned unchanged. Otherwise an empty
    token gives ``""``, a token of 10 characters or fewer gives ``"***"``,
    and a longer token gives its first 6 and last 4 characters joined by
    "…". ``None`` and non-string values are treated as an empty token.
    """
    if not isinstance(token, str):
        # Covers None as well as any non-string value, which would
        # otherwise break len() and slicing below.
        token = ""
    if reveal or not token:
        return token
    if len(token) <= 10:
        # Too short to show a head and tail without exposing most of it.
        return "***"
    return token[:6] + "…" + token[-4:]
def _summary(t: "ScheduledTask", reveal: bool = False) -> dict:
tok = t.webhook_token or ""
return {
@@ -37,7 +48,7 @@ def _summary(t: "ScheduledTask", reveal: bool = False) -> dict:
"name": t.name,
"status": t.status,
"task_type": t.task_type,
"webhook_token": tok if reveal else (tok[:6] + "…" + tok[-4:]) if tok else "",
"webhook_token": _mask_token(tok, reveal),
"has_token": bool(tok),
}

Some files were not shown because too many files have changed in this diff Show More