From 1284b14a132f27e168f9a125831159e0577085f3 Mon Sep 17 00:00:00 2001 From: Alexandre Teixeira <111787685+alteixeira20@users.noreply.github.com> Date: Wed, 3 Jun 2026 12:54:35 +0100 Subject: [PATCH] feat(docker): add standalone GPU compose files for stack UIs --- README.md | 14 +++ docker-compose.gpu-amd.yml | 164 ++++++++++++++++++++++++++ docker-compose.gpu-nvidia.yml | 167 +++++++++++++++++++++++++++ tests/test_gpu_compose_standalone.py | 147 +++++++++++++++++++++++ 4 files changed, 492 insertions(+) create mode 100644 docker-compose.gpu-amd.yml create mode 100644 docker-compose.gpu-nvidia.yml create mode 100644 tests/test_gpu_compose_standalone.py diff --git a/README.md b/README.md index d02c139..5e7d3d8 100644 --- a/README.md +++ b/README.md @@ -189,6 +189,20 @@ RENDER_GID=989 For NVIDIA/AMD GPU support, also read the comments in the selected overlay file: docker/gpu.nvidia.yml or docker/gpu.amd.yml. +**Stack-management UIs (Portainer, Coolify, Dockhand, etc.).** These tools +often accept only a single Compose file and do not reliably honor `COMPOSE_FILE` +or multiple `-f` overlays. CLI users should keep using the `COMPOSE_FILE` +overlay workflow above. For stack UIs, point the stack at one of the standalone +files instead, which bundle the base stack plus the GPU settings: + +- `docker-compose.gpu-nvidia.yml` — still requires the NVIDIA Container Toolkit + on the host. +- `docker-compose.gpu-amd.yml` — still requires host ROCm/kfd/DRI setup, the + `video`/`render` group membership, and `RENDER_GID` when needed. + +The base `docker-compose.yml` plus the `docker/gpu.*.yml` overlays remain the +source of truth; the standalone files mirror them for single-file deployments. 
+ Verify after enabling either overlay: ```bash diff --git a/docker-compose.gpu-amd.yml b/docker-compose.gpu-amd.yml new file mode 100644 index 0000000..47e0c85 --- /dev/null +++ b/docker-compose.gpu-amd.yml @@ -0,0 +1,164 @@ +# Standalone AMD ROCm GPU Compose file for stack-management UIs (Portainer, +# Coolify, Dockhand, etc.) that accept only a single Compose file and do not +# reliably honor COMPOSE_FILE or multiple `-f` overlays. +# +# This is equivalent to: docker-compose.yml + docker/gpu.amd.yml. +# The base docker-compose.yml plus the docker/gpu.amd.yml overlay remain the +# source of truth — CLI users should keep using the COMPOSE_FILE overlay +# workflow. Keep this file in sync with both when either changes. +# +# Requires ROCm drivers on the host (kfd + DRI devices) and the host user +# running Docker in the `video` and `render` groups. Set RENDER_GID to your +# host's numeric render group id when needed. See docker/gpu.amd.yml for details. +services: + odysseus: + build: . + ports: + - "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000" + volumes: + - ./data:/app/data:z + - ./logs:/app/logs:z + # Cookbook remote-server SSH identity. Odysseus can generate a key here; + # add the shown public key to each remote server's authorized_keys. + - ./data/ssh:/app/.ssh:z + # Cookbook local model cache. Inside Docker, "Local" means the Odysseus + # container, so persist its HuggingFace cache under ./data/huggingface. + - ./data/huggingface:/app/.cache/huggingface:z + # Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.) + # land under /app/.local for the odysseus user. Persist them so a + # container recreate does not silently remove installed serve engines. + - ./data/local:/app/.local:z + extra_hosts: + # Lets the container reach local services on the Docker host, including + # Ollama at http://host.docker.internal:11434. 
+ - "host.docker.internal:host-gateway" + environment: + - LLM_HOST=${LLM_HOST:-localhost} + - LLM_HOSTS=${LLM_HOSTS:-} + - OPENAI_API_KEY=${OPENAI_API_KEY:-} + - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-} + - RESEARCH_LLM_ENDPOINT=${RESEARCH_LLM_ENDPOINT:-} + - HF_TOKEN=${HF_TOKEN:-} + - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN:-} + - SEARXNG_INSTANCE=http://searxng:8080 + - CHROMADB_HOST=chromadb + - CHROMADB_PORT=8000 + - DATABASE_URL=${DATABASE_URL:-sqlite:///./data/app.db} + - AUTH_ENABLED=${AUTH_ENABLED:-true} + - LOCALHOST_BYPASS=${LOCALHOST_BYPASS:-false} + - ODYSSEUS_ADMIN_USER=${ODYSSEUS_ADMIN_USER:-admin} + - ODYSSEUS_ADMIN_PASSWORD=${ODYSSEUS_ADMIN_PASSWORD:-} + - ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-http://localhost,http://127.0.0.1} + - SECURE_COOKIES=${SECURE_COOKIES:-false} + - EMBEDDING_URL=${EMBEDDING_URL:-} + - EMBEDDING_MODEL=${EMBEDDING_MODEL:-} + - FASTEMBED_MODEL=${FASTEMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2} + - FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-} + - CLEANUP_INTERVAL_HOURS=${CLEANUP_INTERVAL_HOURS:-24} + - ODYSSEUS_INPROCESS_POLLERS=${ODYSSEUS_INPROCESS_POLLERS:-1} + - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1} + - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost} + - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-} + - GOOGLE_API_KEY=${GOOGLE_API_KEY:-} + - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-} + - TAVILY_API_KEY=${TAVILY_API_KEY:-} + - SERPER_API_KEY=${SERPER_API_KEY:-} + # PUID / PGID — the user/group the container drops to before + # running uvicorn (entrypoint also chowns /app/data + /app/logs + # to match, so bind-mounted files stay editable from the host). + # 1000 is the default first user on most Linux installs. 
If your + # host user has a different id, override here or via .env, e.g.: + # PUID=1001 + # PGID=1001 + # Find yours with: id -u / id -g + - PUID=${PUID:-1000} + - PGID=${PGID:-1000} + depends_on: + searxng: + condition: service_healthy + chromadb: + condition: service_started + restart: unless-stopped + # AMD ROCm overlay (from docker/gpu.amd.yml). + devices: + - /dev/kfd + - /dev/dri + group_add: + - video + - ${RENDER_GID:-render} + + chromadb: + image: docker.io/chromadb/chroma:latest + ports: + - "${CHROMADB_BIND:-127.0.0.1}:8100:8000" + volumes: + - chromadb-data:/chroma/chroma + environment: + - ANONYMIZED_TELEMETRY=FALSE + restart: unless-stopped + + searxng: + # Pinned, not :latest — odysseus waits on searxng's healthcheck + # (depends_on: condition: service_healthy), so a broken upstream `latest` + # tag blocks the whole app from starting. 2026.6.2 crashes on boot with + # `KeyError: 'default_doi_resolver'`, failing the healthcheck (issue #1414). + # Bump this deliberately after verifying a newer tag boots clean. + image: docker.io/searxng/searxng:2026.5.31-7159b8aed + entrypoint: + - /bin/sh + - -c + - | + set -eu + if [ ! 
-s /etc/searxng/settings.yml ] || grep -q 'odysseus-local-searxng-json-2026-05-30\|__SEARXNG_SECRET__' /etc/searxng/settings.yml; then + secret="$${SEARXNG_SECRET:-}" + if [ -z "$$secret" ]; then + secret="$$(python -c 'import secrets; print(secrets.token_urlsafe(48))')" + fi + sed "s|__SEARXNG_SECRET__|$$secret|g" /tmp/searxng-settings.yml.template > /etc/searxng/settings.yml + fi + exec /usr/local/searxng/entrypoint.sh + ports: + - "127.0.0.1:8080:8080" + volumes: + - searxng-data:/etc/searxng + - ./config/searxng/settings.yml:/tmp/searxng-settings.yml.template:ro,z + environment: + - SEARXNG_BASE_URL=http://localhost:8080/ + - SEARXNG_SECRET=${SEARXNG_SECRET:-} + # The official searxng image runs as the non-root `searxng` user, but its + # entrypoint still needs to chown /etc/searxng on first boot, drop privs via + # su-exec, and (with our wrapper above) write settings.yml into the named + # volume. Without these capabilities the wrapper aborts at the redirection + # with EACCES and the container fails its healthcheck with permission + # errors during setup. Mirrors the cap set recommended by the upstream + # searxng-docker compose file. See issue #721. 
+ cap_drop: + - ALL + cap_add: + - CHOWN + - SETGID + - SETUID + - DAC_OVERRIDE + healthcheck: + test: ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://localhost:8080/', timeout=5).read(1)\""] + interval: 5s + timeout: 6s + retries: 20 + start_period: 10s + restart: unless-stopped + + ntfy: + image: docker.io/binwiederhier/ntfy + command: serve + ports: + - "${NTFY_BIND:-127.0.0.1}:8091:80" + volumes: + - ntfy-cache:/var/cache/ntfy + environment: + - NTFY_BASE_URL=${NTFY_BASE_URL:-http://localhost:8091} + restart: unless-stopped + +volumes: + searxng-data: + chromadb-data: + ntfy-cache: diff --git a/docker-compose.gpu-nvidia.yml b/docker-compose.gpu-nvidia.yml new file mode 100644 index 0000000..36ca10e --- /dev/null +++ b/docker-compose.gpu-nvidia.yml @@ -0,0 +1,167 @@ +# Standalone NVIDIA GPU Compose file for stack-management UIs (Portainer, +# Coolify, Dockhand, etc.) that accept only a single Compose file and do not +# reliably honor COMPOSE_FILE or multiple `-f` overlays. +# +# This is equivalent to: docker-compose.yml + docker/gpu.nvidia.yml. +# The base docker-compose.yml plus the docker/gpu.nvidia.yml overlay remain +# the source of truth — CLI users should keep using the COMPOSE_FILE overlay +# workflow. Keep this file in sync with both when either changes. +# +# Requires the NVIDIA Container Toolkit on the host. See docker/gpu.nvidia.yml +# for setup details. +services: + odysseus: + build: . + ports: + - "${APP_BIND:-127.0.0.1}:${APP_PORT:-7000}:7000" + volumes: + - ./data:/app/data:z + - ./logs:/app/logs:z + # Cookbook remote-server SSH identity. Odysseus can generate a key here; + # add the shown public key to each remote server's authorized_keys. + - ./data/ssh:/app/.ssh:z + # Cookbook local model cache. Inside Docker, "Local" means the Odysseus + # container, so persist its HuggingFace cache under ./data/huggingface. 
+ - ./data/huggingface:/app/.cache/huggingface:z + # Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.) + # land under /app/.local for the odysseus user. Persist them so a + # container recreate does not silently remove installed serve engines. + - ./data/local:/app/.local:z + extra_hosts: + # Lets the container reach local services on the Docker host, including + # Ollama at http://host.docker.internal:11434. + - "host.docker.internal:host-gateway" + environment: + - LLM_HOST=${LLM_HOST:-localhost} + - LLM_HOSTS=${LLM_HOSTS:-} + - OPENAI_API_KEY=${OPENAI_API_KEY:-} + - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-} + - RESEARCH_LLM_ENDPOINT=${RESEARCH_LLM_ENDPOINT:-} + - HF_TOKEN=${HF_TOKEN:-} + - HUGGING_FACE_HUB_TOKEN=${HUGGING_FACE_HUB_TOKEN:-} + - SEARXNG_INSTANCE=http://searxng:8080 + - CHROMADB_HOST=chromadb + - CHROMADB_PORT=8000 + - DATABASE_URL=${DATABASE_URL:-sqlite:///./data/app.db} + - AUTH_ENABLED=${AUTH_ENABLED:-true} + - LOCALHOST_BYPASS=${LOCALHOST_BYPASS:-false} + - ODYSSEUS_ADMIN_USER=${ODYSSEUS_ADMIN_USER:-admin} + - ODYSSEUS_ADMIN_PASSWORD=${ODYSSEUS_ADMIN_PASSWORD:-} + - ALLOWED_ORIGINS=${ALLOWED_ORIGINS:-http://localhost,http://127.0.0.1} + - SECURE_COOKIES=${SECURE_COOKIES:-false} + - EMBEDDING_URL=${EMBEDDING_URL:-} + - EMBEDDING_MODEL=${EMBEDDING_MODEL:-} + - FASTEMBED_MODEL=${FASTEMBED_MODEL:-sentence-transformers/all-MiniLM-L6-v2} + - FASTEMBED_CACHE_PATH=${FASTEMBED_CACHE_PATH:-} + - CLEANUP_INTERVAL_HOURS=${CLEANUP_INTERVAL_HOURS:-24} + - ODYSSEUS_INPROCESS_POLLERS=${ODYSSEUS_INPROCESS_POLLERS:-1} + - ODYSSEUS_INPROCESS_TASKS=${ODYSSEUS_INPROCESS_TASKS:-1} + - ODYSSEUS_SCRIPT_HOST=${ODYSSEUS_SCRIPT_HOST:-localhost} + - DATA_BRAVE_API_KEY=${DATA_BRAVE_API_KEY:-} + - GOOGLE_API_KEY=${GOOGLE_API_KEY:-} + - GOOGLE_PSE_CX=${GOOGLE_PSE_CX:-} + - TAVILY_API_KEY=${TAVILY_API_KEY:-} + - SERPER_API_KEY=${SERPER_API_KEY:-} + # PUID / PGID — the user/group the container drops to before + # running uvicorn (entrypoint also chowns 
/app/data + /app/logs + # to match, so bind-mounted files stay editable from the host). + # 1000 is the default first user on most Linux installs. If your + # host user has a different id, override here or via .env, e.g.: + # PUID=1001 + # PGID=1001 + # Find yours with: id -u / id -g + - PUID=${PUID:-1000} + - PGID=${PGID:-1000} + # NVIDIA overlay (from docker/gpu.nvidia.yml). + - NVIDIA_VISIBLE_DEVICES=all + - NVIDIA_DRIVER_CAPABILITIES=compute,utility + depends_on: + searxng: + condition: service_healthy + chromadb: + condition: service_started + restart: unless-stopped + # NVIDIA overlay (from docker/gpu.nvidia.yml). + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [gpu] + + chromadb: + image: docker.io/chromadb/chroma:latest + ports: + - "${CHROMADB_BIND:-127.0.0.1}:8100:8000" + volumes: + - chromadb-data:/chroma/chroma + environment: + - ANONYMIZED_TELEMETRY=FALSE + restart: unless-stopped + + searxng: + # Pinned, not :latest — odysseus waits on searxng's healthcheck + # (depends_on: condition: service_healthy), so a broken upstream `latest` + # tag blocks the whole app from starting. 2026.6.2 crashes on boot with + # `KeyError: 'default_doi_resolver'`, failing the healthcheck (issue #1414). + # Bump this deliberately after verifying a newer tag boots clean. + image: docker.io/searxng/searxng:2026.5.31-7159b8aed + entrypoint: + - /bin/sh + - -c + - | + set -eu + if [ ! 
-s /etc/searxng/settings.yml ] || grep -q 'odysseus-local-searxng-json-2026-05-30\|__SEARXNG_SECRET__' /etc/searxng/settings.yml; then + secret="$${SEARXNG_SECRET:-}" + if [ -z "$$secret" ]; then + secret="$$(python -c 'import secrets; print(secrets.token_urlsafe(48))')" + fi + sed "s|__SEARXNG_SECRET__|$$secret|g" /tmp/searxng-settings.yml.template > /etc/searxng/settings.yml + fi + exec /usr/local/searxng/entrypoint.sh + ports: + - "127.0.0.1:8080:8080" + volumes: + - searxng-data:/etc/searxng + - ./config/searxng/settings.yml:/tmp/searxng-settings.yml.template:ro,z + environment: + - SEARXNG_BASE_URL=http://localhost:8080/ + - SEARXNG_SECRET=${SEARXNG_SECRET:-} + # The official searxng image runs as the non-root `searxng` user, but its + # entrypoint still needs to chown /etc/searxng on first boot, drop privs via + # su-exec, and (with our wrapper above) write settings.yml into the named + # volume. Without these capabilities the wrapper aborts at the redirection + # with EACCES and the container fails its healthcheck with permission + # errors during setup. Mirrors the cap set recommended by the upstream + # searxng-docker compose file. See issue #721. 
+ cap_drop: + - ALL + cap_add: + - CHOWN + - SETGID + - SETUID + - DAC_OVERRIDE + healthcheck: + test: ["CMD-SHELL", "python -c \"import urllib.request; urllib.request.urlopen('http://localhost:8080/', timeout=5).read(1)\""] + interval: 5s + timeout: 6s + retries: 20 + start_period: 10s + restart: unless-stopped + + ntfy: + image: docker.io/binwiederhier/ntfy + command: serve + ports: + - "${NTFY_BIND:-127.0.0.1}:8091:80" + volumes: + - ntfy-cache:/var/cache/ntfy + environment: + - NTFY_BASE_URL=${NTFY_BASE_URL:-http://localhost:8091} + restart: unless-stopped + +volumes: + searxng-data: + chromadb-data: + ntfy-cache: diff --git a/tests/test_gpu_compose_standalone.py b/tests/test_gpu_compose_standalone.py new file mode 100644 index 0000000..57bdaf3 --- /dev/null +++ b/tests/test_gpu_compose_standalone.py @@ -0,0 +1,147 @@ +"""Guards the standalone GPU compose files against drift. + +Stack-management UIs (Portainer, Coolify, Dockhand, ...) often accept only a +single compose file and do not honor COMPOSE_FILE or multiple ``-f`` overlays, +so the repo ships standalone ``docker-compose.gpu-*.yml`` files that inline the +GPU overlay. The base ``docker-compose.yml`` plus ``docker/gpu.*.yml`` overlays +remain the source of truth; these tests assert each standalone file equals the +base compose with only the matching overlay merged into the ``odysseus`` +service. No Docker / docker compose is required — everything is pure YAML. 
+"""
+
+import copy
+from pathlib import Path
+
+import pytest
+import yaml
+
+ROOT = Path(__file__).resolve().parents[1]
+
+BASE = ROOT / "docker-compose.yml"
+NVIDIA_OVERLAY = ROOT / "docker" / "gpu.nvidia.yml"
+AMD_OVERLAY = ROOT / "docker" / "gpu.amd.yml"
+NVIDIA_STANDALONE = ROOT / "docker-compose.gpu-nvidia.yml"
+AMD_STANDALONE = ROOT / "docker-compose.gpu-amd.yml"
+
+SERVICE = "odysseus"
+
+
+def _load(path: Path) -> dict:
+    """Read *path* as UTF-8 and parse it with the safe YAML loader."""
+    return yaml.safe_load(path.read_text(encoding="utf-8"))
+
+
+def _deep_merge(base: dict, overlay: dict) -> dict:
+    """Approximate docker compose overlay merging for the keys these files use.
+
+    Mappings merge recursively, lists are concatenated (base items first) and
+    anything else is overwritten. Compose itself merges ``environment`` by
+    variable name, so plain concatenation only matches it while an overlay
+    adds new variables and never redefines a base one. The overlays here add
+    variables plus otherwise-absent keys (``deploy``, ``devices``,
+    ``group_add``), so the simplification holds. Inputs are not mutated.
+    """
+    result = copy.deepcopy(base)
+    for key, value in overlay.items():
+        if isinstance(value, dict) and isinstance(result.get(key), dict):
+            result[key] = _deep_merge(result[key], value)
+        elif isinstance(value, list) and isinstance(result.get(key), list):
+            result[key] = copy.deepcopy(result[key]) + copy.deepcopy(value)
+        else:
+            result[key] = copy.deepcopy(value)
+    return result
+
+
+def _merge_overlay_into_base(base: dict, overlay: dict) -> dict:
+    """Build the expected standalone config: base + overlay on odysseus only."""
+    expected = copy.deepcopy(base)
+    services = expected["services"]
+    services[SERVICE] = _deep_merge(services[SERVICE], overlay["services"][SERVICE])
+    return expected
+
+
+@pytest.fixture(scope="module")
+def base():
+    """Parsed base ``docker-compose.yml``, loaded once per test module."""
+    return _load(BASE)
+
+
+# --- Equivalence: standalone == base + overlay -----------------------------
+
+
+def test_nvidia_standalone_equals_base_plus_overlay(base):
+    overlay = _load(NVIDIA_OVERLAY)
+    standalone = _load(NVIDIA_STANDALONE)
+    assert standalone == _merge_overlay_into_base(base, overlay)
+
+
+def test_amd_standalone_equals_base_plus_overlay(base):
+    overlay = _load(AMD_OVERLAY)
+    standalone = _load(AMD_STANDALONE)
+    assert standalone == _merge_overlay_into_base(base, overlay)
+
+
+# --- Non-odysseus services and volumes untouched ---------------------------
+
+
+@pytest.mark.parametrize("standalone_path", [NVIDIA_STANDALONE, AMD_STANDALONE])
+def test_non_odysseus_services_match_base(base, standalone_path):
+    standalone = _load(standalone_path)
+    # Compare the service names first: a service missing from the standalone
+    # file then fails this assertion instead of raising KeyError in the loop.
+    assert set(standalone["services"]) == set(base["services"])
+    for name, definition in base["services"].items():
+        if name == SERVICE:
+            continue
+        assert standalone["services"][name] == definition
+
+
+@pytest.mark.parametrize("standalone_path", [NVIDIA_STANDALONE, AMD_STANDALONE])
+def test_top_level_volumes_match_base(base, standalone_path):
+    standalone = _load(standalone_path)
+    assert standalone.get("volumes") == base.get("volumes")
+
+
+# --- odysseus = base service + only the overlay additions ------------------
+
+
+def test_nvidia_odysseus_adds_only_overlay(base):
+    standalone = _load(NVIDIA_STANDALONE)
+    svc = standalone["services"][SERVICE]
+    base_svc = base["services"][SERVICE]
+
+    # Base environment preserved, plus exactly the two NVIDIA variables.
+    base_env = base_svc["environment"]
+    assert svc["environment"][: len(base_env)] == base_env
+    added_env = set(svc["environment"]) - set(base_env)
+    assert added_env == {
+        "NVIDIA_VISIBLE_DEVICES=all",
+        "NVIDIA_DRIVER_CAPABILITIES=compute,utility",
+    }
+
+    # deploy block is new and matches the overlay's GPU reservation exactly.
+    assert "deploy" not in base_svc
+    devices = svc["deploy"]["resources"]["reservations"]["devices"]
+    assert devices == [{"driver": "nvidia", "count": "all", "capabilities": ["gpu"]}]
+
+    # No AMD-only keys leaked in.
+    assert "devices" not in svc
+    assert "group_add" not in svc
+
+
+def test_amd_odysseus_adds_only_overlay(base):
+    standalone = _load(AMD_STANDALONE)
+    svc = standalone["services"][SERVICE]
+    base_svc = base["services"][SERVICE]
+
+    # Environment is unchanged from base for AMD.
+    assert svc["environment"] == base_svc["environment"]
+
+    # devices and group_add are new and match the overlay exactly.
+    assert "devices" not in base_svc
+    assert "group_add" not in base_svc
+    assert svc["devices"] == ["/dev/kfd", "/dev/dri"]
+    assert svc["group_add"] == ["video", "${RENDER_GID:-render}"]
+
+    # No NVIDIA-only keys leaked in.
+    assert "deploy" not in svc