From c953c078e50e823830aecbd23140e8f9132f9931 Mon Sep 17 00:00:00 2001 From: pewdiepie-archdaemon Date: Mon, 1 Jun 2026 11:43:08 +0900 Subject: [PATCH] Improve Cookbook serve reliability --- README.md | 10 +++++++++- docker-compose.yml | 4 ++++ routes/cookbook_routes.py | 5 +++++ static/js/cookbook-diagnosis.js | 15 +++++++++++++++ static/js/cookbookRunning.js | 4 ---- 5 files changed, 33 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 6f674f8..255de22 100644 --- a/README.md +++ b/README.md @@ -67,7 +67,15 @@ After generating the key, you can also install it from the host with: ssh-copy-id -i data/ssh/id_ed25519.pub user@server ``` Cookbook local downloads are stored in `./data/huggingface`, mounted as -`~/.cache/huggingface` inside the Odysseus container. +`~/.cache/huggingface` inside the Odysseus container. Cookbook-installed +serve engines and Python CLIs are stored in `./data/local`, mounted as +`~/.local`, so vLLM/llama.cpp installs survive container recreation. + +After downloading a model, open **Cookbook -> Serve**, pick the cached model, +and launch it. When the server answers `/v1/models`, Odysseus adds it to the +chat model picker automatically. For NVIDIA GPUs in Docker, install the NVIDIA +Container Toolkit and add `gpus: all` to the `odysseus` service if `nvidia-smi` +is not visible inside the container. Useful checks: ```bash diff --git a/docker-compose.yml b/docker-compose.yml index afc3dfd..94d4246 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -12,6 +12,10 @@ services: # Cookbook local model cache. Inside Docker, "Local" means the Odysseus # container, so persist its HuggingFace cache under ./data/huggingface. - ./data/huggingface:/app/.cache/huggingface + # Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.) + # land under /app/.local for the odysseus user. Persist them so a + # container recreate does not silently remove installed serve engines. + - ./data/local:/app/.local extra_hosts: # Lets the container reach local services on the Docker host, including # Ollama at http://host.docker.internal:11434. diff --git a/routes/cookbook_routes.py b/routes/cookbook_routes.py index 9ba054b..e8bbbe3 100644 --- a/routes/cookbook_routes.py +++ b/routes/cookbook_routes.py @@ -121,6 +121,11 @@ def setup_cookbook_routes() -> APIRouter: "Model requires custom code or newer model support.", [{"label": "retry with --trust-remote-code", "op": "append", "arg": "--trust-remote-code"}], ), + ( + r"Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels/layer", + "vLLM/Transformers kernel package mismatch.", + [{"label": "update vLLM, Transformers, and kernels on this server", "op": "dependency", "package": "vllm transformers kernels"}], + ), ( r"Address already in use|bind.*address.*in use", "Port is already in use.", diff --git a/static/js/cookbook-diagnosis.js b/static/js/cookbook-diagnosis.js index 13b78b4..a8f697d 100644 --- a/static/js/cookbook-diagnosis.js +++ b/static/js/cookbook-diagnosis.js @@ -293,6 +293,21 @@ export const ERROR_PATTERNS = [ }}, ], }, + { + pattern: /Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels\/layer/i, + message: 'vLLM/Transformers kernel package mismatch.', + fixes: [ + { label: 'Update vLLM/Transformers/kernels', action: (panel) => { + const taskEl = panel.closest('.cookbook-task'); + const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null; + const host = task?.remoteHost || ''; + const prefix = _buildEnvPrefix(); + const pipCmd = prefix ? prefix + ' python3 -m pip install -U vllm transformers kernels' : 'python3 -m pip install -U vllm transformers kernels'; + const cmd = host ? _sshCmd(host, pipCmd) : pipCmd; + _launchServeTask('update-vllm-stack', 'pip-update', cmd); + }}, + ], + }, { pattern: /ollama.*command not found/i, message: 'Ollama is not installed on this server. Run: curl -fsSL https://ollama.com/install.sh | sh', diff --git a/static/js/cookbookRunning.js b/static/js/cookbookRunning.js index 0aebf1b..f88333a 100644 --- a/static/js/cookbookRunning.js +++ b/static/js/cookbookRunning.js @@ -2158,10 +2158,6 @@ async function _reconnectTask(el, task) { task._serveReady = true; _updateTask(task.sessionId, { _serveReady: true }); } - if (!task._serveReady && task.ts && (Date.now() - task.ts) > 300000) { - task._serveReady = true; - _updateTask(task.sessionId, { _serveReady: true }); - } if (info.phase) { badge.textContent = info.phase; // Always the green "running" style — loading/warming is the same