Improve Cookbook serve reliability

2026-06-01 11:43:08 +09:00
parent 058d32451c
commit c953c078e5
5 changed files with 33 additions and 5 deletions
--- a/README.md
+++ b/README.md
@@ -67,7 +67,15 @@ After generating the key, you can also install it from the host with:
 ssh-copy-id -i data/ssh/id_ed25519.pub user@server
 ```
 Cookbook local downloads are stored in `./data/huggingface`, mounted as
-`~/.cache/huggingface` inside the Odysseus container.
+`~/.cache/huggingface` inside the Odysseus container. Cookbook-installed
+serve engines and Python CLIs are stored in `./data/local`, mounted as
+`~/.local`, so vLLM/llama.cpp installs survive container recreation.
+
+After downloading a model, open **Cookbook -> Serve**, pick the cached model,
+and launch it. When the server answers `/v1/models`, Odysseus adds it to the
+chat model picker automatically. If `nvidia-smi` is not visible inside the
+container on an NVIDIA GPU host, install the NVIDIA Container Toolkit on the
+host and add `gpus: all` to the `odysseus` service.

 Useful checks:
 ```bash
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -12,6 +12,10 @@ services:
      # Cookbook local model cache. Inside Docker, "Local" means the Odysseus
      # container, so persist its HuggingFace cache under ./data/huggingface.
      - ./data/huggingface:/app/.cache/huggingface
+      # Cookbook-installed Python CLIs/packages (vLLM, llama-cpp-python, etc.)
+      # land under /app/.local for the odysseus user. Persist them so recreating
+      # the container does not silently remove installed serve engines.
+      - ./data/local:/app/.local
    extra_hosts:
      # Lets the container reach local services on the Docker host, including
      # Ollama at http://host.docker.internal:11434.
--- a/routes/cookbook_routes.py
+++ b/routes/cookbook_routes.py
@@ -121,6 +121,11 @@ def setup_cookbook_routes() -> APIRouter:
                "Model requires custom code or newer model support.",
                [{"label": "retry with --trust-remote-code", "op": "append", "arg": "--trust-remote-code"}],
            ),
+            (
+                r"Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels/layer",
+                "vLLM/Transformers kernel package mismatch.",
+                [{"label": "update vLLM, Transformers, and kernels on this server", "op": "dependency", "package": "vllm transformers kernels"}],
+            ),
            (
                r"Address already in use|bind.*address.*in use",
                "Port is already in use.",
--- a/static/js/cookbook-diagnosis.js
+++ b/static/js/cookbook-diagnosis.js
@@ -293,6 +293,21 @@ export const ERROR_PATTERNS = [
      }},
    ],
  },
+  {
+    pattern: /Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels\/layer/i,
+    message: 'vLLM/Transformers kernel package mismatch.',
+    fixes: [
+      { label: 'Update vLLM/Transformers/kernels', action: (panel) => {
+        const taskEl = panel.closest('.cookbook-task');
+        const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
+        const host = task?.remoteHost || '';
+        const prefix = _buildEnvPrefix();
+        const pipCmd = prefix ? prefix + ' python3 -m pip install -U vllm transformers kernels' : 'python3 -m pip install -U vllm transformers kernels';
+        const cmd = host ? _sshCmd(host, pipCmd) : pipCmd;
+        _launchServeTask('update-vllm-stack', 'pip-update', cmd);
+      }},
+    ],
+  },
  {
    pattern: /ollama.*command not found/i,
    message: 'Ollama is not installed on this server. Run: curl -fsSL https://ollama.com/install.sh | sh',
--- a/static/js/cookbookRunning.js
+++ b/static/js/cookbookRunning.js
@@ -2158,10 +2158,6 @@ async function _reconnectTask(el, task) {
              task._serveReady = true;
              _updateTask(task.sessionId, { _serveReady: true });
            }
-            if (!task._serveReady && task.ts && (Date.now() - task.ts) > 300000) {
-              task._serveReady = true;
-              _updateTask(task.sessionId, { _serveReady: true });
-            }
            if (info.phase) {
              badge.textContent = info.phase;
              // Always the green "running" style — loading/warming is the same