Add a 'Rebuild llama.cpp' Cookbook action to force a fresh GPU build (#1787)

The serve bootstrap builds llama-server from source only when it is missing from PATH, so a host that first compiled CPU-only (no nvcc present at build time) reuses that CPU-only binary on every later serve and never gets a GPU build, even after a CUDA/ROCm toolkit is installed. There was no UI lever to force a rebuild.

Adds a 'Rebuild llama.cpp' button to the Cookbook Dependencies tab. It clears the cached ~/bin/llama-server symlink and ~/llama.cpp/build directory (locally or on the selected remote server) so the next serve recompiles and picks up CUDA/HIP if a toolchain is now present. It installs and downloads nothing.

- routes/cookbook_helpers.py: _llama_cpp_rebuild_cmd() (single source of truth)
- routes/shell_routes.py: POST /api/cookbook/rebuild-engine (admin-only, reuses the existing SSH plumbing for remote hosts)
- static/js/cookbook.js: header button + handler honoring the deps server selector
- tests: cover the command shape and a clean run on a fresh HOME

Motivated by #831 (RTX 4070 user stuck on a CPU-only build with no way to re-trigger the build).

Co-authored-by: ghreprimand <203024559+ghreprimand@users.noreply.github.com>
2026-06-02 23:28:19 -05:00
parent 51857c9008
commit 6f001af2a3
4 changed files with 135 additions and 0 deletions
--- a/static/js/cookbook.js
+++ b/static/js/cookbook.js
@@ -1018,6 +1018,51 @@ function _wireTabEvents(body) {
    });
  }

+  // "Rebuild llama.cpp" clears the cached build so the next serve recompiles.
+  // The serve bootstrap only builds llama-server when it is missing from PATH,
+  // so a host that first built CPU-only (no nvcc at build time) keeps reusing
+  // that binary forever; this is the lever to force a fresh GPU build after a
+  // CUDA/ROCm toolkit is installed.
+  const rebuildBtn = document.getElementById('cookbook-rebuild-engine');
+  if (rebuildBtn && !rebuildBtn._wired) {
+    rebuildBtn._wired = true;
+    rebuildBtn.addEventListener('click', async () => {
+      // Match _installDep: honor the Dependencies server selector so the clear
+      // runs on the same host the build runs on.
+      const sel = document.getElementById('hwfit-deps-server');
+      if (sel) _applyServerSelection(sel.value);
+      const host = _envState.remoteHost || '';
+      const where = host || 'this server';
+      if (!confirm(`Rebuild the llama.cpp engine on ${where}?\n\nThis clears the cached llama-server build so the next serve recompiles from source (with CUDA/HIP if a toolchain is present). It does not download or install anything.`)) return;
+      const _label = rebuildBtn.textContent;
+      rebuildBtn.disabled = true;
+      rebuildBtn.textContent = 'Clearing...';
+      try {
+        const res = await fetch('/api/cookbook/rebuild-engine', {
+          method: 'POST', credentials: 'same-origin',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({
+            engine: 'llamacpp',
+            remote_host: host || undefined,
+            ssh_port: _getPort(host) || undefined,
+          }),
+        });
+        const data = await res.json().catch(() => ({}));
+        if (!res.ok || !data.ok) {
+          const reason = data.detail || data.error || `HTTP ${res.status}`;
+          uiModule.showToast('Rebuild failed: ' + String(reason).slice(0, 200));
+        } else {
+          uiModule.showToast(`Cleared llama.cpp build on ${where}. Re-launch the serve task to rebuild with GPU support.`);
+        }
+      } catch (err) {
+        uiModule.showToast('Rebuild failed: ' + err.message);
+      } finally {
+        rebuildBtn.disabled = false;
+        rebuildBtn.textContent = _label;
+      }
+    });
+  }
+
  // Serve sort
  const serveSort = document.getElementById('serve-sort');
  if (serveSort) {
@@ -1616,6 +1661,7 @@ function _renderRecipes() {
  html += '<div class="admin-card" style="flex:1;display:flex;flex-direction:column;overflow:hidden;">';
  html += '<div style="display:flex;align-items:center;gap:8px;margin-bottom:4px;">';
  html += '<h2 style="margin:0;padding:0;line-height:1;">Dependencies</h2>';
+  html += '<button class="cookbook-field-input" id="cookbook-rebuild-engine" title="Clear the cached llama.cpp build so the next serve recompiles from source (use after installing a CUDA/ROCm toolkit to turn a CPU-only build into a GPU build)." style="height:24px;font-size:10px;padding:0 8px;cursor:pointer;width:auto;">Rebuild llama.cpp</button>';
  html += '<span style="font-size:10px;opacity:0.5;margin-left:auto;">Server</span>';
  html += '<select class="cookbook-field-input" id="hwfit-deps-server" style="height:28px;min-width:70px;">';
  html += _buildServerOpts(false);