def _llama_cpp_rebuild_cmd() -> str:
    """Return the shell one-liner that wipes the Cookbook-managed llama.cpp build.

    The command makes sure ``$HOME/bin`` exists, deletes the
    ``$HOME/bin/llama-server`` link and the ``$HOME/llama.cpp/build`` tree,
    and then prints a short notice. It compiles, installs and downloads
    nothing itself: the intent is that the next llama.cpp serve, finding no
    ``llama-server``, rebuilds from source and can pick up a CUDA or HIP
    toolchain that was installed after the first (CPU-only) build.

    ``$HOME`` is left unexpanded on purpose so the string can be handed to a
    local shell or to a remote shell and resolve on whichever host runs it.
    """
    notice = (
        "[odysseus] Cleared the cached llama.cpp build. "
        "Re-launch the serve task to rebuild llama-server from source "
        "(CUDA or HIP will be used if a toolchain is now available)."
    )
    steps = (
        'mkdir -p "$HOME/bin"',
        'rm -f "$HOME/bin/llama-server"',
        'rm -rf "$HOME/llama.cpp/build"',
        f'echo "{notice}"',
    )
    # Chain with && so the notice is only printed when every step succeeded.
    return " && ".join(steps)
@router.post("/api/cookbook/rebuild-engine")
async def rebuild_engine(request: Request):
    """Clear the cached llama.cpp build so the next serve recompiles.

    Admin only (``_require_admin`` is called before anything else). Runs the
    command from ``_llama_cpp_rebuild_cmd`` either locally through
    ``bash -lc`` or, when ``remote_host`` is given, through the argv built by
    ``_ssh_base_argv``. The command removes the Cookbook-managed
    ``~/bin/llama-server`` symlink and ``~/llama.cpp/build`` directory; it
    installs and downloads nothing. The next llama.cpp serve is expected to
    rebuild from source and pick up CUDA/HIP if a toolchain is now present.

    Request JSON (all keys optional):
        engine: only ``"llamacpp"`` is supported (also the default).
        remote_host: host to run the command on; empty means local.
        ssh_port: forwarded unchanged to ``_ssh_base_argv``.

    Returns:
        ``{"ok": True, "output": ...}`` with the last 400 characters of
        stdout on success, otherwise ``{"ok": False, "error": ...}``.

    Raises:
        HTTPException: 400 when the body is not a JSON object or when
            ``_ssh_base_argv`` rejects the host/port with ``ValueError``.
    """
    _require_admin(request)
    from routes.cookbook_helpers import _llama_cpp_rebuild_cmd

    # This endpoint deletes files, so refuse malformed input outright instead
    # of crashing with a 500 or silently falling back to the local defaults.
    try:
        body = await request.json()
    except ValueError as e:
        raise HTTPException(400, "Request body must be valid JSON.") from e
    if not isinstance(body, dict):
        raise HTTPException(400, "Request body must be a JSON object.")

    engine = str(body.get("engine") or "llamacpp").strip()
    if engine != "llamacpp":
        return {"ok": False, "error": f"Unsupported engine: {engine}"}

    host = str(body.get("remote_host") or "").strip()
    ssh_port = body.get("ssh_port")
    cmd = _llama_cpp_rebuild_cmd()
    try:
        argv = (_ssh_base_argv(host, ssh_port) + [cmd]) if host else ["bash", "-lc", cmd]
    except ValueError as e:
        raise HTTPException(400, str(e)) from e

    try:
        proc = await asyncio.create_subprocess_exec(
            *argv, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
        )
    except OSError as e:
        # e.g. the ssh or bash executable is missing on this machine.
        return {"ok": False, "error": f"Could not start {argv[0]}: {e}"}

    try:
        out, err = await asyncio.wait_for(proc.communicate(), timeout=30)
    except asyncio.TimeoutError:
        # wait_for() only cancels communicate(); the child keeps running (and
        # is never reaped) unless it is killed and waited on explicitly.
        try:
            proc.kill()
        except ProcessLookupError:
            pass  # the process exited between the timeout and the kill
        await proc.wait()
        return {"ok": False, "error": "Rebuild-engine command timed out."}

    if proc.returncode == 0:
        return {"ok": True, "output": out.decode("utf-8", errors="replace")[-400:]}

    err_text = err.decode("utf-8", errors="replace")[-400:].strip()
    # A failing command may write nothing to stderr; never return an empty error.
    return {
        "ok": False,
        "error": err_text or f"Command exited with status {proc.returncode}.",
    }
// "Rebuild llama.cpp" button: asks the backend to clear the cached build so
// the next serve recompiles. Per the backend contract the serve bootstrap
// only builds llama-server when it is missing from PATH, so a host that first
// built CPU-only keeps reusing that binary; clearing it is how a fresh GPU
// build is forced after a CUDA/ROCm toolkit has been installed.
const rebuildBtn = document.getElementById('cookbook-rebuild-engine');
if (rebuildBtn && !rebuildBtn._wired) {
  // Flag the element so repeated wiring passes do not stack listeners.
  rebuildBtn._wired = true;
  rebuildBtn.addEventListener('click', async () => {
    // Honor the Dependencies server selector (same as _installDep) so the
    // clear runs on the host the build runs on.
    const serverSelect = document.getElementById('hwfit-deps-server');
    if (serverSelect) _applyServerSelection(serverSelect.value);

    const host = _envState.remoteHost || '';
    const where = host || 'this server';
    const question = `Rebuild the llama.cpp engine on ${where}?\n\nThis clears the cached llama-server build so the next serve recompiles from source (with CUDA/HIP if a toolchain is present). It does not download or install anything.`;
    if (!confirm(question)) return;

    // Remember the label so it can be restored whatever the outcome.
    const originalLabel = rebuildBtn.textContent;
    rebuildBtn.disabled = true;
    rebuildBtn.textContent = 'Clearing...';
    try {
      // undefined values are dropped by JSON.stringify, so a local run sends
      // only the engine name.
      const payload = {
        engine: 'llamacpp',
        remote_host: host || undefined,
        ssh_port: _getPort(host) || undefined,
      };
      const res = await fetch('/api/cookbook/rebuild-engine', {
        method: 'POST',
        credentials: 'same-origin',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify(payload),
      });
      // A non-JSON response body degrades to an empty object.
      const data = await res.json().catch(() => ({}));
      if (res.ok && data.ok) {
        uiModule.showToast(`Cleared llama.cpp build on ${where}. Re-launch the serve task to rebuild with GPU support.`);
      } else {
        const reason = data.detail || data.error || `HTTP ${res.status}`;
        uiModule.showToast('Rebuild failed: ' + String(reason).slice(0, 200));
      }
    } catch (err) {
      uiModule.showToast('Rebuild failed: ' + err.message);
    } finally {
      rebuildBtn.disabled = false;
      rebuildBtn.textContent = originalLabel;
    }
  });
}
'; html += '
'; html += '

Dependencies

'; + html += ''; html += 'Server'; html += '