From 1c2ec288dd66ec0bc4ada57f4bfdb5df0d1af5f7 Mon Sep 17 00:00:00 2001 From: Alexandre Teixeira <111787685+alteixeira20@users.noreply.github.com> Date: Wed, 3 Jun 2026 06:23:55 +0100 Subject: [PATCH] Check cudart before llama.cpp CUDA build (#1466) --- routes/cookbook_helpers.py | 25 ++++++++++++++++-- tests/test_cookbook_helpers.py | 47 ++++++++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+), 2 deletions(-) diff --git a/routes/cookbook_helpers.py b/routes/cookbook_helpers.py index ece63ee..2fe9d1f 100644 --- a/routes/cookbook_helpers.py +++ b/routes/cookbook_helpers.py @@ -558,8 +558,29 @@ def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None: runner_lines.append(' echo "[odysseus] ROCm/HIP detected — building llama-server with HIP support..."') runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_HIP=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') runner_lines.append(' elif command -v nvcc &>/dev/null; then') - runner_lines.append(' echo "[odysseus] CUDA nvcc found — building llama-server with CUDA (GPU) support..."') - runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') + # nvcc alone is not sufficient — pip-installed CUDA wheels or incomplete + # tooling can expose nvcc without shipping libcudart, causing cmake to fail + # mid-build with "CUDA runtime library not found". Check cudart explicitly + # via a small helper so the guard stays readable. + runner_lines.append(' _odysseus_has_cudart() {') + runner_lines.append(' ldconfig -p 2>/dev/null | grep -q \'libcudart\\.so\' && return 0') + runner_lines.append(' local _cuh="${CUDA_HOME:-/usr/local/cuda}"') + runner_lines.append(' ls "$_cuh/lib64/libcudart.so"* &>/dev/null && return 0') + runner_lines.append(' ls "$_cuh/lib/libcudart.so"* &>/dev/null && return 0') + runner_lines.append(' ls /usr/local/cuda/lib64/libcudart.so* &>/dev/null && return 0') + runner_lines.append(' ls /usr/local/cuda/lib/libcudart.so* &>/dev/null && return 0') + runner_lines.append(' ls "${_cuh%/cuda_nvcc}/cuda_runtime/lib/libcudart.so"* &>/dev/null && return 0') + runner_lines.append(' return 1') + runner_lines.append(' }') + runner_lines.append(' if _odysseus_has_cudart; then') + runner_lines.append(' echo "[odysseus] CUDA nvcc + cudart found — building llama-server with CUDA (GPU) support..."') + runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') + runner_lines.append(' else') + runner_lines.append(' echo "[odysseus] WARNING: nvcc found but CUDA runtime (libcudart.so) is not visible — building llama-server for CPU only."') + runner_lines.append(' echo "[odysseus] GPU inference will not be available for this llama.cpp build."') + runner_lines.append(' echo "[odysseus] Ensure libcudart is installed (e.g. cuda-runtime package) and visible via ldconfig or CUDA_HOME."') + runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server') + runner_lines.append(' fi') runner_lines.append(' else') runner_lines.append(' echo "[odysseus] WARNING: no HIP/CUDA toolchain found — building llama-server for CPU only."') runner_lines.append(' echo "[odysseus] GPU inference will not be available for this llama.cpp build."') diff --git a/tests/test_cookbook_helpers.py b/tests/test_cookbook_helpers.py index 2421201..f17bad4 100644 --- a/tests/test_cookbook_helpers.py +++ b/tests/test_cookbook_helpers.py @@ -331,6 +331,53 @@ def test_llama_cpp_linux_bootstrap_prefers_rocm_before_cuda(): assert 'ROCm/HIP detected — building llama-server with HIP support' in script +def test_llama_cpp_linux_bootstrap_checks_cudart_before_cuda_build(): + """cudart helper and all required paths must appear before the CUDA cmake command.""" + runner_lines = [] + _append_llama_cpp_linux_accel_build_lines(runner_lines) + script = "\n".join(runner_lines) + + assert '_odysseus_has_cudart' in script + assert "grep -q 'libcudart\\.so'" in script + # lib64 and lib variants for CUDA_HOME and /usr/local/cuda + assert '$_cuh/lib64/libcudart.so' in script + assert '$_cuh/lib/libcudart.so' in script + assert '/usr/local/cuda/lib64/libcudart.so' in script + assert '/usr/local/cuda/lib/libcudart.so' in script + # pip-installed nvidia runtime wheel sibling path + assert 'cuda_runtime/lib/libcudart.so' in script + # entire helper definition precedes the CUDA cmake invocation + assert script.index('_odysseus_has_cudart') < script.index('DGGML_CUDA=ON') + + +def test_llama_cpp_linux_bootstrap_cuda_cmake_present_when_cudart_found(): + """The CUDA cmake command must still be present (inside the cudart-present branch).""" + runner_lines = [] + _append_llama_cpp_linux_accel_build_lines(runner_lines) + script = "\n".join(runner_lines) + + assert 'cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON' in script + assert 'CUDA nvcc + cudart found' in script + + +def test_llama_cpp_linux_bootstrap_nvcc_without_cudart_warns_and_falls_back(): + """When nvcc exists but cudart is absent, the script must warn and use CPU-only cmake.""" + runner_lines = [] + _append_llama_cpp_linux_accel_build_lines(runner_lines) + script = "\n".join(runner_lines) + + assert 'WARNING: nvcc found but CUDA runtime (libcudart.so) is not visible — building llama-server for CPU only.' in script + assert 'GPU inference will not be available for this llama.cpp build.' in script + assert 'libcudart is installed' in script + # The CPU-only cmake fallback must appear inside the nvcc branch (before the + # outer else that handles no-GPU-toolchain). Verify it appears at least once + # before the outer "no HIP/CUDA toolchain" warning. + cpu_cmake = 'cmake -B build -DCMAKE_BUILD_TYPE=Release &&' + no_toolchain_warn = 'WARNING: no HIP/CUDA toolchain found' + assert cpu_cmake in script + assert script.index(cpu_cmake) < script.index(no_toolchain_warn) + + def test_llama_cpp_linux_bootstrap_keeps_cpu_fallback_when_no_gpu_toolchain(): runner_lines = [] _append_llama_cpp_linux_accel_build_lines(runner_lines)