From 1c2ec288dd66ec0bc4ada57f4bfdb5df0d1af5f7 Mon Sep 17 00:00:00 2001
From: Alexandre Teixeira <111787685+alteixeira20@users.noreply.github.com>
Date: Wed, 3 Jun 2026 06:23:55 +0100
Subject: [PATCH] Check cudart before llama.cpp CUDA build (#1466)

---
 routes/cookbook_helpers.py     | 25 ++++++++++++++++--
 tests/test_cookbook_helpers.py | 47 ++++++++++++++++++++++++++++++++++
 2 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/routes/cookbook_helpers.py b/routes/cookbook_helpers.py
index ece63ee..2fe9d1f 100644
--- a/routes/cookbook_helpers.py
+++ b/routes/cookbook_helpers.py
@@ -558,8 +558,29 @@ def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
     runner_lines.append('      echo "[odysseus] ROCm/HIP detected — building llama-server with HIP support..."')
     runner_lines.append('      cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_HIP=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
     runner_lines.append('    elif command -v nvcc &>/dev/null; then')
-    runner_lines.append('      echo "[odysseus] CUDA nvcc found — building llama-server with CUDA (GPU) support..."')
-    runner_lines.append('      cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
+    # nvcc alone is not sufficient — pip-installed CUDA wheels or incomplete
+    # tooling can expose nvcc without shipping libcudart, causing cmake to fail
+    # mid-build with "CUDA runtime library not found". Check cudart explicitly
+    # via a small helper so the guard stays readable.
+    runner_lines.append('      _odysseus_has_cudart() {')
+    runner_lines.append('        ldconfig -p 2>/dev/null | grep -q \'libcudart\\.so\' && return 0')
+    runner_lines.append('        local _cuh="${CUDA_HOME:-/usr/local/cuda}"')
+    runner_lines.append('        ls "$_cuh/lib64/libcudart.so"* &>/dev/null && return 0')
+    runner_lines.append('        ls "$_cuh/lib/libcudart.so"* &>/dev/null && return 0')
+    runner_lines.append('        ls /usr/local/cuda/lib64/libcudart.so* &>/dev/null && return 0')
+    runner_lines.append('        ls /usr/local/cuda/lib/libcudart.so* &>/dev/null && return 0')
+    runner_lines.append('        ls "${_cuh%/cuda_nvcc}/cuda_runtime/lib/libcudart.so"* &>/dev/null && return 0')
+    runner_lines.append('        return 1')
+    runner_lines.append('      }')
+    runner_lines.append('      if _odysseus_has_cudart; then')
+    runner_lines.append('        echo "[odysseus] CUDA nvcc + cudart found — building llama-server with CUDA (GPU) support..."')
+    runner_lines.append('        cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
+    runner_lines.append('      else')
+    runner_lines.append('        echo "[odysseus] WARNING: nvcc found but CUDA runtime (libcudart.so) is not visible — building llama-server for CPU only."')
+    runner_lines.append('        echo "[odysseus]   GPU inference will not be available for this llama.cpp build."')
+    runner_lines.append('        echo "[odysseus]   Ensure libcudart is installed (e.g. cuda-runtime package) and visible via ldconfig or CUDA_HOME."')
+    runner_lines.append('        cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
+    runner_lines.append('      fi')
     runner_lines.append('    else')
     runner_lines.append('      echo "[odysseus] WARNING: no HIP/CUDA toolchain found — building llama-server for CPU only."')
     runner_lines.append('      echo "[odysseus]   GPU inference will not be available for this llama.cpp build."')
diff --git a/tests/test_cookbook_helpers.py b/tests/test_cookbook_helpers.py
index 2421201..f17bad4 100644
--- a/tests/test_cookbook_helpers.py
+++ b/tests/test_cookbook_helpers.py
@@ -331,6 +331,53 @@ def test_llama_cpp_linux_bootstrap_prefers_rocm_before_cuda():
     assert 'ROCm/HIP detected — building llama-server with HIP support' in script
 
 
+def test_llama_cpp_linux_bootstrap_checks_cudart_before_cuda_build():
+    """The cudart helper must probe all expected paths and precede the CUDA cmake command."""
+    runner_lines = []
+    _append_llama_cpp_linux_accel_build_lines(runner_lines)
+    script = "\n".join(runner_lines)
+
+    assert '_odysseus_has_cudart' in script
+    assert "grep -q 'libcudart\\.so'" in script
+    # lib64 and lib variants for CUDA_HOME and /usr/local/cuda
+    assert '$_cuh/lib64/libcudart.so' in script
+    assert '$_cuh/lib/libcudart.so' in script
+    assert '/usr/local/cuda/lib64/libcudart.so' in script
+    assert '/usr/local/cuda/lib/libcudart.so' in script
+    # pip-installed nvidia runtime wheel sibling path
+    assert 'cuda_runtime/lib/libcudart.so' in script
+    # first mention of the helper (its definition) precedes the CUDA cmake invocation
+    assert script.index('_odysseus_has_cudart') < script.index('DGGML_CUDA=ON')
+
+
+def test_llama_cpp_linux_bootstrap_cuda_cmake_present_when_cudart_found():
+    """The CUDA cmake command must still be present (inside the cudart-present branch)."""
+    runner_lines = []
+    _append_llama_cpp_linux_accel_build_lines(runner_lines)
+    script = "\n".join(runner_lines)
+
+    assert 'cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON' in script
+    assert 'CUDA nvcc + cudart found' in script
+
+
+def test_llama_cpp_linux_bootstrap_nvcc_without_cudart_warns_and_falls_back():
+    """When nvcc exists but cudart is absent, the script must warn and use CPU-only cmake."""
+    runner_lines = []
+    _append_llama_cpp_linux_accel_build_lines(runner_lines)
+    script = "\n".join(runner_lines)
+
+    assert 'WARNING: nvcc found but CUDA runtime (libcudart.so) is not visible — building llama-server for CPU only.' in script
+    assert 'GPU inference will not be available for this llama.cpp build.' in script
+    assert 'libcudart is installed' in script
+    # The CPU-only cmake fallback must appear inside the nvcc branch (before the
+    # outer else that handles no-GPU-toolchain). Verify it appears at least once
+    # before the outer "no HIP/CUDA toolchain" warning.
+    cpu_cmake = 'cmake -B build -DCMAKE_BUILD_TYPE=Release &&'
+    no_toolchain_warn = 'WARNING: no HIP/CUDA toolchain found'
+    assert cpu_cmake in script
+    assert script.index(cpu_cmake) < script.index(no_toolchain_warn)
+
+
 def test_llama_cpp_linux_bootstrap_keeps_cpu_fallback_when_no_gpu_toolchain():
     runner_lines = []
     _append_llama_cpp_linux_accel_build_lines(runner_lines)