diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh
index dd4cb2a..1af879c 100644
--- a/docker/entrypoint.sh
+++ b/docker/entrypoint.sh
@@ -46,6 +46,24 @@ for dir in /app /app/data /app/logs; do
     fi
 done
 
+# Cookbook installs vllm/etc. via `pip install --user`, which pulls
+# nvidia-cuda-* wheels into /app/.local but does not set CUDA_HOME or
+# symlink /usr/local/cuda. vllm 0.22+ then crashes during engine init
+# when FlashInfer tries to JIT a sampler kernel ("Could not find nvcc",
+# then "CUDA compiler and toolkit headers are incompatible" on the
+# mixed cuda-nvcc 13.3 / cuda-runtime 13.0 wheel combo).
+#
+# Auto-set CUDA_HOME if a pip-installed nvcc is present, and disable the
+# FlashInfer JIT sampler — sampler only, no impact on attention path.
+# No-op when vllm isn't installed.
+for cu in /app/.local/lib/python*/site-packages/nvidia/cu13; do
+    if [ -x "$cu/bin/nvcc" ]; then
+        export CUDA_HOME="$cu"
+        export VLLM_USE_FLASHINFER_SAMPLER="${VLLM_USE_FLASHINFER_SAMPLER:-0}"
+        break
+    fi
+done
+
 # Drop root and run the actual app. `gosu` is preferred over `su` /
 # `sudo` because it cleans up the process tree (no extra shell layer)
 # so signals (SIGTERM from `docker stop`) reach uvicorn directly.