From bd3204fe96001a711dd651fcdb3be830951abae7 Mon Sep 17 00:00:00 2001
From: Dustin <139303216+dustinm16@users.noreply.github.com>
Date: Mon, 1 Jun 2026 20:30:07 -0700
Subject: [PATCH] Diagnose vLLM device detection failure with actionable
 suggestion (#778)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds a diagnosis pattern for the 'Failed to infer device type' error
vLLM raises when no CUDA or ROCm GPU is found (e.g. systems with only
integrated or Intel Xe graphics). The existing pattern only caught
'No CUDA GPUs are available', which fires later in startup; this new
entry catches the earlier device-probe failure and the NVML/amdsmi
library-not-found messages that precede it.

Surfaces in the Cookbook serve card as "vLLM could not find a supported
GPU (CUDA or ROCm)" with suggestions to switch to llama.cpp or Ollama,
instead of a raw Python traceback.

Co-authored-by: Claude Sonnet 4.6 (1M context) <noreply@anthropic.com>
---
 routes/cookbook_routes.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/routes/cookbook_routes.py b/routes/cookbook_routes.py
index 5f177d9..92e83ae 100644
--- a/routes/cookbook_routes.py
+++ b/routes/cookbook_routes.py
@@ -148,6 +148,15 @@ def setup_cookbook_routes() -> APIRouter:
                 "No GPUs are visible to the serve process.",
                 [{"label": "clear Cookbook GPU selection or choose available GPUs", "op": "settings", "field": "gpus", "value": ""}],
             ),
+            (
+                r"Failed to infer device type|NVML Shared Library Not Found|No module named 'amdsmi'|platform is not available",
+                "vLLM could not find a supported GPU (CUDA or ROCm). "
+                "This machine may have integrated or unsupported graphics only.",
+                [
+                    {"label": "switch to llama.cpp (CPU/Metal, works without a discrete GPU)", "op": "manual"},
+                    {"label": "switch to Ollama (CPU/Metal, works without a discrete GPU)", "op": "manual"},
+                ],
+            ),
             (
                 r"vllm.*command not found|No module named vllm|ERROR: vLLM is not installed",
                 "vLLM is not installed or not in PATH on this server.",