From f9e1d38cc28d37938e209e810a2ab93e256703b1 Mon Sep 17 00:00:00 2001
From: spooky <partialabstraction@gmail.com>
Date: Fri, 5 Jun 2026 20:03:04 +1000
Subject: [PATCH] fix: diagnose vllm serve runtime issues (#1198)

---
 routes/cookbook_helpers.py       | 25 +++++++++++++++++++++++++
 routes/cookbook_routes.py        | 16 +++++-----------
 static/js/cookbook-diagnosis.js  | 12 ++++++++++++
 tests/test_cookbook_diagnosis.py | 15 +++++++++++++++
 tests/test_cookbook_helpers.py   | 14 ++++++++++++++
 5 files changed, 71 insertions(+), 11 deletions(-)
 create mode 100644 tests/test_cookbook_diagnosis.py

diff --git a/routes/cookbook_helpers.py b/routes/cookbook_helpers.py
index 1748bbb..8fbaa9e 100644
--- a/routes/cookbook_helpers.py
+++ b/routes/cookbook_helpers.py
@@ -559,6 +559,21 @@ def _append_serve_preflight_exit_lines(runner_lines: list[str], *, keep_shell_op
     runner_lines.append('fi')
 
 
+def _append_vllm_linux_preflight_lines(runner_lines: list[str]) -> None:
+    """Append Linux vLLM readiness lines that identify the runtime being used."""
+    # Prepend ~/.local/bin so a `pip install --user` vLLM is on PATH, then echo the
+    # resolved CLI path (and its --version, best effort) so external runtimes are clear.
+    runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
+    runner_lines.append('ODYSSEUS_VLLM_BIN="$(command -v vllm 2>/dev/null || true)"')  # empty if not found
+    runner_lines.append('if [ -z "$ODYSSEUS_VLLM_BIN" ]; then')
+    runner_lines.append('  echo "ERROR: vLLM is not installed."')
+    runner_lines.append('  ODYSSEUS_PREFLIGHT_EXIT=127')  # same status a shell uses for "command not found"
+    runner_lines.append('else')
+    runner_lines.append('  echo "[odysseus] vLLM CLI: $ODYSSEUS_VLLM_BIN"')
+    runner_lines.append('  ODYSSEUS_VLLM_VERSION="$("$ODYSSEUS_VLLM_BIN" --version 2>&1 | head -n 1 || true)"')
+    runner_lines.append('  if [ -n "$ODYSSEUS_VLLM_VERSION" ]; then echo "[odysseus] vLLM version: $ODYSSEUS_VLLM_VERSION"; fi')
+    runner_lines.append('fi')
+
 def _append_serve_exit_code_lines(
     runner_lines: list[str],
     *,
@@ -860,6 +875,16 @@ def _diagnose_serve_output(text: str) -> dict | None:
             "Model requires custom code or newer model support.",
             [{"label": "retry with --trust-remote-code", "op": "append", "arg": "--trust-remote-code"}],
         ),
+        (
+            r"There is no module or parameter named ['\"]lm_head\.input_scale['\"]|lm_head\.input_scale|weight_scale_2",
+            "vLLM cannot load this ModelOpt LM-head quantized checkpoint with the current runtime.",
+            [
+                {
+                    "label": "upgrade vLLM through the environment that provides this CLI, or use a compatible checkpoint",
+                    "op": "manual",
+                }
+            ],
+        ),
         (
             r"Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels/layer",
             "vLLM/Transformers kernel package mismatch.",
diff --git a/routes/cookbook_routes.py b/routes/cookbook_routes.py
index f25c7d7..af5ff1d 100644
--- a/routes/cookbook_routes.py
+++ b/routes/cookbook_routes.py
@@ -38,9 +38,10 @@ from routes.cookbook_helpers import (
     _ps_squote, _bash_squote, _validate_serve_cmd, _parse_serve_phase,
     _safe_env_prefix, _local_tooling_path_export, _append_serve_preflight_exit_lines,
     _append_serve_exit_code_lines, _append_llama_cpp_linux_accel_build_lines, _cached_model_scan_script,
-    _ollama_bind_from_cmd, _pip_install_fallback_chain, _pip_install_no_cache,
-    _user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd,
-    ModelDownloadRequest, ServeRequest, _diagnose_serve_output,
+    _append_vllm_linux_preflight_lines, _ollama_bind_from_cmd, _pip_install_fallback_chain,
+    _pip_install_no_cache, _user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd,
+    _diagnose_serve_output,
+    ModelDownloadRequest, ServeRequest,
 )
 
 _HF_TOKEN_STATUS_SNIPPET = (
@@ -1084,14 +1085,7 @@ def setup_cookbook_routes() -> APIRouter:
                 runner_lines.append('  echo "ERROR: vLLM does not run on macOS. Use Ollama or llama.cpp (Metal) instead."')
                 runner_lines.append('  ODYSSEUS_PREFLIGHT_EXIT=1')
                 runner_lines.append('fi')
-                # Put ~/.local/bin on PATH first — without a venv, vllm installs
-                # there via --user and the non-login serve shell otherwise can't
-                # find the `vllm` CLI ("command not found"). Mirrors llama.cpp above.
-                runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
-                runner_lines.append('if ! command -v vllm &>/dev/null; then')
-                runner_lines.append('  echo "ERROR: vLLM is not installed."')
-                runner_lines.append('  ODYSSEUS_PREFLIGHT_EXIT=127')
-                runner_lines.append('fi')
+                _append_vllm_linux_preflight_lines(runner_lines)
             elif "sglang.launch_server" in req.cmd:
                 runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
                 runner_lines.append('if ! command -v sglang &>/dev/null; then')
diff --git a/static/js/cookbook-diagnosis.js b/static/js/cookbook-diagnosis.js
index ec81aa0..af90d99 100644
--- a/static/js/cookbook-diagnosis.js
+++ b/static/js/cookbook-diagnosis.js
@@ -166,6 +166,18 @@ export const ERROR_PATTERNS = [
       { label: 'Edit serve', action: (panel) => _openServeEditFromDiagnosis(panel) },
     ],
   },
+  {
+    pattern: /There is no module or parameter named ['"]lm_head\.input_scale['"]|lm_head\.input_scale|weight_scale_2/i,
+    message: 'vLLM cannot load this ModelOpt LM-head quantized checkpoint with the current runtime.',
+    suggestion: 'Suggested action: upgrade vLLM through the environment that provides this CLI (package manager, venv, Docker image, or source checkout), or choose a compatible checkpoint.',
+    fixes: [
+      { label: 'Open Dependencies', action: () => _openCookbookDependencies('vllm') },
+      {
+        label: 'Copy upgrade hint',
+        action: () => _copyText('Upgrade the vLLM environment that provides the selected vllm CLI, or use a compatible checkpoint. Do not assume Odysseus owns PATH/system/source/Docker installs.'),
+      },
+    ],
+  },
   {
     pattern: /not divisib|must be divisible|attention heads.*divisible/i,
     message: 'Tensor parallel size incompatible with model dimensions.',
diff --git a/tests/test_cookbook_diagnosis.py b/tests/test_cookbook_diagnosis.py
new file mode 100644
index 0000000..da3168a
--- /dev/null
+++ b/tests/test_cookbook_diagnosis.py
@@ -0,0 +1,15 @@
+from routes.cookbook_helpers import _diagnose_serve_output
+
+
+def test_diagnose_vllm_modelopt_lm_head_error():  # pins the diagnosis for the lm_head.input_scale load error
+    output = """
+    ValueError: There is no module or parameter named 'lm_head.input_scale'
+    Engine core initialization failed.
+    """
+
+    diagnosis = _diagnose_serve_output(output)
+
+    assert diagnosis is not None  # the helper is annotated `dict | None`; a match must yield a dict
+    assert "ModelOpt LM-head" in diagnosis["message"]
+    assert diagnosis["suggestions"][0]["op"] == "manual"  # first suggestion is the manual-upgrade entry
+    assert "provides this CLI" in diagnosis["suggestions"][0]["label"]
diff --git a/tests/test_cookbook_helpers.py b/tests/test_cookbook_helpers.py
index 0b6a045..84e91ba 100644
--- a/tests/test_cookbook_helpers.py
+++ b/tests/test_cookbook_helpers.py
@@ -11,6 +11,7 @@ from routes.cookbook_helpers import (
     _append_serve_exit_code_lines,
     _append_serve_preflight_exit_lines,
     _llama_cpp_rebuild_cmd,
+    _append_vllm_linux_preflight_lines,
     _local_tooling_path_export,
     _pip_install_attempt,
     _pip_install_fallback_chain,
@@ -192,6 +193,19 @@ def test_serve_runner_installs_llama_cpp_server_extra():
     assert "_pip_install_fallback_chain('llama-cpp-python[server]'" in src
 
 
+def test_vllm_preflight_reports_cli_and_version():  # pins the shell lines the preflight helper emits
+    lines = []  # the helper appends to this list in place and returns None
+
+    _append_vllm_linux_preflight_lines(lines)
+    script = "\n".join(lines)  # join so each assertion below is a plain substring check
+
+    assert 'export PATH="$HOME/.local/bin:$PATH"' in script
+    assert 'ODYSSEUS_VLLM_BIN="$(command -v vllm 2>/dev/null || true)"' in script
+    assert 'echo "[odysseus] vLLM CLI: $ODYSSEUS_VLLM_BIN"' in script
+    assert '"$ODYSSEUS_VLLM_BIN" --version' in script
+    assert 'ODYSSEUS_PREFLIGHT_EXIT=127' in script  # set in the vLLM-not-found branch
+
+
 def test_venv_safe_local_pip_install_strips_user_flags_only_for_local_venv():
     cmd = 'python3 -m pip install -U --user --break-system-packages "vllm"'