From f9e1d38cc28d37938e209e810a2ab93e256703b1 Mon Sep 17 00:00:00 2001 From: spooky Date: Fri, 5 Jun 2026 20:03:04 +1000 Subject: [PATCH] fix: diagnose vllm serve runtime issues (#1198) --- routes/cookbook_helpers.py | 25 +++++++++++++++++++++++++ routes/cookbook_routes.py | 16 +++++----------- static/js/cookbook-diagnosis.js | 12 ++++++++++++ tests/test_cookbook_diagnosis.py | 15 +++++++++++++++ tests/test_cookbook_helpers.py | 14 ++++++++++++++ 5 files changed, 71 insertions(+), 11 deletions(-) create mode 100644 tests/test_cookbook_diagnosis.py diff --git a/routes/cookbook_helpers.py b/routes/cookbook_helpers.py index 1748bbb..8fbaa9e 100644 --- a/routes/cookbook_helpers.py +++ b/routes/cookbook_helpers.py @@ -559,6 +559,21 @@ def _append_serve_preflight_exit_lines(runner_lines: list[str], *, keep_shell_op runner_lines.append('fi') +def _append_vllm_linux_preflight_lines(runner_lines: list[str]) -> None: + """Append Linux vLLM readiness lines that identify the runtime being used.""" + # Keep the user install bin visible for Odysseus-managed `pip install --user` + # installs, but then report the actual CLI path so external runtimes are clear. + runner_lines.append('export PATH="$HOME/.local/bin:$PATH"') + runner_lines.append('ODYSSEUS_VLLM_BIN="$(command -v vllm 2>/dev/null || true)"') + runner_lines.append('if [ -z "$ODYSSEUS_VLLM_BIN" ]; then') + runner_lines.append(' echo "ERROR: vLLM is not installed."') + runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') + runner_lines.append('else') + runner_lines.append(' echo "[odysseus] vLLM CLI: $ODYSSEUS_VLLM_BIN"') + runner_lines.append(' ODYSSEUS_VLLM_VERSION="$("$ODYSSEUS_VLLM_BIN" --version 2>&1 | head -n 1 || true)"') + runner_lines.append(' if [ -n "$ODYSSEUS_VLLM_VERSION" ]; then echo "[odysseus] vLLM version: $ODYSSEUS_VLLM_VERSION"; fi') + runner_lines.append('fi') + def _append_serve_exit_code_lines( runner_lines: list[str], *, @@ -860,6 +875,16 @@ def _diagnose_serve_output(text: str) -> dict | None: "Model requires custom code or newer model support.", [{"label": "retry with --trust-remote-code", "op": "append", "arg": "--trust-remote-code"}], ), + ( + r"There is no module or parameter named ['\"]lm_head\.input_scale['\"]|lm_head\.input_scale|weight_scale_2", + "vLLM cannot load this ModelOpt LM-head quantized checkpoint with the current runtime.", + [ + { + "label": "upgrade vLLM through the environment that provides this CLI, or use a compatible checkpoint", + "op": "manual", + } + ], + ), ( r"Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels/layer", "vLLM/Transformers kernel package mismatch.", diff --git a/routes/cookbook_routes.py b/routes/cookbook_routes.py index f25c7d7..af5ff1d 100644 --- a/routes/cookbook_routes.py +++ b/routes/cookbook_routes.py @@ -38,9 +38,10 @@ from routes.cookbook_helpers import ( _ps_squote, _bash_squote, _validate_serve_cmd, _parse_serve_phase, _safe_env_prefix, _local_tooling_path_export, _append_serve_preflight_exit_lines, _append_serve_exit_code_lines, _append_llama_cpp_linux_accel_build_lines, _cached_model_scan_script, - _ollama_bind_from_cmd, _pip_install_fallback_chain, _pip_install_no_cache, - _user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd, - ModelDownloadRequest, ServeRequest, _diagnose_serve_output, + _append_vllm_linux_preflight_lines, _ollama_bind_from_cmd, _pip_install_fallback_chain, + _pip_install_no_cache, _user_shell_path_bootstrap, _venv_safe_local_pip_install_cmd, + _diagnose_serve_output, + ModelDownloadRequest, ServeRequest, ) _HF_TOKEN_STATUS_SNIPPET = ( @@ -1084,14 +1085,7 @@ def setup_cookbook_routes() -> APIRouter: runner_lines.append(' echo "ERROR: vLLM does not run on macOS. Use Ollama or llama.cpp (Metal) instead."') runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=1') runner_lines.append('fi') - # Put ~/.local/bin on PATH first — without a venv, vllm installs - # there via --user and the non-login serve shell otherwise can't - # find the `vllm` CLI ("command not found"). Mirrors llama.cpp above. - runner_lines.append('export PATH="$HOME/.local/bin:$PATH"') - runner_lines.append('if ! command -v vllm &>/dev/null; then') - runner_lines.append(' echo "ERROR: vLLM is not installed."') - runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127') - runner_lines.append('fi') + _append_vllm_linux_preflight_lines(runner_lines) elif "sglang.launch_server" in req.cmd: runner_lines.append('export PATH="$HOME/.local/bin:$PATH"') runner_lines.append('if ! command -v sglang &>/dev/null; then') diff --git a/static/js/cookbook-diagnosis.js b/static/js/cookbook-diagnosis.js index ec81aa0..af90d99 100644 --- a/static/js/cookbook-diagnosis.js +++ b/static/js/cookbook-diagnosis.js @@ -166,6 +166,18 @@ export const ERROR_PATTERNS = [ { label: 'Edit serve', action: (panel) => _openServeEditFromDiagnosis(panel) }, ], }, + { + pattern: /There is no module or parameter named ['"]lm_head\.input_scale['"]|lm_head\.input_scale|weight_scale_2/i, + message: 'vLLM cannot load this ModelOpt LM-head quantized checkpoint with the current runtime.', + suggestion: 'Suggested action: upgrade vLLM through the environment that provides this CLI (package manager, venv, Docker image, or source checkout), or choose a compatible checkpoint.', + fixes: [ + { label: 'Open Dependencies', action: () => _openCookbookDependencies('vllm') }, + { + label: 'Copy upgrade hint', + action: () => _copyText('Upgrade the vLLM environment that provides the selected vllm CLI, or use a compatible checkpoint. Do not assume Odysseus owns PATH/system/source/Docker installs.'), + }, + ], + }, { pattern: /not divisib|must be divisible|attention heads.*divisible/i, message: 'Tensor parallel size incompatible with model dimensions.', diff --git a/tests/test_cookbook_diagnosis.py b/tests/test_cookbook_diagnosis.py new file mode 100644 index 0000000..da3168a --- /dev/null +++ b/tests/test_cookbook_diagnosis.py @@ -0,0 +1,15 @@ +from routes.cookbook_helpers import _diagnose_serve_output + + +def test_diagnose_vllm_modelopt_lm_head_error(): + output = """ + ValueError: There is no module or parameter named 'lm_head.input_scale' + Engine core initialization failed. + """ + + diagnosis = _diagnose_serve_output(output) + + assert diagnosis is not None + assert "ModelOpt LM-head" in diagnosis["message"] + assert diagnosis["suggestions"][0]["op"] == "manual" + assert "provides this CLI" in diagnosis["suggestions"][0]["label"] diff --git a/tests/test_cookbook_helpers.py b/tests/test_cookbook_helpers.py index 0b6a045..84e91ba 100644 --- a/tests/test_cookbook_helpers.py +++ b/tests/test_cookbook_helpers.py @@ -11,6 +11,7 @@ from routes.cookbook_helpers import ( _append_serve_exit_code_lines, _append_serve_preflight_exit_lines, _llama_cpp_rebuild_cmd, + _append_vllm_linux_preflight_lines, _local_tooling_path_export, _pip_install_attempt, _pip_install_fallback_chain, @@ -192,6 +193,19 @@ def test_serve_runner_installs_llama_cpp_server_extra(): assert "_pip_install_fallback_chain('llama-cpp-python[server]'" in src +def test_vllm_preflight_reports_cli_and_version(): + lines = [] + + _append_vllm_linux_preflight_lines(lines) + script = "\n".join(lines) + + assert 'export PATH="$HOME/.local/bin:$PATH"' in script + assert 'ODYSSEUS_VLLM_BIN="$(command -v vllm 2>/dev/null || true)"' in script + assert 'echo "[odysseus] vLLM CLI: $ODYSSEUS_VLLM_BIN"' in script + assert '"$ODYSSEUS_VLLM_BIN" --version' in script + assert 'ODYSSEUS_PREFLIGHT_EXIT=127' in script + + def test_venv_safe_local_pip_install_strips_user_flags_only_for_local_venv(): cmd = 'python3 -m pip install -U --user --break-system-packages "vllm"'