From b10e6bc8707a6e9153045b00083da512de0ed1e6 Mon Sep 17 00:00:00 2001 From: Shaw Date: Wed, 3 Jun 2026 01:24:26 -0400 Subject: [PATCH] fix(cookbook): install llama-cpp-python[server] so llama.cpp serving works (#730) (#1338) The llama.cpp serve auto-install built a bare `llama-cpp-python` in the Linux source-build fallback and the Termux path, but the serve command runs `python3 -m llama_cpp.server`, which needs the `[server]` extra. Because the "already installed?" guard only checks `import llama_cpp` (a bare install satisfies it), the missing extra was never added, so serving crashed with `ModuleNotFoundError: No module named 'starlette_context'` (issue #730). - Request the `[server]` extra in both the Termux direct install and the Linux Python-bindings fallback (the Windows path already used `[server]`). - Shell-quote the package spec in `_pip_install_fallback_chain` via `shlex.quote` so the `[server]` brackets aren't treated as a bash glob; plain names unaffected. Tests: tests/test_cookbook_helpers.py gains extras-quoting coverage and a serve-runner regression guard. --- routes/cookbook_helpers.py | 9 +++++++-- routes/cookbook_routes.py | 4 ++-- tests/test_cookbook_helpers.py | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/routes/cookbook_helpers.py b/routes/cookbook_helpers.py index 2fe9d1f..c60940a 100644 --- a/routes/cookbook_helpers.py +++ b/routes/cookbook_helpers.py @@ -194,8 +194,13 @@ def _pip_install_fallback_chain(package: str, *, python_cmd: str = "python3 -m p pip output appear in the Cookbook log on failure. """ upgrade_flag = " -U" if upgrade else "" - base = _pip_install_attempt(f"{python_cmd} install -q{upgrade_flag} {package}") - user = _pip_install_attempt(f"{python_cmd} install --user --break-system-packages -q{upgrade_flag} {package}") + # Shell-quote the package spec: an extras spec like ``llama-cpp-python[server]`` + # contains brackets that bash would treat as a glob, so it must be quoted + # before being embedded in the install command. Plain names (e.g. + # ``huggingface_hub``) are returned unchanged by ``shlex.quote``. + pkg = shlex.quote(package) + base = _pip_install_attempt(f"{python_cmd} install -q{upgrade_flag} {pkg}") + user = _pip_install_attempt(f"{python_cmd} install --user --break-system-packages -q{upgrade_flag} {pkg}") # Derive the python executable for the venv detection check. # Must use the same interpreter that pip belongs to; hardcoding # python3 breaks when pip lives in a venv that only has "python". diff --git a/routes/cookbook_routes.py b/routes/cookbook_routes.py index a890000..f3fbc4a 100644 --- a/routes/cookbook_routes.py +++ b/routes/cookbook_routes.py @@ -1039,7 +1039,7 @@ def setup_cookbook_routes() -> APIRouter: runner_lines.append(' if ! python3 -c "import llama_cpp" 2>/dev/null; then') runner_lines.append(' pkg install -y cmake 2>/dev/null') runner_lines.append(' pip install numpy diskcache jinja2 2>/dev/null') - runner_lines.append(' CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_LLAMAFILE=OFF" pip install llama-cpp-python --no-build-isolation --no-cache-dir 2>&1 || true') + runner_lines.append(' CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_LLAMAFILE=OFF" pip install \'llama-cpp-python[server]\' --no-build-isolation --no-cache-dir 2>&1 || true') runner_lines.append(' fi') runner_lines.append('elif ! command -v llama-server &>/dev/null; then') runner_lines.append(' echo "Native llama-server not found — building from source (one-time, may take a few minutes)..."') @@ -1066,7 +1066,7 @@ def setup_cookbook_routes() -> APIRouter: runner_lines.append(' # If the native build failed, fall back to the Python bindings.') runner_lines.append(' if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then') runner_lines.append(' echo "llama-server build failed — installing Python bindings as fallback..."') - runner_lines.append(f" {_pip_install_fallback_chain('llama-cpp-python', python_cmd='pip')} || true") + runner_lines.append(f" {_pip_install_fallback_chain('llama-cpp-python[server]', python_cmd='pip')} || true") runner_lines.append(' fi') runner_lines.append(' if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then') runner_lines.append(' echo "ERROR: llama.cpp serving is not available after install/build attempts."') diff --git a/tests/test_cookbook_helpers.py b/tests/test_cookbook_helpers.py index f17bad4..6b8f425 100644 --- a/tests/test_cookbook_helpers.py +++ b/tests/test_cookbook_helpers.py @@ -159,6 +159,38 @@ def test_pip_install_fallback_chain_tries_user_outside_venv(): assert "user_attempt" in result.stdout, "Chain should try --user when not in venv and base fails" +def test_pip_install_fallback_chain_quotes_extras_spec(): + """An extras spec like ``llama-cpp-python[server]`` must be shell-quoted so + bash does not treat the brackets as a glob, and the ``[server]`` extra + (which pulls in starlette_context for ``python -m llama_cpp.server``) is + actually installed instead of a bare ``llama-cpp-python`` (issue #730).""" + chain = _pip_install_fallback_chain("llama-cpp-python[server]", python_cmd="pip") + # Quoted in both the plain and the --user attempt. + assert chain.count("'llama-cpp-python[server]'") == 2 + # Never the unquoted form (bracket-glob risk). + assert "install -q llama-cpp-python[server]" not in chain + # A plain package name is still passed through unquoted (no regression). + plain = _pip_install_fallback_chain("hf_transfer", python_cmd="pip") + assert "install -q hf_transfer" in plain + + +def test_serve_runner_installs_llama_cpp_server_extra(): + """The llama.cpp serve auto-install must request the ``[server]`` extra in + every path (issue #730): a bare ``llama-cpp-python`` passes the + ``import llama_cpp`` guard, so ``python -m llama_cpp.server`` then crashes + with ``ModuleNotFoundError: No module named 'starlette_context'`` and the + extra is never reinstalled.""" + import pathlib + src = (pathlib.Path(__file__).resolve().parent.parent + / "routes" / "cookbook_routes.py").read_text(encoding="utf-8") + # No serve path may install a bare (extra-less) llama-cpp-python. + assert "pip install llama-cpp-python " not in src + assert "_pip_install_fallback_chain('llama-cpp-python'" not in src + # The [server] extra is requested in the build/fallback paths. + assert "'llama-cpp-python[server]'" in src + assert "_pip_install_fallback_chain('llama-cpp-python[server]'" in src + + def test_venv_safe_local_pip_install_strips_user_flags_only_for_local_venv(): cmd = 'python3 -m pip install -U --user --break-system-packages "vllm"'