From b10e6bc8707a6e9153045b00083da512de0ed1e6 Mon Sep 17 00:00:00 2001
From: Shaw <shawmakesmagic@gmail.com>
Date: Wed, 3 Jun 2026 01:24:26 -0400
Subject: [PATCH] fix(cookbook): install llama-cpp-python[server] so llama.cpp
 serving works (#730) (#1338)

The llama.cpp serve auto-install built a bare `llama-cpp-python` in the Linux
source-build fallback and the Termux path, but the serve command runs
`python3 -m llama_cpp.server`, which needs the `[server]` extra. Because the
"already installed?" guard only checks `import llama_cpp` (a bare install
satisfies it), the missing extra was never added, so serving crashed with
`ModuleNotFoundError: No module named 'starlette_context'` (issue #730).

- Request the `[server]` extra in both the Termux direct install and the Linux
  Python-bindings fallback (the Windows path already used `[server]`).
- Shell-quote the package spec in `_pip_install_fallback_chain` via `shlex.quote`
  so the `[server]` brackets aren't treated as a bash glob; plain names are
  unaffected.

Tests: tests/test_cookbook_helpers.py gains extras-quoting coverage and a
serve-runner regression guard.
---
 routes/cookbook_helpers.py     |  9 +++++++--
 routes/cookbook_routes.py      |  4 ++--
 tests/test_cookbook_helpers.py | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/routes/cookbook_helpers.py b/routes/cookbook_helpers.py
index 2fe9d1f..c60940a 100644
--- a/routes/cookbook_helpers.py
+++ b/routes/cookbook_helpers.py
@@ -194,8 +194,13 @@ def _pip_install_fallback_chain(package: str, *, python_cmd: str = "python3 -m p
     pip output appear in the Cookbook log on failure.
     """
     upgrade_flag = " -U" if upgrade else ""
-    base = _pip_install_attempt(f"{python_cmd} install -q{upgrade_flag} {package}")
-    user = _pip_install_attempt(f"{python_cmd} install --user --break-system-packages -q{upgrade_flag} {package}")
+    # Shell-quote the package spec: an extras spec like ``llama-cpp-python[server]``
+    # contains brackets that bash would treat as a glob, so it must be quoted
+    # before being embedded in the install command. Plain names (e.g.
+    # ``huggingface_hub``) are returned unchanged by ``shlex.quote``.
+    pkg = shlex.quote(package)
+    base = _pip_install_attempt(f"{python_cmd} install -q{upgrade_flag} {pkg}")
+    user = _pip_install_attempt(f"{python_cmd} install --user --break-system-packages -q{upgrade_flag} {pkg}")
     # Derive the python executable for the venv detection check.
     # Must use the same interpreter that pip belongs to; hardcoding
     # python3 breaks when pip lives in a venv that only has "python".
diff --git a/routes/cookbook_routes.py b/routes/cookbook_routes.py
index a890000..f3fbc4a 100644
--- a/routes/cookbook_routes.py
+++ b/routes/cookbook_routes.py
@@ -1039,7 +1039,7 @@ def setup_cookbook_routes() -> APIRouter:
                 runner_lines.append('  if ! python3 -c "import llama_cpp" 2>/dev/null; then')
                 runner_lines.append('    pkg install -y cmake 2>/dev/null')
                 runner_lines.append('    pip install numpy diskcache jinja2 2>/dev/null')
-                runner_lines.append('    CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_LLAMAFILE=OFF" pip install llama-cpp-python --no-build-isolation --no-cache-dir 2>&1 || true')
+                runner_lines.append('    CMAKE_ARGS="-DGGML_BLAS=OFF -DGGML_LLAMAFILE=OFF" pip install \'llama-cpp-python[server]\' --no-build-isolation --no-cache-dir 2>&1 || true')
                 runner_lines.append('  fi')
                 runner_lines.append('elif ! command -v llama-server &>/dev/null; then')
                 runner_lines.append('  echo "Native llama-server not found — building from source (one-time, may take a few minutes)..."')
@@ -1066,7 +1066,7 @@ def setup_cookbook_routes() -> APIRouter:
                 runner_lines.append('  # If the native build failed, fall back to the Python bindings.')
                 runner_lines.append('  if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then')
                 runner_lines.append('    echo "llama-server build failed — installing Python bindings as fallback..."')
-                runner_lines.append(f"    {_pip_install_fallback_chain('llama-cpp-python', python_cmd='pip')} || true")
+                runner_lines.append(f"    {_pip_install_fallback_chain('llama-cpp-python[server]', python_cmd='pip')} || true")
                 runner_lines.append('  fi')
                 runner_lines.append('  if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then')
                 runner_lines.append('    echo "ERROR: llama.cpp serving is not available after install/build attempts."')
diff --git a/tests/test_cookbook_helpers.py b/tests/test_cookbook_helpers.py
index f17bad4..6b8f425 100644
--- a/tests/test_cookbook_helpers.py
+++ b/tests/test_cookbook_helpers.py
@@ -159,6 +159,38 @@ def test_pip_install_fallback_chain_tries_user_outside_venv():
     assert "user_attempt" in result.stdout, "Chain should try --user when not in venv and base fails"
 
 
+def test_pip_install_fallback_chain_quotes_extras_spec():
+    """An extras spec like ``llama-cpp-python[server]`` must be shell-quoted so
+    bash does not treat the brackets as a glob, and the ``[server]`` extra
+    (which pulls in starlette_context for ``python -m llama_cpp.server``) is
+    actually installed instead of a bare ``llama-cpp-python`` (issue #730)."""
+    chain = _pip_install_fallback_chain("llama-cpp-python[server]", python_cmd="pip")
+    # Quoted in both the plain and the --user attempt.
+    assert chain.count("'llama-cpp-python[server]'") == 2
+    # Never the unquoted form (bracket-glob risk).
+    assert "install -q llama-cpp-python[server]" not in chain
+    # A plain package name is still passed through unquoted (no regression).
+    plain = _pip_install_fallback_chain("hf_transfer", python_cmd="pip")
+    assert "install -q hf_transfer" in plain
+
+
+def test_serve_runner_installs_llama_cpp_server_extra():
+    """The llama.cpp serve auto-install must request the ``[server]`` extra in
+    every path (issue #730): a bare ``llama-cpp-python`` passes the
+    ``import llama_cpp`` guard, so ``python -m llama_cpp.server`` then crashes
+    with ``ModuleNotFoundError: No module named 'starlette_context'`` and the
+    extra is never reinstalled."""
+    import pathlib
+    src = (pathlib.Path(__file__).resolve().parent.parent
+           / "routes" / "cookbook_routes.py").read_text(encoding="utf-8")
+    # No serve path may install a bare (extra-less) llama-cpp-python.
+    assert "pip install llama-cpp-python " not in src
+    assert "_pip_install_fallback_chain('llama-cpp-python'" not in src
+    # The [server] extra is requested in the build/fallback paths.
+    assert "'llama-cpp-python[server]'" in src
+    assert "_pip_install_fallback_chain('llama-cpp-python[server]'" in src
+
+
 def test_venv_safe_local_pip_install_strips_user_flags_only_for_local_venv():
     cmd = 'python3 -m pip install -U --user --break-system-packages "vllm"'