760 lines
37 KiB
Python
760 lines
37 KiB
Python
"""cookbook_helpers.py — validators + small helpers shared by the cookbook routes.
|
|
Extracted from cookbook_routes.py; the routes module imports the symbols it needs."""
|
|
|
|
import logging
|
|
import os
|
|
import posixpath
|
|
import re
|
|
import shlex
|
|
|
|
from fastapi import HTTPException
|
|
from pydantic import BaseModel
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
# HuggingFace repo IDs are <org>/<name>, both alphanumerics plus ._-
|
|
# Rejecting anything else up front closes off shell-interpolation vectors.
|
|
_REPO_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*/[A-Za-z0-9][A-Za-z0-9._-]*$")
|
|
# Cached models scanned from a custom/local model dir are keyed by their leaf
|
|
# folder name (no slash), e.g. `DeepSeek-R1-UD-IQ4_XS`. The serve command uses
|
|
# the real on-disk path separately; this identifier is only for UI/task
|
|
# bookkeeping, so serving should accept the same safe glyph set as repo IDs.
|
|
_LOCAL_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")
|
|
# Ollama model names include tags, e.g. `qwen2.5:0.5b` or `llama3.2:latest`.
|
|
# Some registries also use a namespace path. Keep this shell-safe: no spaces,
|
|
# quotes, `$`, `;`, `&`, pipes, or redirects.
|
|
_OLLAMA_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._:/-]{0,200}$")
|
|
# Include pattern is a glob: allow typical safe glyphs only.
|
|
_INCLUDE_RE = re.compile(r"^[A-Za-z0-9._\-*?/\[\]]+$")
|
|
# Remote host: user@host (optionally with :port-free hostname parts).
|
|
_REMOTE_HOST_RE = re.compile(r"^[A-Za-z0-9._-]+@[A-Za-z0-9._-]+$")
|
|
# HF tokens and API tokens are url-safe base64-like.
|
|
_TOKEN_RE = re.compile(r"^[A-Za-z0-9._~+/=-]+$")
|
|
# Session IDs we mint look like "cookbook-deadbeef" or "serve-deadbeef".
|
|
# Anything beyond plain alphanumerics + dash + underscore could break out
|
|
# of the shell/PowerShell contexts the value lands in.
|
|
_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9_-]{1,64}$")
|
|
_SSH_PORT_RE = re.compile(r"^\d{1,5}$")
|
|
_GPU_LIST_RE = re.compile(r"^\d+(?:,\d+)*$")
|
|
# A download target directory. Absolute or ~-relative path; safe path glyphs
|
|
# only (no quotes, shell metacharacters, or spaces) since it lands in a shell
|
|
# command. A leading ~ is expanded to $HOME at command-build time.
|
|
_LOCAL_DIR_RE = re.compile(r"^~?/[A-Za-z0-9._/-]*$|^~$")
|
|
|
|
|
|
def _validate_repo_id(v: str | None) -> str:
    """Return ``v`` unchanged if it is a well-formed ``<org>/<name>`` repo id.

    Raises:
        HTTPException: 400 when ``v`` is empty/None or does not match
            ``_REPO_ID_RE`` in its entirety.
    """
    # fullmatch() forces the whole string to match.  With match(), a pattern
    # that ends in `$` also accepts one trailing newline.
    if not v or _REPO_ID_RE.fullmatch(v) is None:
        raise HTTPException(400, "Invalid repo_id — must be <org>/<name> using [A-Za-z0-9._-]")
    return v
|
|
|
|
|
|
def _validate_serve_model_id(v: str | None) -> str:
    """Return ``v`` if it is an acceptable model identifier for serving.

    Accepted shapes are a HuggingFace ``<org>/<name>`` repo id, a cached
    local model id (leaf folder name), or an Ollama ``name:tag``.

    Raises:
        HTTPException: 400 when ``v`` is empty/None or matches none of the
            three identifier patterns in its entirety.
    """
    if not v:
        raise HTTPException(400, "repo_id is required")
    # fullmatch() rather than match(): a `$`-anchored pattern used with
    # match() tolerates a trailing newline.
    accepted_shapes = (_REPO_ID_RE, _LOCAL_MODEL_ID_RE, _OLLAMA_MODEL_ID_RE)
    if any(shape.fullmatch(v) for shape in accepted_shapes):
        return v
    raise HTTPException(400, "Invalid repo_id — must be <org>/<name>, an Ollama name:tag, or a cached local model id")
|
|
|
|
|
|
def _validate_include(v: str | None) -> str | None:
    """Validate an optional download include glob.

    Returns:
        ``None`` when ``v`` is None or empty, otherwise ``v`` unchanged.

    Raises:
        HTTPException: 400 when ``v`` contains anything outside the glyph set
            allowed by ``_INCLUDE_RE``.
    """
    if v is None or v == "":
        return None
    # fullmatch() so a trailing newline cannot slip past a `$` anchor.
    if _INCLUDE_RE.fullmatch(v) is None:
        raise HTTPException(400, "Invalid include pattern")
    return v
|
|
|
|
|
|
def _validate_remote_host(v: str | None) -> str | None:
    """Validate an optional SSH target of the form ``user@host``.

    Returns:
        ``None`` when ``v`` is None or empty, otherwise ``v`` unchanged.

    Raises:
        HTTPException: 400 when ``v`` is not exactly ``user@host`` as defined
            by ``_REMOTE_HOST_RE``.
    """
    if v is None or v == "":
        return None
    # fullmatch() so a trailing newline cannot slip past a `$` anchor.
    if _REMOTE_HOST_RE.fullmatch(v) is None:
        raise HTTPException(400, "Invalid remote_host — must be user@host, no SSH option syntax")
    return v
|
|
|
|
|
|
def _validate_token(v: str | None) -> str | None:
    """Validate an optional API/HF token.

    Returns:
        ``None`` when ``v`` is None or empty, otherwise ``v`` unchanged.

    Raises:
        HTTPException: 400 when ``v`` contains characters outside the set
            allowed by ``_TOKEN_RE``.
    """
    if v is None or v == "":
        return None
    # fullmatch() so a trailing newline cannot slip past a `$` anchor.
    if _TOKEN_RE.fullmatch(v) is None:
        raise HTTPException(400, "Invalid token characters")
    return v
|
|
|
|
|
|
def _validate_local_dir(v: str | None) -> str | None:
    """Validate and normalise an optional download target directory.

    Trailing slashes are stripped first (a path made only of slashes
    collapses to ``"/"``), then the result must match ``_LOCAL_DIR_RE``.

    Returns:
        ``None`` when ``v`` is None or empty, otherwise the normalised path.

    Raises:
        HTTPException: 400 when the normalised path is not an absolute or
            ``~``-relative path made of the allowed glyphs.
    """
    if v is None or v == "":
        return None
    v = v.rstrip("/") or "/"
    # fullmatch() so a trailing newline cannot slip past a `$` anchor.
    if _LOCAL_DIR_RE.fullmatch(v) is None:
        raise HTTPException(400, "Invalid local_dir — must be an absolute or ~ path with no spaces or shell metacharacters")
    return v
|
|
|
|
|
|
def _validate_ssh_port(v: str | None) -> str | None:
    """Validate an optional SSH port and return it in canonical decimal form.

    Returns:
        ``None`` when ``v`` is None or empty, otherwise the port as a string
        produced by ``str(int(...))`` (so leading zeros are dropped).

    Raises:
        HTTPException: 400 when ``v`` is not 1-5 digits or falls outside
            1..65535.
    """
    if v is None or v == "":
        return None
    text = str(v)
    if _SSH_PORT_RE.fullmatch(text) is None:
        raise HTTPException(400, "Invalid ssh_port")
    number = int(text)
    if not 1 <= number <= 65535:
        raise HTTPException(400, "Invalid ssh_port")
    return str(number)
|
|
|
|
|
|
def _validate_gpus(v: str | None) -> str | None:
    """Validate an optional comma-separated list of GPU indexes.

    Returns:
        ``None`` when ``v`` is None or empty, otherwise ``str(v)``.

    Raises:
        HTTPException: 400 when the value is not digits separated by single
            commas.
    """
    if v is None or v == "":
        return None
    text = str(v)
    if _GPU_LIST_RE.fullmatch(text) is None:
        raise HTTPException(400, "Invalid gpus — expected comma-separated GPU indexes")
    return text
|
|
|
|
|
|
def _shell_path(p: str) -> str:
|
|
"""Render a validated path for a double-quoted shell context, expanding a
|
|
leading ~ to $HOME (single quotes wouldn't expand it). Safe because
|
|
_validate_local_dir already restricts the charset."""
|
|
if p == "~":
|
|
return '"$HOME"'
|
|
if p.startswith("~/"):
|
|
return '"$HOME/' + p[2:] + '"'
|
|
return '"' + p + '"'
|
|
|
|
|
|
def _local_tooling_path_export(executable: str) -> str:
|
|
"""Bash line prepending the running interpreter's bin dir to PATH.
|
|
|
|
When Odysseus runs from a virtualenv, that bin dir holds the tools the
|
|
cookbook runners shell out to (`hf`, `python`). tmux runners start from a
|
|
fresh login shell with the venv NOT activated, so without this they can't
|
|
find `hf` and downloads fail with "hf: command not found" — notably on
|
|
macOS, where the `pip --user` self-heal also misses (`pip` isn't a command,
|
|
only `pip3`/`python3 -m pip`). Local runs only; meaningless over SSH.
|
|
"""
|
|
# This builds a bash snippet, so an explicit POSIX absolute path should keep
|
|
# POSIX semantics even when the app/tests run on Windows. Otherwise
|
|
# os.path.abspath("/opt/...") would incorrectly turn it into "D:\\opt\\...".
|
|
if executable.startswith("/"):
|
|
bin_dir = posixpath.dirname(executable)
|
|
else:
|
|
bin_dir = os.path.dirname(os.path.abspath(executable))
|
|
# Escape for a double-quoted context: $PATH must still expand, but spaces
|
|
# and shell metacharacters in the path must be preserved literally.
|
|
esc = (
|
|
bin_dir.replace("\\", "\\\\")
|
|
.replace('"', '\\"')
|
|
.replace("$", "\\$")
|
|
.replace("`", "\\`")
|
|
)
|
|
return f'export PATH="{esc}:$PATH"'
|
|
|
|
|
|
def _pip_install_no_cache(cmd: str) -> str:
|
|
"""Add ``--no-cache-dir`` to a pip install command.
|
|
|
|
Cookbook dependency installs (vLLM, llama-cpp-python, …) build large wheels;
|
|
pip's default cache lives under ``$HOME/.cache/pip`` and these builds can fill
|
|
a small home filesystem with ``[Errno 28] No space left on device`` mid-build
|
|
(issue #1219), leaving the dependency "installed" but unusable (#1459).
|
|
Disabling the cache for these one-off installs keeps them off the home disk
|
|
(the maintainer's suggested ``PIP_CACHE_DIR=`` workaround, made the default).
|
|
Idempotent; leaves non-pip-install commands untouched."""
|
|
if not cmd or "pip install" not in cmd or "--no-cache-dir" in cmd:
|
|
return cmd
|
|
return cmd.replace("pip install", "pip install --no-cache-dir", 1)
|
|
|
|
|
|
def _pip_install_attempt(pip_cmd: str) -> str:
|
|
"""Wrap a single pip install command so its exit status survives the
|
|
fallback chain and its stderr is visible in the tmux log on failure.
|
|
|
|
Without this wrapper, `pip … 2>&1 | tail -5` returns ``tail``'s exit
|
|
code (0), masking pip's real failure and preventing the next fallback
|
|
from running. The generated snippet captures all output to a temp
|
|
file, prints the last 5 lines on failure (so the Cookbook log panel
|
|
shows useful diagnostics), cleans up, and exits with pip's original
|
|
status.
|
|
"""
|
|
return (
|
|
"bash -c '"
|
|
f'_out=$(mktemp) && {pip_cmd} >"$_out" 2>&1; _rc=$?; '
|
|
'tail -5 "$_out"; rm -f "$_out"; exit $_rc'
|
|
"'"
|
|
)
|
|
|
|
|
|
def _pip_install_fallback_chain(package: str, *, python_cmd: str = "python3 -m pip", upgrade: bool = False) -> str:
    """Build a bash expression that installs ``package`` with a ``--user`` fallback.

    The plain install runs first.  If it fails, a ``--user
    --break-system-packages`` install is attempted, but only when the
    interpreter is NOT inside a virtualenv.  Both attempts go through
    :func:`_pip_install_attempt`, so pip's real exit code is preserved and
    the tail of its output is shown.

    Args:
        package: Requirement text appended verbatim to the pip command.
        python_cmd: The pip invocation prefix, e.g. ``python3 -m pip`` or ``pip``.
        upgrade: When true, ``-U`` is added to both attempts.
    """
    flags = "-q -U" if upgrade else "-q"
    primary = _pip_install_attempt(f"{python_cmd} install {flags} {package}")
    user_site = _pip_install_attempt(
        f"{python_cmd} install --user --break-system-packages {flags} {package}"
    )

    # The venv probe must run under the interpreter that owns pip; a
    # hard-coded python3 breaks in a venv that only ships "python".
    if " -m pip" in python_cmd:
        interpreter = python_cmd.replace(" -m pip", "")
    elif python_cmd.strip() == "pip":
        interpreter = "python"
    else:
        # Covers a bare "pip3" as well as anything unrecognised.
        interpreter = "python3"
    in_venv_probe = f'{interpreter} -c "import sys; sys.exit(0 if sys.prefix != sys.base_prefix else 1)"'

    # `! probe` succeeds only outside a venv, so `&&` then tries --user.
    # Inside a venv the negated probe fails, --user is skipped, and the group
    # exits non-zero, so the primary failure propagates instead of being
    # masked as success (the problem with the earlier `|| { probe || … }`
    # shape from #903).
    return f"{primary} || {{ ! {in_venv_probe} && {user_site}; }}"
|
|
|
|
|
|
def _venv_safe_local_pip_install_cmd(cmd: str, *, local: bool, in_venv: bool) -> str:
|
|
"""Drop pip user-install flags that are invalid for local venv installs.
|
|
|
|
Cookbook dependency installs run through the model-serve task path so users
|
|
can watch progress in the same log UI. For local POSIX runs, that task
|
|
prepends Odysseus' own interpreter directory to PATH. If Odysseus itself is
|
|
running from a venv, `python3` resolves to the venv Python and pip rejects
|
|
`--user` with "User site-packages are not visible in this virtualenv".
|
|
|
|
Keep remote and non-venv installs unchanged: remotes may intentionally use
|
|
system Python, and Docker/non-venv installs still need user-site fallback.
|
|
"""
|
|
if not local or not in_venv:
|
|
return cmd
|
|
if "pip install" not in (cmd or ""):
|
|
return cmd
|
|
try:
|
|
parts = shlex.split(cmd)
|
|
except ValueError:
|
|
return cmd
|
|
stripped = [
|
|
part
|
|
for part in parts
|
|
if part not in {"--user", "--break-system-packages"}
|
|
]
|
|
return shlex.join(stripped)
|
|
|
|
|
|
def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
|
|
"""Build the standalone Python scanner used by /api/model/cached."""
|
|
lines = [
|
|
"import json, os, re, shutil, subprocess, urllib.request",
|
|
"models = []",
|
|
"seen = set()",
|
|
"BLOCKED_ROOTS = ('/sys', '/proc', '/dev', '/run', '/var/run')",
|
|
"def safe_path(p):",
|
|
" try:",
|
|
" rp = os.path.realpath(os.path.expanduser(p))",
|
|
" return not any(rp == b or rp.startswith(b + os.sep) for b in BLOCKED_ROOTS)",
|
|
" except Exception:",
|
|
" return False",
|
|
"def safe_walk(top):",
|
|
" if not safe_path(top): return",
|
|
" for root, dirs, fns in os.walk(top, followlinks=False):",
|
|
" dirs[:] = [d for d in dirs if not os.path.islink(os.path.join(root, d)) and safe_path(os.path.join(root, d))]",
|
|
" yield root, dirs, fns",
|
|
"def gguf_role(name):",
|
|
" n = name.lower()",
|
|
" if n.startswith('mmproj') or 'mmproj' in n: return 'projector'",
|
|
" return 'model'",
|
|
"def gguf_quant(name):",
|
|
" m = re.search(r'(?i)(UD-)?(IQ[0-9]_[A-Z0-9_]+|Q[0-9](?:_[A-Z0-9]+)+|BF16|F16|FP16|F32|Q8_0)', name)",
|
|
" return m.group(0).upper() if m else ''",
|
|
"def collect_ggufs(base):",
|
|
" files = []",
|
|
" split_groups = {}",
|
|
" if not os.path.isdir(base) or not safe_path(base): return files",
|
|
" for root, dirs, fns in safe_walk(base):",
|
|
" for fn in sorted(fns):",
|
|
" if not fn.lower().endswith('.gguf'): continue",
|
|
" fp = os.path.join(root, fn)",
|
|
" try: size = os.path.getsize(fp)",
|
|
" except Exception: size = 0",
|
|
" try: rel = os.path.relpath(fp, base).replace(os.sep, '/')",
|
|
" except Exception: rel = fn",
|
|
" sm = re.match(r'(?i)^(.+)-(\\d+)-of-(\\d+)\\.gguf$', fn)",
|
|
" if sm:",
|
|
" prefix, part_s, total_s = sm.group(1), sm.group(2), sm.group(3)",
|
|
" key = (root, prefix, total_s)",
|
|
" g = split_groups.setdefault(key, {'name':fn,'rel_path':rel,'size_bytes':0,'role':gguf_role(fn),'quant':gguf_quant(fn),'parts':int(total_s),'split':True})",
|
|
" g['size_bytes'] += size",
|
|
" if int(part_s) == 1:",
|
|
" g.update({'name':fn,'rel_path':rel,'role':gguf_role(fn),'quant':gguf_quant(fn)})",
|
|
" continue",
|
|
" files.append({'name':fn,'rel_path':rel,'size_bytes':size,'role':gguf_role(fn),'quant':gguf_quant(fn)})",
|
|
" files.extend(split_groups.values())",
|
|
" files.sort(key=lambda f: (f.get('role') != 'model', f.get('rel_path', '')))",
|
|
" return files",
|
|
"def scan_hf(cache):",
|
|
" if not os.path.isdir(cache): return",
|
|
" for d in sorted(os.listdir(cache)):",
|
|
" if not d.startswith('models--'): continue",
|
|
" rid = d.replace('models--','').replace('--','/')",
|
|
" if rid in seen: continue",
|
|
" seen.add(rid)",
|
|
" blobs = os.path.join(cache, d, 'blobs')",
|
|
" sz, nf, ic = 0, 0, False",
|
|
" if os.path.isdir(blobs):",
|
|
" for f in os.scandir(blobs):",
|
|
" if f.is_file(): nf += 1; sz += f.stat().st_size",
|
|
" if f.name.endswith('.incomplete'): ic = True",
|
|
" snap = os.path.join(cache, d, 'snapshots')",
|
|
" is_diffusion = False; gguf_files = []",
|
|
" if os.path.isdir(snap):",
|
|
" for sd in os.listdir(snap):",
|
|
" sf = os.path.join(snap, sd)",
|
|
" if not os.path.isdir(sf): continue",
|
|
" if os.path.exists(os.path.join(sf, 'model_index.json')): is_diffusion = True",
|
|
" for f in collect_ggufs(sf): f['rel_path'] = sd + '/' + f['rel_path']; gguf_files.append(f)",
|
|
" models.append({'repo_id':rid,'size_bytes':sz,'nb_files':nf,'has_incomplete':ic,'path':cache,'is_diffusion':is_diffusion,'is_gguf':bool(gguf_files),'gguf_files':gguf_files})",
|
|
"def scan_dir(p):",
|
|
" if not os.path.isdir(p) or not safe_path(p): return",
|
|
" for d in sorted(os.listdir(p)):",
|
|
" if d.startswith('.'): continue",
|
|
" if d.startswith('models--'): continue",
|
|
" fp = os.path.join(p, d)",
|
|
" if not os.path.isdir(fp) or os.path.islink(fp) or not safe_path(fp): continue",
|
|
" if d in seen: continue",
|
|
" is_model = False; gguf_files = []",
|
|
" for root, dirs, fns in safe_walk(fp):",
|
|
" for fn in fns:",
|
|
" if fn.lower().endswith('.gguf'): is_model = True",
|
|
" elif fn == 'config.json' or fn.endswith('.safetensors') or fn.endswith('.bin'): is_model = True",
|
|
" if is_model: break",
|
|
" if not is_model: continue",
|
|
" gguf_files = collect_ggufs(fp)",
|
|
" seen.add(d)",
|
|
" sz, nf = 0, 0",
|
|
" for dp, _, fns in safe_walk(fp):",
|
|
" for fn in fns:",
|
|
" try: nf += 1; sz += os.path.getsize(os.path.join(dp, fn))",
|
|
" except Exception: pass",
|
|
" is_diff = os.path.exists(os.path.join(fp, 'model_index.json'))",
|
|
" models.append({'repo_id':d,'size_bytes':sz,'nb_files':nf,'has_incomplete':False,'path':p,'is_local_dir':True,'is_diffusion':is_diff,'is_gguf':bool(gguf_files),'gguf_files':gguf_files})",
|
|
"def parse_size(num, unit):",
|
|
" try: n = float(num)",
|
|
" except Exception: return 0",
|
|
" u = (unit or '').upper()",
|
|
" if u.startswith('TB'): return int(n * 1024 ** 4)",
|
|
" if u.startswith('GB'): return int(n * 1024 ** 3)",
|
|
" if u.startswith('MB'): return int(n * 1024 ** 2)",
|
|
" if u.startswith('KB'): return int(n * 1024)",
|
|
" return int(n)",
|
|
"def scan_ollama():",
|
|
" if not shutil.which('ollama'): return",
|
|
" try:",
|
|
" p = subprocess.run(['ollama', 'list'], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True, timeout=6)",
|
|
" except Exception:",
|
|
" return",
|
|
" if p.returncode != 0: return",
|
|
" for line in (p.stdout or '').splitlines()[1:]:",
|
|
" parts = line.split()",
|
|
" if len(parts) < 4: continue",
|
|
" name = parts[0]",
|
|
" if not name or name in seen: continue",
|
|
" size_bytes = parse_size(parts[2], parts[3])",
|
|
" seen.add(name)",
|
|
" models.append({'repo_id':name,'size_bytes':size_bytes,'nb_files':1,'has_incomplete':False,'path':'ollama','backend':'ollama','is_ollama':True})",
|
|
"def scan_ollama_api():",
|
|
" urls = ['http://127.0.0.1:11434/api/tags', 'http://localhost:11434/api/tags', 'http://host.docker.internal:11434/api/tags']",
|
|
" for url in urls:",
|
|
" try:",
|
|
" with urllib.request.urlopen(url, timeout=2) as r:",
|
|
" data = json.loads(r.read().decode('utf-8', 'replace'))",
|
|
" except Exception:",
|
|
" continue",
|
|
" for item in data.get('models', []):",
|
|
" name = item.get('name') or item.get('model')",
|
|
" if not name or name in seen: continue",
|
|
" size_bytes = int(item.get('size') or item.get('size_bytes') or 0)",
|
|
" seen.add(name)",
|
|
" models.append({'repo_id':name,'size_bytes':size_bytes,'nb_files':1,'has_incomplete':False,'path':'ollama','backend':'ollama','is_ollama':True})",
|
|
" return",
|
|
"scan_hf(os.path.expanduser('~/.cache/huggingface/hub'))",
|
|
"scan_ollama()",
|
|
"scan_ollama_api()",
|
|
]
|
|
for model_dir in model_dirs or []:
|
|
lines.append(f"scan_dir(os.path.expanduser({model_dir!r}))")
|
|
lines.append("print(json.dumps(models))")
|
|
return "\n".join(lines) + "\n"
|
|
|
|
|
|
def _ps_squote(v: str) -> str:
|
|
"""Escape a value for PowerShell single-quoted string interpolation.
|
|
Belt-and-suspenders on top of _validate_token's regex — if the regex
|
|
is ever loosened, this still keeps the heredoc shell-safe."""
|
|
return v.replace("'", "''")
|
|
|
|
|
|
def _bash_squote(v: str) -> str:
|
|
"""Escape a value for bash/sh single-quoted string interpolation."""
|
|
return v.replace("'", "'\\''")
|
|
|
|
|
|
# Allow-list of binaries permitted as the leading token of `req.cmd` for /api/model/serve.
|
|
# Anything else is rejected before the cmd is interpolated into a tmux/PowerShell wrapper.
|
|
_SERVE_CMD_ALLOWLIST = {
|
|
"vllm", "llama-server", "llama_server", "llama.cpp", "ollama",
|
|
"python", "python3",
|
|
"sglang", "lmdeploy",
|
|
"node", "npx",
|
|
}
|
|
|
|
|
|
# The llama.cpp GGUF launcher (static/js/cookbook.js) emits a fixed-shape
|
|
# prelude that resolves the cached .gguf on the target host before serving:
|
|
# MODEL_FILE=$( { find …; find …; } | head -1 ) && { [ -n "$MODEL_FILE" ] && \
|
|
# [ -f "$MODEL_FILE" ]; } || { echo "ERROR…"; exit 1; } && <serve> || <serve>
|
|
# That legitimately needs $(...)/&&/||, so we recognise this exact shape and
|
|
# validate the serve binaries it guards rather than rejecting it wholesale.
|
|
_GGUF_PRELUDE_RE = re.compile(
|
|
r'^MODEL_FILE=\$\([^\n]*?\)\s*&&\s*\{[^{}]*\}\s*\|\|\s*\{[^{}]*\}\s*&&\s*'
|
|
)
|
|
_OLLAMA_HOST_ASSIGNMENT_RE = re.compile(r"(?:^|\s)OLLAMA_HOST=([^\s]+)")
|
|
_OLLAMA_BIND_RE = re.compile(r"^\[([^\]]+)\]:(\d+)$|^([^:]+):(\d+)$")
|
|
_OLLAMA_BIND_HOST_RE = re.compile(r"^[A-Za-z0-9._:-]+$")
|
|
|
|
|
|
def _ollama_bind_from_cmd(cmd: str | None, *, default_host: str = "127.0.0.1") -> tuple[str, str]:
    """Return the ``(host, port)`` an Ollama serve command asks to bind to.

    With no command, or no ``OLLAMA_HOST=`` assignment in it, the result is
    ``(default_host, "11434")``; remote callers can pass a wider
    ``default_host`` so the resulting API is reachable.  When an assignment
    is present but malformed (not ``host:port`` / ``[host]:port``, host glyphs
    outside ``_OLLAMA_BIND_HOST_RE``, port unparsable or outside 1..65535)
    the result falls back to loopback ``("127.0.0.1", "11434")`` regardless
    of ``default_host``.

    A bracketed host is returned with its brackets; the port is returned as
    the matched text.
    """
    loopback = ("127.0.0.1", "11434")

    assignment = _OLLAMA_HOST_ASSIGNMENT_RE.search(cmd) if cmd else None
    if assignment is None:
        return default_host, "11434"

    raw_value = assignment.group(1).strip("'\"")
    parsed = _OLLAMA_BIND_RE.match(raw_value)
    if parsed is None:
        return loopback

    bracketed_host, bracketed_port, plain_host, plain_port = parsed.groups()
    host = bracketed_host or plain_host or "127.0.0.1"
    port = bracketed_port or plain_port or "11434"

    if _OLLAMA_BIND_HOST_RE.match(host) is None:
        return loopback
    try:
        port_number = int(port, 10)
    except ValueError:
        return loopback
    if not 1 <= port_number <= 65535:
        return loopback

    if bracketed_host:
        host = f"[{host}]"
    return host, port
|
|
|
|
|
|
def _check_serve_binary(seg: str) -> None:
    """Require one command segment to start with an allowlisted binary.

    Leading ``NAME=value`` environment assignments (for example
    ``CUDA_VISIBLE_DEVICES=0``) are skipped; the basename of the first
    remaining token is compared against ``_SERVE_CMD_ALLOWLIST``.  A blank
    segment is accepted as-is.

    Raises:
        HTTPException: 400 when the segment cannot be tokenised by ``shlex``
            or the binary is not allowlisted.
    """
    if not seg.strip():
        return
    try:
        tokens = shlex.split(seg)
    except ValueError:
        raise HTTPException(400, "Invalid cmd — could not parse")
    if not tokens:
        return

    env_assignment = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*=")
    binary = ""
    for token in tokens:
        if env_assignment.match(token) is None:
            # Only the basename is compared, so an absolute path to an
            # allowlisted name passes.
            binary = os.path.basename(token)
            break

    if binary in _SERVE_CMD_ALLOWLIST:
        return
    raise HTTPException(
        400,
        f"cmd binary '{binary or '(empty)'}' is not allowed. Must start with one of: "
        f"{', '.join(sorted(_SERVE_CMD_ALLOWLIST))}",
    )
|
|
|
|
|
|
def _validate_serve_cmd(v: str | None) -> str | None:
    """Reject serve commands that aren't allowlisted or that chain shell commands.

    `req.cmd` is dropped verbatim into a bash/PowerShell wrapper script and
    executed in a tmux session, so it is screened here first.

    Two shapes are accepted:

    * a single invocation whose first non-``ENV=value`` token is an
      allowlisted binary and which contains none of ``;``, ``&&``, ``||``,
      ``$(``;
    * the GGUF launcher prelude (``_GGUF_PRELUDE_RE``) followed by one or more
      ``||``-separated serve invocations, each of which must satisfy the same
      rules as a single invocation.

    Backslash-newline continuations are collapsed to spaces before checking,
    and the collapsed command is what gets returned.

    Returns:
        ``None`` when ``v`` is None or empty, otherwise the normalised command.

    Raises:
        HTTPException: 400 on backticks, raw newlines, command-chaining
            tokens, an unparsable segment, or a non-allowlisted binary.
    """
    if v is None or v == "":
        return None

    # Serve commands (vLLM especially) are routinely pasted multi-line with a
    # trailing `\`.  That is an ordinary shell continuation, so fold it into
    # one logical line and let the leading-token allowlist govern it.
    v = re.sub(r"\\[ \t]*\r?\n[ \t]*", " ", v).strip()

    # Backticks and raw newlines are never legitimate here.
    if any(c in v for c in ("`", "\n", "\r")):
        raise HTTPException(400, "Invalid characters in cmd")

    # NOTE(review): `|`, a lone `&` and redirections are not screened on
    # either path — confirm whether that is intentional.
    chaining_tokens = (";", "&&", "$(")

    prelude = _GGUF_PRELUDE_RE.match(v)
    if prelude:
        # NOTE(review): only the overall shape of the prelude is matched; the
        # text inside `$(...)` and the two `{...}` groups is not inspected.
        # After the prelude comes
        #   [ENV=…] python3 -m llama_cpp.server … || [ENV=…] llama-server …
        # Each guarded invocation gets the same screen as a standalone one.
        # Without it, `python3 … ; <anything>` would pass, because only the
        # segment's first token is checked against the allowlist.
        for segment in v[prelude.end():].split("||"):
            segment = segment.strip()
            if any(tok in segment for tok in chaining_tokens):
                raise HTTPException(400, "Invalid characters in cmd")
            _check_serve_binary(segment)
        return v

    # Otherwise a single invocation: no command chaining or substitution.
    # (A bare `$` is allowed; only `$(` is rejected.)
    if any(tok in v for tok in chaining_tokens + ("||",)):
        raise HTTPException(400, "Invalid characters in cmd")
    _check_serve_binary(v)
    return v
|
|
|
|
|
|
def _append_serve_preflight_exit_lines(runner_lines: list[str], *, keep_shell_open: bool) -> None:
|
|
"""Append serve-runner lines that surface preflight failures before exit."""
|
|
runner_lines.append('if [ -n "$ODYSSEUS_PREFLIGHT_EXIT" ]; then')
|
|
runner_lines.append(' echo ""; echo "=== Process exited with code $ODYSSEUS_PREFLIGHT_EXIT ==="')
|
|
if keep_shell_open:
|
|
runner_lines.append(' exec "${SHELL:-/bin/bash}"')
|
|
else:
|
|
runner_lines.append(' exit "$ODYSSEUS_PREFLIGHT_EXIT"')
|
|
runner_lines.append('fi')
|
|
|
|
|
|
def _append_serve_exit_code_lines(runner_lines: list[str], *, keep_shell_open: bool) -> None:
|
|
"""Append serve-runner lines that preserve and report the command exit code."""
|
|
runner_lines.append('ODYSSEUS_CMD_EXIT=$?')
|
|
if keep_shell_open:
|
|
runner_lines.append('echo ""; echo "=== Process exited with code $ODYSSEUS_CMD_EXIT ==="; exec "${SHELL:-/bin/bash}"')
|
|
else:
|
|
runner_lines.append('echo ""; echo "=== Process exited with code $ODYSSEUS_CMD_EXIT ==="')
|
|
runner_lines.append('exit "$ODYSSEUS_CMD_EXIT"')
|
|
|
|
|
|
def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
|
|
"""Append Linux llama.cpp build lines that prefer ROCm/HIP when available.
|
|
|
|
Cookbook already detects AMD GPUs elsewhere, but the llama.cpp bootstrap used
|
|
to hard-wire CUDA on Linux. That made ROCm hosts attempt a CUDA configure and
|
|
fail with "CUDA Toolkit not found" instead of building with HIP.
|
|
"""
|
|
# Detect pip-installed nvcc (from vLLM/nvidia CUDA wheels) and put it on PATH
|
|
# so cmake's CUDA configure can find it. We keep this after the ROCm/HIP
|
|
# check — a machine with both stacks should honor the native HIP toolchain on
|
|
# AMD hosts instead of accidentally preferring a stray nvcc wheel.
|
|
runner_lines.append(' for _cudir in ~/.local/lib/python*/site-packages/nvidia/cu13 ~/.local/lib/python*/site-packages/nvidia/cu12 ~/.local/lib/python*/site-packages/nvidia/cuda_nvcc; do')
|
|
runner_lines.append(' [ -x "$_cudir/bin/nvcc" ] && export CUDA_HOME="$_cudir" && export PATH="$_cudir/bin:$PATH" && break')
|
|
runner_lines.append(' done')
|
|
# rm -rf build so a prior poisoned CMakeCache.txt (e.g. from a failed CUDA
|
|
# or HIP attempt) doesn't cause the next configure to reuse stale settings.
|
|
runner_lines.append(' cd ~/llama.cpp && rm -rf build')
|
|
runner_lines.append(' if command -v hipconfig &>/dev/null || [ -d /opt/rocm ] || [ -n "$ROCM_PATH" ] || [ -n "$HIP_PATH" ]; then')
|
|
runner_lines.append(' if command -v hipconfig &>/dev/null; then')
|
|
runner_lines.append(' export HIPCXX="${HIPCXX:-$(hipconfig -l)/clang}"')
|
|
runner_lines.append(' export HIP_PATH="${HIP_PATH:-$(hipconfig -R)}"')
|
|
runner_lines.append(' fi')
|
|
runner_lines.append(' echo "[odysseus] ROCm/HIP detected — building llama-server with HIP support..."')
|
|
runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_HIP=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
|
|
runner_lines.append(' elif command -v nvcc &>/dev/null; then')
|
|
# nvcc alone is not sufficient — pip-installed CUDA wheels or incomplete
|
|
# tooling can expose nvcc without shipping libcudart, causing cmake to fail
|
|
# mid-build with "CUDA runtime library not found". Check cudart explicitly
|
|
# via a small helper so the guard stays readable.
|
|
runner_lines.append(' _odysseus_has_cudart() {')
|
|
runner_lines.append(' ldconfig -p 2>/dev/null | grep -q \'libcudart\\.so\' && return 0')
|
|
runner_lines.append(' local _cuh="${CUDA_HOME:-/usr/local/cuda}"')
|
|
runner_lines.append(' ls "$_cuh/lib64/libcudart.so"* &>/dev/null && return 0')
|
|
runner_lines.append(' ls "$_cuh/lib/libcudart.so"* &>/dev/null && return 0')
|
|
runner_lines.append(' ls /usr/local/cuda/lib64/libcudart.so* &>/dev/null && return 0')
|
|
runner_lines.append(' ls /usr/local/cuda/lib/libcudart.so* &>/dev/null && return 0')
|
|
runner_lines.append(' ls "${_cuh%/cuda_nvcc}/cuda_runtime/lib/libcudart.so"* &>/dev/null && return 0')
|
|
runner_lines.append(' return 1')
|
|
runner_lines.append(' }')
|
|
runner_lines.append(' if _odysseus_has_cudart; then')
|
|
runner_lines.append(' echo "[odysseus] CUDA nvcc + cudart found — building llama-server with CUDA (GPU) support..."')
|
|
runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
|
|
runner_lines.append(' else')
|
|
runner_lines.append(' echo "[odysseus] WARNING: nvcc found but CUDA runtime (libcudart.so) is not visible — building llama-server for CPU only."')
|
|
runner_lines.append(' echo "[odysseus] GPU inference will not be available for this llama.cpp build."')
|
|
runner_lines.append(' echo "[odysseus] Ensure libcudart is installed (e.g. cuda-runtime package) and visible via ldconfig or CUDA_HOME."')
|
|
runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
|
|
runner_lines.append(' fi')
|
|
runner_lines.append(' else')
|
|
runner_lines.append(' echo "[odysseus] WARNING: no HIP/CUDA toolchain found — building llama-server for CPU only."')
|
|
runner_lines.append(' echo "[odysseus] GPU inference will not be available for this llama.cpp build."')
|
|
runner_lines.append(' echo "[odysseus] Install ROCm for AMD GPUs or vLLM/CUDA tooling for NVIDIA, then re-launch this serve task."')
|
|
runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
|
|
runner_lines.append(' fi')
|
|
|
|
|
|
def _llama_cpp_rebuild_cmd() -> str:
|
|
"""Shell command that clears the Cookbook-managed llama.cpp build.
|
|
|
|
Removes the cached ``llama-server`` symlink and the ``~/llama.cpp/build``
|
|
directory so the next llama.cpp serve recompiles from source, picking up a
|
|
CUDA or HIP toolchain if one is now available. The serve bootstrap only
|
|
builds when ``llama-server`` is missing from PATH, so without this an
|
|
existing CPU-only build is reused forever. It deliberately installs and
|
|
downloads nothing; the rebuild itself happens on the next serve.
|
|
"""
|
|
return (
|
|
'mkdir -p "$HOME/bin" && '
|
|
'rm -f "$HOME/bin/llama-server" && '
|
|
'rm -rf "$HOME/llama.cpp/build" && '
|
|
'echo "[odysseus] Cleared the cached llama.cpp build. '
|
|
'Re-launch the serve task to rebuild llama-server from source '
|
|
'(CUDA or HIP will be used if a toolchain is now available)."'
|
|
)
|
|
|
|
|
|
class ModelDownloadRequest(BaseModel):
    # Request body for a model download.  Documented with comments rather
    # than a docstring so the generated schema description stays unchanged.

    # Identifier of the model to download.
    repo_id: str
    # Glob pattern restricting which files are fetched, e.g. "*Q4_K_M*".
    include: str | None = None
    # HuggingFace access token, if one is needed.
    hf_token: str | None = None
    # Environment activation to run first, e.g. "source ~/venv/bin/activate".
    env_prefix: str | None = None
    # Host to run the download on via SSH, e.g. "gpu-box".
    # NOTE(review): _validate_remote_host in this module only accepts the
    # user@host form — confirm which form callers actually send.
    remote_host: str | None = None
    # SSH port as a string, e.g. "8022" for Termux.
    ssh_port: str | None = None
    # Target platform: "linux", "termux", or "windows".
    platform: str | None = None
    # Base directory to download into; a per-model subfolder is created under
    # it.  None means the default HF cache.
    local_dir: str | None = None
    # Skip the Rust hf_transfer downloader: slower but far more reliable on
    # large files (used by retries).
    disable_hf_transfer: bool = False
|
|
|
|
|
|
class ServeRequest(BaseModel):
    # Request body for launching a model serve task.  Documented with
    # comments rather than a docstring so the generated schema description
    # stays unchanged.

    # Identifier of the model being served.
    repo_id: str
    # The serve command line to run.
    cmd: str
    # Optional SSH target to run the command on.
    remote_host: str | None = None
    # Optional SSH port, as a string.
    ssh_port: str | None = None
    # Optional environment activation to run before the command.
    env_prefix: str | None = None
    # Optional HuggingFace access token.
    hf_token: str | None = None
    # Optional GPU selection string.
    gpus: str | None = None
    # Target platform: "linux", "termux", or "windows".
    platform: str | None = None
|
|
|
|
|
|
def _parse_serve_phase(snapshot: str, task_type: str = "serve") -> dict:
|
|
"""Parse a tmux snapshot of a serve task into structured phase info.
|
|
|
|
Single source of truth for serve task status detection. Returns:
|
|
{ "phase": str, "status": "ready"|"running"|"", "tps": float|None,
|
|
"reqs": int|None, "pct": int|None }
|
|
"""
|
|
import re
|
|
if task_type != "serve" or not snapshot:
|
|
return {}
|
|
# Strip newlines so tmux line-wrapping doesn't break regex matching
|
|
flat = re.sub(r'\s+', ' ', snapshot)
|
|
|
|
load_matches = re.findall(r'Loading safetensors.*?(\d+)%', flat)
|
|
# Prefer "Downloading (incomplete total...)" (real aggregate bytes) over
|
|
# "Fetching N files" (whole-file count, lags with hf_transfer's chunked pulls).
|
|
downloading_matches = re.findall(r'Downloading.*?(\d+)%', flat)
|
|
fetching_matches = re.findall(r'Fetching.*?(\d+)%', flat)
|
|
dl_matches = downloading_matches if downloading_matches else fetching_matches
|
|
# Match "Avg generation throughput: X tokens/s, Running: N reqs" (with line-wrap tolerance)
|
|
tps_matches = re.findall(
|
|
r'(?:Avg )?generation throughput:\s*([\d.]+)\s*tokens/s.*?Running:\s*(\d+)\s*reqs',
|
|
flat,
|
|
)
|
|
|
|
# Check throughput FIRST — the throughput log line contains "GPU KV cache usage"
|
|
# which would otherwise false-match the warmup check
|
|
if tps_matches:
|
|
tps_str, reqs_str = tps_matches[-1]
|
|
tps = float(tps_str)
|
|
reqs = int(reqs_str)
|
|
return {
|
|
"phase": f"{tps_str} tok/s" if reqs > 0 else "idle",
|
|
"status": "ready",
|
|
"tps": tps,
|
|
"reqs": reqs,
|
|
}
|
|
if "Application startup complete" in flat:
|
|
return {"phase": "ready", "status": "ready"}
|
|
if re.search(r'Ollama API ready on port\s+\d+', flat, re.I):
|
|
return {"phase": "ready", "status": "ready"}
|
|
# HTTP access logs (e.g. GET /v1/models 200 OK) mean the server is up and serving
|
|
if re.search(r'(?:GET|POST)\s+/[^\s]*\s+HTTP/[\d.]+"\s*\d{3}', flat):
|
|
return {"phase": "idle", "status": "ready"}
|
|
if "Loading weights took" in flat:
|
|
return {"phase": "initializing", "status": "running"}
|
|
# "GPU KV cache" alone (during allocation) — not "GPU KV cache usage" (runtime log)
|
|
if "GPU KV cache" in flat and "GPU KV cache usage" not in flat:
|
|
return {"phase": "warming up", "status": "running"}
|
|
if load_matches:
|
|
pct = int(load_matches[-1])
|
|
return {"phase": f"loading {pct}%", "status": "running", "pct": pct}
|
|
if dl_matches:
|
|
pct = int(dl_matches[-1])
|
|
return {"phase": f"downloading {pct}%", "status": "running", "pct": pct}
|
|
return {}
|
|
|
|
|
|
def _ssh(host, cmd, port=None):
|
|
"""Build SSH command string with optional port."""
|
|
pf = f"-p {port} " if port and port != "22" else ""
|
|
return f"ssh {pf}{host} '{cmd}'"
|
|
|
|
|
|
def _safe_env_prefix(ep: str | None) -> str | None:
|
|
"""Rewrite a `source <path>` env_prefix so it no-ops if the path is missing.
|
|
Prevents `line N: <path>: No such file or directory` errors when a serve
|
|
task is launched against a host that doesn't have the expected venv.
|
|
|
|
Also rewrites leading `~/` → `$HOME/` so the path expands inside double
|
|
quotes (bash only tilde-expands unquoted tokens at word start)."""
|
|
if not ep:
|
|
return ep
|
|
import shlex
|
|
try:
|
|
parts = shlex.split(ep, posix=True)
|
|
except ValueError:
|
|
raise HTTPException(400, "Invalid env_prefix")
|
|
if len(parts) != 2 or parts[0] not in {"source", "."}:
|
|
# Bash conda activation emitted by the frontend:
|
|
# eval "$(conda shell.bash hook)" && conda activate ENV
|
|
m = re.fullmatch(r'eval "\$\(conda shell\.bash hook\)" && conda activate (.+)', ep)
|
|
if m:
|
|
env = m.group(1).strip()
|
|
try:
|
|
env_parts = shlex.split(env, posix=True)
|
|
except ValueError:
|
|
raise HTTPException(400, "Invalid env_prefix")
|
|
if len(env_parts) != 1:
|
|
raise HTTPException(400, "Invalid env_prefix")
|
|
return 'eval "$(conda shell.bash hook)" && conda activate ' + shlex.quote(env_parts[0])
|
|
|
|
# Plain conda activation, used by Windows/PowerShell and some manual callers.
|
|
if len(parts) == 3 and parts[0] == "conda" and parts[1] == "activate":
|
|
return "conda activate " + shlex.quote(parts[2])
|
|
|
|
# PowerShell venv activation emitted by the frontend:
|
|
# & 'C:\path\Scripts\Activate.ps1'
|
|
if len(parts) == 2 and parts[0] == "&":
|
|
path = parts[1]
|
|
if any(c in path for c in "\r\n;&|`$<>"):
|
|
raise HTTPException(400, "Invalid env_prefix")
|
|
return "& '" + path.replace("'", "''") + "'"
|
|
|
|
raise HTTPException(400, "Invalid env_prefix")
|
|
path = parts[1]
|
|
if any(c in path for c in "\r\n;&|`$<>"):
|
|
raise HTTPException(400, "Invalid env_prefix")
|
|
# Replace a leading "~/" with "$HOME/" so it survives quoting
|
|
if path.startswith("~/"):
|
|
path = "$HOME/" + path[2:]
|
|
elif path == "~":
|
|
path = "$HOME"
|
|
path = path.replace('"', '\\"')
|
|
return f'[ -f "{path}" ] && source "{path}" || true'
|
|
|
|
|
|
def _ssh_ps(host, script_path, port=None):
|
|
"""Build SSH command to run a PowerShell script on a Windows remote."""
|
|
pf = f"-p {port} " if port and port != "22" else ""
|
|
return f'ssh {pf}{host} "powershell -ExecutionPolicy Bypass -File {script_path}"'
|
|
|
|
|
|
# Windows session directory, under the remote user's %TEMP%.  The value
# contains two literal backslashes.
# NOTE(review): presumably one level is consumed by a later quoting layer —
# confirm against the code that uses this constant.
WIN_SESSION_DIR = r"$env:TEMP\\odysseus-sessions"
|