# odysseus/routes/cookbook_helpers.py

"""cookbook_helpers.py — validators + small helpers shared by the cookbook routes.
Extracted from cookbook_routes.py; the routes module imports the symbols it needs."""

import logging
import os
import posixpath
import re
import shlex

from fastapi import HTTPException
from pydantic import BaseModel

logger = logging.getLogger(__name__)


# NOTE: every pattern below ends in \Z rather than $. In Python, $ also
# matches just before a trailing newline, so a `$`-anchored pattern used with
# `.match()` accepts values such as "org/name\n" — and these values are later
# interpolated into shell commands, where a newline ends the command.

# HuggingFace repo IDs are <org>/<name>, both alphanumerics plus ._-
# Rejecting anything else up front closes off shell-interpolation vectors.
_REPO_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*/[A-Za-z0-9][A-Za-z0-9._-]*\Z")
# Cached models scanned from a custom/local model dir are keyed by their leaf
# folder name (no slash), e.g. `DeepSeek-R1-UD-IQ4_XS`. The serve command uses
# the real on-disk path separately; this identifier is only for UI/task
# bookkeeping, so serving should accept the same safe glyph set as repo IDs.
_LOCAL_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*\Z")
# Ollama model names include tags, e.g. `qwen2.5:0.5b` or `llama3.2:latest`.
# Some registries also use a namespace path. Keep this shell-safe: no spaces,
# quotes, `$`, `;`, `&`, pipes, or redirects.
_OLLAMA_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._:/-]{0,200}\Z")
# Include pattern is a glob: allow typical safe glyphs only.
_INCLUDE_RE = re.compile(r"^[A-Za-z0-9._\-*?/\[\]]+\Z")
# Remote host: exactly user@host — no port suffix, and no leading "-" that
# ssh would otherwise parse as a command-line option.
_REMOTE_HOST_RE = re.compile(r"^(?!-)[A-Za-z0-9._-]+@[A-Za-z0-9._-]+\Z")
# HF tokens and API tokens are url-safe base64-like.
_TOKEN_RE = re.compile(r"^[A-Za-z0-9._~+/=-]+\Z")
# Session IDs we mint look like "cookbook-deadbeef" or "serve-deadbeef".
# Anything beyond plain alphanumerics + dash + underscore could break out
# of the shell/PowerShell contexts the value lands in.
_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9_-]{1,64}\Z")
# SSH port: 1-5 digits (the numeric range is checked by the validator).
_SSH_PORT_RE = re.compile(r"^\d{1,5}\Z")
# GPU selection: comma-separated device indexes, e.g. "0" or "0,1,3".
_GPU_LIST_RE = re.compile(r"^\d+(?:,\d+)*\Z")
# A download target directory. Absolute or ~-relative path; safe path glyphs
# only (no quotes, shell metacharacters, or spaces) since it lands in a shell
# command. A leading ~ is expanded to $HOME at command-build time.
_LOCAL_DIR_RE = re.compile(r"^~?/[A-Za-z0-9._/-]*\Z|^~\Z")


def _validate_repo_id(v: str | None) -> str:
    """Return ``v`` if it is a well-formed ``<org>/<name>`` repo id.

    The whole value must fit the pattern: ``fullmatch`` is used because a
    ``$``-anchored pattern checked with ``match`` also accepts one trailing
    newline.

    Raises:
        HTTPException: 400 when ``v`` is missing, empty, or malformed.
    """
    if not v or not _REPO_ID_RE.fullmatch(v):
        raise HTTPException(400, "Invalid repo_id — must be <org>/<name> using [A-Za-z0-9._-]")
    return v


def _validate_serve_model_id(v: str | None) -> str:
    """Return ``v`` if it is an acceptable model identifier for serving.

    Accepted shapes are a ``<org>/<name>`` repo id, a cached local model id
    (leaf folder name, no slash), or an Ollama ``name:tag``. ``fullmatch`` is
    used so a trailing newline cannot slip past the ``$`` anchors.

    Raises:
        HTTPException: 400 when ``v`` is missing/empty or fits none of the
            accepted shapes.
    """
    if not v:
        raise HTTPException(400, "repo_id is required")
    accepted_shapes = (_REPO_ID_RE, _LOCAL_MODEL_ID_RE, _OLLAMA_MODEL_ID_RE)
    if any(pattern.fullmatch(v) for pattern in accepted_shapes):
        return v
    raise HTTPException(400, "Invalid repo_id — must be <org>/<name>, an Ollama name:tag, or a cached local model id")


def _validate_include(v: str | None) -> str | None:
    """Validate an optional include glob.

    Returns ``None`` for ``None``/empty input, otherwise ``v`` unchanged.
    ``fullmatch`` is used so a trailing newline cannot slip past the ``$``
    anchor of the pattern.

    Raises:
        HTTPException: 400 when ``v`` contains characters outside the allowed
            glob glyph set.
    """
    if v is None or v == "":
        return None
    if not _INCLUDE_RE.fullmatch(v):
        raise HTTPException(400, "Invalid include pattern")
    return v


def _validate_remote_host(v: str | None) -> str | None:
    """Validate an optional SSH destination of the form ``user@host``.

    Returns ``None`` for ``None``/empty input, otherwise ``v`` unchanged.

    Two hardening checks go beyond the character class:
    * a leading ``-`` is rejected, because the error message promises
      "no SSH option syntax" and such a value would be read as an option
      if placed on an ssh command line;
    * ``fullmatch`` is used so a trailing newline cannot slip past the ``$``
      anchor of the pattern.

    Raises:
        HTTPException: 400 when ``v`` is not a plain ``user@host``.
    """
    if v is None or v == "":
        return None
    if v.startswith("-") or not _REMOTE_HOST_RE.fullmatch(v):
        raise HTTPException(400, "Invalid remote_host — must be user@host, no SSH option syntax")
    return v


def _validate_token(v: str | None) -> str | None:
    """Validate an optional HF/API token.

    Returns ``None`` for ``None``/empty input, otherwise ``v`` unchanged.
    ``fullmatch`` is used so a trailing newline cannot slip past the ``$``
    anchor of the pattern.

    Raises:
        HTTPException: 400 when ``v`` contains characters outside the
            url-safe base64-like set.
    """
    if v is None or v == "":
        return None
    if not _TOKEN_RE.fullmatch(v):
        raise HTTPException(400, "Invalid token characters")
    return v


def _validate_local_dir(v: str | None) -> str | None:
    """Validate and normalize an optional download target directory.

    Returns ``None`` for ``None``/empty input. Otherwise trailing slashes are
    stripped (a value made only of slashes becomes ``"/"``) and the result
    must be an absolute or ``~``-relative path using safe path glyphs only.
    ``fullmatch`` is used so a trailing newline cannot slip past the ``$``
    anchors of the pattern.

    Raises:
        HTTPException: 400 when the normalized path is not acceptable.
    """
    if v is None or v == "":
        return None
    # "~/" -> "~", "/data/" -> "/data", "///" -> "/"
    v = v.rstrip("/") or "/"
    if not _LOCAL_DIR_RE.fullmatch(v):
        raise HTTPException(400, "Invalid local_dir — must be an absolute or ~ path with no spaces or shell metacharacters")
    return v


def _validate_ssh_port(v: str | None) -> str | None:
    """Validate an optional SSH port and return it as a canonical string.

    ``None``/empty input yields ``None``. Any other value must be 1-5 digits
    and fall within 1..65535; the result is the integer re-rendered as text.

    Raises:
        HTTPException: 400 when the value is not numeric or is out of range.
    """
    if v is None or v == "":
        return None
    # Short-circuit keeps int() from ever seeing a non-numeric value.
    if not _SSH_PORT_RE.fullmatch(str(v)) or not 1 <= int(v) <= 65535:
        raise HTTPException(400, "Invalid ssh_port")
    return str(int(v))


def _validate_gpus(v: str | None) -> str | None:
    """Validate an optional GPU selection such as ``"0"`` or ``"0,1,3"``.

    ``None``/empty input yields ``None``; otherwise the value is returned as a
    string.

    Raises:
        HTTPException: 400 when the value is not a comma-separated list of
            digit runs.
    """
    if v is None or v == "":
        return None
    gpu_list = str(v)
    if _GPU_LIST_RE.fullmatch(gpu_list) is None:
        raise HTTPException(400, "Invalid gpus — expected comma-separated GPU indexes")
    return gpu_list


def _shell_path(p: str) -> str:
    """Wrap a validated path in double quotes for use in a shell command.

    A leading ``~`` (alone or followed by ``/``) is rewritten to ``$HOME`` so
    it still expands inside the double quotes. No escaping is performed here;
    the caller is expected to have restricted the charset beforehand (see
    ``_validate_local_dir``).
    """
    if p == "~":
        inner = "$HOME"
    elif p.startswith("~/"):
        inner = "$HOME/" + p[2:]
    else:
        inner = p
    return f'"{inner}"'


def _local_tooling_path_export(executable: str) -> str:
    """Return a bash ``export PATH=…`` line that prepends the directory of
    ``executable`` to ``PATH``.

    Intended for local runners: when the app runs from a virtualenv, that
    directory holds the tools the cookbook runners shell out to (``hf``,
    ``python``), and tmux runners start without the venv activated.
    """
    # An explicit POSIX absolute path keeps POSIX semantics even on Windows,
    # where os.path.abspath("/opt/...") would prepend a drive letter.
    if executable.startswith("/"):
        tool_dir = posixpath.dirname(executable)
    else:
        tool_dir = os.path.dirname(os.path.abspath(executable))
    # Backslash-escape the characters that stay special inside double quotes,
    # in one pass, so the directory is literal while $PATH still expands.
    dquote_escapes = str.maketrans({"\\": "\\\\", '"': '\\"', "$": "\\$", "`": "\\`"})
    return 'export PATH="' + tool_dir.translate(dquote_escapes) + ':$PATH"'


def _pip_install_no_cache(cmd: str) -> str:
    """Insert ``--no-cache-dir`` after the first ``pip install`` in ``cmd``.

    Large wheel builds can fill a small home filesystem through pip's default
    cache under ``$HOME/.cache/pip``; disabling the cache for these one-off
    installs keeps them off that disk.

    The command is returned unchanged when it is empty, contains no
    ``pip install``, or already mentions ``--no-cache-dir`` anywhere, which
    makes the function idempotent. Only the first occurrence is rewritten.
    """
    marker = "pip install"
    flag = "--no-cache-dir"
    if not cmd:
        return cmd
    before, found, after = cmd.partition(marker)
    if not found or flag in cmd:
        return cmd
    return f"{before}{marker} {flag}{after}"


def _pip_install_attempt(pip_cmd: str) -> str:
    """Wrap a single pip install command so its exit status survives the
    fallback chain and the tail of its output is visible in the tmux log.

    Without this wrapper, `pip … 2>&1 | tail -5` returns ``tail``'s exit
    code (0), masking pip's real failure and preventing the next fallback
    from running.  The generated snippet captures all output to a temp
    file, prints its last 5 lines (on success and on failure alike), removes
    the file, and exits with pip's original status.

    ``pip_cmd`` is embedded inside a single-quoted ``bash -c '…'`` argument,
    so any single quote it contains (for example a quoted version spec such
    as ``'vllm>=0.6'``) is rewritten to ``'\\''``; otherwise it would end the
    quoting early and change what the shell runs.
    """
    # Close the quote, emit an escaped quote, reopen the quote.
    embedded_cmd = pip_cmd.replace("'", "'\\''")
    return (
        "bash -c '"
        f'_out=$(mktemp) && {embedded_cmd} >"$_out" 2>&1; _rc=$?; '
        'tail -5 "$_out"; rm -f "$_out"; exit $_rc'
        "'"
    )


def _pip_install_fallback_chain(package: str, *, python_cmd: str = "python3 -m pip", upgrade: bool = False) -> str:
    """Return a bash snippet that installs ``package`` with a guarded fallback.

    The first attempt installs into the active interpreter/environment. If it
    fails, a ``--user --break-system-packages`` attempt runs, but only when
    the interpreter is NOT inside a virtualenv (``--user`` is invalid in many
    venvs). Both attempts go through :func:`_pip_install_attempt`, which keeps
    pip's real exit code.

    Args:
        package: Requirement text placed verbatim after the pip flags.
        python_cmd: The pip invocation prefix, e.g. ``"python3 -m pip"``.
        upgrade: Add ``-U`` to both attempts when true.
    """
    flags = "-q -U" if upgrade else "-q"
    primary = _pip_install_attempt(f"{python_cmd} install {flags} {package}")
    user_site = _pip_install_attempt(
        f"{python_cmd} install --user --break-system-packages {flags} {package}"
    )
    # The venv probe must run under the interpreter that pip belongs to;
    # a hardcoded python3 breaks for venvs that only ship "python".
    if " -m pip" in python_cmd:
        interpreter = python_cmd.replace(" -m pip", "")
    else:
        interpreter = "python" if python_cmd.strip() == "pip" else "python3"
    in_venv_probe = (
        interpreter
        + ' -c "import sys; sys.exit(0 if sys.prefix != sys.base_prefix else 1)"'
    )
    # `! probe` succeeds only outside a venv, so `&&` then tries --user.
    # Inside a venv the group exits non-zero, which propagates the failure of
    # the primary attempt instead of masking it as success.
    return primary + " || { ! " + in_venv_probe + " && " + user_site + "; }"


def _venv_safe_local_pip_install_cmd(cmd: str, *, local: bool, in_venv: bool) -> str:
    """Drop pip user-install flags that are invalid for local venv installs.

    Cookbook dependency installs run through the model-serve task path so users
    can watch progress in the same log UI. For local POSIX runs, that task
    prepends Odysseus' own interpreter directory to PATH. If Odysseus itself is
    running from a venv, `python3` resolves to the venv Python and pip rejects
    `--user` with "User site-packages are not visible in this virtualenv".

    Keep remote and non-venv installs unchanged: remotes may intentionally use
    system Python, and Docker/non-venv installs still need user-site fallback.

    The command is returned byte-for-byte unchanged when it is remote, not in
    a venv, has no ``pip install``, cannot be tokenized, or contains neither
    flag as a standalone token. Only when a flag is actually removed is the
    command re-serialized with ``shlex.join``.

    NOTE(review): ``shlex.join`` quotes every token, so a command that carries
    one of these flags AND shell operators (``|``, ``||``, ``2>&1``) still
    comes back with those operators quoted. Confirm callers only pass plain
    single invocations when the flags are present.
    """
    if not local or not in_venv:
        return cmd
    if "pip install" not in (cmd or ""):
        return cmd
    try:
        parts = shlex.split(cmd)
    except ValueError:
        return cmd
    user_install_flags = {"--user", "--break-system-packages"}
    kept = [part for part in parts if part not in user_install_flags]
    # Nothing to strip: avoid the shlex round-trip, which would quote shell
    # operators and redirections and so corrupt an otherwise valid command.
    if len(kept) == len(parts):
        return cmd
    return shlex.join(kept)


def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
    """Build the standalone Python scanner used by /api/model/cached.

    The return value is the source text of a self-contained Python script
    (standard library only). When run, the script collects model entries into
    a ``models`` list and prints that list as JSON on stdout. A shared ``seen``
    set keeps an identifier from being reported twice across scanners.

    The script always scans the default HuggingFace hub cache
    (``~/.cache/huggingface/hub``), then ``ollama list``, then the Ollama HTTP
    API. One ``scan_dir(...)`` call is appended for every entry of
    ``model_dirs``.

    Args:
        model_dirs: Extra directories to scan; each is embedded via ``repr``
            and passed through ``os.path.expanduser`` inside the script.
            ``None`` or an empty list adds no extra scans.

    Returns:
        The script source, newline-terminated.
    """
    lines = [
        "import json, os, re, shutil, subprocess, urllib.request",
        "models = []",
        "seen = set()",
        # Path guards: refuse anything that resolves into these roots.
        "BLOCKED_ROOTS = ('/sys', '/proc', '/dev', '/run', '/var/run')",
        "def safe_path(p):",
        "    try:",
        "        rp = os.path.realpath(os.path.expanduser(p))",
        "        return not any(rp == b or rp.startswith(b + os.sep) for b in BLOCKED_ROOTS)",
        "    except Exception:",
        "        return False",
        # os.walk wrapper that never follows symlinks and prunes symlinked or
        # blocked sub-directories in place.
        "def safe_walk(top):",
        "    if not safe_path(top): return",
        "    for root, dirs, fns in os.walk(top, followlinks=False):",
        "        dirs[:] = [d for d in dirs if not os.path.islink(os.path.join(root, d)) and safe_path(os.path.join(root, d))]",
        "        yield root, dirs, fns",
        # GGUF helpers: 'projector' for names containing "mmproj", else 'model'.
        "def gguf_role(name):",
        "    n = name.lower()",
        "    if n.startswith('mmproj') or 'mmproj' in n: return 'projector'",
        "    return 'model'",
        # Quantization label parsed from the file name, upper-cased ('' if none).
        "def gguf_quant(name):",
        "    m = re.search(r'(?i)(UD-)?(IQ[0-9]_[A-Z0-9_]+|Q[0-9](?:_[A-Z0-9]+)+|BF16|F16|FP16|F32|Q8_0)', name)",
        "    return m.group(0).upper() if m else ''",
        # Lists .gguf files under `base`. Shards named `<prefix>-N-of-M.gguf`
        # are merged into one entry per (dir, prefix, M) with summed size; the
        # shard numbered 1 supplies the entry's name/rel_path/role/quant.
        "def collect_ggufs(base):",
        "    files = []",
        "    split_groups = {}",
        "    if not os.path.isdir(base) or not safe_path(base): return files",
        "    for root, dirs, fns in safe_walk(base):",
        "        for fn in sorted(fns):",
        "            if not fn.lower().endswith('.gguf'): continue",
        "            fp = os.path.join(root, fn)",
        "            try: size = os.path.getsize(fp)",
        "            except Exception: size = 0",
        "            try: rel = os.path.relpath(fp, base).replace(os.sep, '/')",
        "            except Exception: rel = fn",
        "            sm = re.match(r'(?i)^(.+)-(\\d+)-of-(\\d+)\\.gguf$', fn)",
        "            if sm:",
        "                prefix, part_s, total_s = sm.group(1), sm.group(2), sm.group(3)",
        "                key = (root, prefix, total_s)",
        "                g = split_groups.setdefault(key, {'name':fn,'rel_path':rel,'size_bytes':0,'role':gguf_role(fn),'quant':gguf_quant(fn),'parts':int(total_s),'split':True})",
        "                g['size_bytes'] += size",
        "                if int(part_s) == 1:",
        "                    g.update({'name':fn,'rel_path':rel,'role':gguf_role(fn),'quant':gguf_quant(fn)})",
        "                continue",
        "            files.append({'name':fn,'rel_path':rel,'size_bytes':size,'role':gguf_role(fn),'quant':gguf_quant(fn)})",
        "    files.extend(split_groups.values())",
        # Model files sort ahead of projector files, then by relative path.
        "    files.sort(key=lambda f: (f.get('role') != 'model', f.get('rel_path', '')))",
        "    return files",
        # HuggingFace hub cache: one entry per `models--<org>--<name>` folder.
        # Size and file count come from `blobs`; a `.incomplete` blob marks a
        # partial download; `snapshots/*` is checked for model_index.json
        # (diffusion marker) and for GGUF files.
        "def scan_hf(cache):",
        "    if not os.path.isdir(cache): return",
        "    for d in sorted(os.listdir(cache)):",
        "        if not d.startswith('models--'): continue",
        "        rid = d.replace('models--','').replace('--','/')",
        "        if rid in seen: continue",
        "        seen.add(rid)",
        "        blobs = os.path.join(cache, d, 'blobs')",
        "        sz, nf, ic = 0, 0, False",
        "        if os.path.isdir(blobs):",
        "            for f in os.scandir(blobs):",
        "                if f.is_file(): nf += 1; sz += f.stat().st_size",
        "                if f.name.endswith('.incomplete'): ic = True",
        "        snap = os.path.join(cache, d, 'snapshots')",
        "        is_diffusion = False; gguf_files = []",
        "        if os.path.isdir(snap):",
        "            for sd in os.listdir(snap):",
        "                sf = os.path.join(snap, sd)",
        "                if not os.path.isdir(sf): continue",
        "                if os.path.exists(os.path.join(sf, 'model_index.json')): is_diffusion = True",
        "                for f in collect_ggufs(sf): f['rel_path'] = sd + '/' + f['rel_path']; gguf_files.append(f)",
        "        models.append({'repo_id':rid,'size_bytes':sz,'nb_files':nf,'has_incomplete':ic,'path':cache,'is_diffusion':is_diffusion,'is_gguf':bool(gguf_files),'gguf_files':gguf_files})",
        # Custom model directory: each non-hidden, non-symlink child folder
        # (other than `models--*`) counts as a model if it contains a .gguf,
        # config.json, .safetensors or .bin file; it is keyed by folder name.
        "def scan_dir(p):",
        "    if not os.path.isdir(p) or not safe_path(p): return",
        "    for d in sorted(os.listdir(p)):",
        "        if d.startswith('.'): continue",
        "        if d.startswith('models--'): continue",
        "        fp = os.path.join(p, d)",
        "        if not os.path.isdir(fp) or os.path.islink(fp) or not safe_path(fp): continue",
        "        if d in seen: continue",
        "        is_model = False; gguf_files = []",
        "        for root, dirs, fns in safe_walk(fp):",
        "            for fn in fns:",
        "                if fn.lower().endswith('.gguf'): is_model = True",
        "                elif fn == 'config.json' or fn.endswith('.safetensors') or fn.endswith('.bin'): is_model = True",
        "            if is_model: break",
        "        if not is_model: continue",
        "        gguf_files = collect_ggufs(fp)",
        "        seen.add(d)",
        "        sz, nf = 0, 0",
        "        for dp, _, fns in safe_walk(fp):",
        "            for fn in fns:",
        "                try: nf += 1; sz += os.path.getsize(os.path.join(dp, fn))",
        "                except Exception: pass",
        "        is_diff = os.path.exists(os.path.join(fp, 'model_index.json'))",
        "        models.append({'repo_id':d,'size_bytes':sz,'nb_files':nf,'has_incomplete':False,'path':p,'is_local_dir':True,'is_diffusion':is_diff,'is_gguf':bool(gguf_files),'gguf_files':gguf_files})",
        # Converts a number plus TB/GB/MB/KB unit to bytes using powers of
        # 1024; unparseable numbers yield 0, unknown units leave the number as is.
        "def parse_size(num, unit):",
        "    try: n = float(num)",
        "    except Exception: return 0",
        "    u = (unit or '').upper()",
        "    if u.startswith('TB'): return int(n * 1024 ** 4)",
        "    if u.startswith('GB'): return int(n * 1024 ** 3)",
        "    if u.startswith('MB'): return int(n * 1024 ** 2)",
        "    if u.startswith('KB'): return int(n * 1024)",
        "    return int(n)",
        # Ollama via CLI: parses `ollama list`, skipping the first (header)
        # line; column 0 is the name, columns 2 and 3 are the size and its unit.
        "def scan_ollama():",
        "    if not shutil.which('ollama'): return",
        "    try:",
        "        p = subprocess.run(['ollama', 'list'], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True, timeout=6)",
        "    except Exception:",
        "        return",
        "    if p.returncode != 0: return",
        "    for line in (p.stdout or '').splitlines()[1:]:",
        "        parts = line.split()",
        "        if len(parts) < 4: continue",
        "        name = parts[0]",
        "        if not name or name in seen: continue",
        "        size_bytes = parse_size(parts[2], parts[3])",
        "        seen.add(name)",
        "        models.append({'repo_id':name,'size_bytes':size_bytes,'nb_files':1,'has_incomplete':False,'path':'ollama','backend':'ollama','is_ollama':True})",
        # Ollama via HTTP: tries each /api/tags URL in order and stops after
        # the first one that answers with parseable JSON.
        "def scan_ollama_api():",
        "    urls = ['http://127.0.0.1:11434/api/tags', 'http://localhost:11434/api/tags', 'http://host.docker.internal:11434/api/tags']",
        "    for url in urls:",
        "        try:",
        "            with urllib.request.urlopen(url, timeout=2) as r:",
        "                data = json.loads(r.read().decode('utf-8', 'replace'))",
        "        except Exception:",
        "            continue",
        "        for item in data.get('models', []):",
        "            name = item.get('name') or item.get('model')",
        "            if not name or name in seen: continue",
        "            size_bytes = int(item.get('size') or item.get('size_bytes') or 0)",
        "            seen.add(name)",
        "            models.append({'repo_id':name,'size_bytes':size_bytes,'nb_files':1,'has_incomplete':False,'path':'ollama','backend':'ollama','is_ollama':True})",
        "        return",
        # Fixed scans that always run, in this order.
        "scan_hf(os.path.expanduser('~/.cache/huggingface/hub'))",
        "scan_ollama()",
        "scan_ollama_api()",
    ]
    # repr() turns each directory into a Python string literal, so quotes or
    # backslashes in the path cannot break out of the generated source line.
    for model_dir in model_dirs or []:
        lines.append(f"scan_dir(os.path.expanduser({model_dir!r}))")
    lines.append("print(json.dumps(models))")
    return "\n".join(lines) + "\n"


def _ps_squote(v: str) -> str:
    """Make ``v`` safe inside a PowerShell single-quoted string.

    Each single quote is doubled, which is how PowerShell spells a literal
    quote within ``'…'``. This is defence in depth on top of the token
    validator's regex: should that regex ever be loosened, the heredoc the
    value lands in stays shell-safe.
    """
    return "''".join(v.split("'"))


def _bash_squote(v: str) -> str:
    """Make ``v`` safe inside a bash/sh single-quoted string.

    Each single quote becomes ``'\\''``: close the quote, emit an escaped
    quote, reopen the quote. The surrounding quotes are not added here.
    """
    return "'\\''".join(v.split("'"))


# Allow-list of binaries permitted as the leading token of `req.cmd` for /api/model/serve.
# Anything else is rejected before the cmd is interpolated into a tmux/PowerShell wrapper.
# Entries are compared against the basename of the first non-env-assignment
# token (see _check_serve_binary), so a path prefix does not matter.
_SERVE_CMD_ALLOWLIST = {
    "vllm", "llama-server", "llama_server", "llama.cpp", "ollama",
    "python", "python3",
    "sglang", "lmdeploy",
    "node", "npx",
}


# The llama.cpp GGUF launcher (static/js/cookbook.js) emits a fixed-shape
# prelude that resolves the cached .gguf on the target host before serving:
#   MODEL_FILE=$( { find …; find …; } | head -1 ) && { [ -n "$MODEL_FILE" ] && \
#   [ -f "$MODEL_FILE" ]; } || { echo "ERROR…"; exit 1; } && <serve> || <serve>
# That legitimately needs $(...)/&&/||, so we recognise this exact shape and
# validate the serve binaries it guards rather than rejecting it wholesale.
# The pattern matches only the prelude (up to and including the final `&&`);
# whatever follows the match is the serve invocation text.
_GGUF_PRELUDE_RE = re.compile(
    r'^MODEL_FILE=\$\([^\n]*?\)\s*&&\s*\{[^{}]*\}\s*\|\|\s*\{[^{}]*\}\s*&&\s*'
)
# Finds an `OLLAMA_HOST=<value>` assignment at the start of the command or
# after whitespace; group 1 is the value up to the next whitespace.
_OLLAMA_HOST_ASSIGNMENT_RE = re.compile(r"(?:^|\s)OLLAMA_HOST=([^\s]+)")
# Splits a bind value into host and port. Two shapes are accepted:
#   [host]:port -> groups 1 and 2 (bracketed form, e.g. an IPv6 literal)
#   host:port   -> groups 3 and 4 (host must contain no colon)
# A value without a port matches neither alternative.
_OLLAMA_BIND_RE = re.compile(r"^\[([^\]]+)\]:(\d+)$|^([^:]+):(\d+)$")
# Characters allowed in the host part once it has been split out.
_OLLAMA_BIND_HOST_RE = re.compile(r"^[A-Za-z0-9._:-]+$")


def _ollama_bind_from_cmd(cmd: str | None, *, default_host: str = "127.0.0.1") -> tuple[str, str]:
    """Extract the ``(host, port)`` an Ollama serve command asks to bind to.

    The command is searched for an ``OLLAMA_HOST=<value>`` assignment.

    * No command or no assignment: ``(default_host, "11434")``. Callers for
      remote hosts can pass a wider ``default_host`` so the API is reachable.
    * Assignment of the form ``host:port`` or ``[host]:port`` with a valid
      host charset and a port in 1..65535: that host (brackets preserved)
      and port.
    * Any other assignment: loopback ``("127.0.0.1", "11434")``, regardless
      of ``default_host``.

    NOTE(review): a host-only value such as ``OLLAMA_HOST=0.0.0.0`` has no
    port, so it takes the loopback fallback — confirm that is intended.
    """
    loopback = ("127.0.0.1", "11434")
    assignment = _OLLAMA_HOST_ASSIGNMENT_RE.search(cmd) if cmd else None
    if assignment is None:
        return default_host, "11434"
    # Tolerate a value wrapped in shell quotes.
    parsed = _OLLAMA_BIND_RE.match(assignment.group(1).strip("'\""))
    if parsed is None:
        return loopback
    in_brackets, bracket_port, plain_host, plain_port = parsed.groups()
    host = in_brackets or plain_host or "127.0.0.1"
    port = bracket_port or plain_port or "11434"
    if not _OLLAMA_BIND_HOST_RE.match(host):
        return loopback
    try:
        port_number = int(port, 10)
    except ValueError:
        return loopback
    if not 1 <= port_number <= 65535:
        return loopback
    shown_host = f"[{host}]" if in_brackets else host
    return shown_host, port


def _check_serve_binary(seg: str) -> None:
    """Ensure one command segment launches an allowlisted binary.

    The segment is tokenized with ``shlex``; leading ``NAME=value``
    environment assignments (e.g. ``CUDA_VISIBLE_DEVICES=0``) are skipped and
    the basename of the first remaining token is looked up in
    ``_SERVE_CMD_ALLOWLIST``. A blank segment is accepted as a no-op.

    Raises:
        HTTPException: 400 when the segment cannot be tokenized or its binary
            is not allowlisted (including a segment made only of assignments).
    """
    if not seg.strip():
        return
    try:
        words = shlex.split(seg)
    except ValueError:
        raise HTTPException(400, "Invalid cmd — could not parse")
    if not words:
        return
    assignment = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*=")
    binary = ""
    for word in words:
        if not assignment.match(word):
            binary = word
            break
    base = os.path.basename(binary)
    if base in _SERVE_CMD_ALLOWLIST:
        return
    shown = base or "(empty)"
    allowed = ", ".join(sorted(_SERVE_CMD_ALLOWLIST))
    raise HTTPException(
        400,
        f"cmd binary '{shown}' is not allowed. Must start with one of: {allowed}",
    )


def _validate_serve_cmd(v: str | None) -> str | None:
    """Gate a serve command before it is placed into a wrapper script.

    `req.cmd` is dropped verbatim into a bash/PowerShell wrapper script and
    executed in a tmux session, so it must start with an allowlisted binary
    and must not chain further commands.

    Returns ``None`` for ``None``/empty input. Otherwise returns the command
    with backslash-newline continuations collapsed to single spaces and
    surrounding whitespace stripped.

    Two shapes are accepted:
    * the GGUF launcher prelude followed by one or more ``||``-separated
      serve invocations, each checked by ``_check_serve_binary``;
    * a single invocation containing none of ``;``, ``&&``, ``||``, ``$(``.

    Leading env-var assignments (e.g. `CUDA_VISIBLE_DEVICES=0 python3 ...`)
    do not trip the allowlist; ``_check_serve_binary`` skips them.

    NOTE(review): ``|`` and a single ``&`` are not in the rejected set, and
    the text after a recognised prelude is not checked for ``;``/``&&``/``$(``
    — confirm whether that is intended.

    Raises:
        HTTPException: 400 on forbidden characters or a non-allowlisted binary.
    """
    if v is None or v == "":
        return None
    # Multi-line pastes with trailing `\` are one logical invocation; fold
    # them so the leading-token allowlist still governs the whole command.
    cmd = re.sub(r"\\[ \t]*\r?\n[ \t]*", " ", v).strip()
    # Backticks and raw newlines are never legitimate here.
    for banned in ("`", "\n", "\r"):
        if banned in cmd:
            raise HTTPException(400, "Invalid characters in cmd")
    prelude = _GGUF_PRELUDE_RE.match(cmd)
    if prelude is None:
        # Plain single invocation: no command chaining or substitution.
        # (Bare `$` stays allowed for shell-safe paths.)
        for operator in (";", "&&", "||", "$("):
            if operator in cmd:
                raise HTTPException(400, "Invalid characters in cmd")
        _check_serve_binary(cmd)
        return cmd
    # Known prelude: the remainder looks like
    # `[ENV=…] python3 -m llama_cpp.server … || [ENV=…] llama-server …`
    for invocation in cmd[prelude.end():].split("||"):
        _check_serve_binary(invocation.strip())
    return cmd


def _append_serve_preflight_exit_lines(runner_lines: list[str], *, keep_shell_open: bool) -> None:
    """Extend ``runner_lines`` with a bash guard for preflight failures.

    The emitted ``if`` block fires when ``$ODYSSEUS_PREFLIGHT_EXIT`` is
    non-empty: it prints the exit-code banner, then either replaces the
    process with the user's shell (``keep_shell_open``) or exits with that
    code. The list is mutated in place; nothing is returned.
    """
    if keep_shell_open:
        last_step = '  exec "${SHELL:-/bin/bash}"'
    else:
        last_step = '  exit "$ODYSSEUS_PREFLIGHT_EXIT"'
    runner_lines.extend(
        [
            'if [ -n "$ODYSSEUS_PREFLIGHT_EXIT" ]; then',
            '  echo ""; echo "=== Process exited with code $ODYSSEUS_PREFLIGHT_EXIT ==="',
            last_step,
            'fi',
        ]
    )


def _append_serve_exit_code_lines(runner_lines: list[str], *, keep_shell_open: bool) -> None:
    """Extend ``runner_lines`` with bash that captures and reports ``$?``.

    The first emitted line stores the previous command's status in
    ``ODYSSEUS_CMD_EXIT``. The banner is then printed and, depending on
    ``keep_shell_open``, the script either execs the user's shell on the same
    line or exits with the captured status on a separate line. The list is
    mutated in place; nothing is returned.
    """
    banner = 'echo ""; echo "=== Process exited with code $ODYSSEUS_CMD_EXIT ==="'
    runner_lines.append('ODYSSEUS_CMD_EXIT=$?')
    if keep_shell_open:
        runner_lines.append(banner + '; exec "${SHELL:-/bin/bash}"')
        return
    runner_lines.extend([banner, 'exit "$ODYSSEUS_CMD_EXIT"'])


def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
    """Extend ``runner_lines`` with the Linux llama.cpp build script body.

    The emitted bash picks a backend in this order: ROCm/HIP when any HIP
    indicator is present, else CUDA when ``nvcc`` and ``libcudart`` are both
    visible, else a CPU-only build (with warnings). Preferring HIP keeps ROCm
    hosts from attempting a CUDA configure that fails with "CUDA Toolkit not
    found". The list is mutated in place; nothing is returned.

    The script relies on ``$NPROC`` and writes the symlink into ``~/bin``;
    neither is set up by the lines emitted here.
    """
    wheel_root = '~/.local/lib/python*/site-packages/nvidia/'
    nvcc_wheel_dirs = ' '.join(wheel_root + leaf for leaf in ('cu13', 'cu12', 'cuda_nvcc'))
    configure = 'cmake -B build -DCMAKE_BUILD_TYPE=Release'
    build_and_link = (
        ' && cmake --build build -j"$NPROC" --target llama-server'
        ' && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server'
    )
    runner_lines.extend([
        # Put a pip-installed nvcc (vLLM/nvidia CUDA wheels) on PATH so
        # cmake's CUDA configure can find it. The probe is emitted first, but
        # the HIP branch below is still tested before the nvcc branch, so a
        # host with both stacks uses the native HIP toolchain.
        '    for _cudir in ' + nvcc_wheel_dirs + '; do',
        '      [ -x "$_cudir/bin/nvcc" ] && export CUDA_HOME="$_cudir" && export PATH="$_cudir/bin:$PATH" && break',
        '    done',
        # Start from a clean build dir so a CMakeCache.txt left by a failed
        # CUDA or HIP attempt cannot leak stale settings into this configure.
        '    cd ~/llama.cpp && rm -rf build',
        '    if command -v hipconfig &>/dev/null || [ -d /opt/rocm ] || [ -n "$ROCM_PATH" ] || [ -n "$HIP_PATH" ]; then',
        '      if command -v hipconfig &>/dev/null; then',
        '        export HIPCXX="${HIPCXX:-$(hipconfig -l)/clang}"',
        '        export HIP_PATH="${HIP_PATH:-$(hipconfig -R)}"',
        '      fi',
        '      echo "[odysseus] ROCm/HIP detected — building llama-server with HIP support..."',
        '      ' + configure + ' -DGGML_HIP=ON' + build_and_link,
        '    elif command -v nvcc &>/dev/null; then',
        # nvcc alone is not enough: pip-installed CUDA wheels or incomplete
        # tooling can expose nvcc without libcudart, and cmake then fails
        # mid-build. A small shell helper checks for cudart explicitly.
        '      _odysseus_has_cudart() {',
        "        ldconfig -p 2>/dev/null | grep -q 'libcudart\\.so' && return 0",
        '        local _cuh="${CUDA_HOME:-/usr/local/cuda}"',
        '        ls "$_cuh/lib64/libcudart.so"* &>/dev/null && return 0',
        '        ls "$_cuh/lib/libcudart.so"* &>/dev/null && return 0',
        '        ls /usr/local/cuda/lib64/libcudart.so* &>/dev/null && return 0',
        '        ls /usr/local/cuda/lib/libcudart.so* &>/dev/null && return 0',
        '        ls "${_cuh%/cuda_nvcc}/cuda_runtime/lib/libcudart.so"* &>/dev/null && return 0',
        '        return 1',
        '      }',
        '      if _odysseus_has_cudart; then',
        '        echo "[odysseus] CUDA nvcc + cudart found — building llama-server with CUDA (GPU) support..."',
        '        ' + configure + ' -DGGML_CUDA=ON' + build_and_link,
        '      else',
        '        echo "[odysseus] WARNING: nvcc found but CUDA runtime (libcudart.so) is not visible — building llama-server for CPU only."',
        '        echo "[odysseus]   GPU inference will not be available for this llama.cpp build."',
        '        echo "[odysseus]   Ensure libcudart is installed (e.g. cuda-runtime package) and visible via ldconfig or CUDA_HOME."',
        '        ' + configure + build_and_link,
        '      fi',
        '    else',
        '      echo "[odysseus] WARNING: no HIP/CUDA toolchain found — building llama-server for CPU only."',
        '      echo "[odysseus]   GPU inference will not be available for this llama.cpp build."',
        '      echo "[odysseus]   Install ROCm for AMD GPUs or vLLM/CUDA tooling for NVIDIA, then re-launch this serve task."',
        '      ' + configure + build_and_link,
        '    fi',
    ])


def _llama_cpp_rebuild_cmd() -> str:
    """Return the shell command that discards the managed llama.cpp build.

    The command ensures ``$HOME/bin`` exists, deletes the ``llama-server``
    entry there and the ``$HOME/llama.cpp/build`` directory, then prints a
    notice. It installs and downloads nothing: the serve bootstrap only
    builds when ``llama-server`` is missing from PATH, so clearing it is what
    makes the next llama.cpp serve recompile from source (and pick up a CUDA
    or HIP toolchain if one has become available).
    """
    notice = (
        "[odysseus] Cleared the cached llama.cpp build. "
        "Re-launch the serve task to rebuild llama-server from source "
        "(CUDA or HIP will be used if a toolchain is now available)."
    )
    steps = [
        'mkdir -p "$HOME/bin"',
        'rm -f "$HOME/bin/llama-server"',
        'rm -rf "$HOME/llama.cpp/build"',
        f'echo "{notice}"',
    ]
    return " && ".join(steps)


# Request body for a model download. Field values are plain strings here;
# the `_validate_*` helpers in this module are the matching validators —
# presumably applied by the route handlers (not visible in this file section).
class ModelDownloadRequest(BaseModel):
    repo_id: str  # model identifier, e.g. "<org>/<name>"
    include: str | None = None  # glob pattern e.g. "*Q4_K_M*"
    hf_token: str | None = None  # optional HuggingFace token
    env_prefix: str | None = None  # e.g. "source ~/venv/bin/activate"
    remote_host: str | None = None  # e.g. "user@gpu-box" — run download on this host via SSH (_validate_remote_host accepts only user@host)
    ssh_port: str | None = None    # e.g. "8022" for Termux
    platform: str | None = None    # "linux", "termux", or "windows"
    local_dir: str | None = None   # base dir to download into (a per-model subfolder is created under it); None = default HF cache
    disable_hf_transfer: bool = False  # skip the Rust hf_transfer downloader — slower but far more reliable on large files (used by retries)


# Request body for launching a serve task. `cmd` is the field that
# `_validate_serve_cmd` is written for (it refers to it as `req.cmd`).
class ServeRequest(BaseModel):
    repo_id: str  # model identifier: <org>/<name>, Ollama name:tag, or cached local model id
    cmd: str  # serve command line; must start with an allowlisted binary
    remote_host: str | None = None  # SSH destination in user@host form; None = local
    ssh_port: str | None = None  # SSH port as a string, e.g. "8022"
    env_prefix: str | None = None  # e.g. "source ~/venv/bin/activate"
    hf_token: str | None = None  # optional HuggingFace token
    gpus: str | None = None  # comma-separated GPU indexes, e.g. "0,1"
    platform: str | None = None    # "linux", "termux", or "windows"


def _parse_serve_phase(snapshot: str, task_type: str = "serve") -> dict:
    """Parse a tmux snapshot of a serve task into structured phase info.

    Single source of truth for serve task status detection. Returns:
        { "phase": str, "status": "ready"|"running"|"", "tps": float|None,
          "reqs": int|None, "pct": int|None }
    """
    import re
    if task_type != "serve" or not snapshot:
        return {}
    # Strip newlines so tmux line-wrapping doesn't break regex matching
    flat = re.sub(r'\s+', ' ', snapshot)

    load_matches = re.findall(r'Loading safetensors.*?(\d+)%', flat)
    # Prefer "Downloading (incomplete total...)" (real aggregate bytes) over
    # "Fetching N files" (whole-file count, lags with hf_transfer's chunked pulls).
    downloading_matches = re.findall(r'Downloading.*?(\d+)%', flat)
    fetching_matches = re.findall(r'Fetching.*?(\d+)%', flat)
    dl_matches = downloading_matches if downloading_matches else fetching_matches
    # Match "Avg generation throughput: X tokens/s, Running: N reqs" (with line-wrap tolerance)
    tps_matches = re.findall(
        r'(?:Avg )?generation throughput:\s*([\d.]+)\s*tokens/s.*?Running:\s*(\d+)\s*reqs',
        flat,
    )

    # Check throughput FIRST — the throughput log line contains "GPU KV cache usage"
    # which would otherwise false-match the warmup check
    if tps_matches:
        tps_str, reqs_str = tps_matches[-1]
        tps = float(tps_str)
        reqs = int(reqs_str)
        return {
            "phase": f"{tps_str} tok/s" if reqs > 0 else "idle",
            "status": "ready",
            "tps": tps,
            "reqs": reqs,
        }
    if "Application startup complete" in flat:
        return {"phase": "ready", "status": "ready"}
    if re.search(r'Ollama API ready on port\s+\d+', flat, re.I):
        return {"phase": "ready", "status": "ready"}
    # HTTP access logs (e.g. GET /v1/models 200 OK) mean the server is up and serving
    if re.search(r'(?:GET|POST)\s+/[^\s]*\s+HTTP/[\d.]+"\s*\d{3}', flat):
        return {"phase": "idle", "status": "ready"}
    if "Loading weights took" in flat:
        return {"phase": "initializing", "status": "running"}
    # "GPU KV cache" alone (during allocation) — not "GPU KV cache usage" (runtime log)
    if "GPU KV cache" in flat and "GPU KV cache usage" not in flat:
        return {"phase": "warming up", "status": "running"}
    if load_matches:
        pct = int(load_matches[-1])
        return {"phase": f"loading {pct}%", "status": "running", "pct": pct}
    if dl_matches:
        pct = int(dl_matches[-1])
        return {"phase": f"downloading {pct}%", "status": "running", "pct": pct}
    return {}


def _ssh(host, cmd, port=None):
    """Build SSH command string with optional port."""
    pf = f"-p {port} " if port and port != "22" else ""
    return f"ssh {pf}{host} '{cmd}'"


def _safe_env_prefix(ep: str | None) -> str | None:
    """Rewrite a `source <path>` env_prefix so it no-ops if the path is missing.
    Prevents `line N: <path>: No such file or directory` errors when a serve
    task is launched against a host that doesn't have the expected venv.

    Also rewrites leading `~/` → `$HOME/` so the path expands inside double
    quotes (bash only tilde-expands unquoted tokens at word start)."""
    if not ep:
        return ep
    import shlex
    try:
        parts = shlex.split(ep, posix=True)
    except ValueError:
        raise HTTPException(400, "Invalid env_prefix")
    if len(parts) != 2 or parts[0] not in {"source", "."}:
        # Bash conda activation emitted by the frontend:
        #   eval "$(conda shell.bash hook)" && conda activate ENV
        m = re.fullmatch(r'eval "\$\(conda shell\.bash hook\)" && conda activate (.+)', ep)
        if m:
            env = m.group(1).strip()
            try:
                env_parts = shlex.split(env, posix=True)
            except ValueError:
                raise HTTPException(400, "Invalid env_prefix")
            if len(env_parts) != 1:
                raise HTTPException(400, "Invalid env_prefix")
            return 'eval "$(conda shell.bash hook)" && conda activate ' + shlex.quote(env_parts[0])

        # Plain conda activation, used by Windows/PowerShell and some manual callers.
        if len(parts) == 3 and parts[0] == "conda" and parts[1] == "activate":
            return "conda activate " + shlex.quote(parts[2])

        # PowerShell venv activation emitted by the frontend:
        #   & 'C:\path\Scripts\Activate.ps1'
        if len(parts) == 2 and parts[0] == "&":
            path = parts[1]
            if any(c in path for c in "\r\n;&|`$<>"):
                raise HTTPException(400, "Invalid env_prefix")
            return "& '" + path.replace("'", "''") + "'"

        raise HTTPException(400, "Invalid env_prefix")
    path = parts[1]
    if any(c in path for c in "\r\n;&|`$<>"):
        raise HTTPException(400, "Invalid env_prefix")
    # Replace a leading "~/" with "$HOME/" so it survives quoting
    if path.startswith("~/"):
        path = "$HOME/" + path[2:]
    elif path == "~":
        path = "$HOME"
    path = path.replace('"', '\\"')
    return f'[ -f "{path}" ] && source "{path}" || true'


def _ssh_ps(host, script_path, port=None):
    """Build SSH command to run a PowerShell script on a Windows remote."""
    pf = f"-p {port} " if port and port != "22" else ""
    return f'ssh {pf}{host} "powershell -ExecutionPolicy Bypass -File {script_path}"'


# Windows session dir — stored in user's temp on the remote.
# The Python literal below evaluates to `$env:TEMP\\odysseus-sessions`, i.e.
# with TWO literal backslashes. NOTE(review): presumably the doubling is there
# to survive one more layer of escaping before PowerShell sees the path —
# confirm against the callers before "simplifying" it.
WIN_SESSION_DIR = "$env:TEMP\\\\odysseus-sessions"