odysseus/routes/cookbook_helpers.py

"""cookbook_helpers.py — validators + small helpers shared by the cookbook routes.
Extracted from cookbook_routes.py; the routes module imports the symbols it needs."""

import logging
import os
import posixpath
import re
import shlex

from fastapi import HTTPException
from pydantic import BaseModel

logger = logging.getLogger(__name__)


# HuggingFace repo IDs are <org>/<name>, both alphanumerics plus ._-
# Rejecting anything else up front closes off shell-interpolation vectors.
_REPO_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*/[A-Za-z0-9][A-Za-z0-9._-]*$")
# Cached models scanned from a custom/local model dir are keyed by their leaf
# folder name (no slash), e.g. `DeepSeek-R1-UD-IQ4_XS`. The serve command uses
# the real on-disk path separately; this identifier is only for UI/task
# bookkeeping, so serving should accept the same safe glyph set as repo IDs.
_LOCAL_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")
# Ollama model names include tags, e.g. `qwen2.5:0.5b` or `llama3.2:latest`.
# Some registries also use a namespace path. Keep this shell-safe: no spaces,
# quotes, `$`, `;`, `&`, pipes, or redirects.
_OLLAMA_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._:/-]{0,200}$")
# Include pattern is a glob: allow typical safe glyphs only.
_INCLUDE_RE = re.compile(r"^[A-Za-z0-9._\-*?/\[\]]+$")
# Remote host: user@host (optionally with :port-free hostname parts).
_REMOTE_HOST_RE = re.compile(r"^[A-Za-z0-9._-]+@[A-Za-z0-9._-]+$")
# HF tokens and API tokens are url-safe base64-like.
_TOKEN_RE = re.compile(r"^[A-Za-z0-9._~+/=-]+$")
# Session IDs we mint look like "cookbook-deadbeef" or "serve-deadbeef".
# Anything beyond plain alphanumerics + dash + underscore could break out
# of the shell/PowerShell contexts the value lands in.
_SESSION_ID_RE = re.compile(r"^[A-Za-z0-9_-]{1,64}$")
_SSH_PORT_RE = re.compile(r"^\d{1,5}$")
_GPU_LIST_RE = re.compile(r"^\d+(?:,\d+)*$")
# A download target directory. Absolute or ~-relative path; safe path glyphs
# only (no quotes, shell metacharacters, or spaces) since it lands in a shell
# command. A leading ~ is expanded to $HOME at command-build time.
_LOCAL_DIR_RE = re.compile(r"^~?/[A-Za-z0-9._/-]*$|^~$")


def _validate_repo_id(v: str | None) -> str:
    if not v or not _REPO_ID_RE.match(v):
        raise HTTPException(400, "Invalid repo_id — must be <org>/<name> using [A-Za-z0-9._-]")
    return v


def _validate_serve_model_id(v: str | None) -> str:
    if not v:
        raise HTTPException(400, "repo_id is required")
    if _REPO_ID_RE.match(v) or _LOCAL_MODEL_ID_RE.match(v) or _OLLAMA_MODEL_ID_RE.match(v):
        return v
    raise HTTPException(400, "Invalid repo_id — must be <org>/<name>, an Ollama name:tag, or a cached local model id")


def _validate_include(v: str | None) -> str | None:
    if v is None or v == "":
        return None
    if not _INCLUDE_RE.match(v):
        raise HTTPException(400, "Invalid include pattern")
    return v


def _validate_remote_host(v: str | None) -> str | None:
    if v is None or v == "":
        return None
    if not _REMOTE_HOST_RE.match(v):
        raise HTTPException(400, "Invalid remote_host — must be user@host, no SSH option syntax")
    return v


def _validate_token(v: str | None) -> str | None:
    if v is None or v == "":
        return None
    if not _TOKEN_RE.match(v):
        raise HTTPException(400, "Invalid token characters")
    return v


def _validate_local_dir(v: str | None) -> str | None:
    if v is None or v == "":
        return None
    v = v.rstrip("/") or "/"
    if not _LOCAL_DIR_RE.match(v):
        raise HTTPException(400, "Invalid local_dir — must be an absolute or ~ path with no spaces or shell metacharacters")
    return v


def _validate_ssh_port(v: str | None) -> str | None:
    if v is None or v == "":
        return None
    if not _SSH_PORT_RE.fullmatch(str(v)):
        raise HTTPException(400, "Invalid ssh_port")
    port = int(v)
    if port < 1 or port > 65535:
        raise HTTPException(400, "Invalid ssh_port")
    return str(port)


def _validate_gpus(v: str | None) -> str | None:
    if v is None or v == "":
        return None
    if not _GPU_LIST_RE.fullmatch(str(v)):
        raise HTTPException(400, "Invalid gpus — expected comma-separated GPU indexes")
    return str(v)


def _shell_path(p: str) -> str:
    """Render a validated path for a double-quoted shell context, expanding a
    leading ~ to $HOME (single quotes wouldn't expand it). Safe because
    _validate_local_dir already restricts the charset."""
    if p == "~":
        return '"$HOME"'
    if p.startswith("~/"):
        return '"$HOME/' + p[2:] + '"'
    return '"' + p + '"'


def _local_tooling_path_export(executable: str) -> str:
    """Bash line prepending the running interpreter's bin dir to PATH.

    When Odysseus runs from a virtualenv, that bin dir holds the tools the
    cookbook runners shell out to (`hf`, `python`). tmux runners start from a
    fresh login shell with the venv NOT activated, so without this they can't
    find `hf` and downloads fail with "hf: command not found" — notably on
    macOS, where the `pip --user` self-heal also misses (`pip` isn't a command,
    only `pip3`/`python3 -m pip`). Local runs only; meaningless over SSH.
    """
    # This builds a bash snippet, so an explicit POSIX absolute path should keep
    # POSIX semantics even when the app/tests run on Windows. Otherwise
    # os.path.abspath("/opt/...") would incorrectly turn it into "D:\\opt\\...".
    if executable.startswith("/"):
        bin_dir = posixpath.dirname(executable)
    else:
        bin_dir = os.path.dirname(os.path.abspath(executable))
    # Escape for a double-quoted context: $PATH must still expand, but spaces
    # and shell metacharacters in the path must be preserved literally.
    esc = (
        bin_dir.replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("$", "\\$")
        .replace("`", "\\`")
    )
    return f'export PATH="{esc}:$PATH"'


def _pip_install_attempt(pip_cmd: str) -> str:
    """Wrap a single pip install command so its exit status survives the
    fallback chain and its stderr is visible in the tmux log on failure.

    Without this wrapper, `pip … 2>&1 | tail -5` returns ``tail``'s exit
    code (0), masking pip's real failure and preventing the next fallback
    from running.  The generated snippet captures all output to a temp
    file, prints the last 5 lines on failure (so the Cookbook log panel
    shows useful diagnostics), cleans up, and exits with pip's original
    status.
    """
    return (
        "bash -c '"
        f'_out=$(mktemp) && {pip_cmd} >"$_out" 2>&1; _rc=$?; '
        'tail -5 "$_out"; rm -f "$_out"; exit $_rc'
        "'"
    )


def _pip_install_fallback_chain(package: str, *, python_cmd: str = "python3 -m pip", upgrade: bool = False) -> str:
    """Build a bash pip install fallback chain that surfaces errors.

    Try the active interpreter/environment first. ``--user`` is invalid
    inside many venvs, so only attempt the ``--user`` fallback when NOT
    inside a venv.

    Each attempt is wrapped via :func:`_pip_install_attempt` so pip's real
    exit code is preserved (no ``| tail`` masking) and the last 5 lines of
    pip output appear in the Cookbook log on failure.
    """
    upgrade_flag = " -U" if upgrade else ""
    base = _pip_install_attempt(f"{python_cmd} install -q{upgrade_flag} {package}")
    user = _pip_install_attempt(f"{python_cmd} install --user --break-system-packages -q{upgrade_flag} {package}")
    # Derive the python executable for the venv detection check.
    # Must use the same interpreter that pip belongs to; hardcoding
    # python3 breaks when pip lives in a venv that only has "python".
    if " -m pip" in python_cmd:
        python_exe = python_cmd.replace(" -m pip", "")
    elif python_cmd.strip() == "pip":
        python_exe = "python"
    elif python_cmd.strip() == "pip3":
        python_exe = "python3"
    else:
        python_exe = "python3"
    venv_check = f'{python_exe} -c "import sys; sys.exit(0 if sys.prefix != sys.base_prefix else 1)"'
    # Negated: `! venv_check` succeeds (exit 0) when NOT in a venv → `&&` tries
    # --user.  When IN a venv `! venv_check` fails → `&&` skips --user and the
    # group exits non-zero, propagating the base-install failure instead of
    # masking it as success (the `|| { venv_check || … }` shape from #903
    # swallowed the exit code because venv_check's exit-0 became the group's
    # result).
    return f"{base} || {{ ! {venv_check} && {user}; }}"


def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
    """Build the standalone Python scanner used by /api/model/cached."""
    lines = [
        "import json, os, re, shutil, subprocess, urllib.request",
        "models = []",
        "seen = set()",
        "BLOCKED_ROOTS = ('/sys', '/proc', '/dev', '/run', '/var/run')",
        "def safe_path(p):",
        "    try:",
        "        rp = os.path.realpath(os.path.expanduser(p))",
        "        return not any(rp == b or rp.startswith(b + os.sep) for b in BLOCKED_ROOTS)",
        "    except Exception:",
        "        return False",
        "def safe_walk(top):",
        "    if not safe_path(top): return",
        "    for root, dirs, fns in os.walk(top, followlinks=False):",
        "        dirs[:] = [d for d in dirs if not os.path.islink(os.path.join(root, d)) and safe_path(os.path.join(root, d))]",
        "        yield root, dirs, fns",
        "def gguf_role(name):",
        "    n = name.lower()",
        "    if n.startswith('mmproj') or 'mmproj' in n: return 'projector'",
        "    return 'model'",
        "def gguf_quant(name):",
        "    m = re.search(r'(?i)(UD-)?(IQ[0-9]_[A-Z0-9_]+|Q[0-9](?:_[A-Z0-9]+)+|BF16|F16|FP16|F32|Q8_0)', name)",
        "    return m.group(0).upper() if m else ''",
        "def collect_ggufs(base):",
        "    files = []",
        "    split_groups = {}",
        "    if not os.path.isdir(base) or not safe_path(base): return files",
        "    for root, dirs, fns in safe_walk(base):",
        "        for fn in sorted(fns):",
        "            if not fn.lower().endswith('.gguf'): continue",
        "            fp = os.path.join(root, fn)",
        "            try: size = os.path.getsize(fp)",
        "            except Exception: size = 0",
        "            try: rel = os.path.relpath(fp, base).replace(os.sep, '/')",
        "            except Exception: rel = fn",
        "            sm = re.match(r'(?i)^(.+)-(\\d+)-of-(\\d+)\\.gguf$', fn)",
        "            if sm:",
        "                prefix, part_s, total_s = sm.group(1), sm.group(2), sm.group(3)",
        "                key = (root, prefix, total_s)",
        "                g = split_groups.setdefault(key, {'name':fn,'rel_path':rel,'size_bytes':0,'role':gguf_role(fn),'quant':gguf_quant(fn),'parts':int(total_s),'split':True})",
        "                g['size_bytes'] += size",
        "                if int(part_s) == 1:",
        "                    g.update({'name':fn,'rel_path':rel,'role':gguf_role(fn),'quant':gguf_quant(fn)})",
        "                continue",
        "            files.append({'name':fn,'rel_path':rel,'size_bytes':size,'role':gguf_role(fn),'quant':gguf_quant(fn)})",
        "    files.extend(split_groups.values())",
        "    files.sort(key=lambda f: (f.get('role') != 'model', f.get('rel_path', '')))",
        "    return files",
        "def scan_hf(cache):",
        "    if not os.path.isdir(cache): return",
        "    for d in sorted(os.listdir(cache)):",
        "        if not d.startswith('models--'): continue",
        "        rid = d.replace('models--','').replace('--','/')",
        "        if rid in seen: continue",
        "        seen.add(rid)",
        "        blobs = os.path.join(cache, d, 'blobs')",
        "        sz, nf, ic = 0, 0, False",
        "        if os.path.isdir(blobs):",
        "            for f in os.scandir(blobs):",
        "                if f.is_file(): nf += 1; sz += f.stat().st_size",
        "                if f.name.endswith('.incomplete'): ic = True",
        "        snap = os.path.join(cache, d, 'snapshots')",
        "        is_diffusion = False; gguf_files = []",
        "        if os.path.isdir(snap):",
        "            for sd in os.listdir(snap):",
        "                sf = os.path.join(snap, sd)",
        "                if not os.path.isdir(sf): continue",
        "                if os.path.exists(os.path.join(sf, 'model_index.json')): is_diffusion = True",
        "                for f in collect_ggufs(sf): f['rel_path'] = sd + '/' + f['rel_path']; gguf_files.append(f)",
        "        models.append({'repo_id':rid,'size_bytes':sz,'nb_files':nf,'has_incomplete':ic,'path':cache,'is_diffusion':is_diffusion,'is_gguf':bool(gguf_files),'gguf_files':gguf_files})",
        "def scan_dir(p):",
        "    if not os.path.isdir(p) or not safe_path(p): return",
        "    for d in sorted(os.listdir(p)):",
        "        if d.startswith('.'): continue",
        "        if d.startswith('models--'): continue",
        "        fp = os.path.join(p, d)",
        "        if not os.path.isdir(fp) or os.path.islink(fp) or not safe_path(fp): continue",
        "        if d in seen: continue",
        "        is_model = False; gguf_files = []",
        "        for root, dirs, fns in safe_walk(fp):",
        "            for fn in fns:",
        "                if fn.lower().endswith('.gguf'): is_model = True",
        "                elif fn == 'config.json' or fn.endswith('.safetensors') or fn.endswith('.bin'): is_model = True",
        "            if is_model: break",
        "        if not is_model: continue",
        "        gguf_files = collect_ggufs(fp)",
        "        seen.add(d)",
        "        sz, nf = 0, 0",
        "        for dp, _, fns in safe_walk(fp):",
        "            for fn in fns:",
        "                try: nf += 1; sz += os.path.getsize(os.path.join(dp, fn))",
        "                except Exception: pass",
        "        is_diff = os.path.exists(os.path.join(fp, 'model_index.json'))",
        "        models.append({'repo_id':d,'size_bytes':sz,'nb_files':nf,'has_incomplete':False,'path':p,'is_local_dir':True,'is_diffusion':is_diff,'is_gguf':bool(gguf_files),'gguf_files':gguf_files})",
        "def parse_size(num, unit):",
        "    try: n = float(num)",
        "    except Exception: return 0",
        "    u = (unit or '').upper()",
        "    if u.startswith('TB'): return int(n * 1024 ** 4)",
        "    if u.startswith('GB'): return int(n * 1024 ** 3)",
        "    if u.startswith('MB'): return int(n * 1024 ** 2)",
        "    if u.startswith('KB'): return int(n * 1024)",
        "    return int(n)",
        "def scan_ollama():",
        "    if not shutil.which('ollama'): return",
        "    try:",
        "        p = subprocess.run(['ollama', 'list'], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True, timeout=6)",
        "    except Exception:",
        "        return",
        "    if p.returncode != 0: return",
        "    for line in (p.stdout or '').splitlines()[1:]:",
        "        parts = line.split()",
        "        if len(parts) < 4: continue",
        "        name = parts[0]",
        "        if not name or name in seen: continue",
        "        size_bytes = parse_size(parts[2], parts[3])",
        "        seen.add(name)",
        "        models.append({'repo_id':name,'size_bytes':size_bytes,'nb_files':1,'has_incomplete':False,'path':'ollama','backend':'ollama','is_ollama':True})",
        "def scan_ollama_api():",
        "    urls = ['http://127.0.0.1:11434/api/tags', 'http://localhost:11434/api/tags', 'http://host.docker.internal:11434/api/tags']",
        "    for url in urls:",
        "        try:",
        "            with urllib.request.urlopen(url, timeout=2) as r:",
        "                data = json.loads(r.read().decode('utf-8', 'replace'))",
        "        except Exception:",
        "            continue",
        "        for item in data.get('models', []):",
        "            name = item.get('name') or item.get('model')",
        "            if not name or name in seen: continue",
        "            size_bytes = int(item.get('size') or item.get('size_bytes') or 0)",
        "            seen.add(name)",
        "            models.append({'repo_id':name,'size_bytes':size_bytes,'nb_files':1,'has_incomplete':False,'path':'ollama','backend':'ollama','is_ollama':True})",
        "        return",
        "scan_hf(os.path.expanduser('~/.cache/huggingface/hub'))",
        "scan_ollama()",
        "scan_ollama_api()",
    ]
    for model_dir in model_dirs or []:
        lines.append(f"scan_dir(os.path.expanduser({model_dir!r}))")
    lines.append("print(json.dumps(models))")
    return "\n".join(lines) + "\n"


def _ps_squote(v: str) -> str:
    """Escape a value for PowerShell single-quoted string interpolation.
    Belt-and-suspenders on top of _validate_token's regex — if the regex
    is ever loosened, this still keeps the heredoc shell-safe."""
    return v.replace("'", "''")


def _bash_squote(v: str) -> str:
    """Escape a value for bash/sh single-quoted string interpolation."""
    return v.replace("'", "'\\''")


# Allow-list of binaries permitted as the leading token of `req.cmd` for /api/model/serve.
# Anything else is rejected before the cmd is interpolated into a tmux/PowerShell wrapper.
_SERVE_CMD_ALLOWLIST = {
    "vllm", "llama-server", "llama_server", "llama.cpp", "ollama",
    "python", "python3",
    "sglang", "lmdeploy",
    "node", "npx",
}


# The llama.cpp GGUF launcher (static/js/cookbook.js) emits a fixed-shape
# prelude that resolves the cached .gguf on the target host before serving:
#   MODEL_FILE=$( { find …; find …; } | head -1 ) && { [ -n "$MODEL_FILE" ] && \
#   [ -f "$MODEL_FILE" ]; } || { echo "ERROR…"; exit 1; } && <serve> || <serve>
# That legitimately needs $(...)/&&/||, so we recognise this exact shape and
# validate the serve binaries it guards rather than rejecting it wholesale.
_GGUF_PRELUDE_RE = re.compile(
    r'^MODEL_FILE=\$\([^\n]*?\)\s*&&\s*\{[^{}]*\}\s*\|\|\s*\{[^{}]*\}\s*&&\s*'
)
_OLLAMA_HOST_ASSIGNMENT_RE = re.compile(r"(?:^|\s)OLLAMA_HOST=([^\s]+)")
_OLLAMA_BIND_RE = re.compile(r"^\[([^\]]+)\]:(\d+)$|^([^:]+):(\d+)$")
_OLLAMA_BIND_HOST_RE = re.compile(r"^[A-Za-z0-9._:-]+$")


def _ollama_bind_from_cmd(cmd: str | None, *, default_host: str = "127.0.0.1") -> tuple[str, str]:
    """Return the Ollama bind host/port requested by a serve command.

    Plain local `ollama serve` defaults to loopback. Remote callers can pass a
    wider default host so the resulting API is reachable by Odysseus.
    """
    if not cmd:
        return default_host, "11434"
    match = _OLLAMA_HOST_ASSIGNMENT_RE.search(cmd)
    if not match:
        return default_host, "11434"
    value = match.group(1).strip("'\"")
    bind_match = _OLLAMA_BIND_RE.match(value)
    if not bind_match:
        return "127.0.0.1", "11434"
    bracketed_host = bind_match.group(1)
    host = bracketed_host or bind_match.group(3) or "127.0.0.1"
    port = bind_match.group(2) or bind_match.group(4) or "11434"
    if not _OLLAMA_BIND_HOST_RE.match(host):
        return "127.0.0.1", "11434"
    try:
        port_num = int(port, 10)
    except ValueError:
        return "127.0.0.1", "11434"
    if port_num < 1 or port_num > 65535:
        return "127.0.0.1", "11434"
    return f"[{host}]" if bracketed_host else host, port


def _check_serve_binary(seg: str) -> None:
    """Validate that a single command segment starts with an allowlisted binary
    (after skipping leading env-var assignments like `CUDA_VISIBLE_DEVICES=0`)."""
    try:
        tokens = shlex.split(seg) if seg.strip() else []
    except ValueError:
        raise HTTPException(400, "Invalid cmd — could not parse")
    if not tokens:
        return
    env_re = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*=")
    first = next((t for t in tokens if not env_re.match(t)), "")
    base = os.path.basename(first)
    if base not in _SERVE_CMD_ALLOWLIST:
        raise HTTPException(
            400,
            f"cmd binary '{base or '(empty)'}' is not allowed. Must start with one of: "
            f"{', '.join(sorted(_SERVE_CMD_ALLOWLIST))}",
        )


def _validate_serve_cmd(v: str | None) -> str | None:
    """Reject serve commands that aren't in the allowlist or contain shell metachars.

    `req.cmd` is dropped verbatim into a bash/PowerShell wrapper script and
    executed in a tmux session. Without this gate, an admin (or anyone in the
    pre-fix world) could pass arbitrary shell payloads.

    Leading env-var assignments (e.g. `CUDA_VISIBLE_DEVICES=0 python3 ...`)
    are stripped before checking the binary — several of our cmd builders
    prepend them, and they shouldn't trip the allowlist.
    """
    if v is None or v == "":
        return None
    # Collapse backslash-newline line continuations into single spaces. Serve
    # commands (vLLM especially) are routinely pasted multi-line with trailing
    # `\` — that's a safe shell/shlex continuation, so the command stays ONE
    # logical invocation and the leading-token allowlist below still governs.
    v = re.sub(r"\\[ \t]*\r?\n[ \t]*", " ", v).strip()
    # Backticks and raw newlines are never legitimate here.
    if any(c in v for c in ("`", "\n", "\r")):
        raise HTTPException(400, "Invalid characters in cmd")
    # Known GGUF launcher prelude → validate the serve invocation(s) it guards.
    m = _GGUF_PRELUDE_RE.match(v)
    if m:
        rest = v[m.end():]
        # rest is `[ENV=…] python3 -m llama_cpp.server … || [ENV=…] llama-server …`
        for part in rest.split("||"):
            _check_serve_binary(part.strip())
        return v
    # Otherwise: a single invocation — no shell metacharacters allowed.
    # (`$(` was the original intent; bare `$` is fine for shell-safe paths.)
    if any(c in v for c in (";", "&&", "||", "$(")):
        raise HTTPException(400, "Invalid characters in cmd")
    _check_serve_binary(v)
    return v


def _append_serve_preflight_exit_lines(runner_lines: list[str], *, keep_shell_open: bool) -> None:
    """Append serve-runner lines that surface preflight failures before exit."""
    runner_lines.append('if [ -n "$ODYSSEUS_PREFLIGHT_EXIT" ]; then')
    runner_lines.append('  echo ""; echo "=== Process exited with code $ODYSSEUS_PREFLIGHT_EXIT ==="')
    if keep_shell_open:
        runner_lines.append('  exec "${SHELL:-/bin/bash}"')
    else:
        runner_lines.append('  exit "$ODYSSEUS_PREFLIGHT_EXIT"')
    runner_lines.append('fi')


def _append_serve_exit_code_lines(runner_lines: list[str], *, keep_shell_open: bool) -> None:
    """Append serve-runner lines that preserve and report the command exit code."""
    runner_lines.append('ODYSSEUS_CMD_EXIT=$?')
    if keep_shell_open:
        runner_lines.append('echo ""; echo "=== Process exited with code $ODYSSEUS_CMD_EXIT ==="; exec "${SHELL:-/bin/bash}"')
    else:
        runner_lines.append('echo ""; echo "=== Process exited with code $ODYSSEUS_CMD_EXIT ==="')


class ModelDownloadRequest(BaseModel):
    repo_id: str
    include: str | None = None  # glob pattern e.g. "*Q4_K_M*"
    hf_token: str | None = None
    env_prefix: str | None = None  # e.g. "source ~/venv/bin/activate"
    remote_host: str | None = None  # e.g. "gpu-box" — run download on this host via SSH
    ssh_port: str | None = None    # e.g. "8022" for Termux
    platform: str | None = None    # "linux", "termux", or "windows"
    local_dir: str | None = None   # base dir to download into (a per-model subfolder is created under it); None = default HF cache
    disable_hf_transfer: bool = False  # skip the Rust hf_transfer downloader — slower but far more reliable on large files (used by retries)


class ServeRequest(BaseModel):
    repo_id: str
    cmd: str
    remote_host: str | None = None
    ssh_port: str | None = None
    env_prefix: str | None = None
    hf_token: str | None = None
    gpus: str | None = None
    platform: str | None = None    # "linux", "termux", or "windows"


def _parse_serve_phase(snapshot: str, task_type: str = "serve") -> dict:
    """Parse a tmux snapshot of a serve task into structured phase info.

    Single source of truth for serve task status detection. Returns:
        { "phase": str, "status": "ready"|"running"|"", "tps": float|None,
          "reqs": int|None, "pct": int|None }
    """
    import re
    if task_type != "serve" or not snapshot:
        return {}
    # Strip newlines so tmux line-wrapping doesn't break regex matching
    flat = re.sub(r'\s+', ' ', snapshot)

    load_matches = re.findall(r'Loading safetensors.*?(\d+)%', flat)
    # Prefer "Downloading (incomplete total...)" (real aggregate bytes) over
    # "Fetching N files" (whole-file count, lags with hf_transfer's chunked pulls).
    downloading_matches = re.findall(r'Downloading.*?(\d+)%', flat)
    fetching_matches = re.findall(r'Fetching.*?(\d+)%', flat)
    dl_matches = downloading_matches if downloading_matches else fetching_matches
    # Match "Avg generation throughput: X tokens/s, Running: N reqs" (with line-wrap tolerance)
    tps_matches = re.findall(
        r'(?:Avg )?generation throughput:\s*([\d.]+)\s*tokens/s.*?Running:\s*(\d+)\s*reqs',
        flat,
    )

    # Check throughput FIRST — the throughput log line contains "GPU KV cache usage"
    # which would otherwise false-match the warmup check
    if tps_matches:
        tps_str, reqs_str = tps_matches[-1]
        tps = float(tps_str)
        reqs = int(reqs_str)
        return {
            "phase": f"{tps_str} tok/s" if reqs > 0 else "idle",
            "status": "ready",
            "tps": tps,
            "reqs": reqs,
        }
    if "Application startup complete" in flat:
        return {"phase": "ready", "status": "ready"}
    if re.search(r'Ollama API ready on port\s+\d+', flat, re.I):
        return {"phase": "ready", "status": "ready"}
    # HTTP access logs (e.g. GET /v1/models 200 OK) mean the server is up and serving
    if re.search(r'(?:GET|POST)\s+/[^\s]*\s+HTTP/[\d.]+"\s*\d{3}', flat):
        return {"phase": "idle", "status": "ready"}
    if "Loading weights took" in flat:
        return {"phase": "initializing", "status": "running"}
    # "GPU KV cache" alone (during allocation) — not "GPU KV cache usage" (runtime log)
    if "GPU KV cache" in flat and "GPU KV cache usage" not in flat:
        return {"phase": "warming up", "status": "running"}
    if load_matches:
        pct = int(load_matches[-1])
        return {"phase": f"loading {pct}%", "status": "running", "pct": pct}
    if dl_matches:
        pct = int(dl_matches[-1])
        return {"phase": f"downloading {pct}%", "status": "running", "pct": pct}
    return {}


def _ssh(host, cmd, port=None):
    """Build SSH command string with optional port."""
    pf = f"-p {port} " if port and port != "22" else ""
    return f"ssh {pf}{host} '{cmd}'"


def _safe_env_prefix(ep: str | None) -> str | None:
    """Rewrite a `source <path>` env_prefix so it no-ops if the path is missing.
    Prevents `line N: <path>: No such file or directory` errors when a serve
    task is launched against a host that doesn't have the expected venv.

    Also rewrites leading `~/` → `$HOME/` so the path expands inside double
    quotes (bash only tilde-expands unquoted tokens at word start)."""
    if not ep:
        return ep
    import shlex
    try:
        parts = shlex.split(ep, posix=True)
    except ValueError:
        raise HTTPException(400, "Invalid env_prefix")
    if len(parts) != 2 or parts[0] not in {"source", "."}:
        # Bash conda activation emitted by the frontend:
        #   eval "$(conda shell.bash hook)" && conda activate ENV
        m = re.fullmatch(r'eval "\$\(conda shell\.bash hook\)" && conda activate (.+)', ep)
        if m:
            env = m.group(1).strip()
            try:
                env_parts = shlex.split(env, posix=True)
            except ValueError:
                raise HTTPException(400, "Invalid env_prefix")
            if len(env_parts) != 1:
                raise HTTPException(400, "Invalid env_prefix")
            return 'eval "$(conda shell.bash hook)" && conda activate ' + shlex.quote(env_parts[0])

        # Plain conda activation, used by Windows/PowerShell and some manual callers.
        if len(parts) == 3 and parts[0] == "conda" and parts[1] == "activate":
            return "conda activate " + shlex.quote(parts[2])

        # PowerShell venv activation emitted by the frontend:
        #   & 'C:\path\Scripts\Activate.ps1'
        if len(parts) == 2 and parts[0] == "&":
            path = parts[1]
            if any(c in path for c in "\r\n;&|`$<>"):
                raise HTTPException(400, "Invalid env_prefix")
            return "& '" + path.replace("'", "''") + "'"

        raise HTTPException(400, "Invalid env_prefix")
    path = parts[1]
    if any(c in path for c in "\r\n;&|`$<>"):
        raise HTTPException(400, "Invalid env_prefix")
    # Replace a leading "~/" with "$HOME/" so it survives quoting
    if path.startswith("~/"):
        path = "$HOME/" + path[2:]
    elif path == "~":
        path = "$HOME"
    path = path.replace('"', '\\"')
    return f'[ -f "{path}" ] && source "{path}" || true'


def _ssh_ps(host, script_path, port=None):
    """Build SSH command to run a PowerShell script on a Windows remote."""
    pf = f"-p {port} " if port and port != "22" else ""
    return f'ssh {pf}{host} "powershell -ExecutionPolicy Bypass -File {script_path}"'


# Windows session dir — stored in user's temp on the remote
WIN_SESSION_DIR = "$env:TEMP\\\\odysseus-sessions"