464 lines
19 KiB
Python
Executable File
464 lines
19 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""odysseus-cookbook — shell wrapper for the cookbook feature.
|
|
|
|
The web UI orchestrates HuggingFace model downloads + local serving
|
|
through tmux sessions and writes its bookkeeping to
|
|
`data/cookbook_state.json`. This CLI exposes the same operations on
|
|
the shell so they can be cron'd, piped, or scripted:
|
|
|
|
odysseus-cookbook list # active downloads + servers
|
|
odysseus-cookbook gpus # nvidia-smi per-GPU JSON
|
|
odysseus-cookbook cached # local HF cache snapshot
|
|
odysseus-cookbook hf-latest --vram-gb 24 # trending HF models that fit
|
|
odysseus-cookbook download Qwen/Qwen3-8B # fire off `hf download` in tmux
|
|
odysseus-cookbook kill cookbook-abc123 # tmux kill-session
|
|
|
|
Reads/writes the same `data/cookbook_state.json` the web UI uses, so
|
|
state stays in sync. Output is JSON on stdout, errors on stderr,
|
|
non-zero exit on failure.
|
|
"""
|
|
|
|
|
|
from __future__ import annotations
|
|
import sys
|
|
import os, sys
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "_lib"))
|
|
from cli import quiet_logs, emit, fail, common_parser, run, REPO_ROOT as _REPO_ROOT
|
|
quiet_logs()
|
|
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
import urllib.request
|
|
import urllib.parse
|
|
import uuid
|
|
from pathlib import Path
|
|
|
|
# Bookkeeping lives under $DATA_DIR when that variable is set, otherwise
# under <repo root>/data.
_DATA_DIR = Path(os.getenv("DATA_DIR", str(_REPO_ROOT / "data")))
_STATE_PATH = _DATA_DIR.joinpath("cookbook_state.json")

# Mirror routes/shell_routes.TMUX_LOG_DIR — don't import it because that pulls
# the whole web app into the process. Match its definition instead.
import tempfile

_TMUX_LOG_DIR = Path(tempfile.gettempdir(), "odysseus-tmux")
|
|
|
|
|
|
def fail(msg: str, code: int = 1) -> None:
    """Report `msg` on stderr (prefixed with ``error: ``) and exit.

    Never returns: the process terminates with exit status `code`.
    Note that this definition replaces the `fail` imported from `cli`
    earlier in the module.
    """
    print(f"error: {msg}", file=sys.stderr)
    raise SystemExit(code)
|
|
|
|
|
|
def _tmux_sessions() -> list[str]:
|
|
"""Return active tmux session names, or [] if tmux isn't installed."""
|
|
try:
|
|
out = subprocess.run(
|
|
["tmux", "list-sessions", "-F", "#S"],
|
|
capture_output=True, text=True, timeout=5,
|
|
)
|
|
if out.returncode != 0:
|
|
return []
|
|
return [s.strip() for s in out.stdout.splitlines() if s.strip()]
|
|
except FileNotFoundError:
|
|
return []
|
|
except Exception:
|
|
return []
|
|
|
|
|
|
def _read_state() -> dict:
    """Load the cookbook state file as parsed JSON.

    Returns `{}` when the file does not exist, or when it cannot be read
    or parsed.
    """
    if _STATE_PATH.exists():
        try:
            raw_text = _STATE_PATH.read_text()
            return json.loads(raw_text)
        except Exception:
            # Unreadable or corrupt state is treated the same as no state.
            return {}
    return {}
|
|
|
|
|
|
# ─── list ────────────────────────────────────────────────────────────
|
|
|
|
def cmd_list(args) -> None:
    """Emit the cookbook state together with the live tmux sessions.

    Output keys: `state` (contents of the state file),
    `all_tmux_sessions` (every tmux session name) and
    `cookbook_sessions` (the subset whose name starts with `cookbook-`).
    """
    all_sessions = _tmux_sessions()
    payload = {
        "state": _read_state(),
        "all_tmux_sessions": all_sessions,
        "cookbook_sessions": [
            name for name in all_sessions if name.startswith("cookbook-")
        ],
    }
    emit(payload, args)
|
|
|
|
|
|
# ─── gpus ────────────────────────────────────────────────────────────
|
|
|
|
def cmd_gpus(args) -> None:
    """Emit per-GPU memory and utilisation figures from `nvidia-smi`.

    Runs `nvidia-smi --query-gpu=...` locally, or over SSH when
    `args.host` is set (`args.ssh_port` optionally selects the port).

    On success emits `{"ok": True, "gpus": [...]}` where each entry has
    `index`, `name`, `free_mb`, `total_mb`, `used_mb`, `util_pct`,
    `uuid` and `busy` (true when less than half of the card's memory is
    free). Rows with fewer than seven fields, or with a non-integer
    numeric field, are skipped.

    On failure — binary not found, command timed out, or non-zero exit
    status — emits `{"ok": False, "error": ..., "gpus": []}` instead of
    raising.
    """
    query = "nvidia-smi --query-gpu=index,name,memory.free,memory.total,memory.used,utilization.gpu,uuid --format=csv,noheader,nounits"
    prefix = _ssh_prefix(args.host, args.ssh_port)
    # Locally the query is exec'd as an argv list; over ssh it is passed as
    # one string for the remote shell to parse.
    cmd = prefix + ([query] if prefix else query.split())
    tool = "ssh" if prefix else "nvidia-smi"
    try:
        out = subprocess.run(cmd, capture_output=True, text=True, timeout=15)
    except FileNotFoundError:
        emit({"ok": False, "error": f"{tool} not found", "gpus": []}, args)
        return
    except subprocess.TimeoutExpired:
        # A hung driver or unreachable host must still produce the JSON
        # error payload rather than a traceback.
        emit({"ok": False, "error": f"{tool} timed out after 15s", "gpus": []}, args)
        return
    if out.returncode != 0:
        emit({"ok": False, "error": out.stderr.strip()[:200], "gpus": []}, args)
        return

    gpus = []
    for line in out.stdout.strip().splitlines():
        parts = [p.strip() for p in line.split(",")]
        if len(parts) < 7:
            continue
        idx, name, free_mb, total_mb, used_mb, util, gpu_uuid = parts[:7]
        try:
            total_i, free_i = int(total_mb), int(free_mb)
            gpus.append({
                "index": int(idx),
                "name": name,
                "free_mb": free_i,
                "total_mb": total_i,
                "used_mb": int(used_mb),
                "util_pct": int(util),
                "uuid": gpu_uuid,
                # Guarded against a reported total of 0 MB.
                "busy": (free_i / total_i) < 0.5 if total_i else False,
            })
        except ValueError:
            # Non-numeric field in this row: skip the row.
            continue
    emit({"ok": True, "gpus": gpus}, args)
|
|
|
|
|
|
# ─── cached ──────────────────────────────────────────────────────────
|
|
|
|
def cmd_cached(args) -> None:
    """Emit a snapshot of the local HuggingFace hub cache.

    The hub directory is `$HF_HOME/hub`, falling back to
    `~/.cache/huggingface/hub` when `HF_HOME` is unset or empty. Every
    sub-directory is reported with its repo label, path and the summed
    size of the regular (non-symlink) files below it. If the hub
    directory does not exist, an empty model list with a note is emitted.
    """
    cache_root = os.environ.get("HF_HOME") or os.path.expanduser("~/.cache/huggingface")
    hub = Path(cache_root) / "hub"
    if not hub.is_dir():
        emit({"models": [], "hub_path": str(hub), "note": "no hub cache yet"}, args)
        return

    def _repo_label(dirname: str) -> str:
        # Hub layout: `models--<org>--<repo>` becomes `<org>/<repo>`;
        # datasets get a `datasets/` prefix; anything else is kept verbatim.
        if dirname.startswith("models--"):
            return dirname[len("models--"):].replace("--", "/")
        if dirname.startswith("datasets--"):
            return "datasets/" + dirname[len("datasets--"):].replace("--", "/")
        return dirname

    def _tree_size(root: Path) -> int:
        total = 0
        try:
            for item in root.rglob("*"):
                if item.is_file() and not item.is_symlink():
                    total += item.stat().st_size
        except Exception:
            # Best effort: keep whatever was summed before the error.
            pass
        return total

    models = [
        {"repo": _repo_label(d.name), "path": str(d), "size_bytes": _tree_size(d)}
        for d in sorted(hub.iterdir())
        if d.is_dir()
    ]
    emit({"models": models, "hub_path": str(hub)}, args)
|
|
|
|
|
|
# ─── hf-latest ───────────────────────────────────────────────────────
|
|
|
|
def cmd_hf_latest(args) -> None:
    """Emit trending HuggingFace models, optionally filtered by VRAM fit.

    Queries `https://huggingface.co/api/models` sorted by trending score
    for the `args.pipeline` filter, then keeps at most `args.limit`
    models. When `args.vram_gb` is positive, models whose estimated fp16
    footprint exceeds it are dropped; models whose size cannot be
    estimated from the repo id are kept.

    Emits `{"models": [...], "vram_gb_filter": args.vram_gb}`. A
    non-positive `args.limit` emits an empty model list without
    contacting the API. Exits through `fail` when the request fails or
    the payload is not a JSON list.
    """
    if args.limit <= 0:
        # Nothing was asked for; previously one model slipped through
        # because the limit was only checked after appending.
        emit({"models": [], "vram_gb_filter": args.vram_gb}, args)
        return

    # Over-fetch so the VRAM filter still leaves enough candidates.
    pool_size = max(args.limit * 15, 100)
    url = (
        "https://huggingface.co/api/models"
        f"?sort=trendingScore&direction=-1&limit={pool_size}&filter={urllib.parse.quote(args.pipeline)}"
    )
    try:
        with urllib.request.urlopen(url, timeout=15) as resp:
            raw = json.loads(resp.read().decode("utf-8"))
    except Exception as e:
        fail(f"HF API request failed: {e}")
    if not isinstance(raw, list):
        fail("HF API request failed: unexpected response (expected a JSON list)")

    size_re = re.compile(r'[-_/](\d+(?:\.\d+)?)\s*[Bb](?![a-zA-Z])')

    def _est_vram_fp16(repo_id: str) -> float | None:
        # Takes the first "<number>B" token in the repo id as the parameter
        # count in billions; returns None when there is no such token.
        m = size_re.search(repo_id)
        if not m:
            return None
        return float(m.group(1)) * 2  # fp16 = 2 bytes/param

    out = []
    for entry in raw:
        if not isinstance(entry, dict):
            continue
        rid = entry.get("id") or entry.get("modelId") or ""
        if not rid:
            continue
        vram = _est_vram_fp16(rid)
        if args.vram_gb > 0 and vram is not None and vram > args.vram_gb:
            continue
        out.append({
            "id": rid,
            "downloads": entry.get("downloads", 0),
            "likes": entry.get("likes", 0),
            "trendingScore": entry.get("trendingScore"),
            "pipeline_tag": entry.get("pipeline_tag", ""),
            "est_vram_fp16_gb": vram,
        })
        if len(out) >= args.limit:
            break
    emit({"models": out, "vram_gb_filter": args.vram_gb}, args)
|
|
|
|
|
|
# ─── download ────────────────────────────────────────────────────────
|
|
|
|
def cmd_download(args) -> None:
    """Start `hf download <repo>` in a detached tmux session.

    `args.repo` must look like `org/name`; `args.include` and
    `args.revision` are forwarded as `--include` / `--revision` when set.
    The session is named `cookbook-dl-<8 hex chars>` and its output is
    tee'd to a log file named after the session.

    With `args.host` the tmux session is created on that machine over
    SSH (`args.ssh_port` optionally selects the port) and the log goes
    to `/tmp/odysseus-tmux/` there; the remote needs `tmux` and `hf`
    installed. Otherwise the session and the log (under `_TMUX_LOG_DIR`)
    are local.

    Emits the session id, the log path and ready-to-run `tail_cmd` /
    `kill_cmd` strings. Exits through `fail` on an invalid repo id, a
    missing `ssh`/`tmux` binary, a timeout, or a failed launch.
    """
    if not re.fullmatch(r"[\w.-]+/[\w.-]+", args.repo):
        fail(f"invalid repo id {args.repo!r} — expected `org/name`")

    session_id = f"cookbook-dl-{uuid.uuid4().hex[:8]}"
    cmd_parts = ["hf", "download", args.repo]
    if args.include:
        cmd_parts += ["--include", args.include]
    if args.revision:
        cmd_parts += ["--revision", args.revision]
    hf_cmd = " ".join(map(_shell_quote, cmd_parts))

    if args.host:
        # The remote log location is fixed rather than derived from the
        # remote machine's temp directory.
        remote_log = f"/tmp/odysseus-tmux/{session_id}.log"
        inner_cmd = f"{hf_cmd} 2>&1 | tee {remote_log}; echo DONE"
        remote_shell_cmd = (
            f"mkdir -p /tmp/odysseus-tmux && "
            f"tmux new-session -d -s {_shell_quote(session_id)} "
            f"bash -lc {_shell_quote(inner_cmd)}"
        )
        ssh_argv = _ssh_prefix(args.host, args.ssh_port) + [remote_shell_cmd]
        try:
            out = subprocess.run(ssh_argv, capture_output=True, text=True, timeout=20)
        except FileNotFoundError:
            fail("ssh not installed")
        except subprocess.TimeoutExpired:
            fail(f"ssh to {args.host} timed out after 20s")
        if out.returncode != 0:
            fail(f"remote tmux launch failed: {out.stderr.strip() or out.stdout.strip()}")
        # Carry a custom SSH port into the helper commands so they work
        # as printed.
        ssh_port_opt = f"-p {args.ssh_port} " if args.ssh_port else ""
        kill_port_opt = f" --ssh-port {args.ssh_port}" if args.ssh_port else ""
        emit({
            "ok": True,
            "session_id": session_id,
            "repo": args.repo,
            "host": args.host,
            "remote_log_path": remote_log,
            "tail_cmd": f"ssh {ssh_port_opt}{args.host} tail -f {remote_log}",
            "kill_cmd": f"odysseus-cookbook kill {session_id} --host {args.host}{kill_port_opt}",
        }, args)
        return

    _TMUX_LOG_DIR.mkdir(parents=True, exist_ok=True)
    log_path = _TMUX_LOG_DIR / f"{session_id}.log"
    shell_cmd = f"{hf_cmd} 2>&1 | tee {_shell_quote(str(log_path))}; echo DONE"
    try:
        subprocess.run(
            ["tmux", "new-session", "-d", "-s", session_id, "bash", "-lc", shell_cmd],
            check=True, capture_output=True, text=True, timeout=10,
        )
    except FileNotFoundError:
        fail("tmux not installed — can't run background sessions from CLI")
    except subprocess.TimeoutExpired:
        fail("tmux timed out after 10s while creating the session")
    except subprocess.CalledProcessError as e:
        fail(f"tmux failed: {e.stderr or e.stdout}")

    emit({
        "ok": True,
        "session_id": session_id,
        "repo": args.repo,
        "log_path": str(log_path),
        "tail_cmd": f"tail -f {log_path}",
        "kill_cmd": f"odysseus-cookbook kill {session_id}",
    }, args)
|
|
|
|
|
|
def _shell_quote(s: str) -> str:
|
|
"""Minimal POSIX-shell quoting — wraps `s` in single quotes and
|
|
escapes any embedded single quotes."""
|
|
return "'" + s.replace("'", "'\\''") + "'"
|
|
|
|
|
|
# ─── serve ───────────────────────────────────────────────────────────
|
|
|
|
def cmd_serve(args) -> None:
    """Run an arbitrary serve command in a detached tmux session.

    Deliberately not opinionated about flags — the web UI handles
    platform-specific template generation. For the CLI you pass the full
    serve command via `--cmd`. Common patterns:

      odysseus-cookbook serve qwen3-8b --cmd 'python -m vllm.entrypoints.openai.api_server --model Qwen/Qwen3-8B --port 8000'
      odysseus-cookbook serve sdxl --cmd 'python scripts/diffusion_server.py --model stabilityai/sdxl --port 8006'

    The session is named `serve-<sanitised name>-<6 hex chars>` and its
    output is tee'd to a log file under `_TMUX_LOG_DIR`. Emits the
    session id, log path, the command, and ready-to-run `tail_cmd` /
    `kill_cmd` strings. Exits through `fail` when `--cmd` is empty, tmux
    is missing, tmux times out, or tmux reports an error.
    """
    if not args.cmd or not args.cmd.strip():
        fail("--cmd is required and must be a non-empty serve command")
    _TMUX_LOG_DIR.mkdir(parents=True, exist_ok=True)
    # Only word characters and '-' are kept. '.' and ':' are separators in
    # tmux's `session:window.pane` target syntax and tmux does not keep
    # them in session names, so leaving a '.' in would make the reported
    # session id differ from the session that actually exists.
    safe_name = re.sub(r"[^\w-]", "-", args.name)[:32] or "anon"
    session_id = f"serve-{safe_name}-{uuid.uuid4().hex[:6]}"
    log_path = _TMUX_LOG_DIR / f"{session_id}.log"
    # args.cmd is run as shell text on purpose: it is the caller's command.
    shell_cmd = f"{args.cmd} 2>&1 | tee {_shell_quote(str(log_path))}; echo DONE"
    try:
        subprocess.run(
            ["tmux", "new-session", "-d", "-s", session_id, "bash", "-lc", shell_cmd],
            check=True, capture_output=True, text=True, timeout=10,
        )
    except FileNotFoundError:
        fail("tmux not installed")
    except subprocess.TimeoutExpired:
        fail("tmux timed out after 10s while creating the session")
    except subprocess.CalledProcessError as e:
        fail(f"tmux failed: {e.stderr or e.stdout}")
    emit({
        "ok": True,
        "session_id": session_id,
        "log_path": str(log_path),
        "cmd": args.cmd,
        "tail_cmd": f"tail -f {log_path}",
        "kill_cmd": f"odysseus-cookbook kill {session_id}",
    }, args)
|
|
|
|
|
|
# ─── state set ───────────────────────────────────────────────────────
|
|
|
|
def cmd_state_set(args) -> None:
    """Replace the cookbook state file with the JSON read from stdin.

    The new content is written to a sibling `.tmp` file and then renamed
    over the state file, so a partial write cannot corrupt it. If a
    state file already exists it is first copied to a sibling `.bak`
    file (best effort — a failed backup does not abort the write);
    restore with
    `cp data/cookbook_state.json.bak data/cookbook_state.json`.

    Exits through `fail` when stdin is empty or not valid JSON. The
    emitted `bytes` value is the length of the text read from stdin.
    """
    raw = sys.stdin.read()
    if not raw.strip():
        fail("expected JSON on stdin")
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError as e:
        fail(f"invalid JSON on stdin: {e}")

    _STATE_PATH.parent.mkdir(parents=True, exist_ok=True)
    suffix = _STATE_PATH.suffix

    # Keep a copy of the previous state as an undo button.
    if _STATE_PATH.exists():
        backup = _STATE_PATH.with_suffix(suffix + ".bak")
        try:
            previous = _STATE_PATH.read_bytes()
            backup.write_bytes(previous)
        except Exception:
            pass

    staging = _STATE_PATH.with_suffix(suffix + ".tmp")
    serialized = json.dumps(parsed, indent=2, ensure_ascii=False)
    staging.write_text(serialized)
    staging.replace(_STATE_PATH)

    result = {"ok": True, "path": str(_STATE_PATH), "bytes": len(raw)}
    emit(result, args)
|
|
|
|
|
|
# ─── remote helpers ──────────────────────────────────────────────────
|
|
|
|
def _ssh_prefix(host: str | None, port: str | None) -> list[str]:
|
|
"""Return the ssh argv prefix when --host is given, else []."""
|
|
if not host:
|
|
return []
|
|
cmd = ["ssh"]
|
|
if port:
|
|
cmd += ["-p", str(port)]
|
|
cmd += ["-o", "BatchMode=yes", "-o", "ConnectTimeout=5", host]
|
|
return cmd
|
|
|
|
|
|
# ─── kill ────────────────────────────────────────────────────────────
|
|
|
|
def cmd_kill(args) -> None:
    """Terminate a tmux session by name, locally or on a remote host.

    Runs `tmux kill-session -t <session>`; with `args.host` the command
    is executed over SSH (`args.ssh_port` optionally selects the port).
    The emitted payload always has `ok: True`; `was_running` tells
    whether the kill command succeeded and `already_gone` whether tmux
    reported that it can't find the session.

    Exits through `fail` when the `tmux`/`ssh` binary is missing or the
    command times out.
    """
    import shlex  # only needed here, for quoting the remote command

    if args.host:
        # ssh joins its trailing arguments into one string for the remote
        # shell, so the session name must be quoted or the remote shell
        # would re-split and interpret it.
        remote_cmd = "tmux kill-session -t " + shlex.quote(args.session)
        cmd = _ssh_prefix(args.host, args.ssh_port) + [remote_cmd]
    else:
        cmd = ["tmux", "kill-session", "-t", args.session]
    try:
        out = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
    except FileNotFoundError:
        fail("tmux not installed" if not args.host else "ssh not installed")
    except subprocess.TimeoutExpired:
        fail(("ssh" if args.host else "tmux") + " timed out after 10s")
    already_gone = out.returncode != 0 and "can't find session" in (out.stderr or "").lower()
    emit({
        "ok": True,
        "session": args.session,
        "host": args.host or "local",
        "was_running": out.returncode == 0,
        "already_gone": already_gone,
    }, args)
|
|
|
|
|
|
# ─── state ───────────────────────────────────────────────────────────
|
|
|
|
def cmd_state(args) -> None:
    """Emit the cookbook state file's contents as-is.

    An absent or unreadable state file is emitted as `{}`.
    """
    snapshot = _read_state()
    emit(snapshot, args)
|
|
|
|
|
|
# ─── argparse ────────────────────────────────────────────────────────
|
|
|
|
def _build_parser() -> argparse.ArgumentParser:
    """Assemble the `odysseus-cookbook` argument parser.

    Every sub-command inherits the shared `--pretty` flag and stores its
    handler in `func`.
    """
    shared = argparse.ArgumentParser(add_help=False)
    shared.add_argument("--pretty", action="store_true", help="Pretty-print JSON")

    root = argparse.ArgumentParser(
        prog="odysseus-cookbook",
        description="Shell-friendly wrapper around the Odysseus cookbook (model download + serve).",
        parents=[shared],
    )
    # NOTE(review): the sub-command name is stored under `cmd`, the same
    # dest that `serve --cmd` uses — confirm nothing downstream relies on
    # `args.cmd` being the sub-command name.
    commands = root.add_subparsers(dest="cmd", required=True)

    def _command(name, summary, handler):
        # Register one sub-command wired to its handler.
        sub_parser = commands.add_parser(name, help=summary, parents=[shared])
        sub_parser.set_defaults(func=handler)
        return sub_parser

    _command("list", "active tmux sessions + cookbook state", cmd_list)

    gpus = _command("gpus", "per-GPU free/used VRAM (nvidia-smi)", cmd_gpus)
    gpus.add_argument("--host", help="run nvidia-smi over SSH against this host")
    gpus.add_argument("--ssh-port", help="SSH port (default: 22)")

    _command("cached", "HuggingFace local cache snapshot", cmd_cached)

    latest = _command("hf-latest", "trending HF models, VRAM-filtered", cmd_hf_latest)
    latest.add_argument("--vram-gb", type=float, default=0, help="filter to models that fit (0 = all)")
    latest.add_argument("--limit", type=int, default=10)
    latest.add_argument(
        "--pipeline",
        default="text-generation",
        help="HF pipeline_tag (text-generation, text-to-image, etc.)",
    )

    download = _command("download", "`hf download <repo>` in a tmux session", cmd_download)
    download.add_argument("repo", help="HF repo id, e.g. 'Qwen/Qwen3-8B'")
    download.add_argument("--include", help="glob filter for specific files")
    download.add_argument("--revision", help="git ref / branch / tag")
    download.add_argument("--host", help="run on a remote machine over SSH")
    download.add_argument("--ssh-port", help="SSH port (default: 22)")

    serve = _command("serve", "run an arbitrary serve cmd in tmux", cmd_serve)
    serve.add_argument("name", help="short label for the session (e.g. 'qwen3-8b')")
    serve.add_argument("--cmd", required=True, help="full shell command to run")

    kill = _command("kill", "tmux kill-session by name", cmd_kill)
    kill.add_argument("session", help="session name, e.g. 'cookbook-dl-abc123'")
    kill.add_argument("--host", help="kill a remote session")
    kill.add_argument("--ssh-port", help="SSH port")

    _command("state", "dump cookbook_state.json", cmd_state)
    _command("state-set", "write JSON from stdin into cookbook_state.json", cmd_state_set)

    return root
|
|
|
|
|
|
if __name__ == "__main__":
    # Hand the parser to the shared `run` helper and use its result as the
    # process exit status.
    exit_status = run(_build_parser())
    sys.exit(exit_status)
|