Fix native macOS tailnet launch and Metal GPU probe (#756)

* macOS/Apple Silicon: detect Metal backend, surface MLX models, brew tmux hint

  - hardware.py: add _detect_macos() via sysctl/system_profiler; report backend=metal + unified_memory on Apple Silicon instead of cpu_arm
  - fit.py: add Apple Silicon (M1-M5) unified-memory bandwidths + metal FALLBACK_K so throughput estimates use the real bandwidth formula
  - setup.py: Mac-specific 'brew install tmux' hint

  Verified on M5 Pro 48GB: backend=metal, 273GB/s matched, 6 MLX models now visible (were hidden), cuda still hides MLX, no new test failures.

* Fix native macOS tailnet launch and Metal GPU probe

---------

Co-authored-by: Elijah (Hermes) <hermes@local>
2026-06-01 19:41:04 -07:00
parent a327df6936
commit c303a29670
4 changed files with 76 additions and 8 deletions
--- a/README.md
+++ b/README.md
@@ -90,7 +90,14 @@ cd odysseus
 ./start-macos.sh
 ```

-It launches at `http://127.0.0.1:7860`. To build a clickable app wrapper:
+It launches at `http://127.0.0.1:7860`. To expose it to your phone over a trusted LAN/VPN such as Tailscale, bind all interfaces:
+
+```bash
+ODYSSEUS_HOST=0.0.0.0 ./start-macos.sh
+# then open http://<tailscale-ip>:7860
+```
+
+Keep auth enabled when binding outside loopback, and do not expose this port directly to the public internet. To build a clickable app wrapper:

 ```bash
 ./build-macos-app.sh
--- a/routes/cookbook_routes.py
+++ b/routes/cookbook_routes.py
@@ -1466,6 +1466,46 @@ def setup_cookbook_routes() -> APIRouter:
        if gpus:
            return {"ok": True, "gpus": gpus, "backend": "cuda", "source": "nvidia-smi"}

+        # Local Apple Silicon / Metal fallback. macOS has no nvidia-smi and no
+        # Linux /sys/class/drm tree, but services.hwfit.hardware already knows
+        # how to size the shared unified-memory GPU budget. Keep this route in
+        # sync so Cookbook's GPU picker doesn't show "nvidia-smi not found" on
+        # native Mac launches.
+        if not host and sys.platform == "darwin":
+            try:
+                from services.hwfit.hardware import detect_system
+                info = detect_system(fresh=True)
+                backend = str(info.get("backend") or "").lower()
+                if backend in {"metal", "mps", "apple"} and info.get("gpu_count", 0) > 0:
+                    total_mb = int(float(info.get("gpu_vram_gb") or info.get("total_ram_gb") or 0) * 1024)
+                    free_mb = int(float(info.get("available_ram_gb") or 0) * 1024)
+                    if total_mb and (free_mb <= 0 or free_mb > total_mb):
+                        free_mb = total_mb
+                    used_mb = max(0, total_mb - max(0, free_mb))
+                    return {
+                        "ok": True,
+                        "gpus": [{
+                            "index": 0,
+                            "name": info.get("gpu_name") or info.get("cpu_name") or "Apple Silicon GPU",
+                            "uuid": "apple-metal-0",
+                            "free_mb": max(0, free_mb),
+                            "total_mb": max(0, total_mb),
+                            "used_mb": used_mb,
+                            "util_pct": 0,
+                            "busy": bool(total_mb and (free_mb / total_mb) < 0.5),
+                            "processes": [],
+                            "backend": "metal",
+                            "source": "apple-metal",
+                            "unified_memory": True,
+                        }],
+                        "backend": "metal",
+                        "source": "apple-metal",
+                        "fallback_from": "nvidia-smi",
+                        "nvidia_error": nvidia_error,
+                    }
+            except Exception as e:
+                logger.warning("Apple Metal GPU fallback failed: %s", e)
+
        amd_gpus = await _probe_amd_sysfs(host, ssh_port)
        if amd_gpus:
            return {
--- a/services/hwfit/fit.py
+++ b/services/hwfit/fit.py
@@ -26,7 +26,8 @@ GPU_BANDWIDTH = {
    "m1 ultra": 800, "m1 max": 400, "m1 pro": 200, "m1": 68,
    "m2 ultra": 800, "m2 max": 400, "m2 pro": 200, "m2": 100,
    "m3 ultra": 800, "m3 max": 300, "m3 pro": 150, "m3": 100,
-    "m4 max": 410, "m4 pro": 273, "m4": 120,
+    "m4 max": 546, "m4 pro": 273, "m4": 120,
+    "m5 max": 546, "m5 pro": 273, "m5": 150,
 }

 # Pre-sort keys by length descending for correct substring matching
--- a/start-macos.sh
+++ b/start-macos.sh
@@ -17,6 +17,11 @@ REPO_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 cd "$REPO_DIR"

 PORT="${ODYSSEUS_PORT:-7860}"   # 7860, not 7000 — macOS AirPlay Receiver holds 7000.
+HOST="${ODYSSEUS_HOST:-127.0.0.1}" # Set ODYSSEUS_HOST=0.0.0.0 for LAN/Tailscale access.
+PROBE_HOST="$HOST"
+if [ "$PROBE_HOST" = "0.0.0.0" ] || [ "$PROBE_HOST" = "::" ]; then
+  PROBE_HOST="127.0.0.1"
+fi

 # Friendly message on any failure — re-running is safe (every step is idempotent).
 trap 'echo; echo "✗ Setup failed above. It is safe to re-run ./start-macos.sh."; exit 1' ERR
@@ -24,8 +29,8 @@ trap 'echo; echo "✗ Setup failed above. It is safe to re-run ./start-macos.sh.
 echo "▶ Odysseus quick start for macOS"

 # Fail fast if the port is already taken (e.g. a previous run still running).
-if (exec 3<>"/dev/tcp/127.0.0.1/$PORT") 2>/dev/null; then
-  echo "✗ Port $PORT is already in use. Stop what's using it, or pick another port:"
+if (exec 3<>"/dev/tcp/$PROBE_HOST/$PORT") 2>/dev/null; then
+  echo "✗ Port $PORT is already in use on $PROBE_HOST. Stop what's using it, or pick another port:"
  echo "    ODYSSEUS_PORT=7900 ./start-macos.sh"
  exit 1
 fi
@@ -100,8 +105,20 @@ echo "▶ Installing Python packages (first run downloads a few — can take a f
 echo "▶ Preparing Odysseus…"
 ODYSSEUS_SKIP_RUN_HINT=1 ./venv/bin/python setup.py

-# 5. Launch. Bind to loopback only (safe default).
-URL="http://127.0.0.1:$PORT"
+# 5. Launch. Bind to loopback by default; opt into LAN/Tailscale with
+#    ODYSSEUS_HOST=0.0.0.0.
+URL_HOST="$HOST"
+if [ "$URL_HOST" = "0.0.0.0" ] || [ "$URL_HOST" = "::" ]; then
+  URL_HOST="127.0.0.1"
+fi
+URL="http://$URL_HOST:$PORT"
+TAILSCALE_URL=""
+if [ "$HOST" = "0.0.0.0" ] && command -v tailscale >/dev/null 2>&1; then
+  TS_IP="$(tailscale ip -4 2>/dev/null | head -n 1 || true)"
+  if [ -n "$TS_IP" ]; then
+    TAILSCALE_URL="http://$TS_IP:$PORT"
+  fi
+fi

 # Open the browser automatically once the server is accepting connections — so
 # the URL isn't lost in the startup logs that keep scrolling. Runs in the
@@ -111,7 +128,7 @@ POLLER_PID=""
 if [ -z "$ODYSSEUS_NO_OPEN" ] && command -v open >/dev/null 2>&1; then
  (
    for _ in $(seq 1 90); do
-      if (exec 3<>"/dev/tcp/127.0.0.1/$PORT") 2>/dev/null; then
+      if (exec 3<>"/dev/tcp/$PROBE_HOST/$PORT") 2>/dev/null; then
        printf '\n'
        printf '  ┌────────────────────────────────────────────┐\n'
        printf '  │  ✓ Odysseus is ready — opening your browser  │\n'
@@ -134,6 +151,9 @@ trap '[ -n "$POLLER_PID" ] && kill "$POLLER_PID" 2>/dev/null' EXIT INT TERM

 echo
 echo "▶ Starting Odysseus — it will open in your browser at $URL"
+if [ -n "$TAILSCALE_URL" ]; then
+  echo "  Tailscale/LAN URL: $TAILSCALE_URL"
+fi
 echo "  (this takes a few seconds; press Ctrl+C here to stop)"
 echo
-"$PY" -m uvicorn app:app --host 127.0.0.1 --port "$PORT"
+"$PY" -m uvicorn app:app --host "$HOST" --port "$PORT"