* Cookbook fit: consumer-AMD GGUF recommendations + accurate estimates (core logic) Split of #746 — the estimate/ranking MATH only, so it can be reviewed with tests first (UI changes follow separately). Backend files only: no static/js here. services/hwfit/fit.py, services/hwfit/hardware.py: - Recommend GGUF/llama.cpp on consumer AMD (RDNA, gfx10/11/12) instead of formats that don't run on consumer Radeon — vLLM-only AWQ/GPTQ/FP8 AND vendor-specific NVFP4 (NVIDIA) / MLX (Apple). Datacenter Instinct (CDNA) and CUDA are left untouched. - More accurate speed estimates across more GPUs (adds RDNA bandwidth data). - Detect AMD/RDNA GPUs (gpu_family from rocminfo) so fit/serve can branch on it. tests/test_hwfit_amd.py: AMD recommendation path, quant/bit matching, estimate realism, gfx RDNA-vs-CDNA classification. Rebased onto current main (analyze_model gained a scoring_use_case param there; kept it). Vision detection intentionally NOT added here — main already ships a "Vision" type filter + multimodal use-case handling; duplicating it was dropped. Checks: py_compile clean; pytest tests/test_hwfit_amd.py + hwfit/serve suites = 28 passed; full suite 0 new failures vs main. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> * Tests: assert NVFP4/MLX/FP8 formats are filtered on consumer RDNA Backs the #972 claim with an explicit regression: no NVIDIA NVFP4, Apple MLX, or vLLM-only FP8/AWQ/GPTQ repos are recommended on a consumer Radeon, and guards against vacuity by asserting such repos exist in the catalog. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
648 lines
25 KiB
Python
648 lines
25 KiB
Python
import os
|
|
import platform
|
|
import re
|
|
import shutil
|
|
import subprocess
|
|
import time
|
|
|
|
# Seconds a detect_system() result stays valid in the per-host cache.
CACHE_TTL = 1800  # 30 min — hardware rarely changes; use the Rescan button to force a re-probe

# Probe target for the current detect_system() call. These are module-level
# state read by _run() and the _detect_* helpers, so they are only meaningful
# while detect_system() is executing.
_remote_host = None  # set by detect_system(host=...)
_remote_port = None  # set by detect_system(ssh_port=...)
_remote_platform = None  # set by detect_system(platform=...): "windows", "linux", "termux"
_last_gpu_error = None  # set by _detect_nvidia() when nvidia-smi errors (driver mismatch, etc.)
|
|
|
|
|
|
def _run(cmd):
    """Run a command locally or over SSH and return its stripped stdout.

    When the module-level ``_remote_host`` is set, the command is executed on
    that host through ``ssh`` (15 s timeout); otherwise it is executed
    locally with ``subprocess.run`` (10 s timeout).

    Args:
        cmd: Either an argv list or a ready-made command string. On the
            remote path a list is joined into one shell command with every
            argument shell-quoted; a string is sent verbatim, so callers
            that pass a string own its quoting.

    Returns:
        The command's stdout with surrounding whitespace stripped when the
        exit status is 0, otherwise ``None``. Any exception (missing binary,
        timeout, ...) is swallowed and also yields ``None`` — this is a
        best-effort probe.
    """
    import shlex  # only needed for the SSH path; kept local to the block

    try:
        if _remote_host:
            if isinstance(cmd, (list, tuple)):
                # The remote shell re-parses the command line, so quote each
                # argument; a plain " ".join would split paths with spaces
                # and let shell metacharacters through.
                cmd_str = " ".join(shlex.quote(str(arg)) for arg in cmd)
            else:
                cmd_str = cmd
            ssh_cmd = ["ssh", "-o", "ConnectTimeout=5", "-o", "StrictHostKeyChecking=no"]
            # Accept the port as int or str; subprocess rejects non-str argv items.
            port = str(_remote_port) if _remote_port else ""
            if port and port != "22":
                ssh_cmd += ["-p", port]
            ssh_cmd += [_remote_host, cmd_str]
            r = subprocess.run(
                ssh_cmd,
                capture_output=True, text=True, timeout=15,
            )
        else:
            r = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
        if r.returncode == 0:
            return r.stdout.strip()
    except Exception:
        # Deliberate best-effort: a failed probe means "not available".
        pass
    return None
|
|
|
|
|
|
def _group_gpus(gpus):
|
|
"""Group identical GPUs by (name, rounded VRAM).
|
|
|
|
vLLM tensor-parallel only works across IDENTICAL GPUs, so a mixed box must
|
|
be split into homogeneous pools. Each group carries the device indices so a
|
|
serve command can pin CUDA_VISIBLE_DEVICES to exactly one pool. Biggest pool
|
|
(by total VRAM) first — that's the sensible auto-default serving target.
|
|
"""
|
|
groups = {}
|
|
order = []
|
|
for g in gpus:
|
|
key = (g["name"], round(g["vram_gb"]))
|
|
if key not in groups:
|
|
groups[key] = {
|
|
"name": g["name"],
|
|
"vram_each": round(g["vram_gb"], 1),
|
|
"count": 0,
|
|
"indices": [],
|
|
}
|
|
order.append(key)
|
|
groups[key]["count"] += 1
|
|
groups[key]["indices"].append(g.get("index"))
|
|
out = []
|
|
for key in order:
|
|
grp = groups[key]
|
|
grp["vram_total"] = round(grp["vram_each"] * grp["count"], 1)
|
|
out.append(grp)
|
|
out.sort(key=lambda x: x["vram_total"], reverse=True)
|
|
return out
|
|
|
|
|
|
def _detect_nvidia():
    """Probe NVIDIA GPUs through nvidia-smi.

    Resets the module-level ``_last_gpu_error`` and sets it again when the
    nvidia-smi output looks like a driver error rather than a GPU listing.

    Returns:
        A dict describing the GPUs (name, total VRAM, count, per-GPU list,
        homogeneous pools, backend "cuda"), or ``None`` when nvidia-smi
        yields nothing usable.
    """
    global _last_gpu_error
    _last_gpu_error = None

    out = _run(["nvidia-smi", "--query-gpu=memory.total,name", "--format=csv,noheader,nounits"])

    if not out and _remote_host:
        # A non-interactive SSH shell often has a minimal PATH that omits
        # where nvidia-smi lives, so the first call silently returns nothing
        # → "No GPU" on hosts that DO have GPUs. Retry through a login shell
        # with the common CUDA bin dirs on PATH.
        out = _run(
            "bash -lc 'export PATH=\"$PATH:/usr/bin:/usr/local/bin:/usr/local/cuda/bin\"; "
            "nvidia-smi --query-gpu=memory.total,name --format=csv,noheader,nounits'"
        )

    if not out and _remote_host:
        # Last resort: absolute paths. Some hosts have a login shell that
        # isn't bash (or a profile that errors), so the retry above can
        # still come back empty even though the binary is right there.
        candidates = (
            "/usr/bin/nvidia-smi",
            "/usr/local/bin/nvidia-smi",
            "/usr/local/cuda/bin/nvidia-smi",
        )
        for binary in candidates:
            out = _run(f"{binary} --query-gpu=memory.total,name --format=csv,noheader,nounits")
            if out:
                break

    if not out:
        return None

    # nvidia-smi present but unable to talk to the driver (e.g. it was
    # updated without a reboot): it prints an error and no GPU rows. Surface
    # that as a driver error rather than the misleading "No GPU".
    # NOTE(review): _run() discards stdout on a non-zero exit status, so this
    # only fires if nvidia-smi reports the error with exit status 0 — confirm.
    lowered = out.lower()
    error_markers = (
        "nvml",
        "driver/library version mismatch",
        "couldn't communicate",
        "no devices were found",
        "failed to initialize",
    )
    if any(marker in lowered for marker in error_markers):
        _last_gpu_error = out.strip().split("\n")[0][:140] or "NVIDIA driver error"
        return None

    # nvidia-smi lists GPUs in index order (0,1,2,...), so the row position
    # is the CUDA device index we'd pass to CUDA_VISIBLE_DEVICES.
    gpus = []
    for row_idx, row in enumerate(out.strip().split("\n")):
        fields = [field.strip() for field in row.split(",")]
        if len(fields) < 2:
            continue
        try:
            vram_mb = float(fields[0])
        except ValueError:
            # Non-numeric memory column: skip the row.
            continue
        gpus.append({"index": row_idx, "name": fields[1], "vram_gb": vram_mb / 1024.0})

    if not gpus:
        return None

    pools = _group_gpus(gpus)
    combined_vram = sum(gpu["vram_gb"] for gpu in gpus)
    return {
        "gpu_name": gpus[0]["name"],
        "gpu_vram_gb": round(combined_vram, 1),
        "gpu_count": len(gpus),
        "gpus": gpus,
        "gpu_groups": pools,
        "homogeneous": len(pools) <= 1,
        "backend": "cuda",
    }
|
|
|
|
|
|
def classify_amd_gfx(gfx):
    """Classify an AMD ISA target string such as "gfx1200".

    The input is lower-cased and stripped, then must match ``gfx`` followed
    by digits and an optional trailing letter a-f.

    Families:
        "rdna"    — consumer Radeon RX (gfx10xx RDNA1/2, gfx11xx RDNA3, gfx12xx RDNA4)
        "cdna"    — datacenter Instinct (gfx908 MI100, gfx90a MI200, gfx94x/95x MI300+)
        "gcn"     — older GCN/Vega (gfx900/906)
        "unknown" — empty/unrecognized; callers must treat conservatively

    This drives the serving decision: vLLM/SGLang on ROCm are validated on
    CDNA but fragile on consumer RDNA, so RDNA is steered to GGUF/llama.cpp.

    Returns:
        ``(normalized_gfx, family)``. When the string does not look like a
        gfx target at all the result is ``("", "unknown")``.
    """
    target = (gfx or "").lower().strip()
    match = re.fullmatch(r"gfx(\d+[a-f]?)", target)
    if match is None:
        return "", "unknown"

    code = match.group(1)
    if code.startswith(("10", "11", "12")):
        family = "rdna"
    elif code in ("908", "90a") or code.startswith(("94", "95")):
        family = "cdna"
    elif code.startswith("9"):
        # Any remaining gfx9xx target is treated as GCN/Vega.
        family = "gcn"
    else:
        family = "unknown"
    return target, family
|
|
|
|
|
|
def _detect_amd():
    """Probe AMD GPUs through the amdgpu sysfs files under /sys/class/drm.

    Handles both discrete cards (with mem_info_vram_total) and APUs /
    unified-memory SoCs like Strix Halo (which expose
    mem_info_vis_vram_total instead, or only mem_info_gtt_total).

    Returns:
        A dict describing the GPUs (backend "rocm", plus "unified_memory",
        "gpu_arch" and "gpu_family"), or ``None`` when no AMD card with a
        non-zero memory size is found or anything raises along the way.
    """
    def _sysfs(path):
        # Remote reads go through `cat` over SSH; local reads open the file.
        if _remote_host:
            text = _run(["cat", path])
            return text.strip() if text else None
        try:
            with open(path, encoding="utf-8", errors="replace") as fh:
                return fh.read().strip()
        except Exception:
            return None

    def _byte_count(path):
        # Non-numeric or missing content counts as 0 bytes.
        raw = _sysfs(path)
        return int(raw) if raw and raw.isdigit() else 0

    def _drm_entries():
        # Keep only "cardN" entries; names with "-" (e.g. connectors) are skipped.
        if _remote_host:
            listing = _run(["ls", "/sys/class/drm"])
            names = listing.split() if listing else []
        else:
            try:
                names = os.listdir("/sys/class/drm")
            except Exception:
                return []
        return [n for n in names if n.startswith("card") and "-" not in n]

    def _arch_from_rocminfo():
        """Best-effort AMD GPU ISA + family from rocminfo.

        rocminfo's GPU agents report a `Name: gfxNNNN` line (CPU agents
        report a brand string, not a gfx target), so the first gfx match is
        taken as the GPU ISA. Returns (gfx, family) — see classify_amd_gfx.
        """
        info = _run(["rocminfo"]) or _run(["/opt/rocm/bin/rocminfo"]) or ""
        found = re.search(r"gfx\d+[a-f]?", info)
        return classify_amd_gfx(found.group(0) if found else "")

    try:
        cards = []
        unified = False
        # NOTE(review): the index is the position among ALL listed DRM cards
        # (non-AMD ones included, in listing order) — confirm it matches the
        # device ordinal the serve command expects.
        for position, entry in enumerate(_drm_entries()):
            device = f"/sys/class/drm/{entry}/device"
            if _sysfs(f"{device}/vendor") != "0x1002":
                continue

            # Discrete cards usually report real VRAM in mem_info_vram_total,
            # while some AMD APUs / Docker views expose a tiny vram_total and
            # the usable pool in vis_vram_total. Use the larger of those two;
            # only fall back to GTT if neither VRAM field is available.
            vram_total = _byte_count(f"{device}/mem_info_vram_total")
            visible_total = _byte_count(f"{device}/mem_info_vis_vram_total")
            gtt_total = _byte_count(f"{device}/mem_info_gtt_total")
            usable = max(vram_total, visible_total) or gtt_total

            # NOTE(review): presumably discrete cards with Resizable BAR can
            # also report vis_vram_total == vram_total — verify this does not
            # mislabel them as unified memory.
            if visible_total and visible_total >= vram_total:
                unified = True
            if usable <= 0:
                continue

            label = _sysfs(f"{device}/product_name") or f"AMD GPU ({entry})"
            cards.append({"index": position, "name": label, "vram_gb": usable / (1024**3)})

        if not cards:
            return None

        combined_vram = sum(card["vram_gb"] for card in cards)
        pools = _group_gpus(cards)
        gfx, family = _arch_from_rocminfo()

        # NOTE: for APUs with BIOS UMA carveout (e.g. Strix Halo),
        # vis_vram_total is the real usable GPU memory — it's physically
        # backed but reserved by BIOS so it doesn't appear in /proc/meminfo.
        # Don't cap it at system RAM: the two pools are separate from the
        # OS's perspective.
        return {
            "gpu_name": cards[0]["name"],
            "gpu_vram_gb": round(combined_vram, 1),
            "gpu_count": len(cards),
            "gpus": cards,
            "gpu_groups": pools,
            "homogeneous": len(pools) <= 1,
            "backend": "rocm",
            "unified_memory": unified,
            # AMD ISA/family so downstream can tell datacenter Instinct
            # (CDNA) from consumer Radeon (RDNA). Empty/"unknown" when
            # rocminfo isn't available — callers must treat unknown
            # conservatively, not assume vLLM works.
            "gpu_arch": gfx,
            "gpu_family": family,
        }
    except Exception:
        return None
|
|
|
|
|
|
def _detect_apple_silicon():
    """Probe an Apple Silicon (M-series) GPU.

    Macs have no discrete VRAM — the GPU shares the system's unified memory.
    A fraction of total RAM is reported as the usable GPU budget so the
    machine is not classified as CPU-only. Works locally
    (platform.system() == "Darwin") and over SSH (uname -s reports Darwin).

    Returns:
        A dict with backend "metal" and "unified_memory": True, or ``None``
        when the target is not macOS, is not an ARM machine, or its memory
        size cannot be read.
    """
    # Gate to macOS — locally via platform, remotely via uname.
    if _remote_host:
        kernel = (_run(["uname", "-s"]) or "").lower()
        if "darwin" not in kernel:
            return None
        arch = (_run(["uname", "-m"]) or "").lower()
    elif platform.system() == "Darwin":
        arch = platform.machine().lower()
    else:
        return None

    # Only Apple Silicon (arm64) is handled here; Intel Macs fall through to
    # the CPU path.
    if not ("arm" in arch or "aarch64" in arch):
        return None

    # Chip name, e.g. "Apple M4 Max".
    brand = (_run(["sysctl", "-n", "machdep.cpu.brand_string"]) or "Apple Silicon").strip()

    # Total unified memory, reported by sysctl in bytes.
    memsize = _run(["sysctl", "-n", "hw.memsize"])
    try:
        total_gb = int(memsize) / (1024**3) if memsize else 0.0
    except ValueError:
        total_gb = 0.0
    if total_gb <= 0:
        return None

    # Usable GPU budget: smaller machines keep more memory back for the OS
    # and apps, so the fraction scales with total RAM.
    fraction = 0.67 if total_gb <= 16 else 0.75 if total_gb <= 64 else 0.80
    vram_gb = round(total_gb * fraction, 1)

    # Honour an explicit override if the user raised the limit with
    # `sudo sysctl iogpu.wired_limit_mb=…`; 0 or unreadable means "default".
    wired = _run(["sysctl", "-n", "iogpu.wired_limit_mb"])
    try:
        wired_mb = int(wired) if wired else 0
    except ValueError:
        wired_mb = 0
    if wired_mb > 0:
        vram_gb = round(wired_mb / 1024.0, 1)

    device = {"index": 0, "name": brand, "vram_gb": vram_gb}
    return {
        "gpu_name": brand,
        "gpu_vram_gb": vram_gb,
        "gpu_count": 1,
        "gpus": [device],
        "gpu_groups": _group_gpus([device]),
        "homogeneous": True,
        "backend": "metal",
        # Unified memory: the "VRAM" above is carved out of system RAM, not
        # a separate pool.
        "unified_memory": True,
    }
|
|
|
|
|
|
def _read_file(path):
    """Return the text of *path*, read locally or via ``cat`` over SSH.

    Returns ``None`` when the file cannot be read. Note that the remote
    path goes through _run(), which strips surrounding whitespace, while
    the local path returns the content unmodified.
    """
    if not _remote_host:
        try:
            with open(path, encoding="utf-8", errors="replace") as fh:
                return fh.read()
        except Exception:
            return None
    return _run(["cat", path])
|
|
|
|
|
|
def _parse_meminfo():
    """Parse /proc/meminfo into ``{key: int}``.

    Each ``Key: <number> [unit]`` line contributes its first numeric field;
    lines without a colon, without a value, or with a non-integer value are
    ignored. Returns an empty dict when the file cannot be read.
    """
    text = _read_file("/proc/meminfo")
    if not text:
        return {}

    parsed = {}
    for line in text.split("\n"):
        key, sep, rest = line.partition(":")
        if not sep:
            continue
        fields = rest.strip().split()
        if not fields:
            continue
        try:
            parsed[key.strip()] = int(fields[0])
        except ValueError:
            continue
    return parsed
|
|
|
|
|
|
def _get_ram_gb():
    """Return total system RAM in GB, or 0.0 when it cannot be determined.

    Sources are tried in order: /proc/meminfo ``MemTotal``, then
    ``os.sysconf`` (local only), then ``sysctl -n hw.memsize``.
    """
    mem_total = _parse_meminfo().get("MemTotal")
    if mem_total is not None:
        # MemTotal is divided by 1024**2 to get GB.
        return mem_total / (1024**2)

    # os.sysconf only exists on Unix; on Windows it's absent and these
    # constants aren't defined — guard so this never raises there.
    sysconf_usable = (
        not _remote_host
        and hasattr(os, "sysconf")
        and "SC_PHYS_PAGES" in getattr(os, "sysconf_names", {})
    )
    if sysconf_usable:
        try:
            page_count = os.sysconf("SC_PHYS_PAGES")
            bytes_per_page = os.sysconf("SC_PAGE_SIZE")
            if page_count and bytes_per_page:
                return (page_count * bytes_per_page) / (1024**3)
        except Exception:
            pass

    # macOS has no /proc/meminfo — fall back to sysctl (works locally and
    # over SSH to a remote Mac, where the sysconf path above isn't taken).
    memsize = _run(["sysctl", "-n", "hw.memsize"])
    if memsize:
        try:
            return int(memsize.strip()) / (1024**3)
        except ValueError:
            pass
    return 0.0
|
|
|
|
|
|
def _get_available_ram_gb():
    """Return available RAM in GB.

    Uses /proc/meminfo ``MemAvailable`` when present; otherwise estimates
    it as 70% of total RAM.
    """
    meminfo = _parse_meminfo()
    try:
        return meminfo["MemAvailable"] / (1024**2)
    except KeyError:
        return _get_ram_gb() * 0.7
|
|
|
|
|
|
def _get_cpu_name():
    """Return a human-readable CPU name, or "unknown".

    Sources are tried in order: the first non-empty ``model name`` entry in
    /proc/cpuinfo, then ``sysctl -n machdep.cpu.brand_string``, then (local
    only) ``platform.processor()``.
    """
    text = _read_file("/proc/cpuinfo")
    if text:
        for line in text.split("\n"):
            if not line.startswith("model name"):
                continue
            # partition() never raises on a malformed line without ":", and
            # an empty value is skipped so the fallbacks below still run.
            _, sep, value = line.partition(":")
            value = value.strip()
            if sep and value:
                return value

    # macOS has no /proc/cpuinfo — sysctl gives the chip name (e.g. "Apple M4").
    # Harmlessly returns nothing on Linux, so it's safe to try unconditionally.
    brand = _run(["sysctl", "-n", "machdep.cpu.brand_string"])
    if brand and brand.strip():
        return brand.strip()

    # platform.processor() describes THIS machine, so only use it locally.
    if not _remote_host:
        return platform.processor() or "unknown"
    return "unknown"
|
|
|
|
|
|
def _get_cpu_count():
    """Return the number of logical CPUs of the probe target (always >= 1).

    Remote targets are asked via ``nproc`` (Linux) or
    ``sysctl -n hw.ncpu`` (macOS). Both local and remote targets then fall
    back to counting ``processor`` lines in /proc/cpuinfo. Only a local
    probe may use ``os.cpu_count()``; an undeterminable remote count is
    reported as 1.
    """
    if _remote_host:
        # nproc on Linux; hw.ncpu via sysctl on a remote Mac (no nproc there).
        out = _run(["nproc"]) or _run(["sysctl", "-n", "hw.ncpu"])
        if out:
            try:
                count = int(out.strip())
            except ValueError:
                count = 0
            if count > 0:
                return count

    # Fallback: count "processor" lines in /proc/cpuinfo. A file without any
    # such line must not be reported as 0 cores.
    text = _read_file("/proc/cpuinfo")
    if text:
        count = sum(1 for line in text.split("\n") if line.startswith("processor"))
        if count > 0:
            return count

    if _remote_host:
        # os.cpu_count() would describe the local machine, not the remote
        # host; report the same conservative default the Windows probe uses.
        return 1
    return os.cpu_count() or 1
|
|
|
|
|
|
def _powershell_exe():
|
|
"""Pick the best PowerShell executable for LOCAL execution: prefer pwsh
|
|
(PowerShell 7+), fall back to Windows PowerShell 5.1. Returns an absolute
|
|
path so we don't depend on a particular PATH ordering."""
|
|
return shutil.which("pwsh") or shutil.which("powershell") or "powershell"
|
|
|
|
|
|
def _detect_windows():
    """Detect Windows hardware via PowerShell/WMI.

    Works for BOTH local (host="") and remote (SSH) detection:
      * remote -> the script is shipped over SSH as a base64
                  ``-EncodedCommand`` so no shell on the way can re-parse
                  its quotes or ``$`` variables.
      * local  -> `_run` executes a list argv directly (no shell involved).

    Returns:
        A result dict shaped like the Linux path of detect_system()
        (RAM, CPU, GPU fields, one homogeneous GPU pool), or ``None`` when
        PowerShell produced no output or the output is not valid JSON.
    """
    # Single PowerShell command that gathers all hardware info at once
    ps_cmd = (
        "$r = @{}; "
        "$os = Get-CimInstance Win32_OperatingSystem; "
        "$r.ram_gb = [math]::Round($os.TotalVisibleMemorySize / 1048576, 1); "
        "$r.avail_gb = [math]::Round($os.FreePhysicalMemory / 1048576, 1); "
        "$cpu = Get-CimInstance Win32_Processor | Select-Object -First 1; "
        "$r.cpu_name = $cpu.Name; "
        "$r.cpu_cores = (Get-CimInstance Win32_Processor | Measure-Object -Property NumberOfLogicalProcessors -Sum).Sum; "
        "$r.arch = $cpu.AddressWidth; "
        # GPU detection via nvidia-smi (fastest) or WMI fallback
        "try { "
        " $nv = nvidia-smi --query-gpu=memory.total,name --format=csv,noheader,nounits 2>$null; "
        " if ($LASTEXITCODE -eq 0 -and $nv) { "
        " $gpus = @(); "
        " foreach ($line in $nv -split \"`n\") { "
        " $p = $line -split ','; "
        " if ($p.Count -ge 2) { $gpus += [pscustomobject]@{name=$p[1].Trim(); vram_mb=[double]$p[0].Trim()} } "
        " }; "
        " $r.gpu_name = $gpus[0].name; "
        " $r.gpu_vram_gb = [math]::Round(($gpus | Measure-Object -Property vram_mb -Sum).Sum / 1024, 1); "
        " $r.gpu_count = $gpus.Count; "
        " $r.gpu_backend = 'cuda'; "
        " } "
        "} catch {}; "
        "if (-not $r.gpu_name) { "
        " $wmiGpu = Get-CimInstance Win32_VideoController | Where-Object { $_.AdapterRAM -gt 0 } | Select-Object -First 1; "
        " if ($wmiGpu) { "
        " $r.gpu_name = $wmiGpu.Name; "
        " $r.gpu_vram_gb = [math]::Round($wmiGpu.AdapterRAM / 1073741824, 1); "
        " $r.gpu_count = 1; "
        " $r.gpu_backend = 'cpu_x86'; "  # WMI doesn't tell us CUDA/ROCm
        " } "
        "}; "
        "$r | ConvertTo-Json -Compress"
    )
    if _remote_host:
        # Remote: the script contains double quotes and `$` variables, so
        # wrapping it in "..." lets the remote shell re-parse it.
        # -EncodedCommand takes the script as base64 of its UTF-16LE bytes,
        # which is plain [A-Za-z0-9+/=] and survives any shell untouched.
        import base64

        encoded = base64.b64encode(ps_cmd.encode("utf-16-le")).decode("ascii")
        out = _run(f"powershell -NoProfile -NonInteractive -EncodedCommand {encoded}")
    else:
        # Local: pass a LIST argv straight to subprocess so the OS hands
        # ps_cmd to PowerShell verbatim. Prefer pwsh (PS7), else Windows
        # PowerShell 5.1.
        out = _run([_powershell_exe(), "-NoProfile", "-NonInteractive", "-Command", ps_cmd])
    if not out:
        return None

    import json as _json

    try:
        d = _json.loads(out)

        # PowerShell's Measure-Object .Sum / .Count come back as JSON numbers
        # and may decode to float; the Linux path returns plain ints for
        # these — coerce so the dict shape matches across platforms.
        def _as_int(v, default):
            try:
                return int(v)
            except (TypeError, ValueError):
                return default

        _cpu_name = (d.get("cpu_name") or "unknown")
        if isinstance(_cpu_name, str):
            _cpu_name = _cpu_name.strip() or "unknown"
        result = {
            # `or 0` also covers a JSON null (key present, value None).
            "total_ram_gb": d.get("ram_gb") or 0,
            "available_ram_gb": d.get("avail_gb") or 0,
            "cpu_cores": _as_int(d.get("cpu_cores"), 1),
            "cpu_name": _cpu_name,
            "has_gpu": bool(d.get("gpu_name")),
            "gpu_name": d.get("gpu_name"),
            "gpu_vram_gb": d.get("gpu_vram_gb"),
            "gpu_count": _as_int(d.get("gpu_count"), 0),
            "backend": d.get("gpu_backend", "cpu_x86"),
            "homogeneous": True,
            "gpu_error": None,
        }
        # PowerShell only reports aggregate GPU info, not per-card detail, so
        # we can't tell a mixed box from a uniform one here — assume one
        # homogeneous pool spanning all reported GPUs.
        _n = result["gpu_count"] or 0
        if result["has_gpu"] and _n > 0:
            _each = round((result["gpu_vram_gb"] or 0) / _n, 1)
            result["gpus"] = [
                {"index": i, "name": result["gpu_name"], "vram_gb": _each} for i in range(_n)
            ]
            result["gpu_groups"] = [{
                "name": result["gpu_name"],
                "vram_each": _each,
                "count": _n,
                "indices": list(range(_n)),
                "vram_total": result["gpu_vram_gb"],
            }]
            result["homogeneous"] = True
        return result
    except Exception:
        # Malformed or unexpected JSON: treat the probe as failed.
        return None
|
|
|
|
|
|
# detect_system() result cache. Key is the host string, or "_local" for the
# local machine; entries expire after CACHE_TTL seconds.
_cache_by_host = {}  # host -> (timestamp, result)
|
|
|
|
|
|
def _probe_system(host):
    """Run one uncached hardware probe against the current remote target.

    Reads the module-level ``_remote_host`` / ``_remote_platform`` that
    detect_system() has set; *host* is only used for error messages.
    """
    # Remote Windows: single PowerShell command for all hardware info.
    if _remote_platform == "windows" and _remote_host:
        return _detect_windows() or {"error": f"Cannot connect to {host}", "host": host}

    # Local Windows: the Linux /proc + /sys + os.sysconf path returns 0 GB
    # RAM, "unknown" CPU and no GPU on Windows, so detect via PowerShell/WMI
    # instead. If that probe fails entirely, fall through to the generic
    # path so we at least return a well-shaped dict.
    if not _remote_host and os.name == "nt":
        windows_result = _detect_windows()
        if windows_result:
            return windows_result

    # Linux/Termux/macOS: multi-command detection.
    total_ram = round(_get_ram_gb(), 1)
    # If a remote host reports 0 RAM, the connection most likely failed.
    if _remote_host and total_ram <= 0:
        return {"error": f"Cannot connect to {host}", "host": host}
    available_ram = round(_get_available_ram_gb(), 1)
    cpu_cores = _get_cpu_count()
    cpu_name = _get_cpu_name()

    gpu_info = _detect_apple_silicon() or _detect_nvidia() or _detect_amd()

    if gpu_info:
        result = {
            "total_ram_gb": total_ram,
            "available_ram_gb": available_ram,
            "cpu_cores": cpu_cores,
            "cpu_name": cpu_name,
            "has_gpu": True,
            "gpu_name": gpu_info["gpu_name"],
            "gpu_vram_gb": gpu_info["gpu_vram_gb"],
            "gpu_count": gpu_info["gpu_count"],
            "gpus": gpu_info.get("gpus", []),
            "gpu_groups": gpu_info.get("gpu_groups", []),
            "homogeneous": gpu_info.get("homogeneous", True),
            "backend": gpu_info["backend"],
            # Apple Silicon / AMD APUs share system RAM with the GPU — carry
            # the flag through so callers can tell unified from discrete VRAM.
            "unified_memory": gpu_info.get("unified_memory", False),
        }
        # _detect_amd() reports the ISA target and family so consumers can
        # tell consumer RDNA from datacenter CDNA; pass them through when
        # present instead of dropping them here.
        for key in ("gpu_arch", "gpu_family"):
            if key in gpu_info:
                result[key] = gpu_info[key]
        return result

    if _remote_host:
        arch_out = (_run(["uname", "-m"]) or "").lower()
    else:
        arch_out = platform.machine().lower()
    backend = "cpu_arm" if "aarch64" in arch_out or "arm" in arch_out else "cpu_x86"
    return {
        "total_ram_gb": total_ram,
        "available_ram_gb": available_ram,
        "cpu_cores": cpu_cores,
        "cpu_name": cpu_name,
        "has_gpu": False,
        "gpu_name": None,
        "gpu_vram_gb": None,
        "gpu_count": 0,
        "backend": backend,
        # Set when nvidia-smi exists but failed (e.g. driver/library version
        # mismatch) — lets the UI say "GPU driver error" instead of the
        # misleading "No GPU".
        "gpu_error": _last_gpu_error,
    }


def detect_system(host="", ssh_port="", platform="", fresh=False):
    """Detect system hardware: RAM, CPU, GPU.

    Results are cached per host for CACHE_TTL seconds (hardware rarely
    changes, and probing a remote host over SSH is slow). Error results are
    cached as well, so an unreachable host is not re-probed on every call.

    Args:
        host: SSH target such as 'user@server'; empty for the local machine.
        ssh_port: SSH port for the remote host; empty for the default.
        platform: "windows", "linux", "termux", or "" (auto-detect).
        fresh: Bypass the cache and re-probe (the "Rescan" button).

    Returns:
        A dict describing RAM, CPU and GPU, or
        ``{"error": ..., "host": ...}`` when a remote host cannot be probed.
    """
    global _remote_host, _remote_port, _remote_platform

    cache_key = host or "_local"
    now = time.time()
    if not fresh and cache_key in _cache_by_host:
        ts, cached = _cache_by_host[cache_key]
        if (now - ts) < CACHE_TTL:
            return cached

    _remote_host = host or None
    _remote_port = ssh_port or None
    _remote_platform = platform or None
    try:
        result = _probe_system(host)
    finally:
        # Always clear the probe target — even if a helper raised — so no
        # later helper call accidentally runs against a stale remote host.
        _remote_host = None
        _remote_port = None
        _remote_platform = None

    _cache_by_host[cache_key] = (now, result)
    return result
|