fix(hwfit): detect unified-memory NVIDIA (Grace Blackwell GB10 / DGX Spark) instead of 'No GPU' (#1340) (#1372)
_detect_nvidia parsed `nvidia-smi --query-gpu=memory.total,name` and called float(memory.total) on each row, dropping the row on ValueError. Grace Blackwell GB10 (DGX Spark, sm_121) reports memory.total as '[N/A]'/'Not Supported' because the GPU shares the system LPDDR pool rather than carrying discrete VRAM — so the only GPU row was dropped and a real GB10 (even with vLLM running on it) was reported as 'No GPU', breaking Cookbook recommendations and model switching.

Keep a named device whose memory.total is non-numeric: when there are no discrete-VRAM rows but such unified devices exist, report a unified-memory CUDA GPU backed by the system RAM pool (has_gpu, name, backend=cuda, count, unified_memory=True) — mirroring how Apple Silicon and AMD APUs are already handled. Discrete GPUs are unchanged, and a box with a real discrete GPU keeps the discrete path.

Adds tests/test_hwfit_unified_nvidia.py with a GB10 nvidia-smi fixture. The tests check that the device is detected (not dropped), that it surfaces through detect_system with unified_memory propagated, that discrete GPUs stay non-unified, and that a discrete GPU takes precedence over an N/A-memory row.

Co-authored-by: NubsCarson <nubs@nubs.site>
This commit is contained in:
@@ -105,6 +105,8 @@ def _detect_nvidia():
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
gpus = []
|
gpus = []
|
||||||
|
# Devices nvidia-smi lists with a real name but a non-numeric memory.total.
|
||||||
|
unified = []
|
||||||
# nvidia-smi lists GPUs in index order (0,1,2,...), so the row position is
|
# nvidia-smi lists GPUs in index order (0,1,2,...), so the row position is
|
||||||
# the CUDA device index we'd pass to CUDA_VISIBLE_DEVICES.
|
# the CUDA device index we'd pass to CUDA_VISIBLE_DEVICES.
|
||||||
for idx, line in enumerate(out.strip().split("\n")):
|
for idx, line in enumerate(out.strip().split("\n")):
|
||||||
@@ -114,9 +116,32 @@ def _detect_nvidia():
|
|||||||
vram_mb = float(parts[0])
|
vram_mb = float(parts[0])
|
||||||
gpus.append({"index": idx, "name": parts[1], "vram_gb": vram_mb / 1024.0})
|
gpus.append({"index": idx, "name": parts[1], "vram_gb": vram_mb / 1024.0})
|
||||||
except ValueError:
|
except ValueError:
|
||||||
|
# Grace Blackwell GB10 / DGX Spark and other unified-memory
|
||||||
|
# NVIDIA parts report memory.total as "[N/A]"/"Not Supported"
|
||||||
|
# because the GPU shares the system LPDDR pool instead of
|
||||||
|
# carrying discrete VRAM. Don't drop the device — remember it so
|
||||||
|
# we report a unified-memory GPU below rather than "No GPU" (#1340).
|
||||||
|
if parts[1]:
|
||||||
|
unified.append({"index": idx, "name": parts[1]})
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if not gpus:
|
if not gpus:
|
||||||
|
if unified:
|
||||||
|
# Unified-memory CUDA box: report the GPU backed by system RAM so the
|
||||||
|
# Cookbook recommends models and serving works. The pool is shared
|
||||||
|
# (not per-GPU discrete VRAM), so report the RAM total once.
|
||||||
|
ram_gb = round(_get_ram_gb(), 1)
|
||||||
|
gpus = [{"index": g["index"], "name": g["name"], "vram_gb": ram_gb} for g in unified]
|
||||||
|
return {
|
||||||
|
"gpu_name": gpus[0]["name"],
|
||||||
|
"gpu_vram_gb": ram_gb,
|
||||||
|
"gpu_count": len(gpus),
|
||||||
|
"gpus": gpus,
|
||||||
|
"gpu_groups": _group_gpus(gpus),
|
||||||
|
"homogeneous": True,
|
||||||
|
"backend": "cuda",
|
||||||
|
"unified_memory": True,
|
||||||
|
}
|
||||||
return None
|
return None
|
||||||
total_vram = sum(g["vram_gb"] for g in gpus)
|
total_vram = sum(g["vram_gb"] for g in gpus)
|
||||||
groups = _group_gpus(gpus)
|
groups = _group_gpus(gpus)
|
||||||
|
|||||||
73
tests/test_hwfit_unified_nvidia.py
Normal file
73
tests/test_hwfit_unified_nvidia.py
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
"""Unified-memory NVIDIA detection — Grace Blackwell GB10 / DGX Spark (#1340).
|
||||||
|
|
||||||
|
GB10 (and other unified-memory NVIDIA parts) report `nvidia-smi
|
||||||
|
--query-gpu=memory.total` as "[N/A]"/"Not Supported" because the GPU shares the
|
||||||
|
system LPDDR pool instead of carrying discrete VRAM. The detector did
|
||||||
|
`float(memory.total)` and, on the ValueError, `continue`d — dropping the only
|
||||||
|
GPU row, so a real GB10 running vLLM was reported as "No GPU" and Cookbook
|
||||||
|
recommendations/model-switching broke. These tests pin that such a device is detected
|
||||||
|
as a unified-memory CUDA GPU backed by system RAM, while discrete GPUs are
|
||||||
|
unchanged.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from services.hwfit import hardware
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
|
||||||
|
def _local(monkeypatch):
|
||||||
|
monkeypatch.setattr(hardware, "_remote_host", None)
|
||||||
|
|
||||||
|
|
||||||
|
def test_gb10_unified_memory_detected_not_dropped(monkeypatch):
|
||||||
|
# Real GB10 nvidia-smi --query-gpu=memory.total,name output: memory is N/A.
|
||||||
|
monkeypatch.setattr(hardware, "_run", lambda cmd: "[N/A], NVIDIA GB10")
|
||||||
|
monkeypatch.setattr(hardware, "_get_ram_gb", lambda: 128.0)
|
||||||
|
info = hardware._detect_nvidia()
|
||||||
|
assert info is not None, "GB10 was dropped as 'No GPU'"
|
||||||
|
assert info["gpu_name"] == "NVIDIA GB10"
|
||||||
|
assert info["backend"] == "cuda"
|
||||||
|
assert info["gpu_count"] == 1
|
||||||
|
assert info["unified_memory"] is True
|
||||||
|
assert info["gpu_vram_gb"] == 128.0 # backed by the unified RAM pool
|
||||||
|
assert hardware._last_gpu_error is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_detect_system_reports_gb10_as_gpu(monkeypatch):
|
||||||
|
"""End-to-end through detect_system: has_gpu True + unified_memory propagated."""
|
||||||
|
monkeypatch.setattr(hardware, "_run", lambda cmd: "[N/A], NVIDIA GB10")
|
||||||
|
monkeypatch.setattr(hardware, "_get_ram_gb", lambda: 128.0)
|
||||||
|
monkeypatch.setattr(hardware, "_get_available_ram_gb", lambda: 120.0)
|
||||||
|
monkeypatch.setattr(hardware, "_get_cpu_count", lambda: 20)
|
||||||
|
monkeypatch.setattr(hardware, "_get_cpu_name", lambda: "NVIDIA Grace")
|
||||||
|
monkeypatch.setattr(hardware, "_detect_apple_silicon", lambda: None)
|
||||||
|
s = hardware.detect_system(fresh=True)
|
||||||
|
assert s["has_gpu"] is True
|
||||||
|
assert s["gpu_name"] == "NVIDIA GB10"
|
||||||
|
assert s["backend"] == "cuda"
|
||||||
|
assert s.get("unified_memory") is True
|
||||||
|
|
||||||
|
|
||||||
|
def test_discrete_gpu_unchanged_and_not_unified(monkeypatch):
|
||||||
|
monkeypatch.setattr(hardware, "_run", lambda cmd: "24576, NVIDIA GeForce RTX 4090")
|
||||||
|
info = hardware._detect_nvidia()
|
||||||
|
assert info["gpu_vram_gb"] == 24.0
|
||||||
|
assert info["gpu_count"] == 1
|
||||||
|
assert not info.get("unified_memory")
|
||||||
|
|
||||||
|
|
||||||
|
def test_discrete_takes_precedence_over_unified_row(monkeypatch):
|
||||||
|
"""A box with a real discrete-VRAM GPU keeps the discrete path; the
|
||||||
|
N/A-memory row is not conflated into a unified pool."""
|
||||||
|
monkeypatch.setattr(hardware, "_run", lambda cmd: "24576, NVIDIA RTX 4090\n[N/A], NVIDIA GB10")
|
||||||
|
info = hardware._detect_nvidia()
|
||||||
|
assert info["gpu_name"] == "NVIDIA RTX 4090"
|
||||||
|
assert info["gpu_count"] == 1
|
||||||
|
assert not info.get("unified_memory")
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_gpu_still_none(monkeypatch):
|
||||||
|
"""No nvidia-smi output → still None, no spurious unified GPU."""
|
||||||
|
monkeypatch.setattr(hardware, "_run", lambda cmd: None)
|
||||||
|
assert hardware._detect_nvidia() is None
|
||||||
Reference in New Issue
Block a user