Files
odysseus/tests/test_hwfit_manual_backend.py
Shaw 16f7feee0a fix(hwfit): honor manual "metal" backend in the hardware simulator (#1090)
The Cookbook's manual hardware simulator ("what if I had this setup") let users
pick a backend, but _apply_manual_hardware only accepted cuda/rocm/cpu_x86/
cpu_arm and silently coerced anything else to cuda. So selecting Apple/Metal
simulated a CUDA box instead — and ranked safetensors-only repos a Mac can't
serve, even though the rest of hwfit (services.hwfit.fit, the serve-command
generation) already supports Metal as GGUF-only via llama.cpp/Ollama.

Add "metal" to the accepted backends (now a named _MANUAL_BACKENDS set, kept a
subset of what fit.py understands) and set unified_memory=True for it — Apple
Silicon shares one memory pool with the GPU — while clearing that flag for the
discrete (cuda/rocm) and CPU backends. _apply_manual_hardware is lifted to
module scope so it is directly unit-testable; both route call sites are
unchanged.

Adds tests/test_hwfit_manual_backend.py, including an end-to-end check that a
simulated Metal box only recommends GGUF-servable models.

Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
2026-06-02 23:12:34 +09:00

86 lines
3.6 KiB
Python

"""Manual hardware simulator backend handling (Cookbook "what if I had…").
`_apply_manual_hardware` replaces detected hardware with a user-described box so
the Cookbook can rank models against hardware you don't have yet. These tests pin
the accepted backends in lock-step with what services.hwfit.fit can rank —
notably that "metal" is honoured (Apple Silicon is GGUF-only via llama.cpp /
Ollama) instead of being silently coerced to CUDA.
"""
from routes.hwfit_routes import _apply_manual_hardware, _MANUAL_BACKENDS
from services.hwfit.fit import rank_models
from services.hwfit.models import get_models
def test_no_manual_mode_leaves_system_untouched():
    """An empty or unrecognised ``manual_mode`` must hand the system back as given."""
    detected = {"backend": "cuda", "gpu_vram_gb": 24.0, "has_gpu": True}
    for mode in ("", "bogus"):
        # Pass a copy so the comparison target cannot be altered by the call.
        result = _apply_manual_hardware(dict(detected), manual_mode=mode)
        assert result == detected
def test_manual_metal_backend_is_accepted():
    """A manual 'metal' backend must be kept, not rewritten to 'cuda'.

    The simulated box is expected to report itself as a unified-memory GPU
    system whose GPU name mentions METAL.
    """
    simulated = _apply_manual_hardware(
        {},
        manual_mode="gpu",
        manual_vram_gb="24",
        manual_backend="metal",
    )

    assert simulated["backend"] == "metal"
    assert simulated["unified_memory"] is True
    assert simulated["has_gpu"] is True
    assert "METAL" in simulated["gpu_name"]
def test_manual_metal_vram_and_count_math():
    """Two simulated 24 GB Metal GPUs must report per-GPU and summed VRAM."""
    simulated = _apply_manual_hardware(
        {},
        manual_mode="gpu",
        manual_gpu_count="2",
        manual_vram_gb="24",
        manual_backend="metal",
    )

    # Top-level totals: count, combined VRAM, and one entry per GPU.
    assert simulated["gpu_count"] == 2
    assert simulated["gpu_vram_gb"] == 48.0
    assert len(simulated["gpus"]) == 2

    # The first GPU group carries the same numbers broken down per GPU.
    first_group = simulated["gpu_groups"][0]
    assert first_group["vram_each"] == 24.0
    assert first_group["count"] == 2
    assert first_group["vram_total"] == 48.0
def test_manual_backend_whitelist_matches_fit_backends():
    """Drift guard: pin the exact set of backends the manual simulator accepts.

    The intent is that each of these is a backend fit.py understands; this
    test only checks the whitelist against the hard-coded expectation below.
    """
    expected_backends = {"cuda", "rocm", "metal", "cpu_x86", "cpu_arm"}
    assert _MANUAL_BACKENDS == expected_backends
def test_unknown_manual_backend_falls_back_to_cuda():
    """A backend outside the whitelist ('tpu') is simulated as plain CUDA."""
    simulated = _apply_manual_hardware(
        {},
        manual_mode="gpu",
        manual_backend="tpu",
    )

    assert simulated["backend"] == "cuda"
    # The CUDA fallback must not carry a unified-memory flag.
    assert "unified_memory" not in simulated
def test_manual_rocm_and_cuda_are_not_unified_memory():
    """Discrete-GPU backends must drop a pre-existing ``unified_memory`` flag."""
    for discrete_backend in ("cuda", "rocm"):
        # Start each run from a fresh system dict that carries the stale flag.
        stale_system = {"unified_memory": True}
        simulated = _apply_manual_hardware(
            stale_system,
            manual_mode="gpu",
            manual_backend=discrete_backend,
        )
        assert simulated["backend"] == discrete_backend
        # Discrete GPUs are not unified memory, so the stale flag must be gone.
        assert "unified_memory" not in simulated
def test_manual_ram_mode_wipes_gpu_and_unified_flag():
    """RAM-only mode yields a GPU-less cpu_x86 box with the requested RAM."""
    simulated = _apply_manual_hardware(
        {"unified_memory": True},
        manual_mode="ram",
        manual_ram_gb="64",
    )

    # GPU state is cleared and the backend becomes the x86 CPU path.
    assert simulated["has_gpu"] is False
    assert simulated["backend"] == "cpu_x86"
    assert simulated["gpu_vram_gb"] == 0
    # The requested RAM is applied and the stale unified-memory flag removed.
    assert simulated["total_ram_gb"] == 64.0
    assert "unified_memory" not in simulated
def test_simulated_metal_box_only_recommends_gguf():
    """End-to-end: a simulated Metal box must only rank GGUF-servable models.

    A detected CUDA system is overridden with a manual 48 GB Metal box and
    ranked. Every ranked model must have a catalog entry with a truthy
    ``is_gguf`` or ``gguf_sources``. Before 'metal' was accepted, this box
    ranked as CUDA and surfaced safetensors-only repos a Mac can't serve.
    """
    system = _apply_manual_hardware(
        {"backend": "cuda", "available_ram_gb": 32.0, "total_ram_gb": 64.0},
        manual_mode="gpu", manual_vram_gb="48", manual_backend="metal",
    )
    catalog = {m["name"]: m for m in get_models()}

    # Materialise the ranking so it can be checked for emptiness and then
    # iterated, whatever iterable type rank_models returns.
    ranked = list(rank_models(system, limit=900))
    # Without this guard an empty ranking would make the GGUF check below
    # pass vacuously and hide a broken Metal path.
    assert ranked, "simulated Metal box ranked no models; GGUF check would be vacuous"

    def _is_gguf_servable(model_name):
        """Return True if the catalog entry ships a GGUF or lists GGUF sources."""
        # A ranked name missing from the catalog counts as unservable.
        entry = catalog.get(model_name, {})
        return bool(entry.get("is_gguf") or entry.get("gguf_sources"))

    unservable = [r["name"] for r in ranked if not _is_gguf_servable(r["name"])]
    assert unservable == [], f"{len(unservable)} non-GGUF models on simulated Metal, e.g. {unservable[:3]}"