# odysseus/tests/test_hwfit_manual_backend.py

"""Manual hardware simulator backend handling (Cookbook "what if I had…").

`_apply_manual_hardware` replaces detected hardware with a user-described box so
the Cookbook can rank models against hardware you don't have yet. These tests pin that
the accepted backends stay in lock-step with what services.hwfit.fit can rank —
notably that "metal" is honoured (Apple Silicon is GGUF-only via llama.cpp /
Ollama) instead of being silently coerced to CUDA.
"""

from routes.hwfit_routes import _apply_manual_hardware, _MANUAL_BACKENDS
from services.hwfit.fit import rank_models
from services.hwfit.models import get_models


def test_no_manual_mode_leaves_system_untouched():
    """An empty or unrecognised ``manual_mode`` must return the system as given."""
    detected = {"backend": "cuda", "gpu_vram_gb": 24.0, "has_gpu": True}
    for mode in ("", "bogus"):
        # Hand over a copy so the comparison is always against a pristine reference.
        result = _apply_manual_hardware(dict(detected), manual_mode=mode)
        assert result == detected


def test_manual_metal_backend_is_accepted():
    """A manual 'metal' backend must be kept as-is rather than rewritten to 'cuda'.

    The simulated box is also expected to be flagged as a unified-memory GPU
    system whose GPU name mentions METAL.
    """
    request = {
        "manual_mode": "gpu",
        "manual_vram_gb": "24",
        "manual_backend": "metal",
    }
    simulated = _apply_manual_hardware({}, **request)

    assert simulated["backend"] == "metal"
    assert simulated["unified_memory"] is True
    assert simulated["has_gpu"] is True
    assert "METAL" in simulated["gpu_name"]


def test_manual_metal_vram_and_count_math():
    """Two simulated 24 GB Metal GPUs must total 48 GB, per-GPU list and group included."""
    simulated = _apply_manual_hardware(
        {},
        manual_mode="gpu",
        manual_gpu_count="2",
        manual_vram_gb="24",
        manual_backend="metal",
    )

    # Top-level totals.
    assert simulated["gpu_count"] == 2
    assert simulated["gpu_vram_gb"] == 48.0
    assert len(simulated["gpus"]) == 2

    # The first GPU group must describe the same two cards.
    first_group = simulated["gpu_groups"][0]
    expected_group_fields = (
        ("vram_each", 24.0),
        ("count", 2),
        ("vram_total", 48.0),
    )
    for field, expected in expected_group_fields:
        assert first_group[field] == expected


def test_manual_backend_whitelist_matches_fit_backends():
    """Drift guard: the manual-backend whitelist must equal this exact set.

    The set is meant to mirror the backends fit.py understands, so adding or
    removing a backend on either side should force this test to be revisited.
    """
    expected_backends = {"cuda", "rocm", "metal", "cpu_x86", "cpu_arm"}
    assert _MANUAL_BACKENDS == expected_backends


def test_unknown_manual_backend_falls_back_to_cuda():
    """A backend outside the whitelist ('tpu') must yield a plain CUDA box."""
    simulated = _apply_manual_hardware(
        {},
        manual_mode="gpu",
        manual_backend="tpu",
    )
    assert simulated["backend"] == "cuda"
    # The CUDA fallback must not carry a unified-memory flag.
    assert "unified_memory" not in simulated


def test_manual_rocm_and_cuda_are_not_unified_memory():
    """Simulating a CUDA or ROCm GPU must drop a pre-existing unified-memory flag."""
    discrete_backends = ("cuda", "rocm")
    for requested in discrete_backends:
        # Start from a system that still carries the flag from a previous state.
        stale_system = {"unified_memory": True}
        simulated = _apply_manual_hardware(
            stale_system,
            manual_mode="gpu",
            manual_backend=requested,
        )
        assert simulated["backend"] == requested
        # Discrete GPUs are not unified memory, so the stale flag must be gone.
        assert "unified_memory" not in simulated


def test_manual_ram_mode_wipes_gpu_and_unified_flag():
    """RAM-only simulation must produce a GPU-less x86 CPU box with the given RAM."""
    stale_system = {"unified_memory": True}
    simulated = _apply_manual_hardware(
        stale_system,
        manual_mode="ram",
        manual_ram_gb="64",
    )

    # No GPU of any kind is left behind.
    assert simulated["has_gpu"] is False
    assert simulated["backend"] == "cpu_x86"
    assert simulated["gpu_vram_gb"] == 0
    # The requested RAM is parsed from its string form.
    assert simulated["total_ram_gb"] == 64.0
    # The unified-memory flag from the input must not survive.
    assert "unified_memory" not in simulated


def test_simulated_metal_box_only_recommends_gguf():
    """End-to-end: every model ranked for a simulated Metal box must be GGUF-servable.

    A detected CUDA system is overridden with a manual 48 GB Metal GPU, then
    ranked. Each ranked model is looked up in the catalog by name and must
    either be flagged ``is_gguf`` or list ``gguf_sources``; a ranked name that
    is missing from the catalog counts as unservable. Before 'metal' was
    accepted, this box ranked as CUDA and surfaced safetensors-only repos that
    a Mac can't serve.
    """
    detected = {"backend": "cuda", "available_ram_gb": 32.0, "total_ram_gb": 64.0}
    simulated = _apply_manual_hardware(
        detected,
        manual_mode="gpu",
        manual_vram_gb="48",
        manual_backend="metal",
    )

    catalog_by_name = {entry["name"]: entry for entry in get_models()}

    # NOTE(review): if rank_models returns nothing, this check passes vacuously.
    unservable = []
    for ranked in rank_models(simulated, limit=900):
        entry = catalog_by_name.get(ranked["name"], {})
        if entry.get("is_gguf") or entry.get("gguf_sources"):
            continue
        unservable.append(ranked["name"])

    assert unservable == [], f"{len(unservable)} non-GGUF models on simulated Metal, e.g. {unservable[:3]}"