diff --git a/services/hwfit/fit.py b/services/hwfit/fit.py index 3136d7b..9a45b53 100644 --- a/services/hwfit/fit.py +++ b/services/hwfit/fit.py @@ -377,7 +377,7 @@ def analyze_model(model, system, target_quant=None, scoring_use_case=None, targe # Multi-GPU filter: skip the row if the resolved quant is a GGUF tier # (Q*/IQ-prefixed) — vLLM/SGLang can't serve those, so showing them on # a 2+ GPU rig just clutters the list with unservable candidates. - if gpu_count >= 2 and quant_to_try and quant_to_try.upper().startswith(("Q2", "Q3", "Q4", "Q5", "Q6", "Q8", "IQ")): + if gpu_count >= 2 and quant_to_try and not target_quant and quant_to_try.upper().startswith(("Q2", "Q3", "Q4", "Q5", "Q6", "Q8", "IQ")): return None result = _try_quant_at(model, quant_to_try, ctx, effective_vram, 0 if native_gpu_only else eff_ram)