Fix native Cookbook quant classification

2026-06-02 14:07:20 +10:00
parent 65b5d65059
commit cd4f496cb4
6 changed files with 201 additions and 44 deletions
--- a/static/js/cookbook-hwfit.js
+++ b/static/js/cookbook-hwfit.js
@@ -827,7 +827,9 @@ export function _hwfitRenderList(el, models) {
    const pcount = m.parameter_count || '?';
    const ctx = m.context ? (m.context >= 1024 ? (m.context / 1024).toFixed(0) + 'k' : m.context) : '?';
    const fitLabel = (m.fit_level || '').replace('_', ' ');
-    const modeLabel = (m.run_mode || '').replace('_', '+');
+    const modeLabel = m.run_mode === 'cpu_offload'
+      ? 'cpu+offload'
+      : (m.run_mode || '').replace(/_/g, ' ');
    const vramLabel = m.required_gb ? m.required_gb.toFixed(1) + 'G' : '?';
    const moeBadge = m.is_moe ? '<span class="hwfit-badge hwfit-moe">MoE</span>' : '';
    const imgBadge = m.is_image_gen ? '<span class="hwfit-badge" style="background:color-mix(in srgb, var(--red) 20%, transparent);color:var(--red);font-size:8px;padding:1px 4px;border-radius:3px;margin-left:4px;">IMG</span>' : '';
--- a/static/js/cookbook.js
+++ b/static/js/cookbook.js
@@ -262,10 +262,10 @@ export function _detectBackend(model) {
  const isRocm = sysBackend === 'rocm';
  const isAppleSilicon = ['metal', 'mps', 'apple'].includes(sysBackend);
  const _nm = `${model.repo_id || ''} ${model.path || ''} ${model.name || ''}`.toLowerCase();
-  if (!isAppleSilicon && (/\bmlx\b|mlx-|_mlx/i.test(_nm) || q.startsWith('MLX'))) {
+  if (/\bmlx\b|mlx-|_mlx/i.test(_nm) || q.startsWith('MLX')) {
    return { backend: 'unsupported', label: 'Unsupported' };
  }
-  const isAwqLike = /^AWQ|^GPTQ|^NVFP4/.test(q) || q === 'FP8' || /\b(awq|gptq|fp8|nvfp4)\b/i.test(_nm);
+  const isAwqLike = /^AWQ|^GPTQ|^NVFP4/.test(q) || ['FP8', 'FP4', 'MXFP4', 'NF4', 'INT4', 'INT8', 'W4A16', 'W8A8', 'W8A16'].includes(q) || /\b(awq|gptq|fp8|fp4|nvfp4|mxfp4|nf4|int4|int8|w4a16|w8a8|w8a16)\b/i.test(_nm);
  const isGgufLike = model.is_gguf || /^Q[2-8]/.test(q) || /^IQ/.test(q) || q === 'GGUF' || _nm.includes('gguf');

  // Image gen models → diffusers
@@ -291,7 +291,7 @@ export function _detectBackend(model) {
  }

  // Apple Silicon (Metal) → llama.cpp (GGUF). vLLM/SGLang are CUDA/ROCm-only and
-  // don't run on macOS; AWQ/GPTQ/FP8 (vLLM-only) models are already filtered out
+  // don't run on macOS; vLLM-native quantized models are already filtered out
  // of metal Cookbook results, so llama.cpp is always the right engine here.
  if (['metal', 'mps', 'apple'].includes(sysBackend)) {
    return { backend: 'llamacpp', label: 'llama.cpp' };
@@ -1516,7 +1516,7 @@ function _renderRecipes() {
  html += '<option value="Q4_K_M">Q4</option><option value="Q8_0">Q8</option>';
  html += '<option value="Q6_K">Q6</option><option value="Q5_K_M">Q5</option>';
  html += '<option value="Q3_K_M">Q3</option><option value="Q2_K">Q2</option>';
-  html += '<option value="AWQ-4bit">AWQ</option><option value="FP8">FP8</option>';
+  html += '<option value="AWQ-4bit">AWQ</option><option value="FP8">FP8</option><option value="FP4">FP4</option>';
  html += '<option value="">Native</option></select>';
  // Engine filter: show only models whose serve engine matches. "llama.cpp"
  // (GGUF) runs everywhere incl. consumer AMD/Apple; vLLM/SGLang are CUDA /