From 033852ab142c9135a6948d9002ae59409e398272 Mon Sep 17 00:00:00 2001
From: spooky <partialabstraction@gmail.com>
Date: Mon, 1 Jun 2026 23:47:47 +1000
Subject: [PATCH] fix: require GGUF sources for llama downloads (#368)

---
 services/hwfit/data/hf_models.json | 19 +++++--
 static/js/cookbook-hwfit.js        | 30 ++++++++++--
 static/js/cookbookDownload.js      | 79 ++++++++++++++++++++++++------
 tests/test_hwfit_macos.py          | 15 ++++++
 4 files changed, 122 insertions(+), 21 deletions(-)
diff --git a/services/hwfit/data/hf_models.json b/services/hwfit/data/hf_models.json
index 19ce4ef..0267535 100644
--- a/services/hwfit/data/hf_models.json
+++ b/services/hwfit/data/hf_models.json
@@ -7035,7 +7035,8 @@
   "gguf_sources": [
    {
     "repo": "unsloth/Qwen3.5-9B-GGUF",
-    "provider": "unsloth"
+    "provider": "unsloth",
+    "file": "Qwen3.5-9B-Q4_K_M.gguf"
    }
   ]
  },
@@ -13733,7 +13734,13 @@
   "architecture": "qwen3",
   "pipeline_tag": "text-generation",
   "release_date": "2026-04-01",
-  "gguf_sources": [],
+  "gguf_sources": [
+   {
+    "repo": "unsloth/Qwen3.6-27B-GGUF",
+    "provider": "unsloth",
+    "file": "Qwen3.6-27B-Q4_K_M.gguf"
+   }
+  ],
   "capabilities": []
  },
  {
@@ -13796,7 +13803,13 @@
   "architecture": "qwen3_moe",
   "pipeline_tag": "text-generation",
   "release_date": "2026-04-01",
-  "gguf_sources": [],
+  "gguf_sources": [
+   {
+    "repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
+    "provider": "unsloth",
+    "file": "Qwen3.6-35B-A3B-UD-Q4_K_M.gguf"
+   }
+  ],
   "capabilities": []
  },
  {
diff --git a/static/js/cookbook-hwfit.js b/static/js/cookbook-hwfit.js
index 818ca7d..e6445f8 100644
--- a/static/js/cookbook-hwfit.js
+++ b/static/js/cookbook-hwfit.js
@@ -48,6 +48,28 @@ let _removedHwChips = new Set();
 
 export let _gpuToggleTotal = 0; // real GPU count from first scan, never overridden
 
+// Return the first gguf_sources entry that carries a repo, or null when there is none.
+function _firstGgufSource(model) {
+  return (Array.isArray(model?.gguf_sources) && model.gguf_sources.find(entry => entry?.repo)) || null;
+}
+
+function _looksLikeGgufRepo(model) { // true when flagged is_gguf or any repo/path/name field mentions "gguf"
+  const fields = [model?.quant_repo, model?.repo_id, model?.path, model?.name];
+  return !!model?.is_gguf || fields.some(field => `${field || ''}`.toLowerCase().includes('gguf'));
+}
+
+// Pick the HF repo to show for a model as {repo, kind}; kind is 'GGUF' only for llama.cpp GGUF picks.
+function _downloadSourceRepo(model, backend) {
+  const fallback = { repo: model?.quant_repo || model?.name || '', kind: '' };
+  if (backend !== 'llamacpp') return fallback;
+  // A catalog gguf_sources entry takes precedence over the name-based heuristic.
+  const listed = _firstGgufSource(model);
+  if (listed) return { repo: listed.repo, kind: 'GGUF' };
+  if (!_looksLikeGgufRepo(model)) return fallback;
+  const guessed = model?.quant_repo || model?.repo_id || model?.name;
+  return guessed ? { repo: guessed, kind: 'GGUF' } : fallback;
+}
+
 // Reset GPU-toggle state so the next scan re-renders the RAM/GPU buttons for a
 // (possibly different) server, WITHOUT clearing the markup now — clearing it made
 // the buttons flicker out and back in. The old buttons stay visible until the
@@ -847,13 +869,13 @@ export function _expandModelRow(row, modelData) {
   const isLlamaCpp = backend === 'llamacpp';
   const ctx = modelData.context || 8192;
 
-  const dlRepo = modelData.quant_repo || modelData.name;
-  const hfUrl = `https://huggingface.co/${dlRepo}`;
+  const dlSource = _downloadSourceRepo(modelData, backend);
+  const hfUrl = `https://huggingface.co/${dlSource.repo}`;
   let html = `<div class="hwfit-action-panel" data-model-name="${esc(modelData.name)}">`;
   html += `<div class="hwfit-panel-header">`;
-  html += `<span class="hwfit-panel-model">${esc(modelData.name)}${modelData.quant_repo ? ` <span style="opacity:0.5;font-size:10px;">(${esc(modelData.quant)})</span>` : ''}</span>`;
+  html += `<span class="hwfit-panel-model">${esc(modelData.name)}${dlSource.kind ? ` <span style="opacity:0.5;font-size:10px;">(${esc(dlSource.kind)} ${esc(modelData.quant || '')})</span>` : (modelData.quant_repo ? ` <span style="opacity:0.5;font-size:10px;">(${esc(modelData.quant)})</span>` : '')}</span>`;
   html += `<span class="hwfit-panel-badge">${esc(label)}</span>`;
-  html += `<a href="${esc(hfUrl)}" target="_blank" rel="noopener" class="hwfit-panel-hf-link" title="View on HuggingFace">HF \u2197</a>`;
+  html += `<a href="${esc(hfUrl)}" target="_blank" rel="noopener" class="hwfit-panel-hf-link" title="View download source on HuggingFace">HF \u2197</a>`;
   html += `</div>`;
   html += `<div class="hwfit-panel-actions">`;
   html += `<button class="cookbook-btn hwfit-dl-btn">Download</button>`;
diff --git a/static/js/cookbookDownload.js b/static/js/cookbookDownload.js
index d4da9fe..2046897 100644
--- a/static/js/cookbookDownload.js
+++ b/static/js/cookbookDownload.js
@@ -57,21 +57,68 @@ export function _setPanelCheckbox(panel, field, checked) {
 
 // ── Command builder: download ──
 
+function _firstGgufSource(model) { // first gguf_sources entry with a repo, else null
+  const candidates = Array.isArray(model?.gguf_sources) ? model.gguf_sources : [];
+  return candidates.find(candidate => Boolean(candidate?.repo)) || null;
+}
+
+// Heuristic GGUF check: an is_gguf flag, or "gguf" (any case) in quant_repo, repo_id, path or name.
+function _looksLikeGgufRepo(model) {
+  return Boolean(model?.is_gguf) || [model?.quant_repo, model?.repo_id, model?.path, model?.name].some(v => `${v || ''}`.toLowerCase().includes('gguf'));
+}
+
+// Resolve where a llama.cpp download comes from: a catalog gguf_sources entry if
+// present, else a {repo} guessed from GGUF-looking fields, else null. Other backends get null.
+function _ggufDownloadSource(model, backend) {
+  if (backend !== 'llamacpp') return null;
+  const listed = _firstGgufSource(model);
+  if (listed) return listed;
+  if (!_looksLikeGgufRepo(model)) return null;
+  const guessedRepo = model?.quant_repo || model?.repo_id || model?.name;
+  return guessedRepo ? { repo: guessedRepo } : null;
+}
+
+// Include pattern for the download: the source's pinned file, else *<quant>*, else every *.gguf.
+function _ggufIncludePattern(model, source) {
+  const quantPattern = model?.quant ? `*${model.quant}*` : '*.gguf';
+  return source?.file ? source.file : quantPattern;
+}
+
+// User-facing text for a model that has no GGUF source; falls back to "this model" when unnamed.
+function _missingGgufMessage(model) {
+  return `No GGUF source is configured for ${model?.name || 'this model'}. Pick a model with a GGUF source, or paste the GGUF repo in Download.`;
+}
+
+function _bashQuote(value) { // POSIX single-quoting: each embedded ' becomes '\''; null/undefined become ''
+  return `'${String(value ?? '').split("'").join("'\\''")}'`;
+}
+
+// Build a command that reports the missing-GGUF error and exits 1 instead of downloading anything.
+function _missingGgufCommand(model) {
+  const msg = _missingGgufMessage(model);
+  // PowerShell: use a single-quoted literal ('' escapes ') so $name / $(...) in msg is never expanded.
+  if (_isWindows()) return `Write-Error '${msg.replace(/'/g, "''")}'; exit 1`;
+  return `printf '%s\\n' ${_bashQuote(msg)} >&2; exit 1`;
+}
+
 export function _buildDownloadCmd(model, backend) {
   let cmd = '';
   if (backend === 'ollama') {
     cmd = `ollama pull ${model.name.split('/').pop().toLowerCase()}`;
   } else {
-    const repo = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length)
-      ? model.gguf_sources[0].repo : model.name;
-    const includeArg = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length)
-      ? `, allow_patterns=["*${model.quant || ''}*"]` : '';
-    // Reflect the server's download target in the preview (matches the real
-    // download path built server-side). '' = default HF cache.
-    const _dlDir = (_envState.servers.find(s => s.host === (_envState.remoteHost || '')) || {}).downloadDir || '';
-    const _localDirArg = _dlDir ? `, local_dir=os.path.expanduser('${_dlDir.replace(/\/$/, '')}/${repo.split('/').pop()}')` : '';
-    const _py = _isWindows() ? 'python' : 'python3';
-    cmd = `${_py} -u -c "
+    const ggufSource = _ggufDownloadSource(model, backend);
+    if (backend === 'llamacpp' && !ggufSource) {
+      cmd = _missingGgufCommand(model);
+    } else {
+      const repo = ggufSource?.repo || model.name;
+      const includePattern = backend === 'llamacpp' ? _ggufIncludePattern(model, ggufSource) : null;
+      const includeArg = includePattern ? `, allow_patterns=["${includePattern.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"]` : '';
+      // Reflect the server's download target in the preview (matches the real
+      // download path built server-side). '' = default HF cache.
+      const _dlDir = (_envState.servers.find(s => s.host === (_envState.remoteHost || '')) || {}).downloadDir || '';
+      const _localDirArg = _dlDir ? `, local_dir=os.path.expanduser('${_dlDir.replace(/\/$/, '')}/${repo.split('/').pop()}')` : '';
+      const _py = _isWindows() ? 'python' : 'python3';
+      cmd = `${_py} -u -c "
 import sys, time, os
 os.environ['HF_HUB_DISABLE_PROGRESS_BARS']='0'
 os.environ['TQDM_DISABLE']='0'
@@ -125,6 +172,7 @@ try:
 except Exception as e:
  print(f'ERROR {e}',file=sys.stderr,flush=True);sys.exit(1)
 "`;
+    }
   }
   const prefix = _buildEnvPrefix();
   let full = prefix ? prefix + ' ' + cmd : cmd;
@@ -402,10 +450,13 @@ export async function _runPanelCmd(panel, cmd, opts = {}) {
 // ── Model download (dedicated endpoint, tmux-backed) ──
 
 export async function _runModelDownload(panel, model, backend, hostOverride) {
-  const repo = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length)
-    ? model.gguf_sources[0].repo : (model.quant_repo || model.name);
-  const include = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length)
-    ? `*${model.quant || ''}*` : null;
+  const ggufSource = _ggufDownloadSource(model, backend);
+  if (backend === 'llamacpp' && !ggufSource) {
+    uiModule.showToast(_missingGgufMessage(model));
+    return;
+  }
+  const repo = ggufSource?.repo || model.quant_repo || model.name;
+  const include = backend === 'llamacpp' ? _ggufIncludePattern(model, ggufSource) : null;
 
   _syncEnvFromPanel(panel);
 
diff --git a/tests/test_hwfit_macos.py b/tests/test_hwfit_macos.py
index ca3b902..b0f7b9b 100644
--- a/tests/test_hwfit_macos.py
+++ b/tests/test_hwfit_macos.py
@@ -70,6 +70,21 @@ def test_only_gguf_models_recommended_on_metal():
     assert unservable == [], f"{len(unservable)} non-GGUF models on Metal, e.g. {unservable[:3]}"
 
 
+def test_qwen_catalog_entries_point_at_verified_gguf_repos():
+    """Qwen GGUF-looking Cookbook rows must download GGUF repos, not the base
+    safetensors repositories."""
+    catalog = {m["name"]: m for m in get_models()}
+    expected = {
+        "Qwen/Qwen3.5-9B": ("unsloth/Qwen3.5-9B-GGUF", "Qwen3.5-9B-Q4_K_M.gguf"),
+        "Qwen/Qwen3.6-27B": ("unsloth/Qwen3.6-27B-GGUF", "Qwen3.6-27B-Q4_K_M.gguf"),
+        "Qwen/Qwen3.6-35B-A3B": ("unsloth/Qwen3.6-35B-A3B-GGUF", "Qwen3.6-35B-A3B-UD-Q4_K_M.gguf"),
+    }
+    for model_name, (repo, filename) in expected.items():
+        assert model_name in catalog, f"{model_name} is missing from the model catalog"
+        found = [(s.get("repo"), s.get("file")) for s in catalog[model_name].get("gguf_sources") or []]
+        assert (repo, filename) in found, f"{model_name}: expected {(repo, filename)}, got {found}"
+
+
 def test_safetensors_models_still_recommended_on_cuda():
     """Regression guard: vLLM serves safetensors on CUDA, so non-GGUF repos must
     NOT be filtered there — the GGUF-only rule is Metal-specific."""