fix: require GGUF sources for llama downloads (#368)

2026-06-01 23:47:47 +10:00
parent f2d55f8726
commit 033852ab14
4 changed files with 122 additions and 21 deletions
--- a/services/hwfit/data/hf_models.json
+++ b/services/hwfit/data/hf_models.json
@@ -7035,7 +7035,8 @@
  "gguf_sources": [
   {
    "repo": "unsloth/Qwen3.5-9B-GGUF",
-    "provider": "unsloth"
+    "provider": "unsloth",
    "file": "Qwen3.5-9B-Q4_K_M.gguf"
   }
  ]
 },
@@ -13733,7 +13734,13 @@
  "architecture": "qwen3",
  "pipeline_tag": "text-generation",
  "release_date": "2026-04-01",
-  "gguf_sources": [],
+  "gguf_sources": [
   {
    "repo": "unsloth/Qwen3.6-27B-GGUF",
    "provider": "unsloth",
    "file": "Qwen3.6-27B-Q4_K_M.gguf"
   }
  ],
  "capabilities": []
 },
 {
@@ -13796,7 +13803,13 @@
  "architecture": "qwen3_moe",
  "pipeline_tag": "text-generation",
  "release_date": "2026-04-01",
-  "gguf_sources": [],
+  "gguf_sources": [
   {
    "repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
    "provider": "unsloth",
    "file": "Qwen3.6-35B-A3B-UD-Q4_K_M.gguf"
   }
  ],
  "capabilities": []
 },
 {
--- a/static/js/cookbook-hwfit.js
+++ b/static/js/cookbook-hwfit.js
@@ -48,6 +48,28 @@ let _removedHwChips = new Set();
 export let _gpuToggleTotal = 0; // real GPU count from first scan, never overridden
 // Returns the first entry of model.gguf_sources that has a truthy `repo`, or
 // null when gguf_sources is not an array or no entry qualifies.
 function _firstGgufSource(model) {
  const candidates = model?.gguf_sources;
  if (!Array.isArray(candidates)) return null;
  for (const entry of candidates) {
    // Skip null/empty placeholders and entries without a repo.
    if (entry && entry.repo) return entry;
  }
  return null;
 }
 // Heuristic: true when the model is flagged is_gguf, or when any of
 // quant_repo / repo_id / path / name contains "gguf" (case-insensitive).
 function _looksLikeGgufRepo(model) {
  const fields = [model?.quant_repo, model?.repo_id, model?.path, model?.name]
    .map(value => `${value || ''}`.toLowerCase());
  if (model?.is_gguf) return true;
  // A ".gguf" file extension is already covered by the plain "gguf" match.
  return fields.some(text => text.includes('gguf'));
 }
 // Picks the repo shown as the download source for a model row.
 // For the 'llamacpp' backend it prefers the first configured GGUF source, then
 // a repo inferred from GGUF-looking model fields; both are tagged kind 'GGUF'.
 // Otherwise (or when nothing GGUF-like is found) it returns quant_repo/name
 // with an empty kind.
 function _downloadSourceRepo(model, backend) {
  const wantsGguf = backend === 'llamacpp';
  const listed = wantsGguf ? _firstGgufSource(model) : null;
  if (listed) return { repo: listed.repo, kind: 'GGUF' };
  if (wantsGguf && _looksLikeGgufRepo(model)) {
    const inferred = model?.quant_repo || model?.repo_id || model?.name;
    if (inferred) return { repo: inferred, kind: 'GGUF' };
  }
  return { repo: model?.quant_repo || model?.name || '', kind: '' };
 }
 // Reset GPU-toggle state so the next scan re-renders the RAM/GPU buttons for a
 // (possibly different) server, WITHOUT clearing the markup now — clearing it made
 // the buttons flicker out and back in. The old buttons stay visible until the
@@ -847,13 +869,13 @@ export function _expandModelRow(row, modelData) {
  const isLlamaCpp = backend === 'llamacpp';
  const ctx = modelData.context || 8192;
-  const dlRepo = modelData.quant_repo || modelData.name;
+  const dlSource = _downloadSourceRepo(modelData, backend);
-  const hfUrl = `https://huggingface.co/${dlRepo}`;
+  const hfUrl = `https://huggingface.co/${dlSource.repo}`;
  let html = `<div class="hwfit-action-panel" data-model-name="${esc(modelData.name)}">`;
  html += `<div class="hwfit-panel-header">`;
-  html += `<span class="hwfit-panel-model">${esc(modelData.name)}${modelData.quant_repo ? ` <span style="opacity:0.5;font-size:10px;">(${esc(modelData.quant)})</span>` : ''}</span>`;
+  html += `<span class="hwfit-panel-model">${esc(modelData.name)}${dlSource.kind ? ` <span style="opacity:0.5;font-size:10px;">(${esc(dlSource.kind)} ${esc(modelData.quant || '')})</span>` : (modelData.quant_repo ? ` <span style="opacity:0.5;font-size:10px;">(${esc(modelData.quant)})</span>` : '')}</span>`;
  html += `<span class="hwfit-panel-badge">${esc(label)}</span>`;
-  html += `<a href="${esc(hfUrl)}" target="_blank" rel="noopener" class="hwfit-panel-hf-link" title="View on HuggingFace">HF \u2197</a>`;
+  html += `<a href="${esc(hfUrl)}" target="_blank" rel="noopener" class="hwfit-panel-hf-link" title="View download source on HuggingFace">HF \u2197</a>`;
  html += `</div>`;
  html += `<div class="hwfit-panel-actions">`;
  html += `<button class="cookbook-btn hwfit-dl-btn">Download</button>`;
--- a/static/js/cookbookDownload.js
+++ b/static/js/cookbookDownload.js
@@ -57,21 +57,68 @@ export function _setPanelCheckbox(panel, field, checked) {
 // ── Command builder: download ──
 // Looks up the first usable GGUF source on a model: the first gguf_sources
 // entry with a truthy `repo`. Yields null if gguf_sources is not an array or
 // holds no such entry.
 function _firstGgufSource(model) {
  const listed = model?.gguf_sources;
  const usable = Array.isArray(listed)
    ? listed.find(entry => Boolean(entry?.repo))
    : undefined;
  return usable || null;
 }
 // Heuristic GGUF detection: the model is treated as GGUF when is_gguf is
 // truthy or when "gguf" appears (any letter case) in quant_repo, repo_id,
 // path, or name.
 function _looksLikeGgufRepo(model) {
  const haystack = [model?.quant_repo, model?.repo_id, model?.path, model?.name]
    .map(part => `${part || ''}`)
    .join(' ')
    .toLowerCase();
  // Matching "gguf" also matches any ".gguf" file extension.
  return Boolean(model?.is_gguf) || haystack.indexOf('gguf') !== -1;
 }
 // Resolves the GGUF download source for a model.
 // Returns null for any backend other than 'llamacpp'. For 'llamacpp' it
 // returns the first configured gguf_sources entry, else `{ repo }` inferred
 // from quant_repo / repo_id / name when the model looks like a GGUF repo,
 // else null.
 function _ggufDownloadSource(model, backend) {
  if (backend === 'llamacpp') {
    const listed = _firstGgufSource(model);
    if (listed) return listed;
    const inferredRepo = _looksLikeGgufRepo(model)
      ? (model?.quant_repo || model?.repo_id || model?.name)
      : undefined;
    if (inferredRepo) return { repo: inferredRepo };
  }
  return null;
 }
 // Chooses the file pattern for a GGUF download: the source's explicit file
 // when present, otherwise a wildcard around the model's quant label, and
 // '*.gguf' when neither is available.
 function _ggufIncludePattern(model, source) {
  const quantGlob = model?.quant ? `*${model.quant}*` : '*.gguf';
  return source?.file || quantGlob;
 }
 // User-facing explanation for a model that has no GGUF source; falls back to
 // "this model" when the model has no name.
 function _missingGgufMessage(model) {
  const label = model?.name || 'this model';
  return [
    `No GGUF source is configured for ${label}.`,
    'Pick a model with a GGUF source, or paste the GGUF repo in Download.',
  ].join(' ');
 }
 // Wraps a value in single quotes for a POSIX shell. null/undefined become the
 // empty string; each embedded single quote is emitted as '\'' (close the
 // quote, escaped quote, reopen).
 function _bashQuote(value) {
  const text = String(value ?? '');
  const escaped = text.split("'").join("'\\''");
  return `'${escaped}'`;
 }
 // Builds a command that reports the "no GGUF source" message on the error
 // stream and exits with status 1: a PowerShell Write-Error when _isWindows()
 // is true, a printf to stderr otherwise.
 function _missingGgufCommand(model) {
  const msg = _missingGgufMessage(model);
  if (_isWindows()) {
    // PowerShell expands $var / $(...) inside double quotes and escapes with a
    // backtick rather than a backslash, so JSON-style quoting is not safe for a
    // message that embeds the model name. Emit a literal single-quoted string
    // instead; every single-quote variant PowerShell accepts as a delimiter
    // (ASCII and the typographic ones) is escaped by doubling it.
    const psLiteral = `'${String(msg).replace(/['\u2018\u2019\u201A\u201B]/g, '$&$&')}'`;
    return `Write-Error ${psLiteral}; exit 1`;
  }
  return `printf '%s\\n' ${_bashQuote(msg)} >&2; exit 1`;
 }
 export function _buildDownloadCmd(model, backend) {
  let cmd = '';
  if (backend === 'ollama') {
    cmd = `ollama pull ${model.name.split('/').pop().toLowerCase()}`;
  } else {
-    const repo = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length)
+    const ggufSource = _ggufDownloadSource(model, backend);
-      ? model.gguf_sources[0].repo : model.name;
+    if (backend === 'llamacpp' && !ggufSource) {
-    const includeArg = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length)
+      cmd = _missingGgufCommand(model);
-      ? `, allow_patterns=["*${model.quant || ''}*"]` : '';
+    } else {
-    // Reflect the server's download target in the preview (matches the real
+      const repo = ggufSource?.repo || model.name;
-    // download path built server-side). '' = default HF cache.
+      const includePattern = backend === 'llamacpp' ? _ggufIncludePattern(model, ggufSource) : null;
-    const _dlDir = (_envState.servers.find(s => s.host === (_envState.remoteHost || '')) || {}).downloadDir || '';
+      const includeArg = includePattern ? `, allow_patterns=["${includePattern.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"]` : '';
-    const _localDirArg = _dlDir ? `, local_dir=os.path.expanduser('${_dlDir.replace(/\/$/, '')}/${repo.split('/').pop()}')` : '';
+      // Reflect the server's download target in the preview (matches the real
-    const _py = _isWindows() ? 'python' : 'python3';
+      // download path built server-side). '' = default HF cache.
-    cmd = `${_py} -u -c "
+      const _dlDir = (_envState.servers.find(s => s.host === (_envState.remoteHost || '')) || {}).downloadDir || '';
      const _localDirArg = _dlDir ? `, local_dir=os.path.expanduser('${_dlDir.replace(/\/$/, '')}/${repo.split('/').pop()}')` : '';
      const _py = _isWindows() ? 'python' : 'python3';
      cmd = `${_py} -u -c "
 import sys, time, os
 os.environ['HF_HUB_DISABLE_PROGRESS_BARS']='0'
 os.environ['TQDM_DISABLE']='0'
@@ -125,6 +172,7 @@ try:
 except Exception as e:
 print(f'ERROR {e}',file=sys.stderr,flush=True);sys.exit(1)
 "`;
    }
  }
  const prefix = _buildEnvPrefix();
  let full = prefix ? prefix + ' ' + cmd : cmd;
@@ -402,10 +450,13 @@ export async function _runPanelCmd(panel, cmd, opts = {}) {
 // ── Model download (dedicated endpoint, tmux-backed) ──
 export async function _runModelDownload(panel, model, backend, hostOverride) {
-  const repo = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length)
+  const ggufSource = _ggufDownloadSource(model, backend);
-    ? model.gguf_sources[0].repo : (model.quant_repo || model.name);
+  if (backend === 'llamacpp' && !ggufSource) {
-  const include = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length)
+    uiModule.showToast(_missingGgufMessage(model));
-    ? `*${model.quant || ''}*` : null;
+    return;
  }
  const repo = ggufSource?.repo || model.quant_repo || model.name;
  const include = backend === 'llamacpp' ? _ggufIncludePattern(model, ggufSource) : null;
  _syncEnvFromPanel(panel);
--- a/tests/test_hwfit_macos.py
+++ b/tests/test_hwfit_macos.py
@@ -70,6 +70,21 @@ def test_only_gguf_models_recommended_on_metal():
    assert unservable == [], f"{len(unservable)} non-GGUF models on Metal, e.g. {unservable[:3]}"
 def test_qwen_catalog_entries_point_at_verified_gguf_repos():
    """Qwen GGUF-looking Cookbook rows must download GGUF repos, not the base
    safetensors repositories."""
    catalog = {m["name"]: m for m in get_models()}
    expected = {
        "Qwen/Qwen3.5-9B": ("unsloth/Qwen3.5-9B-GGUF", "Qwen3.5-9B-Q4_K_M.gguf"),
        "Qwen/Qwen3.6-27B": ("unsloth/Qwen3.6-27B-GGUF", "Qwen3.6-27B-Q4_K_M.gguf"),
        "Qwen/Qwen3.6-35B-A3B": ("unsloth/Qwen3.6-35B-A3B-GGUF", "Qwen3.6-35B-A3B-UD-Q4_K_M.gguf"),
    }
    for model_name, (repo, filename) in expected.items():
        sources = catalog[model_name].get("gguf_sources") or []
        assert any(src.get("repo") == repo and src.get("file") == filename for src in sources)
 def test_safetensors_models_still_recommended_on_cuda():
    """Regression guard: vLLM serves safetensors on CUDA, so non-GGUF repos must
    NOT be filtered there — the GGUF-only rule is Metal-specific."""