From 033852ab142c9135a6948d9002ae59409e398272 Mon Sep 17 00:00:00 2001 From: spooky Date: Mon, 1 Jun 2026 23:47:47 +1000 Subject: [PATCH] fix: require GGUF sources for llama downloads (#368) --- services/hwfit/data/hf_models.json | 19 +++++-- static/js/cookbook-hwfit.js | 30 ++++++++++-- static/js/cookbookDownload.js | 79 ++++++++++++++++++++++++------ tests/test_hwfit_macos.py | 15 ++++++ 4 files changed, 122 insertions(+), 21 deletions(-) diff --git a/services/hwfit/data/hf_models.json b/services/hwfit/data/hf_models.json index 19ce4ef..0267535 100644 --- a/services/hwfit/data/hf_models.json +++ b/services/hwfit/data/hf_models.json @@ -7035,7 +7035,8 @@ "gguf_sources": [ { "repo": "unsloth/Qwen3.5-9B-GGUF", - "provider": "unsloth" + "provider": "unsloth", + "file": "Qwen3.5-9B-Q4_K_M.gguf" } ] }, @@ -13733,7 +13734,13 @@ "architecture": "qwen3", "pipeline_tag": "text-generation", "release_date": "2026-04-01", - "gguf_sources": [], + "gguf_sources": [ + { + "repo": "unsloth/Qwen3.6-27B-GGUF", + "provider": "unsloth", + "file": "Qwen3.6-27B-Q4_K_M.gguf" + } + ], "capabilities": [] }, { @@ -13796,7 +13803,13 @@ "architecture": "qwen3_moe", "pipeline_tag": "text-generation", "release_date": "2026-04-01", - "gguf_sources": [], + "gguf_sources": [ + { + "repo": "unsloth/Qwen3.6-35B-A3B-GGUF", + "provider": "unsloth", + "file": "Qwen3.6-35B-A3B-UD-Q4_K_M.gguf" + } + ], "capabilities": [] }, { diff --git a/static/js/cookbook-hwfit.js b/static/js/cookbook-hwfit.js index 818ca7d..e6445f8 100644 --- a/static/js/cookbook-hwfit.js +++ b/static/js/cookbook-hwfit.js @@ -48,6 +48,28 @@ let _removedHwChips = new Set(); export let _gpuToggleTotal = 0; // real GPU count from first scan, never overridden +function _firstGgufSource(model) { + const sources = Array.isArray(model?.gguf_sources) ? model.gguf_sources : []; + return sources.find(src => src && src.repo) || null; +} + +function _looksLikeGgufRepo(model) { + const haystack = `${model?.quant_repo || ''} ${model?.repo_id || ''} ${model?.path || ''} ${model?.name || ''}`.toLowerCase(); + return !!model?.is_gguf || haystack.includes('gguf') || haystack.includes('.gguf'); +} + +function _downloadSourceRepo(model, backend) { + if (backend === 'llamacpp') { + const ggufSource = _firstGgufSource(model); + if (ggufSource) return { repo: ggufSource.repo, kind: 'GGUF' }; + if (_looksLikeGgufRepo(model)) { + const repo = model?.quant_repo || model?.repo_id || model?.name; + if (repo) return { repo, kind: 'GGUF' }; + } + } + return { repo: model?.quant_repo || model?.name || '', kind: '' }; +} + // Reset GPU-toggle state so the next scan re-renders the RAM/GPU buttons for a // (possibly different) server, WITHOUT clearing the markup now — clearing it made // the buttons flicker out and back in. The old buttons stay visible until the @@ -847,13 +869,13 @@ export function _expandModelRow(row, modelData) { const isLlamaCpp = backend === 'llamacpp'; const ctx = modelData.context || 8192; - const dlRepo = modelData.quant_repo || modelData.name; - const hfUrl = `https://huggingface.co/${dlRepo}`; + const dlSource = _downloadSourceRepo(modelData, backend); + const hfUrl = `https://huggingface.co/${dlSource.repo}`; let html = `
`; html += `
`; - html += `${esc(modelData.name)}${modelData.quant_repo ? ` (${esc(modelData.quant)})` : ''}`; + html += `${esc(modelData.name)}${dlSource.kind ? ` (${esc(dlSource.kind)} ${esc(modelData.quant || '')})` : (modelData.quant_repo ? ` (${esc(modelData.quant)})` : '')}`; html += `${esc(label)}`; - html += `HF \u2197`; + html += `HF \u2197`; html += `
`; html += `
`; html += ``; diff --git a/static/js/cookbookDownload.js b/static/js/cookbookDownload.js index d4da9fe..2046897 100644 --- a/static/js/cookbookDownload.js +++ b/static/js/cookbookDownload.js @@ -57,21 +57,68 @@ export function _setPanelCheckbox(panel, field, checked) { // ── Command builder: download ── +function _firstGgufSource(model) { + const sources = Array.isArray(model?.gguf_sources) ? model.gguf_sources : []; + return sources.find(src => src && src.repo) || null; +} + +function _looksLikeGgufRepo(model) { + const haystack = `${model?.quant_repo || ''} ${model?.repo_id || ''} ${model?.path || ''} ${model?.name || ''}`.toLowerCase(); + return !!model?.is_gguf || haystack.includes('gguf') || haystack.includes('.gguf'); +} + +function _ggufDownloadSource(model, backend) { + if (backend !== 'llamacpp') return null; + const source = _firstGgufSource(model); + if (source) return source; + if (_looksLikeGgufRepo(model)) { + const repo = model?.quant_repo || model?.repo_id || model?.name; + if (repo) return { repo }; + } + return null; +} + +function _ggufIncludePattern(model, source) { + if (source?.file) return source.file; + if (model?.quant) return `*${model.quant}*`; + return '*.gguf'; +} + +function _missingGgufMessage(model) { + const name = model?.name || 'this model'; + return `No GGUF source is configured for ${name}. Pick a model with a GGUF source, or paste the GGUF repo in Download.`; +} + +function _bashQuote(value) { + return "'" + String(value ?? '').replace(/'/g, "'\\''") + "'"; +} + +function _missingGgufCommand(model) { + const msg = _missingGgufMessage(model); + if (_isWindows()) { + return `Write-Error ${JSON.stringify(msg)}; exit 1`; + } + return `printf '%s\\n' ${_bashQuote(msg)} >&2; exit 1`; +} + export function _buildDownloadCmd(model, backend) { let cmd = ''; if (backend === 'ollama') { cmd = `ollama pull ${model.name.split('/').pop().toLowerCase()}`; } else { - const repo = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length) - ? model.gguf_sources[0].repo : model.name; - const includeArg = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length) - ? `, allow_patterns=["*${model.quant || ''}*"]` : ''; - // Reflect the server's download target in the preview (matches the real - // download path built server-side). '' = default HF cache. - const _dlDir = (_envState.servers.find(s => s.host === (_envState.remoteHost || '')) || {}).downloadDir || ''; - const _localDirArg = _dlDir ? `, local_dir=os.path.expanduser('${_dlDir.replace(/\/$/, '')}/${repo.split('/').pop()}')` : ''; - const _py = _isWindows() ? 'python' : 'python3'; - cmd = `${_py} -u -c " + const ggufSource = _ggufDownloadSource(model, backend); + if (backend === 'llamacpp' && !ggufSource) { + cmd = _missingGgufCommand(model); + } else { + const repo = ggufSource?.repo || model.name; + const includePattern = backend === 'llamacpp' ? _ggufIncludePattern(model, ggufSource) : null; + const includeArg = includePattern ? `, allow_patterns=["${includePattern.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"]` : ''; + // Reflect the server's download target in the preview (matches the real + // download path built server-side). '' = default HF cache. + const _dlDir = (_envState.servers.find(s => s.host === (_envState.remoteHost || '')) || {}).downloadDir || ''; + const _localDirArg = _dlDir ? `, local_dir=os.path.expanduser('${_dlDir.replace(/\/$/, '')}/${repo.split('/').pop()}')` : ''; + const _py = _isWindows() ? 'python' : 'python3'; + cmd = `${_py} -u -c " import sys, time, os os.environ['HF_HUB_DISABLE_PROGRESS_BARS']='0' os.environ['TQDM_DISABLE']='0' @@ -125,6 +172,7 @@ try: except Exception as e: print(f'ERROR {e}',file=sys.stderr,flush=True);sys.exit(1) "`; + } } const prefix = _buildEnvPrefix(); let full = prefix ? prefix + ' ' + cmd : cmd; @@ -402,10 +450,13 @@ export async function _runPanelCmd(panel, cmd, opts = {}) { // ── Model download (dedicated endpoint, tmux-backed) ── export async function _runModelDownload(panel, model, backend, hostOverride) { - const repo = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length) - ? model.gguf_sources[0].repo : (model.quant_repo || model.name); - const include = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length) - ? `*${model.quant || ''}*` : null; + const ggufSource = _ggufDownloadSource(model, backend); + if (backend === 'llamacpp' && !ggufSource) { + uiModule.showToast(_missingGgufMessage(model)); + return; + } + const repo = ggufSource?.repo || model.quant_repo || model.name; + const include = backend === 'llamacpp' ? _ggufIncludePattern(model, ggufSource) : null; _syncEnvFromPanel(panel); diff --git a/tests/test_hwfit_macos.py b/tests/test_hwfit_macos.py index ca3b902..b0f7b9b 100644 --- a/tests/test_hwfit_macos.py +++ b/tests/test_hwfit_macos.py @@ -70,6 +70,21 @@ def test_only_gguf_models_recommended_on_metal(): assert unservable == [], f"{len(unservable)} non-GGUF models on Metal, e.g. {unservable[:3]}" +def test_qwen_catalog_entries_point_at_verified_gguf_repos(): + """Qwen GGUF-looking Cookbook rows must download GGUF repos, not the base + safetensors repositories.""" + catalog = {m["name"]: m for m in get_models()} + expected = { + "Qwen/Qwen3.5-9B": ("unsloth/Qwen3.5-9B-GGUF", "Qwen3.5-9B-Q4_K_M.gguf"), + "Qwen/Qwen3.6-27B": ("unsloth/Qwen3.6-27B-GGUF", "Qwen3.6-27B-Q4_K_M.gguf"), + "Qwen/Qwen3.6-35B-A3B": ("unsloth/Qwen3.6-35B-A3B-GGUF", "Qwen3.6-35B-A3B-UD-Q4_K_M.gguf"), + } + + for model_name, (repo, filename) in expected.items(): + sources = catalog[model_name].get("gguf_sources") or [] + assert any(src.get("repo") == repo and src.get("file") == filename for src in sources) + + def test_safetensors_models_still_recommended_on_cuda(): """Regression guard: vLLM serves safetensors on CUDA, so non-GGUF repos must NOT be filtered there — the GGUF-only rule is Metal-specific."""