fix: require GGUF sources for llama downloads (#368)

This commit is contained in:
spooky
2026-06-01 23:47:47 +10:00
committed by GitHub
parent f2d55f8726
commit 033852ab14
4 changed files with 122 additions and 21 deletions

View File

@@ -7035,7 +7035,8 @@
"gguf_sources": [ "gguf_sources": [
{ {
"repo": "unsloth/Qwen3.5-9B-GGUF", "repo": "unsloth/Qwen3.5-9B-GGUF",
"provider": "unsloth" "provider": "unsloth",
"file": "Qwen3.5-9B-Q4_K_M.gguf"
} }
] ]
}, },
@@ -13733,7 +13734,13 @@
"architecture": "qwen3", "architecture": "qwen3",
"pipeline_tag": "text-generation", "pipeline_tag": "text-generation",
"release_date": "2026-04-01", "release_date": "2026-04-01",
"gguf_sources": [], "gguf_sources": [
{
"repo": "unsloth/Qwen3.6-27B-GGUF",
"provider": "unsloth",
"file": "Qwen3.6-27B-Q4_K_M.gguf"
}
],
"capabilities": [] "capabilities": []
}, },
{ {
@@ -13796,7 +13803,13 @@
"architecture": "qwen3_moe", "architecture": "qwen3_moe",
"pipeline_tag": "text-generation", "pipeline_tag": "text-generation",
"release_date": "2026-04-01", "release_date": "2026-04-01",
"gguf_sources": [], "gguf_sources": [
{
"repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
"provider": "unsloth",
"file": "Qwen3.6-35B-A3B-UD-Q4_K_M.gguf"
}
],
"capabilities": [] "capabilities": []
}, },
{ {

View File

@@ -48,6 +48,28 @@ let _removedHwChips = new Set();
export let _gpuToggleTotal = 0; // real GPU count from first scan, never overridden export let _gpuToggleTotal = 0; // real GPU count from first scan, never overridden
/**
 * Find the first usable GGUF source listed on a model.
 *
 * @param {object} [model] - Catalog/model record; may be null or undefined.
 * @returns {object|null} The first truthy entry of `model.gguf_sources` that
 *   has a truthy `repo`, or null when `gguf_sources` is not an array or no
 *   entry qualifies.
 */
function _firstGgufSource(model) {
  const candidates = model?.gguf_sources;
  if (!Array.isArray(candidates)) return null;
  for (const entry of candidates) {
    // Skip null placeholders and entries that do not name a repo.
    if (entry && entry.repo) return entry;
  }
  return null;
}
/**
 * Heuristic: does this model record appear to refer to GGUF weights?
 *
 * True when `model.is_gguf` is truthy, or when any of `quant_repo`,
 * `repo_id`, `path` or `name` contains "gguf" (case-insensitive). A ".gguf"
 * file extension is covered by the same substring check.
 *
 * @param {object} [model] - Model record; may be null or undefined.
 * @returns {boolean}
 */
function _looksLikeGgufRepo(model) {
  const searchable = [model?.quant_repo, model?.repo_id, model?.path, model?.name]
    .map(part => part || '')
    .join(' ')
    .toLowerCase();
  if (model?.is_gguf) return true;
  return searchable.includes('gguf');
}
/**
 * Pick the repo to show as the download source for a model on a backend.
 *
 * For the 'llamacpp' backend a GGUF repo is preferred: first an explicit
 * `gguf_sources` entry, then — if the record itself looks like GGUF — its
 * `quant_repo`, `repo_id` or `name`. Every other case falls back to
 * `quant_repo` / `name` with an empty `kind`.
 *
 * @param {object} [model] - Model record; may be null or undefined.
 * @param {string} backend - Backend identifier (e.g. 'llamacpp').
 * @returns {{repo: string, kind: string}} `kind` is 'GGUF' when a GGUF repo
 *   was selected, otherwise ''.
 */
function _downloadSourceRepo(model, backend) {
  const fallback = { repo: model?.quant_repo || model?.name || '', kind: '' };
  if (backend !== 'llamacpp') return fallback;

  const explicit = _firstGgufSource(model);
  if (explicit) return { repo: explicit.repo, kind: 'GGUF' };

  if (!_looksLikeGgufRepo(model)) return fallback;

  // The record looks like GGUF but has no explicit source: use its own repo.
  const inferred = model?.quant_repo || model?.repo_id || model?.name;
  return inferred ? { repo: inferred, kind: 'GGUF' } : fallback;
}
// Reset GPU-toggle state so the next scan re-renders the RAM/GPU buttons for a // Reset GPU-toggle state so the next scan re-renders the RAM/GPU buttons for a
// (possibly different) server, WITHOUT clearing the markup now — clearing it made // (possibly different) server, WITHOUT clearing the markup now — clearing it made
// the buttons flicker out and back in. The old buttons stay visible until the // the buttons flicker out and back in. The old buttons stay visible until the
@@ -847,13 +869,13 @@ export function _expandModelRow(row, modelData) {
const isLlamaCpp = backend === 'llamacpp'; const isLlamaCpp = backend === 'llamacpp';
const ctx = modelData.context || 8192; const ctx = modelData.context || 8192;
const dlRepo = modelData.quant_repo || modelData.name; const dlSource = _downloadSourceRepo(modelData, backend);
const hfUrl = `https://huggingface.co/${dlRepo}`; const hfUrl = `https://huggingface.co/${dlSource.repo}`;
let html = `<div class="hwfit-action-panel" data-model-name="${esc(modelData.name)}">`; let html = `<div class="hwfit-action-panel" data-model-name="${esc(modelData.name)}">`;
html += `<div class="hwfit-panel-header">`; html += `<div class="hwfit-panel-header">`;
html += `<span class="hwfit-panel-model">${esc(modelData.name)}${modelData.quant_repo ? ` <span style="opacity:0.5;font-size:10px;">(${esc(modelData.quant)})</span>` : ''}</span>`; html += `<span class="hwfit-panel-model">${esc(modelData.name)}${dlSource.kind ? ` <span style="opacity:0.5;font-size:10px;">(${esc(dlSource.kind)} ${esc(modelData.quant || '')})</span>` : (modelData.quant_repo ? ` <span style="opacity:0.5;font-size:10px;">(${esc(modelData.quant)})</span>` : '')}</span>`;
html += `<span class="hwfit-panel-badge">${esc(label)}</span>`; html += `<span class="hwfit-panel-badge">${esc(label)}</span>`;
html += `<a href="${esc(hfUrl)}" target="_blank" rel="noopener" class="hwfit-panel-hf-link" title="View on HuggingFace">HF \u2197</a>`; html += `<a href="${esc(hfUrl)}" target="_blank" rel="noopener" class="hwfit-panel-hf-link" title="View download source on HuggingFace">HF \u2197</a>`;
html += `</div>`; html += `</div>`;
html += `<div class="hwfit-panel-actions">`; html += `<div class="hwfit-panel-actions">`;
html += `<button class="cookbook-btn hwfit-dl-btn">Download</button>`; html += `<button class="cookbook-btn hwfit-dl-btn">Download</button>`;

View File

@@ -57,21 +57,68 @@ export function _setPanelCheckbox(panel, field, checked) {
// ── Command builder: download ── // ── Command builder: download ──
/**
 * Return the first entry in `model.gguf_sources` that names a repo.
 *
 * @param {object} [model] - Model record; may be null or undefined.
 * @returns {object|null} The matching source entry, or null when
 *   `gguf_sources` is not an array or has no entry with a truthy `repo`.
 */
function _firstGgufSource(model) {
  const listed = model?.gguf_sources;
  if (!Array.isArray(listed)) return null;
  for (const candidate of listed) {
    // Ignore empty slots and entries without a repo.
    if (candidate && candidate.repo) return candidate;
  }
  return null;
}
/**
 * Heuristic check for whether a model record refers to GGUF weights.
 *
 * Returns true if `is_gguf` is truthy or if "gguf" occurs (ignoring case) in
 * `quant_repo`, `repo_id`, `path` or `name`. Matching the bare substring also
 * covers a ".gguf" file extension.
 *
 * @param {object} [model] - Model record; may be null or undefined.
 * @returns {boolean}
 */
function _looksLikeGgufRepo(model) {
  const { quant_repo, repo_id, path, name, is_gguf } = model ?? {};
  const haystack = `${quant_repo || ''} ${repo_id || ''} ${path || ''} ${name || ''}`.toLowerCase();
  return Boolean(is_gguf) || haystack.includes('gguf');
}
/**
 * Resolve the GGUF source to download from for the 'llamacpp' backend.
 *
 * @param {object} [model] - Model record; may be null or undefined.
 * @param {string} backend - Backend identifier.
 * @returns {object|null} null for any backend other than 'llamacpp'.
 *   Otherwise the first explicit `gguf_sources` entry; failing that, a
 *   `{ repo }` object built from `quant_repo` / `repo_id` / `name` when the
 *   record looks like GGUF; null when neither is available.
 */
function _ggufDownloadSource(model, backend) {
  if (backend !== 'llamacpp') return null;

  const configured = _firstGgufSource(model);
  if (configured) return configured;

  if (!_looksLikeGgufRepo(model)) return null;

  // No explicit source, but the record itself points at GGUF weights.
  const repo = model?.quant_repo || model?.repo_id || model?.name;
  return repo ? { repo } : null;
}
/**
 * Choose the file pattern used to restrict a GGUF download.
 *
 * Precedence: the source's exact `file`, then a `*<quant>*` glob built from
 * `model.quant`, then the catch-all `*.gguf`.
 *
 * @param {object} [model] - Model record; may be null or undefined.
 * @param {object} [source] - GGUF source entry; may be null or undefined.
 * @returns {string} File name or glob pattern.
 */
function _ggufIncludePattern(model, source) {
  const pinnedFile = source?.file;
  if (pinnedFile) return pinnedFile;
  return model?.quant ? `*${model.quant}*` : '*.gguf';
}
/**
 * Build the user-facing message shown when a model has no GGUF source.
 *
 * @param {object} [model] - Model record; `name` is used when truthy,
 *   otherwise the text falls back to "this model".
 * @returns {string}
 */
function _missingGgufMessage(model) {
  const label = model?.name ? model.name : 'this model';
  return `No GGUF source is configured for ${label}. Pick a model with a GGUF source, or paste the GGUF repo in Download.`;
}
/**
 * Wrap a value in single quotes for use as one POSIX-shell word.
 *
 * Each embedded single quote is rewritten as `'\''` (close the quote, emit an
 * escaped quote, reopen). null and undefined are treated as the empty string.
 *
 * @param {*} value - Value to quote; converted with String().
 * @returns {string} The single-quoted shell word.
 */
function _bashQuote(value) {
  const text = String(value ?? '');
  const escaped = text.split("'").join("'\\''");
  return `'${escaped}'`;
}
/**
 * Build a shell command that prints the "no GGUF source" message to the error
 * stream and exits with status 1, so a llama.cpp download without a GGUF
 * source fails visibly instead of fetching some other repo.
 *
 * The message embeds the model name, so it is quoted as a literal for the
 * target shell:
 *  - Windows (PowerShell): a single-quoted string with embedded single quotes
 *    doubled. A double-quoted string (as JSON.stringify would produce) is not
 *    safe here: PowerShell expands `$var` / `$(...)` inside double quotes and
 *    does not treat a backslash as an escape, so a name containing `"`, `$` or
 *    a backtick could break or inject into the command.
 *  - otherwise: POSIX single-quoting via _bashQuote.
 *
 * @param {object} [model] - Model record passed through to _missingGgufMessage.
 * @returns {string} The command line to run.
 */
function _missingGgufCommand(model) {
  const msg = _missingGgufMessage(model);
  if (_isWindows()) {
    // PowerShell also accepts the typographic single quotes as string
    // delimiters, so double those as well as the ASCII apostrophe.
    const psLiteral = `'${msg.replace(/['\u2018\u2019\u201A\u201B]/g, '$&$&')}'`;
    return `Write-Error ${psLiteral}; exit 1`;
  }
  return `printf '%s\\n' ${_bashQuote(msg)} >&2; exit 1`;
}
export function _buildDownloadCmd(model, backend) { export function _buildDownloadCmd(model, backend) {
let cmd = ''; let cmd = '';
if (backend === 'ollama') { if (backend === 'ollama') {
cmd = `ollama pull ${model.name.split('/').pop().toLowerCase()}`; cmd = `ollama pull ${model.name.split('/').pop().toLowerCase()}`;
} else { } else {
const repo = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length) const ggufSource = _ggufDownloadSource(model, backend);
? model.gguf_sources[0].repo : model.name; if (backend === 'llamacpp' && !ggufSource) {
const includeArg = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length) cmd = _missingGgufCommand(model);
? `, allow_patterns=["*${model.quant || ''}*"]` : ''; } else {
// Reflect the server's download target in the preview (matches the real const repo = ggufSource?.repo || model.name;
// download path built server-side). '' = default HF cache. const includePattern = backend === 'llamacpp' ? _ggufIncludePattern(model, ggufSource) : null;
const _dlDir = (_envState.servers.find(s => s.host === (_envState.remoteHost || '')) || {}).downloadDir || ''; const includeArg = includePattern ? `, allow_patterns=["${includePattern.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"]` : '';
const _localDirArg = _dlDir ? `, local_dir=os.path.expanduser('${_dlDir.replace(/\/$/, '')}/${repo.split('/').pop()}')` : ''; // Reflect the server's download target in the preview (matches the real
const _py = _isWindows() ? 'python' : 'python3'; // download path built server-side). '' = default HF cache.
cmd = `${_py} -u -c " const _dlDir = (_envState.servers.find(s => s.host === (_envState.remoteHost || '')) || {}).downloadDir || '';
const _localDirArg = _dlDir ? `, local_dir=os.path.expanduser('${_dlDir.replace(/\/$/, '')}/${repo.split('/').pop()}')` : '';
const _py = _isWindows() ? 'python' : 'python3';
cmd = `${_py} -u -c "
import sys, time, os import sys, time, os
os.environ['HF_HUB_DISABLE_PROGRESS_BARS']='0' os.environ['HF_HUB_DISABLE_PROGRESS_BARS']='0'
os.environ['TQDM_DISABLE']='0' os.environ['TQDM_DISABLE']='0'
@@ -125,6 +172,7 @@ try:
except Exception as e: except Exception as e:
print(f'ERROR {e}',file=sys.stderr,flush=True);sys.exit(1) print(f'ERROR {e}',file=sys.stderr,flush=True);sys.exit(1)
"`; "`;
}
} }
const prefix = _buildEnvPrefix(); const prefix = _buildEnvPrefix();
let full = prefix ? prefix + ' ' + cmd : cmd; let full = prefix ? prefix + ' ' + cmd : cmd;
@@ -402,10 +450,13 @@ export async function _runPanelCmd(panel, cmd, opts = {}) {
// ── Model download (dedicated endpoint, tmux-backed) ── // ── Model download (dedicated endpoint, tmux-backed) ──
export async function _runModelDownload(panel, model, backend, hostOverride) { export async function _runModelDownload(panel, model, backend, hostOverride) {
const repo = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length) const ggufSource = _ggufDownloadSource(model, backend);
? model.gguf_sources[0].repo : (model.quant_repo || model.name); if (backend === 'llamacpp' && !ggufSource) {
const include = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length) uiModule.showToast(_missingGgufMessage(model));
? `*${model.quant || ''}*` : null; return;
}
const repo = ggufSource?.repo || model.quant_repo || model.name;
const include = backend === 'llamacpp' ? _ggufIncludePattern(model, ggufSource) : null;
_syncEnvFromPanel(panel); _syncEnvFromPanel(panel);

View File

@@ -70,6 +70,21 @@ def test_only_gguf_models_recommended_on_metal():
assert unservable == [], f"{len(unservable)} non-GGUF models on Metal, e.g. {unservable[:3]}" assert unservable == [], f"{len(unservable)} non-GGUF models on Metal, e.g. {unservable[:3]}"
def test_qwen_catalog_entries_point_at_verified_gguf_repos():
    """Each listed Qwen catalog entry must expose a GGUF source whose repo and
    file match the expected GGUF artifact, so downloads do not target the base
    safetensors repository."""
    by_name = {entry["name"]: entry for entry in get_models()}
    expected_sources = {
        "Qwen/Qwen3.5-9B": ("unsloth/Qwen3.5-9B-GGUF", "Qwen3.5-9B-Q4_K_M.gguf"),
        "Qwen/Qwen3.6-27B": ("unsloth/Qwen3.6-27B-GGUF", "Qwen3.6-27B-Q4_K_M.gguf"),
        "Qwen/Qwen3.6-35B-A3B": ("unsloth/Qwen3.6-35B-A3B-GGUF", "Qwen3.6-35B-A3B-UD-Q4_K_M.gguf"),
    }
    for model_name, wanted in expected_sources.items():
        # A missing or empty gguf_sources list is treated as "no sources".
        listed = [
            (src.get("repo"), src.get("file"))
            for src in (by_name[model_name].get("gguf_sources") or [])
        ]
        assert wanted in listed
def test_safetensors_models_still_recommended_on_cuda(): def test_safetensors_models_still_recommended_on_cuda():
"""Regression guard: vLLM serves safetensors on CUDA, so non-GGUF repos must """Regression guard: vLLM serves safetensors on CUDA, so non-GGUF repos must
NOT be filtered there — the GGUF-only rule is Metal-specific.""" NOT be filtered there — the GGUF-only rule is Metal-specific."""