fix: require GGUF sources for llama downloads (#368)
This commit is contained in:
@@ -7035,7 +7035,8 @@
|
|||||||
"gguf_sources": [
|
"gguf_sources": [
|
||||||
{
|
{
|
||||||
"repo": "unsloth/Qwen3.5-9B-GGUF",
|
"repo": "unsloth/Qwen3.5-9B-GGUF",
|
||||||
"provider": "unsloth"
|
"provider": "unsloth",
|
||||||
|
"file": "Qwen3.5-9B-Q4_K_M.gguf"
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@@ -13733,7 +13734,13 @@
|
|||||||
"architecture": "qwen3",
|
"architecture": "qwen3",
|
||||||
"pipeline_tag": "text-generation",
|
"pipeline_tag": "text-generation",
|
||||||
"release_date": "2026-04-01",
|
"release_date": "2026-04-01",
|
||||||
"gguf_sources": [],
|
"gguf_sources": [
|
||||||
|
{
|
||||||
|
"repo": "unsloth/Qwen3.6-27B-GGUF",
|
||||||
|
"provider": "unsloth",
|
||||||
|
"file": "Qwen3.6-27B-Q4_K_M.gguf"
|
||||||
|
}
|
||||||
|
],
|
||||||
"capabilities": []
|
"capabilities": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -13796,7 +13803,13 @@
|
|||||||
"architecture": "qwen3_moe",
|
"architecture": "qwen3_moe",
|
||||||
"pipeline_tag": "text-generation",
|
"pipeline_tag": "text-generation",
|
||||||
"release_date": "2026-04-01",
|
"release_date": "2026-04-01",
|
||||||
"gguf_sources": [],
|
"gguf_sources": [
|
||||||
|
{
|
||||||
|
"repo": "unsloth/Qwen3.6-35B-A3B-GGUF",
|
||||||
|
"provider": "unsloth",
|
||||||
|
"file": "Qwen3.6-35B-A3B-UD-Q4_K_M.gguf"
|
||||||
|
}
|
||||||
|
],
|
||||||
"capabilities": []
|
"capabilities": []
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@@ -48,6 +48,28 @@ let _removedHwChips = new Set();
|
|||||||
|
|
||||||
export let _gpuToggleTotal = 0; // real GPU count from first scan, never overridden
|
export let _gpuToggleTotal = 0; // real GPU count from first scan, never overridden
|
||||||
|
|
||||||
|
// Return the first entry of `model.gguf_sources` that names a usable repo, or
// null when there is none. `gguf_sources` that is missing or not an array is
// treated as empty. Entries whose `repo` is not a non-blank string are skipped
// so callers can safely build URLs / split the repo id.
function _firstGgufSource(model) {
  const sources = Array.isArray(model?.gguf_sources) ? model.gguf_sources : [];
  const usable = sources.find(
    (src) => src && typeof src.repo === 'string' && src.repo.trim() !== '',
  );
  return usable || null;
}
|
||||||
|
|
||||||
|
// Heuristic: does this model row already point at GGUF weights?
// True when `is_gguf` is truthy, or when any of quant_repo / repo_id / path /
// name contains "gguf" (case-insensitive). A ".gguf" file extension is covered
// by the same substring test, so no separate check is needed.
function _looksLikeGgufRepo(model) {
  if (model?.is_gguf) return true;
  const fields = [model?.quant_repo, model?.repo_id, model?.path, model?.name];
  return fields.some((value) => String(value || '').toLowerCase().includes('gguf'));
}
|
||||||
|
|
||||||
|
// Pick the HuggingFace repo shown as the download source for a model row.
// Returns `{ repo, kind }`; `kind` is 'GGUF' when a GGUF repo was selected for
// the llama.cpp backend and '' otherwise.
//
// For backend 'llamacpp' the order is:
//   1. the first usable entry of `model.gguf_sources`;
//   2. if the row itself looks like GGUF, quant_repo → repo_id → name.
// Any other backend (or a llama.cpp row with no GGUF hint) falls back to
// quant_repo → name → ''.
function _downloadSourceRepo(model, backend) {
  if (backend === 'llamacpp') {
    const ggufSource = _firstGgufSource(model);
    if (ggufSource) {
      return { repo: ggufSource.repo, kind: 'GGUF' };
    }
    if (_looksLikeGgufRepo(model)) {
      const repo = model?.quant_repo || model?.repo_id || model?.name;
      if (repo) {
        return { repo, kind: 'GGUF' };
      }
    }
  }
  return { repo: model?.quant_repo || model?.name || '', kind: '' };
}
|
||||||
|
|
||||||
// Reset GPU-toggle state so the next scan re-renders the RAM/GPU buttons for a
|
// Reset GPU-toggle state so the next scan re-renders the RAM/GPU buttons for a
|
||||||
// (possibly different) server, WITHOUT clearing the markup now — clearing it made
|
// (possibly different) server, WITHOUT clearing the markup now — clearing it made
|
||||||
// the buttons flicker out and back in. The old buttons stay visible until the
|
// the buttons flicker out and back in. The old buttons stay visible until the
|
||||||
@@ -847,13 +869,13 @@ export function _expandModelRow(row, modelData) {
|
|||||||
const isLlamaCpp = backend === 'llamacpp';
|
const isLlamaCpp = backend === 'llamacpp';
|
||||||
const ctx = modelData.context || 8192;
|
const ctx = modelData.context || 8192;
|
||||||
|
|
||||||
const dlRepo = modelData.quant_repo || modelData.name;
|
const dlSource = _downloadSourceRepo(modelData, backend);
|
||||||
const hfUrl = `https://huggingface.co/${dlRepo}`;
|
const hfUrl = `https://huggingface.co/${dlSource.repo}`;
|
||||||
let html = `<div class="hwfit-action-panel" data-model-name="${esc(modelData.name)}">`;
|
let html = `<div class="hwfit-action-panel" data-model-name="${esc(modelData.name)}">`;
|
||||||
html += `<div class="hwfit-panel-header">`;
|
html += `<div class="hwfit-panel-header">`;
|
||||||
html += `<span class="hwfit-panel-model">${esc(modelData.name)}${modelData.quant_repo ? ` <span style="opacity:0.5;font-size:10px;">(${esc(modelData.quant)})</span>` : ''}</span>`;
|
html += `<span class="hwfit-panel-model">${esc(modelData.name)}${dlSource.kind ? ` <span style="opacity:0.5;font-size:10px;">(${esc(dlSource.kind)} ${esc(modelData.quant || '')})</span>` : (modelData.quant_repo ? ` <span style="opacity:0.5;font-size:10px;">(${esc(modelData.quant)})</span>` : '')}</span>`;
|
||||||
html += `<span class="hwfit-panel-badge">${esc(label)}</span>`;
|
html += `<span class="hwfit-panel-badge">${esc(label)}</span>`;
|
||||||
html += `<a href="${esc(hfUrl)}" target="_blank" rel="noopener" class="hwfit-panel-hf-link" title="View on HuggingFace">HF \u2197</a>`;
|
html += `<a href="${esc(hfUrl)}" target="_blank" rel="noopener" class="hwfit-panel-hf-link" title="View download source on HuggingFace">HF \u2197</a>`;
|
||||||
html += `</div>`;
|
html += `</div>`;
|
||||||
html += `<div class="hwfit-panel-actions">`;
|
html += `<div class="hwfit-panel-actions">`;
|
||||||
html += `<button class="cookbook-btn hwfit-dl-btn">Download</button>`;
|
html += `<button class="cookbook-btn hwfit-dl-btn">Download</button>`;
|
||||||
|
|||||||
@@ -57,21 +57,68 @@ export function _setPanelCheckbox(panel, field, checked) {
|
|||||||
|
|
||||||
// ── Command builder: download ──
|
// ── Command builder: download ──
|
||||||
|
|
||||||
|
// Return the first entry of `model.gguf_sources` that names a usable repo, or
// null when there is none. `gguf_sources` that is missing or not an array is
// treated as empty. Entries whose `repo` is not a non-blank string are skipped
// so callers can safely call string methods (e.g. `repo.split('/')`) on it.
function _firstGgufSource(model) {
  const sources = Array.isArray(model?.gguf_sources) ? model.gguf_sources : [];
  const usable = sources.find(
    (src) => src && typeof src.repo === 'string' && src.repo.trim() !== '',
  );
  return usable || null;
}
|
||||||
|
|
||||||
|
// Heuristic: does this model row already point at GGUF weights?
// True when `is_gguf` is truthy, or when any of quant_repo / repo_id / path /
// name contains "gguf" (case-insensitive). A ".gguf" file extension is covered
// by the same substring test, so no separate check is needed.
function _looksLikeGgufRepo(model) {
  if (model?.is_gguf) return true;
  const fields = [model?.quant_repo, model?.repo_id, model?.path, model?.name];
  return fields.some((value) => String(value || '').toLowerCase().includes('gguf'));
}
|
||||||
|
|
||||||
|
// Resolve the GGUF source to download for the llama.cpp backend.
// Returns null for every other backend, and null when no GGUF source can be
// determined (callers treat that as "refuse to download").
//
// Resolution order for 'llamacpp':
//   1. the first usable entry of `model.gguf_sources` (returned as-is, so it
//      may carry `file` / `provider` in addition to `repo`);
//   2. if the row itself looks like GGUF, a bare `{ repo }` built from
//      quant_repo → repo_id → name.
function _ggufDownloadSource(model, backend) {
  if (backend !== 'llamacpp') return null;

  const source = _firstGgufSource(model);
  if (source) return source;

  if (!_looksLikeGgufRepo(model)) return null;

  const repo = model?.quant_repo || model?.repo_id || model?.name;
  return repo ? { repo } : null;
}
|
||||||
|
|
||||||
|
// Choose the file pattern used to restrict a GGUF download.
// Preference order:
//   1. the exact `source.file` when the source pins one;
//   2. `*<quant>*` when the model declares a quant label;
//   3. `*.gguf` as a catch-all.
function _ggufIncludePattern(model, source) {
  const pinnedFile = source?.file;
  if (pinnedFile) return pinnedFile;

  const quant = model?.quant;
  if (quant) return `*${quant}*`;

  return '*.gguf';
}
|
||||||
|
|
||||||
|
// Build the user-facing explanation shown when a llama.cpp download is
// requested for a model that has no GGUF source. Falls back to "this model"
// when the model has no name.
function _missingGgufMessage(model) {
  const name = model?.name || 'this model';
  return `No GGUF source is configured for ${name}. Pick a model with a GGUF source, or paste the GGUF repo in Download.`;
}
|
||||||
|
|
||||||
|
// Quote a value as a single POSIX-shell word.
// The value is stringified (null/undefined become ''), wrapped in single
// quotes, and every embedded single quote is rewritten as '\'' (close the
// quote, emit an escaped quote, reopen) so nothing inside is interpreted.
function _bashQuote(value) {
  const text = String(value ?? '');
  const escaped = text.replace(/'/g, "'\\''");
  return `'${escaped}'`;
}
|
||||||
|
|
||||||
|
// Build a shell command that prints the "no GGUF source" message to stderr
// and exits with status 1, so the download preview fails loudly instead of
// fetching a non-GGUF repo.
//
// Windows: emits a PowerShell `Write-Error` with the message as a
// single-quoted literal (embedded ' doubled to ''). A double-quoted string
// would let `$name` / backtick sequences in a model name be expanded.
// Elsewhere: emits a POSIX `printf` with the message quoted by _bashQuote.
function _missingGgufCommand(model) {
  const msg = _missingGgufMessage(model);
  if (_isWindows()) {
    const psLiteral = `'${msg.replace(/'/g, "''")}'`;
    return `Write-Error ${psLiteral}; exit 1`;
  }
  return `printf '%s\\n' ${_bashQuote(msg)} >&2; exit 1`;
}
|
||||||
|
|
||||||
export function _buildDownloadCmd(model, backend) {
|
export function _buildDownloadCmd(model, backend) {
|
||||||
let cmd = '';
|
let cmd = '';
|
||||||
if (backend === 'ollama') {
|
if (backend === 'ollama') {
|
||||||
cmd = `ollama pull ${model.name.split('/').pop().toLowerCase()}`;
|
cmd = `ollama pull ${model.name.split('/').pop().toLowerCase()}`;
|
||||||
} else {
|
} else {
|
||||||
const repo = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length)
|
const ggufSource = _ggufDownloadSource(model, backend);
|
||||||
? model.gguf_sources[0].repo : model.name;
|
if (backend === 'llamacpp' && !ggufSource) {
|
||||||
const includeArg = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length)
|
cmd = _missingGgufCommand(model);
|
||||||
? `, allow_patterns=["*${model.quant || ''}*"]` : '';
|
} else {
|
||||||
// Reflect the server's download target in the preview (matches the real
|
const repo = ggufSource?.repo || model.name;
|
||||||
// download path built server-side). '' = default HF cache.
|
const includePattern = backend === 'llamacpp' ? _ggufIncludePattern(model, ggufSource) : null;
|
||||||
const _dlDir = (_envState.servers.find(s => s.host === (_envState.remoteHost || '')) || {}).downloadDir || '';
|
const includeArg = includePattern ? `, allow_patterns=["${includePattern.replace(/\\/g, '\\\\').replace(/"/g, '\\"')}"]` : '';
|
||||||
const _localDirArg = _dlDir ? `, local_dir=os.path.expanduser('${_dlDir.replace(/\/$/, '')}/${repo.split('/').pop()}')` : '';
|
// Reflect the server's download target in the preview (matches the real
|
||||||
const _py = _isWindows() ? 'python' : 'python3';
|
// download path built server-side). '' = default HF cache.
|
||||||
cmd = `${_py} -u -c "
|
const _dlDir = (_envState.servers.find(s => s.host === (_envState.remoteHost || '')) || {}).downloadDir || '';
|
||||||
|
const _localDirArg = _dlDir ? `, local_dir=os.path.expanduser('${_dlDir.replace(/\/$/, '')}/${repo.split('/').pop()}')` : '';
|
||||||
|
const _py = _isWindows() ? 'python' : 'python3';
|
||||||
|
cmd = `${_py} -u -c "
|
||||||
import sys, time, os
|
import sys, time, os
|
||||||
os.environ['HF_HUB_DISABLE_PROGRESS_BARS']='0'
|
os.environ['HF_HUB_DISABLE_PROGRESS_BARS']='0'
|
||||||
os.environ['TQDM_DISABLE']='0'
|
os.environ['TQDM_DISABLE']='0'
|
||||||
@@ -125,6 +172,7 @@ try:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f'ERROR {e}',file=sys.stderr,flush=True);sys.exit(1)
|
print(f'ERROR {e}',file=sys.stderr,flush=True);sys.exit(1)
|
||||||
"`;
|
"`;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
const prefix = _buildEnvPrefix();
|
const prefix = _buildEnvPrefix();
|
||||||
let full = prefix ? prefix + ' ' + cmd : cmd;
|
let full = prefix ? prefix + ' ' + cmd : cmd;
|
||||||
@@ -402,10 +450,13 @@ export async function _runPanelCmd(panel, cmd, opts = {}) {
|
|||||||
// ── Model download (dedicated endpoint, tmux-backed) ──
|
// ── Model download (dedicated endpoint, tmux-backed) ──
|
||||||
|
|
||||||
export async function _runModelDownload(panel, model, backend, hostOverride) {
|
export async function _runModelDownload(panel, model, backend, hostOverride) {
|
||||||
const repo = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length)
|
const ggufSource = _ggufDownloadSource(model, backend);
|
||||||
? model.gguf_sources[0].repo : (model.quant_repo || model.name);
|
if (backend === 'llamacpp' && !ggufSource) {
|
||||||
const include = (backend === 'llamacpp' && model.gguf_sources && model.gguf_sources.length)
|
uiModule.showToast(_missingGgufMessage(model));
|
||||||
? `*${model.quant || ''}*` : null;
|
return;
|
||||||
|
}
|
||||||
|
const repo = ggufSource?.repo || model.quant_repo || model.name;
|
||||||
|
const include = backend === 'llamacpp' ? _ggufIncludePattern(model, ggufSource) : null;
|
||||||
|
|
||||||
_syncEnvFromPanel(panel);
|
_syncEnvFromPanel(panel);
|
||||||
|
|
||||||
|
|||||||
@@ -70,6 +70,21 @@ def test_only_gguf_models_recommended_on_metal():
|
|||||||
assert unservable == [], f"{len(unservable)} non-GGUF models on Metal, e.g. {unservable[:3]}"
|
assert unservable == [], f"{len(unservable)} non-GGUF models on Metal, e.g. {unservable[:3]}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_qwen_catalog_entries_point_at_verified_gguf_repos():
    """Qwen GGUF-looking Cookbook rows must download GGUF repos, not the base
    safetensors repositories."""
    catalog = {m["name"]: m for m in get_models()}
    # model name -> (expected GGUF repo, expected GGUF file)
    expected = {
        "Qwen/Qwen3.5-9B": ("unsloth/Qwen3.5-9B-GGUF", "Qwen3.5-9B-Q4_K_M.gguf"),
        "Qwen/Qwen3.6-27B": ("unsloth/Qwen3.6-27B-GGUF", "Qwen3.6-27B-Q4_K_M.gguf"),
        "Qwen/Qwen3.6-35B-A3B": ("unsloth/Qwen3.6-35B-A3B-GGUF", "Qwen3.6-35B-A3B-UD-Q4_K_M.gguf"),
    }

    for model_name, (repo, filename) in expected.items():
        # Fail with a readable message rather than a bare KeyError.
        assert model_name in catalog, f"{model_name} is missing from the catalog"
        sources = catalog[model_name].get("gguf_sources") or []
        assert any(
            src.get("repo") == repo and src.get("file") == filename for src in sources
        ), f"{model_name}: expected gguf source {repo}/{filename}, got {sources}"
|
||||||
|
|
||||||
|
|
||||||
def test_safetensors_models_still_recommended_on_cuda():
|
def test_safetensors_models_still_recommended_on_cuda():
|
||||||
"""Regression guard: vLLM serves safetensors on CUDA, so non-GGUF repos must
|
"""Regression guard: vLLM serves safetensors on CUDA, so non-GGUF repos must
|
||||||
NOT be filtered there — the GGUF-only rule is Metal-specific."""
|
NOT be filtered there — the GGUF-only rule is Metal-specific."""
|
||||||
|
|||||||
Reference in New Issue
Block a user