diff --git a/static/js/cookbook-diagnosis.js b/static/js/cookbook-diagnosis.js
index ade31bd..4d6e807 100644
--- a/static/js/cookbook-diagnosis.js
+++ b/static/js/cookbook-diagnosis.js
@@ -23,6 +23,44 @@ import {
// browser loads it once. See cookbook-hwfit.js.
} from './cookbook.js';
import uiModule from './ui.js';
+
+// Tiny HTML-escape — keeps the file standalone instead of leaning on a
+// shared helper that may not be exported from this module's import surface.
+//
+// Converts the five HTML-significant characters (& < > " ') to entity
+// references so the result can be concatenated into an innerHTML string in
+// both text and quoted-attribute positions. null/undefined become ''; any
+// other value is stringified with String() first.
+function _diagEsc(s) {
+ // Each key must map to its ENTITY, not to itself — an identity map would
+ // leave the input unescaped and let label text be parsed as markup.
+ const entities = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'};
+ return String(s ?? '').replace(/[&<>"']/g, (ch) => entities[ch]);
+}
+
+// Pick an icon for a diagnosis-action button based on the label. The icon
+// renders on the LEFT of the button text. Keeps the strokes consistent
+// across the set so they read as one family.
+//
+// Matching runs on the lowercased label; a falsy label (null, undefined, '')
+// is treated as '' and falls through to the default icon. Branches are
+// tested top to bottom and the first match wins, so a label that starts with
+// "retry" never reaches the later "install" / "use " checks.
+//
+// NOTE(review): as written here the `_svg` template and every argument
+// passed to it are empty strings, so each branch returns ''. Presumably the
+// SVG markup belongs in those literals — confirm against the committed file.
+function _diagFixIcon(label) {
+ const l = String(label || '').toLowerCase();
+ // Shared wrapper for all icons. As written it returns an empty template
+ // and does not use `path` — presumably the <svg> shell goes here; verify.
+ const _svg = (path) => ``;
+ if (l.startsWith('retry') || l.includes('relaunch') || l.includes('restart')) {
+ // Circular-arrow refresh
+ return _svg('');
+ }
+ if (l.startsWith('copy')) {
+ return _svg('');
+ }
+ if (l.startsWith('edit')) {
+ return _svg('');
+ }
+ if (l.startsWith('open') || l.includes('dependencies')) {
+ return _svg('');
+ }
+ if (l.startsWith('install') || l.includes('upgrade')) {
+ return _svg('');
+ }
+ if (l.startsWith('kill') || l.startsWith('stop')) {
+ return _svg('');
+ }
+ // NOTE(review): `includes('use ')` is a plain substring test, so it also
+ // matches labels such as "pause …" or "reuse …" — confirm that is intended.
+ if (l.startsWith('switch') || l.includes('use ')) {
+ return _svg('');
+ }
+ // Default: lightbulb (generic "suggestion")
+ return _svg('');
+}
import spinnerModule from './spinner.js';
// ── Error diagnosis ──
@@ -577,7 +615,7 @@ export function _showDiagnosis(panel, diagnosis, sourceText) {
const btn = document.createElement('button');
btn.className = 'cookbook-btn cookbook-diag-btn';
btn.type = 'button';
- btn.textContent = fix.label;
+ btn.innerHTML = _diagFixIcon(fix.label) + '' + _diagEsc(fix.label) + '';
btn.addEventListener('click', (e) => {
e.stopPropagation();
runFix(fix, btn);
@@ -603,7 +641,7 @@ export function _showDiagnosis(panel, diagnosis, sourceText) {
for (const fix of fixes) {
const item = document.createElement('button');
item.type = 'button';
- item.textContent = fix.label;
+ item.innerHTML = _diagFixIcon(fix.label) + '' + _diagEsc(fix.label) + '';
item.addEventListener('click', async (e) => {
e.stopPropagation();
if (item.dataset.busy || trigger.dataset.busy) return;
diff --git a/static/js/cookbook-hwfit.js b/static/js/cookbook-hwfit.js
index 68ba334..161b6f3 100644
--- a/static/js/cookbook-hwfit.js
+++ b/static/js/cookbook-hwfit.js
@@ -527,6 +527,9 @@ export async function _hwfitFetch(fresh = false) {
if (useCase) params.set('use_case', useCase);
if (quantPref) params.set('quant', quantPref);
if (targetCtx) params.set('ctx', String(targetCtx));
+ // Fit-only filter — set by the dot in the Fit column header.
+ const _fitOnly = (() => { try { return localStorage.getItem('hwfit_fit_only_v1') === '1'; } catch { return false; } })();
+ if (_fitOnly) params.set('fit_only', '1');
}
const endpoint = isImageMode ? `/api/hwfit/image-models?${params}` : `/api/hwfit/models?${params}`;
const res = await fetch(endpoint);
@@ -888,9 +891,15 @@ export function _hwfitRenderList(el, models) {
arrow = isReversed ? ' \u25B2' : ' \u25BC';
}
const dataAttr = col.key ? ` data-sort="${col.key}"` : '';
- const label = (col.cls === 'hwfit-fit' && _budget)
- ? `${col.label} (${_budget})`
- : col.label;
+ // Fit column gets a small dot to its left that toggles "show only models
+ // that fit" — replaces the old Fits On/Off button next to the toolbar.
+ let label = col.label;
+ if (col.cls === 'hwfit-fit') {
+ const _fitOnly = (() => { try { return localStorage.getItem('hwfit_fit_only_v1') === '1'; } catch { return false; } })();
+ label = `●${col.label}`;
+ // (Budget tag removed — the GPU/RAM/N-GPU suffix next to "Fit" was noise;
+ // the toggle row already shows which budget is active.)
+ }
html += `${label}${arrow}`;
}
html += '
';
@@ -910,9 +919,31 @@ export function _hwfitRenderList(el, models) {
const dlDot = (_cachedModelIds && (_cachedModelIds.has(m.name) || [..._cachedModelIds].some(id => id === m.name?.split('/').pop()))) ? '\u25CF' : '';
html += `
`;
html += `${esc(fitLabel)}`;
- html += `${modelLogo(m.name)}${esc(m.name?.split('/').pop() || m.name)}${moeBadge}${imgBadge}${dlDot}`;
+ // Append quant to the title when it's not already in the repo name. The
+ // suffix strips quant-parts the name already contains — e.g. for
+ // QuantTrio/MiniMax-M2-AWQ + quant=AWQ-4bit we just show "(4bit)", not
+ // "(AWQ-4bit)". DeepSeek-V4-Flash + FP4-MoE-Mixed keeps the full tag
+ // (none of those parts are in the repo id).
+ const _short = m.name?.split('/').pop() || m.name || '';
+ const _quantTag = (m.quant || '').trim();
+ const _lowerShort = _short.toLowerCase();
+ let _quantSuffix = '';
+ if (_quantTag) {
+ const _parts = _quantTag.split(/[-_]/).filter(Boolean);
+ const _remaining = _parts.filter(p => !_lowerShort.includes(p.toLowerCase()));
+ if (_remaining.length && _remaining.length < _parts.length + 1) { // at least one part is new
+ let _display = _remaining.join('-');
+ if (_display.length > 9) _display = _display.slice(0, 9) + '…';
+ _quantSuffix = ` (${esc(_display)})`;
+ }
+ }
+ html += `${modelLogo(m.name)}${esc(_short)}${_quantSuffix}${moeBadge}${imgBadge}${dlDot}`;
html += `${esc(pcount)}`;
- html += `${esc(m.quant || '?')}`;
+ // Truncate the Quant cell to 9 chars + ellipsis so long tags like
+ // "FP4-MoE-Mixed" don't push neighboring columns. Full tag stays in title.
+ const _qRaw = m.quant || '?';
+ const _qShort = _qRaw.length > 9 ? _qRaw.slice(0, 9) + '…' : _qRaw;
+ html += `${esc(_qShort)}`;
html += `${vramLabel}`;
html += `${m.is_image_gen ? '\u2014' : ctx}`;
html += `${m.is_image_gen ? '\u2014' : tps + ' t/s'}`;
@@ -934,7 +965,26 @@ export function _hwfitRenderList(el, models) {
});
// Clickable header columns → sort (click again to toggle direction)
el.querySelectorAll('.hwfit-header .hwfit-sortable').forEach(col => {
- col.addEventListener('click', () => {
+ col.addEventListener('click', (e) => {
+ // The little dot inside the Fit header is its own toggle (fit-only
+ // filter), don't let it fall through to a sort click.
+ if (e.target.closest('[data-fit-dot]')) {
+ const on = !e.target.classList.contains('active');
+ try { localStorage.setItem('hwfit_fit_only_v1', on ? '1' : '0'); } catch {}
+ // Un-toggling the fit filter (off → showing too-tight rows again) is
+ // typically because the user wants to see the LARGE models they can't
+ // run yet — re-sort by VRAM descending so the biggest surface first.
+ if (!on) {
+ const sortSel = document.getElementById('hwfit-sort');
+ if (sortSel) {
+ sortSel.value = 'vram';
+ sortSel.dataset.reverse = '0'; // descending (biggest first)
+ }
+ }
+ _hwfitCache = null;
+ _hwfitFetch();
+ return;
+ }
const sortKey = col.dataset.sort;
if (!sortKey) return;
const sel = document.getElementById('hwfit-sort');
@@ -1018,7 +1068,16 @@ export function _expandModelRow(row, modelData) {
if (modelData.is_image_gen) {
html += `
`;
} else if (_requiresAcceleratorBackend(modelData)) {
- html += `
This is a safetensors GPU-serving format. Use vLLM/SGLang with a visible CUDA/ROCm accelerator, or pick a GGUF download for llama.cpp/Ollama.
`;
+ // Only show the "needs CUDA/ROCm" note when the host doesn't already have
+ // one. With a visible CUDA/ROCm accelerator the note is noise — the user
+ // can already serve the model and reading the warning on every row makes
+ // the panel feel like everything's broken.
+ const _sys = _hwfitCache?.system || {};
+ const _backend = (_sys.backend || '').toLowerCase();
+ const _hasGpuAccel = !!_sys.has_gpu && (_backend === 'cuda' || _backend === 'rocm');
+ if (!_hasGpuAccel) {
+ html += `
This is a safetensors GPU-serving format. Use vLLM/SGLang with a visible CUDA/ROCm accelerator, or pick a GGUF download for llama.cpp/Ollama.
`;
+ }
}
html += `
`;
@@ -1243,14 +1302,14 @@ export function _hwfitInit() {
const targetCtx = _ctxValue();
try { localStorage.setItem(_CTX_KEY, String(targetCtx)); } catch {}
// Ctx drag affects sort mode: a specific ctx target (anything < Max)
- // implies the user is hunting for "what fits at this context length",
- // so re-rank by fit (lowest first). Dragging back to Max means no
- // ctx constraint → go back to the default score-based ranking.
+ // implies "what runs at this context length" — sort by VRAM ascending
+ // so the cheapest-fitting models surface first. Dragging back to Max
+ // releases the constraint → go back to the default score ranking.
const sortSel = document.getElementById('hwfit-sort');
if (sortSel) {
if (targetCtx) {
- sortSel.value = 'fit';
- sortSel.dataset.reverse = '1';
+ sortSel.value = 'vram';
+ sortSel.dataset.reverse = '1'; // ascending = smallest VRAM first
} else {
sortSel.value = 'score';
sortSel.dataset.reverse = '';
diff --git a/static/js/cookbook.js b/static/js/cookbook.js
index d60dd2d..507777a 100644
--- a/static/js/cookbook.js
+++ b/static/js/cookbook.js
@@ -18,6 +18,7 @@ import {
_launchServeTask, _serveAutoFix, _serveAutoRetry, _serveAutoRetryReplace, _serveAutoRetryRemove,
_startBackgroundMonitor, _syncFromServer,
_retryDownload, _nextAvailablePort, _processQueue,
+ _selfHealStaleTasks,
} from './cookbookRunning.js';
import {
@@ -641,6 +642,13 @@ async function _fetchDependencies() {
const winBlocked = !isLocal && _isWindows() && _winUnsupported.has(pkg.name);
const note = pkg.status_note ? `
` : '';
+ // Inline "Rebuild" tag for the llama_cpp row only. Styled as a
+ // .cookbook-dep-tag so it matches the LLM category tag's pill look,
+ // and lives to the LEFT of the category tag (clear affordance before
+ // the row "value").
+ const _rebuildBtn = (pkg.name === 'llama_cpp')
+ ? ``
+ : '';
return `
`;
@@ -1237,6 +1246,10 @@ function _wireTabEvents(body) {
const folded = dlFoldBody.style.display === 'none';
dlFoldBody.style.display = folded ? '' : 'none';
dlFoldChevron.textContent = folded ? '▾' : '▸';
+ // Toggle is-folded class on the h2 so the line under it only shows when
+ // the section is collapsed (the body's content normally provides
+ // separation; with no body visible, the line gives the h2 definition).
+ dlFold.classList.toggle('is-folded', !folded);
try { localStorage.setItem('cookbook_dl_tab_folded_v1', folded ? '0' : '1'); } catch {}
});
}
@@ -1456,7 +1469,7 @@ export function _serverEntryHtml(s, i, defaultServer, forceRemote, isNew) {
html += ``;
html += `${esc(_srvTitle)}`;
html += _pIco ? `${_pIco}` : '';
- html += ``;
+ html += ``;
if (isNew) {
// New server: Cancel (discard) sits top-right; the default toggle only makes
// sense once the server is saved.
@@ -1535,7 +1548,7 @@ function _renderRecipes() {
// State persisted to localStorage so the fold survives reloads.
const _dlTabFolded = (() => { try { return localStorage.getItem('cookbook_dl_tab_folded_v1') === '1'; } catch { return false; } })();
html += '
';
- html += `
Download${_dlTabFolded ? '▸' : '▾'}
`;
+ html += `
Download${_dlTabFolded ? '▸' : '▾'}
`;
html += '
';
html += `
`;
html += '
Download from HuggingFace by pasting model link, or download directly in the Scan section below.
';
@@ -1605,36 +1618,43 @@ function _renderRecipes() {
html += '
Scans your hardware for what models you can run. Hardware is cached; hit the scan button to re-probe after changing GPUs.
';
html += '
';
html += '';
- html += '';
- // Quant (Q4/Q8/…) lives next to the search now. Default is "All" so the
- // list shows the best-scoring quant for every model instead of silently
- // filtering to Q4 (which used to be the implicit default).
- html += '';
- // Engine filter — show only models whose serve engine matches. Composes
- // with quant / type / search filters.
+ // Engine sits next to the type filter so the "what category / which serving
+ // path" filters live together; Quant + Context are storage-format and budget
+ // levers, grouped to the right.
+ html += '';
html += '';
- html += '?';
+ html += '?';
+ html += '';
+ // Quant (Q4/Q8/…). Default is "All" so the list shows the best-scoring
+ // quant for every model instead of silently filtering to Q4.
+ html += '';
+ html += '';
+ html += '?';
+ html += '';
// Ctx slider — lets you target a context length for fit estimates; the
// hwfit ranking uses _ctxValue() to factor that into VRAM math, so
// dragging this re-sorts the list toward models that fit your chosen ctx.
html += '';
+ // Search lives at the far right of the toolbar so the controls (Type/Quant/
+ // Engine/Context) read as a row of compact filters followed by free-text.
+ html += '';
html += '
';
html += '
';
html += '
';
html += '
Detected hardware
';
html += '';
+ // Footer: link to the public discussion where users can request additions
+ // to the curated model list. Sits below the list so it reads as a callout
+ // after browsing, not a header.
+ html += '';
html += '
';
@@ -1707,7 +1737,8 @@ function _renderRecipes() {
html += '
';
html += '
';
html += '
Dependencies
';
- html += '';
+ // Rebuild llama.cpp button moved into the llama_cpp dep row (see _depRow);
+ // having it in the title polluted the section header.
html += 'Server';
html += '';
html += _buildServerOpts(false);
@@ -1746,7 +1777,7 @@ function _renderRecipes() {
// ── Servers block ───────────────────────────────────────────────────
html += '