Cookbook: scoring fixes, UI polish, false-finished + stale-state bug fixes

Backend (services/hwfit + routes):
- rank_models picks visible set by REQUESTED column, not always score —
  sorting by Param now shows highest-param models PERIOD (incl. too_tight).
- New fit_only param. Multi-GPU rigs filter GGUF Q*/IQ quants (vLLM/SGLang
  cannot serve them); default non-prequantized to BF16 on 2+ GPUs.
- AWQ / GPTQ-8bit get a -1.0 quality penalty (was 0.0, tied with FP8), so
  FP8 wins when both fit.
- Version-aware tiebreaker (parse Mn.n / Vn) — MiniMax-M2.7 ranks above
  M2.5 on equal composite score; >=100B integers not misread as versions.
- /api/cookbook/hf-latest no longer drops models without an "NB" pattern in
  the repo id (MiniMax-M2.7, DeepSeek-V4-Pro etc. were silently filtered).
- Cached-model scan: atexit flushes models JSON even if the script is
  killed mid-walk; each scan_dir wrapped in try/except; timeout 60s -> 180s.
- KB granularity for sub-MB sizes (was "0 MB" for 12 KB shells). New
  "stalled" status for shells <1 MB with no .incomplete files.
- /api/cookbook/state POST guard: rejects "done" download tasks lacking
  DOWNLOAD_OK / DOWNLOAD_FAILED / /snapshots/ when the last-mentioned
  shard is N<total — stops stale tabs from poisoning persisted state.
- hf_models.json: add zai-org/GLM-5.1; flip zai-org/GLM-5 quantization
  Q4_K_M -> BF16 (it is the native base, not a quant).

Frontend (static/js):
- Scan/Download toolbar: quant defaults to All; ctx slider (8k/16k/32k/
  50k/128k/Max) ported from origin/main with sort=fit on drag, sort=score
  on Max. GPU toggle commits _activeCount to maxGpu on initial render. Fit
  column header tagged with active budget (RAM / GPU / N GPU).
- Foldable Download admin-card: the Download h2 is the chevron trigger;
  state persists in localStorage.
- Download card surfaces destination dir (Dir: <path>). Same dir on running
  task row, font/color matched to uptime (9px Fira Code muted, opacity .4).
- Serve panel ctx text input always resets to model max on open. Sub-MB
  cached models show with red "download stalled" badge.
- Bulk-select Cancel + Delete reset the Select button label on exit.
- Cookbook running: false-finished bug fixed — DOWNLOAD_OK or /snapshots/
  required; bare "Download complete" no longer marks the task done after
  the first config file. Clear button now sends tmux kill-session too.
  True overall % for multi-shard downloads: ((N-1)+frac)/total instead of
  hf_transfer per-shard aggregate.
- Diagnosis card simplified: removed fold toggle, copy button, dismiss X.
  Suggestion font matches message body (12px).
- HF token field flashes green check + "Saved" on save.
- Cached scan no longer counts stalled rows as downloaded in Scan/Download.

CSS:
- dep Install button width pinned to 76px to match Installed split.
- task-sub row +1px; task-status badge gets margin-right 8px.
- Ctx slider styled like gallery editor sliders (thin pill rail, red thumb).
- Bulk-select cancel button top -3px -> -5px.
This commit is contained in:
pewdiepie-archdaemon
2026-06-03 16:32:20 +09:00
parent ab0a480f30
commit eb79b76432
15 changed files with 1175 additions and 198 deletions

View File

@@ -27,6 +27,56 @@ import spinnerModule from './spinner.js';
// ── Error diagnosis ──
/**
 * Open the Cookbook modal on its Dependencies tab and, when a package name is
 * given, scroll to and briefly flash the matching dependency row.
 *
 * The dependency list may not be in the DOM yet when this is called, so the
 * lookup is retried every 100 ms (up to 45 extra attempts) until at least one
 * row exists.
 *
 * @param {string} [pkgName=''] Package to highlight; empty means "just open".
 */
function _openCookbookDependencies(pkgName = '') {
  const cookbook = window.cookbookModule;
  if (cookbook && typeof cookbook.open === 'function') {
    cookbook.open({ tab: 'Dependencies' });
  } else {
    // Fall back to clicking the toolbar button when the module API is absent.
    document.getElementById('tool-cookbook-btn')?.click();
  }
  const wanted = String(pkgName || '').toLowerCase();
  const tryHighlight = (attempt = 0) => {
    const modal = document.getElementById('cookbook-modal');
    const tab = modal?.querySelector('.cookbook-tab[data-backend="Dependencies"]');
    if (tab && !tab.classList.contains('active')) tab.click();
    const rows = [...document.querySelectorAll('#cookbook-deps-list [data-pkg-name]')];
    if (!rows.length) {
      if (attempt < 45) setTimeout(() => tryHighlight(attempt + 1), 100);
      return;
    }
    if (!wanted) return;
    const row = rows.find((r) => {
      const name = (r.dataset.pkgName || '').toLowerCase();
      const pip = (r.dataset.depPip || '').toLowerCase();
      // An empty name must not take part in the reverse substring check:
      // ''.includes-style matching is always true and would select a
      // blank-named row instead of the requested package.
      return name === wanted
        || pip.includes(wanted)
        || (name !== '' && wanted.includes(name));
    });
    if (row) {
      row.scrollIntoView({ block: 'center' });
      row.classList.add('cookbook-pkg-flash');
      setTimeout(() => row.classList.remove('cookbook-pkg-flash'), 1800);
    }
  };
  tryHighlight();
}
/**
 * Ask the task row that contains `panel` to open its serve editor.
 *
 * Dispatches a bubbling `cookbook:edit-serve` CustomEvent on the nearest
 * `.cookbook-task` ancestor; does nothing when no such ancestor exists.
 *
 * @param {Element|null|undefined} panel Element inside a task row.
 * @param {object|null} [fields=null] Passed through as `detail.fields`.
 */
function _openServeEditFromDiagnosis(panel, fields = null) {
  const taskEl = panel?.closest?.('.cookbook-task');
  if (taskEl) {
    const editEvent = new CustomEvent('cookbook:edit-serve', {
      bubbles: true,
      detail: { fields },
    });
    taskEl.dispatchEvent(editEvent);
  }
}
/**
 * Open the serve editor for the task containing `panel`, passing field
 * overrides that select the llama.cpp backend with no GPUs listed.
 *
 * @param {Element} panel Element inside the task row to edit.
 */
function _openCpuServeEdit(panel) {
  const cpuFields = {
    backend: 'llamacpp',
    gpus: '',
    tp: '1',
    gpu_mem: '0.80',
    _forceBackend: true,
  };
  _openServeEditFromDiagnosis(panel, cpuFields);
}
// Infer the gated base repo that single-file checkpoints need configs from
function _inferBaseRepo(text) {
if (!text) return null;
@@ -70,17 +120,12 @@ export const ERROR_PATTERNS = [
},
{
pattern: /not divisible by weight quantization|quantization block/i,
message: 'Model quantization format incompatible with this vLLM version. Try a different quant (AWQ) or update vLLM.',
message: 'FP8 MoE quantization is incompatible with this tensor-parallel split.',
suggestion: 'Suggested action: retry with a lower tensor-parallel size, such as TP=4 or TP=2. If it still fails, use a non-FP8/GGUF version of the model.',
fixes: [
{ label: 'Update vLLM on server', action: (panel) => {
const taskEl = panel.closest('.cookbook-task');
const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
const host = task?.remoteHost || '';
const prefix = _buildEnvPrefix();
const pipCmd = prefix ? prefix + ' pip install -U vllm' : 'pip install -U vllm';
const cmd = host ? _sshCmd(host, pipCmd) : pipCmd;
_launchServeTask('update-vllm', 'pip-update', cmd);
}},
{ label: 'Retry with TP=4', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '4') },
{ label: 'Retry with TP=2', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '2') },
{ label: 'Edit serve', action: (panel) => _openServeEditFromDiagnosis(panel) },
],
},
{
@@ -218,6 +263,7 @@ export const ERROR_PATTERNS = [
pattern: /vllm.*command not found|No module named vllm/i,
message: 'vLLM is not installed or not in PATH.',
fixes: [
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('vllm') },
{ label: 'Check environment is set', action: (panel) => {
const el = panel.querySelector('[data-field="env_type"]');
if (el) { el.focus(); el.style.borderColor = 'var(--red)'; }
@@ -226,11 +272,21 @@ export const ERROR_PATTERNS = [
},
{
pattern: /sglang.*command not found|No module named sglang|SGLang is not installed/i,
message: 'SGLang is not installed or not in PATH. Open Cookbook → Dependencies and install sglang on this server.',
message: 'SGLang is not installed or not in PATH.',
fixes: [
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('sglang') },
{ label: 'Copy install command', action: () => _copyText('python3 -m pip install "sglang[all]"') },
],
},
{
pattern: /No accelerator \(CUDA, XPU, HPU, NPU, MUSA, MPS\) is available|Triton is not supported on current platform/i,
message: 'SGLang needs a visible GPU/accelerator on this server.',
suggestion: 'Suggested action: switch this serve config to llama.cpp for CPU/local serving, or choose a GPU server.',
fixes: [
{ label: 'Switch to llama.cpp', action: (panel) => _openCpuServeEdit(panel) },
{ label: 'Choose GPU server', action: (panel) => _openServeEditFromDiagnosis(panel) },
],
},
{
pattern: /flashinfer.*version.*does not match|flashinfer-cubin version/i,
message: 'FlashInfer version mismatch.',
@@ -241,8 +297,12 @@ export const ERROR_PATTERNS = [
},
{
pattern: /torch\.cuda\.is_available\(\).*False|No CUDA runtime/i,
message: 'CUDA not available in this environment.',
fixes: [],
message: 'vLLM needs a visible CUDA/ROCm GPU.',
suggestion: 'Suggested action: switch this serve config to llama.cpp for CPU/local serving, or choose a GPU server.',
fixes: [
{ label: 'Switch to llama.cpp', action: (panel) => _openCpuServeEdit(panel) },
{ label: 'Choose GPU server', action: (panel) => _openServeEditFromDiagnosis(panel) },
],
},
{
pattern: /Engine core initialization failed/i,
@@ -295,17 +355,20 @@ export const ERROR_PATTERNS = [
},
{
pattern: /Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels\/layer/i,
message: 'vLLM/Transformers kernel package mismatch.',
message: 'Transformers/kernels package mismatch.',
fixes: [
{ label: 'Update vLLM/Transformers/kernels', action: (panel) => {
{ label: 'Repair kernel package', action: (panel) => {
const taskEl = panel.closest('.cookbook-task');
const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
const host = task?.remoteHost || '';
const prefix = _buildEnvPrefix();
const pipCmd = prefix ? prefix + ' python3 -m pip install -U vllm transformers kernels' : 'python3 -m pip install -U vllm transformers kernels';
const pipCmd = prefix
? prefix + ' python3 -m pip install --user --break-system-packages "kernels<0.15"'
: 'python3 -m pip install --user --break-system-packages "kernels<0.15"';
const cmd = host ? _sshCmd(host, pipCmd) : pipCmd;
_launchServeTask('update-vllm-stack', 'pip-update', cmd);
_launchServeTask('repair-kernels', 'pip-update', cmd);
}},
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('sglang') },
],
},
{
@@ -319,13 +382,24 @@ export const ERROR_PATTERNS = [
pattern: /llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'/i,
message: 'llama-cpp-python server is not installed. Run: pip install "llama-cpp-python[server]"',
fixes: [
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('llama_cpp') },
{ label: 'Copy install command', action: () => _copyText('pip install "llama-cpp-python[server]"') },
],
},
{
pattern: /CUDA Toolkit not found|Unable to find cudart library|missing:\s*CUDA_CUDART/i,
message: 'llama.cpp found nvcc, but the CUDA runtime library is missing.',
suggestion: 'Suggested action: relaunch with the updated runner so llama.cpp builds CPU-only, or install a complete CUDA toolkit/runtime on this server for GPU llama.cpp.',
fixes: [
{ label: 'Edit serve', action: (panel) => _openServeEditFromDiagnosis(panel) },
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('llama_cpp') },
],
},
{
pattern: /No module named ['"]?torch|No module named ['"]?diffusers|diffusers.*command not found/i,
message: 'Diffusion serving needs PyTorch and diffusers. Install diffusers from Cookbook → Dependencies.',
fixes: [
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('diffusers') },
{ label: 'Copy install command', action: () => _copyText('python3 -m pip install "diffusers[torch]"') },
],
},
@@ -402,10 +476,32 @@ export function _diagnose(text) {
return null;
}
/**
 * Build a Markdown troubleshooting report for a diagnosed task.
 *
 * Sections are emitted in a fixed order (Task, Diagnosis, Suggested action,
 * Launch command, Captured output); every section except Diagnosis is
 * omitted when its input is missing or empty.
 *
 * @param {object|null} task Task record, or null/undefined.
 * @param {{message?: string}|null} diagnosis Diagnosis; message defaults to "(none)".
 * @param {*} sourceText Captured output; stringified and trimmed.
 * @param {string} suggestionText Suggestion; a leading "Suggested action:" is stripped.
 * @returns {string} Report lines joined with "\n".
 */
function _diagnosisCopyBundle(task, diagnosis, sourceText, suggestionText) {
  const report = ['## Odysseus Cookbook troubleshooting'];
  // Every section is preceded by one blank line.
  const addSection = (heading, ...rows) => {
    report.push('', heading, ...rows);
  };

  if (task) {
    const portSuffix = task.sshPort ? `:${task.sshPort}` : '';
    addSection(
      '### Task',
      `- ID: ${task.sessionId || task.id || 'unknown'}`,
      `- Type: ${task.type || 'unknown'}`,
      `- Status: ${task.status || 'unknown'}`,
      `- Model: ${task.payload?.repo_id || task.name || 'unknown'}`,
      `- Host: ${task.remoteHost || 'local'}${portSuffix}`,
    );
  }

  addSection('### Diagnosis', diagnosis?.message || '(none)');

  if (suggestionText) {
    addSection('### Suggested action', suggestionText.replace(/^Suggested action:\s*/i, ''));
  }

  const launchCmd = task?.payload?._cmd || '';
  if (launchCmd) {
    addSection('### Launch command', '```bash', launchCmd, '```');
  }

  if (sourceText) {
    addSection('### Captured output', '```text', String(sourceText).trim(), '```');
  }

  return report.join('\n');
}
export function _showDiagnosis(panel, diagnosis, sourceText) {
if (panel._lastDiagMsg === diagnosis.message) return;
if (panel._diagDismissed === diagnosis.message) return; // stay dismissed until new error
const wasCollapsed = panel._lastDiagMsg === diagnosis.message && panel._diagCollapsed;
if (panel._diagDismissed === diagnosis.message) return;
panel._lastDiagMsg = diagnosis.message;
panel._diagCollapsed = !!wasCollapsed;
let diag = panel.querySelector('.cookbook-diagnosis');
if (!diag) {
@@ -417,57 +513,116 @@ export function _showDiagnosis(panel, diagnosis, sourceText) {
}
diag.classList.remove('hidden');
diag.innerHTML = '';
const taskEl = panel?.closest?.('.cookbook-task');
const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
const fixes = [...(diagnosis.fixes || [])];
if (task?.type === 'serve' && task.payload?._cmd && !fixes.some(f => f.label === 'Edit serve')) {
fixes.push({ label: 'Edit serve', action: (p) => _openServeEditFromDiagnosis(p) });
}
const suggestionText = diagnosis.suggestion || (fixes.length
? `Suggested action: ${fixes[0].label}.`
: 'Suggested action: copy the error and adjust the serve settings.');
const header = document.createElement('div');
header.style.cssText = 'display:flex;align-items:center;justify-content:space-between;';
// Simplified diagnosis card: just the error message + suggestion + fix
// button(s). Removed the fold toggle, copy button, and × dismiss — they
// made the card noisy without earning their keep. _diagCollapsed is kept
// as a stub so callers don't have to change.
panel._diagCollapsed = false;
const body = document.createElement('div');
body.className = 'cookbook-diag-body';
const msg = document.createElement('div');
msg.className = 'cookbook-diag-message';
msg.textContent = diagnosis.message;
header.appendChild(msg);
body.appendChild(msg);
const suggestion = document.createElement('div');
suggestion.className = 'cookbook-diag-suggestion';
suggestion.textContent = suggestionText;
body.appendChild(suggestion);
diag.appendChild(body);
const dismiss = document.createElement('button');
dismiss.className = 'close-btn';
dismiss.style.cssText = 'width:16px;height:16px;font-size:9px;flex-shrink:0;';
dismiss.textContent = '\u2715';
dismiss.addEventListener('click', () => { panel._diagDismissed = diagnosis.message; _clearDiagnosis(panel); });
header.appendChild(dismiss);
const runFix = async (fix, button, busyLabel = fix.label, onStart = null, onDone = null) => {
if (!fix || !button || button.dataset.busy) return;
button.dataset.busy = '1';
const _orig = button.textContent;
const wp = spinnerModule.createWhirlpool(12);
wp.element.style.cssText = 'display:inline-block;vertical-align:middle;width:12px;height:12px;margin-right:5px;';
button.textContent = '';
button.appendChild(wp.element);
const _lbl = document.createElement('span');
_lbl.textContent = busyLabel;
_lbl.style.verticalAlign = 'middle';
button.appendChild(_lbl);
try {
if (typeof onStart === 'function') onStart();
await fix.action(panel, sourceText);
} catch (err) {
console.error('[cookbook] diagnosis fix failed', err);
} finally {
if (button.isConnected) {
try { wp.destroy(); } catch {}
button.textContent = _orig;
delete button.dataset.busy;
}
if (typeof onDone === 'function') onDone();
}
};
diag.appendChild(header);
if (diagnosis.fixes && diagnosis.fixes.length) {
if (fixes.length) {
const row = document.createElement('div');
row.className = 'cookbook-diag-fixes';
for (const fix of diagnosis.fixes) {
const btn = document.createElement('button');
btn.className = 'cookbook-btn cookbook-diag-btn';
btn.textContent = fix.label;
btn.addEventListener('click', async () => {
if (btn.dataset.busy) return;
btn.dataset.busy = '1';
// Spinner feedback while the fix runs (kill + relaunch takes a moment).
const _orig = btn.textContent;
const wp = spinnerModule.createWhirlpool(12);
wp.element.style.cssText = 'display:inline-block;vertical-align:middle;width:12px;height:12px;margin-right:5px;';
btn.textContent = '';
btn.appendChild(wp.element);
const _lbl = document.createElement('span');
_lbl.textContent = _orig;
_lbl.style.verticalAlign = 'middle';
btn.appendChild(_lbl);
try {
await fix.action(panel, sourceText);
} catch (e) {
console.error('[cookbook] diagnosis fix failed', e);
} finally {
// Retries animate the whole card away (button goes with it). For fixes
// that leave the card in place, restore the label.
if (btn.isConnected) { try { wp.destroy(); } catch {} btn.textContent = _orig; delete btn.dataset.busy; }
}
});
row.appendChild(btn);
if (fixes.length <= 3) {
for (const fix of fixes) {
const btn = document.createElement('button');
btn.className = 'cookbook-btn cookbook-diag-btn';
btn.type = 'button';
btn.textContent = fix.label;
btn.addEventListener('click', (e) => {
e.stopPropagation();
runFix(fix, btn);
});
row.appendChild(btn);
}
body.appendChild(row);
return;
}
diag.appendChild(row);
const wrap = document.createElement('div');
wrap.className = 'cookbook-diag-actions';
const trigger = document.createElement('button');
trigger.className = 'cookbook-btn cookbook-diag-action-trigger';
trigger.type = 'button';
trigger.textContent = 'Actions';
trigger.appendChild(document.createTextNode(' ▾'));
wrap.appendChild(trigger);
const menu = document.createElement('div');
menu.className = 'dropdown cookbook-diag-menu hidden';
for (const fix of fixes) {
const item = document.createElement('button');
item.type = 'button';
item.textContent = fix.label;
item.addEventListener('click', async (e) => {
e.stopPropagation();
if (item.dataset.busy || trigger.dataset.busy) return;
item.dataset.busy = '1';
await runFix(fix, trigger, fix.label, () => menu.classList.add('hidden'), () => delete item.dataset.busy);
});
menu.appendChild(item);
}
wrap.appendChild(menu);
trigger.addEventListener('click', (e) => {
e.stopPropagation();
if (trigger.dataset.busy) return;
document.querySelectorAll('.cookbook-diag-menu').forEach(m => {
if (m !== menu) m.classList.add('hidden');
});
menu.classList.toggle('hidden');
});
row.appendChild(wrap);
body.appendChild(row);
}
}

View File

@@ -153,14 +153,31 @@ export function _renderGpuToggles(system) {
}
const validCounts = _validTpCounts(poolSize);
const maxGpu = validCounts.length ? validCounts[validCounts.length - 1] : 0;
// Commit the data layer to maxGpu on initial render so it matches the
// visual highlight. Before this, _activeCount stayed undefined → no
// gpu_count param sent → backend's fallback could rank against RAM on
// mixed-resource boxes ("tightest" sorted by RAM instead of GPU).
if (container._activeCount === undefined && validCounts.length) {
container._activeCount = maxGpu;
}
html += '<button class="hwfit-gpu-btn" data-count="0" title="CPU / RAM only">RAM</button>';
const hasExplicitCount = typeof container._activeCount === 'number';
for (const n of validCounts) {
const text = n === 1 ? 'GPU' : n + ' GPU';
const isActive = hasExplicitCount ? (n === container._activeCount) : (container._activeCount === undefined && n === maxGpu);
const isActive = hasExplicitCount && n === container._activeCount;
html += `<button class="hwfit-gpu-btn${isActive ? ' active' : ''}" data-count="${n}" title="${n} GPU${n > 1 ? 's' : ''}">${text}</button>`;
}
// Also mark the RAM button active when the user explicitly chose RAM (0)
// — the loop above only handles GPU buttons.
if (container._activeCount === 0) {
const ramBtn = container.querySelector('.hwfit-gpu-btn[data-count="0"]');
// (we just set innerHTML so we re-mark below after assignment)
}
container.innerHTML = html;
if (container._activeCount === 0) {
const ramBtn = container.querySelector('.hwfit-gpu-btn[data-count="0"]');
if (ramBtn) ramBtn.classList.add('active');
}
// Pool dropdown: switch pools, reset the count to the new pool's max, rebuild.
const sel = container.querySelector('#hwfit-gpu-group');
@@ -188,9 +205,12 @@ export function _renderGpuToggles(system) {
} else {
btn.classList.add('active');
container._activeCount = count;
// Auto-set quant based on hardware selection
// Auto-suggest a quant based on hardware selection — but ONLY when the
// user has already picked a specific quant. When they're on "All"
// (value === ""), leave them on All: toggling a GPU shouldn't silently
// yank them out of the All view they wanted to see.
const quantSel = document.getElementById('hwfit-quant');
if (quantSel) {
if (quantSel && quantSel.value !== '') {
if (count <= 1) {
quantSel.value = 'Q4_K_M'; // RAM or 1 GPU -> Q4 sweet spot
} else {
@@ -211,9 +231,34 @@ export function _renderGpuToggles(system) {
// reload paints instantly, then we refresh in the background and swap.
const _SCAN_CACHE_KEY = 'hwfit_scan_cache_v1';
const _MANUAL_HW_KEY = 'hwfit_manual_hardware_v1';
const _CTX_KEY = 'hwfit_target_context_v1';
const _CTX_PRESETS = [8192, 16384, 32768, 50000, 131072, 0]; // 0 = model max
const _SCAN_CACHE_MAX = 12; // keep the newest N signatures
const _SCAN_CACHE_TTL = 6 * 3600 * 1000; // 6 h — hardware rarely changes
// Ctx slider helpers (ported from origin/main). The slider picks an INDEX into
// _CTX_PRESETS; _ctxValue() resolves it to a token count (0 = "Max"). The label
// next to the slider re-renders to "8k" / "16k" / … / "Max".
/**
 * Format a context-length token count as a short label for the Ctx slider.
 *
 * - 0, NaN, or anything non-numeric -> "Max"
 * - below 1000 -> the number as-is
 * - exact multiples of 1024 -> binary k (8192 -> "8k", 131072 -> "128k")
 * - anything else -> decimal k, rounded (50000 -> "50k")
 *
 * @param {number|string} value Token count (0 means "model max").
 * @returns {string} Human-readable label.
 */
function _ctxLabel(value) {
  const n = Number(value) || 0;
  if (!n) return 'Max';
  if (n < 1000) return String(n);
  // Power-of-two context sizes are conventionally named in binary k.
  // Rounding n / 1000 would show 32768 as "33k" and 131072 as "131k".
  if (n % 1024 === 0) return `${n / 1024}k`;
  return `${Math.round(n / 1000)}k`;
}
/**
 * Resolve the Ctx slider's current position to a token count.
 *
 * The slider value is an index into _CTX_PRESETS. A missing slider behaves
 * as index 3, a non-numeric value as index 0, and the index is clamped to
 * the preset range. A falsy preset is returned as 0.
 *
 * @returns {number} Target context length in tokens, or 0.
 */
function _ctxValue() {
  const rawPos = document.getElementById('hwfit-context')?.value ?? 3;
  const lastPos = _CTX_PRESETS.length - 1;
  let pos = Number(rawPos) || 0;
  if (pos > lastPos) pos = lastPos;
  if (pos < 0) pos = 0;
  return _CTX_PRESETS[pos] || 0;
}
/**
 * Restore the Ctx slider position from localStorage and refresh its label.
 *
 * The stored value is a token count (written under _CTX_KEY); it is mapped
 * back to its index in _CTX_PRESETS. When nothing usable is stored — no
 * entry, a blank entry, a value that is not a preset, or storage that throws
 * on access — the slider falls back to index 3.
 */
function _syncCtxControl() {
  const DEFAULT_IDX = 3;
  const slider = document.getElementById('hwfit-context');
  const label = document.getElementById('hwfit-context-label');
  if (!slider) return;
  let saved = null;
  try {
    saved = localStorage.getItem(_CTX_KEY);
  } catch {
    // Storage can be blocked (privacy mode, sandboxed frame). The other
    // storage accesses in this file are guarded the same way; use the default.
  }
  // A blank entry would otherwise be coerced to 0 and silently select "Max".
  const hasSaved = saved != null && String(saved).trim() !== '';
  const savedIdx = hasSaved ? _CTX_PRESETS.indexOf(Number(saved)) : DEFAULT_IDX;
  slider.value = String(savedIdx >= 0 ? savedIdx : DEFAULT_IDX);
  if (label) label.textContent = _ctxLabel(_ctxValue());
}
function _manualHwState() {
try {
const s = JSON.parse(localStorage.getItem(_MANUAL_HW_KEY) || '{}');
@@ -749,6 +794,13 @@ export function _hwfitRenderList(el, models) {
const sortSel = document.getElementById('hwfit-sort');
const currentSort = sortSel?.value || 'score';
const isReversed = sortSel?.dataset.reverse === '1';
// Active budget for the Fit column label \u2014 make it obvious whether the
// ranking is against GPU or RAM so "tightest" can't be ambiguous on a
// mixed-resource box.
const tc = document.getElementById('hwfit-gpu-toggles');
const _budget = (tc && typeof tc._activeCount === 'number')
? (tc._activeCount === 0 ? 'RAM' : (tc._activeCount === 1 ? 'GPU' : tc._activeCount + ' GPU'))
: null;
let html = '<div class="hwfit-row hwfit-header">';
for (const col of _hwfitColumns) {
const sortable = col.key ? ' hwfit-sortable' : '';
@@ -760,7 +812,10 @@ export function _hwfitRenderList(el, models) {
arrow = isReversed ? ' \u25B2' : ' \u25BC';
}
const dataAttr = col.key ? ` data-sort="${col.key}"` : '';
html += `<span class="hwfit-col ${col.cls}${sortable}${active}"${dataAttr}>${col.label}${arrow}</span>`;
const label = (col.cls === 'hwfit-fit' && _budget)
? `${col.label} <span style="font-size:0.75em;opacity:0.6;font-weight:normal;">(${_budget})</span>`
: col.label;
html += `<span class="hwfit-col ${col.cls}${sortable}${active}"${dataAttr}>${label}${arrow}</span>`;
}
html += '</div>';
for (const m of models) {
@@ -1082,11 +1137,40 @@ export function _hwfitInit() {
const uc = document.getElementById('hwfit-usecase');
const sort = document.getElementById('hwfit-sort');
const qpref = document.getElementById('hwfit-quant');
const ctx = document.getElementById('hwfit-context');
const ctxLabel = document.getElementById('hwfit-context-label');
const search = document.getElementById('hwfit-search');
const remote = document.getElementById('hwfit-host');
_syncCtxControl();
if (uc) uc.addEventListener('change', () => _hwfitFetch());
if (sort) sort.addEventListener('change', () => _hwfitFetch());
if (qpref) qpref.addEventListener('change', () => _hwfitFetch());
if (ctx && !ctx.dataset.bound) {
ctx.dataset.bound = '1';
ctx.addEventListener('input', () => {
if (ctxLabel) ctxLabel.textContent = _ctxLabel(_ctxValue());
});
ctx.addEventListener('change', () => {
const targetCtx = _ctxValue();
try { localStorage.setItem(_CTX_KEY, String(targetCtx)); } catch {}
// Ctx drag affects sort mode: a specific ctx target (anything < Max)
// implies the user is hunting for "what fits at this context length",
// so re-rank by fit (lowest first). Dragging back to Max means no
// ctx constraint → go back to the default score-based ranking.
const sortSel = document.getElementById('hwfit-sort');
if (sortSel) {
if (targetCtx) {
sortSel.value = 'fit';
sortSel.dataset.reverse = '1';
} else {
sortSel.value = 'score';
sortSel.dataset.reverse = '';
}
}
_hwfitCache = null;
_hwfitFetch();
});
}
// Rescan — force a fresh hardware probe (bypasses the per-host cache).
const rescan = document.getElementById('hwfit-rescan');
if (rescan && !rescan.dataset.bound) {

View File

@@ -1083,6 +1083,20 @@ function _wireTabEvents(body) {
}
// Latest HF models that fit — collapsible card list
// Foldable Download admin-card — h2 "Download" doubles as the chevron
// toggle; collapses the entire card body (description + input + HF list).
// State persisted to localStorage so the fold sticks across reloads.
const dlFold = document.getElementById('cookbook-dl-tab-fold');
const dlFoldBody = document.getElementById('cookbook-dl-tab-fold-body');
const dlFoldChevron = document.getElementById('cookbook-dl-tab-chevron');
if (dlFold && dlFoldBody && dlFoldChevron) {
dlFold.addEventListener('click', () => {
const folded = dlFoldBody.style.display === 'none';
dlFoldBody.style.display = folded ? '' : 'none';
dlFoldChevron.textContent = folded ? '▾' : '▸';
try { localStorage.setItem('cookbook_dl_tab_folded_v1', folded ? '0' : '1'); } catch {}
});
}
const hfToggle = document.getElementById('cookbook-hf-latest-toggle');
const hfArrow = document.getElementById('cookbook-hf-latest-arrow');
const hfList = document.getElementById('cookbook-hf-latest-list');
@@ -1342,9 +1356,14 @@ function _renderRecipes() {
// Search group
html += '<div class="cookbook-group" data-backend-group="Search" style="flex:0 0 auto;">';
html += '<div class="admin-card" style="display:flex;flex-direction:column;overflow:hidden;">';
html += '<div style="display:flex;align-items:baseline;gap:8px;margin-bottom:2px;">';
html += '<h2 style="margin:0;padding:0;line-height:1;">Download</h2>';
// Foldable Download admin-card: clicking the h2 header collapses the
// entire card body (description + download input + HF latest section).
// State persisted to localStorage so the fold survives reloads.
const _dlTabFolded = (() => { try { return localStorage.getItem('cookbook_dl_tab_folded_v1') === '1'; } catch { return false; } })();
html += '<div style="display:flex;align-items:center;gap:8px;margin-bottom:2px;">';
html += `<h2 id="cookbook-dl-tab-fold" style="margin:0;padding:0;line-height:1;cursor:pointer;display:flex;align-items:center;justify-content:space-between;user-select:none;flex:1;">Download<span id="cookbook-dl-tab-chevron" style="display:inline-block;transition:transform 0.15s;font-size:1.1em;margin-left:8px;opacity:0.85;">${_dlTabFolded ? '▸' : '▾'}</span></h2>`;
html += '</div>';
html += `<div id="cookbook-dl-tab-fold-body" style="${_dlTabFolded ? 'display:none;' : ''}">`;
html += '<p class="memory-desc doclib-desc" style="margin-top:6px;">Download from <a href="https://huggingface.co/models" target="_blank" rel="noopener" style="color:var(--accent,var(--red));text-decoration:none;"><svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-1px;margin-right:1px;"><path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"/><polyline points="15 3 21 3 21 9"/><line x1="10" y1="14" x2="21" y2="3"/></svg>HuggingFace</a> by pasting model link, or download directly in the Scan section below.</p>';
html += '<div class="hwfit-container" id="hwfit-container">';
@@ -1400,6 +1419,7 @@ function _renderRecipes() {
html += `</div>`;
html += `<div id="cookbook-hf-latest-list" style="display:none;margin-top:4px;max-height:320px;overflow-y:auto;flex-direction:column;gap:4px;"></div>`;
html += `</div>`;
html += `</div>`; // /#cookbook-dl-tab-fold-body (whole Download card body)
// Search section
html += '</div></div></div>';
@@ -1417,13 +1437,22 @@ function _renderRecipes() {
// remains, which uses its own settings panel). Vision (multimodal) stays.
html += '<option value="multimodal">Vision</option></select>';
html += '<input type="text" class="cookbook-field-input hwfit-search" id="hwfit-search" placeholder="Search models..." style="flex:1;" />';
// Quant (Q4/Q8/…) lives next to the search now.
// Quant (Q4/Q8/…) lives next to the search now. Default is "All" so the
// list shows the best-scoring quant for every model instead of silently
// filtering to Q4 (which used to be the implicit default).
html += '<select class="cookbook-field-input hwfit-quant" id="hwfit-quant" style="height:28px;">';
html += '<option value="" selected>All</option>';
html += '<option value="Q4_K_M">Q4</option><option value="Q8_0">Q8</option>';
html += '<option value="Q6_K">Q6</option><option value="Q5_K_M">Q5</option>';
html += '<option value="Q3_K_M">Q3</option><option value="Q2_K">Q2</option>';
html += '<option value="AWQ-4bit">AWQ</option><option value="FP8">FP8</option>';
html += '<option value="">Native</option></select>';
html += '<option value="AWQ-4bit">AWQ</option><option value="FP8">FP8</option></select>';
// Ctx slider — ported from origin/main. Lets you target a context length
// for fit estimates; the hwfit ranking uses _ctxValue() to factor that into
// VRAM math, so dragging this re-sorts the list toward models that fit
// your chosen ctx.
html += '<label class="hwfit-ctx-control" title="Context length for fit estimates. Lower it to find more models that could fit your hardware.">';
html += '<span>Ctx</span><input type="range" id="hwfit-context" min="0" max="5" step="1" value="3" />';
html += '<output id="hwfit-context-label">50k</output></label>';
html += '</div>';
html += '<div class="hwfit-toolbar" style="margin-top:7px;">';
html += '<select class="cookbook-field-input hwfit-server-select" id="hwfit-server-select" style="height:28px;min-width:88px;position:relative;top:0px;">';

View File

@@ -169,6 +169,9 @@ export function _parseServePhase(snapshot) {
if (flat.includes('Application startup complete')) {
return { phase: 'ready', status: 'ready' };
}
if (/Ollama API ready on port\s+\d+/i.test(flat)) {
return { phase: 'ready', status: 'ready' };
}
// HTTP access logs (e.g. GET /v1/models 200 OK) mean the server is up
if (/(?:GET|POST)\s+\/[^\s]*\s+HTTP\/[\d.]+"\s*\d{3}/.test(flat)) {
return { phase: 'idle', status: 'ready' };
@@ -2295,15 +2298,24 @@ async function _reconnectTask(el, task) {
if (task.type === 'serve' && !task._endpointAdded && !task._endpointAddInFlight && task._serveReady) {
task._endpointAddInFlight = true;
const rawHost = task.remoteHost || 'localhost';
const host = rawHost.includes('@') ? rawHost.split('@').pop() : rawHost;
let host = rawHost.includes('@') ? rawHost.split('@').pop() : rawHost;
const portMatch = task.payload?._cmd?.match(/--port[=\s]+(\d+)/)
|| task.payload?._cmd?.match(/(?:^|\s)-p[=\s]+(\d+)/)
|| snapshot.match(/Uvicorn running on\D*?:(\d+)/i)
|| snapshot.match(/running on\D*?:(\d+)/i)
|| snapshot.match(/listening on\D*?:(\d+)/i)
|| snapshot.match(/port[:=\s]+(\d+)/i);
const port = portMatch ? portMatch[1] : '8000';
const baseUrl = `http://${host}:${port}/v1`;
let port = portMatch ? portMatch[1] : '8000';
let baseUrl = `http://${host}:${port}/v1`;
const ollamaUrlMatch = snapshot.match(/Ollama API ready on port\s+\d+:\s*(http:\/\/[^\s]+)/i);
if (ollamaUrlMatch) {
try {
const u = new URL(ollamaUrlMatch[1]);
host = u.hostname || host;
port = u.port || '11434';
baseUrl = `${u.origin}/v1`;
} catch {}
}
fetch('/api/model-endpoints', { credentials: 'same-origin' })
.then(r => r.json())
.then(async (eps) => {
@@ -2642,10 +2654,21 @@ async function _pollBackgroundStatus() {
if (localTask && localTask._endpointAdded) continue;
const rawHost = localTask?.remoteHost || t.remote || 'localhost';
const host = rawHost.includes('@') ? rawHost.split('@').pop() : (rawHost === 'local' ? 'localhost' : rawHost);
const portMatch = localTask?.payload?._cmd?.match(/--port\s+(\d+)/);
const port = portMatch ? portMatch[1] : '8000';
const baseUrl = `http://${host}:${port}/v1`;
let host = rawHost.includes('@') ? rawHost.split('@').pop() : (rawHost === 'local' ? 'localhost' : rawHost);
const portMatch = localTask?.payload?._cmd?.match(/--port\s+(\d+)/)
|| localTask?.payload?._cmd?.match(/OLLAMA_HOST=[^\s:]+:(\d+)/);
let port = portMatch ? portMatch[1] : '8000';
let baseUrl = `http://${host}:${port}/v1`;
const snapshot = t.output || localTask?.output || '';
const ollamaUrlMatch = snapshot.match(/Ollama API ready on port\s+\d+:\s*(http:\/\/[^\s]+)/i);
if (ollamaUrlMatch) {
try {
const u = new URL(ollamaUrlMatch[1]);
host = u.hostname || host;
port = u.port || '11434';
baseUrl = `${u.origin}/v1`;
} catch {}
}
const _isDiffusion = localTask?.payload?._cmd?.includes('diffusion_server');
_updateTask(t.session_id, { _serveReady: true, _endpointAdded: true });

View File

@@ -391,7 +391,8 @@ function _rerenderCachedModels() {
panelHtml += `<label>${_l('Backend','Inference engine: vLLM, SGLang, llama.cpp, Ollama, or Diffusers')}<select class="hwfit-sf" data-field="backend">${backendOpts}</select></label>`;
panelHtml += `<input type="hidden" class="hwfit-sf" data-field="host" value="${esc(_es.remoteHost || '')}" />`;
panelHtml += `<label>${_l('venv','Path to Python venv or conda env activate script')}<input type="text" class="hwfit-sf hwfit-sf-wide" data-field="venv" value="${esc(sv('venv', _es.envPath || _srvVenv || ''))}" placeholder="~/venv" /></label>`;
panelHtml += `<label>${_l('Port','HTTP port for the API server')}<input type="text" class="hwfit-sf" data-field="port" value="${esc(sv('port', _nextAvailablePort()))}" /></label>`;
const defaultPort = defaultBackend === 'ollama' ? '11434' : _nextAvailablePort();
panelHtml += `<label>${_l('Port','HTTP port for the API server')}<input type="text" class="hwfit-sf" data-field="port" value="${esc(sv('port', defaultPort))}" /></label>`;
const _activeGpus = (defaultGpus || '').split(',').map(s => s.trim()).filter(Boolean);
const detectedGpuCount = Number(_getGpuToggleTotal?.() || 0);
const _gpuMax = Math.max(detectedGpuCount || 8, ...(_activeGpus.map(Number).filter(n => !isNaN(n)).map(n => n + 1)));
@@ -405,7 +406,9 @@ function _rerenderCachedModels() {
// Row 2: Core settings
panelHtml += `<div class="hwfit-serve-row hwfit-backend-vllm hwfit-backend-sglang hwfit-backend-llamacpp">`;
panelHtml += `<label class="hwfit-backend-vllm hwfit-backend-sglang">${_l('TP','Tensor Parallelism — split model across N GPUs')}<select class="hwfit-sf" data-field="tp">${tpOpts}</select></label>`;
panelHtml += `<label>${_l('Context','Max tokens per request. Lower = less VRAM')}<input type="text" class="hwfit-sf" data-field="ctx" value="${esc(sv('ctx', '8192'))}" /></label>`;
// ctx resets to the model's max on every panel open (the real ctx slider
// lives in the Scan/Download toolbar — see cookbook.js .hwfit-ctx-control).
panelHtml += `<label>${_l('Context','Max tokens per request — resets to the model max on every open. Lower = less VRAM')}<input type="text" class="hwfit-sf" data-field="ctx" value="${esc(m.context_length || m.context || '8192')}" /></label>`;
panelHtml += `<label>${_l('GPU','Which GPU to use. Leave empty for default')}<input type="text" class="hwfit-sf" data-field="gpu_id" value="${esc(sv('gpu_id', ''))}" placeholder="auto" style="width:50px;" /></label>`;
panelHtml += `<label class="hwfit-backend-vllm hwfit-backend-sglang">${_l('GPU Mem','Fraction of GPU memory (0.01.0). Lower if OOM')}<input type="text" class="hwfit-sf" data-field="gpu_mem" value="${esc(sv('gpu_mem', '0.90'))}" /></label>`;
panelHtml += `<label class="hwfit-backend-vllm">${_l('Swap','CPU swap space in GB. Leave empty to omit (removed in newer vLLM)')}<input type="text" class="hwfit-sf" data-field="swap" value="${esc(sv('swap', ''))}" placeholder="off" /></label>`;

View File

@@ -18,7 +18,7 @@ const EXCLUDED = new Set(['flip','roll','8ball','fortune','odyssey','ascii']);
// are the short forms people will actually type (/new, /clear, /web, etc.)
// rather than the full /chats new, /toggle web equivalents.
// Each alias is listed exactly once. 'favorite'/'unfavorite' are the promoted
// names for marking a chat; the older 'important'/'star' spellings are no
// longer promoted here.
const PROMOTED_ALIASES = new Set([
  'new', 'clear', 'rename', 'fork', 'export', 'archive', 'favorite', 'unfavorite',
  'web', 'bash', 'research', 'doc',
  'memories', 'forget',
]);

View File

@@ -5393,8 +5393,8 @@ const COMMANDS = {
'delete': { handler: _cmdSessionDelete, alias: ['del','rm'], help: 'Delete chat', usage: '/chats delete [id]' },
'archive': { handler: _cmdSessionArchive, alias: ['tar'], help: 'Archive chat', usage: '/chats archive [id]' },
'rename': { handler: _cmdSessionRename, alias: ['mv'], help: 'Rename current chat', usage: '/chats rename Name' },
'important': { handler: _cmdSessionImportant, alias: ['pin'], help: 'Mark as important', usage: '/chats important' },
'unimportant': { handler: _cmdSessionUnimportant, alias: ['unpin'], help: 'Unmark important', usage: '/chats unimportant' },
'favorite': { handler: _cmdSessionImportant, alias: ['pin','important'], help: 'Mark as favorite', usage: '/chats favorite' },
'unfavorite': { handler: _cmdSessionUnimportant, alias: ['unpin','unimportant'], help: 'Unmark favorite', usage: '/chats unfavorite' },
'fork': { handler: _cmdSessionFork, alias: ['cp'], help: 'Fork chat (keep first N msgs)', usage: '/chats fork [N]' },
'truncate': { handler: _cmdSessionTruncate, alias: [], help: 'Delete older messages, keep last N', usage: '/chats truncate N' },
'switch': { handler: _cmdSessionSwitch, alias: ['goto','cd'], help: 'Switch to chat by name/id', usage: '/chats switch name' },
@@ -5732,10 +5732,12 @@ export const LEGACY_ALIASES = {
'del': { parent: 'chats', sub: 'delete' },
'archive': { parent: 'chats', sub: 'archive' },
'rename': { parent: 'chats', sub: 'rename' },
'important': { parent: 'chats', sub: 'important' },
'star': { parent: 'chats', sub: 'important' },
'unimportant': { parent: 'chats', sub: 'unimportant' },
'unstar': { parent: 'chats', sub: 'unimportant' },
'favorite': { parent: 'chats', sub: 'favorite' },
'important': { parent: 'chats', sub: 'favorite' },
'star': { parent: 'chats', sub: 'favorite' },
'unfavorite': { parent: 'chats', sub: 'unfavorite' },
'unimportant': { parent: 'chats', sub: 'unfavorite' },
'unstar': { parent: 'chats', sub: 'unfavorite' },
'fork': { parent: 'chats', sub: 'fork' },
'truncate': { parent: 'chats', sub: 'truncate' },
'sessions': { parent: 'chats', sub: 'info' },

View File

@@ -349,10 +349,23 @@ function _taskIcon(task) {
return `<svg width="13" height="13" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="opacity:0.4;flex-shrink:0;position:relative;top:-4px;">${path}</svg>`;
}
/**
 * Exact task action names that are treated as model-backed when deciding
 * whether to show the "uses model" mark. Membership is an exact string match.
 */
const _MODEL_BACKED_ACTIONS = new Set([
  'summarize_emails',
  'draft_email_replies',
  'extract_email_events',
  'classify_events',
  'mark_email_boundaries',
  'learn_sender_signatures',
  'check_email_urgency',
  'test_skills',
  'audit_skills',
  'consolidate_memory',
]);

/**
 * Build the small "Uses model" sparkle icon for a task.
 *
 * A task gets the mark when any of the following holds:
 *   - its `task_type` (or, failing that, `kind`) is 'llm' or 'research';
 *   - it has a truthy `model` or `endpointUrl`;
 *   - its `action` is listed in `_MODEL_BACKED_ACTIONS`.
 *
 * @param {object|null|undefined} task - Task record; may be nullish.
 * @returns {string} SVG markup for the mark, or '' when the task does not qualify.
 */
function _taskAiMark(task) {
  const kind = task?.task_type || task?.kind || '';
  const action = task?.action || '';
  // Single source of truth for action-based detection: the explicit allow-list.
  // (A second `const aiAction` based on a keyword regex used to sit next to this
  // lookup, which is a redeclaration error in the same scope.)
  const usesModel = kind === 'llm'
    || kind === 'research'
    || Boolean(task?.model)
    || Boolean(task?.endpointUrl)
    || _MODEL_BACKED_ACTIONS.has(action);
  if (!usesModel) return '';
  return '<svg class="task-ai-mark" width="10" height="10" viewBox="0 0 24 24" fill="currentColor" aria-label="Uses model" title="Uses model"><path d="M12 0L14.59 8.41L23 12L14.59 15.59L12 24L9.41 15.59L1 12L9.41 8.41Z"/></svg>';
}
@@ -708,7 +721,7 @@ function _renderList() {
const runBtn = document.createElement('button');
runBtn.className = 'task-status-badge task-run-now-badge task-card-run-btn';
runBtn.title = 'Run now';
runBtn.style.cssText = 'position:relative;top:4px;margin-right:4px;';
runBtn.style.cssText = 'position:relative;top:1px;margin-right:4px;';
runBtn.innerHTML = '<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round"><polyline points="13 2 3 14 12 14 11 22 21 10 12 10 13 2"/></svg><span>Run</span>';
runBtn.addEventListener('click', (e) => { e.stopPropagation(); _doRunNow(task.id); });
actionsWrap.insertBefore(runBtn, menuBtn);

View File

@@ -10203,6 +10203,12 @@ textarea.memory-add-input {
height: 20px;
min-height: 0;
box-sizing: border-box;
position: relative;
top: -4px;
}
/* Icon inside a state badge: shifted up 1px from its normal position. */
.task-state-badge svg {
position: relative;
top: -1px;
}
.task-status-badge:hover {
filter: brightness(1.08) saturate(1.15);
@@ -18498,6 +18504,85 @@ body.gallery-selecting .gallery-dl-btn,
display: block;
margin-top: 2px;
}
/* Ctx slider ported from origin/main. Sits in the Scan/Download toolbar
next to the quant dropdown. Drives _ctxValue() in cookbook-hwfit.js.
This rule styles the wrapper: a bordered inline-flex box whose children
(styled by the rules that follow) are a span, a range input and an output. */
.hwfit-ctx-control {
height: 28px;
min-width: 134px;
/* Do not let the flex toolbar squeeze the control below its min-width. */
flex-shrink: 0;
display: inline-flex;
/* Vertically centre the children inside the 28px box. */
align-items: center;
gap: 5px;
padding: 0 7px;
border: 1px solid var(--border);
border-radius: 4px;
background: var(--bg);
font-size: 11px;
}
/* Caption text inside the control: upper-cased, slightly tracked and dimmed. */
.hwfit-ctx-control span {
text-transform: uppercase;
letter-spacing: 0.3px;
opacity: 0.75;
}
/* Editor-style slider (same look as the gallery editor sliders): thin pill
rail that fattens on interaction, circular red thumb that grows on hover.
This rule is the rail itself, 4px tall at rest. */
.hwfit-ctx-control input[type="range"] {
width: 64px;
min-width: 64px;
height: 4px;
padding: 0;
border: 0;
/* Drop the native widget look; prefixed form first, standard form last. */
-webkit-appearance: none;
appearance: none;
/* Rail colour: 25% of the foreground colour mixed into transparent. */
background: color-mix(in srgb, var(--fg) 25%, transparent);
border-radius: 999px;
accent-color: var(--red);
cursor: pointer;
/* Animates the height change applied by the hover/focus/active rule. */
transition: height 0.15s ease;
}
/* Interaction state: the rail grows from 4px to 10px while hovered, focused
or pressed. */
.hwfit-ctx-control input[type="range"]:hover,
.hwfit-ctx-control input[type="range"]:focus,
.hwfit-ctx-control input[type="range"]:active {
height: 10px;
}
/* Thumb for engines that implement ::-webkit-slider-thumb: a 12px borderless
circle filled with var(--red). The native thumb styling is removed first so
these dimensions apply. */
.hwfit-ctx-control input[type="range"]::-webkit-slider-thumb {
-webkit-appearance: none;
appearance: none;
width: 12px;
height: 12px;
border-radius: 50%;
background: var(--red);
border: none;
cursor: pointer;
/* Animates the 12px -> 18px growth applied in the interaction state. */
transition: width 0.12s ease, height 0.12s ease;
}
/* Same 12px circular thumb for engines that implement ::-moz-range-thumb.
Kept as its own rule rather than merged with the -webkit- selector. */
.hwfit-ctx-control input[type="range"]::-moz-range-thumb {
width: 12px;
height: 12px;
border-radius: 50%;
background: var(--red);
border: none;
cursor: pointer;
/* Animates the 12px -> 18px growth applied in the interaction state. */
transition: width 0.12s ease, height 0.12s ease;
}
/* Interaction state (-webkit- thumb): grow from 12px to 18px while the input
is hovered, focused or pressed. */
.hwfit-ctx-control input[type="range"]:hover::-webkit-slider-thumb,
.hwfit-ctx-control input[type="range"]:focus::-webkit-slider-thumb,
.hwfit-ctx-control input[type="range"]:active::-webkit-slider-thumb {
width: 18px;
height: 18px;
}
/* Interaction state (-moz- thumb): same 12px -> 18px growth as the -webkit-
rule, again kept in a separate rule. */
.hwfit-ctx-control input[type="range"]:hover::-moz-range-thumb,
.hwfit-ctx-control input[type="range"]:focus::-moz-range-thumb,
.hwfit-ctx-control input[type="range"]:active::-moz-range-thumb {
width: 18px;
height: 18px;
}
/* Value readout next to the slider: bold, right-aligned, with a minimum
width reserved so the box does not collapse below 28px. */
.hwfit-ctx-control output {
min-width: 28px;
text-align: right;
color: var(--fg);
font-weight: 600;
}
.hwfit-sf {
background: var(--bg);
border: 1px solid var(--border);
@@ -21253,6 +21338,7 @@ a.chat-link[href^="#research-"] {
}
/* The Run button gets margin-right and top from an inline style attribute
(set through style.cssText when the button is created). Inline declarations
outrank normal stylesheet rules, so both overrides here need !important to
take effect. */
.task-card .task-card-run-btn {
margin-right: 1px !important;
top: 0 !important;
}
}
@@ -34765,7 +34851,7 @@ body.theme-frosted .modal {
.slash-autocomplete-popup {
position: fixed;
z-index: 9000;
background: var(--bg-elev-2, #1a1a1a);
background: var(--panel, var(--bg));
border: 1px solid var(--border, rgba(255,255,255,0.08));
border-radius: 8px;
box-shadow: 0 8px 24px rgba(0,0,0,0.35);
@@ -34793,8 +34879,8 @@ body.theme-frosted .modal {
white-space: nowrap;
overflow: hidden;
}
/* Row highlight states. Only background-color is set, so the other background
longhands on a row are left untouched. Each selector is declared once. */
.slash-ac-row:hover { background-color: color-mix(in srgb, var(--accent, var(--red)) 10%, transparent); }
.slash-ac-row-sel { background-color: color-mix(in srgb, var(--accent, var(--red)) 14%, transparent); }
.slash-ac-token {
font-family: 'Fira Code', ui-monospace, monospace;
color: var(--accent, var(--red));