diff --git a/services/hwfit/data/hf_models.json b/services/hwfit/data/hf_models.json index b7c45ab..e73cc26 100644 --- a/services/hwfit/data/hf_models.json +++ b/services/hwfit/data/hf_models.json @@ -5110,6 +5110,100 @@ "release_date": "2023-10-29", "_discovered": true }, + { + "name": "deepseek-ai/DeepSeek-V4-Flash", + "provider": "deepseek-ai", + "parameter_count": "284B", + "parameters_raw": 284000000000, + "active_parameters": 13000000000, + "is_moe": true, + "min_ram_gb": 200.0, + "recommended_ram_gb": 320.0, + "min_vram_gb": 156.0, + "quantization": "FP4-MoE-Mixed", + "context_length": 1000000, + "use_case": "General-purpose reasoning, long-context", + "capabilities": [ + "long_context", + "reasoning", + "moe" + ], + "pipeline_tag": "text-generation", + "architecture": "deepseek_v4_moe", + "hf_downloads": 3542202, + "hf_likes": 0, + "release_date": "2026-05-15" + }, + { + "name": "deepseek-ai/DeepSeek-V4-Flash-Base", + "provider": "deepseek-ai", + "parameter_count": "284B", + "parameters_raw": 284000000000, + "active_parameters": 13000000000, + "is_moe": true, + "min_ram_gb": 290.0, + "recommended_ram_gb": 460.0, + "min_vram_gb": 284.0, + "quantization": "FP8-Mixed", + "context_length": 1000000, + "use_case": "Base pretrained \u2014 fine-tuning starting point", + "capabilities": [ + "long_context", + "moe" + ], + "pipeline_tag": "text-generation", + "architecture": "deepseek_v4_moe", + "hf_downloads": 0, + "hf_likes": 0, + "release_date": "2026-05-15" + }, + { + "name": "deepseek-ai/DeepSeek-V4-Pro", + "provider": "deepseek-ai", + "parameter_count": "1.6T", + "parameters_raw": 1600000000000, + "active_parameters": 49000000000, + "is_moe": true, + "min_ram_gb": 1100.0, + "recommended_ram_gb": 1800.0, + "min_vram_gb": 880.0, + "quantization": "FP4-MoE-Mixed", + "context_length": 1000000, + "use_case": "Flagship reasoning, long-context", + "capabilities": [ + "long_context", + "reasoning", + "moe" + ], + "pipeline_tag": "text-generation", + "architecture": "deepseek_v4_moe", + "hf_downloads": 0, + "hf_likes": 0, + "release_date": "2026-05-15" + }, + { + "name": "deepseek-ai/DeepSeek-V4-Pro-Base", + "provider": "deepseek-ai", + "parameter_count": "1.6T", + "parameters_raw": 1600000000000, + "active_parameters": 49000000000, + "is_moe": true, + "min_ram_gb": 1700.0, + "recommended_ram_gb": 2600.0, + "min_vram_gb": 1600.0, + "quantization": "FP8-Mixed", + "context_length": 1000000, + "use_case": "Base pretrained \u2014 fine-tuning starting point", + "capabilities": [ + "long_context", + "moe" + ], + "pipeline_tag": "text-generation", + "architecture": "deepseek_v4_moe", + "hf_downloads": 0, + "hf_likes": 0, + "release_date": "2026-05-15" + }, { "name": "deepseek-ai/deepseek-coder-6.7b-base", "provider": "DeepSeek", @@ -13886,53 +13980,6 @@ "gguf_sources": [], "capabilities": [] }, - { - "name": "deepseek-ai/DeepSeek-V4-Flash", - "provider": "DeepSeek", - "parameter_count": "158B", - "parameters_raw": 158000000000, - "min_ram_gb": 165.0, - "recommended_ram_gb": 205.0, - "min_vram_gb": 165.0, - "quantization": "FP8", - "context_length": 1000000, - "use_case": "General purpose, reasoning (MoE)", - "is_moe": true, - "num_experts": null, - "active_experts": null, - "active_parameters": 13000000000, - "architecture": "deepseek_v4", - "pipeline_tag": "text-generation", - "release_date": "2026-04-22", - "gguf_sources": [ - { - "repo": "unsloth/DeepSeek-V4-Flash", - "provider": "unsloth" - } - ], - "capabilities": [] - }, - { - "name": "deepseek-ai/DeepSeek-V4-Pro", - "provider": "DeepSeek", - "parameter_count": "1600B", - "parameters_raw": 1600000000000, - "min_ram_gb": 928.5, - "recommended_ram_gb": 1207.0, - "min_vram_gb": 928.5, - "quantization": "Q4_K_M", - "context_length": 1000000, - "use_case": "Frontier reasoning (MoE)", - "is_moe": true, - "num_experts": null, - "active_experts": null, - "active_parameters": 49000000000, - "architecture": "deepseek_v4", - "pipeline_tag": "text-generation", - "release_date": "2026-04-22", - "gguf_sources": [], - "capabilities": [] - }, { "name": "google/gemma-4-E2B-it", "provider": "Google", diff --git a/services/hwfit/fit.py b/services/hwfit/fit.py index 69b00ee..3136d7b 100644 --- a/services/hwfit/fit.py +++ b/services/hwfit/fit.py @@ -564,7 +564,7 @@ def rank_models(system, use_case=None, limit=50, search=None, sort="score", quan }) if use_case == "image_gen": sort_fn = SORT_KEYS.get(sort, SORT_KEYS["score"]) - results.sort(key=sort_fn, reverse=(sort != "vram")) + results.sort(key=sort_fn, reverse=True) # see main path below return results[:limit] # If user picked a native prequantized format, filter to only those models. @@ -661,7 +661,10 @@ def rank_models(system, use_case=None, limit=50, search=None, sort="score", quan # explicitly asked for a Fit-only view. results = [r for r in results if r.get("fit_level") != "too_tight"] sort_fn = SORT_KEYS.get(sort, SORT_KEYS["score"]) - # vram ascending (smallest first), everything else descending (biggest first) - results.sort(key=sort_fn, reverse=(sort != "vram")) + # Always sort descending then truncate top-N so each column shows the + # global highest by that metric. Before, vram was special-cased + # ascending → truncate kept the 50 SMALLEST models and "highest VRAM" + # could never appear, breaking the column-click toggle. + results.sort(key=sort_fn, reverse=True) results = results[:limit] return results diff --git a/services/hwfit/hardware.py b/services/hwfit/hardware.py index 0af62a0..9815327 100644 --- a/services/hwfit/hardware.py +++ b/services/hwfit/hardware.py @@ -5,7 +5,9 @@ import shutil import subprocess import time -CACHE_TTL = 1800 # 30 min — hardware rarely changes; use the Rescan button to force a re-probe +CACHE_TTL = 24 * 3600 # 24 h — hardware probes are user-initiated via the Rescan button; bumped + # from 30 min so changing filters doesn't keep re-probing the rig every + # half-hour during a long session. _remote_host = None # set by detect_system(host=...) diff --git a/services/hwfit/models.py b/services/hwfit/models.py index 75885e8..11a6366 100644 --- a/services/hwfit/models.py +++ b/services/hwfit/models.py @@ -13,6 +13,13 @@ QUANT_BPP = { "AWQ-4bit": 0.50, "AWQ-8bit": 1.0, "GPTQ-Int4": 0.50, "GPTQ-Int8": 1.0, "mlx-4bit": 0.55, "mlx-8bit": 1.0, "mlx-6bit": 0.75, + # DeepSeek-V4-style mixed: MoE experts in FP4 (bulk), attention + non- + # expert dense in FP8, embeddings/LM head in BF16. By weight count the + # experts dominate so the effective BPP sits closer to FP4 than FP8. + # Empirical: DeepSeek-V4-Flash 284B / 156 GB ≈ 0.55 B/param. + "FP4-MoE-Mixed": 0.55, + # FP8-Mixed = the *-Base variants (MoE experts also FP8, not FP4). + "FP8-Mixed": 1.0, } QUANT_SPEED_MULT = { @@ -24,6 +31,8 @@ QUANT_SPEED_MULT = { "AWQ-4bit": 1.2, "AWQ-8bit": 0.85, "GPTQ-Int4": 1.2, "GPTQ-Int8": 0.85, "mlx-4bit": 1.15, "mlx-8bit": 0.85, "mlx-6bit": 1.0, + "FP4-MoE-Mixed": 1.10, # slightly slower than pure FP4 because of mixed-dtype dispatch + "FP8-Mixed": 0.85, } QUANT_QUALITY_PENALTY = { @@ -39,6 +48,11 @@ QUANT_QUALITY_PENALTY = { "AWQ": -1.0, "AWQ-4bit": -4.0, "AWQ-8bit": -1.0, "GPTQ": -1.0, "GPTQ-Int4": -4.0, "GPTQ-Int8": -1.0, "mlx-4bit": -4.0, "mlx-8bit": -0.5, "mlx-6bit": -1.5, + # DeepSeek-V4 mixed: only MoE experts at FP4 (the rest is FP8/BF16), + # so the realized quality is much closer to FP8 than to pure FP4 — + # the activation-sensitive layers stay high-precision. ~0 penalty. + "FP4-MoE-Mixed": -0.5, + "FP8-Mixed": 0.0, } QUANT_BYTES_PER_PARAM = { @@ -50,6 +64,8 @@ QUANT_BYTES_PER_PARAM = { "AWQ-4bit": 0.5, "AWQ-8bit": 1.0, "GPTQ-Int4": 0.5, "GPTQ-Int8": 1.0, "mlx-4bit": 0.5, "mlx-8bit": 1.0, "mlx-6bit": 0.75, + "FP4-MoE-Mixed": 0.55, + "FP8-Mixed": 1.0, } # Pre-quantized formats that should NOT go through the GGUF quant hierarchy. @@ -57,6 +73,7 @@ QUANT_BYTES_PER_PARAM = { PREQUANTIZED_PREFIXES = ( "AWQ-", "GPTQ-", "mlx-", "FP8", "FP4", "NVFP4", "MXFP4", "NF4", "INT4", "INT8", "W4A16", "W8A8", "W8A16", + "FP4-MoE-Mixed", "FP8-Mixed", ) diff --git a/static/index.html b/static/index.html index 3e7600f..9cbf76e 100644 --- a/static/index.html +++ b/static/index.html @@ -843,7 +843,7 @@ Cookbook - +
diff --git a/static/js/cookbook-diagnosis.js b/static/js/cookbook-diagnosis.js index ade31bd..4d6e807 100644 --- a/static/js/cookbook-diagnosis.js +++ b/static/js/cookbook-diagnosis.js @@ -23,6 +23,44 @@ import { // browser loads it once. See cookbook-hwfit.js. } from './cookbook.js'; import uiModule from './ui.js'; + +// Tiny HTML-escape — keeps the file standalone instead of leaning on a +// shared helper that may not be exported from this module's import surface. +function _diagEsc(s) { + return String(s ?? '').replace(/[&<>"']/g, c => ({'&':'&','<':'<','>':'>','"':'"',"'":'''}[c])); +} + +// Pick an icon for a diagnosis-action button based on the label. The icon +// renders on the LEFT of the button text. Keeps the strokes consistent +// across the set so they read as one family. +function _diagFixIcon(label) { + const l = String(label || '').toLowerCase(); + const _svg = (path) => ``; + if (l.startsWith('retry') || l.includes('relaunch') || l.includes('restart')) { + // Circular-arrow refresh + return _svg(''); + } + if (l.startsWith('copy')) { + return _svg(''); + } + if (l.startsWith('edit')) { + return _svg(''); + } + if (l.startsWith('open') || l.includes('dependencies')) { + return _svg(''); + } + if (l.startsWith('install') || l.includes('upgrade')) { + return _svg(''); + } + if (l.startsWith('kill') || l.startsWith('stop')) { + return _svg(''); + } + if (l.startsWith('switch') || l.includes('use ')) { + return _svg(''); + } + // Default: lightbulb (generic "suggestion") + return _svg(''); +} import spinnerModule from './spinner.js'; // ── Error diagnosis ── @@ -577,7 +615,7 @@ export function _showDiagnosis(panel, diagnosis, sourceText) { const btn = document.createElement('button'); btn.className = 'cookbook-btn cookbook-diag-btn'; btn.type = 'button'; - btn.textContent = fix.label; + btn.innerHTML = _diagFixIcon(fix.label) + '' + _diagEsc(fix.label) + ''; btn.addEventListener('click', (e) => { e.stopPropagation(); runFix(fix, btn); @@ -603,7 +641,7 @@ export function _showDiagnosis(panel, diagnosis, sourceText) { for (const fix of fixes) { const item = document.createElement('button'); item.type = 'button'; - item.textContent = fix.label; + item.innerHTML = _diagFixIcon(fix.label) + '' + _diagEsc(fix.label) + ''; item.addEventListener('click', async (e) => { e.stopPropagation(); if (item.dataset.busy || trigger.dataset.busy) return; diff --git a/static/js/cookbook-hwfit.js b/static/js/cookbook-hwfit.js index 68ba334..161b6f3 100644 --- a/static/js/cookbook-hwfit.js +++ b/static/js/cookbook-hwfit.js @@ -527,6 +527,9 @@ export async function _hwfitFetch(fresh = false) { if (useCase) params.set('use_case', useCase); if (quantPref) params.set('quant', quantPref); if (targetCtx) params.set('ctx', String(targetCtx)); + // Fit-only filter — set by the dot in the Fit column header. + const _fitOnly = (() => { try { return localStorage.getItem('hwfit_fit_only_v1') === '1'; } catch { return false; } })(); + if (_fitOnly) params.set('fit_only', '1'); } const endpoint = isImageMode ? `/api/hwfit/image-models?${params}` : `/api/hwfit/models?${params}`; const res = await fetch(endpoint); @@ -888,9 +891,15 @@ export function _hwfitRenderList(el, models) { arrow = isReversed ? ' \u25B2' : ' \u25BC'; } const dataAttr = col.key ? ` data-sort="${col.key}"` : ''; - const label = (col.cls === 'hwfit-fit' && _budget) - ? `${col.label} (${_budget})` - : col.label; + // Fit column gets a small dot to its left that toggles "show only models + // that fit" — replaces the old Fits On/Off button next to the toolbar. + let label = col.label; + if (col.cls === 'hwfit-fit') { + const _fitOnly = (() => { try { return localStorage.getItem('hwfit_fit_only_v1') === '1'; } catch { return false; } })(); + label = `${col.label}`; + // (Budget tag removed — the GPU/RAM/N-GPU suffix next to "Fit" was noise; + // the toggle row already shows which budget is active.) + } html += `${label}${arrow}`; } html += '
'; @@ -910,9 +919,31 @@ export function _hwfitRenderList(el, models) { const dlDot = (_cachedModelIds && (_cachedModelIds.has(m.name) || [..._cachedModelIds].some(id => id === m.name?.split('/').pop()))) ? '\u25CF' : ''; html += `
`; html += `${esc(fitLabel)}`; - html += `${modelLogo(m.name)}${esc(m.name?.split('/').pop() || m.name)}${moeBadge}${imgBadge}${dlDot}`; + // Append quant to the title when it's not already in the repo name. The + // suffix strips quant-parts the name already contains — e.g. for + // QuantTrio/MiniMax-M2-AWQ + quant=AWQ-4bit we just show "(4bit)", not + // "(AWQ-4bit)". DeepSeek-V4-Flash + FP4-MoE-Mixed keeps the full tag + // (none of those parts are in the repo id). + const _short = m.name?.split('/').pop() || m.name || ''; + const _quantTag = (m.quant || '').trim(); + const _lowerShort = _short.toLowerCase(); + let _quantSuffix = ''; + if (_quantTag) { + const _parts = _quantTag.split(/[-_]/).filter(Boolean); + const _remaining = _parts.filter(p => !_lowerShort.includes(p.toLowerCase())); + if (_remaining.length && _remaining.length < _parts.length + 1) { // at least one part is new + let _display = _remaining.join('-'); + if (_display.length > 9) _display = _display.slice(0, 9) + '…'; + _quantSuffix = ` (${esc(_display)})`; + } + } + html += `${modelLogo(m.name)}${esc(_short)}${_quantSuffix}${moeBadge}${imgBadge}${dlDot}`; html += `${esc(pcount)}`; - html += `${esc(m.quant || '?')}`; + // Truncate the Quant cell to 9 chars + ellipsis so long tags like + // "FP4-MoE-Mixed" don't push neighboring columns. Full tag stays in title. + const _qRaw = m.quant || '?'; + const _qShort = _qRaw.length > 9 ? _qRaw.slice(0, 9) + '…' : _qRaw; + html += `${esc(_qShort)}`; html += `${vramLabel}`; html += `${m.is_image_gen ? '\u2014' : ctx}`; html += `${m.is_image_gen ? '\u2014' : tps + ' t/s'}`; @@ -934,7 +965,26 @@ export function _hwfitRenderList(el, models) { }); // Clickable header columns → sort (click again to toggle direction) el.querySelectorAll('.hwfit-header .hwfit-sortable').forEach(col => { - col.addEventListener('click', () => { + col.addEventListener('click', (e) => { + // The little dot inside the Fit header is its own toggle (fit-only + // filter), don't let it fall through to a sort click. + if (e.target.closest('[data-fit-dot]')) { + const on = !e.target.classList.contains('active'); + try { localStorage.setItem('hwfit_fit_only_v1', on ? '1' : '0'); } catch {} + // Un-toggling the fit filter (off → showing too-tight rows again) is + // typically because the user wants to see the LARGE models they can't + // run yet — re-sort by VRAM descending so the biggest surface first. + if (!on) { + const sortSel = document.getElementById('hwfit-sort'); + if (sortSel) { + sortSel.value = 'vram'; + sortSel.dataset.reverse = '0'; // descending (biggest first) + } + } + _hwfitCache = null; + _hwfitFetch(); + return; + } const sortKey = col.dataset.sort; if (!sortKey) return; const sel = document.getElementById('hwfit-sort'); @@ -1018,7 +1068,16 @@ export function _expandModelRow(row, modelData) { if (modelData.is_image_gen) { html += `
${esc((modelData.capabilities || []).join(' \u00B7 ') || '')}${modelData.description ? ' \u2014 ' + esc(modelData.description) : ''}
`; } else if (_requiresAcceleratorBackend(modelData)) { - html += `
This is a safetensors GPU-serving format. Use vLLM/SGLang with a visible CUDA/ROCm accelerator, or pick a GGUF download for llama.cpp/Ollama.
`; + // Only show the "needs CUDA/ROCm" note when the host doesn't already have + // one. With a visible CUDA/ROCm accelerator the note is noise — the user + // can already serve the model and reading the warning on every row makes + // the panel feel like everything's broken. + const _sys = _hwfitCache?.system || {}; + const _backend = (_sys.backend || '').toLowerCase(); + const _hasGpuAccel = !!_sys.has_gpu && (_backend === 'cuda' || _backend === 'rocm'); + if (!_hasGpuAccel) { + html += `
This is a safetensors GPU-serving format. Use vLLM/SGLang with a visible CUDA/ROCm accelerator, or pick a GGUF download for llama.cpp/Ollama.
`; + } } html += `
`; @@ -1243,14 +1302,14 @@ export function _hwfitInit() { const targetCtx = _ctxValue(); try { localStorage.setItem(_CTX_KEY, String(targetCtx)); } catch {} // Ctx drag affects sort mode: a specific ctx target (anything < Max) - // implies the user is hunting for "what fits at this context length", - // so re-rank by fit (lowest first). Dragging back to Max means no - // ctx constraint → go back to the default score-based ranking. + // implies "what runs at this context length" — sort by VRAM ascending + // so the cheapest-fitting models surface first. Dragging back to Max + // releases the constraint → go back to the default score ranking. const sortSel = document.getElementById('hwfit-sort'); if (sortSel) { if (targetCtx) { - sortSel.value = 'fit'; - sortSel.dataset.reverse = '1'; + sortSel.value = 'vram'; + sortSel.dataset.reverse = '1'; // ascending = smallest VRAM first } else { sortSel.value = 'score'; sortSel.dataset.reverse = ''; diff --git a/static/js/cookbook.js b/static/js/cookbook.js index d60dd2d..507777a 100644 --- a/static/js/cookbook.js +++ b/static/js/cookbook.js @@ -18,6 +18,7 @@ import { _launchServeTask, _serveAutoFix, _serveAutoRetry, _serveAutoRetryReplace, _serveAutoRetryRemove, _startBackgroundMonitor, _syncFromServer, _retryDownload, _nextAvailablePort, _processQueue, + _selfHealStaleTasks, } from './cookbookRunning.js'; import { @@ -641,6 +642,13 @@ async function _fetchDependencies() { const winBlocked = !isLocal && _isWindows() && _winUnsupported.has(pkg.name); const note = pkg.status_note ? `
${esc(pkg.status_note)}
` : ''; const updateNote = pkg.installed && pkg.pip_update_available === false && pkg.update_note ? `
${esc(pkg.update_note)}
` : ''; + // Inline "Rebuild" tag for the llama_cpp row only. Styled as a + // .cookbook-dep-tag so it matches the LLM category tag's pill look, + // and lives to the LEFT of the category tag (clear affordance before + // the row "value"). + const _rebuildBtn = (pkg.name === 'llama_cpp') + ? `` + : ''; return `
` + `
` + `
${esc(pkg.name)}
` @@ -648,6 +656,7 @@ async function _fetchDependencies() { + note + updateNote + `
` + + _rebuildBtn + `${esc(pkg.category)}` + _statusTag(pkg, isLocal, isSystemDep, winBlocked) + `
`; @@ -1237,6 +1246,10 @@ function _wireTabEvents(body) { const folded = dlFoldBody.style.display === 'none'; dlFoldBody.style.display = folded ? '' : 'none'; dlFoldChevron.textContent = folded ? '▾' : '▸'; + // Toggle is-folded class on the h2 so the line under it only shows when + // the section is collapsed (the body's content normally provides + // separation; with no body visible, the line gives the h2 definition). + dlFold.classList.toggle('is-folded', !folded); try { localStorage.setItem('cookbook_dl_tab_folded_v1', folded ? '0' : '1'); } catch {} }); } @@ -1456,7 +1469,7 @@ export function _serverEntryHtml(s, i, defaultServer, forceRemote, isNew) { html += ``; html += `${esc(_srvTitle)}`; html += _pIco ? `${_pIco}` : ''; - html += ``; + html += ``; if (isNew) { // New server: Cancel (discard) sits top-right; the default toggle only makes // sense once the server is saved. @@ -1535,7 +1548,7 @@ function _renderRecipes() { // State persisted to localStorage so the fold survives reloads. const _dlTabFolded = (() => { try { return localStorage.getItem('cookbook_dl_tab_folded_v1') === '1'; } catch { return false; } })(); html += '
'; - html += `

Download${_dlTabFolded ? '▸' : '▾'}

`; + html += `

Download${_dlTabFolded ? '▸' : '▾'}

`; html += '
'; html += `
`; html += '

Download from HuggingFace by pasting model link, or download directly in the Scan section below.

'; @@ -1605,36 +1618,43 @@ function _renderRecipes() { html += '

Scans your hardware for what models you can run. Hardware is cached; hit the scan button to re-probe after changing GPUs.

'; html += '
'; html += ''; - html += ''; - // Quant (Q4/Q8/…) lives next to the search now. Default is "All" so the - // list shows the best-scoring quant for every model instead of silently - // filtering to Q4 (which used to be the implicit default). - html += ''; - // Engine filter — show only models whose serve engine matches. Composes - // with quant / type / search filters. + // Engine sits next to the type filter so the "what category / which serving + // path" filters live together; Quant + Context are storage-format and budget + // levers, grouped to the right. + html += ''; html += ''; - html += '?'; + html += '?'; + html += ''; + // Quant (Q4/Q8/…). Default is "All" so the list shows the best-scoring + // quant for every model instead of silently filtering to Q4. + html += ''; + html += ''; + html += '?'; + html += ''; // Ctx slider — lets you target a context length for fit estimates; the // hwfit ranking uses _ctxValue() to factor that into VRAM math, so // dragging this re-sorts the list toward models that fit your chosen ctx. html += ''; + // Search lives at the far right of the toolbar so the controls (Type/Quant/ + // Engine/Context) read as a row of compact filters followed by free-text. + html += ''; html += '
'; html += '
'; html += ''; @@ -1663,6 +1683,16 @@ function _renderRecipes() { html += '
'; html += ''; html += '
'; + // Footer: link to the public discussion where users can request additions + // to the curated model list. Sits below the list so it reads as a callout + // after browsing, not a header. + html += ''; html += '
'; @@ -1707,7 +1737,8 @@ function _renderRecipes() { html += '
'; html += '
'; html += '

Dependencies

'; - html += ''; + // Rebuild llama.cpp button moved into the llama_cpp dep row (see _depRow); + // having it in the title polluted the section header. html += 'Server'; html += '${tpOpts}`; // ctx resets to the model's max on every panel open (the real ctx slider // lives in the Scan/Download toolbar — see cookbook.js .hwfit-ctx-control). - panelHtml += ``; + panelHtml += ``; panelHtml += ``; panelHtml += ``; panelHtml += ``; - panelHtml += ``; + panelHtml += ``; panelHtml += ``; - panelHtml += ``; + panelHtml += ``; panelHtml += `
`; // Row 2b: Diffusers settings const diffDtypeOpts = ['bfloat16','float16','float32'].map(d => ``).join(''); @@ -696,7 +705,7 @@ function _rerenderCachedModels() { if (!_specMethods.includes(_specMethod)) _specMethods.unshift(_specMethod); const _specOpts = _specMethods.map(m => ``).join(''); - panelHtml += ``; + panelHtml += ``; } if (_opts2.envVars.length) panelHtml += ``; panelHtml += `
`; @@ -721,7 +730,7 @@ function _rerenderCachedModels() { // pushes Cancel + Launch to the right. panelHtml += ``; panelHtml += ``; - panelHtml += ``; + panelHtml += ``; panelHtml += ``; panelHtml += ``; @@ -1657,6 +1666,37 @@ function _rerenderCachedModels() { }); return; } + // Pre-launch GPU probe — common failure pattern: vLLM/SGLang launched + // on a host where no GPU is visible (driver missing, $CUDA_VISIBLE_DEVICES + // unset, container without --gpus). Catch it BEFORE the user spends + // minutes watching the task fail. + const _needsGpu = ['vllm', 'sglang'].includes(serveState.backend) + || (serveState.backend === 'diffusers'); + if (_needsGpu) { + try { + const _probeHost = (_envState.remoteHost || '').trim(); + const _probeParams = new URLSearchParams(); + if (_probeHost) { + _probeParams.set('host', _probeHost); + const _sp = (_envState.servers || []).find(s => s.host === _probeHost)?.port; + if (_sp) _probeParams.set('ssh_port', _sp); + } + const _probeRes = await fetch('/api/cookbook/gpus' + (_probeParams.toString() ? '?' + _probeParams : ''), { credentials: 'same-origin' }); + const _probeData = await _probeRes.json(); + const _probeGpus = Array.isArray(_probeData) ? _probeData : (_probeData.gpus || []); + if (!_probeGpus.length) { + const _proceed = await window.styledConfirm( + `No GPU detected on ${_probeHost ? _probeHost : 'this host'}. ${serveState.backend.toUpperCase()} needs a visible CUDA/ROCm accelerator to start — launching now will most likely crash early.\n\nLaunch anyway?`, + { title: 'No GPU detected', confirmText: 'Launch anyway', cancelText: 'Cancel', danger: true }, + ); + if (!_proceed) return; + } + } catch { + // Network / probe failure — don't block. Better to let the launch + // proceed than to silently refuse because the probe endpoint + // hiccuped (the user can read the real error in the task output). + } + } // Save in the { _byRepo, _lastUsed } schema — no legacy flat keys at // the root so per-model state doesn't leak between models. try { diff --git a/static/style.css b/static/style.css index c259cb5..30725ed 100644 --- a/static/style.css +++ b/static/style.css @@ -18628,16 +18628,41 @@ body.gallery-selecting .gallery-dl-btn, background: color-mix(in srgb, var(--fg) 10%, transparent); color: color-mix(in srgb, var(--fg) 60%, transparent); } +/* Rebuild tag — same look as the LLM category tag, sits to its left. */ +.cookbook-dep-rebuild { + background: color-mix(in srgb, var(--fg) 10%, transparent); + color: color-mix(in srgb, var(--fg) 75%, transparent); + border: 1px solid color-mix(in srgb, var(--fg) 20%, transparent); + cursor: pointer; + font-family: inherit; + appearance: none; + -webkit-appearance: none; + -moz-appearance: none; +} +.cookbook-dep-rebuild:hover { + background: color-mix(in srgb, var(--accent, var(--red)) 18%, transparent); + color: var(--accent, var(--red)); + border-color: color-mix(in srgb, var(--accent, var(--red)) 45%, transparent); +} .cookbook-dep-installed { background: color-mix(in srgb, var(--green, #50fa7b) 18%, transparent); color: var(--green, #50fa7b); border: 1px solid color-mix(in srgb, var(--green, #50fa7b) 35%, transparent); + /* Match the Install button + Installed ▾ split width so all three variants + align in a mixed row. */ + min-width: 75.85px; + padding: 0 10px; + box-sizing: border-box; } .cookbook-dep-na { background: color-mix(in srgb, var(--fg) 8%, transparent); color: color-mix(in srgb, var(--fg) 60%, transparent); border: 1px solid color-mix(in srgb, var(--fg) 16%, transparent); cursor: help; + /* Match other dep tag widths so N/A rows line up with Install / Installed. */ + min-width: 75.85px; + padding: 0 10px; + box-sizing: border-box; } .cookbook-dep-install { background: var(--accent, var(--red)); @@ -18648,12 +18673,30 @@ body.gallery-selecting .gallery-dl-btn, font-weight: 500; position: relative; top: -3px; + /* Width matches the measured Installed ▾ split button (75.85px) so a row of + mixed Install / Installed deps lines up. */ + min-width: 75.85px; + padding: 0 10px; /* Strip the native button box so it's the same height as the sibling tags (Firefox renders