Backend (services/hwfit + routes):
- VRAM column sort now shows global highest first (was special-cased to
ascending then truncated top-N, which made "highest VRAM" mathematically
unreachable). Every column path uses reverse=True for the truncation.
- Hardware probe cache TTL 30min -> 24h so changing filters doesn't keep
re-probing the rig during a session; Rescan button still forces fresh.
- Multi-GPU rigs filter GGUF Q*/IQ quants (vLLM/SGLang can't serve them);
default non-prequantized to BF16 on 2+ GPUs.
- AWQ / AWQ-8bit / GPTQ-8bit get a -1.0 quality penalty so FP8 wins ties.
- Version-aware tiebreaker (parse Mn.n / Vn) — MiniMax-M2.7 ranks above M2.5.
- hf_models.json: zai-org/GLM-5.1 added; zai-org/GLM-5 quantization flipped
Q4_K_M -> BF16. DeepSeek-V4-Flash / -Pro + their -Base variants registered
with new FP4-MoE-Mixed / FP8-Mixed quant keys (calibrated BPP from the
actual 156 GB / 284 GB disk footprints).
- New FP4-MoE-Mixed + FP8-Mixed entries in QUANT_BPP / QUANT_SPEED_MULT /
QUANT_QUALITY_PENALTY / QUANT_BYTES_PER_PARAM / PREQUANTIZED_PREFIXES.
Frontend — Scan/Download:
- Engine + Quant swapped in the toolbar; Quant defaults to "All".
- Ctx (range slider) ported from origin/main: 8k/16k/32k/50k/128k/Max. Drag
re-sorts by vram ascending (smallest fitting first); back to Max → score.
- Ctx slider rail now visible — was background:transparent in a duplicate
later-cascade rule. Hardcoded grey + !important.
- Search input moved to the far right of the toolbar.
- Type/Standard default; "Context" not uppercased; Search placeholder dimmed.
- Engine "?" + Quant "?" inline help chips inside their dropdown boxes.
- Fit-column dot toggles fit-only filter; un-toggling re-sorts by VRAM desc.
- Quant column truncates to 9 chars + ellipsis ("FP4-MoE-M..."), full in
tooltip. Smart title-suffix strips the parts already in the repo name
(QuantTrio/MiniMax-M2-AWQ + quant AWQ-4bit -> just "(4bit)").
- Conditional warning for safetensors models on non-GPU rigs only.
- Dependency Install / Installed / Installed▾ / N/A all 75.85px wide.
- Rebuild llama.cpp moved into the llama_cpp dep row, styled as a tag.
- Foldable Download admin-card (h2 chevron); line under h2 only when folded.
- HF token save gets a green ✓ + "Saved" flash.
- Cached scan no longer counts stalled rows as downloaded.
- Footer: "Request it →" link with GitHub mark to the public discussion
(#1962) for model-add requests.
Frontend — Running tab:
- Strict download-finish check (DOWNLOAD_OK or /snapshots/, not bare
"Download complete"). True overall % for multi-shard downloads:
((N-1)+frac)/total instead of hf_transfer's per-shard aggregate.
- ETA in the uptime ticker: "downloading: 12m 34s · ETA 1h 23m".
- Clear button kills the tmux session too; if the output still shows a
live shard line, the pill is hidden + relabels as "reconnect" + revives
on click.
- Self-heal: on cookbook open AND every bg-monitor cycle (10s, throttled
to 8s), scan persisted done/error/crashed downloads and probe their
tmux session — if alive, flip status back to running and reattach.
- Per-launch zombie probe: clicking Download on a model whose persisted
state is done but tmux is still alive revives the existing task and
refuses to start a duplicate.
- Pre-launch GPU probe: vllm / sglang / diffusers serve check
/api/cookbook/gpus first; warns + confirms if no GPU is visible.
- Server-side state guard: rejects "done" POSTs for downloads lacking
DOWNLOAD_OK / DOWNLOAD_FAILED / /snapshots/ when the last-mentioned
shard is N<total — stale tabs can't poison persisted state any more.
- Running count includes tasks whose output looks active even if persisted
status got stuck. Dir text on the running row, font matched to uptime.
Serve panel:
- Ctx text input always resets to model max on open (default 20000 when
metadata is missing).
- Max Seqs default 8 -> 4. KV Cache dtype select 32px tall.
- Lightning icon on Launch (same as Action toggle).
- Diagnosis card simplified (no fold/copy/dismiss), suggestion font
matches body; action buttons get icons on the left (Retry/Copy/Edit/
Install/Kill/Switch/etc.).
- Incomplete-download serve warning when model status is
downloading / stalled / has_incomplete.
- MTP "?" tooltip ("supported on a few model families … up to ~3× faster").
695 lines
31 KiB
JavaScript
695 lines
31 KiB
JavaScript
// ============================================
|
||
// COOKBOOK DIAGNOSIS SUB-MODULE
|
||
// Error pattern matching and diagnosis UI
|
||
// ============================================
|
||
|
||
import {
|
||
_envState,
|
||
_loadTasks,
|
||
_removeTask,
|
||
_launchServeTask,
|
||
_buildEnvPrefix,
|
||
_sshCmd,
|
||
_setPanelField,
|
||
_setPanelCheckbox,
|
||
_copyText,
|
||
_persistEnvState,
|
||
_tmuxCmd,
|
||
_serveAutoRetry,
|
||
_serveAutoRetryReplace,
|
||
_serveAutoRetryRemove,
|
||
_serveAutoFix,
|
||
// Plain specifier (no ?v=) — must match every other cookbook.js importer so the
|
||
// browser loads it once. See cookbook-hwfit.js.
|
||
} from './cookbook.js';
|
||
import uiModule from './ui.js';
|
||
|
||
// Tiny HTML-escape — keeps the file standalone instead of leaning on a
|
||
// shared helper that may not be exported from this module's import surface.
|
||
function _diagEsc(s) {
|
||
return String(s ?? '').replace(/[&<>"']/g, c => ({'&':'&','<':'<','>':'>','"':'"',"'":'''}[c]));
|
||
}
|
||
|
||
// Pick an icon for a diagnosis-action button based on the label. The icon
|
||
// renders on the LEFT of the button text. Keeps the strokes consistent
|
||
// across the set so they read as one family.
|
||
function _diagFixIcon(label) {
|
||
const l = String(label || '').toLowerCase();
|
||
const _svg = (path) => `<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round" class="cookbook-diag-btn-ico" aria-hidden="true">${path}</svg>`;
|
||
if (l.startsWith('retry') || l.includes('relaunch') || l.includes('restart')) {
|
||
// Circular-arrow refresh
|
||
return _svg('<polyline points="23 4 23 10 17 10"/><polyline points="1 20 1 14 7 14"/><path d="M3.51 9a9 9 0 0 1 14.85-3.36L23 10M1 14l4.64 4.36A9 9 0 0 0 20.49 15"/>');
|
||
}
|
||
if (l.startsWith('copy')) {
|
||
return _svg('<rect x="9" y="9" width="13" height="13" rx="2"/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/>');
|
||
}
|
||
if (l.startsWith('edit')) {
|
||
return _svg('<path d="M12 20h9"/><path d="M16.5 3.5a2.121 2.121 0 0 1 3 3L7 19l-4 1 1-4Z"/>');
|
||
}
|
||
if (l.startsWith('open') || l.includes('dependencies')) {
|
||
return _svg('<path d="M14 3h7v7"/><path d="M21 3l-9 9"/><path d="M21 14v5a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2h5"/>');
|
||
}
|
||
if (l.startsWith('install') || l.includes('upgrade')) {
|
||
return _svg('<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/><polyline points="7 10 12 15 17 10"/><line x1="12" y1="15" x2="12" y2="3"/>');
|
||
}
|
||
if (l.startsWith('kill') || l.startsWith('stop')) {
|
||
return _svg('<rect x="6" y="6" width="12" height="12" rx="1"/>');
|
||
}
|
||
if (l.startsWith('switch') || l.includes('use ')) {
|
||
return _svg('<polyline points="17 1 21 5 17 9"/><path d="M3 11V9a4 4 0 0 1 4-4h14"/><polyline points="7 23 3 19 7 15"/><path d="M21 13v2a4 4 0 0 1-4 4H3"/>');
|
||
}
|
||
// Default: lightbulb (generic "suggestion")
|
||
return _svg('<path d="M9 21h6"/><path d="M12 17v4"/><path d="M12 3a6 6 0 0 0-4 10.5c1 1 1.5 2 1.5 3.5h5c0-1.5.5-2.5 1.5-3.5A6 6 0 0 0 12 3Z"/>');
|
||
}
|
||
import spinnerModule from './spinner.js';
|
||
|
||
// ── Error diagnosis ──
|
||
|
||
function _openCookbookDependencies(pkgName = '') {
|
||
const cookbook = window.cookbookModule;
|
||
if (cookbook && typeof cookbook.open === 'function') {
|
||
cookbook.open({ tab: 'Dependencies' });
|
||
} else {
|
||
document.getElementById('tool-cookbook-btn')?.click();
|
||
}
|
||
|
||
const wanted = String(pkgName || '').toLowerCase();
|
||
const tryHighlight = (attempt = 0) => {
|
||
const modal = document.getElementById('cookbook-modal');
|
||
const tab = modal?.querySelector('.cookbook-tab[data-backend="Dependencies"]');
|
||
if (tab && !tab.classList.contains('active')) tab.click();
|
||
|
||
const rows = [...document.querySelectorAll('#cookbook-deps-list [data-pkg-name]')];
|
||
if (!rows.length) {
|
||
if (attempt < 45) setTimeout(() => tryHighlight(attempt + 1), 100);
|
||
return;
|
||
}
|
||
if (!wanted) return;
|
||
const row = rows.find(r => {
|
||
const name = (r.dataset.pkgName || '').toLowerCase();
|
||
const pip = (r.dataset.depPip || '').toLowerCase();
|
||
return name === wanted || pip.includes(wanted) || wanted.includes(name);
|
||
});
|
||
if (row) {
|
||
row.scrollIntoView({ block: 'center' });
|
||
row.classList.add('cookbook-pkg-flash');
|
||
setTimeout(() => row.classList.remove('cookbook-pkg-flash'), 1800);
|
||
}
|
||
};
|
||
tryHighlight();
|
||
}
|
||
|
||
function _openServeEditFromDiagnosis(panel, fields = null) {
|
||
const task = panel?.closest?.('.cookbook-task');
|
||
if (!task) return;
|
||
task.dispatchEvent(new CustomEvent('cookbook:edit-serve', { bubbles: true, detail: { fields } }));
|
||
}
|
||
|
||
function _openCpuServeEdit(panel) {
|
||
_openServeEditFromDiagnosis(panel, {
|
||
backend: 'llamacpp',
|
||
gpus: '',
|
||
tp: '1',
|
||
gpu_mem: '0.80',
|
||
_forceBackend: true,
|
||
});
|
||
}
|
||
|
||
// Infer the gated base repo that single-file checkpoints need configs from
|
||
function _inferBaseRepo(text) {
|
||
if (!text) return null;
|
||
const t = text.toLowerCase();
|
||
if (t.includes('sd3.5') || t.includes('stable-diffusion-3.5')) return 'stabilityai/stable-diffusion-3.5-large';
|
||
if (t.includes('sd3') || t.includes('stable-diffusion-3')) return 'stabilityai/stable-diffusion-3-medium-diffusers';
|
||
if (t.includes('flux')) return 'black-forest-labs/FLUX.1-schnell';
|
||
if (t.includes('sdxl') || t.includes('stable-diffusion-xl')) return 'stabilityai/stable-diffusion-xl-base-1.0';
|
||
return null;
|
||
}
|
||
|
||
export const ERROR_PATTERNS = [
|
||
{
|
||
pattern: /No available memory for the cache blocks|Available KV cache memory:.*-/i,
|
||
message: 'No GPU memory left for KV cache after loading model.',
|
||
fixes: [
|
||
{ label: 'Retry with GPU mem 0.95', action: (panel) => _serveAutoRetryReplace(panel, '--gpu-memory-utilization', '0.95') },
|
||
{ label: 'Retry with context 2048', action: (panel) => _serveAutoRetryReplace(panel, '--max-model-len', '2048') },
|
||
{ label: 'Retry with more GPUs (TP=8)', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '8') },
|
||
],
|
||
},
|
||
{
|
||
pattern: /warming up sampler|max_num_seqs.*gpu_memory_utilization/i,
|
||
message: 'OOM during warmup. Lower GPU memory or max sequences.',
|
||
fixes: [
|
||
{ label: 'Retry with GPU mem 0.80', action: (panel) => _serveAutoRetryReplace(panel, '--gpu-memory-utilization', '0.80') },
|
||
{ label: 'Retry with --max-num-seqs 64', action: (panel) => _serveAutoRetry(panel, '--max-num-seqs 64') },
|
||
{ label: 'Retry with --max-num-seqs 32', action: (panel) => _serveAutoRetry(panel, '--max-num-seqs 32') },
|
||
],
|
||
},
|
||
{
|
||
pattern: /CUDA out of memory|torch\.cuda\.OutOfMemoryError|CUDA error: out of memory/i,
|
||
message: 'GPU ran out of memory. Try more GPUs (higher TP) or lower context.',
|
||
fixes: [
|
||
{ label: 'Retry with TP=2', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '2') },
|
||
{ label: 'Retry with TP=4', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '4') },
|
||
{ label: 'Retry with GPU mem 0.80', action: (panel) => _serveAutoRetryReplace(panel, '--gpu-memory-utilization', '0.80') },
|
||
{ label: 'Retry with context 4096', action: (panel) => _serveAutoRetryReplace(panel, '--max-model-len', '4096') },
|
||
{ label: 'Retry with --enforce-eager', action: (panel) => _serveAutoRetry(panel, '--enforce-eager') },
|
||
],
|
||
},
|
||
{
|
||
pattern: /not divisible by weight quantization|quantization block/i,
|
||
message: 'FP8 MoE quantization is incompatible with this tensor-parallel split.',
|
||
suggestion: 'Suggested action: retry with a lower tensor-parallel size, such as TP=4 or TP=2. If it still fails, use a non-FP8/GGUF version of the model.',
|
||
fixes: [
|
||
{ label: 'Retry with TP=4', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '4') },
|
||
{ label: 'Retry with TP=2', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '2') },
|
||
{ label: 'Edit serve', action: (panel) => _openServeEditFromDiagnosis(panel) },
|
||
],
|
||
},
|
||
{
|
||
pattern: /not divisib|must be divisible|attention heads.*divisible/i,
|
||
message: 'Tensor parallel size incompatible with model dimensions.',
|
||
fixes: [
|
||
{ label: 'Retry with TP=1', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '1') },
|
||
{ label: 'Retry with TP=2', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '2') },
|
||
{ label: 'Retry with TP=4', action: (panel) => _serveAutoRetryReplace(panel, '--tensor-parallel-size', '4') },
|
||
],
|
||
},
|
||
{
|
||
pattern: /Too large swap space|swap space.*total CPU memory/i,
|
||
message: 'Swap space too large for available CPU memory.',
|
||
fixes: [
|
||
{ label: 'Retry without swap', action: (panel) => _serveAutoRetryRemove(panel, '--swap-space') },
|
||
{ label: 'Retry with swap 1', action: (panel) => _serveAutoRetryReplace(panel, '--swap-space', '1') },
|
||
],
|
||
},
|
||
{
|
||
pattern: /swap space|not enough.*memory.*cpu|Cannot allocate memory/i,
|
||
message: 'Not enough CPU RAM or swap space.',
|
||
fixes: [
|
||
{ label: 'Retry without swap', action: (panel) => _serveAutoRetryRemove(panel, '--swap-space') },
|
||
{ label: 'Lower max context to 4096', action: (panel) => _setPanelField(panel, 'ctx', '4096') },
|
||
],
|
||
},
|
||
{
|
||
pattern: /unrecognized arguments:\s*--swap-space/i,
|
||
message: '--swap-space was removed in newer vLLM versions. Remove it from the command.',
|
||
fixes: [
|
||
{ label: 'Retry without swap', action: (panel) => _serveAutoRetryRemove(panel, '--swap-space') },
|
||
],
|
||
},
|
||
{
|
||
pattern: /Address already in use|bind.*address.*in use/i,
|
||
message: 'Port is already in use. Another server may be running.',
|
||
fixes: [
|
||
{ label: 'Kill existing vLLM', action: (panel) => _runQuickCmd(panel, 'pkill -f vllm') },
|
||
{ label: 'Use port 8001', action: (panel) => _setPanelField(panel, 'port', '8001') },
|
||
],
|
||
},
|
||
{
|
||
pattern: /No CUDA GPUs are available|no GPU.*found|CUDA_VISIBLE_DEVICES.*invalid/i,
|
||
message: 'No GPUs visible. Check your GPU selection or driver.',
|
||
fixes: [
|
||
{ label: 'Clear GPU selection (use all)', action: (panel) => {
|
||
_setPanelField(panel, 'gpus', '');
|
||
_envState.gpus = '';
|
||
_persistEnvState();
|
||
}},
|
||
],
|
||
},
|
||
{
|
||
pattern: /403 Forbidden|401 Unauthorized|Access to model.*is restricted|gated repo|not in the authorized list|awaiting a review/i,
|
||
message: 'Gated model. Your HF token IS being sent — but its account must be granted access first: open the model page, accept the license, and wait for approval (Meta models can take a while).',
|
||
// Extract repo name from error text to build HF link
|
||
_repoPattern: /Access to model\s+(\S+)\s+is restricted|gated repo.*?huggingface\.co\/([^\s/]+\/[^\s/]+)/i,
|
||
fixes: [
|
||
{ label: 'Request access on HF', action: (panel, _text) => {
|
||
const m = _text && (_text.match(/Access to model\s+(\S+)\s+is restricted/i) || _text.match(/huggingface\.co\/([^\s/]+\/[^\s/]+)/i));
|
||
const repo = m && (m[1] || m[2]);
|
||
if (repo) window.open('https://huggingface.co/' + repo, '_blank');
|
||
else window.open('https://huggingface.co/settings/gated-repos', '_blank');
|
||
}},
|
||
{ label: 'Check HF Token', action: (panel) => {
|
||
const el = panel.querySelector('[data-field="hf_token"]');
|
||
if (el) { el.focus(); el.style.borderColor = 'var(--red)'; }
|
||
}},
|
||
],
|
||
},
|
||
{
|
||
pattern: /Weights for this component appear to be missing|load the component before passing/i,
|
||
message: 'Single-file checkpoint needs a base model for missing components (text encoder, VAE). The base model may be gated — accept the license and set your HF token.',
|
||
fixes: [
|
||
{ label: 'Request access to base model', action: (panel, _text) => {
|
||
// Extract gated repo from error, or infer from model name
|
||
const gated = _text && _text.match(/Access to model\s+(\S+)\s+is restricted/i);
|
||
const base = _text && _text.match(/config=([^\s,)]+)/i);
|
||
const model = _text && _text.match(/load model from\s+(\S+)/i);
|
||
const repo = (gated && gated[1]) || (base && base[1]) || _inferBaseRepo(_text);
|
||
if (repo) window.open('https://huggingface.co/' + repo, '_blank');
|
||
else if (model && model[1]) window.open('https://huggingface.co/' + model[1].replace(/[.]$/, ''), '_blank');
|
||
}},
|
||
{ label: 'Check HF Token', action: (panel) => {
|
||
const el = panel.querySelector('[data-field="hf_token"]');
|
||
if (el) { el.focus(); el.style.borderColor = 'var(--red)'; }
|
||
}},
|
||
],
|
||
},
|
||
{
|
||
pattern: /Entry Not Found.*model_index\.json|Could not load model.*Check diffusers/i,
|
||
message: 'Single-file model — needs base config from a gated repo. Accept the license and set your HF token.',
|
||
fixes: [
|
||
{ label: 'Request access to base model', action: (panel, _text) => {
|
||
const gated = _text && _text.match(/Access to model\s+(\S+)\s+is restricted/i);
|
||
const repo = (gated && gated[1]) || _inferBaseRepo(_text);
|
||
if (repo) window.open('https://huggingface.co/' + repo, '_blank');
|
||
else window.open('https://huggingface.co/settings/gated-repos', '_blank');
|
||
}},
|
||
{ label: 'Check HF Token', action: (panel) => {
|
||
const el = panel.querySelector('[data-field="hf_token"]');
|
||
if (el) { el.focus(); el.style.borderColor = 'var(--red)'; }
|
||
}},
|
||
],
|
||
},
|
||
{
|
||
pattern: /does not appear to have a file named|not a valid model|No such file or directory.*model/i,
|
||
message: 'Model path or ID not found.',
|
||
fixes: [
|
||
{ label: 'Check model name', action: (panel) => {
|
||
const header = panel.querySelector('.hwfit-panel-model');
|
||
if (header) header.style.color = 'var(--red)';
|
||
}},
|
||
],
|
||
},
|
||
{
|
||
pattern: /NCCL error|ncclSystemError|ncclInternalError/i,
|
||
message: 'Multi-GPU communication (NCCL) failed.',
|
||
fixes: [
|
||
{ label: 'Set TP to 1 (single GPU)', action: (panel) => _setPanelField(panel, 'tp', '1') },
|
||
{ label: 'Enable enforce eager', action: (panel) => _setPanelCheckbox(panel, 'enforce_eager', true) },
|
||
],
|
||
},
|
||
{
|
||
pattern: /KV cache.*too (small|large)|max_model_len.*exceeds|maximum.*context/i,
|
||
message: 'Context length too large for available GPU memory.',
|
||
fixes: [
|
||
{ label: 'Lower to 8192', action: (panel) => _setPanelField(panel, 'ctx', '8192') },
|
||
{ label: 'Lower to 4096', action: (panel) => _setPanelField(panel, 'ctx', '4096') },
|
||
{ label: 'Lower to 2048', action: (panel) => _setPanelField(panel, 'ctx', '2048') },
|
||
],
|
||
},
|
||
{
|
||
pattern: /vllm.*command not found|No module named vllm/i,
|
||
message: 'vLLM is not installed or not in PATH.',
|
||
fixes: [
|
||
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('vllm') },
|
||
{ label: 'Check environment is set', action: (panel) => {
|
||
const el = panel.querySelector('[data-field="env_type"]');
|
||
if (el) { el.focus(); el.style.borderColor = 'var(--red)'; }
|
||
}},
|
||
],
|
||
},
|
||
{
|
||
pattern: /sglang.*command not found|No module named sglang|SGLang is not installed/i,
|
||
message: 'SGLang is not installed or not in PATH.',
|
||
fixes: [
|
||
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('sglang') },
|
||
{ label: 'Copy install command', action: () => _copyText('python3 -m pip install "sglang[all]"') },
|
||
],
|
||
},
|
||
{
|
||
pattern: /No accelerator \(CUDA, XPU, HPU, NPU, MUSA, MPS\) is available|Triton is not supported on current platform/i,
|
||
message: 'SGLang needs a visible GPU/accelerator on this server.',
|
||
suggestion: 'Suggested action: switch this serve config to llama.cpp for CPU/local serving, or choose a GPU server.',
|
||
fixes: [
|
||
{ label: 'Switch to llama.cpp', action: (panel) => _openCpuServeEdit(panel) },
|
||
{ label: 'Choose GPU server', action: (panel) => _openServeEditFromDiagnosis(panel) },
|
||
],
|
||
},
|
||
{
|
||
pattern: /flashinfer.*version.*does not match|flashinfer-cubin version/i,
|
||
message: 'FlashInfer version mismatch.',
|
||
fixes: [
|
||
{ label: 'Auto-fix: bypass version check', action: (panel) => _serveAutoFix(panel, 'FLASHINFER_DISABLE_VERSION_CHECK=1'), autofix: true },
|
||
{ label: 'Fix properly: pip install matching version', action: () => {} },
|
||
],
|
||
},
|
||
{
|
||
pattern: /torch\.cuda\.is_available\(\).*False|No CUDA runtime/i,
|
||
message: 'vLLM needs a visible CUDA/ROCm GPU.',
|
||
suggestion: 'Suggested action: switch this serve config to llama.cpp for CPU/local serving, or choose a GPU server.',
|
||
fixes: [
|
||
{ label: 'Switch to llama.cpp', action: (panel) => _openCpuServeEdit(panel) },
|
||
{ label: 'Choose GPU server', action: (panel) => _openServeEditFromDiagnosis(panel) },
|
||
],
|
||
},
|
||
{
|
||
pattern: /Engine core initialization failed/i,
|
||
message: 'vLLM engine failed to start. Check the error above.',
|
||
fixes: [
|
||
{ label: 'Retry with --enforce-eager', action: (panel) => _serveAutoRetry(panel, '--enforce-eager'), autofix: true },
|
||
{ label: 'Retry with context 4096', action: (panel) => _serveAutoRetry(panel, '--max-model-len 4096'), autofix: true },
|
||
{ label: 'Lower context to 4096', action: (panel) => _setPanelField(panel, 'ctx', '4096') },
|
||
{ label: 'Lower GPU mem to 0.80', action: (panel) => _setPanelField(panel, 'gpu_mem', '0.80') },
|
||
],
|
||
},
|
||
{
|
||
pattern: /weight_loader.*unexpected keyword|Unexpected key.*state_dict/i,
|
||
message: 'Model format incompatible with this vLLM version.',
|
||
fixes: [
|
||
{ label: 'Try trust remote code', action: (panel) => _setPanelCheckbox(panel, 'trust_remote', true) },
|
||
],
|
||
},
|
||
{
|
||
pattern: /enable-auto-tool-choice requires --tool-call-parser/i,
|
||
message: 'Auto tool choice needs a tool call parser.',
|
||
fixes: [
|
||
{ label: 'Retry with --tool-call-parser hermes', action: (panel) => _serveAutoRetry(panel, '--tool-call-parser hermes'), autofix: true },
|
||
],
|
||
},
|
||
{
|
||
pattern: /Please pass.*trust.remote.code=True|contains custom code which must be executed to correctly load/i,
|
||
message: 'Model requires custom code. Enable --trust-remote-code.',
|
||
fixes: [
|
||
{ label: 'Retry with --trust-remote-code', action: (panel) => _serveAutoRetry(panel, '--trust-remote-code'), autofix: true },
|
||
],
|
||
},
|
||
{
|
||
pattern: /does not recognize this architecture|model type.*but Transformers does not/i,
|
||
message: 'Model architecture too new for installed vLLM/transformers.',
|
||
fixes: [
|
||
{ label: 'Try --trust-remote-code', action: (panel) => _serveAutoRetry(panel, '--trust-remote-code'), autofix: true },
|
||
{ label: 'Update vLLM on server', action: (panel) => {
|
||
const taskEl = panel.closest('.cookbook-task');
|
||
const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
|
||
const host = task?.remoteHost || '';
|
||
const prefix = _buildEnvPrefix();
|
||
const pipCmd = prefix ? prefix + ' pip install -U vllm transformers' : 'pip install -U vllm transformers';
|
||
const cmd = host ? _sshCmd(host, pipCmd) : pipCmd;
|
||
// Run in tmux so it doesn't timeout
|
||
const name = 'update-vllm';
|
||
_launchServeTask(name, 'pip-update', cmd);
|
||
}},
|
||
],
|
||
},
|
||
{
|
||
pattern: /Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels\/layer/i,
|
||
message: 'Transformers/kernels package mismatch.',
|
||
fixes: [
|
||
{ label: 'Repair kernel package', action: (panel) => {
|
||
const taskEl = panel.closest('.cookbook-task');
|
||
const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
|
||
const host = task?.remoteHost || '';
|
||
const prefix = _buildEnvPrefix();
|
||
const pipCmd = prefix
|
||
? prefix + ' python3 -m pip install --user --break-system-packages "kernels<0.15"'
|
||
: 'python3 -m pip install --user --break-system-packages "kernels<0.15"';
|
||
const cmd = host ? _sshCmd(host, pipCmd) : pipCmd;
|
||
_launchServeTask('repair-kernels', 'pip-update', cmd);
|
||
}},
|
||
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('sglang') },
|
||
],
|
||
},
|
||
{
|
||
pattern: /ollama.*command not found/i,
|
||
message: 'Ollama is not installed on this server. Run: curl -fsSL https://ollama.com/install.sh | sh',
|
||
fixes: [
|
||
{ label: 'Copy install command', action: () => _copyText('curl -fsSL https://ollama.com/install.sh | sh') },
|
||
],
|
||
},
|
||
{
|
||
pattern: /llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'/i,
|
||
message: 'llama-cpp-python server is not installed. Run: pip install "llama-cpp-python[server]"',
|
||
fixes: [
|
||
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('llama_cpp') },
|
||
{ label: 'Copy install command', action: () => _copyText('pip install "llama-cpp-python[server]"') },
|
||
],
|
||
},
|
||
{
|
||
pattern: /CUDA Toolkit not found|Unable to find cudart library|missing:\s*CUDA_CUDART/i,
|
||
message: 'llama.cpp found nvcc, but the CUDA runtime library is missing.',
|
||
suggestion: 'Suggested action: relaunch with the updated runner so llama.cpp builds CPU-only, or install a complete CUDA toolkit/runtime on this server for GPU llama.cpp.',
|
||
fixes: [
|
||
{ label: 'Edit serve', action: (panel) => _openServeEditFromDiagnosis(panel) },
|
||
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('llama_cpp') },
|
||
],
|
||
},
|
||
{
|
||
pattern: /No module named ['"]?torch|No module named ['"]?diffusers|diffusers.*command not found/i,
|
||
message: 'Diffusion serving needs PyTorch and diffusers. Install diffusers from Cookbook → Dependencies.',
|
||
fixes: [
|
||
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('diffusers') },
|
||
{ label: 'Copy install command', action: () => _copyText('python3 -m pip install "diffusers[torch]"') },
|
||
],
|
||
},
|
||
{
|
||
pattern: /Triton kernels.*Failed to import|cannot import name '\w+' from 'triton_kernels/i,
|
||
message: 'Triton kernels version mismatch. Non-fatal warning — model will still run, just without optimized MoE kernels.',
|
||
fixes: [
|
||
{ label: 'Update triton on server', action: (panel) => {
|
||
const taskEl = panel.closest('.cookbook-task');
|
||
const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
|
||
const host = task?.remoteHost || '';
|
||
const prefix = _buildEnvPrefix();
|
||
const pipCmd = prefix ? prefix + ' pip install -U triton triton-kernels' : 'pip install -U triton triton-kernels';
|
||
const cmd = host ? _sshCmd(host, pipCmd) : pipCmd;
|
||
_launchServeTask('update-triton', 'pip-update', cmd);
|
||
}},
|
||
],
|
||
},
|
||
{
|
||
pattern: /No space left on device|Disk quota exceeded|ENOSPC/i,
|
||
message: 'Disk full on the server. Free up space before retrying.',
|
||
fixes: [
|
||
{ label: 'Check HF cache size', action: (panel) => _runQuickCmd(panel, 'du -sh ~/.cache/huggingface 2>/dev/null') },
|
||
],
|
||
},
|
||
{
|
||
pattern: /Connection refused|Could not connect|Connection reset by peer/i,
|
||
message: 'Network connection failed. Server may be unreachable or HuggingFace is down.',
|
||
fixes: [
|
||
{ label: 'Test HF connectivity', action: (panel) => _runQuickCmd(panel, 'curl -sI https://huggingface.co 2>&1 | head -3') },
|
||
],
|
||
},
|
||
{
|
||
pattern: /attention_sink|sliding.window.*not supported|sliding_window.*incompatible/i,
|
||
message: 'Model uses attention features unsupported in this vLLM version.',
|
||
fixes: [
|
||
{ label: 'Update vLLM on server', action: (panel) => {
|
||
const taskEl = panel.closest('.cookbook-task');
|
||
const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
|
||
const host = task?.remoteHost || '';
|
||
const prefix = _buildEnvPrefix();
|
||
const pipCmd = prefix ? prefix + ' pip install -U vllm' : 'pip install -U vllm';
|
||
const cmd = host ? _sshCmd(host, pipCmd) : pipCmd;
|
||
_launchServeTask('update-vllm', 'pip-update', cmd);
|
||
}},
|
||
],
|
||
},
|
||
{
|
||
// Tail-only + healthy-server suppression. tmux capture-pane returns the
|
||
// entire scrollback every poll, so a one-shot startup traceback would
|
||
// otherwise stick on the panel forever even while the server happily
|
||
// serves /v1/models. Only fire if the traceback is in recent output AND
|
||
// the server isn't currently logging healthy traffic.
|
||
match: (text) => {
|
||
const TAIL = text.slice(-4096);
|
||
if (!/Traceback \(most recent call last\)/i.test(TAIL)) return false;
|
||
// Healthy markers in the tail mean whatever blew up has been recovered
|
||
// from — the server is up and answering requests.
|
||
if (/Application startup complete|"GET \/v1\/[^"]+ HTTP\/[\d.]+" 2\d\d|Uvicorn running on/i.test(TAIL)) return false;
|
||
return true;
|
||
},
|
||
message: 'Python traceback detected — may be a handled error, check logs.',
|
||
fixes: [
|
||
{ label: 'Kill vLLM processes', action: (panel) => _runQuickCmd(panel, 'pkill -f vllm') },
|
||
],
|
||
},
|
||
];
|
||
|
||
export function _diagnose(text) {
|
||
for (const entry of ERROR_PATTERNS) {
|
||
const hit = entry.match ? entry.match(text) : entry.pattern.test(text);
|
||
if (hit) return entry;
|
||
}
|
||
return null;
|
||
}
|
||
|
||
function _diagnosisCopyBundle(task, diagnosis, sourceText, suggestionText) {
|
||
const lines = ['## Odysseus Cookbook troubleshooting'];
|
||
if (task) {
|
||
lines.push(
|
||
'',
|
||
'### Task',
|
||
`- ID: ${task.sessionId || task.id || 'unknown'}`,
|
||
`- Type: ${task.type || 'unknown'}`,
|
||
`- Status: ${task.status || 'unknown'}`,
|
||
`- Model: ${task.payload?.repo_id || task.name || 'unknown'}`,
|
||
`- Host: ${task.remoteHost || 'local'}${task.sshPort ? `:${task.sshPort}` : ''}`,
|
||
);
|
||
}
|
||
lines.push('', '### Diagnosis', diagnosis?.message || '(none)');
|
||
if (suggestionText) lines.push('', '### Suggested action', suggestionText.replace(/^Suggested action:\s*/i, ''));
|
||
const cmd = task?.payload?._cmd || '';
|
||
if (cmd) lines.push('', '### Launch command', '```bash', cmd, '```');
|
||
if (sourceText) lines.push('', '### Captured output', '```text', String(sourceText).trim(), '```');
|
||
return lines.join('\n');
|
||
}
|
||
|
||
export function _showDiagnosis(panel, diagnosis, sourceText) {
|
||
const wasCollapsed = panel._lastDiagMsg === diagnosis.message && panel._diagCollapsed;
|
||
if (panel._diagDismissed === diagnosis.message) return;
|
||
panel._lastDiagMsg = diagnosis.message;
|
||
panel._diagCollapsed = !!wasCollapsed;
|
||
|
||
let diag = panel.querySelector('.cookbook-diagnosis');
|
||
if (!diag) {
|
||
diag = document.createElement('div');
|
||
diag.className = 'cookbook-diagnosis';
|
||
const output = panel.querySelector('.cookbook-output-pre');
|
||
if (output) output.after(diag);
|
||
else panel.appendChild(diag);
|
||
}
|
||
diag.classList.remove('hidden');
|
||
diag.innerHTML = '';
|
||
const taskEl = panel?.closest?.('.cookbook-task');
|
||
const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
|
||
const fixes = [...(diagnosis.fixes || [])];
|
||
if (task?.type === 'serve' && task.payload?._cmd && !fixes.some(f => f.label === 'Edit serve')) {
|
||
fixes.push({ label: 'Edit serve', action: (p) => _openServeEditFromDiagnosis(p) });
|
||
}
|
||
const suggestionText = diagnosis.suggestion || (fixes.length
|
||
? `Suggested action: ${fixes[0].label}.`
|
||
: 'Suggested action: copy the error and adjust the serve settings.');
|
||
|
||
// Simplified diagnosis card: just the error message + suggestion + fix
|
||
// button(s). Removed the fold toggle, copy button, and × dismiss — they
|
||
// made the card noisy without earning their keep. _diagCollapsed is kept
|
||
// as a stub so callers don't have to change.
|
||
panel._diagCollapsed = false;
|
||
|
||
const body = document.createElement('div');
|
||
body.className = 'cookbook-diag-body';
|
||
const msg = document.createElement('div');
|
||
msg.className = 'cookbook-diag-message';
|
||
msg.textContent = diagnosis.message;
|
||
body.appendChild(msg);
|
||
const suggestion = document.createElement('div');
|
||
suggestion.className = 'cookbook-diag-suggestion';
|
||
suggestion.textContent = suggestionText;
|
||
body.appendChild(suggestion);
|
||
diag.appendChild(body);
|
||
|
||
const runFix = async (fix, button, busyLabel = fix.label, onStart = null, onDone = null) => {
|
||
if (!fix || !button || button.dataset.busy) return;
|
||
button.dataset.busy = '1';
|
||
const _orig = button.textContent;
|
||
const wp = spinnerModule.createWhirlpool(12);
|
||
wp.element.style.cssText = 'display:inline-block;vertical-align:middle;width:12px;height:12px;margin-right:5px;';
|
||
button.textContent = '';
|
||
button.appendChild(wp.element);
|
||
const _lbl = document.createElement('span');
|
||
_lbl.textContent = busyLabel;
|
||
_lbl.style.verticalAlign = 'middle';
|
||
button.appendChild(_lbl);
|
||
try {
|
||
if (typeof onStart === 'function') onStart();
|
||
await fix.action(panel, sourceText);
|
||
} catch (err) {
|
||
console.error('[cookbook] diagnosis fix failed', err);
|
||
} finally {
|
||
if (button.isConnected) {
|
||
try { wp.destroy(); } catch {}
|
||
button.textContent = _orig;
|
||
delete button.dataset.busy;
|
||
}
|
||
if (typeof onDone === 'function') onDone();
|
||
}
|
||
};
|
||
|
||
if (fixes.length) {
|
||
const row = document.createElement('div');
|
||
row.className = 'cookbook-diag-fixes';
|
||
|
||
if (fixes.length <= 3) {
|
||
for (const fix of fixes) {
|
||
const btn = document.createElement('button');
|
||
btn.className = 'cookbook-btn cookbook-diag-btn';
|
||
btn.type = 'button';
|
||
btn.innerHTML = _diagFixIcon(fix.label) + '<span class="cookbook-diag-btn-label">' + _diagEsc(fix.label) + '</span>';
|
||
btn.addEventListener('click', (e) => {
|
||
e.stopPropagation();
|
||
runFix(fix, btn);
|
||
});
|
||
row.appendChild(btn);
|
||
}
|
||
body.appendChild(row);
|
||
return;
|
||
}
|
||
|
||
const wrap = document.createElement('div');
|
||
wrap.className = 'cookbook-diag-actions';
|
||
|
||
const trigger = document.createElement('button');
|
||
trigger.className = 'cookbook-btn cookbook-diag-action-trigger';
|
||
trigger.type = 'button';
|
||
trigger.textContent = 'Actions';
|
||
trigger.appendChild(document.createTextNode(' ▾'));
|
||
wrap.appendChild(trigger);
|
||
|
||
const menu = document.createElement('div');
|
||
menu.className = 'dropdown cookbook-diag-menu hidden';
|
||
for (const fix of fixes) {
|
||
const item = document.createElement('button');
|
||
item.type = 'button';
|
||
item.innerHTML = _diagFixIcon(fix.label) + '<span class="cookbook-diag-btn-label">' + _diagEsc(fix.label) + '</span>';
|
||
item.addEventListener('click', async (e) => {
|
||
e.stopPropagation();
|
||
if (item.dataset.busy || trigger.dataset.busy) return;
|
||
item.dataset.busy = '1';
|
||
await runFix(fix, trigger, fix.label, () => menu.classList.add('hidden'), () => delete item.dataset.busy);
|
||
});
|
||
menu.appendChild(item);
|
||
}
|
||
wrap.appendChild(menu);
|
||
trigger.addEventListener('click', (e) => {
|
||
e.stopPropagation();
|
||
if (trigger.dataset.busy) return;
|
||
document.querySelectorAll('.cookbook-diag-menu').forEach(m => {
|
||
if (m !== menu) m.classList.add('hidden');
|
||
});
|
||
menu.classList.toggle('hidden');
|
||
});
|
||
row.appendChild(wrap);
|
||
body.appendChild(row);
|
||
}
|
||
}
|
||
|
||
export function _clearDiagnosis(panel) {
|
||
panel._lastDiagMsg = null;
|
||
const diag = panel.querySelector('.cookbook-diagnosis');
|
||
if (diag) { diag.innerHTML = ''; diag.classList.add('hidden'); }
|
||
}
|
||
|
||
// ── Quick command ──
|
||
|
||
export async function _runQuickCmd(panel, cmd) {
|
||
let fullCmd = cmd;
|
||
if (_envState.remoteHost) {
|
||
fullCmd = _sshCmd(_envState.remoteHost, cmd);
|
||
}
|
||
const diag = panel.querySelector('.cookbook-diagnosis');
|
||
if (diag) { diag.classList.remove('hidden'); diag.textContent = `Running: ${fullCmd}...`; }
|
||
|
||
try {
|
||
const res = await fetch('/api/shell/stream', {
|
||
method: 'POST',
|
||
credentials: 'same-origin',
|
||
headers: { 'Content-Type': 'application/json' },
|
||
body: JSON.stringify({ command: fullCmd }),
|
||
});
|
||
if (diag) diag.textContent = res.ok ? `Done: ${cmd}` : `Failed (HTTP ${res.status})`;
|
||
} catch (e) {
|
||
if (diag) diag.textContent = `Error: ${e.message}`;
|
||
}
|
||
}
|