Cookbook polish: auto-reconnect, ctx slider fixes, scoring, lots of UI
Backend (services/hwfit + routes):
- VRAM column sort now shows global highest first (was special-cased to
ascending then truncated top-N, which made "highest VRAM" mathematically
unreachable). Every column path uses reverse=True for the truncation.
- Hardware probe cache TTL 30min -> 24h so changing filters doesn't keep
re-probing the rig during a session; Rescan button still forces fresh.
- Multi-GPU rigs filter GGUF Q*/IQ quants (vLLM/SGLang can't serve them);
default non-prequantized to BF16 on 2+ GPUs.
- AWQ / AWQ-8bit / GPTQ-8bit get a -1.0 quality penalty so FP8 wins ties.
- Version-aware tiebreaker (parse Mn.n / Vn) — MiniMax-M2.7 ranks above M2.5.
- hf_models.json: zai-org/GLM-5.1 added; zai-org/GLM-5 quantization flipped
Q4_K_M -> BF16. DeepSeek-V4-Flash / -Pro + their -Base variants registered
with new FP4-MoE-Mixed / FP8-Mixed quant keys (calibrated BPP from the
actual 156 GB / 284 GB disk footprints).
- New FP4-MoE-Mixed + FP8-Mixed entries in QUANT_BPP / QUANT_SPEED_MULT /
QUANT_QUALITY_PENALTY / QUANT_BYTES_PER_PARAM / PREQUANTIZED_PREFIXES.
Frontend — Scan/Download:
- Engine + Quant swapped in the toolbar; Quant defaults to "All".
- Ctx (range slider) ported from origin/main: 8k/16k/32k/50k/128k/Max. Drag
re-sorts by vram ascending (smallest fitting first); back to Max → score.
- Ctx slider rail now visible — was background:transparent in a duplicate
later-cascade rule. Hardcoded grey + !important.
- Search input moved to the far right of the toolbar.
- Type/Standard default; "Context" not uppercased; Search placeholder dimmed.
- Engine "?" + Quant "?" inline help chips inside their dropdown boxes.
- Fit-column dot toggles fit-only filter; un-toggling re-sorts by VRAM desc.
- Quant column truncates to 9 chars + ellipsis ("FP4-MoE-M..."), full in
tooltip. Smart title-suffix strips the parts already in the repo name
(QuantTrio/MiniMax-M2-AWQ + quant AWQ-4bit -> just "(4bit)").
- Conditional warning for safetensors models on non-GPU rigs only.
- Dependency Install / Installed / Installed▾ / N/A all 75.85px wide.
- Rebuild llama.cpp moved into the llama_cpp dep row, styled as a tag.
- Foldable Download admin-card (h2 chevron); line under h2 only when folded.
- HF token save gets a green ✓ + "Saved" flash.
- Cached scan no longer counts stalled rows as downloaded.
- Footer: "Request it →" link with GitHub mark to the public discussion
(#1962) for model-add requests.
Frontend — Running tab:
- Strict download-finish check (DOWNLOAD_OK or /snapshots/, not bare
"Download complete"). True overall % for multi-shard downloads:
((N-1)+frac)/total instead of hf_transfer's per-shard aggregate.
- ETA in the uptime ticker: "downloading: 12m 34s · ETA 1h 23m".
- Clear button kills the tmux session too; if the output still shows a
live shard line, the pill is hidden, relabelled "reconnect", and revives
the task on click.
- Self-heal: on cookbook open AND every bg-monitor cycle (10s, throttled
to 8s), scan persisted done/error/crashed downloads and probe their
tmux session — if alive, flip status back to running and reattach.
- Per-launch zombie probe: clicking Download on a model whose persisted
state is done but tmux is still alive revives the existing task and
refuses to start a duplicate.
- Pre-launch GPU probe: vllm / sglang / diffusers serve launches check
/api/cookbook/gpus first; warns + confirms if no GPU is visible.
- Server-side state guard: rejects "done" POSTs for downloads lacking
DOWNLOAD_OK / DOWNLOAD_FAILED / /snapshots/ when the last-mentioned
shard is N<total — stale tabs can't poison persisted state any more.
- Running count includes tasks whose output looks active even if persisted
status got stuck. Dir text on the running row, font matched to uptime.
Serve panel:
- Ctx text input always resets to model max on open (default 20000 when
metadata is missing).
- Max Seqs default 8 -> 4. KV Cache dtype select 32px tall.
- Lightning icon on Launch (same as Action toggle).
- Diagnosis card simplified (no fold/copy/dismiss), suggestion font
matches body; action buttons get icons on the left (Retry/Copy/Edit/
Install/Kill/Switch/etc.).
- Incomplete-download serve warning when model status is
downloading / stalled / has_incomplete.
- MTP "?" tooltip ("supported on a few model families … up to ~3× faster").
This commit is contained in:
@@ -35,13 +35,34 @@ function _taskBadge(task) {
|
||||
return { text: _statusLabel(task.status, task.type), cls: 'cookbook-task-' + task.status };
|
||||
}
|
||||
|
||||
// Heuristic: does a download task's captured tmux output still show work in
// flight? Used to tell "the cookbook lost track of this download" apart from
// "this download really finished", so the clear pill can act as a
// "reconnect" affordance (click → revive the row + reattach the poll loop).
//
// Returns false for a missing task, a non-download task, empty output, or
// output that already contains a terminal marker (DOWNLOAD_OK /
// DOWNLOAD_FAILED). Otherwise returns true when either
//   - some shard's MOST RECENT progress line (e.g.
//     "model-00012-of-00082.safetensors: 56%|") is below 100%, or
//   - a "Downloading 'X' to '….incomplete'" line (no percentage) is present.
//
// Only the latest percentage per shard file counts: the buffer can keep
// earlier progress lines for a shard that later reached 100%, and those
// stale lines must not make a finished download look active.
function _downloadOutputLooksActive(task) {
  if (!task || task.type !== 'download') return false;
  const out = task.output || '';
  if (!out) return false;
  if (out.includes('DOWNLOAD_OK') || out.includes('DOWNLOAD_FAILED')) return false;

  // Walk every "<shard file>: NN%" occurrence in order; later lines for the
  // same shard overwrite earlier ones. The regex literal is created per call,
  // so the /g lastIndex state never leaks between invocations.
  const shardProgress = /(model-\d+-of-\d+\.[a-z]+):\s+(\d+)%/gi;
  const latestPctByShard = new Map();
  let match = shardProgress.exec(out);
  while (match !== null) {
    latestPctByShard.set(match[1].toLowerCase(), Number.parseInt(match[2], 10));
    match = shardProgress.exec(out);
  }
  for (const pct of latestPctByShard.values()) {
    if (pct !== 100) return true;
  }

  // No percentage yet: a file has been announced and is still being written
  // to its ".incomplete" temp path.
  return /Downloading\s+'[^']+'\s+to\s+'[^']*\.incomplete'/i.test(out);
}
|
||||
|
||||
// Decide whether the clear/check pill may be offered for a task.
// Returns true only for a task in a terminal status ('done', 'stopped',
// 'error', 'crashed', 'failed') that shows no sign of still doing work.
function _canClearTask(task) {
  // No task, or one explicitly marked running: nothing to clear.
  if (!task) return false;
  if (task.status === 'running') return false;

  // A serve task that is up (by status or by the _serveReady flag) is not
  // clearable.
  const serveIsUp = task.type === 'serve' && (task.status === 'ready' || task._serveReady);
  if (serveIsUp) return false;

  // A download whose tmux output still shows in-flight progress is not
  // really finished, whatever its persisted status says — keep the pill off
  // a task that is still doing work.
  if (_downloadOutputLooksActive(task)) return false;

  switch (task.status) {
    case 'done':
    case 'stopped':
    case 'error':
    case 'crashed':
    case 'failed':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
// Label for the clear pill: 'reconnect' when the task's download output still
// looks active (per _downloadOutputLooksActive), otherwise 'clear'.
function _clearPillLabel(task) {
  return _downloadOutputLooksActive(task) ? 'reconnect' : 'clear';
}
|
||||
|
||||
@@ -1537,7 +1558,16 @@ export function _renderRunningTab() {
|
||||
|
||||
const tasks = _loadTasks();
|
||||
const hasContent = tasks.length > 0;
|
||||
const activeCount = tasks.filter(t => t.status === 'running' || t.status === 'queued').length;
|
||||
// Count anything that's really active: explicit 'running'/'queued' status,
|
||||
// OR a download whose tmux output is still showing live shard progress.
|
||||
// Without the output check, a task whose status got stuck at 'done' /
|
||||
// 'crashed' (before auto-reconnect catches it) would read as "Running 0"
|
||||
// even when the model is actively downloading on the host.
|
||||
const activeCount = tasks.filter(t =>
|
||||
t.status === 'running'
|
||||
|| t.status === 'queued'
|
||||
|| _downloadOutputLooksActive(t)
|
||||
).length;
|
||||
const activeCountHtml = activeCount ? ` <span class="cookbook-tab-count">${activeCount}</span>` : '';
|
||||
|
||||
let tabBar = body.querySelector('.cookbook-tabs');
|
||||
@@ -1824,9 +1854,31 @@ export function _renderRunningTab() {
|
||||
const h = Math.floor(secs / 3600);
|
||||
const m = Math.floor((secs % 3600) / 60);
|
||||
const s = secs % 60;
|
||||
_uptimeEl.textContent = h > 0
|
||||
const _timer = h > 0
|
||||
? `${_prefix}: ${h}h ${String(m).padStart(2,'0')}m`
|
||||
: `${_prefix}: ${m}m ${String(s).padStart(2,'0')}s`;
|
||||
// ETA — only for downloads, only when we have a meaningful overall %.
|
||||
// Reads the badge text (which already shows the true overall % we
|
||||
// compute in the live-polling block) and back-derives a remaining-time
|
||||
// estimate from elapsed/done. Hidden until pct >= 3% so the early-job
|
||||
// wild estimates don't show.
|
||||
let _eta = '';
|
||||
if (task.type === 'download') {
|
||||
const _badge = el.querySelector('.cookbook-task-status');
|
||||
const _m = _badge && /^(\d+)%/.exec(_badge.textContent || '');
|
||||
const _pct = _m ? parseInt(_m[1], 10) : 0;
|
||||
if (_pct >= 3 && _pct < 100 && secs > 5) {
|
||||
const _totalSec = Math.round(secs * (100 / _pct));
|
||||
const _remain = Math.max(0, _totalSec - secs);
|
||||
const _eh = Math.floor(_remain / 3600);
|
||||
const _em = Math.floor((_remain % 3600) / 60);
|
||||
const _es = _remain % 60;
|
||||
_eta = _eh > 0
|
||||
? ` · ETA ${_eh}h ${String(_em).padStart(2,'0')}m`
|
||||
: (_em > 0 ? ` · ETA ${_em}m ${String(_es).padStart(2,'0')}s` : ` · ETA ${_es}s`);
|
||||
}
|
||||
}
|
||||
_uptimeEl.textContent = _timer + _eta;
|
||||
}, 1000);
|
||||
}
|
||||
|
||||
@@ -1874,11 +1926,32 @@ export function _renderRunningTab() {
|
||||
if (_clearChk) {
|
||||
_clearChk.addEventListener('click', (e) => {
|
||||
e.stopPropagation();
|
||||
// Belt-and-suspenders: kill the tmux session too. For a real-finished
|
||||
// task the session is already gone and kill-session errors silently,
|
||||
// but for a task that was falsely flagged done (the strict-finish
|
||||
// bug), this guarantees the still-running download actually stops
|
||||
// rather than continuing to write to disk after the row is removed.
|
||||
// If the output still shows an active shard line, the task isn't
|
||||
// actually finished — clicking is "reconnect" (flip back to running
|
||||
// + let _reconnectTask reattach to the live tmux session), not
|
||||
// "clear". The pill label already reflects this via _clearPillLabel.
|
||||
if (_downloadOutputLooksActive(task)) {
|
||||
const _fresh = _loadTasks();
|
||||
const _ft = _fresh.find(t => t.sessionId === task.sessionId);
|
||||
if (_ft) {
|
||||
_ft.status = 'running';
|
||||
_ft._selfHealed = true;
|
||||
_saveTasks(_fresh);
|
||||
}
|
||||
// Visually flip without waiting for a full re-render — same path the
|
||||
// self-heal uses on cookbook open.
|
||||
const _chk = el.querySelector('.cookbook-task-check');
|
||||
if (_chk) _chk.style.display = 'none';
|
||||
const _wave = el.querySelector('.cookbook-task-wave');
|
||||
if (_wave) _wave.style.display = '';
|
||||
const _up = el.querySelector('.cookbook-task-uptime');
|
||||
if (_up) _up.style.display = '';
|
||||
el.dataset.status = 'running';
|
||||
_renderRunningTab();
|
||||
return;
|
||||
}
|
||||
// Otherwise: real clear. Kill the tmux session as belt-and-suspenders,
|
||||
// then animate out + remove the row.
|
||||
try {
|
||||
fetch('/api/shell/exec', {
|
||||
method: 'POST', credentials: 'same-origin',
|
||||
@@ -2964,9 +3037,84 @@ function _refreshServerDots() {
|
||||
_syncSettingsServerDots(byKey);
|
||||
}
|
||||
|
||||
// Self-heal for download tasks that were persisted as finished while their
// tmux session is in fact still alive on the host.
//
// Candidates are persisted download tasks with status done / error / crashed /
// stopped that have a sessionId not starting with "queue-". Each candidate is
// probed with `tmux has-session`; exit code 0 means the session exists, so
// the persisted status is flipped back to 'running' (tagged _selfHealed) and
// the row, if currently rendered, is switched to its running look. If any
// task was flipped, the Running tab is re-rendered once at the end.
//
// Two call modes, selected by opts.oneShot:
//   - oneShot: runs at most once per page load (guarded by _selfHealRan).
//   - otherwise: time-throttled to one run per 8s (guarded by _selfHealLastTs).
let _selfHealRan = false;
let _selfHealLastTs = 0;

export async function _selfHealStaleTasks(opts = {}) {
  if (opts.oneShot) {
    // Open-path call: the latch is set before any work so a second call
    // during the first run's awaits is a no-op.
    if (_selfHealRan) return;
    _selfHealRan = true;
  } else {
    // Background-monitor call: skip when the previous run started < 8s ago.
    const startedAt = Date.now();
    if (startedAt - _selfHealLastTs < 8000) return;
    _selfHealLastTs = startedAt;
  }

  const finishedStatuses = ['done', 'error', 'crashed', 'stopped'];
  const stale = _loadTasks().filter((task) => {
    if (task.type !== 'download') return false;
    if (!finishedStatuses.includes(task.status)) return false;
    if (!task.sessionId) return false;
    return !String(task.sessionId).startsWith('queue-');
  });
  if (stale.length === 0) return;

  let revived = 0;
  for (const stalled of stale) {
    try {
      const response = await fetch('/api/shell/exec', {
        method: 'POST', credentials: 'same-origin',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({ command: _tmuxCmd(stalled, `has-session -t ${stalled.sessionId}`), timeout: 5 }),
      });
      const probe = await response.json();
      // Non-zero exit: no such session, the persisted status stands.
      if (probe.exit_code !== 0) continue;

      // Session still alive → the task is actually still running. Re-read
      // the persisted list so a change made while the probe was in flight
      // is not overwritten.
      const latest = _loadTasks();
      const persisted = latest.find((entry) => entry.sessionId === stalled.sessionId);
      if (!persisted || persisted.status === 'running') continue;

      persisted.status = 'running';
      persisted._selfHealed = true;
      _saveTasks(latest);
      revived++;

      // Flip the visible row right away instead of waiting for the re-render.
      const row = document.querySelector(`.cookbook-task[data-task-id="${stalled.sessionId}"]`);
      if (row) {
        const setDisplay = (selector, value) => {
          const node = row.querySelector(selector);
          if (node) node.style.display = value;
        };
        setDisplay('.cookbook-task-check', 'none');
        setDisplay('.cookbook-task-wave', '');
        setDisplay('.cookbook-task-uptime', '');
        row.dataset.status = 'running';
      }
    } catch { /* network blip — skip this one */ }
  }

  if (revived) {
    console.log(`[cookbook] auto-reconnect: revived ${revived} task(s) whose tmux session was still alive`);
    _renderRunningTab();
  }
}
|
||||
|
||||
// Start the background monitor loop. Idempotent: returns immediately when an
// interval handle is already stored in _bgMonitorInterval.
//
// Exactly one interval is registered. Registering a second one over the same
// handle would orphan the first timer (its id is lost, so it can never be
// cleared) and run every poll twice per cycle.
export function _startBackgroundMonitor() {
  if (_bgMonitorInterval) return;
  _bgMonitorInterval = setInterval(() => {
    _pollBackgroundStatus();
    _checkServeReachability();
    // Auto-reconnect: every cycle, look for download tasks marked finished/
    // crashed/etc. whose tmux session is actually still running, and flip
    // them back to running. Internally throttled to 8s so a manual call from
    // the open path or a fast invocation doesn't double up. Best-effort: a
    // rejected run is ignored and the next cycle tries again.
    _selfHealStaleTasks().catch(() => {});
  }, BG_MONITOR_INTERVAL_MS);
  // Kick off one poll + reachability check now rather than waiting for the
  // first interval tick.
  _pollBackgroundStatus();
  _checkServeReachability();
}
|
||||
|
||||
Reference in New Issue
Block a user