fix(cookbook): don't mark successful dependency installs as crashed (#1315)
Pip dependency installs are tracked as download tasks but finish with the runner's "=== Process exited with code 0 ===" sentinel and pip's "Successfully installed" line — never the HuggingFace download markers (DONE / 100% / /snapshots/ / DOWNLOAD_OK) the download heuristics look for. Once the tmux pane is gone, the backend's only completion check is the HF cache lookup, which a pip package (e.g. llama-cpp-python[server], no "/") never matches, so it reports "stopped" — and the frontend maps a stopped download to "crashed". The reconnect loop's session-gone heuristic had the same gap. Result: a clean install (exit 0) showed "crashed" in the Running tab while the Dependencies tab correctly showed it installed. Add a shared _depInstallSucceeded() helper that keys off the exit-0 sentinel (falling back to pip's success line, rejecting ERROR/Traceback) and wire it into both the session-gone heuristic and the background status reconciler, gated on payload._dep so real model downloads are unaffected. Also fixes the pre-existing test_background_status_poll_reconciles_into_local_tasks assertion that no longer matched the evolved reconciler, and adds regression coverage for both paths.
This commit is contained in:
@@ -66,6 +66,23 @@ function _clearPillLabel(task) {
|
||||
return 'clear';
|
||||
}
|
||||
|
||||
// A pip dependency/driver install (payload._dep) reports success with the
|
||||
// runner's "=== Process exited with code 0 ===" sentinel and pip's
|
||||
// "Successfully installed" line — never the HuggingFace download markers
|
||||
// (DONE / 100% / /snapshots/ / DOWNLOAD_OK) that the download heuristics look
|
||||
// for. Without this, a clean install whose tmux pane has already gone away is
|
||||
// misread as crashed/stopped even though pip exited 0. Prefer the authoritative
|
||||
// exit-code sentinel; fall back to pip's success line when no sentinel was
|
||||
// captured (and there's no install error in the same output).
|
||||
function _depInstallSucceeded(output) {
|
||||
const text = String(output || '');
|
||||
if (!text) return false;
|
||||
const exitMatch = text.match(/=== Process exited with code (-?\d+) ===/);
|
||||
if (exitMatch) return Number(exitMatch[1]) === 0;
|
||||
return /\b(?:Successfully installed|Requirement already satisfied)\b/.test(text)
|
||||
&& !/\bERROR\b|No matching distribution|Could not find a version|Traceback \(most recent call last\)/.test(text);
|
||||
}
|
||||
|
||||
function _shouldOfferCrashReport(task) {
|
||||
if (!task) return false;
|
||||
if (task._unreachable && task.type === 'serve') return true;
|
||||
@@ -2448,7 +2465,10 @@ async function _reconnectTask(el, task) {
|
||||
const downloadLooksSuccessful = !lastOutput.includes('DOWNLOAD_FAILED')
|
||||
&& (lastOutput.includes('DONE') || lastOutput.includes('100%') || lastOutput.includes('/snapshots/') || lastOutput.includes('Download complete') || lastOutput.includes('DOWNLOAD_OK'));
|
||||
const serveLooksReady = task.type === 'serve' && _serveOutputLooksReady({ ...task, output: lastOutput });
|
||||
const looksSuccessful = task.type === 'download' ? downloadLooksSuccessful : serveLooksReady;
|
||||
// Dependency installs are tracked as download tasks but finish with a
|
||||
// pip exit-0 sentinel, not HF download markers — so check that too.
|
||||
const depInstallSucceeded = !!task.payload?._dep && _depInstallSucceeded(lastOutput);
|
||||
const looksSuccessful = depInstallSucceeded || (task.type === 'download' ? downloadLooksSuccessful : serveLooksReady);
|
||||
if (!lastOutput.trim() || !looksSuccessful) {
|
||||
_updateTask(task.sessionId, { status: 'crashed' });
|
||||
el.dataset.status = 'crashed';
|
||||
@@ -3309,11 +3329,18 @@ async function _pollBackgroundStatus() {
|
||||
const live = statusById.get(task.sessionId);
|
||||
if (!live) continue;
|
||||
const updates = {};
|
||||
// A finished dependency install whose tmux pane is gone is reported
|
||||
// "stopped" by the backend (its pip package is never in the HF cache the
|
||||
// dead-session check inspects). Recover "done" from the retained output's
|
||||
// exit-0 sentinel so a clean install isn't downgraded to crashed.
|
||||
const depDone = !!task.payload?._dep && _depInstallSucceeded(task.output);
|
||||
const nextStatus = live.status === 'completed'
|
||||
? 'done'
|
||||
: (live.status === 'error'
|
||||
? 'error'
|
||||
: (live.status === 'stopped' ? (task.type === 'download' ? 'crashed' : 'stopped') : null));
|
||||
: (live.status === 'stopped'
|
||||
? (depDone ? 'done' : (task.type === 'download' ? 'crashed' : 'stopped'))
|
||||
: null));
|
||||
if (nextStatus && task.status !== nextStatus) {
|
||||
updates.status = nextStatus;
|
||||
if (nextStatus === 'done' && task.payload?._dep) completedDeps.push(task);
|
||||
|
||||
Reference in New Issue
Block a user