fix(cookbook): don't mark successful dependency installs as crashed (#1315)

Pip dependency installs are tracked as download tasks but finish with the
runner's "=== Process exited with code 0 ===" sentinel and pip's
"Successfully installed" line — never the HuggingFace download markers
(DONE / 100% / /snapshots/ / DOWNLOAD_OK) the download heuristics look for.

Once the tmux pane is gone, the backend's only completion check is the HF
cache lookup, which a pip package (e.g. llama-cpp-python[server], no "/")
never matches, so it reports "stopped" — and the frontend maps a stopped
download to "crashed". The reconnect loop's session-gone heuristic had the
same gap. Result: a clean install (exit 0) showed "crashed" in the Running
tab while the Dependencies tab correctly showed it installed.

Add a shared _depInstallSucceeded() helper that keys off the exit-0
sentinel (falling back to pip's "Successfully installed" / "Requirement
already satisfied" lines when no sentinel was captured, and rejecting output
that contains ERROR, a resolver failure, or a Traceback) and wire it into
both the session-gone heuristic and the background status reconciler, gated
on payload._dep so real model downloads are unaffected.

Also fixes the pre-existing test_background_status_poll_reconciles_into_local_tasks
assertion that no longer matched the evolved reconciler, and adds regression
coverage for both paths.
This commit is contained in:
SHORYA BAJ
2026-06-04 17:25:06 +05:30
committed by GitHub
parent 28c43121d7
commit f876fc7704
2 changed files with 65 additions and 2 deletions

View File

@@ -66,6 +66,23 @@ function _clearPillLabel(task) {
return 'clear';
}
// Decide whether a pip dependency/driver install (payload._dep) finished
// successfully, judging only by its captured terminal output.
//
// Such installs report success with the runner's
// "=== Process exited with code 0 ===" sentinel and pip's
// "Successfully installed" line — never the HuggingFace download markers
// (DONE / 100% / /snapshots/ / DOWNLOAD_OK) that the download heuristics look
// for. Without this, a clean install whose tmux pane has already gone away is
// misread as crashed/stopped even though pip exited 0.
//
// Resolution order:
//   1. If the output contains one or more exit-code sentinels, the LAST one is
//      authoritative (it belongs to the most recent process run captured in
//      the output, so a stale sentinel from an earlier attempt cannot win).
//   2. Otherwise fall back to pip's own success lines, provided the same
//      output carries no install error.
//
// `output` may be any value; null/undefined/empty is treated as "no evidence"
// and yields false. Returns a boolean.
function _depInstallSucceeded(output) {
  const text = String(output || '');
  if (!text) return false;

  // Collect every sentinel; a fresh /g literal per call avoids shared
  // lastIndex state between invocations.
  const exitSentinels = [...text.matchAll(/=== Process exited with code (-?\d+) ===/g)];
  if (exitSentinels.length > 0) {
    const lastSentinel = exitSentinels[exitSentinels.length - 1];
    return Number(lastSentinel[1]) === 0;
  }

  // No sentinel captured: trust pip's success line only when nothing in the
  // output looks like an install failure.
  const hasPipSuccessLine = /\b(?:Successfully installed|Requirement already satisfied)\b/.test(text);
  const hasInstallError = /\bERROR\b|No matching distribution|Could not find a version|Traceback \(most recent call last\)/.test(text);
  return hasPipSuccessLine && !hasInstallError;
}
function _shouldOfferCrashReport(task) {
if (!task) return false;
if (task._unreachable && task.type === 'serve') return true;
@@ -2448,7 +2465,10 @@ async function _reconnectTask(el, task) {
const downloadLooksSuccessful = !lastOutput.includes('DOWNLOAD_FAILED')
&& (lastOutput.includes('DONE') || lastOutput.includes('100%') || lastOutput.includes('/snapshots/') || lastOutput.includes('Download complete') || lastOutput.includes('DOWNLOAD_OK'));
const serveLooksReady = task.type === 'serve' && _serveOutputLooksReady({ ...task, output: lastOutput });
const looksSuccessful = task.type === 'download' ? downloadLooksSuccessful : serveLooksReady;
// Dependency installs are tracked as download tasks but finish with a
// pip exit-0 sentinel, not HF download markers — so check that too.
const depInstallSucceeded = !!task.payload?._dep && _depInstallSucceeded(lastOutput);
const looksSuccessful = depInstallSucceeded || (task.type === 'download' ? downloadLooksSuccessful : serveLooksReady);
if (!lastOutput.trim() || !looksSuccessful) {
_updateTask(task.sessionId, { status: 'crashed' });
el.dataset.status = 'crashed';
@@ -3309,11 +3329,18 @@ async function _pollBackgroundStatus() {
const live = statusById.get(task.sessionId);
if (!live) continue;
const updates = {};
// A finished dependency install whose tmux pane is gone is reported
// "stopped" by the backend (its pip package is never in the HF cache the
// dead-session check inspects). Recover "done" from the retained output's
// exit-0 sentinel so a clean install isn't downgraded to crashed.
const depDone = !!task.payload?._dep && _depInstallSucceeded(task.output);
const nextStatus = live.status === 'completed'
? 'done'
: (live.status === 'error'
? 'error'
: (live.status === 'stopped' ? (task.type === 'download' ? 'crashed' : 'stopped') : null));
: (live.status === 'stopped'
? (depDone ? 'done' : (task.type === 'download' ? 'crashed' : 'stopped'))
: null));
if (nextStatus && task.status !== nextStatus) {
updates.status = nextStatus;
if (nextStatus === 'done' && task.payload?._dep) completedDeps.push(task);