fix(cookbook): surface backend diagnosis when serve fails in background (#1636)

* refactor(cookbook): move _diagnose_serve_output to module level in cookbook_helpers

Extracts the nested _diagnose_serve_output function from inside
setup_cookbook_routes() and moves it to module level in cookbook_helpers.py,
alongside the other helper functions it logically belongs with.

No behaviour change — the function can now be imported directly, both by tests
and by other callers, without going through the route factory closure.

* fix(cookbook): surface backend diagnosis when serve fails in background

The background poll (_pollBackgroundStatus) already received `diagnosis`
and `cmd` from /api/cookbook/tasks/status but discarded both. When a serve
job died while the Cookbook modal was closed, reopening it showed only a
red error badge with no context.

- Persist live.diagnosis into task._backendDiagnosis in localStorage so it
  survives modal close/reopen and page refresh
- Persist live.cmd into task.payload._cmd for agent-spawned tasks so the
  crash report includes the actual command
- After _renderRunningTab(), walk rendered cards and call _showDiagnosis()
  for any that have a stored _backendDiagnosis but no panel yet
- In _renderTaskCard(), use _backendDiagnosis as a fallback when the
  client-side _terminalServeDiagnosis() finds nothing

* test(cookbook): add coverage for _diagnose_serve_output error patterns

10 tests verifying the 16 serve-failure patterns:
- CUDA OOM, port-in-use, vLLM missing, gated model
- Traceback fallback fires without startup success marker
- Traceback suppressed when server actually started
- Clean/empty output returns None
- trust-remote-code and no-GGUF patterns
This commit is contained in:
Lucas Daniel
2026-06-05 05:52:07 -03:00
committed by GitHub
parent 367858a587
commit f5d834b0c5
4 changed files with 210 additions and 122 deletions

View File

@@ -1900,6 +1900,9 @@ export function _renderRunningTab() {
// Prefer the client-side diagnosis derived from the task and its output
// (task.output, or an empty string when it is unset).
const terminalDiag = _terminalServeDiagnosis(task, task.output || '');
if (terminalDiag) _showDiagnosis(el, terminalDiag, task.output || '');
// Fallback: when the client-side check yields nothing and the task status is
// 'error' or 'crashed', show the diagnosis stored on the task as _backendDiagnosis.
if (!terminalDiag && (task.status === 'error' || task.status === 'crashed') && task._backendDiagnosis) {
_showDiagnosis(el, task._backendDiagnosis, task.output || '');
}
const _uptimeEl = el.querySelector('.cookbook-task-uptime');
if (_uptimeEl && (task.type === 'serve' || task.type === 'download') && task.status === 'running') {
@@ -3515,6 +3518,12 @@ async function _pollBackgroundStatus() {
updates.output = `${previous ? `${previous}\n` : ''}${tail}`.slice(-5000);
}
}
// Queue the diagnosis reported in `live` for storage on the task, unless the
// task is flagged _diagnosisDismissed.
if (live.diagnosis && !task._diagnosisDismissed) {
updates._backendDiagnosis = live.diagnosis;
}
// Record live.cmd as payload._cmd only when the task has no _cmd yet. The spread
// builds a new payload object rather than mutating the existing one in place.
if (live.cmd && !task.payload?._cmd) {
updates.payload = { ...(task.payload || {}), _cmd: live.cmd };
}
if (Object.keys(updates).length) {
Object.assign(task, updates);
changed = true;
@@ -3523,6 +3532,12 @@ async function _pollBackgroundStatus() {
if (changed) {
_saveTasks(localTasks);
_renderRunningTab();
// Attach the stored backend diagnosis to every task element that does not
// already contain a diagnosis node.
// NOTE(review): this loop does not check task.status or task._diagnosisDismissed
// — confirm that showing the diagnosis regardless of both is intended.
for (const task of localTasks) {
if (!task._backendDiagnosis) continue;
// CSS.escape makes the session id safe to embed in the attribute selector.
const el = document.querySelector(`[data-session-id="${CSS.escape(task.sessionId)}"]`);
// Skip when no element matches, or when it already holds a .cookbook-diagnosis node.
if (!el || el.querySelector('.cookbook-diagnosis')) continue;
_showDiagnosis(el, task._backendDiagnosis, task.output || '');
}
completedDeps.forEach(t => _refreshDepsAfterInstall(t));
}
} catch (_) { /* non-fatal: background status should never break polling */ }