// ============================================
// COOKBOOK SERVE SUB-MODULE
// Serve tab: cached model list, serve panel building,
// command building, preset slots, launch logic
// ============================================

import uiModule from './ui.js';
import spinnerModule from './spinner.js';
import { providerLogo } from './providers.js';
import { modelColor } from './chatRenderer.js';
import { bindMenuDismiss, dismissOrRemove } from './escMenuStack.js';

// Shared state/functions injected by init()
let _envState;
let _sshCmd;
let _getPort;
let _sshPrefix;
let _getPlatform;
let _isWindows;
let _isMetal;
let _buildEnvPrefix;
let _buildServeCmd;
let _shellQuote;
let _psQuote;
let _detectBackend;
let _detectToolParser;
let _detectModelOptimizations;
let _loadPresets;
let _savePresets;
let _copyText;
let _persistEnvState;
let _getGpuToggleTotal;
let modelLogo;
let esc;
let _launchServeTask;
let _retryDownload;
let _nextAvailablePort;

// Storage keys
const SERVE_STATE_KEY = 'cookbook-serve-state';

let _cachedAllModels = [];

// True when the model's quant field or any of its identifying strings
// (repo, repo_id, name, path) mentions AWQ, GPTQ or FP8.
function _repoLooksAwqLike(model, repo) {
  const q = String(model?.quant || '').toUpperCase();
  const n = `${repo || ''} ${model?.repo_id || ''} ${model?.name || ''} ${model?.path || ''}`.toLowerCase();
  return /^AWQ|^GPTQ/.test(q) || q === 'FP8' || /\b(awq|gptq|fp8)\b/i.test(n);
}

// True when the model is flagged is_gguf, carries a Q2–Q8 / IQ* / GGUF quant
// label, or any of its identifying strings contains "gguf".
function _repoLooksGgufLike(model, repo) {
  const q = String(model?.quant || '').toUpperCase();
  const n = `${repo || ''} ${model?.repo_id || ''} ${model?.name || ''} ${model?.path || ''}`.toLowerCase();
  return !!model?.is_gguf || /^Q[2-8]/.test(q) || /^IQ/.test(q) || q === 'GGUF' || n.includes('gguf');
}

// Returns a { title, body } warning when the chosen backend does not fit the
// model's apparent format, or null when no mismatch is detected. The checks
// run in order and the first match wins.
function _serveBackendWarning(model, repo, backend, fields = {}) {
  const awqLike = _repoLooksAwqLike(model, repo);
  const ggufLike = _repoLooksGgufLike(model, repo);
  if (awqLike && (backend === 'llamacpp' || backend === 'ollama')) {
    return {
      title: 'AWQ needs vLLM or SGLang',
      body: 'This model looks like AWQ/GPTQ/FP8 safetensors. llama.cpp and Ollama need GGUF files, so this backend cannot serve it. Choose vLLM/SGLang on a CUDA/ROCm GPU server, or download a GGUF version for llama.cpp/Ollama.',
    };
  }
  if (awqLike && _isMetal() && (backend === 'vllm' || backend === 'sglang')) {
    return {
      title: 'AWQ is not a unified-memory path',
      body: 'This model looks like AWQ/GPTQ/FP8 safetensors. AWQ is for vLLM/SGLang on CUDA/ROCm-style GPU servers, not local unified-memory llama.cpp/Ollama serving. For unified memory, download a GGUF model and use llama.cpp/Ollama.',
    };
  }
  if (awqLike && fields.unified_mem) {
    return {
      title: 'AWQ is not a unified-memory path',
      body: 'This model looks like AWQ/GPTQ/FP8 safetensors, but unified-memory local serving expects GGUF. Use vLLM/SGLang on a compatible GPU server, or download a GGUF version for llama.cpp/Ollama.',
    };
  }
  if (ggufLike && (backend === 'vllm' || backend === 'sglang')) {
    return {
      title: 'GGUF needs llama.cpp or Ollama',
      body: 'This model looks like GGUF. vLLM/SGLang expect HuggingFace safetensors-style repos. Choose llama.cpp/Ollama for GGUF, or download a safetensors model for vLLM/SGLang.',
    };
  }
  return null;
}

// Own-property check that tolerates a null/undefined object.
function _hasOwn(obj, key) {
  return Object.prototype.hasOwnProperty.call(obj || {}, key);
}

// Builds "0,1,…,count-1"; returns '' for a non-finite or non-positive count.
function _allGpuIds(count) {
  const n = Number(count || 0);
  if (!Number.isFinite(n) || n <= 0) return '';
  return Array.from({ length: Math.floor(n) }, (_, i) => String(i)).join(',');
}

// Resolves which server the serve panel targets. The server <select> (when
// present) overrides _envState.remoteHost; its value may be 'local', a host
// string, or a numeric index into _envState.servers.
// Returns { host, port, venv, label }; host is '' for the local server.
function _selectedServeTarget(panel) {
  const select = document.getElementById('hwfit-server-select') || document.getElementById('hwfit-dl-server');
  const servers = Array.isArray(_envState.servers) ? _envState.servers : [];
  let host = _envState.remoteHost || '';
  let server = host ? servers.find(s => s.host === host) : null;
  if (select && select.value != null) {
    if (select.value === 'local') {
      host = '';
      server = servers.find(s => !s.host || s.host === 'local') || null;
    } else {
      // A host match takes precedence over treating the value as an index.
      const idx = /^\d+$/.test(String(select.value)) ? parseInt(select.value, 10) : -1;
      server = servers.find(s => s.host === select.value) || (idx >= 0 ? servers[idx] : null) || null;
      host = server?.host || '';
    }
  }
  // Panel field wins, then the server's env path, then the global one.
  const venv = panel?.querySelector('[data-field="venv"]')?.value?.trim() || server?.envPath || _envState.envPath || '';
  const label = host ? (server?.name ? `${server.name} (${host})` : host) : (server?.name || 'local server');
  return {
    host,
    port: host ? (_getPort(host) || server?.port || '') : '',
    venv,
    label,
  };
}

// Fetches /api/cookbook/packages for the selected target and picks out the
// package entry that corresponds to `backend`.
// Returns null for a backend with no known package, otherwise { pkg, target }
// (pkg is undefined when the response does not list the package).
// Throws Error(`HTTP <status>`) on a non-OK response.
async function _fetchServeRuntimePackage(panel, backend) {
  const packageByBackend = {
    vllm: 'vllm',
    sglang: 'sglang',
    llamacpp: 'llama_cpp',
    diffusers: 'diffusers',
  };
  const packageName = packageByBackend[backend];
  if (!packageName) return null;
  const target = _selectedServeTarget(panel);
  const params = new URLSearchParams();
  // Query parameters are only sent for a remote host.
  if (target.host) {
    params.set('host', target.host);
    if (target.port) params.set('ssh_port', target.port);
    if (target.venv) params.set('venv', target.venv);
  }
  const res = await fetch('/api/cookbook/packages' + (params.toString() ? '?' + params.toString() : ''), { credentials: 'same-origin' });
  if (!res.ok) throw new Error(`HTTP ${res.status}`);
  const data = await res.json();
  const pkg = (data.packages || []).find(p => p.name === packageName);
  return { pkg, target };
}

// One-line readiness message for a backend on a target, optionally suffixed
// with the package's status_note (or update_note as a fallback).
function _runtimeNoteText(backend, pkg, target) {
  const labels = { vllm: 'vLLM', sglang: 'SGLang', llamacpp: 'llama.cpp', diffusers: 'Diffusers' };
  const label = labels[backend] || backend;
  if (!pkg) return `${label} readiness unavailable for ${target.label}.`;
  const note = pkg.status_note || pkg.update_note || '';
  if (pkg.installed) {
    return note ? `${label} ready on ${target.label}: ${note}` : `${label} ready on ${target.label}.`;
  }
  return note ? `${label} missing on ${target.label}: ${note}` : `${label} missing on ${target.label}.`;
}

// ── Filter/sort cached model list ──
// Shows/hides the already-rendered list items according to the active tag
// chip (a "fam:<family>" tag matches on data-family, any other tag on
// data-tag) and the search box (substring match on data-repo).
function _filterCachedList() {
  const list = document.getElementById('hwfit-cached-list');
  const tagContainer = document.getElementById('serve-tags');
  if (!list) return;
  const activeTag = tagContainer?.querySelector('.memory-cat-chip.active')?.dataset.serveTag || '';
  const searchVal = (document.getElementById('serve-search')?.value || '').toLowerCase().trim();
  const isFamily = activeTag.startsWith('fam:');
  const familyVal = isFamily ? activeTag.slice(4) : '';
  list.querySelectorAll('.memory-item[data-repo]').forEach(item => {
    const repo = (item.dataset.repo || '').toLowerCase();
    const tag = item.dataset.tag || '';
    const family = item.dataset.family || '';
    const tagMatch = !activeTag || (isFamily ? family === familyVal : tag === activeTag);
    const searchMatch = !searchVal || repo.includes(searchVal);
    item.style.display = (tagMatch && searchMatch) ? '' : 'none';
  });
}

// Is there a live download task for this repo in the Running tab? The cache
// reports any incomplete download dir as "downloading", but if nothing is
// actively pulling it, it's really a stalled/partial download — so we label it
// accordingly. Reads the running-tab tasks straight from localStorage (same
// key the running module writes) to avoid a cross-module import cycle.
function _isActivelyDownloading(repoId) {
  // Without a repo id there is nothing to match; comparing '' / undefined
  // against tasks that lack a repo_id would otherwise report a false positive.
  if (!repoId) return false;
  try {
    const tasks = JSON.parse(localStorage.getItem('cookbook-tasks')) || [];
    if (!Array.isArray(tasks)) return false;
    const short = String(repoId).split('/').pop();
    return tasks.some(t => {
      // Skip malformed entries instead of letting one abort the whole scan.
      if (!t || t.type !== 'download' || t.status !== 'running') return false;
      const taskRepo = t.payload?.repo_id || '';
      if (taskRepo === repoId || t.name === repoId) return true;
      // Short-name comparison only makes sense when the short name is non-empty.
      return !!short && (t.name === short || taskRepo.split('/').pop() === short);
    });
  } catch {
    // Unreadable or unparsable storage is treated as "nothing running".
    return false;
  }
}

// Same idea for serve: is there a live serve task for this repo? Used to
// surface a "running" pill on the Serve tab card.
function _isActivelyServing(repoId) { try { const tasks = JSON.parse(localStorage.getItem('cookbook-tasks')) || []; const short = (repoId || '').split('/').pop(); return tasks.some(t => t.type === 'serve' && t.status === 'running' && (t.payload?.repo_id === repoId || t.name === repoId || t.name === short || (t.payload?.repo_id || '').split('/').pop() === short)); } catch { return false; } } function _formatGgufSize(bytes) { const n = Number(bytes || 0); if (!Number.isFinite(n) || n <= 0) return ''; if (n >= 1024 ** 3) return `${(n / (1024 ** 3)).toFixed(1)} GB`; if (n >= 1024 ** 2) return `${Math.round(n / (1024 ** 2))} MB`; return `${Math.max(1, Math.round(n / 1024))} KB`; } function _ggufFilesForModel(model) { return Array.isArray(model?.gguf_files) ? model.gguf_files.filter(f => f && typeof f.rel_path === 'string' && f.rel_path) : []; } function _runnableGgufFiles(model) { const files = _ggufFilesForModel(model); const primary = files.filter(f => (f.role || 'model') === 'model'); return primary.length ? primary : files; } function _ggufFileLabel(file) { const base = (file.name || file.rel_path || '').split('/').pop(); const size = _formatGgufSize(file.size_bytes); const quant = file.quant ? `${file.quant} ` : ''; const parts = Number(file.parts || 0); const split = parts > 1 ? `, ${parts} parts` : ''; const role = file.role && file.role !== 'model' ? ` ${file.role}` : ''; return `${quant}${base}${size || split ? 
` (${[size, split.replace(/^, /, '')].filter(Boolean).join(', ')})` : ''}${role}`; } function _shellPathExpr(path) { const s = String(path || ''); if (s === '~') return '${HOME}'; if (s.startsWith('~/')) return '${HOME}' + _shellQuote(s.slice(1)); return _shellQuote(s); } function _selectedGgufExpr(model, repo, relPath) { const rel = String(relPath || '').replace(/^\/+/, ''); if (!rel) return ''; if (model.is_local_dir && model.path) { const base = String(model.path || '').replace(/\/+$/, ''); return `$(printf %s ${_shellPathExpr(`${base}/${repo}/${rel}`)})`; } if (model.path) { const base = String(model.path || '').replace(/\/+$/, ''); return `$(printf %s ${_shellPathExpr(`${base}/models--${repo.replace(/\//g, '--')}/snapshots/${rel}`)})`; } const cacheRepo = repo.replace(/\//g, '--'); return `$(printf %s \${HOME}${_shellQuote(`/.cache/huggingface/hub/models--${cacheRepo}/snapshots/${rel}`)})`; } function _ggufSearchDirExpr(model, repo) { if (model.is_local_dir && model.path) return _shellQuote(`${String(model.path || '').replace(/\/+$/, '')}/${repo}`); if (model.path) return _shellQuote(`${String(model.path || '').replace(/\/+$/, '')}/models--${repo.replace(/\//g, '--')}/snapshots`); return `"$HOME/.cache/huggingface/hub/models--${repo.replace(/\//g, '--')}/snapshots"`; } function _rerenderCachedModels() { const list = document.getElementById('hwfit-cached-list'); const tagContainer = document.getElementById('serve-tags'); if (!list || !_cachedAllModels.length) return; const allModels = _cachedAllModels; const _h = (text) => `?`; const activeTag = tagContainer?.querySelector('.memory-cat-chip.active')?.dataset.serveTag || ''; const searchVal = (document.getElementById('serve-search')?.value || '').toLowerCase().trim(); const sortVal = document.getElementById('serve-sort')?.value || 'name'; const _parseSize = (s) => { const m = (s || '').match(/([\d.]+)\s*(GB|MB|KB)/i); if (!m) return 0; const n = parseFloat(m[1]); if (m[2] === 'GB') return n * 1024; if (m[2] === 
'MB') return n; return n / 1024; }; if (sortVal === 'name') allModels.sort((a, b) => (a.repo_id || '').localeCompare(b.repo_id || '')); else if (sortVal === 'size-desc') allModels.sort((a, b) => _parseSize(b.size) - _parseSize(a.size)); else if (sortVal === 'size-asc') allModels.sort((a, b) => _parseSize(a.size) - _parseSize(b.size)); else if (sortVal === 'recent') allModels.sort((a, b) => (b.mtime || 0) - (a.mtime || 0)); let html = ''; let visibleCount = 0; for (const m of allModels) { if (activeTag && m._tag !== activeTag) continue; if (searchVal && !(m.repo_id || '').toLowerCase().includes(searchVal)) continue; visibleCount++; const shortName = m.repo_id.split('/').pop() || m.repo_id; const hfLink = m.repo_id.includes('/') ? `https://huggingface.co/${m.repo_id}` : ''; const metaParts = []; if (m.repo_id.includes('/')) metaParts.push(m.repo_id.split('/')[0]); metaParts.push(m.size); if (m.path) { metaParts.push(`${esc(m.path)}`); } const ggufCount = _runnableGgufFiles(m).length; if (ggufCount > 1) metaParts.push(`${ggufCount} GGUFs`); // "downloading" status now renders as a title-row pill instead of // a meta-row text label, matching the "running" pill style and // living on the same line as the model name. const _isDownloading = m.status === 'downloading'; const _isDlActive = _isDownloading ? _isActivelyDownloading(m.repo_id) : false; const isSelectMode = document.getElementById('hwfit-cache-select')?.classList.contains('active'); html += `
data/huggingface. Download a model here, or copy an existing host HuggingFace cache into that folder once.