Show Ollama models in Cookbook Serve
This commit is contained in:
@@ -21,6 +21,10 @@ _REPO_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*/[A-Za-z0-9][A-Za-z0-9._-]
|
||||
# the real on-disk path separately; this identifier is only for UI/task
|
||||
# bookkeeping, so serving should accept the same safe glyph set as repo IDs.
|
||||
_LOCAL_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")
|
||||
# Ollama model names include tags, e.g. `qwen2.5:0.5b` or `llama3.2:latest`.
|
||||
# Some registries also use a namespace path. Keep this shell-safe: no spaces,
|
||||
# quotes, `$`, `;`, `&`, pipes, or redirects.
|
||||
_OLLAMA_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._:/-]{0,200}$")
|
||||
# Include pattern is a glob: allow typical safe glyphs only.
|
||||
_INCLUDE_RE = re.compile(r"^[A-Za-z0-9._\-*?/\[\]]+$")
|
||||
# Remote host: user@host (optionally with :port-free hostname parts).
|
||||
@@ -48,9 +52,9 @@ def _validate_repo_id(v: str | None) -> str:
|
||||
def _validate_serve_model_id(v: str | None) -> str:
|
||||
if not v:
|
||||
raise HTTPException(400, "repo_id is required")
|
||||
if _REPO_ID_RE.match(v) or _LOCAL_MODEL_ID_RE.match(v):
|
||||
if _REPO_ID_RE.match(v) or _LOCAL_MODEL_ID_RE.match(v) or _OLLAMA_MODEL_ID_RE.match(v):
|
||||
return v
|
||||
raise HTTPException(400, "Invalid repo_id — must be <org>/<name> or a cached local model id using [A-Za-z0-9._-]")
|
||||
raise HTTPException(400, "Invalid repo_id — must be <org>/<name>, an Ollama name:tag, or a cached local model id")
|
||||
|
||||
|
||||
def _validate_include(v: str | None) -> str | None:
|
||||
@@ -147,7 +151,7 @@ def _local_tooling_path_export(executable: str) -> str:
|
||||
def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
|
||||
"""Build the standalone Python scanner used by /api/model/cached."""
|
||||
lines = [
|
||||
"import json, os",
|
||||
"import json, os, re, shutil, subprocess, urllib.request",
|
||||
"models = []",
|
||||
"seen = set()",
|
||||
"BLOCKED_ROOTS = ('/sys', '/proc', '/dev', '/run', '/var/run')",
|
||||
@@ -209,7 +213,48 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str:
|
||||
" except Exception: pass",
|
||||
" is_diff = os.path.exists(os.path.join(fp, 'model_index.json'))",
|
||||
" models.append({'repo_id':d,'size_bytes':sz,'nb_files':nf,'has_incomplete':False,'path':p,'is_local_dir':True,'is_diffusion':is_diff,'is_gguf':is_gguf})",
|
||||
"def parse_size(num, unit):",
|
||||
" try: n = float(num)",
|
||||
" except Exception: return 0",
|
||||
" u = (unit or '').upper()",
|
||||
" if u.startswith('TB'): return int(n * 1024 ** 4)",
|
||||
" if u.startswith('GB'): return int(n * 1024 ** 3)",
|
||||
" if u.startswith('MB'): return int(n * 1024 ** 2)",
|
||||
" if u.startswith('KB'): return int(n * 1024)",
|
||||
" return int(n)",
|
||||
"def scan_ollama():",
|
||||
" if not shutil.which('ollama'): return",
|
||||
" try:",
|
||||
" p = subprocess.run(['ollama', 'list'], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True, timeout=6)",
|
||||
" except Exception:",
|
||||
" return",
|
||||
" if p.returncode != 0: return",
|
||||
" for line in (p.stdout or '').splitlines()[1:]:",
|
||||
" parts = line.split()",
|
||||
" if len(parts) < 4: continue",
|
||||
" name = parts[0]",
|
||||
" if not name or name in seen: continue",
|
||||
" size_bytes = parse_size(parts[2], parts[3])",
|
||||
" seen.add(name)",
|
||||
" models.append({'repo_id':name,'size_bytes':size_bytes,'nb_files':1,'has_incomplete':False,'path':'ollama','backend':'ollama','is_ollama':True})",
|
||||
"def scan_ollama_api():",
|
||||
" urls = ['http://127.0.0.1:11434/api/tags', 'http://localhost:11434/api/tags', 'http://host.docker.internal:11434/api/tags']",
|
||||
" for url in urls:",
|
||||
" try:",
|
||||
" with urllib.request.urlopen(url, timeout=2) as r:",
|
||||
" data = json.loads(r.read().decode('utf-8', 'replace'))",
|
||||
" except Exception:",
|
||||
" continue",
|
||||
" for item in data.get('models', []):",
|
||||
" name = item.get('name') or item.get('model')",
|
||||
" if not name or name in seen: continue",
|
||||
" size_bytes = int(item.get('size') or item.get('size_bytes') or 0)",
|
||||
" seen.add(name)",
|
||||
" models.append({'repo_id':name,'size_bytes':size_bytes,'nb_files':1,'has_incomplete':False,'path':'ollama','backend':'ollama','is_ollama':True})",
|
||||
" return",
|
||||
"scan_hf(os.path.expanduser('~/.cache/huggingface/hub'))",
|
||||
"scan_ollama()",
|
||||
"scan_ollama_api()",
|
||||
]
|
||||
for model_dir in model_dirs or []:
|
||||
lines.append(f"scan_dir(os.path.expanduser({model_dir!r}))")
|
||||
|
||||
@@ -710,6 +710,10 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
entry["is_local_dir"] = True
|
||||
if m.get("is_gguf"):
|
||||
entry["is_gguf"] = True
|
||||
if m.get("backend"):
|
||||
entry["backend"] = m.get("backend")
|
||||
if m.get("is_ollama"):
|
||||
entry["is_ollama"] = True
|
||||
models.append(entry)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to parse cached models: {e}")
|
||||
|
||||
@@ -245,6 +245,9 @@ export function _detectToolParser(modelName) {
|
||||
// ── Backend detection ──
|
||||
|
||||
export function _detectBackend(model) {
|
||||
if (model?.backend === 'ollama' || model?.is_ollama) {
|
||||
return { backend: 'ollama', label: 'Ollama' };
|
||||
}
|
||||
const q = (model.quant || '').toUpperCase();
|
||||
const sysBackend = String(_hwfitCache?.system?.backend || '').toLowerCase();
|
||||
const isRocm = sysBackend === 'rocm';
|
||||
@@ -407,11 +410,9 @@ export function _buildServeCmd(f, modelName, backend) {
|
||||
cmd += ` || ${_lcpServer}`;
|
||||
}
|
||||
} else if (backend === 'ollama') {
|
||||
const ollamaName = modelName.split('/').pop().toLowerCase().replace(/[-_]gguf$/i, '');
|
||||
const ollamaPort = f.port || '11434';
|
||||
const hostEnv = ollamaPort !== '11434' ? `OLLAMA_HOST=0.0.0.0:${ollamaPort} ` : '';
|
||||
// Start serve in background if not running, then pull model
|
||||
cmd = `${hostEnv}ollama serve &>/dev/null & sleep 2 && ${hostEnv}ollama pull ${ollamaName} && wait`;
|
||||
cmd = `${hostEnv}ollama serve`;
|
||||
} else if (backend === 'diffusers') {
|
||||
const gpuStr = f.gpus?.trim();
|
||||
if (gpuStr) cmd += `CUDA_VISIBLE_DEVICES=${gpuStr} `;
|
||||
|
||||
@@ -386,9 +386,9 @@ function _rerenderCachedModels() {
|
||||
: _isMetal()
|
||||
// Diffusers (diffusion_server.py) is CUDA-only — omit it on Metal.
|
||||
? [['llamacpp','llama.cpp'],['ollama','Ollama']]
|
||||
: [['vllm','vLLM'],['sglang','SGLang'],['llamacpp','llama.cpp'],['diffusers','Diffusers']];
|
||||
: [['vllm','vLLM'],['sglang','SGLang'],['llamacpp','llama.cpp'],['ollama','Ollama'],['diffusers','Diffusers']];
|
||||
const backendOpts = _backendChoices.map(([v,l]) => `<option value="${v}"${defaultBackend===v?' selected':''}>${l}</option>`).join('');
|
||||
panelHtml += `<label>${_l('Backend','Inference engine: vLLM, SGLang, llama.cpp, or Diffusers')}<select class="hwfit-sf" data-field="backend">${backendOpts}</select></label>`;
|
||||
panelHtml += `<label>${_l('Backend','Inference engine: vLLM, SGLang, llama.cpp, Ollama, or Diffusers')}<select class="hwfit-sf" data-field="backend">${backendOpts}</select></label>`;
|
||||
panelHtml += `<input type="hidden" class="hwfit-sf" data-field="host" value="${esc(_es.remoteHost || '')}" />`;
|
||||
panelHtml += `<label>${_l('venv','Path to Python venv or conda env activate script')}<input type="text" class="hwfit-sf hwfit-sf-wide" data-field="venv" value="${esc(sv('venv', _es.envPath || _srvVenv || ''))}" placeholder="~/venv" /></label>`;
|
||||
panelHtml += `<label>${_l('Port','HTTP port for the API server')}<input type="text" class="hwfit-sf" data-field="port" value="${esc(sv('port', _nextAvailablePort()))}" /></label>`;
|
||||
@@ -1512,7 +1512,7 @@ export async function _fetchCachedModels() {
|
||||
const data = await res.json();
|
||||
_dlWp.destroy();
|
||||
|
||||
const ready = data.models.filter(m => m.status === 'ready' && !m.size.includes('MB'));
|
||||
const ready = data.models.filter(m => m.status === 'ready' && (m.backend === 'ollama' || !m.size.includes('MB')));
|
||||
const downloading = data.models.filter(m => m.status === 'downloading');
|
||||
const allModels = [...ready, ...downloading];
|
||||
_cachedAllModels = allModels;
|
||||
@@ -1541,7 +1541,8 @@ export async function _fetchCachedModels() {
|
||||
for (const m of allModels) {
|
||||
const n = (m.repo_id || '').toLowerCase();
|
||||
let tag = 'other';
|
||||
if (m.is_diffusion || /flux|sdxl|stable-diffusion|z-image|qwen-image|diffusion|dreamshar/i.test(n)) tag = 'image';
|
||||
if (m.backend === 'ollama' || m.is_ollama) tag = 'llm';
|
||||
else if (m.is_diffusion || /flux|sdxl|stable-diffusion|z-image|qwen-image|diffusion|dreamshar/i.test(n)) tag = 'image';
|
||||
else if (/whisper|stt|asr/i.test(n)) tag = 'stt';
|
||||
else if (/tts|cosyvoice|parler/i.test(n)) tag = 'tts';
|
||||
else if (/embed|bge|minilm|e5-/i.test(n)) tag = 'embedding';
|
||||
@@ -1553,6 +1554,10 @@ export async function _fetchCachedModels() {
|
||||
for (const [re, fam] of _families) {
|
||||
if (re.test(n)) { m._family = fam; _familyMap[fam] = (_familyMap[fam] || 0) + 1; break; }
|
||||
}
|
||||
if ((m.backend === 'ollama' || m.is_ollama) && !m._family) {
|
||||
m._family = 'ollama';
|
||||
_familyMap.ollama = (_familyMap.ollama || 0) + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Render tag chips
|
||||
|
||||
Reference in New Issue
Block a user