From ab0a480f3007604520b2b408317e1476bb19d492 Mon Sep 17 00:00:00 2001 From: pewdiepie-archdaemon Date: Tue, 2 Jun 2026 07:14:59 +0900 Subject: [PATCH] Show Ollama models in Cookbook Serve --- routes/cookbook_helpers.py | 51 +++++++++++++++++++++++++++++++++++--- routes/cookbook_routes.py | 4 +++ static/js/cookbook.js | 7 +++--- static/js/cookbookServe.js | 13 +++++++--- 4 files changed, 65 insertions(+), 10 deletions(-) diff --git a/routes/cookbook_helpers.py b/routes/cookbook_helpers.py index e468a5a..ca954ab 100644 --- a/routes/cookbook_helpers.py +++ b/routes/cookbook_helpers.py @@ -21,6 +21,10 @@ _REPO_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*/[A-Za-z0-9][A-Za-z0-9._-] # the real on-disk path separately; this identifier is only for UI/task # bookkeeping, so serving should accept the same safe glyph set as repo IDs. _LOCAL_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$") +# Ollama model names include tags, e.g. `qwen2.5:0.5b` or `llama3.2:latest`. +# Some registries also use a namespace path. Keep this shell-safe: no spaces, +# quotes, `$`, `;`, `&`, pipes, or redirects. +_OLLAMA_MODEL_ID_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._:/-]{0,200}$") # Include pattern is a glob: allow typical safe glyphs only. _INCLUDE_RE = re.compile(r"^[A-Za-z0-9._\-*?/\[\]]+$") # Remote host: user@host (optionally with :port-free hostname parts). @@ -48,9 +52,9 @@ def _validate_repo_id(v: str | None) -> str: def _validate_serve_model_id(v: str | None) -> str: if not v: raise HTTPException(400, "repo_id is required") - if _REPO_ID_RE.match(v) or _LOCAL_MODEL_ID_RE.match(v): + if _REPO_ID_RE.match(v) or _LOCAL_MODEL_ID_RE.match(v) or _OLLAMA_MODEL_ID_RE.match(v): return v - raise HTTPException(400, "Invalid repo_id — must be / or a cached local model id using [A-Za-z0-9._-]") + raise HTTPException(400, "Invalid repo_id — must be /, an Ollama name:tag, or a cached local model id") def _validate_include(v: str | None) -> str | None: @@ -147,7 +151,7 @@ def _local_tooling_path_export(executable: str) -> str: def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str: """Build the standalone Python scanner used by /api/model/cached.""" lines = [ - "import json, os", + "import json, os, re, shutil, subprocess, urllib.request", "models = []", "seen = set()", "BLOCKED_ROOTS = ('/sys', '/proc', '/dev', '/run', '/var/run')", @@ -209,7 +213,48 @@ def _cached_model_scan_script(model_dirs: list[str] | None = None) -> str: " except Exception: pass", " is_diff = os.path.exists(os.path.join(fp, 'model_index.json'))", " models.append({'repo_id':d,'size_bytes':sz,'nb_files':nf,'has_incomplete':False,'path':p,'is_local_dir':True,'is_diffusion':is_diff,'is_gguf':is_gguf})", + "def parse_size(num, unit):", + " try: n = float(num)", + " except Exception: return 0", + " u = (unit or '').upper()", + " if u.startswith('TB'): return int(n * 1024 ** 4)", + " if u.startswith('GB'): return int(n * 1024 ** 3)", + " if u.startswith('MB'): return int(n * 1024 ** 2)", + " if u.startswith('KB'): return int(n * 1024)", + " return int(n)", + "def scan_ollama():", + " if not shutil.which('ollama'): return", + " try:", + " p = subprocess.run(['ollama', 'list'], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True, timeout=6)", + " except Exception:", + " return", + " if p.returncode != 0: return", + " for line in (p.stdout or '').splitlines()[1:]:", + " parts = line.split()", + " if len(parts) < 4: continue", + " name = parts[0]", + " if not name or name in seen: continue", + " size_bytes = parse_size(parts[2], parts[3])", + " seen.add(name)", + " models.append({'repo_id':name,'size_bytes':size_bytes,'nb_files':1,'has_incomplete':False,'path':'ollama','backend':'ollama','is_ollama':True})", + "def scan_ollama_api():", + " urls = ['http://127.0.0.1:11434/api/tags', 'http://localhost:11434/api/tags', 'http://host.docker.internal:11434/api/tags']", + " for url in urls:", + " try:", + " with urllib.request.urlopen(url, timeout=2) as r:", + " data = json.loads(r.read().decode('utf-8', 'replace'))", + " except Exception:", + " continue", + " for item in data.get('models', []):", + " name = item.get('name') or item.get('model')", + " if not name or name in seen: continue", + " size_bytes = int(item.get('size') or item.get('size_bytes') or 0)", + " seen.add(name)", + " models.append({'repo_id':name,'size_bytes':size_bytes,'nb_files':1,'has_incomplete':False,'path':'ollama','backend':'ollama','is_ollama':True})", + " return", "scan_hf(os.path.expanduser('~/.cache/huggingface/hub'))", + "scan_ollama()", + "scan_ollama_api()", ] for model_dir in model_dirs or []: lines.append(f"scan_dir(os.path.expanduser({model_dir!r}))") diff --git a/routes/cookbook_routes.py b/routes/cookbook_routes.py index 5718167..c622d38 100644 --- a/routes/cookbook_routes.py +++ b/routes/cookbook_routes.py @@ -710,6 +710,10 @@ def setup_cookbook_routes() -> APIRouter: entry["is_local_dir"] = True if m.get("is_gguf"): entry["is_gguf"] = True + if m.get("backend"): + entry["backend"] = m.get("backend") + if m.get("is_ollama"): + entry["is_ollama"] = True models.append(entry) except Exception as e: logger.warning(f"Failed to parse cached models: {e}") diff --git a/static/js/cookbook.js b/static/js/cookbook.js index 8d230d2..98f5dc7 100644 --- a/static/js/cookbook.js +++ b/static/js/cookbook.js @@ -245,6 +245,9 @@ export function _detectToolParser(modelName) { // ── Backend detection ── export function _detectBackend(model) { + if (model?.backend === 'ollama' || model?.is_ollama) { + return { backend: 'ollama', label: 'Ollama' }; + } const q = (model.quant || '').toUpperCase(); const sysBackend = String(_hwfitCache?.system?.backend || '').toLowerCase(); const isRocm = sysBackend === 'rocm'; @@ -407,11 +410,9 @@ export function _buildServeCmd(f, modelName, backend) { cmd += ` || ${_lcpServer}`; } } else if (backend === 'ollama') { - const ollamaName = modelName.split('/').pop().toLowerCase().replace(/[-_]gguf$/i, ''); const ollamaPort = f.port || '11434'; const hostEnv = ollamaPort !== '11434' ? `OLLAMA_HOST=0.0.0.0:${ollamaPort} ` : ''; - // Start serve in background if not running, then pull model - cmd = `${hostEnv}ollama serve &>/dev/null & sleep 2 && ${hostEnv}ollama pull ${ollamaName} && wait`; + cmd = `${hostEnv}ollama serve`; } else if (backend === 'diffusers') { const gpuStr = f.gpus?.trim(); if (gpuStr) cmd += `CUDA_VISIBLE_DEVICES=${gpuStr} `; diff --git a/static/js/cookbookServe.js b/static/js/cookbookServe.js index 1f976c1..3894b9b 100644 --- a/static/js/cookbookServe.js +++ b/static/js/cookbookServe.js @@ -386,9 +386,9 @@ function _rerenderCachedModels() { : _isMetal() // Diffusers (diffusion_server.py) is CUDA-only — omit it on Metal. ? [['llamacpp','llama.cpp'],['ollama','Ollama']] - : [['vllm','vLLM'],['sglang','SGLang'],['llamacpp','llama.cpp'],['diffusers','Diffusers']]; + : [['vllm','vLLM'],['sglang','SGLang'],['llamacpp','llama.cpp'],['ollama','Ollama'],['diffusers','Diffusers']]; const backendOpts = _backendChoices.map(([v,l]) => ``).join(''); - panelHtml += ``; + panelHtml += ``; panelHtml += ``; panelHtml += ``; panelHtml += ``; @@ -1512,7 +1512,7 @@ export async function _fetchCachedModels() { const data = await res.json(); _dlWp.destroy(); - const ready = data.models.filter(m => m.status === 'ready' && !m.size.includes('MB')); + const ready = data.models.filter(m => m.status === 'ready' && (m.backend === 'ollama' || !m.size.includes('MB'))); const downloading = data.models.filter(m => m.status === 'downloading'); const allModels = [...ready, ...downloading]; _cachedAllModels = allModels; @@ -1541,7 +1541,8 @@ export async function _fetchCachedModels() { for (const m of allModels) { const n = (m.repo_id || '').toLowerCase(); let tag = 'other'; - if (m.is_diffusion || /flux|sdxl|stable-diffusion|z-image|qwen-image|diffusion|dreamshar/i.test(n)) tag = 'image'; + if (m.backend === 'ollama' || m.is_ollama) tag = 'llm'; + else if (m.is_diffusion || /flux|sdxl|stable-diffusion|z-image|qwen-image|diffusion|dreamshar/i.test(n)) tag = 'image'; else if (/whisper|stt|asr/i.test(n)) tag = 'stt'; else if (/tts|cosyvoice|parler/i.test(n)) tag = 'tts'; else if (/embed|bge|minilm|e5-/i.test(n)) tag = 'embedding'; @@ -1553,6 +1554,10 @@ export async function _fetchCachedModels() { for (const [re, fam] of _families) { if (re.test(n)) { m._family = fam; _familyMap[fam] = (_familyMap[fam] || 0) + 1; break; } } + if ((m.backend === 'ollama' || m.is_ollama) && !m._family) { + m._family = 'ollama'; + _familyMap.ollama = (_familyMap.ollama || 0) + 1; + } } // Render tag chips