Improve Cookbook serve diagnostics and recommendations
This commit is contained in:
@@ -962,13 +962,23 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
# failed CUDA attempt) doesn't cause the next configure to reuse
|
||||
# stale settings and silently produce a CPU-only binary.
|
||||
runner_lines.append(' cd ~/llama.cpp && rm -rf build')
|
||||
runner_lines.append(' _ody_has_cuda_runtime=0')
|
||||
runner_lines.append(' if command -v nvcc &>/dev/null; then')
|
||||
runner_lines.append(' for _cudalib in "${CUDA_HOME:-}/lib64"/libcudart.so* "${CUDA_HOME:-}/lib"/libcudart.so* /usr/local/cuda/lib64/libcudart.so* /usr/lib*/libcudart.so*; do')
|
||||
runner_lines.append(' [ -e "$_cudalib" ] && _ody_has_cuda_runtime=1 && break')
|
||||
runner_lines.append(' done')
|
||||
runner_lines.append(' fi')
|
||||
runner_lines.append(' if command -v nvcc &>/dev/null && [ "$_ody_has_cuda_runtime" = "1" ]; then')
|
||||
runner_lines.append(' echo "[odysseus] CUDA nvcc found — building llama-server with CUDA (GPU) support..."')
|
||||
runner_lines.append(' cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON \\')
|
||||
runner_lines.append(' && cmake --build build -j"$NPROC" --target llama-server \\')
|
||||
runner_lines.append(' && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
|
||||
runner_lines.append(' else')
|
||||
runner_lines.append(' if command -v nvcc &>/dev/null; then')
|
||||
runner_lines.append(' echo "[odysseus] WARNING: nvcc found but CUDA runtime library was not found — building llama-server for CPU only."')
|
||||
runner_lines.append(' else')
|
||||
runner_lines.append(' echo "[odysseus] WARNING: nvcc not found — building llama-server for CPU only."')
|
||||
runner_lines.append(' fi')
|
||||
runner_lines.append(' echo "[odysseus] GPU inference will not be available for this llama.cpp build."')
|
||||
runner_lines.append(' echo "[odysseus] To get a GPU build, first install vLLM via Cookbook -> Dependencies"')
|
||||
runner_lines.append(' echo "[odysseus] (its CUDA wheels include nvcc), then re-launch this serve task."')
|
||||
@@ -982,6 +992,10 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
runner_lines.append(' echo "llama-server build failed — installing Python bindings as fallback..."')
|
||||
runner_lines.append(f" {_pip_install_fallback_chain('llama-cpp-python', python_cmd='pip')} || true")
|
||||
runner_lines.append(' fi')
|
||||
runner_lines.append(' if ! command -v llama-server &>/dev/null && ! python3 -c "import llama_cpp" 2>/dev/null; then')
|
||||
runner_lines.append(' echo "ERROR: llama.cpp serving is not available after install/build attempts."')
|
||||
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
|
||||
runner_lines.append(' fi')
|
||||
runner_lines.append('fi')
|
||||
elif "ollama" in req.cmd:
|
||||
handled_ollama_serve = True
|
||||
@@ -1037,19 +1051,24 @@ def setup_cookbook_routes() -> APIRouter:
|
||||
# find the `vllm` CLI ("command not found"). Mirrors llama.cpp above.
|
||||
runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
|
||||
runner_lines.append('if ! command -v vllm &>/dev/null; then')
|
||||
runner_lines.append(' echo "ERROR: vLLM is not installed. Open Cookbook -> Dependencies and install vllm on this server, then launch again."')
|
||||
runner_lines.append(' echo "ERROR: vLLM is not installed."')
|
||||
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
|
||||
runner_lines.append('fi')
|
||||
elif "sglang.launch_server" in req.cmd:
|
||||
runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
|
||||
runner_lines.append('if ! python3 -c "import sglang" 2>/dev/null; then')
|
||||
runner_lines.append(' echo "ERROR: SGLang is not installed. Open Cookbook -> Dependencies and install sglang on this server, then launch again."')
|
||||
runner_lines.append('if ! command -v sglang &>/dev/null; then')
|
||||
runner_lines.append(' echo "ERROR: SGLang is not installed."')
|
||||
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
|
||||
runner_lines.append('elif ! ODYSSEUS_SGLANG_IMPORT_ERROR="$(python3 -c "import sglang" 2>&1)"; then')
|
||||
runner_lines.append(' echo "ERROR: SGLang is installed but failed to import."')
|
||||
runner_lines.append(' printf "%s\\n" "$ODYSSEUS_SGLANG_IMPORT_ERROR"')
|
||||
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
|
||||
runner_lines.append('fi')
|
||||
elif "scripts/diffusion_server.py" in req.cmd or ".diffusion_server.py" in req.cmd:
|
||||
runner_lines.append('export PATH="$HOME/.local/bin:$PATH"')
|
||||
runner_lines.append('if ! python3 -c "import torch, diffusers" 2>/dev/null; then')
|
||||
runner_lines.append(' echo "ERROR: Diffusion serving requires PyTorch + diffusers. Open Cookbook -> Dependencies and install diffusers on this server, then launch again."')
|
||||
runner_lines.append('if ! ODYSSEUS_DIFFUSION_IMPORT_ERROR="$(python3 -c "import torch, diffusers" 2>&1)"; then')
|
||||
runner_lines.append(' echo "ERROR: Diffusion serving requires PyTorch + diffusers."')
|
||||
runner_lines.append(' printf "%s\\n" "$ODYSSEUS_DIFFUSION_IMPORT_ERROR"')
|
||||
runner_lines.append(' ODYSSEUS_PREFLIGHT_EXIT=127')
|
||||
runner_lines.append('fi')
|
||||
|
||||
|
||||
@@ -88,6 +88,8 @@ def _quant_from_name(name):
|
||||
if "6bit" in n:
|
||||
return "mlx-6bit"
|
||||
return "mlx-8bit" if is8 else "mlx-4bit"
|
||||
if "nvfp4" in n:
|
||||
return "NVFP4"
|
||||
if "fp8" in n:
|
||||
return "FP8"
|
||||
if "int4" in n or "4bit" in n or "4-bit" in n:
|
||||
@@ -136,7 +138,7 @@ def _entry_from_modelinfo(mi, overrides):
|
||||
params_by_dtype = getattr(st, "parameters", None) or {}
|
||||
if quant.endswith("4bit") or quant.endswith("Int4"):
|
||||
pack_factor = 8
|
||||
elif quant.endswith("8bit") or quant.endswith("Int8") or quant == "FP8":
|
||||
elif quant.endswith("8bit") or quant.endswith("Int8") or quant in ("FP8", "NVFP4"):
|
||||
pack_factor = 4
|
||||
else:
|
||||
pack_factor = 1
|
||||
@@ -158,7 +160,7 @@ def _entry_from_modelinfo(mi, overrides):
|
||||
rel = created.strftime("%Y-%m-%d") if created else datetime.utcnow().strftime("%Y-%m-%d")
|
||||
# Rough RAM/VRAM hints (fit.py recomputes the real requirement from params+quant).
|
||||
_BPP = {"AWQ-4bit": 0.58, "GPTQ-Int4": 0.58, "mlx-4bit": 0.55, "mlx-6bit": 0.85,
|
||||
"AWQ-8bit": 1.1, "GPTQ-Int8": 1.1, "mlx-8bit": 1.1, "FP8": 1.1, "Q4_K_M": 0.6}
|
||||
"AWQ-8bit": 1.1, "GPTQ-Int8": 1.1, "mlx-8bit": 1.1, "FP8": 1.1, "NVFP4": 0.6, "Q4_K_M": 0.6}
|
||||
bpp = _BPP.get(quant, 0.6)
|
||||
vram = round(pb * bpp + 0.5, 1)
|
||||
entry = {
|
||||
|
||||
@@ -13919,7 +13919,12 @@
|
||||
"architecture": "gemma4",
|
||||
"pipeline_tag": "image-text-to-text",
|
||||
"release_date": "2026-04-01",
|
||||
"gguf_sources": [],
|
||||
"gguf_sources": [
|
||||
{
|
||||
"repo": "unsloth/gemma-4-E2B-it-GGUF",
|
||||
"provider": "unsloth"
|
||||
}
|
||||
],
|
||||
"capabilities": [
|
||||
"vision"
|
||||
]
|
||||
@@ -13942,7 +13947,12 @@
|
||||
"architecture": "gemma4",
|
||||
"pipeline_tag": "image-text-to-text",
|
||||
"release_date": "2026-04-01",
|
||||
"gguf_sources": [],
|
||||
"gguf_sources": [
|
||||
{
|
||||
"repo": "unsloth/gemma-4-E4B-it-GGUF",
|
||||
"provider": "unsloth"
|
||||
}
|
||||
],
|
||||
"capabilities": [
|
||||
"vision"
|
||||
]
|
||||
@@ -13965,7 +13975,12 @@
|
||||
"architecture": "gemma4",
|
||||
"pipeline_tag": "image-text-to-text",
|
||||
"release_date": "2026-04-01",
|
||||
"gguf_sources": [],
|
||||
"gguf_sources": [
|
||||
{
|
||||
"repo": "unsloth/gemma-4-31B-it-GGUF",
|
||||
"provider": "unsloth"
|
||||
}
|
||||
],
|
||||
"capabilities": [
|
||||
"vision"
|
||||
]
|
||||
@@ -13988,7 +14003,12 @@
|
||||
"architecture": "gemma4",
|
||||
"pipeline_tag": "image-text-to-text",
|
||||
"release_date": "2026-04-01",
|
||||
"gguf_sources": [],
|
||||
"gguf_sources": [
|
||||
{
|
||||
"repo": "unsloth/gemma-4-26B-A4B-it-GGUF",
|
||||
"provider": "unsloth"
|
||||
}
|
||||
],
|
||||
"capabilities": [
|
||||
"vision"
|
||||
]
|
||||
@@ -18719,5 +18739,307 @@
|
||||
"hf_likes": 0,
|
||||
"release_date": "2026-04-19",
|
||||
"_discovered": true
|
||||
},
|
||||
{
|
||||
"name": "Qwen/Qwen3.6-27B-MTP",
|
||||
"provider": "Qwen",
|
||||
"parameter_count": "27.8B",
|
||||
"parameters_raw": 27781427952,
|
||||
"min_ram_gb": 16.6,
|
||||
"recommended_ram_gb": 21.6,
|
||||
"min_vram_gb": 16.6,
|
||||
"quantization": "Q4_K_M",
|
||||
"context_length": 262144,
|
||||
"use_case": "General purpose, coding, MTP",
|
||||
"is_moe": false,
|
||||
"num_experts": null,
|
||||
"active_experts": null,
|
||||
"active_parameters": null,
|
||||
"architecture": "qwen3",
|
||||
"pipeline_tag": "text-generation",
|
||||
"release_date": "2026-04-01",
|
||||
"gguf_sources": [
|
||||
{
|
||||
"repo": "unsloth/Qwen3.6-27B-MTP-GGUF",
|
||||
"provider": "unsloth"
|
||||
}
|
||||
],
|
||||
"capabilities": [
|
||||
"mtp"
|
||||
],
|
||||
"_discovered": true
|
||||
},
|
||||
{
|
||||
"name": "Qwen/Qwen3.6-35B-A3B-MTP",
|
||||
"provider": "Qwen",
|
||||
"parameter_count": "36.0B",
|
||||
"parameters_raw": 35951822704,
|
||||
"min_ram_gb": 21.4,
|
||||
"recommended_ram_gb": 27.8,
|
||||
"min_vram_gb": 21.4,
|
||||
"quantization": "Q4_K_M",
|
||||
"context_length": 262144,
|
||||
"use_case": "General purpose (MoE), MTP",
|
||||
"is_moe": true,
|
||||
"num_experts": null,
|
||||
"active_experts": null,
|
||||
"active_parameters": 3000000000,
|
||||
"architecture": "qwen3_moe",
|
||||
"pipeline_tag": "text-generation",
|
||||
"release_date": "2026-04-01",
|
||||
"gguf_sources": [
|
||||
{
|
||||
"repo": "unsloth/Qwen3.6-35B-A3B-MTP-GGUF",
|
||||
"provider": "unsloth"
|
||||
}
|
||||
],
|
||||
"capabilities": [
|
||||
"mtp"
|
||||
],
|
||||
"_discovered": true
|
||||
},
|
||||
{
|
||||
"name": "Qwen/Qwen3.5-0.8B-MTP",
|
||||
"provider": "Qwen",
|
||||
"parameter_count": "873M",
|
||||
"parameters_raw": 873438784,
|
||||
"min_ram_gb": 1.0,
|
||||
"recommended_ram_gb": 2.0,
|
||||
"min_vram_gb": 0.5,
|
||||
"quantization": "Q4_K_M",
|
||||
"context_length": 262144,
|
||||
"use_case": "General purpose, MTP",
|
||||
"capabilities": [
|
||||
"mtp",
|
||||
"tool_use",
|
||||
"vision"
|
||||
],
|
||||
"pipeline_tag": "image-text-to-text",
|
||||
"architecture": "qwen3_5",
|
||||
"hf_downloads": 93448,
|
||||
"hf_likes": 208,
|
||||
"release_date": "2026-02-28",
|
||||
"gguf_sources": [
|
||||
{
|
||||
"repo": "unsloth/Qwen3.5-0.8B-MTP-GGUF",
|
||||
"provider": "unsloth"
|
||||
}
|
||||
],
|
||||
"_discovered": true
|
||||
},
|
||||
{
|
||||
"name": "Qwen/Qwen3.5-2B-MTP",
|
||||
"provider": "Qwen",
|
||||
"parameter_count": "2.3B",
|
||||
"parameters_raw": 2274069824,
|
||||
"min_ram_gb": 1.3,
|
||||
"recommended_ram_gb": 2.1,
|
||||
"min_vram_gb": 1.2,
|
||||
"quantization": "Q4_K_M",
|
||||
"context_length": 262144,
|
||||
"use_case": "General purpose, MTP",
|
||||
"capabilities": [
|
||||
"mtp",
|
||||
"tool_use",
|
||||
"vision"
|
||||
],
|
||||
"pipeline_tag": "image-text-to-text",
|
||||
"architecture": "qwen3_5",
|
||||
"hf_downloads": 46974,
|
||||
"hf_likes": 115,
|
||||
"release_date": "2026-02-28",
|
||||
"gguf_sources": [
|
||||
{
|
||||
"repo": "unsloth/Qwen3.5-2B-MTP-GGUF",
|
||||
"provider": "unsloth"
|
||||
}
|
||||
],
|
||||
"_discovered": true
|
||||
},
|
||||
{
|
||||
"name": "Qwen/Qwen3.5-4B-MTP",
|
||||
"provider": "Qwen",
|
||||
"parameter_count": "4.7B",
|
||||
"parameters_raw": 4659865088,
|
||||
"min_ram_gb": 2.6,
|
||||
"recommended_ram_gb": 4.3,
|
||||
"min_vram_gb": 2.4,
|
||||
"quantization": "Q4_K_M",
|
||||
"context_length": 262144,
|
||||
"use_case": "General purpose, MTP",
|
||||
"capabilities": [
|
||||
"mtp",
|
||||
"tool_use",
|
||||
"vision"
|
||||
],
|
||||
"pipeline_tag": "image-text-to-text",
|
||||
"architecture": "qwen3_5",
|
||||
"hf_downloads": 99087,
|
||||
"hf_likes": 202,
|
||||
"release_date": "2026-02-27",
|
||||
"gguf_sources": [
|
||||
{
|
||||
"repo": "unsloth/Qwen3.5-4B-MTP-GGUF",
|
||||
"provider": "unsloth"
|
||||
}
|
||||
],
|
||||
"_discovered": true
|
||||
},
|
||||
{
|
||||
"name": "Qwen/Qwen3.5-9B-MTP",
|
||||
"provider": "Qwen",
|
||||
"parameter_count": "9.7B",
|
||||
"parameters_raw": 9653104368,
|
||||
"min_ram_gb": 5.4,
|
||||
"recommended_ram_gb": 9.0,
|
||||
"min_vram_gb": 4.9,
|
||||
"quantization": "Q4_K_M",
|
||||
"context_length": 262144,
|
||||
"use_case": "General purpose, MTP",
|
||||
"capabilities": [
|
||||
"mtp",
|
||||
"tool_use",
|
||||
"vision"
|
||||
],
|
||||
"pipeline_tag": "image-text-to-text",
|
||||
"architecture": "qwen3_5",
|
||||
"hf_downloads": 172298,
|
||||
"hf_likes": 345,
|
||||
"release_date": "2026-02-27",
|
||||
"gguf_sources": [
|
||||
{
|
||||
"repo": "unsloth/Qwen3.5-9B-MTP-GGUF",
|
||||
"provider": "unsloth"
|
||||
}
|
||||
],
|
||||
"_discovered": true
|
||||
},
|
||||
{
|
||||
"name": "Qwen/Qwen3.5-27B-MTP",
|
||||
"provider": "Qwen",
|
||||
"parameter_count": "27.8B",
|
||||
"parameters_raw": 27781427952,
|
||||
"min_ram_gb": 15.5,
|
||||
"recommended_ram_gb": 25.9,
|
||||
"min_vram_gb": 14.2,
|
||||
"quantization": "Q4_K_M",
|
||||
"context_length": 262144,
|
||||
"use_case": "General purpose, MTP",
|
||||
"capabilities": [
|
||||
"mtp",
|
||||
"tool_use",
|
||||
"vision"
|
||||
],
|
||||
"pipeline_tag": "image-text-to-text",
|
||||
"architecture": "qwen3_5",
|
||||
"hf_downloads": 406808,
|
||||
"hf_likes": 565,
|
||||
"release_date": "2026-02-24",
|
||||
"gguf_sources": [
|
||||
{
|
||||
"repo": "unsloth/Qwen3.5-27B-MTP-GGUF",
|
||||
"provider": "unsloth"
|
||||
}
|
||||
],
|
||||
"_discovered": true
|
||||
},
|
||||
{
|
||||
"name": "Qwen/Qwen3.5-35B-A3B-MTP",
|
||||
"provider": "Qwen",
|
||||
"parameter_count": "36.0B",
|
||||
"parameters_raw": 35951822704,
|
||||
"min_ram_gb": 20.1,
|
||||
"recommended_ram_gb": 33.5,
|
||||
"min_vram_gb": 18.4,
|
||||
"quantization": "Q4_K_M",
|
||||
"context_length": 262144,
|
||||
"use_case": "General purpose, MTP",
|
||||
"capabilities": [
|
||||
"mtp",
|
||||
"tool_use",
|
||||
"vision"
|
||||
],
|
||||
"pipeline_tag": "image-text-to-text",
|
||||
"architecture": "qwen3_5_moe",
|
||||
"hf_downloads": 769032,
|
||||
"hf_likes": 905,
|
||||
"release_date": "2026-02-24",
|
||||
"is_moe": true,
|
||||
"num_experts": 256,
|
||||
"active_experts": 8,
|
||||
"active_parameters": 3000000000,
|
||||
"gguf_sources": [
|
||||
{
|
||||
"repo": "unsloth/Qwen3.5-35B-A3B-MTP-GGUF",
|
||||
"provider": "unsloth"
|
||||
}
|
||||
],
|
||||
"_discovered": true
|
||||
},
|
||||
{
|
||||
"name": "Qwen/Qwen3.5-122B-A10B-MTP",
|
||||
"provider": "Qwen",
|
||||
"parameter_count": "125.1B",
|
||||
"parameters_raw": 125086497008,
|
||||
"min_ram_gb": 69.9,
|
||||
"recommended_ram_gb": 116.5,
|
||||
"min_vram_gb": 64.1,
|
||||
"quantization": "Q4_K_M",
|
||||
"context_length": 262144,
|
||||
"use_case": "General purpose, MTP",
|
||||
"capabilities": [
|
||||
"mtp",
|
||||
"tool_use",
|
||||
"vision"
|
||||
],
|
||||
"pipeline_tag": "image-text-to-text",
|
||||
"architecture": "qwen3_5_moe",
|
||||
"hf_downloads": 171055,
|
||||
"hf_likes": 389,
|
||||
"release_date": "2026-02-24",
|
||||
"is_moe": true,
|
||||
"num_experts": 256,
|
||||
"active_experts": 8,
|
||||
"active_parameters": 10000000000,
|
||||
"gguf_sources": [
|
||||
{
|
||||
"repo": "unsloth/Qwen3.5-122B-A10B-MTP-GGUF",
|
||||
"provider": "unsloth"
|
||||
}
|
||||
],
|
||||
"_discovered": true
|
||||
},
|
||||
{
|
||||
"name": "Qwen/Qwen3.5-397B-A17B-MTP",
|
||||
"provider": "Qwen",
|
||||
"parameter_count": "403.4B",
|
||||
"parameters_raw": 403397928944,
|
||||
"min_ram_gb": 225.4,
|
||||
"recommended_ram_gb": 375.7,
|
||||
"min_vram_gb": 206.6,
|
||||
"quantization": "Q4_K_M",
|
||||
"context_length": 262144,
|
||||
"use_case": "General purpose, MTP",
|
||||
"capabilities": [
|
||||
"mtp",
|
||||
"tool_use",
|
||||
"vision"
|
||||
],
|
||||
"pipeline_tag": "image-text-to-text",
|
||||
"architecture": "qwen3_5_moe",
|
||||
"hf_downloads": 1291825,
|
||||
"hf_likes": 1214,
|
||||
"release_date": "2026-02-16",
|
||||
"is_moe": true,
|
||||
"num_experts": 256,
|
||||
"active_experts": 8,
|
||||
"active_parameters": 17000000000,
|
||||
"gguf_sources": [
|
||||
{
|
||||
"repo": "unsloth/Qwen3.5-397B-A17B-MTP-GGUF",
|
||||
"provider": "unsloth"
|
||||
}
|
||||
],
|
||||
"_discovered": true
|
||||
}
|
||||
]
|
||||
|
||||
@@ -99,6 +99,27 @@ def _estimate_speed(model, quant, run_mode, system):
|
||||
return k / pb * sm
|
||||
|
||||
|
||||
def _architecture_bonus(model):
|
||||
name = (model.get("name") or "").lower()
|
||||
arch = (model.get("architecture") or "").lower()
|
||||
text = f"{name} {arch}"
|
||||
|
||||
# Keep this intentionally small: hardware fit and speed still matter, but
|
||||
# current model families should not be scored the same as older Qwen2/LLama
|
||||
# era entries just because the parameter count is similar.
|
||||
if "qwen3.6" in text or "qwen3_6" in text:
|
||||
return 9
|
||||
if "qwen3.5" in text or "qwen3_5" in text:
|
||||
return 8
|
||||
if "qwen3-next" in text or "qwen3_next" in text:
|
||||
return 6
|
||||
if "qwen3" in text or arch.startswith("qwen3"):
|
||||
return 4
|
||||
if "qwen2.5" in text or "qwen2_5" in text:
|
||||
return 2
|
||||
return 0
|
||||
|
||||
|
||||
def _quality_score(model, quant, use_case):
|
||||
pb = params_b(model)
|
||||
if pb < 1:
|
||||
@@ -128,6 +149,7 @@ def _quality_score(model, quant, use_case):
|
||||
if "gemma" in name_lower:
|
||||
base += 1
|
||||
|
||||
base += _architecture_bonus(model)
|
||||
base += QUANT_QUALITY_PENALTY.get(quant, 0)
|
||||
|
||||
model_uc = infer_use_case(model)
|
||||
@@ -220,12 +242,13 @@ def _quant_bits(q):
|
||||
return 0
|
||||
|
||||
|
||||
def analyze_model(model, system, target_quant=None):
|
||||
def analyze_model(model, system, target_quant=None, scoring_use_case=None):
|
||||
pb = params_b(model)
|
||||
if pb <= 0:
|
||||
return None
|
||||
|
||||
use_case = infer_use_case(model)
|
||||
model_use_case = infer_use_case(model)
|
||||
score_use_case = scoring_use_case or "general"
|
||||
has_gpu = system.get("has_gpu", False)
|
||||
gpu_vram = (system.get("gpu_vram_gb") or 0) if has_gpu else 0
|
||||
gpu_count = system.get("gpu_count", 1) or 1
|
||||
@@ -242,6 +265,8 @@ def analyze_model(model, system, target_quant=None):
|
||||
ctx = model.get("context_length", 4096) or 4096
|
||||
|
||||
native_quant = model.get("quantization", "Q4_K_M")
|
||||
if "nvfp4" in (model.get("name") or "").lower():
|
||||
native_quant = "NVFP4"
|
||||
preq = is_prequantized(model)
|
||||
|
||||
# GGUF models can't be sharded across GPUs — use single GPU VRAM
|
||||
@@ -260,10 +285,13 @@ def analyze_model(model, system, target_quant=None):
|
||||
# Determine which quant to evaluate at
|
||||
if preq:
|
||||
# AWQ/GPTQ/FP8/MLX come at a fixed bit-width. If the user picked a
|
||||
# specific quant tier (e.g. Q8 → 8-bit), only keep prequant models whose
|
||||
# native bit-width matches — otherwise selecting Q8 would still surface
|
||||
# AWQ-4bit models, mixing 4- and 8-bit in one view.
|
||||
# GGUF quant tier (Q4/Q8/etc.), do not treat a same-bit AWQ/GPTQ build
|
||||
# as equivalent. "Q4" means llama.cpp/Ollama-style GGUF in this UI;
|
||||
# AWQ/GPTQ/FP8 are separate GPU-serving formats and must only appear
|
||||
# when explicitly selected or when no quant filter is applied.
|
||||
if target_quant:
|
||||
if not any(target_quant.startswith(p) for p in ("AWQ-", "GPTQ-", "FP8", "NVFP4")):
|
||||
return None
|
||||
_tb, _nb = _quant_bits(target_quant), _quant_bits(native_quant)
|
||||
if _tb and _nb and _tb != _nb:
|
||||
return None
|
||||
@@ -300,7 +328,7 @@ def analyze_model(model, system, target_quant=None):
|
||||
"parameter_count": model.get("parameter_count"),
|
||||
"params_b": round(pb, 1),
|
||||
"is_moe": is_moe,
|
||||
"use_case": use_case,
|
||||
"use_case": model_use_case,
|
||||
"fit_level": "too_tight",
|
||||
"run_mode": "no_fit",
|
||||
"quant": quant_to_try,
|
||||
@@ -334,12 +362,12 @@ def analyze_model(model, system, target_quant=None):
|
||||
|
||||
tps = _estimate_speed(model, quant, run_mode, system)
|
||||
|
||||
q_score = _quality_score(model, quant, use_case)
|
||||
s_score = _speed_score(tps, use_case)
|
||||
q_score = _quality_score(model, quant, score_use_case)
|
||||
s_score = _speed_score(tps, score_use_case)
|
||||
f_score = _fit_score(required_gb, budget)
|
||||
c_score = _context_score(fit_ctx, use_case)
|
||||
c_score = _context_score(fit_ctx, score_use_case)
|
||||
|
||||
wq, ws, wf, wc = USE_CASE_WEIGHTS.get(use_case, (0.45, 0.30, 0.15, 0.10))
|
||||
wq, ws, wf, wc = USE_CASE_WEIGHTS.get(score_use_case, (0.45, 0.30, 0.15, 0.10))
|
||||
composite = q_score * wq + s_score * ws + f_score * wf + c_score * wc
|
||||
|
||||
return {
|
||||
@@ -348,7 +376,7 @@ def analyze_model(model, system, target_quant=None):
|
||||
"parameter_count": model.get("parameter_count"),
|
||||
"params_b": round(pb, 1),
|
||||
"is_moe": is_moe,
|
||||
"use_case": use_case,
|
||||
"use_case": model_use_case,
|
||||
"fit_level": fit_level,
|
||||
"run_mode": run_mode,
|
||||
"quant": quant,
|
||||
@@ -419,21 +447,29 @@ def rank_models(system, use_case=None, limit=50, search=None, sort="score", quan
|
||||
results.sort(key=sort_fn, reverse=(sort != "vram"))
|
||||
return results[:limit]
|
||||
|
||||
# If user picked a prequantized format (AWQ/FP8/GPTQ), filter to only those models
|
||||
filter_native = quant and any(quant.startswith(p) for p in ("AWQ-", "GPTQ-", "FP8"))
|
||||
# If user picked a prequantized format (AWQ/FP8/GPTQ/NVFP4), filter to only those models
|
||||
filter_native = quant and any(quant.startswith(p) for p in ("AWQ-", "GPTQ-", "FP8", "NVFP4"))
|
||||
|
||||
system_backend = (system.get("backend") or "").lower()
|
||||
apple_silicon = system_backend in ("mps", "metal", "apple")
|
||||
rocm = system_backend == "rocm"
|
||||
|
||||
for m in models:
|
||||
native_q = m.get("quantization", "")
|
||||
if "nvfp4" in (m.get("name") or "").lower():
|
||||
native_q = "NVFP4"
|
||||
|
||||
# MLX-quantized models need the MLX runtime (mlx_lm), which Odysseus
|
||||
# doesn't generate serve commands for — only llama.cpp/Ollama (Metal)
|
||||
# and vLLM/SGLang (CUDA). MLX repos ship no GGUF alternative, so they're
|
||||
# unrunnable on every backend we support. Always drop them, on Apple
|
||||
# Silicon too, so the Cookbook never recommends a model it can't serve.
|
||||
if native_q.startswith("mlx-"):
|
||||
# MLX is Apple Silicon only. Hide MLX rows on non-Mac hardware scans,
|
||||
# but leave them visible on Metal/MPS so Mac support is not broken.
|
||||
if not apple_silicon and (native_q.startswith("mlx-") or "mlx" in (m.get("name") or "").lower()):
|
||||
continue
|
||||
|
||||
# ROCm support for vLLM/SGLang quantized safetensors is too brittle to
|
||||
# recommend blindly in the default scan. Keep AWQ/GPTQ/FP8 discoverable
|
||||
# only when the user explicitly picks that format from the quant filter;
|
||||
# otherwise prefer GGUF/Q* entries that Odysseus can route through
|
||||
# llama.cpp/Ollama without pretending "fits VRAM" means "servable".
|
||||
if rocm and is_prequantized(m) and not filter_native:
|
||||
continue
|
||||
|
||||
# On Apple Silicon the only serving engines are llama.cpp and Ollama,
|
||||
@@ -443,7 +479,8 @@ def rank_models(system, use_case=None, limit=50, search=None, sort="score", quan
|
||||
# default GGUF quant) and vLLM-only AWQ/GPTQ/FP8 builds alike. Without
|
||||
# this the Cookbook recommends models the Mac can't run; on CUDA these
|
||||
# stay visible because vLLM serves safetensors directly.
|
||||
if apple_silicon and not (m.get("is_gguf") or m.get("gguf_sources")):
|
||||
is_mlx = native_q.startswith("mlx-") or "mlx" in (m.get("name") or "").lower()
|
||||
if apple_silicon and not (m.get("is_gguf") or m.get("gguf_sources") or is_mlx):
|
||||
continue
|
||||
|
||||
# Format filter: AWQ tab → only AWQ models, FP8 tab → only FP8 models
|
||||
@@ -454,6 +491,8 @@ def rank_models(system, use_case=None, limit=50, search=None, sort="score", quan
|
||||
continue
|
||||
if quant.startswith("GPTQ") and not native_q.startswith("GPTQ"):
|
||||
continue
|
||||
if quant.startswith("NVFP4") and not native_q.startswith("NVFP4"):
|
||||
continue
|
||||
|
||||
if search:
|
||||
name = m.get("name", "").lower()
|
||||
@@ -461,7 +500,7 @@ def rank_models(system, use_case=None, limit=50, search=None, sort="score", quan
|
||||
if search.lower() not in name and search.lower() not in provider:
|
||||
continue
|
||||
|
||||
result = analyze_model(m, system, target_quant=quant)
|
||||
result = analyze_model(m, system, target_quant=quant, scoring_use_case=(use_case or "general"))
|
||||
if result is None:
|
||||
continue
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import re
|
||||
QUANT_HIERARCHY = ["Q8_0", "Q6_K", "Q5_K_M", "Q4_K_M", "Q3_K_M", "Q2_K"]
|
||||
|
||||
QUANT_BPP = {
|
||||
"F32": 4.0, "F16": 2.0, "BF16": 2.0, "FP8": 1.0,
|
||||
"F32": 4.0, "F16": 2.0, "BF16": 2.0, "FP8": 1.0, "NVFP4": 0.5,
|
||||
"Q8_0": 1.05, "Q6_K": 0.80, "Q5_K_M": 0.68,
|
||||
"Q4_K_M": 0.58, "Q4_0": 0.58, "Q3_K_M": 0.48, "Q2_K": 0.37,
|
||||
"AWQ-4bit": 0.50, "AWQ-8bit": 1.0,
|
||||
@@ -14,7 +14,7 @@ QUANT_BPP = {
|
||||
}
|
||||
|
||||
QUANT_SPEED_MULT = {
|
||||
"F16": 0.6, "BF16": 0.6, "FP8": 0.85,
|
||||
"F16": 0.6, "BF16": 0.6, "FP8": 0.85, "NVFP4": 1.1,
|
||||
"Q8_0": 0.8, "Q6_K": 0.95, "Q5_K_M": 1.0,
|
||||
"Q4_K_M": 1.15, "Q4_0": 1.15, "Q3_K_M": 1.25, "Q2_K": 1.35,
|
||||
"AWQ-4bit": 1.2, "AWQ-8bit": 0.85,
|
||||
@@ -23,7 +23,7 @@ QUANT_SPEED_MULT = {
|
||||
}
|
||||
|
||||
QUANT_QUALITY_PENALTY = {
|
||||
"F16": 0.0, "BF16": 0.0, "FP8": 0.0,
|
||||
"F16": 0.0, "BF16": 0.0, "FP8": 0.0, "NVFP4": 0.0,
|
||||
"Q8_0": 0.0, "Q6_K": -1.0, "Q5_K_M": -2.0,
|
||||
"Q4_K_M": -5.0, "Q4_0": -5.0, "Q3_K_M": -8.0, "Q2_K": -12.0,
|
||||
"AWQ-4bit": -3.0, "AWQ-8bit": 0.0,
|
||||
@@ -32,7 +32,7 @@ QUANT_QUALITY_PENALTY = {
|
||||
}
|
||||
|
||||
QUANT_BYTES_PER_PARAM = {
|
||||
"F16": 2.0, "BF16": 2.0, "FP8": 1.0,
|
||||
"F16": 2.0, "BF16": 2.0, "FP8": 1.0, "NVFP4": 0.5,
|
||||
"Q8_0": 1.0, "Q6_K": 0.75, "Q5_K_M": 0.625,
|
||||
"Q4_K_M": 0.5, "Q4_0": 0.5, "Q3_K_M": 0.375, "Q2_K": 0.25,
|
||||
"AWQ-4bit": 0.5, "AWQ-8bit": 1.0,
|
||||
@@ -41,12 +41,13 @@ QUANT_BYTES_PER_PARAM = {
|
||||
}
|
||||
|
||||
# Pre-quantized formats that should NOT go through the GGUF quant hierarchy
|
||||
PREQUANTIZED_PREFIXES = ("AWQ-", "GPTQ-", "mlx-", "FP8")
|
||||
PREQUANTIZED_PREFIXES = ("AWQ-", "GPTQ-", "mlx-", "FP8", "NVFP4")
|
||||
|
||||
|
||||
def is_prequantized(model):
    """Return True when *model* ships in a fixed pre-quantized format.

    A model counts as pre-quantized when its name contains ``nvfp4``
    (case-insensitive) or when its ``quantization`` field starts with one of
    ``PREQUANTIZED_PREFIXES``.

    Args:
        model: Mapping with optional ``name`` and ``quantization`` entries.
            Missing or ``None`` values are treated as empty strings.

    Returns:
        bool: whether the model should bypass the GGUF quant hierarchy.
    """
    # ``or ""`` also covers an explicit None value, which a plain
    # ``.get(key, "")`` default would pass through to ``startswith``.
    q = model.get("quantization") or ""
    name = (model.get("name") or "").lower()
    # The name check comes first so NVFP4 repos are recognised even when
    # their quantization field carries a different label.
    return "nvfp4" in name or any(q.startswith(p) for p in PREQUANTIZED_PREFIXES)
|
||||
|
||||
|
||||
def params_b(model):
|
||||
|
||||
@@ -502,6 +502,11 @@ async def _direct_fallback(
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
return {"error": f"web_fetch: timed out fetching {url}", "exit_code": 1}
|
||||
except Exception as e:
|
||||
# Direct URL fetches can hit bot protection / auth walls
|
||||
# (e.g. eBay 403). Treat that as a tool failure the model can
|
||||
# reason around, not an uncaught chat-stream 500.
|
||||
return {"error": f"web_fetch: {url}: {e}", "exit_code": 1}
|
||||
err = result.get("error")
|
||||
text = (result.get("content") or "").strip()
|
||||
title = result.get("title") or ""
|
||||
|
||||
@@ -27,6 +27,56 @@ import spinnerModule from './spinner.js';
|
||||
|
||||
// ── Error diagnosis ──
|
||||
|
||||
/**
 * Open the Cookbook on its Dependencies tab and, when a package name is
 * given, scroll to and briefly flash the matching dependency row.
 *
 * Rows are looked up under `#cookbook-deps-list`. If none are present yet the
 * lookup is retried every 100 ms, up to 45 more times, before giving up.
 *
 * @param {string} [pkgName=''] Package to highlight. Compared
 *   case-insensitively against each row's `data-pkg-name` and `data-dep-pip`.
 *   When empty, the tab is opened without highlighting anything.
 */
function _openCookbookDependencies(pkgName = '') {
  const cookbook = window.cookbookModule;
  if (cookbook && typeof cookbook.open === 'function') {
    cookbook.open({ tab: 'Dependencies' });
  } else {
    // No module API available: fall back to clicking the toolbar button.
    document.getElementById('tool-cookbook-btn')?.click();
  }

  const wanted = String(pkgName || '').toLowerCase();
  const nameOf = (r) => (r.dataset.pkgName || '').toLowerCase();
  const pipOf = (r) => (r.dataset.depPip || '').toLowerCase();

  const tryHighlight = (attempt = 0) => {
    const modal = document.getElementById('cookbook-modal');
    const tab = modal?.querySelector('.cookbook-tab[data-backend="Dependencies"]');
    if (tab && !tab.classList.contains('active')) tab.click();

    const rows = [...document.querySelectorAll('#cookbook-deps-list [data-pkg-name]')];
    if (!rows.length) {
      if (attempt < 45) setTimeout(() => tryHighlight(attempt + 1), 100);
      return;
    }
    if (!wanted) return;

    // Prefer an exact name match so a row that merely contains the wanted
    // text (e.g. in its pip spec) cannot shadow the real package row.
    // In the fuzzy fallback an empty row name is skipped explicitly:
    // `wanted.includes('')` is always true and would match any such row.
    const row =
      rows.find((r) => nameOf(r) === wanted) ||
      rows.find((r) => {
        const name = nameOf(r);
        return pipOf(r).includes(wanted) || (name !== '' && wanted.includes(name));
      });

    if (row) {
      row.scrollIntoView({ block: 'center' });
      row.classList.add('cookbook-pkg-flash');
      setTimeout(() => row.classList.remove('cookbook-pkg-flash'), 1800);
    }
  };
  tryHighlight();
}
|
||||
|
||||
/**
 * Dispatch a bubbling `cookbook:edit-serve` event on the task element that
 * contains a diagnosis panel.
 *
 * Does nothing when `panel` is missing, has no `closest` method, or is not
 * inside a `.cookbook-task` element.
 *
 * @param {Element|null|undefined} panel Diagnosis panel element.
 * @param {Object|null} [fields=null] Passed through as `event.detail.fields`.
 */
function _openServeEditFromDiagnosis(panel, fields = null) {
  const taskEl = panel?.closest?.('.cookbook-task');
  if (taskEl) {
    const editEvent = new CustomEvent('cookbook:edit-serve', {
      bubbles: true,
      detail: { fields },
    });
    taskEl.dispatchEvent(editEvent);
  }
}
|
||||
|
||||
/**
 * Request a serve edit with a fixed llama.cpp field set: backend `llamacpp`,
 * no GPUs, tp `1`, gpu_mem `0.80`, and `_forceBackend` set.
 *
 * NOTE(review): how the `cookbook:edit-serve` listener applies these fields
 * is not visible here; confirm against the handler.
 *
 * @param {Element|null|undefined} panel Diagnosis panel inside the task card.
 */
function _openCpuServeEdit(panel) {
  const cpuFields = {
    backend: 'llamacpp',
    gpus: '',
    tp: '1',
    gpu_mem: '0.80',
    _forceBackend: true,
  };
  _openServeEditFromDiagnosis(panel, cpuFields);
}
|
||||
|
||||
// Infer the gated base repo that single-file checkpoints need configs from
|
||||
function _inferBaseRepo(text) {
|
||||
if (!text) return null;
|
||||
@@ -218,6 +268,7 @@ export const ERROR_PATTERNS = [
|
||||
pattern: /vllm.*command not found|No module named vllm/i,
|
||||
message: 'vLLM is not installed or not in PATH.',
|
||||
fixes: [
|
||||
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('vllm') },
|
||||
{ label: 'Check environment is set', action: (panel) => {
|
||||
const el = panel.querySelector('[data-field="env_type"]');
|
||||
if (el) { el.focus(); el.style.borderColor = 'var(--red)'; }
|
||||
@@ -226,11 +277,21 @@ export const ERROR_PATTERNS = [
|
||||
},
|
||||
{
|
||||
pattern: /sglang.*command not found|No module named sglang|SGLang is not installed/i,
|
||||
message: 'SGLang is not installed or not in PATH. Open Cookbook → Dependencies and install sglang on this server.',
|
||||
message: 'SGLang is not installed or not in PATH.',
|
||||
fixes: [
|
||||
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('sglang') },
|
||||
{ label: 'Copy install command', action: () => _copyText('python3 -m pip install "sglang[all]"') },
|
||||
],
|
||||
},
|
||||
{
|
||||
pattern: /No accelerator \(CUDA, XPU, HPU, NPU, MUSA, MPS\) is available|Triton is not supported on current platform/i,
|
||||
message: 'SGLang needs a visible GPU/accelerator on this server.',
|
||||
suggestion: 'Suggested action: switch this serve config to llama.cpp for CPU/local serving, or choose a GPU server.',
|
||||
fixes: [
|
||||
{ label: 'Switch to llama.cpp', action: (panel) => _openCpuServeEdit(panel) },
|
||||
{ label: 'Choose GPU server', action: (panel) => _openServeEditFromDiagnosis(panel) },
|
||||
],
|
||||
},
|
||||
{
|
||||
pattern: /flashinfer.*version.*does not match|flashinfer-cubin version/i,
|
||||
message: 'FlashInfer version mismatch.',
|
||||
@@ -241,8 +302,12 @@ export const ERROR_PATTERNS = [
|
||||
},
|
||||
{
|
||||
pattern: /torch\.cuda\.is_available\(\).*False|No CUDA runtime/i,
|
||||
message: 'CUDA not available in this environment.',
|
||||
fixes: [],
|
||||
message: 'vLLM needs a visible CUDA/ROCm GPU.',
|
||||
suggestion: 'Suggested action: switch this serve config to llama.cpp for CPU/local serving, or choose a GPU server.',
|
||||
fixes: [
|
||||
{ label: 'Switch to llama.cpp', action: (panel) => _openCpuServeEdit(panel) },
|
||||
{ label: 'Choose GPU server', action: (panel) => _openServeEditFromDiagnosis(panel) },
|
||||
],
|
||||
},
|
||||
{
|
||||
pattern: /Engine core initialization failed/i,
|
||||
@@ -295,17 +360,20 @@ export const ERROR_PATTERNS = [
|
||||
},
|
||||
{
|
||||
pattern: /Either a revision or a version must be specified|transformers\.integrations\.hub_kernels|kernels\/layer/i,
|
||||
message: 'vLLM/Transformers kernel package mismatch.',
|
||||
message: 'Transformers/kernels package mismatch.',
|
||||
fixes: [
|
||||
{ label: 'Update vLLM/Transformers/kernels', action: (panel) => {
|
||||
{ label: 'Repair kernel package', action: (panel) => {
|
||||
const taskEl = panel.closest('.cookbook-task');
|
||||
const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
|
||||
const host = task?.remoteHost || '';
|
||||
const prefix = _buildEnvPrefix();
|
||||
const pipCmd = prefix ? prefix + ' python3 -m pip install -U vllm transformers kernels' : 'python3 -m pip install -U vllm transformers kernels';
|
||||
const pipCmd = prefix
|
||||
? prefix + ' python3 -m pip install --user --break-system-packages "kernels<0.15"'
|
||||
: 'python3 -m pip install --user --break-system-packages "kernels<0.15"';
|
||||
const cmd = host ? _sshCmd(host, pipCmd) : pipCmd;
|
||||
_launchServeTask('update-vllm-stack', 'pip-update', cmd);
|
||||
_launchServeTask('repair-kernels', 'pip-update', cmd);
|
||||
}},
|
||||
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('sglang') },
|
||||
],
|
||||
},
|
||||
{
|
||||
@@ -319,13 +387,24 @@ export const ERROR_PATTERNS = [
|
||||
pattern: /llama-server.*command not found|llama\.cpp.*not found|No module named.*llama_cpp|No module named 'starlette_context'/i,
|
||||
message: 'llama-cpp-python server is not installed. Run: pip install "llama-cpp-python[server]"',
|
||||
fixes: [
|
||||
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('llama_cpp') },
|
||||
{ label: 'Copy install command', action: () => _copyText('pip install "llama-cpp-python[server]"') },
|
||||
],
|
||||
},
|
||||
{
|
||||
pattern: /CUDA Toolkit not found|Unable to find cudart library|missing:\s*CUDA_CUDART/i,
|
||||
message: 'llama.cpp found nvcc, but the CUDA runtime library is missing.',
|
||||
suggestion: 'Suggested action: relaunch with the updated runner so llama.cpp builds CPU-only, or install a complete CUDA toolkit/runtime on this server for GPU llama.cpp.',
|
||||
fixes: [
|
||||
{ label: 'Edit serve', action: (panel) => _openServeEditFromDiagnosis(panel) },
|
||||
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('llama_cpp') },
|
||||
],
|
||||
},
|
||||
{
|
||||
pattern: /No module named ['"]?torch|No module named ['"]?diffusers|diffusers.*command not found/i,
|
||||
message: 'Diffusion serving needs PyTorch and diffusers. Install diffusers from Cookbook → Dependencies.',
|
||||
fixes: [
|
||||
{ label: 'Open Dependencies', action: () => _openCookbookDependencies('diffusers') },
|
||||
{ label: 'Copy install command', action: () => _copyText('python3 -m pip install "diffusers[torch]"') },
|
||||
],
|
||||
},
|
||||
@@ -402,10 +481,32 @@ export function _diagnose(text) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Build the Markdown "troubleshooting bundle" that the diagnosis copy button
// places on the clipboard.
//
//   task           - task record or null/undefined; when present its id, type,
//                    status, model and host are listed under "### Task".
//   diagnosis      - object whose `message` is shown under "### Diagnosis"
//                    ('(none)' when missing).
//   sourceText     - captured output; trimmed and emitted in a fenced block.
//                    Skipped entirely when it is empty or whitespace-only.
//   suggestionText - optional hint; a leading "Suggested action:" prefix is
//                    stripped because the section heading already says it.
//
// Returns the bundle as a single newline-joined string.
function _diagnosisCopyBundle(task, diagnosis, sourceText, suggestionText) {
  // Pick a fence longer than any backtick run inside the content so logs that
  // themselves contain ``` cannot terminate the code block early.
  const fenceFor = (text) => {
    const runs = String(text).match(/`+/g) || [];
    const longest = runs.reduce((max, run) => Math.max(max, run.length), 0);
    return '`'.repeat(Math.max(3, longest + 1));
  };

  const lines = ['## Odysseus Cookbook troubleshooting'];

  if (task) {
    const port = task.sshPort ? `:${task.sshPort}` : '';
    lines.push(
      '',
      '### Task',
      `- ID: ${task.sessionId || task.id || 'unknown'}`,
      `- Type: ${task.type || 'unknown'}`,
      `- Status: ${task.status || 'unknown'}`,
      `- Model: ${task.payload?.repo_id || task.name || 'unknown'}`,
      `- Host: ${task.remoteHost || 'local'}${port}`,
    );
  }

  lines.push('', '### Diagnosis', diagnosis?.message || '(none)');

  if (suggestionText) {
    // String() guards against a non-string suggestion reaching .replace().
    const hint = String(suggestionText).replace(/^Suggested action:\s*/i, '');
    lines.push('', '### Suggested action', hint);
  }

  const cmd = task?.payload?._cmd || '';
  if (cmd) {
    const fence = fenceFor(cmd);
    lines.push('', '### Launch command', `${fence}bash`, cmd, fence);
  }

  // Trim first: whitespace-only output would otherwise yield an empty section.
  const captured = sourceText ? String(sourceText).trim() : '';
  if (captured) {
    const fence = fenceFor(captured);
    lines.push('', '### Captured output', `${fence}text`, captured, fence);
  }

  return lines.join('\n');
}
|
||||
|
||||
export function _showDiagnosis(panel, diagnosis, sourceText) {
|
||||
if (panel._lastDiagMsg === diagnosis.message) return;
|
||||
if (panel._diagDismissed === diagnosis.message) return; // stay dismissed until new error
|
||||
const wasCollapsed = panel._lastDiagMsg === diagnosis.message && panel._diagCollapsed;
|
||||
if (panel._diagDismissed === diagnosis.message) return;
|
||||
panel._lastDiagMsg = diagnosis.message;
|
||||
panel._diagCollapsed = !!wasCollapsed;
|
||||
|
||||
let diag = panel.querySelector('.cookbook-diagnosis');
|
||||
if (!diag) {
|
||||
@@ -417,57 +518,161 @@ export function _showDiagnosis(panel, diagnosis, sourceText) {
|
||||
}
|
||||
diag.classList.remove('hidden');
|
||||
diag.innerHTML = '';
|
||||
const taskEl = panel?.closest?.('.cookbook-task');
|
||||
const task = taskEl ? _loadTasks().find(t => t.sessionId === taskEl.dataset.taskId) : null;
|
||||
const fixes = [...(diagnosis.fixes || [])];
|
||||
if (task?.type === 'serve' && task.payload?._cmd && !fixes.some(f => f.label === 'Edit serve')) {
|
||||
fixes.push({ label: 'Edit serve', action: (p) => _openServeEditFromDiagnosis(p) });
|
||||
}
|
||||
const suggestionText = diagnosis.suggestion || (fixes.length
|
||||
? `Suggested action: ${fixes[0].label}.`
|
||||
: 'Suggested action: copy the error and adjust the serve settings.');
|
||||
|
||||
const header = document.createElement('div');
|
||||
header.style.cssText = 'display:flex;align-items:center;justify-content:space-between;';
|
||||
header.className = 'cookbook-diag-header';
|
||||
|
||||
const msg = document.createElement('div');
|
||||
msg.className = 'cookbook-diag-message';
|
||||
msg.textContent = diagnosis.message;
|
||||
header.appendChild(msg);
|
||||
const fold = document.createElement('button');
|
||||
fold.className = 'cookbook-diag-fold';
|
||||
fold.type = 'button';
|
||||
fold.innerHTML = '<span class="cookbook-diag-chevron">▾</span><span>Error message:</span>';
|
||||
header.appendChild(fold);
|
||||
|
||||
const copy = document.createElement('button');
|
||||
copy.className = 'cookbook-diag-copy';
|
||||
copy.type = 'button';
|
||||
copy.title = 'Copy troubleshooting bundle';
|
||||
copy.setAttribute('aria-label', 'Copy troubleshooting bundle');
|
||||
copy.innerHTML = '<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2"/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg>';
|
||||
copy.addEventListener('click', (e) => {
|
||||
e.stopPropagation();
|
||||
_copyText(_diagnosisCopyBundle(task, diagnosis, sourceText, suggestionText));
|
||||
copy.classList.add('copied');
|
||||
copy.innerHTML = '<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.6" stroke-linecap="round" stroke-linejoin="round"><polyline points="20 6 9 17 4 12"/></svg>';
|
||||
setTimeout(() => {
|
||||
if (!copy.isConnected) return;
|
||||
copy.classList.remove('copied');
|
||||
copy.innerHTML = '<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2"/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg>';
|
||||
}, 1200);
|
||||
});
|
||||
header.appendChild(copy);
|
||||
|
||||
const dismiss = document.createElement('button');
|
||||
dismiss.className = 'close-btn';
|
||||
dismiss.style.cssText = 'width:16px;height:16px;font-size:9px;flex-shrink:0;';
|
||||
dismiss.textContent = '\u2715';
|
||||
dismiss.addEventListener('click', () => { panel._diagDismissed = diagnosis.message; _clearDiagnosis(panel); });
|
||||
dismiss.className = 'cookbook-diag-dismiss';
|
||||
dismiss.type = 'button';
|
||||
dismiss.title = 'Dismiss error';
|
||||
dismiss.setAttribute('aria-label', 'Dismiss error');
|
||||
dismiss.textContent = '×';
|
||||
dismiss.addEventListener('click', (e) => {
|
||||
e.stopPropagation();
|
||||
panel._diagDismissed = diagnosis.message;
|
||||
_clearDiagnosis(panel);
|
||||
});
|
||||
header.appendChild(dismiss);
|
||||
|
||||
diag.appendChild(header);
|
||||
|
||||
if (diagnosis.fixes && diagnosis.fixes.length) {
|
||||
const row = document.createElement('div');
|
||||
row.className = 'cookbook-diag-fixes';
|
||||
for (const fix of diagnosis.fixes) {
|
||||
const btn = document.createElement('button');
|
||||
btn.className = 'cookbook-btn cookbook-diag-btn';
|
||||
btn.textContent = fix.label;
|
||||
btn.addEventListener('click', async () => {
|
||||
if (btn.dataset.busy) return;
|
||||
btn.dataset.busy = '1';
|
||||
// Spinner feedback while the fix runs (kill + relaunch takes a moment).
|
||||
const _orig = btn.textContent;
|
||||
const body = document.createElement('div');
|
||||
body.className = 'cookbook-diag-body';
|
||||
body.classList.toggle('hidden', panel._diagCollapsed);
|
||||
fold.querySelector('.cookbook-diag-chevron').textContent = panel._diagCollapsed ? '▸' : '▾';
|
||||
const msg = document.createElement('div');
|
||||
msg.className = 'cookbook-diag-message';
|
||||
msg.textContent = diagnosis.message;
|
||||
body.appendChild(msg);
|
||||
const suggestion = document.createElement('div');
|
||||
suggestion.className = 'cookbook-diag-suggestion';
|
||||
suggestion.textContent = suggestionText;
|
||||
body.appendChild(suggestion);
|
||||
fold.addEventListener('click', (e) => {
|
||||
e.stopPropagation();
|
||||
panel._diagCollapsed = !panel._diagCollapsed;
|
||||
body.classList.toggle('hidden', panel._diagCollapsed);
|
||||
fold.querySelector('.cookbook-diag-chevron').textContent = panel._diagCollapsed ? '▸' : '▾';
|
||||
});
|
||||
diag.appendChild(body);
|
||||
|
||||
const runFix = async (fix, button, busyLabel = fix.label, onStart = null, onDone = null) => {
|
||||
if (!fix || !button || button.dataset.busy) return;
|
||||
button.dataset.busy = '1';
|
||||
const _orig = button.textContent;
|
||||
const wp = spinnerModule.createWhirlpool(12);
|
||||
wp.element.style.cssText = 'display:inline-block;vertical-align:middle;width:12px;height:12px;margin-right:5px;';
|
||||
btn.textContent = '';
|
||||
btn.appendChild(wp.element);
|
||||
button.textContent = '';
|
||||
button.appendChild(wp.element);
|
||||
const _lbl = document.createElement('span');
|
||||
_lbl.textContent = _orig;
|
||||
_lbl.textContent = busyLabel;
|
||||
_lbl.style.verticalAlign = 'middle';
|
||||
btn.appendChild(_lbl);
|
||||
button.appendChild(_lbl);
|
||||
try {
|
||||
if (typeof onStart === 'function') onStart();
|
||||
await fix.action(panel, sourceText);
|
||||
} catch (e) {
|
||||
console.error('[cookbook] diagnosis fix failed', e);
|
||||
} catch (err) {
|
||||
console.error('[cookbook] diagnosis fix failed', err);
|
||||
} finally {
|
||||
// Retries animate the whole card away (button goes with it). For fixes
|
||||
// that leave the card in place, restore the label.
|
||||
if (btn.isConnected) { try { wp.destroy(); } catch {} btn.textContent = _orig; delete btn.dataset.busy; }
|
||||
if (button.isConnected) {
|
||||
try { wp.destroy(); } catch {}
|
||||
button.textContent = _orig;
|
||||
delete button.dataset.busy;
|
||||
}
|
||||
if (typeof onDone === 'function') onDone();
|
||||
}
|
||||
};
|
||||
|
||||
if (fixes.length) {
|
||||
const row = document.createElement('div');
|
||||
row.className = 'cookbook-diag-fixes';
|
||||
|
||||
if (fixes.length <= 3) {
|
||||
for (const fix of fixes) {
|
||||
const btn = document.createElement('button');
|
||||
btn.className = 'cookbook-btn cookbook-diag-btn';
|
||||
btn.type = 'button';
|
||||
btn.textContent = fix.label;
|
||||
btn.addEventListener('click', (e) => {
|
||||
e.stopPropagation();
|
||||
runFix(fix, btn);
|
||||
});
|
||||
row.appendChild(btn);
|
||||
}
|
||||
diag.appendChild(row);
|
||||
body.appendChild(row);
|
||||
return;
|
||||
}
|
||||
|
||||
const wrap = document.createElement('div');
|
||||
wrap.className = 'cookbook-diag-actions';
|
||||
|
||||
const trigger = document.createElement('button');
|
||||
trigger.className = 'cookbook-btn cookbook-diag-action-trigger';
|
||||
trigger.type = 'button';
|
||||
trigger.textContent = 'Actions';
|
||||
trigger.appendChild(document.createTextNode(' ▾'));
|
||||
wrap.appendChild(trigger);
|
||||
|
||||
const menu = document.createElement('div');
|
||||
menu.className = 'dropdown cookbook-diag-menu hidden';
|
||||
for (const fix of fixes) {
|
||||
const item = document.createElement('button');
|
||||
item.type = 'button';
|
||||
item.textContent = fix.label;
|
||||
item.addEventListener('click', async (e) => {
|
||||
e.stopPropagation();
|
||||
if (item.dataset.busy || trigger.dataset.busy) return;
|
||||
item.dataset.busy = '1';
|
||||
await runFix(fix, trigger, fix.label, () => menu.classList.add('hidden'), () => delete item.dataset.busy);
|
||||
});
|
||||
menu.appendChild(item);
|
||||
}
|
||||
wrap.appendChild(menu);
|
||||
trigger.addEventListener('click', (e) => {
|
||||
e.stopPropagation();
|
||||
if (trigger.dataset.busy) return;
|
||||
document.querySelectorAll('.cookbook-diag-menu').forEach(m => {
|
||||
if (m !== menu) m.classList.add('hidden');
|
||||
});
|
||||
menu.classList.toggle('hidden');
|
||||
});
|
||||
row.appendChild(wrap);
|
||||
body.appendChild(row);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -193,6 +193,8 @@ export function _renderGpuToggles(system) {
|
||||
if (quantSel) {
|
||||
if (count <= 1) {
|
||||
quantSel.value = 'Q4_K_M'; // RAM or 1 GPU -> Q4 sweet spot
|
||||
} else if (String(system?.backend || '').toLowerCase() === 'rocm') {
|
||||
quantSel.value = 'Q4_K_M'; // ROCm default stays GGUF/local-safe; AWQ is explicit only
|
||||
} else {
|
||||
quantSel.value = 'AWQ-4bit'; // Multi-GPU -> AWQ for vLLM
|
||||
}
|
||||
|
||||
@@ -260,12 +260,31 @@ export function _detectBackend(model) {
|
||||
const q = (model.quant || '').toUpperCase();
|
||||
const sysBackend = String(_hwfitCache?.system?.backend || '').toLowerCase();
|
||||
const isRocm = sysBackend === 'rocm';
|
||||
const isAppleSilicon = ['metal', 'mps', 'apple'].includes(sysBackend);
|
||||
const _nm = `${model.repo_id || ''} ${model.path || ''} ${model.name || ''}`.toLowerCase();
|
||||
if (!isAppleSilicon && (/\bmlx\b|mlx-|_mlx/i.test(_nm) || q.startsWith('MLX'))) {
|
||||
return { backend: 'unsupported', label: 'Unsupported' };
|
||||
}
|
||||
const isAwqLike = /^AWQ|^GPTQ|^NVFP4/.test(q) || q === 'FP8' || /\b(awq|gptq|fp8|nvfp4)\b/i.test(_nm);
|
||||
const isGgufLike = model.is_gguf || /^Q[2-8]/.test(q) || /^IQ/.test(q) || q === 'GGUF' || _nm.includes('gguf');
|
||||
|
||||
// Image gen models → diffusers
|
||||
if (model.is_image_gen || model.is_diffusion || model._tag === 'image') {
|
||||
return { backend: 'diffusers', label: 'Diffusers' };
|
||||
}
|
||||
|
||||
// AWQ / GPTQ / FP8 are safetensors GPU-serving formats. Never route them
|
||||
// through llama.cpp/Ollama just because the host is Mac/Windows; those engines
|
||||
// need GGUF. The UI will warn/block on Metal where vLLM/SGLang aren't viable.
|
||||
if (isAwqLike) {
|
||||
return { backend: 'vllm', label: 'vLLM' };
|
||||
}
|
||||
|
||||
// GGUF → llama.cpp/Ollama-compatible.
|
||||
if (isGgufLike) {
|
||||
return { backend: 'llamacpp', label: 'llama.cpp' };
|
||||
}
|
||||
|
||||
// Windows → default to llama.cpp (no vLLM support on Windows)
|
||||
if (_isWindows()) {
|
||||
return { backend: 'llamacpp', label: 'llama.cpp' };
|
||||
@@ -278,19 +297,6 @@ export function _detectBackend(model) {
|
||||
return { backend: 'llamacpp', label: 'llama.cpp' };
|
||||
}
|
||||
|
||||
// AWQ / GPTQ / FP8 → vLLM
|
||||
if (/^AWQ|^GPTQ/.test(q) || q === 'FP8') {
|
||||
return { backend: 'vllm', label: 'vLLM' };
|
||||
}
|
||||
|
||||
// GGUF → llama.cpp. Match the quant tag OR a gguf hint in the repo/path/name:
|
||||
// a raw .gguf file often has no quant field, which made it fall through to the
|
||||
// vLLM default below.
|
||||
const _nm = `${model.repo_id || ''} ${model.path || ''} ${model.name || ''}`.toLowerCase();
|
||||
if (model.is_gguf || /^Q[2-8]/.test(q) || /^IQ/.test(q) || q === 'GGUF' || _nm.includes('gguf')) {
|
||||
return { backend: 'llamacpp', label: 'llama.cpp' };
|
||||
}
|
||||
|
||||
// ROCm/AMD machines should not blindly default HF safetensors models to
|
||||
// vLLM. SGLang is the safer OpenAI-compatible default for plain HF text
|
||||
// repos there; llama.cpp still wins above whenever the model is GGUF.
|
||||
@@ -1020,6 +1026,16 @@ function _wireTabEvents(body) {
|
||||
// Download input
|
||||
const dlBtn = document.getElementById('cookbook-dl-btn');
|
||||
const dlInput = document.getElementById('cookbook-dl-repo');
|
||||
const dlCardToggle = document.getElementById('cookbook-download-card-toggle');
|
||||
const dlCardBody = document.getElementById('cookbook-download-card-body');
|
||||
const dlCardArrow = document.getElementById('cookbook-download-card-arrow');
|
||||
if (dlCardToggle && dlCardBody) {
|
||||
dlCardToggle.addEventListener('click', () => {
|
||||
const isOpen = dlCardBody.style.display !== 'none';
|
||||
dlCardBody.style.display = isOpen ? 'none' : 'block';
|
||||
if (dlCardArrow) dlCardArrow.style.transform = isOpen ? 'rotate(0deg)' : 'rotate(90deg)';
|
||||
});
|
||||
}
|
||||
if (dlBtn && dlInput) {
|
||||
function _stripHfUrl(input) {
|
||||
let repo = input.trim();
|
||||
@@ -1099,8 +1115,12 @@ function _wireTabEvents(body) {
|
||||
if (hfToggle && hfList) {
|
||||
let _loaded = false;
|
||||
// Per-server VRAM cache so we don't re-probe on every expand
|
||||
const _vramCache = {};
|
||||
async function _getSelectedServerVram() {
|
||||
const _hwCache = {};
|
||||
// True when a HuggingFace listing entry's repo id or tags mention a
// GPU-oriented quantization keyword (awq, gptq, fp8, 4bit, int4).
// A missing entry, repo id or tag list is treated as empty text.
function _hfModelLooksAwqLike(m) {
  const repoId = m?.repo_id || '';
  const tagText = (m?.tags || []).join(' ');
  const haystack = `${repoId} ${tagText}`.toLowerCase();
  const quantKeyword = /\b(awq|gptq|fp8|4bit|int4)\b/;
  return quantKeyword.test(haystack);
}
|
||||
async function _getSelectedServerHw() {
|
||||
// Prefer the "What Fits" dropdown (the main control that shows hardware);
|
||||
// fall back to the download dropdown. This is the server the list ranks for.
|
||||
const dlSrv = document.getElementById('hwfit-server-select') || document.getElementById('hwfit-dl-server');
|
||||
@@ -1117,7 +1137,7 @@ function _wireTabEvents(body) {
|
||||
}
|
||||
}
|
||||
const cacheKey = host || 'local';
|
||||
if (_vramCache[cacheKey] !== undefined) return _vramCache[cacheKey];
|
||||
if (_hwCache[cacheKey]) return _hwCache[cacheKey];
|
||||
// Fetch system info for this server from hwfit
|
||||
try {
|
||||
const qp = new URLSearchParams();
|
||||
@@ -1127,13 +1147,13 @@ function _wireTabEvents(body) {
|
||||
const r = await fetch(`/api/hwfit/system?${qp}`);
|
||||
if (r.ok) {
|
||||
const sys = await r.json();
|
||||
const v = sys?.gpu_vram_gb || 0;
|
||||
_vramCache[cacheKey] = v;
|
||||
return v;
|
||||
const hw = { vram: sys?.gpu_vram_gb || 0, backend: String(sys?.backend || '').toLowerCase() };
|
||||
_hwCache[cacheKey] = hw;
|
||||
return hw;
|
||||
}
|
||||
} catch {}
|
||||
_vramCache[cacheKey] = 0;
|
||||
return 0;
|
||||
_hwCache[cacheKey] = { vram: 0, backend: '' };
|
||||
return _hwCache[cacheKey];
|
||||
}
|
||||
async function _loadLatest() {
|
||||
// Match the Dependencies loader: whirlpool spinner + text label so the
|
||||
@@ -1152,7 +1172,8 @@ function _wireTabEvents(body) {
|
||||
} catch {
|
||||
hfList.innerHTML = '<div class="hwfit-loading">Scanning models…</div>';
|
||||
}
|
||||
const vram = await _getSelectedServerVram();
|
||||
const hwInfo = await _getSelectedServerHw();
|
||||
const vram = hwInfo.vram || 0;
|
||||
try {
|
||||
let lastErr = '';
|
||||
const _fetchLatest = async (v) => {
|
||||
@@ -1168,6 +1189,9 @@ function _wireTabEvents(body) {
|
||||
if (!models.length && vram > 0) {
|
||||
models = await _fetchLatest(0);
|
||||
}
|
||||
if (['rocm', 'metal', 'mps', 'apple', 'generic', 'cpu'].includes(hwInfo.backend)) {
|
||||
models = models.filter(m => !_hfModelLooksAwqLike(m));
|
||||
}
|
||||
if (!models.length) {
|
||||
// Distinguish "the HF API failed" from "nothing matched" so an outage
|
||||
// doesn't masquerade as no-fitting-models.
|
||||
@@ -1351,10 +1375,12 @@ function _renderRecipes() {
|
||||
// Search group
|
||||
html += '<div class="cookbook-group" data-backend-group="Search" style="flex:0 0 auto;">';
|
||||
html += '<div class="admin-card" style="display:flex;flex-direction:column;overflow:hidden;">';
|
||||
html += '<div style="display:flex;align-items:baseline;gap:8px;margin-bottom:2px;">';
|
||||
html += '<button type="button" id="cookbook-download-card-toggle" style="display:flex;align-items:baseline;gap:8px;margin-bottom:2px;width:100%;background:transparent;border:0;padding:0;color:inherit;text-align:left;cursor:pointer;">';
|
||||
html += '<h2 style="margin:0;padding:0;line-height:1;">Download</h2>';
|
||||
html += '</div>';
|
||||
html += '<p class="memory-desc doclib-desc" style="margin-top:6px;">Download from <a href="https://huggingface.co/models" target="_blank" rel="noopener" style="color:var(--accent,var(--red));text-decoration:none;"><svg width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" style="vertical-align:-1px;margin-right:1px;"><path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"/><polyline points="15 3 21 3 21 9"/><line x1="10" y1="14" x2="21" y2="3"/></svg>HuggingFace</a> by pasting model link, or download directly in the Scan section below.</p>';
|
||||
html += '<span id="cookbook-download-card-arrow" style="margin-left:auto;display:inline-block;transition:transform 0.15s;font-size:13px;line-height:1;">\u25B8</span>';
|
||||
html += '</button>';
|
||||
html += '<div id="cookbook-download-card-body" style="display:none;">';
|
||||
html += '<p class="memory-desc doclib-desc" style="margin-top:6px;">Download directly from Scan, or paste a HuggingFace model link.</p>';
|
||||
html += '<div class="hwfit-container" id="hwfit-container">';
|
||||
|
||||
// Section 1: Settings
|
||||
@@ -1383,7 +1409,7 @@ function _renderRecipes() {
|
||||
// silently sending downloads to the wrong server. An empty selection means Local; the user
|
||||
// chooses a remote server explicitly via the dropdown.
|
||||
|
||||
// Download input
|
||||
// Manual download input
|
||||
html += `<div style="margin-top:7px;margin-bottom:2px;display:flex;gap:4px;align-items:center;">`;
|
||||
if (_es.servers.length > 1) {
|
||||
html += `<select class="cookbook-field-input hwfit-dl-server" id="hwfit-dl-server" style="height:28px;position:relative;top:0px;">`;
|
||||
@@ -1399,7 +1425,7 @@ function _renderRecipes() {
|
||||
html += `<button class="cookbook-btn cookbook-dl-btn" id="cookbook-dl-btn">Download</button>`;
|
||||
html += `</div>`;
|
||||
// Latest HF models that fit — collapsible card list
|
||||
html += `<div style="margin-top:2px;position:relative;top:-8px;">`;
|
||||
html += `<div style="margin-top:5px;position:relative;top:-3px;">`;
|
||||
html += `<div style="display:flex;gap:4px;align-items:center;">`;
|
||||
html += `<button type="button" class="memory-toolbar-btn" id="cookbook-hf-latest-toggle" style="flex:1;text-align:left;height:26px;display:flex;align-items:center;gap:6px;border-radius:4px;">`;
|
||||
html += `<span id="cookbook-hf-latest-arrow" style="display:inline-block;transition:transform 0.15s;pointer-events:none;">\u25B8</span>`;
|
||||
@@ -1411,7 +1437,7 @@ function _renderRecipes() {
|
||||
html += `</div>`;
|
||||
|
||||
// Search section
|
||||
html += '</div></div></div>';
|
||||
html += '</div></div></div></div>';
|
||||
html += '<div class="cookbook-group" data-backend-group="Search">';
|
||||
html += '<div class="admin-card" style="flex:1;display:flex;flex-direction:column;overflow:hidden;">';
|
||||
html += '<div style="display:flex;align-items:baseline;gap:8px;margin-bottom:2px;">';
|
||||
|
||||
@@ -86,6 +86,9 @@ function _ggufIncludePattern(model, source) {
|
||||
|
||||
// Explain to the user why no GGUF download is available for `model`.
// Names containing "nvfp4" get a dedicated message; everything else gets the
// generic "no GGUF source configured" text. A missing name reads "this model".
function _missingGgufMessage(model) {
  const label = model?.name || 'this model';
  const isNvfp4 = /\bnvfp4\b/i.test(label);
  return isNvfp4
    ? `${label} is an NVIDIA NVFP4 checkpoint, not a GGUF download. Pick the base model row with an Unsloth GGUF source, or paste the GGUF repo directly.`
    : `No GGUF source is configured for ${label}. Pick a model with a GGUF source, or paste the GGUF repo in Download.`;
}
|
||||
|
||||
|
||||
@@ -34,12 +34,106 @@ function _taskBadge(task) {
|
||||
return { text: _statusLabel(task.status, task.type), cls: 'cookbook-task-' + task.status };
|
||||
}
|
||||
|
||||
// Decide whether a task card may be cleared.
// Not clearable: a missing task, a running task, a serve task that is ready
// (status 'ready' or `_serveReady` set), and a finished download whose payload
// has no `_dep` flag. Otherwise the task must be in a terminal status.
function _canClearTask(task) {
  if (!task) return false;

  const { status, type } = task;
  if (status === 'running') return false;

  const liveServe = type === 'serve' && (status === 'ready' || Boolean(task._serveReady));
  if (liveServe) return false;

  const finishedNonDepDownload = type === 'download' && status === 'done' && !task.payload?._dep;
  if (finishedNonDepDownload) return false;

  switch (status) {
    case 'done':
    case 'stopped':
    case 'error':
    case 'crashed':
    case 'failed':
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
// Text for the clear pill. The task argument is accepted but not consulted:
// every task gets the same label.
function _clearPillLabel(task) {
  const label = 'clear';
  return label;
}
|
||||
|
||||
// Whether a crash report should be offered for `task`: either it is a serve
// task flagged `_unreachable`, or its status is error/crashed/failed.
function _shouldOfferCrashReport(task) {
  if (!task) return false;

  const unreachableServe = Boolean(task._unreachable) && task.type === 'serve';
  if (unreachableServe) return true;

  const failureStatuses = new Set(['error', 'crashed', 'failed']);
  return failureStatuses.has(task.status);
}
|
||||
|
||||
// Heuristic: the task's repo id or name mentions AWQ/GPTQ/FP8 while its launch
// command or captured output mentions a llama.cpp / Ollama style backend.
// Both texts are lower-cased before matching.
function _serveTaskLooksAwqOnLocalBackend(task, outputText = '') {
  const payload = task?.payload;
  const modelText = `${payload?.repo_id || ''} ${task?.name || ''}`.toLowerCase();
  const runText = `${payload?._cmd || ''} ${outputText || ''}`.toLowerCase();

  const mentionsGpuQuant = /\b(awq|gptq|fp8)\b/.test(modelText);
  if (!mentionsGpuQuant) return false;

  return /(llama-server|llama_cpp\.server|ollama|ggml_cuda_enable_unified_memory)/.test(runText);
}
|
||||
|
||||
// Heuristic: the task's repo id or name mentions AWQ/GPTQ/FP8 and the captured
// output contains one of the "no usable accelerator" phrases matched below.
function _serveTaskLooksAwqWithoutUsableAccelerator(task, outputText = '') {
  const modelText = `${task?.payload?.repo_id || ''} ${task?.name || ''}`.toLowerCase();
  const logText = String(outputText || '').toLowerCase();

  if (!/\b(awq|gptq|fp8)\b/.test(modelText)) return false;

  const noAccelerator = /(no accelerator|no cuda runtime|failed to infer device type|triton is not supported|0 active driver)/i;
  return noAccelerator.test(logText);
}
|
||||
|
||||
// Open the Cookbook on its Search tab and pre-fill the search box so the user
// can look for a GGUF build of the model behind `task`.
//
// The search term is the last path segment of the task's repo id (or name)
// with any trailing quantization suffix (-AWQ, _GPTQ, -4bit, ...) and trailing
// dashes/underscores removed. The quant selector is set to Q4_K_M and the
// hwfit list is refreshed. All DOM work runs from an 80 ms timer.
// NOTE(review): the delay presumably gives the modal time to render — confirm.
async function _openDownloadForGgufTask(task) {
  const raw = task?.payload?.repo_id || task?.name || '';
  const leaf = String(raw).split('/').pop();
  const stripped = leaf
    .replace(/[-_](?:AWQ|GPTQ|FP8|4bit|8bit|Int4|Int8).*$/i, '')
    .replace(/[-_]+$/g, '');
  // Fall back to the unstripped segment, then the raw value, if stripping
  // left nothing.
  const modelName = stripped || leaf || raw;

  const cookbook = window.cookbookModule;
  const hasOpenApi = cookbook && typeof cookbook.open === 'function';
  if (hasOpenApi) {
    cookbook.open({ tab: 'Search' });
  } else {
    document.getElementById('tool-cookbook-btn')?.click();
  }

  const prefillSearch = async () => {
    const modal = document.getElementById('cookbook-modal');
    const searchTab = modal?.querySelector('.cookbook-tab[data-backend="Search"]');
    if (searchTab && !searchTab.classList.contains('active')) searchTab.click();

    const searchInput = document.getElementById('hwfit-search');
    if (searchInput) {
      searchInput.value = modelName;
      searchInput.dispatchEvent(new Event('input', { bubbles: true }));
      searchInput.focus();
    }

    const quantSelect = document.getElementById('hwfit-quant');
    if (quantSelect) {
      quantSelect.value = 'Q4_K_M';
      quantSelect.dispatchEvent(new Event('change', { bubbles: true }));
    }

    // Best effort: a failed import or refresh leaves the pre-filled form as is.
    try {
      const hwfit = await import('./cookbook-hwfit.js');
      if (typeof hwfit._hwfitFetch === 'function') hwfit._hwfitFetch(true);
    } catch {}
  };

  setTimeout(prefillSearch, 80);
}
|
||||
|
||||
// Produce a diagnosis object ({ message, suggestion, fixes }) for a serve task
// that ended in a terminal status (stopped/error/crashed/failed).
//
// Returns null when there is no task, the task is not a serve task, it is not
// in a terminal status, or there is no non-blank output to inspect.
// Checks run in order: quantized model on a llama.cpp/Ollama backend, quantized
// model without a usable accelerator, the generic _diagnose() patterns, and
// finally a fallback whose wording depends on whether the output looks like a
// llama.cpp build.
function _terminalServeDiagnosis(task, outputText) {
  const out = String(outputText || task?.output || '');
  const terminalStatuses = ['stopped', 'error', 'crashed', 'failed'];
  const applicable = Boolean(task)
    && task.type === 'serve'
    && terminalStatuses.includes(task.status)
    && Boolean(out.trim());
  if (!applicable) return null;

  // Both quantization diagnoses offer the same pair of actions.
  const quantFixes = () => [
    { label: 'Find GGUF download', action: () => _openDownloadForGgufTask(task) },
    { label: 'Edit serve', action: (panel) => _openServeEditForTask(task) },
  ];

  if (_serveTaskLooksAwqOnLocalBackend(task, out)) {
    return {
      message: 'AWQ/GPTQ/FP8 cannot be served through llama.cpp/Ollama unified-memory mode.',
      suggestion: 'Suggested action: use vLLM/SGLang on a compatible CUDA/ROCm GPU server, or download a GGUF version for llama.cpp/Ollama/unified-memory serving.',
      fixes: quantFixes(),
    };
  }

  if (_serveTaskLooksAwqWithoutUsableAccelerator(task, out)) {
    return {
      message: 'AWQ/GPTQ/FP8 needs a working vLLM/SGLang accelerator path; this server did not expose one.',
      suggestion: 'Suggested action: choose a CUDA/ROCm server where vLLM/SGLang can see the GPU, or download a GGUF version and serve it with llama.cpp/Ollama.',
      fixes: quantFixes(),
    };
  }

  const matched = _diagnose(out);
  if (matched) return matched;

  const llamaBuild = /Native llama-server not found|building llama-server|llama\.cpp/i.test(out);
  const message = llamaBuild
    ? 'llama.cpp build stopped before the server became reachable.'
    : 'Serve stopped before the model became reachable.';
  const suggestion = llamaBuild
    ? 'Suggested action: copy the troubleshooting bundle, then edit serve settings. For the quickest local/CPU path, use Ollama or a prebuilt llama-server; source builds can take several minutes and fail if build dependencies are incomplete.'
    : 'Suggested action: copy the troubleshooting bundle, then edit serve settings or relaunch with a CPU/backend fallback.';
  return {
    message,
    suggestion,
    fixes: [{ label: 'Edit serve', action: (panel) => _openServeEditForTask(task) }],
  };
}
|
||||
|
||||
function _redactCrashReportText(text) {
|
||||
if (!text) return '';
|
||||
return String(text)
|
||||
@@ -173,6 +267,23 @@ export function _parseServePhase(snapshot) {
|
||||
if (/Ollama API ready on port\s+\d+/i.test(flat)) {
|
||||
return { phase: 'ready', status: 'ready' };
|
||||
}
|
||||
const llamaBuildMatches = [...flat.matchAll(/\[\s*(\d{1,3})%\]\s*(?:Building|Linking)/gi)];
|
||||
if (llamaBuildMatches.length) {
|
||||
const pct = Math.min(100, parseInt(llamaBuildMatches[llamaBuildMatches.length - 1][1], 10));
|
||||
return { phase: `building llama.cpp ${pct}%`, status: 'running', pct };
|
||||
}
|
||||
if (/Native llama-server not found|building from source/i.test(flat)) {
|
||||
if (/Cloning into ['"]?llama\.cpp/i.test(flat) && !/Receiving objects:\s*100%/i.test(flat)) {
|
||||
return { phase: 'cloning llama.cpp', status: 'running' };
|
||||
}
|
||||
if (/Configuring incomplete|CMake Error/i.test(flat)) {
|
||||
return {};
|
||||
}
|
||||
if (/CMAKE_BUILD_TYPE|Detecting CXX|Found Threads|Including CPU backend|CUDA nvcc found|building llama-server/i.test(flat)) {
|
||||
return { phase: 'configuring llama.cpp', status: 'running' };
|
||||
}
|
||||
return { phase: 'building llama.cpp', status: 'running' };
|
||||
}
|
||||
// HTTP access logs (e.g. GET /v1/models 200 OK) mean the server is up
|
||||
if (/(?:GET|POST)\s+\/[^\s]*\s+HTTP\/[\d.]+"\s*\d{3}/.test(flat)) {
|
||||
return { phase: 'idle', status: 'ready' };
|
||||
@@ -341,8 +452,24 @@ async function _startQueuedDownload(task) {
|
||||
|
||||
// ── Task CRUD ──
|
||||
|
||||
// True when a serve task shows evidence of having come up: its `_serveReady`
// flag is set, or its output contains a startup-complete line, an
// "Ollama API ready" line, or an HTTP access-log entry with a 2xx status.
function _serveOutputLooksReady(task) {
  const text = String(task?.output || '');
  if (task?._serveReady) return true;

  const readyMarkers = [
    /Application startup complete/i,
    /Ollama API ready on port\s+\d+/i,
    /(?:GET|POST)\s+\/[^\s]*\s+HTTP\/[\d.]+"\s*2\d\d/i,
  ];
  return readyMarkers.some((marker) => marker.test(text));
}
|
||||
|
||||
/**
 * Adjust a stored task record before it is shown.
 *
 * A serve task recorded as `done` whose output never showed a readiness
 * signal (per `_serveOutputLooksReady`) is returned as a copy with
 * `status: 'error'`. Every other value is returned unchanged, including
 * non-object inputs.
 *
 * @param {*} task Stored task record (or any value read from storage).
 * @returns {*} The same value, or a shallow copy with `status` set to `'error'`.
 */
function _normalizeTaskForDisplay(task) {
  if (!task || typeof task !== 'object') return task;
  const finishedServe = task.type === 'serve' && task.status === 'done';
  if (finishedServe && !_serveOutputLooksReady(task)) {
    // Copy rather than mutate so the caller's record is left untouched.
    return { ...task, status: 'error' };
  }
  return task;
}
|
||||
|
||||
/**
 * Load the persisted task list from `localStorage` (key `TASKS_KEY`).
 *
 * Each stored task is passed through `_normalizeTaskForDisplay` before being
 * returned. Missing, unparseable, or non-array data yields an empty list.
 *
 * @returns {Array<object>} The normalized tasks, or `[]` when none can be read.
 */
export function _loadTasks() {
  try {
    const stored = JSON.parse(localStorage.getItem(TASKS_KEY));
    // Anything other than an array cannot be mapped; treat it as "no tasks".
    if (!Array.isArray(stored)) return [];
    return stored.map((task) => _normalizeTaskForDisplay(task));
  } catch {
    return [];
  }
}
|
||||
|
||||
@@ -876,7 +1003,7 @@ export async function _serveAutoFix(panel, envVar) {
|
||||
// Edit button, but optionally with a modified command (used by the diagnosis
|
||||
// "Retry with X" buttons so a retry lands in the editable Serve panel with the
|
||||
// adjusted setting, instead of blindly relaunching).
|
||||
async function _openServeEditForTask(task, cmdOverride) {
|
||||
async function _openServeEditForTask(task, cmdOverride, fieldOverrides = null) {
|
||||
const repo = task.payload?.repo_id;
|
||||
if (!repo) { uiModule.showToast('No model info on this task'); return; }
|
||||
const cmd = cmdOverride || task.payload?._cmd;
|
||||
@@ -884,6 +1011,9 @@ async function _openServeEditForTask(task, cmdOverride) {
|
||||
let fields = cmdOverride
|
||||
? _parseServeCmdToFields(cmd)
|
||||
: (task.payload?._fields || (cmd ? _parseServeCmdToFields(cmd) : null));
|
||||
if (fieldOverrides && typeof fieldOverrides === 'object') {
|
||||
fields = { ...(fields || {}), ...fieldOverrides };
|
||||
}
|
||||
// Switch the active server to the one this serve ran on (mirrors _openEdit).
|
||||
const _tHost = task.remoteHost || '';
|
||||
_envState.remoteHost = _tHost;
|
||||
@@ -1352,8 +1482,8 @@ export function _renderRunningTab() {
|
||||
const host = btn.dataset.clearServer;
|
||||
if (!await window.styledConfirm(`Clear finished tasks on ${_serverName(host)}?`, { confirmText: 'Clear' })) return;
|
||||
const allTasks = _loadTasks();
|
||||
const toRemove = allTasks.filter(t => (t.remoteHost || '') === host && t.status !== 'running');
|
||||
const remaining = allTasks.filter(t => (t.remoteHost || '') !== host || t.status === 'running');
|
||||
const toRemove = allTasks.filter(t => (t.remoteHost || '') === host && _canClearTask(t));
|
||||
const remaining = allTasks.filter(t => (t.remoteHost || '') !== host || !_canClearTask(t));
|
||||
_saveTasks(remaining);
|
||||
// Fade/slide each finished card out (same exit as the per-card clear)
|
||||
// instead of yanking them instantly.
|
||||
@@ -1443,16 +1573,19 @@ export function _renderRunningTab() {
|
||||
const _bdg = _taskBadge(task);
|
||||
badge.textContent = _bdg.text;
|
||||
badge.className = 'cookbook-task-status' + (_bdg.cls ? ' ' + _bdg.cls : '');
|
||||
badge.style.display = isDone ? 'none' : ''; // hidden — type chip carries it
|
||||
badge.style.display = '';
|
||||
}
|
||||
// Indicator: spinning wave while running, green check when finished.
|
||||
const wave = el.querySelector('.cookbook-task-wave');
|
||||
if (wave) wave.style.display = task.status === 'running' ? '' : 'none';
|
||||
// Model downloads (which have a Serve → button) don't get a clear pill —
|
||||
// pressing Serve clears them. Dep installs / serve tasks keep it.
|
||||
const check = el.querySelector('.cookbook-task-check');
|
||||
const _showClear = isDone && !(task.type === 'download' && !task.payload?._dep);
|
||||
if (check) check.style.display = _showClear ? '' : 'none';
|
||||
if (check) {
|
||||
check.style.display = _canClearTask(task) ? '' : 'none';
|
||||
const label = check.querySelector('.cookbook-task-done-label');
|
||||
if (label) label.textContent = _clearPillLabel(task);
|
||||
}
|
||||
const terminalDiag = _terminalServeDiagnosis(task, el.querySelector('.cookbook-output-pre')?.textContent || task.output || '');
|
||||
if (terminalDiag) _showDiagnosis(el, terminalDiag, el.querySelector('.cookbook-output-pre')?.textContent || task.output || '');
|
||||
}
|
||||
if (!task) {
|
||||
if (el._uptimeInterval) { clearInterval(el._uptimeInterval); el._uptimeInterval = null; }
|
||||
@@ -1476,11 +1609,8 @@ export function _renderRunningTab() {
|
||||
<div class="cookbook-task-header">
|
||||
<span class="cookbook-task-type${(task.status === 'done' && task.type === 'download') ? ' cookbook-task-type-done' : ''}" data-type="${esc(task.type)}">${esc((task.status === 'done' && task.type === 'download') ? 'finished' : task.type)}</span>
|
||||
<span class="cookbook-task-name">${modelLogo(task.name)}${esc(task.name)}</span>
|
||||
<span class="cookbook-task-status ${_bdg.cls}" style="display:${task.status === 'done' ? 'none' : ''}"${_bdgTitle}>${esc(_bdg.text)}</span>
|
||||
${task.type === 'serve' && task.payload?._cmd ? '<button class="cookbook-task-edit-btn" title="Edit settings & relaunch"><svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"/><path d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"/></svg></button>' : ''}
|
||||
${task.type === 'serve' && task.payload?._cmd ? '<button class="cookbook-task-save-btn" title="Save preset"><svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M19 21H5a2 2 0 0 1-2-2V5a2 2 0 0 1 2-2h11l5 5v11a2 2 0 0 1-2 2z"/><polyline points="17 21 17 13 7 13 7 21"/><polyline points="7 3 7 8 15 8"/></svg></button>' : ''}
|
||||
<span class="cookbook-task-indicator"><span class="cookbook-task-wave" style="display:${task.status === 'running' ? '' : 'none'}"></span><span class="cookbook-task-check" title="Clear" style="display:${(task.status === 'done' && !(task.type === 'download' && !task.payload?._dep)) ? '' : 'none'}"><svg class="cookbook-task-check-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="#50fa7b" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><polyline points="20 6 9 17 4 12"/></svg><svg class="cookbook-task-clear-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg><span class="cookbook-task-done-label">done</span><span class="cookbook-task-clear-label">clear</span></span></span>
|
||||
${task.type === 'download' && !task.payload?._dep && task.status === 'done' ? `<span class="cookbook-task-status cookbook-task-done">finished</span>` : ''}
|
||||
<span class="cookbook-task-indicator"><span class="cookbook-task-wave" style="display:${task.status === 'running' ? '' : 'none'}"></span><span class="cookbook-task-check" title="Clear" style="display:${_canClearTask(task) ? '' : 'none'}"><svg class="cookbook-task-check-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="#50fa7b" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><polyline points="20 6 9 17 4 12"/></svg><svg class="cookbook-task-clear-ico" width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="3" stroke-linecap="round" stroke-linejoin="round"><line x1="18" y1="6" x2="6" y2="18"/><line x1="6" y1="6" x2="18" y2="18"/></svg><span class="cookbook-task-done-label">${esc(_clearPillLabel(task))}</span><span class="cookbook-task-clear-label">clear</span></span></span>
|
||||
<span class="cookbook-task-status ${_bdg.cls}"${_bdgTitle}>${esc(_bdg.text)}</span>
|
||||
<button class="cookbook-task-menu-btn" title="Actions">⋮</button>
|
||||
</div>
|
||||
<div class="cookbook-task-sub"><span class="cookbook-task-session">${esc(task.sessionId)}</span><span class="cookbook-task-uptime" style="display:${((task.type === 'serve' || task.type === 'download') && task.status === 'running') ? '' : 'none'}"></span></div>
|
||||
@@ -1490,6 +1620,9 @@ export function _renderRunningTab() {
|
||||
const _waveEl = el.querySelector('.cookbook-task-wave');
|
||||
if (_waveEl && task.status === 'running') _registerWaveEl(_waveEl);
|
||||
|
||||
const terminalDiag = _terminalServeDiagnosis(task, task.output || '');
|
||||
if (terminalDiag) _showDiagnosis(el, terminalDiag, task.output || '');
|
||||
|
||||
const _uptimeEl = el.querySelector('.cookbook-task-uptime');
|
||||
if (_uptimeEl && (task.type === 'serve' || task.type === 'download') && task.status === 'running') {
|
||||
const _startedAt = task.ts || Date.now();
|
||||
@@ -1506,35 +1639,12 @@ export function _renderRunningTab() {
|
||||
}
|
||||
|
||||
// Re-open the Serve panel for this model, pre-filled with the EXACT
|
||||
// settings this instance launched with, and on the SERVER it runs on —
|
||||
// shared by the edit icon button and the ⋮ "Edit settings" menu item.
|
||||
// settings this instance launched with, and on the SERVER it runs on.
|
||||
const _openEdit = () => _openServeEditForTask(task);
|
||||
const editBtn = el.querySelector('.cookbook-task-edit-btn');
|
||||
if (editBtn) {
|
||||
editBtn.addEventListener('click', (e) => { e.stopPropagation(); _openEdit(); });
|
||||
}
|
||||
|
||||
// Wire save icon button
|
||||
const saveBtn = el.querySelector('.cookbook-task-save-btn');
|
||||
if (saveBtn) {
|
||||
saveBtn.addEventListener('click', async (e) => {
|
||||
el.addEventListener('cookbook:edit-serve', (e) => {
|
||||
e.stopPropagation();
|
||||
// Tell them it's already saved up front (often true now that working
|
||||
// configs auto-save) instead of after they've typed a name.
|
||||
if (_loadPresets().some(p => p.cmd === task.payload?._cmd)) {
|
||||
uiModule.showToast('Already saved');
|
||||
return;
|
||||
}
|
||||
const label = (await uiModule.styledPrompt('Name this config so you can recall it later.', {
|
||||
title: 'Save Config', defaultValue: task.name, placeholder: 'e.g. 8-bit, fast', confirmText: 'Save',
|
||||
}) || '').trim();
|
||||
if (!label) return;
|
||||
if (!_saveTaskAsPreset(task, label)) { uiModule.showToast('Already saved'); return; }
|
||||
saveBtn.innerHTML = '<svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="#50fa7b" stroke-width="2.5" stroke-linecap="round"><polyline points="20 6 9 17 4 12"/></svg>';
|
||||
uiModule.showToast(`Saved "${label}"`);
|
||||
setTimeout(() => { saveBtn.style.display = 'none'; }, 1500);
|
||||
_openServeEditForTask(task, null, e.detail?.fields || null);
|
||||
});
|
||||
}
|
||||
|
||||
// Finished download → an explicit "Serve →" button jumps straight to the
|
||||
// Serve tab with this model pre-selected (on the server it downloaded to).
|
||||
@@ -2018,12 +2128,31 @@ async function _reconnectTask(el, task) {
|
||||
if (badge) { badge.textContent = _statusLabel('error', task.type); badge.className = 'cookbook-task-status cookbook-task-error'; }
|
||||
_showCookbookNotif(true);
|
||||
} else {
|
||||
const looksSuccessful = !lastOutput.includes('DOWNLOAD_FAILED') && (lastOutput.includes('DONE') || lastOutput.includes('100%') || lastOutput.includes('Application startup complete') || lastOutput.includes('/snapshots/') || lastOutput.includes('Download complete') || lastOutput.includes('DOWNLOAD_OK'));
|
||||
if (!lastOutput.trim() || (task.type === 'download' && !looksSuccessful)) {
|
||||
const downloadLooksSuccessful = !lastOutput.includes('DOWNLOAD_FAILED')
|
||||
&& (lastOutput.includes('DONE') || lastOutput.includes('100%') || lastOutput.includes('/snapshots/') || lastOutput.includes('Download complete') || lastOutput.includes('DOWNLOAD_OK'));
|
||||
const serveLooksReady = task.type === 'serve' && _serveOutputLooksReady({ ...task, output: lastOutput });
|
||||
const looksSuccessful = task.type === 'download' ? downloadLooksSuccessful : serveLooksReady;
|
||||
if (!lastOutput.trim() || !looksSuccessful) {
|
||||
_updateTask(task.sessionId, { status: 'crashed' });
|
||||
el.dataset.status = 'crashed';
|
||||
const badge = el.querySelector('.cookbook-task-status');
|
||||
if (badge) { badge.textContent = _statusLabel('crashed', task.type); badge.className = 'cookbook-task-status cookbook-task-crashed'; }
|
||||
if (task.type === 'serve') {
|
||||
const diag = _diagnose(lastOutput) || {
|
||||
message: _serveTaskLooksAwqOnLocalBackend(task, lastOutput)
|
||||
? 'AWQ/GPTQ/FP8 cannot be served through llama.cpp/Ollama unified-memory mode.'
|
||||
: /Native llama-server not found|building llama-server|llama\.cpp/i.test(lastOutput)
|
||||
? 'llama.cpp build stopped before the server became reachable.'
|
||||
: 'Serve stopped before the model became reachable.',
|
||||
suggestion: _serveTaskLooksAwqOnLocalBackend(task, lastOutput)
|
||||
? 'Suggested action: use vLLM/SGLang on a compatible CUDA/ROCm GPU server, or download a GGUF version for llama.cpp/Ollama/unified-memory serving.'
|
||||
: /Native llama-server not found|building llama-server|llama\.cpp/i.test(lastOutput)
|
||||
? 'Suggested action: copy the troubleshooting bundle, then edit serve settings. For the quickest local/CPU path, use Ollama or a prebuilt llama-server; source builds can take several minutes and fail if build dependencies are incomplete.'
|
||||
: 'Suggested action: copy the troubleshooting bundle, then edit serve settings or relaunch with a CPU/backend fallback.',
|
||||
fixes: [{ label: 'Edit serve', action: (panel) => _openServeEditForTask(task) }],
|
||||
};
|
||||
_showDiagnosis(el, diag, lastOutput);
|
||||
}
|
||||
_showCookbookNotif(true);
|
||||
} else {
|
||||
_updateTask(task.sessionId, { status: 'done' });
|
||||
|
||||
@@ -41,6 +41,48 @@ const SERVE_STATE_KEY = 'cookbook-serve-state';
|
||||
|
||||
let _cachedAllModels = [];
|
||||
|
||||
/**
 * Heuristically decide whether a model is an AWQ / GPTQ / FP8 quantized repo.
 *
 * Checks the model's `quant` label first, then looks for an `awq`, `gptq` or
 * `fp8` token in the repo id, model name and path.
 *
 * @param {{quant?: string, repo_id?: string, name?: string, path?: string}|null|undefined} model
 * @param {string|null|undefined} repo Repo id the caller is working with.
 * @returns {boolean}
 */
function _repoLooksAwqLike(model, repo) {
  const quant = String(model?.quant || '').toUpperCase();
  if (/^(?:AWQ|GPTQ|FP8)/.test(quant)) return true;

  const haystack = `${repo || ''} ${model?.repo_id || ''} ${model?.name || ''} ${model?.path || ''}`.toLowerCase();
  // `\b` treats "_" as a word character, so "model_awq" would not match.
  // Delimit the token by "not a letter or digit" instead, which still rejects
  // substrings inside longer words.
  return /(?:^|[^a-z0-9])(?:awq|gptq|fp8)(?![a-z0-9])/.test(haystack);
}
|
||||
|
||||
/**
 * Heuristically decide whether a model is a GGUF model.
 *
 * True when the model is flagged `is_gguf`, when its `quant` label is `GGUF`
 * or starts with `Q2`–`Q8` / `IQ`, or when "gguf" appears in the repo id,
 * model name or path.
 *
 * @param {{is_gguf?: boolean, quant?: string, repo_id?: string, name?: string, path?: string}|null|undefined} model
 * @param {string|null|undefined} repo Repo id the caller is working with.
 * @returns {boolean}
 */
function _repoLooksGgufLike(model, repo) {
  if (model?.is_gguf) return true;

  const quant = String(model?.quant || '').toUpperCase();
  if (quant === 'GGUF' || /^Q[2-8]/.test(quant) || /^IQ/.test(quant)) return true;

  const haystack = `${repo || ''} ${model?.repo_id || ''} ${model?.name || ''} ${model?.path || ''}`.toLowerCase();
  return haystack.includes('gguf');
}
|
||||
|
||||
/**
 * Build a warning for a model/backend combination that cannot work.
 *
 * Uses `_repoLooksAwqLike` / `_repoLooksGgufLike` to classify the model and
 * returns the first matching warning, or `null` when the combination looks
 * fine. Checks run in this order:
 *   1. AWQ-like model on `llamacpp` / `ollama`.
 *   2. AWQ-like model on `vllm` / `sglang` while `_isMetal()` is true.
 *   3. AWQ-like model with `fields.unified_mem` set.
 *   4. GGUF-like model on `vllm` / `sglang`.
 *
 * @param {object|null|undefined} model Model record passed to the classifiers.
 * @param {string|null|undefined} repo Repo id passed to the classifiers.
 * @param {string} backend Selected backend id.
 * @param {object|null} [fields={}] Serve form fields; only `unified_mem` is read.
 * @returns {{title: string, body: string}|null}
 */
function _serveBackendWarning(model, repo, backend, fields = {}) {
  const awqLike = _repoLooksAwqLike(model, repo);
  const ggufLike = _repoLooksGgufLike(model, repo);
  const ggufBackend = backend === 'llamacpp' || backend === 'ollama';
  const safetensorsBackend = backend === 'vllm' || backend === 'sglang';

  if (awqLike && ggufBackend) {
    return {
      title: 'AWQ needs vLLM or SGLang',
      body: 'This model looks like AWQ/GPTQ/FP8 safetensors. llama.cpp and Ollama need GGUF files, so this backend cannot serve it. Choose vLLM/SGLang on a CUDA/ROCm GPU server, or download a GGUF version for llama.cpp/Ollama.',
    };
  }
  if (awqLike && _isMetal() && safetensorsBackend) {
    return {
      title: 'AWQ is not a unified-memory path',
      body: 'This model looks like AWQ/GPTQ/FP8 safetensors. AWQ is for vLLM/SGLang on CUDA/ROCm-style GPU servers, not local unified-memory llama.cpp/Ollama serving. For unified memory, download a GGUF model and use llama.cpp/Ollama.',
    };
  }
  // `fields` may be passed as null explicitly (the default only covers
  // undefined), so read it defensively.
  if (awqLike && fields?.unified_mem) {
    return {
      title: 'AWQ is not a unified-memory path',
      body: 'This model looks like AWQ/GPTQ/FP8 safetensors, but unified-memory local serving expects GGUF. Use vLLM/SGLang on a compatible GPU server, or download a GGUF version for llama.cpp/Ollama.',
    };
  }
  if (ggufLike && safetensorsBackend) {
    return {
      title: 'GGUF needs llama.cpp or Ollama',
      body: 'This model looks like GGUF. vLLM/SGLang expect HuggingFace safetensors-style repos. Choose llama.cpp/Ollama for GGUF, or download a safetensors model for vLLM/SGLang.',
    };
  }
  return null;
}
|
||||
|
||||
/**
 * Safe own-property check.
 *
 * Goes through `Object.prototype.hasOwnProperty` so it works for objects
 * without a prototype, and treats a falsy `obj` as an empty object.
 *
 * @param {object|null|undefined} obj Object to inspect.
 * @param {string|symbol} key Property key.
 * @returns {boolean} `true` only when `key` is an own property of `obj`.
 */
function _hasOwn(obj, key) {
  const target = obj || {};
  return Object.prototype.hasOwnProperty.call(target, key);
}
|
||||
@@ -324,12 +366,6 @@ function _rerenderCachedModels() {
|
||||
c.style.alignItems = '';
|
||||
});
|
||||
|
||||
// Capture grid height
|
||||
const _tb = list.closest('.admin-card')?.querySelector('.memory-toolbar');
|
||||
const _tbH = _tb ? _tb.offsetHeight : 0;
|
||||
list.style.minHeight = (list.offsetHeight + _tbH) + 'px';
|
||||
list.style.maxHeight = (list.offsetHeight + _tbH) + 'px';
|
||||
|
||||
const shortName = repo.split('/').pop();
|
||||
const _es = _envState;
|
||||
// The venv set per-server in Settings (server.envPath). Used as the venv
|
||||
@@ -350,8 +386,13 @@ function _rerenderCachedModels() {
|
||||
? _byRepo[repo]
|
||||
: (_lastUsed || (_isLegacyFlat ? _allSs : {}));
|
||||
const detectedBackend = _detectBackend(m).backend;
|
||||
const defaultBackend = detectedBackend;
|
||||
const savedMatchesBackend = (ss.backend || 'vllm') === detectedBackend;
|
||||
const _allowedBackends = new Set(_isWindows()
|
||||
? ['llamacpp']
|
||||
: (_isMetal() ? ['llamacpp', 'ollama'] : ['vllm', 'sglang', 'llamacpp', 'ollama', 'diffusers']));
|
||||
const defaultBackend = (ss._forceBackend && ss.backend && _allowedBackends.has(ss.backend))
|
||||
? ss.backend
|
||||
: detectedBackend;
|
||||
const savedMatchesBackend = !!ss._forceBackend || (ss.backend || 'vllm') === detectedBackend;
|
||||
const sv = (k, def) => (ss[k] !== undefined && savedMatchesBackend) ? ss[k] : def;
|
||||
const defaultTp = defaultBackend === 'llamacpp' ? '1' : sv('tp', '1');
|
||||
const detectedGpuIds = _allGpuIds(_getGpuToggleTotal?.());
|
||||
@@ -1200,7 +1241,16 @@ function _rerenderCachedModels() {
|
||||
if (el.type === 'checkbox') serveState[el.dataset.field] = el.checked;
|
||||
else serveState[el.dataset.field] = el.value;
|
||||
});
|
||||
serveState.backend = (_detectBackend(m).backend) || serveState.backend || 'vllm';
|
||||
serveState.backend = serveState.backend || (_detectBackend(m).backend) || 'vllm';
|
||||
const backendWarning = _serveBackendWarning(m, repo, serveState.backend, serveState);
|
||||
if (backendWarning) {
|
||||
await window.styledConfirm(backendWarning.body, {
|
||||
title: backendWarning.title,
|
||||
confirmText: 'Edit settings',
|
||||
cancelText: 'Close',
|
||||
});
|
||||
return;
|
||||
}
|
||||
// Save in the { _byRepo, _lastUsed } schema — no legacy flat keys at
|
||||
// the root so per-model state doesn't leak between models.
|
||||
try {
|
||||
|
||||
@@ -2253,8 +2253,9 @@ function _renderActivityEntry(entry) {
|
||||
const hue = _categoryHue(entry.taskName, entry.kind);
|
||||
// CSS vars feed the colored title + accent stripe.
|
||||
const styleVars = `--cat-hue:${hue};`;
|
||||
const _runningPlaceholder = /^(Starting…|Starting\.\.\.|_Running…_|_Running\.\.\._|_Queued\b)/i.test((entry.result || '').trim());
|
||||
const hasResult = !!(entry.result && entry.result.trim() && entry.status !== 'running' && entry.status !== 'queued');
|
||||
const hasRunningProgress = !!(entry.result && entry.result.trim() && (entry.status === 'running' || entry.status === 'queued'));
|
||||
const hasRunningProgress = !!(entry.result && entry.result.trim() && !_runningPlaceholder && (entry.status === 'running' || entry.status === 'queued'));
|
||||
// "Open in chat" only makes sense for runs whose result is a real assistant
|
||||
// message (Prompt / Research tasks). Action/event runs are just log lines
|
||||
// (e.g. "No recent emails", "Tidied N memories") — for those, replace the
|
||||
@@ -2299,9 +2300,10 @@ function _renderActivityEntry(entry) {
|
||||
let rightHtml;
|
||||
if (_isRunning) {
|
||||
const isQueued = entry.status === 'queued';
|
||||
const label = isQueued ? 'Queued' : 'Running';
|
||||
// Initial elapsed for the first paint; the 1s interval below keeps it live.
|
||||
const startMs = entry.ts ? new Date(entry.ts).getTime() : Date.now();
|
||||
const stale = !isQueued && (Date.now() - startMs) > 30 * 60 * 1000;
|
||||
const label = isQueued ? 'Queued' : stale ? 'Still running' : 'Running';
|
||||
const elapsedInit = isQueued ? '' : `<span class="task-log-running-elapsed" data-since="${startMs}">${_fmtElapsed(Date.now() - startMs)}</span>`;
|
||||
const forceBtn = isQueued && entry.taskId ? `<button class="task-log-force-run" type="button" title="Start now in parallel, bypassing the queue" style="border:0;background:transparent;box-shadow:none;margin-left:5px;padding:0;width:12px;height:12px;display:inline-flex;align-items:center;justify-content:center;font-size:10px;line-height:1;color:inherit;opacity:.8;"><svg width="9" height="9" viewBox="0 0 24 24" fill="currentColor" style="display:block;"><polygon points="6 4 20 12 6 20 6 4"/></svg></button>` : '';
|
||||
const stopBtn = entry.taskId ? `<button class="task-log-stop" type="button" title="Stop this task"><svg width="9" height="9" viewBox="0 0 24 24" fill="currentColor"><rect x="6" y="6" width="12" height="12" rx="1"/></svg></button>` : '';
|
||||
|
||||
159
static/style.css
159
static/style.css
@@ -5363,19 +5363,20 @@ body.bg-pattern-sparkles {
|
||||
#compare-model-overlay .modal-header h4 {
|
||||
pointer-events: none;
|
||||
}
|
||||
/* Compare modal sizes to content — the global .modal-content max-height
|
||||
+ .modal-body overflow combo makes BOTH the outer card and the inner
|
||||
body scrollable, so even when the content fits the viewport you get
|
||||
a stray vertical scrollbar. Drop the cap and disable inner scroll
|
||||
here; if the viewport is genuinely tiny the modal still won't exceed
|
||||
it because it's centered and the parent .modal flex layout shrinks. */
|
||||
/* Compare model selector: keep manually-resized/tiny windows contained.
|
||||
Picker dropdowns are appended to document.body, so the card itself can
|
||||
clip and scroll without cropping the dropdown list. */
|
||||
#compare-model-overlay .modal-content {
|
||||
max-height: none;
|
||||
overflow: visible;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
max-height: min(720px, calc(100dvh - 48px));
|
||||
overflow: hidden;
|
||||
min-height: 180px;
|
||||
}
|
||||
#compare-model-overlay .modal-body {
|
||||
overflow: visible;
|
||||
flex: 0 0 auto;
|
||||
overflow: auto;
|
||||
flex: 1 1 auto;
|
||||
min-height: 0;
|
||||
}
|
||||
.vis-hint {
|
||||
font-size: 10px;
|
||||
@@ -6955,6 +6956,8 @@ pre { background: var(--code-bg, var(--hl-bg, #282c34)) !important; }
|
||||
.compare-mode-tabs {
|
||||
display: flex;
|
||||
gap: 4px;
|
||||
flex-wrap: wrap;
|
||||
min-width: 0;
|
||||
}
|
||||
/* Type tabs match Mode toggles 1:1 (same flex column layout, same metrics) */
|
||||
.compare-mode-tab {
|
||||
@@ -19015,7 +19018,7 @@ body.gallery-selecting .gallery-dl-btn,
|
||||
align-items: center;
|
||||
gap: 3px;
|
||||
position: relative;
|
||||
top: 2px;
|
||||
top: 0;
|
||||
cursor: pointer;
|
||||
padding: 1px 6px 1px 4px;
|
||||
border-radius: 9px;
|
||||
@@ -19024,22 +19027,17 @@ body.gallery-selecting .gallery-dl-btn,
|
||||
}
|
||||
.cookbook-task-check svg { flex-shrink: 0; }
|
||||
.cookbook-task-check:hover { background: color-mix(in srgb, var(--red, #ff5555) 18%, transparent); }
|
||||
/* Shows "done" (green) normally; on hover the icon + label swap to a red ✕ /
|
||||
"clear" to reveal it's a dismiss action. */
|
||||
/* Terminal task clear pill. */
|
||||
.cookbook-task-done-label,
|
||||
.cookbook-task-clear-label {
|
||||
font-size: 9px;
|
||||
line-height: 1;
|
||||
text-transform: lowercase;
|
||||
}
|
||||
.cookbook-task-done-label { color: var(--green, #50fa7b); }
|
||||
.cookbook-task-clear-label { display: none; color: var(--red, #ff5555); }
|
||||
.cookbook-task-check:hover .cookbook-task-done-label { display: none; }
|
||||
.cookbook-task-check:hover .cookbook-task-clear-label { display: inline; }
|
||||
/* Default: show the green check. On hover: swap to a red ✕ to signal "clear". */
|
||||
.cookbook-task-clear-ico { display: none; }
|
||||
.cookbook-task-check:hover .cookbook-task-check-ico { display: none; }
|
||||
.cookbook-task-check:hover .cookbook-task-clear-ico { display: inline; }
|
||||
.cookbook-task-done-label { color: var(--red, #ff5555); }
|
||||
.cookbook-task-clear-label { display: none; }
|
||||
.cookbook-task-check-ico { display: none; }
|
||||
.cookbook-task-clear-ico { display: inline; }
|
||||
/* "Serve" button on a finished download — green pill matching the "running" /
|
||||
finished badge (it sits next to the green FINISHED chip + check). */
|
||||
.cookbook-task-serve-btn {
|
||||
@@ -19583,17 +19581,136 @@ body.gallery-selecting .gallery-dl-btn,
|
||||
border: 1px solid color-mix(in srgb, var(--color-error) 30%, transparent);
|
||||
border-radius: 6px;
|
||||
}
|
||||
/* ── Cookbook diagnosis card ──
   NOTE(review): the element roles described below are inferred from the class
   names; confirm against the markup that renders the card. */

/* Header row: a flex line nudged 4px upward, with the same amount reclaimed
   below via the negative margin. */
.cookbook-diag-header {
  display: flex;
  align-items: center;
  gap: 7px;
  position: relative;
  top: -4px;
  margin-bottom: -4px;
}

/* Fold toggle, styled as bare error-colored text. `margin-right: auto`
   absorbs the free space to its right when it sits in a flex row. */
.cookbook-diag-fold {
  display: inline-flex;
  align-items: center;
  gap: 5px;
  padding: 0;
  min-height: 0;
  border: 0;
  background: transparent;
  color: var(--color-error);
  font: inherit;
  font-size: 11px;
  font-weight: 700;
  cursor: pointer;
  margin-right: auto;
}
.cookbook-diag-fold:hover {
  background: transparent;
  color: var(--color-error);
  opacity: 0.85;
}
.cookbook-diag-chevron {
  display: inline-block;
  width: 10px;
  font-size: 10px;
}

/* Copy control: borderless icon button; `.copied` turns it green. */
.cookbook-diag-copy {
  border: 0;
  background: transparent;
  color: var(--fg-muted);
  padding: 0 2px;
  width: 18px;
  height: 18px;
  min-height: 18px;
  cursor: pointer;
  display: inline-flex;
  align-items: center;
  justify-content: center;
}
.cookbook-diag-copy:hover {
  background: transparent;
  color: var(--fg);
}
.cookbook-diag-copy.copied {
  color: var(--green, #50fa7b);
}
.cookbook-diag-copy svg {
  display: block;
}

/* Dismiss control: borderless glyph button that turns error-colored on hover. */
.cookbook-diag-dismiss {
  border: 0;
  background: transparent;
  color: var(--fg-muted);
  padding: 0;
  width: 16px;
  height: 18px;
  min-height: 18px;
  line-height: 16px;
  font-size: 13px;
  cursor: pointer;
  display: inline-flex;
  align-items: center;
  justify-content: center;
  position: relative;
  top: -2px;
}
.cookbook-diag-dismiss:hover {
  background: transparent;
  color: var(--color-error);
}

/* Body: message and suggestion text are explicitly selectable. */
.cookbook-diag-body {
  margin-top: 7px;
}
.cookbook-diag-message {
  font-size: 12px;
  font-weight: 600;
  color: var(--color-error);
  margin-bottom: 4px;
  margin-left: 2px;
  user-select: text;
}
.cookbook-diag-suggestion {
  font-size: 11px;
  line-height: 1.35;
  color: var(--fg-muted);
  margin-bottom: 8px;
  margin-left: 2px;
  user-select: text;
}

/* Fix buttons wrap onto extra lines when they do not fit. */
.cookbook-diag-fixes {
  display: flex;
  flex-wrap: wrap;
  gap: 6px;
}

/* Actions dropdown: the wrapper is the positioning context for the
   absolutely positioned menu, which opens 4px below it. */
.cookbook-diag-actions {
  position: relative;
  display: inline-flex;
}
.cookbook-diag-action-trigger {
  font-size: 11px;
  padding: 4px 10px;
  min-height: 24px;
  background: var(--panel);
  border: 1px solid color-mix(in srgb, var(--color-error) 40%, transparent);
  color: var(--fg);
}
.cookbook-diag-action-trigger:hover {
  border-color: var(--color-error);
  background: color-mix(in srgb, var(--color-error) 12%, transparent);
}
.cookbook-diag-menu {
  position: absolute;
  left: 0;
  top: calc(100% + 4px);
  min-width: 180px;
  z-index: 80;
}
.cookbook-diag-menu button {
  width: 100%;
  justify-content: flex-start;
  text-align: left;
  white-space: nowrap;
}
|
||||
.cookbook-diag-btn {
|
||||
font-size: 11px;
|
||||
padding: 4px 10px;
|
||||
|
||||
Reference in New Issue
Block a user