From 87fc675ccb5add5f5ef4f94eb807589b475b60c2 Mon Sep 17 00:00:00 2001 From: "Ruben G." <161253222+Rub3n-0lte4n@users.noreply.github.com> Date: Wed, 3 Jun 2026 08:24:17 +0300 Subject: [PATCH] fix(cookbook): auto-register a local endpoint when serving an LLM (#1380) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Serving a diffusion model auto-registered an image endpoint so it appeared in the model picker, but serving an LLM (llama.cpp/vLLM/SGLang/Ollama) did not — a downloaded-and-served model never showed up until the user manually ran /setup. Add _auto_register_llm_endpoint (text sibling of _auto_register_image_endpoint): parse the serve port (explicit --port, else Ollama 11434, else llama.cpp 8080), point an endpoint at http://host:port/v1, dedupe by base_url, and set supports_tools from --enable-auto-tool-choice. Wire it into /api/model/serve for any non-pip, non-diffusion serve. --- routes/cookbook_routes.py | 81 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/routes/cookbook_routes.py b/routes/cookbook_routes.py index 5aa8ed0..a890000 100644 --- a/routes/cookbook_routes.py +++ b/routes/cookbook_routes.py @@ -809,6 +809,80 @@ def setup_cookbook_routes() -> APIRouter: finally: db.close() + def _auto_register_llm_endpoint(req: ServeRequest, remote: str | None) -> str | None: + """Register a freshly-served LLM as a model endpoint so it appears in the + model picker without a manual /setup step — the text-model sibling of + _auto_register_image_endpoint. + + Cookbook serve commands launch an OpenAI-compatible server (llama.cpp's + llama-server, vLLM, SGLang, or Ollama) on a known port. We point an + endpoint at that server's /v1; the picker auto-discovers the model id by + probing /v1/models and dims the endpoint until the server is reachable, + so registering immediately (before the server finishes loading) is safe. + """ + import re + from core.database import SessionLocal, ModelEndpoint + + # Port: an explicit --port wins. Otherwise fall back by backend — Ollama + # is the only server in our generated commands that omits --port. + port_match = re.search(r'--port\s+(\d+)', req.cmd) + if port_match: + port = int(port_match.group(1)) + elif "ollama" in req.cmd: + port = 11434 + else: + port = 8080 # llama.cpp's llama-server default — the Apple Silicon path + + # Determine host (mirrors the image path: SSH alias for remote serves). + if remote: + host = remote.split("@")[-1] if "@" in remote else remote + else: + host = "localhost" + + base_url = f"http://{host}:{port}/v1" + + short_name = req.repo_id.split("/")[-1] if "/" in req.repo_id else req.repo_id + display_name = short_name or "Local model" + + # If the serve command opts models into OpenAI tool-calling, record it so + # agent_loop trusts emitted tool_calls instead of the name heuristic. + supports_tools = True if "--enable-auto-tool-choice" in req.cmd else None + + db = SessionLocal() + try: + # Reuse an endpoint already pointed at this URL instead of duplicating. + existing = db.query(ModelEndpoint).filter(ModelEndpoint.base_url == base_url).first() + if existing: + existing.is_enabled = True + existing.model_type = "llm" + existing.name = display_name + if supports_tools is not None: + existing.supports_tools = supports_tools + db.commit() + logger.info(f"Updated existing local model endpoint: {base_url}") + return existing.id + + ep_id = f"local-{uuid.uuid4().hex[:8]}" + ep = ModelEndpoint( + id=ep_id, + name=display_name, + base_url=base_url, + api_key=None, + is_enabled=True, + model_type="llm", + supports_tools=supports_tools, + ) + db.add(ep) + db.commit() + logger.info(f"Auto-registered local model endpoint: {display_name} @ {base_url}") + return ep_id + except Exception as e: + logger.error(f"Failed to auto-register local model endpoint: {e}") + db.rollback() + return None + finally: + db.close() + @router.post("/api/model/serve") async def model_serve(request: Request, req: ServeRequest): """Launch a model server in a tmux session (or PowerShell background process on Windows). @@ -1152,11 +1226,16 @@ def setup_cookbook_routes() -> APIRouter: stderr = (await proc.stderr.read()).decode(errors="replace") return {"ok": False, "error": stderr, "session_id": session_id} - # Auto-register as model endpoint if serving a diffusion model + # Auto-register a model endpoint so the served model shows up in the model + # picker with no manual /setup step. Diffusion models get an image + # endpoint; any other real model serve (i.e. not a pip-install task) gets + # a local LLM endpoint pointed at its /v1. endpoint_id = None is_diffusion = "diffusion_server.py" in req.cmd if is_diffusion: endpoint_id = _auto_register_image_endpoint(req, remote) + elif not is_pip_install: + endpoint_id = _auto_register_llm_endpoint(req, remote) # Log to assistant try: