191 lines
6.7 KiB
Python
191 lines
6.7 KiB
Python
import subprocess
|
|
import json
|
|
import time
|
|
import httpx
|
|
import logging
|
|
import os
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
from typing import List, Dict, Any, Optional
|
|
from urllib.parse import urlparse
|
|
|
|
logger = logging.getLogger(__name__)

# Cache for discovered hosts. discover_tailscale_hosts() rebinds the two
# mutable names below via `global`; the TTL bounds how long a non-empty
# result is reused before `tailscale status` is run again.
_hosts_cache: List[str] = []
_hosts_cache_time: float = 0.0  # time.time() of the last successful discovery
_HOSTS_CACHE_TTL = 60  # seconds
|
|
|
|
|
|
def _first_ipv4(addresses) -> Optional[str]:
    """Return the first entry of *addresses* containing a dot (IPv4), or None."""
    for address in addresses or ():
        if isinstance(address, str) and "." in address:  # IPv4 only
            return address
    return None


def discover_tailscale_hosts() -> List[str]:
    """Discover this node and its online Tailscale peers as IPv4 addresses.

    Runs ``tailscale status --json`` (5 second timeout) and collects one IPv4
    address for the local node followed by one per online peer. Peers whose
    hostname is ``funnel-ingress-node`` or whose OS is ``android`` are skipped.

    A non-empty result is cached at module level for ``_HOSTS_CACHE_TTL``
    seconds. Empty results are never served from the cache, so discovery is
    retried on the next call.

    Returns:
        A list the caller owns: it is never the cached list object, so
        mutating it cannot corrupt later calls. The list is empty when the
        command exits non-zero or is not installed; if the command runs but
        its output cannot be processed, whatever was collected before the
        failure is returned uncached. Failures are logged, never raised.
    """
    global _hosts_cache, _hosts_cache_time

    now = time.time()
    if _hosts_cache and (now - _hosts_cache_time) < _HOSTS_CACHE_TTL:
        return list(_hosts_cache)

    hosts: List[str] = []
    try:
        result = subprocess.run(
            ["tailscale", "status", "--json"],
            capture_output=True, text=True, timeout=5,
        )
        if result.returncode != 0:
            return hosts

        data = json.loads(result.stdout) or {}

        # `or {}` rather than a .get() default: a key that is present but
        # null must be treated like a missing key instead of raising.
        own_ip = _first_ipv4((data.get("Self") or {}).get("TailscaleIPs"))
        if own_ip:
            hosts.append(own_ip)

        # Add online peers (skip funnel-ingress-nodes and android devices)
        for peer in (data.get("Peer") or {}).values():
            if not peer.get("Online"):
                continue
            if peer.get("HostName", "") == "funnel-ingress-node":
                continue
            if peer.get("OS", "") == "android":
                continue
            peer_ip = _first_ipv4(peer.get("TailscaleIPs"))
            if peer_ip:
                hosts.append(peer_ip)

        # Cache a copy: `hosts` is handed to the caller, who may mutate it.
        _hosts_cache = list(hosts)
        _hosts_cache_time = now
        logger.info("Tailscale discovery found %d hosts: %s", len(hosts), hosts)
    except FileNotFoundError:
        logger.debug("tailscale command not found")
    except Exception as e:
        # Best-effort: discovery problems must not break the caller.
        logger.warning("Tailscale discovery failed: %s", e)

    return hosts
|
|
|
|
|
|
class ModelDiscovery:
    """Scan candidate hosts for OpenAI-compatible ``/v1/models`` endpoints.

    Args:
        default_host: Host that is always part of the scan list.
        openai_api_key: When truthy, ``get_providers`` also lists a static
            "openai" provider entry.
    """

    def __init__(self, default_host: str, openai_api_key: Optional[str] = None):
        self.default_host = default_host
        self.openai_api_key = openai_api_key
        # Path appended to http://host:port to build each item's "url".
        self.openai_compat_path = "/v1/chat/completions"

    @staticmethod
    def _append_host(out: List[str], host: Optional[str]) -> None:
        """Append the stripped *host* to *out* unless it is empty or present."""
        host = (host or "").strip()
        if host and host not in out:
            out.append(host)

    def _with_required_hosts(self, candidates: List[str]) -> List[str]:
        """Return a new de-duplicated list with the always-scanned hosts added.

        ``default_host`` is placed first when it is not already among the
        candidates, and ``host.docker.internal`` is appended when missing.
        *candidates* itself is never modified.
        """
        hosts: List[str] = []
        for candidate in candidates:
            self._append_host(hosts, candidate)
        if self.default_host not in hosts:
            hosts.insert(0, self.default_host)
        self._append_host(hosts, "host.docker.internal")
        return hosts

    def _get_hosts(self) -> List[str]:
        """Get all hosts to scan, using env override, Tailscale, or default.

        Priority: the comma-separated ``LLM_HOSTS`` variable, then Tailscale
        discovery, then ``default_host`` plus ``host.docker.internal`` and any
        hostnames found in ``OLLAMA_BASE_URL`` / ``OLLAMA_URL``.
        """
        # Manual override takes priority
        extra = os.getenv("LLM_HOSTS", "").strip()
        if extra:
            return self._with_required_hosts(extra.split(","))

        # Try Tailscale discovery. The result is copied rather than edited in
        # place so that a list shared with the discovery cache stays intact.
        ts_hosts = discover_tailscale_hosts()
        if ts_hosts:
            return self._with_required_hosts(ts_hosts)

        hosts = [self.default_host]
        # Docker desktop/Linux compose maps this to the host machine. That is
        # the common "I started Ollama normally on this computer" case.
        self._append_host(hosts, "host.docker.internal")
        for env_name in ("OLLAMA_BASE_URL", "OLLAMA_URL"):
            raw = os.getenv(env_name, "").strip()
            if not raw:
                continue
            try:
                # Accept bare "host:port" values by assuming http://.
                parsed = urlparse(raw if "://" in raw else "http://" + raw)
            except ValueError:
                continue  # malformed URL: ignore this variable
            self._append_host(hosts, parsed.hostname or "")
        return hosts

    def _check_port(self, host: str, port: int) -> Optional[Dict[str, Any]]:
        """Check a single host:port for models.

        Issues ``GET http://host:port/v1/models`` with a 3 second timeout.

        Returns:
            A dict with ``host``, ``port``, ``url``, ``models`` and
            ``models_display`` (ids with leading "/" removed) when at least
            one model id is reported; otherwise None. Any error while probing
            or parsing is logged at debug level and yields None.
        """
        base = f"http://{host}:{port}/v1"
        try:
            r = httpx.get(f"{base}/models", timeout=3)
            if not r.is_success:
                return None
            data = r.json() or {}
            entries = data.get("data") or []
            ids = [m["id"] for m in entries if isinstance(m, dict) and m.get("id")]
            if ids:
                return {
                    "host": host,
                    "port": port,
                    "url": f"http://{host}:{port}{self.openai_compat_path}",
                    "models": ids,
                    "models_display": [i.lstrip("/") for i in ids],
                }
        except Exception as exc:
            # Most probed ports are simply closed; this is a best-effort scan.
            logger.debug("No model endpoint at %s: %s", base, exc)
        return None

    def discover_models(self) -> Dict[str, Any]:
        """Discover available models from all reachable hosts.

        Returns:
            ``{"hosts": [...], "items": [...]}`` where ``hosts`` is the scan
            list and ``items`` holds the ``_check_port`` results, de-duplicated
            by (port, sorted model ids) and sorted by host then port.
        """
        hosts = self._get_hosts()

        logger.info(f"Scanning {len(hosts)} hosts for models: {hosts}")

        # Build list of (host, port) to check. 8000-8020 catches vLLM,
        # llama.cpp, SGLang, and Cookbook serves; 11434 catches Ollama.
        ports = list(range(8000, 8021)) + [11434]
        targets = [(h, p) for h in hosts for p in ports]

        found: Dict[Any, Dict[str, Any]] = {}
        with ThreadPoolExecutor(max_workers=50) as pool:
            futures = {pool.submit(self._check_port, h, p): (h, p) for h, p in targets}
            for future in as_completed(futures):
                result = future.result()
                if result:
                    found[futures[future]] = result

        # Dedupe by (port, model_ids) to avoid the same machine via different
        # IPs. Walking targets in scan order (not completion order) makes the
        # surviving host deterministic: the earliest one in `hosts` wins.
        items = []
        seen_models = set()
        for target in targets:
            result = found.get(target)
            if not result:
                continue
            key = (result["port"], tuple(sorted(result["models"])))
            if key not in seen_models:
                seen_models.add(key)
                items.append(result)

        # Sort by host then port for consistent ordering
        items.sort(key=lambda x: (x["host"], x["port"]))

        logger.info(f"Discovered {len(items)} model endpoints across {len(hosts)} hosts")
        return {"hosts": hosts, "items": items}

    def get_providers(self) -> Dict[str, Any]:
        """Get all available providers.

        Returns:
            ``{"providers": [...]}``. The first entry is the "vllm" provider
            carrying the discovery result; a static "openai" entry follows
            when ``openai_api_key`` is set.
        """
        discovery = self.discover_models()
        items = discovery["items"]
        providers = [{"provider": "vllm", "hosts": discovery["hosts"], "items": items}]

        if self.openai_api_key:
            openai_models = [
                "gpt-5.2-codex", "gpt-4o-mini", "gpt-image-1.5",
                "gpt-4o", "gpt-5.2", "gpt-5.2-pro",
            ]
            providers.append({
                "provider": "openai",
                "items": [{
                    "url": "https://api.openai.com/v1/chat/completions",
                    "models": openai_models,
                }],
            })

        return {"providers": providers}
|