diff --git a/routes/cookbook_helpers.py b/routes/cookbook_helpers.py
index 30f99e7..9dc232a 100644
--- a/routes/cookbook_helpers.py
+++ b/routes/cookbook_helpers.py
@@ -541,21 +541,15 @@ def _append_llama_cpp_linux_accel_build_lines(runner_lines: list[str]) -> None:
     runner_lines.append('        export HIP_PATH="${HIP_PATH:-$(hipconfig -R)}"')
     runner_lines.append('      fi')
     runner_lines.append('      echo "[odysseus] ROCm/HIP detected — building llama-server with HIP support..."')
-    runner_lines.append('      cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_HIP=ON \\\\')
-    runner_lines.append('        && cmake --build build -j"$NPROC" --target llama-server \\\\')
-    runner_lines.append('        && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
+    runner_lines.append('      cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_HIP=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
     runner_lines.append('    elif command -v nvcc &>/dev/null; then')
     runner_lines.append('      echo "[odysseus] CUDA nvcc found — building llama-server with CUDA (GPU) support..."')
-    runner_lines.append('      cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON \\\\')
-    runner_lines.append('        && cmake --build build -j"$NPROC" --target llama-server \\\\')
-    runner_lines.append('        && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
+    runner_lines.append('      cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_CUDA=ON && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
     runner_lines.append('    else')
     runner_lines.append('      echo "[odysseus] WARNING: no HIP/CUDA toolchain found — building llama-server for CPU only."')
     runner_lines.append('      echo "[odysseus]   GPU inference will not be available for this llama.cpp build."')
     runner_lines.append('      echo "[odysseus]   Install ROCm for AMD GPUs or vLLM/CUDA tooling for NVIDIA, then re-launch this serve task."')
-    runner_lines.append('      cmake -B build -DCMAKE_BUILD_TYPE=Release \\\\')
-    runner_lines.append('        && cmake --build build -j"$NPROC" --target llama-server \\\\')
-    runner_lines.append('        && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
+    runner_lines.append('      cmake -B build -DCMAKE_BUILD_TYPE=Release && cmake --build build -j"$NPROC" --target llama-server && ln -sf ~/llama.cpp/build/bin/llama-server ~/bin/llama-server')
     runner_lines.append('    fi')
 
 class ModelDownloadRequest(BaseModel):
diff --git a/routes/model_routes.py b/routes/model_routes.py
index f66cdd6..0135d1c 100644
--- a/routes/model_routes.py
+++ b/routes/model_routes.py
@@ -148,6 +148,32 @@ def _docker_host_gateway_reachable() -> bool:
     except OSError:
         return False
 
+def _container_loopback_reachable(base_url: str, timeout: float = 0.2) -> bool:
+    """Return True when ``base_url`` is a loopback host:port that accepts a TCP
+    connection from this process within ``timeout`` seconds; False otherwise.
+
+    A reachable port means the model server runs alongside Odysseus in the
+    same container, so the URL must not be rewritten to host.docker.internal.
+    Unparseable URLs, bad or missing ports and non-loopback hosts give False.
+    """
+    try:
+        parsed = urlparse(base_url)
+        port = parsed.port  # validated lazily: raises ValueError on a malformed or out-of-range port
+    except Exception:  # best-effort probe: any parse failure simply means "not reachable"
+        return False
+    host = (parsed.hostname or "").lower()
+    if host not in _LOOPBACK_HOSTS or not port:
+        return False
+    probe_host = "::1" if host == "::1" else "127.0.0.1"  # every non-"::1" loopback name is probed over IPv4
+    family = socket.AF_INET6 if probe_host == "::1" else socket.AF_INET
+    try:
+        with socket.socket(family, socket.SOCK_STREAM) as sock:
+            sock.settimeout(timeout)  # keep the probe short; it runs on the URL-rewrite path
+            sock.connect((probe_host, port))
+        return True
+    except OSError:  # refused, timed out, or address family unavailable
+        return False
+
 
 def _rewrite_loopback_for_docker(base_url: str, *, container_local: bool = False) -> str:
     """Rewrite a loopback model-endpoint URL to ``host.docker.internal`` when
@@ -176,6 +202,8 @@ def _rewrite_loopback_for_docker(base_url: str, *, container_local: bool = False
     if host in _ANY_BIND_HOSTS and not _docker_host_gateway_reachable():
         netloc = "127.0.0.1" + (f":{parsed.port}" if parsed.port else "")
         return urlunparse(parsed._replace(netloc=netloc))
+    if _container_loopback_reachable(base_url):
+        return base_url
     if not _docker_host_gateway_reachable():
         return base_url
     netloc = "host.docker.internal" + (f":{parsed.port}" if parsed.port else "")
diff --git a/static/js/cookbookServe.js b/static/js/cookbookServe.js
index 6e71791..0c2fcd2 100644
--- a/static/js/cookbookServe.js
+++ b/static/js/cookbookServe.js
@@ -246,10 +246,20 @@ function _selectedGgufExpr(model, repo, relPath) {
     const base = String(model.path || '').replace(/\/+$/, '');
     return `$(printf %s ${_shellPathExpr(`${base}/${repo}/${rel}`)})`;
   }
+  if (model.path) {
+    const base = String(model.path || '').replace(/\/+$/, '');
+    return `$(printf %s ${_shellPathExpr(`${base}/models--${repo.replace(/\//g, '--')}/snapshots/${rel}`)})`;
+  }
   const cacheRepo = repo.replace(/\//g, '--');
   return `$(printf %s \${HOME}${_shellQuote(`/.cache/huggingface/hub/models--${cacheRepo}/snapshots/${rel}`)})`;
 }
 
+function _ggufSearchDirExpr(model, repo) {  // Builds the shell text for the directory that is searched for this repo's GGUF files.
+  if (model.is_local_dir && model.path) return _shellQuote(`${String(model.path || '').replace(/\/+$/, '')}/${repo}`);  // local-dir model: "<path>/<repo>", trailing slashes stripped from path
+  if (model.path) return _shellQuote(`${String(model.path || '').replace(/\/+$/, '')}/models--${repo.replace(/\//g, '--')}/snapshots`);  // path set but not a local dir: "<path>/models--<repo with / as -->/snapshots". NOTE(review): _selectedGgufExpr wraps model.path in _shellPathExpr, this uses _shellQuote — confirm the difference is intended
+  return `"$HOME/.cache/huggingface/hub/models--${repo.replace(/\//g, '--')}/snapshots"`;  // no model.path: Hugging Face hub cache under $HOME, emitted inside double quotes
+}
+
 function _rerenderCachedModels() {
   const list = document.getElementById('hwfit-cached-list');
   const tagContainer = document.getElementById('serve-tags');
@@ -736,13 +746,12 @@ function _rerenderCachedModels() {
           // For multi-part GGUFs, llama.cpp requires the first split
           // (-00001-of-NNNNN.gguf). Prefer it (sorted, so UD-IQ4_XS/001 comes
           // before Q4_K_M/001 etc); fall back to any single GGUF sorted.
-          // Use $HOME (not ~) so tilde survives variable interpolation inside $(...).
-          const dir = `"$HOME/.cache/huggingface/hub/models--${repo.replace(/\//g, '--')}/snapshots"`;
+          const dir = _ggufSearchDirExpr(m, repo);
           // GGUF needs the actual .gguf FILE, not the folder. For a custom-dir
           // model the file lives under "<path>/<repo>" — search there just like we
           // search the HF snapshots dir, so serving a GGUF from a custom dir works
           // instead of handing llama.cpp a directory (which fails).
-          const _ldir = `"${m.path}/${repo}"`;
+          const _ldir = m.path ? _shellQuote(`${m.path}/${repo}`) : '""';
           f._gguf_path = selectedGguf
             ? _selectedGgufExpr(m, repo, selectedGguf.rel_path)
             : m.is_local_dir && m.path
diff --git a/tests/test_endpoint_probing.py b/tests/test_endpoint_probing.py
index aab4c52..0c7a2ca 100644
--- a/tests/test_endpoint_probing.py
+++ b/tests/test_endpoint_probing.py
@@ -198,11 +198,20 @@ class TestPingEndpoint:
 class TestDockerLoopbackRewrite:
     def test_manual_loopback_rewrites_to_docker_host_when_available(self, monkeypatch):
         monkeypatch.setattr(model_routes, "_docker_host_gateway_reachable", lambda: True)
+        monkeypatch.setattr(model_routes, "_container_loopback_reachable", lambda base_url: False)  # stub the in-container probe as "unreachable" so no real socket is opened and the rewrite is not short-circuited
         assert (
             _rewrite_loopback_for_docker("http://localhost:8000/v1")
             == "http://host.docker.internal:8000/v1"
         )
 
+    def test_reachable_container_loopback_stays_local_even_without_container_flag(self, monkeypatch):
+        monkeypatch.setattr(model_routes, "_docker_host_gateway_reachable", lambda: True)  # gateway reported reachable, so the gateway check itself does not block a rewrite
+        monkeypatch.setattr(model_routes, "_container_loopback_reachable", lambda base_url: True)  # pretend a server answers on the in-container loopback port
+        assert (
+            _rewrite_loopback_for_docker("http://127.0.0.1:8001/v1")  # container_local is left at its default (False)
+            == "http://127.0.0.1:8001/v1"  # URL must come back unchanged
+        )
+
     def test_cookbook_container_local_loopback_stays_inside_container(self, monkeypatch):
         monkeypatch.setattr(model_routes, "_docker_host_gateway_reachable", lambda: True)
         assert (