Add native Windows compatibility layer
This commit is contained in:
@@ -13,6 +13,17 @@ Set EMBEDDING_URL in .env, e.g.:
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
# Windows: force HuggingFace/fastembed to COPY model files rather than symlink
|
||||
# them. On a network-share/UNC cache dir Windows can't follow HF's symlinks
|
||||
# ([WinError 1463] "symbolic link cannot be followed"), so ONNX fails to load the
|
||||
# model and semantic memory dies. huggingface_hub reads this flag at import time,
|
||||
# so it must be set before huggingface_hub is first imported — hence module-top.
|
||||
# (app.py sets the same guard for the server entrypoint.)
|
||||
if os.name == "nt":
|
||||
os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS", "1")
|
||||
os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS_WARNING", "1")
|
||||
|
||||
import logging
|
||||
import numpy as np
|
||||
import httpx
|
||||
@@ -109,6 +120,35 @@ class FastEmbedClient:
|
||||
"data", "fastembed_cache",
|
||||
)
|
||||
os.makedirs(cache_dir, exist_ok=True)
|
||||
# Windows self-heal: the HuggingFace-hub cache stores model files as
|
||||
# symlinks (snapshots/<rev>/model.onnx -> ../../blobs/<hash>). On a
|
||||
# network-share / UNC data dir Windows refuses to follow them
|
||||
# ([WinError 1463] "symbolic link cannot be followed because its type is
|
||||
# disabled"), and a cache copied between machines can carry dead symlinks
|
||||
# too. Either way fastembed tries to load a broken symlink and fails
|
||||
# *without* re-downloading, leaving semantic memory degraded. Detect a
|
||||
# broken-symlink model in the cache and drop the contaminated hub dir so
|
||||
# fastembed re-fetches (it falls back to its CDN tarball of real files,
|
||||
# which load fine). Best-effort; only ever removes a verifiably dead link.
|
||||
if os.name == "nt":
|
||||
try:
|
||||
import glob, shutil
|
||||
for _onnx in glob.glob(os.path.join(cache_dir, "**", "*.onnx"), recursive=True):
|
||||
if os.path.islink(_onnx) and not os.path.exists(_onnx):
|
||||
_root = _onnx
|
||||
while os.path.basename(_root) and not os.path.basename(_root).startswith("models--"):
|
||||
_parent = os.path.dirname(_root)
|
||||
if _parent == _root:
|
||||
break
|
||||
_root = _parent
|
||||
if os.path.basename(_root).startswith("models--"):
|
||||
logger.warning(
|
||||
"Embedding cache has a broken symlink (%s); clearing %s "
|
||||
"so fastembed re-downloads real files", _onnx, _root,
|
||||
)
|
||||
shutil.rmtree(_root, ignore_errors=True)
|
||||
except Exception as _e:
|
||||
logger.debug("embedding cache symlink-heal skipped: %s", _e)
|
||||
kwargs = {"model_name": self.model, "cache_dir": cache_dir}
|
||||
self._embedding = TextEmbedding(**kwargs)
|
||||
self._dim: Optional[int] = None
|
||||
@@ -152,7 +192,7 @@ def _load_persisted_endpoint() -> dict:
|
||||
)
|
||||
if os.path.exists(endpoint_file):
|
||||
import json
|
||||
data = json.loads(open(endpoint_file).read())
|
||||
data = json.loads(open(endpoint_file, encoding="utf-8").read())
|
||||
if data.get("url"):
|
||||
return data
|
||||
except Exception:
|
||||
|
||||
Reference in New Issue
Block a user