From 0888a3b3e6d918435f894b7414b44f4741086fba Mon Sep 17 00:00:00 2001 From: pewdiepie-archdaemon Date: Mon, 1 Jun 2026 15:09:47 +0900 Subject: [PATCH] Add native Windows compatibility layer --- .gitattributes | 27 ++- .gitignore | 2 + README.md | 34 +++ app.py | 64 ++++- core/atomic_io.py | 4 +- core/auth.py | 4 +- core/database.py | 8 +- core/platform_compat.py | 203 ++++++++++++++++ launch-windows.ps1 | 79 +++++++ mcp_servers/email_server.py | 2 +- routes/admin_wipe_routes.py | 2 +- routes/contacts_routes.py | 4 +- routes/cookbook_routes.py | 328 +++++++++++++++++++------- routes/document_helpers.py | 2 +- routes/email_helpers.py | 2 +- routes/email_routes.py | 2 +- routes/embedding_routes.py | 4 +- routes/mcp_routes.py | 8 +- routes/note_routes.py | 6 +- routes/prefs_routes.py | 4 +- routes/research_routes.py | 18 +- routes/shell_routes.py | 129 +++++++++- routes/upload_routes.py | 10 +- routes/vault_routes.py | 29 ++- scripts/add_hwfit_models.py | 6 +- scripts/claim_ownerless.py | 4 +- scripts/diffusion_server.py | 2 +- scripts/migrate_faiss_to_chroma.py | 6 +- services/hwfit/hardware.py | 64 ++++- services/hwfit/models.py | 2 +- services/memory/memory_extractor.py | 4 +- services/memory/skills.py | 12 +- services/research/research_handler.py | 8 +- setup.py | 2 +- src/api_key_manager.py | 4 +- src/bg_jobs.py | 79 ++++--- src/builtin_actions.py | 10 + src/builtin_mcp.py | 27 ++- src/chat_handler.py | 8 +- src/config.py | 11 + src/embeddings.py | 42 +++- src/integrations.py | 8 +- src/pdf_form_doc.py | 4 +- src/personal_docs.py | 8 +- src/preset_manager.py | 4 +- src/research_handler.py | 28 +-- src/secret_storage.py | 9 +- src/settings.py | 4 +- src/task_scheduler.py | 4 +- src/tool_execution.py | 5 +- src/tool_implementations.py | 8 +- src/upload_handler.py | 10 +- tests/test_auth_regressions.py | 8 +- tests/test_security_regressions.py | 5 + 54 files changed, 1104 insertions(+), 267 deletions(-) create mode 100644 core/platform_compat.py create mode 100644 launch-windows.ps1 diff --git a/.gitattributes b/.gitattributes index d62b6c3..2db234b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,2 +1,27 @@ -*.sh text eol=lf +# Normalize line endings so a Windows checkout (git core.autocrlf=true) can't +# corrupt shell-script shebangs. A CRLF `#!/bin/sh\r` makes the kernel look for +# an interpreter literally named "/bin/sh\r", producing the Docker startup error +# "exec /usr/local/bin/entrypoint.sh: no such file or directory" (issues #150, #77). +* text=auto + +# Shell scripts must stay LF on every platform (run by sh/bash, incl. in Docker). +*.sh text eol=lf +*.bash text eol=lf +entrypoint.sh text eol=lf docker/entrypoint.sh text eol=lf + +# Windows-native scripts stay CRLF. +*.ps1 text eol=crlf +*.cmd text eol=crlf +*.bat text eol=crlf + +# Binary assets — never normalize. +*.png binary +*.jpg binary +*.jpeg binary +*.gif binary +*.webp binary +*.pdf binary +*.ico binary +*.woff binary +*.woff2 binary diff --git a/.gitignore b/.gitignore index 3349982..8ec11ab 100644 --- a/.gitignore +++ b/.gitignore @@ -76,6 +76,8 @@ research_data/ # Internal dev/review notes — not for public repo dev-docs/ +# Windows-port working docs (local only, not for public repo) +docs/windows-port/ # Local config compound.config.json diff --git a/README.md b/README.md index 3e0162c..d17d3cf 100644 --- a/README.md +++ b/README.md @@ -152,6 +152,40 @@ do not run on macOS. MLX-only models are not served by Odysseus. +### Native Windows + +**One-command launcher** (creates the venv, installs deps, runs setup, starts the +server; safe to re-run): + +```powershell +git clone https://github.com/pewdiepie-archdaemon/odysseus.git +cd odysseus +powershell -ExecutionPolicy Bypass -File .\launch-windows.ps1 +``` + +Or do it by hand: + +```powershell +git clone https://github.com/pewdiepie-archdaemon/odysseus.git +cd odysseus +python -m venv venv +venv\Scripts\Activate.ps1 +pip install -r requirements.txt +python setup.py +python -m uvicorn app:app --host 127.0.0.1 --port 7000 +``` + +**Requirements:** Python 3.11+. The core app (chat, agent, memory, documents, +email, calendar, deep research) runs fully native. For full **Cookbook** background +model downloads and the agent shell tool, also install +[Git for Windows](https://git-scm.com/download/win) (provides `bash.exe`). +Local GPU *serving* of vLLM/SGLang needs Linux/WSL2; for a local model on Windows, +[Ollama](https://ollama.com/download) is the easiest path — point Odysseus at +`http://localhost:11434/v1` in Settings. + +Open `http://localhost:7000`, log in with the generated admin password, +and configure everything else inside **Settings**. + ## Security Notes Odysseus is a self-hosted workspace with powerful local tools: shell access, file uploads, model downloads, web research, email/calendar integrations, and API tokens. Treat it like an admin console. diff --git a/app.py b/app.py index a07e947..63974e8 100644 --- a/app.py +++ b/app.py @@ -1,7 +1,22 @@ # app.py — slim orchestrator -from dotenv import load_dotenv -load_dotenv() import os + +# Windows: force HuggingFace/fastembed to COPY model files instead of symlinking. +# On a network-share/UNC data dir Windows can't follow HF's symlinks ([WinError +# 1463]), so the ONNX embedding model fails to load. huggingface_hub reads this +# at import time, so set it before anything pulls it in. (Mirrored in +# src/embeddings.py for non-server entrypoints.) +if os.name == "nt": + os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS", "1") + os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS_WARNING", "1") + +from dotenv import load_dotenv +# encoding="utf-8-sig" tolerates a UTF-8 BOM in .env — a common Windows gotcha +# when the file is saved from Notepad. Without this, the first key parses as +# "AUTH_ENABLED" instead of "AUTH_ENABLED", so AUTH_ENABLED=false (etc.) +# is silently ignored and the user is unexpectedly forced to log in (issue #142). +# utf-8-sig reads plain UTF-8 (no BOM) identically, so this is safe everywhere. +load_dotenv(encoding="utf-8-sig") import uuid import asyncio @@ -170,6 +185,31 @@ if AUTH_ENABLED: _token_cache.update(new_map) app.state._token_cache_dirty = False + # Headers that prove a request was forwarded by a proxy/tunnel (cloudflared, + # nginx, Caddy, Tailscale Funnel, …). cloudflared connects to the app FROM + # 127.0.0.1, so without this check every tunneled request would look like + # loopback and could bypass auth. + _PROXY_FWD_HEADERS = ( + "cf-connecting-ip", "cf-ray", "cf-visitor", + "x-forwarded-for", "x-forwarded-host", "x-real-ip", "forwarded", + ) + + def _is_trusted_loopback(request: Request) -> bool: + """True ONLY for a DIRECT loopback connection with no proxy/tunnel + forwarding headers. A bare ``client.host in ('127.0.0.1','::1')`` check is + unsafe behind a Cloudflare tunnel / reverse proxy: those connect from + loopback, so a remote visitor would otherwise inherit local trust and + slip past LOCALHOST_BYPASS or spoof the internal-tool path. Odysseus's own + in-process agent loopback calls carry none of these headers, so they still + qualify.""" + host = request.client.host if request.client else None + if host not in ("127.0.0.1", "::1"): + return False + for _h in _PROXY_FWD_HEADERS: + if request.headers.get(_h): + return False + return True + class AuthMiddleware(BaseHTTPMiddleware): async def dispatch(self, request: Request, call_next): path = request.url.path @@ -182,8 +222,7 @@ if AUTH_ENABLED: try: from core.middleware import INTERNAL_TOOL_HEADER, INTERNAL_TOOL_TOKEN as _ITT _hdr = request.headers.get(INTERNAL_TOOL_HEADER) - _client_host = request.client.host if request.client else None - if _hdr and _hdr == _ITT and _client_host in ("127.0.0.1", "::1"): + if _hdr and _hdr == _ITT and _is_trusted_loopback(request): # Impersonation: when the agent's loopback call sets # X-Odysseus-Owner, attribute the request to that # user so notes/calendar/etc. land in their account @@ -196,12 +235,13 @@ if AUTH_ENABLED: return await call_next(request) except Exception: pass - # Allow localhost requests (internal service calls from heartbeats etc.) - # Disable with LOCALHOST_BYPASS=false when exposing via reverse proxy / Tailscale Funnel - if LOCALHOST_BYPASS: - client_host = request.client.host if request.client else None - if client_host in ("127.0.0.1", "::1"): - return await call_next(request) + # Allow DIRECT localhost requests (internal service calls from + # heartbeats etc.). Tunnel/proxy-forwarded requests are excluded by + # _is_trusted_loopback so LOCALHOST_BYPASS can't be abused over a + # Cloudflare tunnel / reverse proxy. Keep LOCALHOST_BYPASS=false for + # network-exposed deployments regardless. + if LOCALHOST_BYPASS and _is_trusted_loopback(request): + return await call_next(request) if not auth_manager.is_configured: # No users yet — redirect to login for first-time setup if not path.startswith("/api/"): @@ -819,7 +859,7 @@ async def startup_event(): try: import json as _json auth_path = "data/auth.json" - with open(auth_path) as f: + with open(auth_path, encoding="utf-8") as f: users = _json.load(f).get("users", {}) owners.update(users.keys()) except Exception as e: @@ -866,7 +906,7 @@ async def startup_event(): try: import json as _json auth_path = "data/auth.json" - with open(auth_path) as f: + with open(auth_path, encoding="utf-8") as f: users = _json.load(f).get("users", {}) primary_owner = None for uname, udata in users.items(): diff --git a/core/atomic_io.py b/core/atomic_io.py index b7801ec..9d6ca12 100644 --- a/core/atomic_io.py +++ b/core/atomic_io.py @@ -26,7 +26,7 @@ def atomic_write_json(path: str, data: Any, *, indent: Optional[int] = None) -> """ os.makedirs(os.path.dirname(path) or ".", exist_ok=True) tmp = f"{path}.tmp.{os.getpid()}" - with open(tmp, "w") as f: + with open(tmp, "w", encoding="utf-8") as f: json.dump(data, f, indent=indent) f.flush() os.fsync(f.fileno()) @@ -36,7 +36,7 @@ def atomic_write_json(path: str, data: Any, *, indent: Optional[int] = None) -> def atomic_write_text(path: str, text: str) -> None: os.makedirs(os.path.dirname(path) or ".", exist_ok=True) tmp = f"{path}.tmp.{os.getpid()}" - with open(tmp, "w") as f: + with open(tmp, "w", encoding="utf-8") as f: f.write(text) f.flush() os.fsync(f.fileno()) diff --git a/core/auth.py b/core/auth.py index b254ffc..4d35554 100644 --- a/core/auth.py +++ b/core/auth.py @@ -68,7 +68,7 @@ class AuthManager: def _load(self): try: if os.path.exists(self.auth_path): - with open(self.auth_path, "r") as f: + with open(self.auth_path, "r", encoding="utf-8") as f: self._config = json.load(f) logger.info("Auth config loaded") else: @@ -82,7 +82,7 @@ class AuthManager: """Load persisted session tokens from disk, pruning expired ones.""" try: if os.path.exists(self._sessions_path): - with open(self._sessions_path, "r") as f: + with open(self._sessions_path, "r", encoding="utf-8") as f: data = json.load(f) now = time.time() self._sessions = {k: v for k, v in data.items() if v.get("expiry", 0) > now} diff --git a/core/database.py b/core/database.py index 29377c2..745c42d 100644 --- a/core/database.py +++ b/core/database.py @@ -996,7 +996,7 @@ def _migrate_assign_legacy_owner(): auth_path = os.path.join("data", "auth.json") admin_user = None try: - with open(auth_path, "r") as f: + with open(auth_path, "r", encoding="utf-8") as f: auth_data = _json.load(f) users = auth_data.get("users", {}) if users: @@ -1067,12 +1067,12 @@ def _migrate_assign_legacy_owner(): prefs_path = os.path.join("data", "user_prefs.json") try: if os.path.exists(prefs_path): - with open(prefs_path, "r") as f: + with open(prefs_path, "r", encoding="utf-8") as f: prefs = _json.load(f) if "_users" not in prefs and prefs: # Flat format → nest under admin user new_prefs = {"_users": {admin_user: prefs}} - with open(prefs_path, "w") as f: + with open(prefs_path, "w", encoding="utf-8") as f: _json.dump(new_prefs, f, indent=2) logger.info(f"Migrated user_prefs.json to per-user format under '{admin_user}'") except Exception as e: @@ -1437,7 +1437,7 @@ def _migrate_seed_email_account(): if not settings_file.exists(): return try: - s = _json.loads(settings_file.read_text()) + s = _json.loads(settings_file.read_text(encoding="utf-8")) except Exception: return diff --git a/core/platform_compat.py b/core/platform_compat.py new file mode 100644 index 0000000..01ebe32 --- /dev/null +++ b/core/platform_compat.py @@ -0,0 +1,203 @@ +"""Cross-platform OS compatibility helpers. + +Odysseus began as a Linux/macOS/Docker-only app. This module centralizes the +small set of OS differences needed to run it *natively* on Windows so the rest +of the codebase can stay platform-agnostic. Import from here instead of +sprinkling ``os.name == "nt"`` checks (and POSIX-only calls) across modules. + +Design rules: + * Stdlib + ctypes only — no new third-party deps (no psutil/pywinpty). + * POSIX behaviour is unchanged; Windows gets a faithful equivalent or a + safe, documented no-op. +""" + +from __future__ import annotations + +import os +import shutil +import subprocess +from pathlib import Path +from typing import List, Optional + +IS_WINDOWS = os.name == "nt" +IS_POSIX = not IS_WINDOWS + + +# ── File permissions ──────────────────────────────────────────────────────── +def safe_chmod(path, mode: int) -> bool: + """``os.chmod`` that is a harmless no-op on Windows. + + On POSIX we apply the mode — used to lock secret/key files down to 0o600. + Windows has no POSIX permission bits; files under the user profile are + already ACL-restricted to that user, so we skip rather than raise. Returns + True when the mode was actually applied. + """ + if IS_WINDOWS: + return False + try: + os.chmod(path, mode) + return True + except OSError: + return False + + +# ── Process detach / liveness / teardown ──────────────────────────────────── +def detached_popen_kwargs() -> dict: + """Keyword args for :class:`subprocess.Popen` that fully detach a child so + it outlives the request/stream that launched it. + + POSIX: ``start_new_session=True`` (setsid) — new session + process group. + Windows: ``CREATE_NEW_PROCESS_GROUP | DETACHED_PROCESS`` — the child gets + its own process group (so it isn't killed when the parent's console closes) + and is detached from any console. + """ + if IS_WINDOWS: + flags = ( + getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0x00000200) + | getattr(subprocess, "DETACHED_PROCESS", 0x00000008) + ) + return {"creationflags": flags} + return {"start_new_session": True} + + +def pid_alive(pid: Optional[int]) -> bool: + """True if a process with ``pid`` is currently running. + + POSIX uses the classic ``os.kill(pid, 0)`` probe. That is **unsafe on + Windows**: CPython's ``os.kill`` calls ``TerminateProcess(handle, sig)`` for + any signal other than CTRL_C/CTRL_BREAK, so ``os.kill(pid, 0)`` would *kill* + the process it is checking. We instead open the process and read its exit + code via the Win32 API. + """ + if not pid: + return False + if IS_WINDOWS: + import ctypes + from ctypes import wintypes + + PROCESS_QUERY_LIMITED_INFORMATION = 0x1000 + STILL_ACTIVE = 259 + kernel32 = ctypes.windll.kernel32 + handle = kernel32.OpenProcess( + PROCESS_QUERY_LIMITED_INFORMATION, False, int(pid) + ) + if not handle: + return False + try: + code = wintypes.DWORD() + if kernel32.GetExitCodeProcess(handle, ctypes.byref(code)): + return code.value == STILL_ACTIVE + return False + finally: + kernel32.CloseHandle(handle) + try: + os.kill(pid, 0) + return True + except (OSError, ProcessLookupError): + return False + + +def kill_process_tree(pid: Optional[int]) -> None: + """Terminate ``pid`` and all of its descendants. + + POSIX: signal the whole process group (``killpg``), falling back to a plain + ``kill`` if the pid isn't a group leader. + Windows: ``taskkill /T /F`` walks and kills the child tree (there is no + process-group signalling). + """ + if not pid: + return + if IS_WINDOWS: + try: + subprocess.run( + ["taskkill", "/F", "/T", "/PID", str(pid)], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + creationflags=getattr(subprocess, "CREATE_NO_WINDOW", 0), + ) + except Exception: + pass + return + import signal + + try: + os.killpg(os.getpgid(pid), signal.SIGTERM) + except Exception: + try: + os.kill(pid, signal.SIGTERM) + except Exception: + pass + + +# ── Shell / executable resolution ─────────────────────────────────────────── +_BASH_CACHE: Optional[str] = None +_BASH_PROBED = False + +# Common Git-for-Windows install locations to probe when bash isn't on PATH. +_WINDOWS_BASH_FALLBACKS = ( + r"C:\Program Files\Git\bin\bash.exe", + r"C:\Program Files\Git\usr\bin\bash.exe", + r"C:\Program Files (x86)\Git\bin\bash.exe", +) + + +def find_bash() -> Optional[str]: + """Locate a real ``bash`` interpreter, or None. + + On Windows this is typically Git Bash / WSL. Many Odysseus features (the + agent ``bash`` tool, background jobs, Cookbook scripts) emit bash syntax, so + when a bash is present we use it and keep full parity with POSIX. Result is + cached. + """ + global _BASH_CACHE, _BASH_PROBED + if _BASH_PROBED: + return _BASH_CACHE + _BASH_PROBED = True + found = shutil.which("bash") + if not found and IS_WINDOWS: + for cand in _WINDOWS_BASH_FALLBACKS: + if os.path.exists(cand): + found = cand + break + _BASH_CACHE = found + return found + + +def has_bash() -> bool: + return find_bash() is not None + + +def which_tool(name: str) -> Optional[str]: + """``shutil.which`` that also tries Windows executable suffixes. + + On Windows, Node/npm shims are ``npx.cmd``/``npm.cmd`` and binaries end in + ``.exe``; a bare ``which("npx")`` can miss them depending on PATHEXT. We try + the bare name first, then the common suffixes. + """ + found = shutil.which(name) + if found: + return found + if IS_WINDOWS: + for ext in (".cmd", ".exe", ".bat"): + found = shutil.which(name + ext) + if found: + return found + return None + + +def run_script_argv(script_path) -> List[str]: + """argv to execute a shell *script file*. + + Prefers bash (so existing ``.sh`` wrappers work verbatim, including on + Windows via Git Bash). On Windows with no bash available, falls back to + ``cmd.exe /c`` — simple commands still run, but bash-specific syntax won't. + Callers that need guaranteed bash should check :func:`has_bash` first and + surface a clear "install Git Bash" message. + """ + bash = find_bash() + if bash: + return [bash, str(script_path)] + if IS_WINDOWS: + comspec = os.environ.get("ComSpec", "cmd.exe") + return [comspec, "/c", str(script_path)] + return ["sh", str(script_path)] diff --git a/launch-windows.ps1 b/launch-windows.ps1 new file mode 100644 index 0000000..827bfdc --- /dev/null +++ b/launch-windows.ps1 @@ -0,0 +1,79 @@ +#Requires -Version 5.1 +<# + Odysseus - native Windows launcher (no Docker). + + One command to: create a virtualenv, install dependencies, run first-time + setup (prints an admin password on first run), and start the server. + Safe to re-run - it skips whatever already exists. + + Usage: + powershell -ExecutionPolicy Bypass -File .\launch-windows.ps1 + powershell -ExecutionPolicy Bypass -File .\launch-windows.ps1 -Port 7000 -BindHost 127.0.0.1 + + Tip: bind 127.0.0.1 (default) for local-only use. Use 0.0.0.0 only when you + intentionally want other devices on your LAN to reach it. +#> +param( + [int]$Port = 7000, + [string]$BindHost = "127.0.0.1" +) + +$ErrorActionPreference = "Stop" +Set-Location -Path $PSScriptRoot + +function Write-Step($msg) { Write-Host ""; Write-Host ("==> " + $msg) -ForegroundColor Cyan } +function Fail($msg) { + Write-Host "" + Write-Host ("ERROR: " + $msg) -ForegroundColor Red + Write-Host "" + Read-Host "Press Enter to exit" + exit 1 +} + +# 1. Locate a Python interpreter (3.11+ recommended) +Write-Step "Checking for Python" +$pyExe = $null +foreach ($c in @("python", "py")) { + $cmd = Get-Command $c -ErrorAction SilentlyContinue + if ($cmd) { $pyExe = $cmd.Source; break } +} +if (-not $pyExe) { + Fail "Python not found on PATH. Install Python 3.11+ from https://www.python.org/downloads/ (check 'Add to PATH'), then re-run this script." +} +Write-Host ("Using Python: " + $pyExe) + +# 2. Create the virtualenv if missing +$venvPy = Join-Path $PSScriptRoot "venv\Scripts\python.exe" +if (-not (Test-Path $venvPy)) { + Write-Step "Creating virtual environment (venv)" + & $pyExe -m venv venv + if ($LASTEXITCODE -ne 0 -or -not (Test-Path $venvPy)) { Fail "Failed to create the virtual environment." } +} else { + Write-Host "venv already exists - skipping creation." +} + +# 3. Install / update dependencies +Write-Step "Installing dependencies (first run can take a few minutes)" +& $venvPy -m pip install --upgrade pip --quiet +& $venvPy -m pip install -r requirements.txt +if ($LASTEXITCODE -ne 0) { Fail "Dependency install failed. Scroll up for the pip error." } + +# 4. First-time setup (creates data dirs, DB, .env, admin user) +Write-Step "Running first-time setup" +& $venvPy setup.py +if ($LASTEXITCODE -ne 0) { Fail "setup.py failed." } + +# 5. Friendly note about Git Bash (full Cookbook / agent-shell parity) +if (-not (Get-Command bash -ErrorAction SilentlyContinue)) { + Write-Host "" + Write-Host "NOTE: Git Bash (bash.exe) was not found on PATH." -ForegroundColor Yellow + Write-Host " The core app works without it. For full Cookbook background" -ForegroundColor Yellow + Write-Host " downloads and the agent shell tool, install Git for Windows:" -ForegroundColor Yellow + Write-Host " https://git-scm.com/download/win" -ForegroundColor Yellow +} + +# 6. Start the server (use `python -m uvicorn` - bare `uvicorn` may not be on PATH) +Write-Step ("Starting Odysseus at http://{0}:{1}" -f $BindHost, $Port) +Write-Host "Press Ctrl+C to stop." +Write-Host "" +& $venvPy -m uvicorn app:app --host $BindHost --port $Port diff --git a/mcp_servers/email_server.py b/mcp_servers/email_server.py index f5b89ee..bde4307 100644 --- a/mcp_servers/email_server.py +++ b/mcp_servers/email_server.py @@ -197,7 +197,7 @@ def _load_config(account: str | None = None) -> dict: try: settings_path = Path(__file__).resolve().parent.parent / "data" / "settings.json" if settings_path.exists(): - settings = json.loads(settings_path.read_text()) + settings = json.loads(settings_path.read_text(encoding="utf-8")) for key in ( "imap_host", "imap_port", "imap_user", "imap_password", "smtp_host", "smtp_port", "smtp_user", "smtp_password", diff --git a/routes/admin_wipe_routes.py b/routes/admin_wipe_routes.py index 89d8ed0..668b02d 100644 --- a/routes/admin_wipe_routes.py +++ b/routes/admin_wipe_routes.py @@ -44,7 +44,7 @@ def _wipe_memory_files(): continue try: if name == "memory.json": - with open(p, "w") as f: + with open(p, "w", encoding="utf-8") as f: json.dump([], f) else: os.remove(p) diff --git a/routes/contacts_routes.py b/routes/contacts_routes.py index 4d55959..8db5463 100644 --- a/routes/contacts_routes.py +++ b/routes/contacts_routes.py @@ -29,7 +29,7 @@ LOCAL_CONTACTS_FILE = DATA_DIR / "contacts.json" def _load_settings(): if SETTINGS_FILE.exists(): - return json.loads(SETTINGS_FILE.read_text()) + return json.loads(SETTINGS_FILE.read_text(encoding="utf-8")) return {} @@ -79,7 +79,7 @@ def _load_local_contacts() -> List[Dict]: try: if not LOCAL_CONTACTS_FILE.exists(): return [] - data = json.loads(LOCAL_CONTACTS_FILE.read_text()) + data = json.loads(LOCAL_CONTACTS_FILE.read_text(encoding="utf-8")) rows = data.get("contacts", data) if isinstance(data, dict) else data return [_normalize_contact(c) for c in (rows or []) if isinstance(c, dict)] except Exception as e: diff --git a/routes/cookbook_routes.py b/routes/cookbook_routes.py index 921ed34..b14a147 100644 --- a/routes/cookbook_routes.py +++ b/routes/cookbook_routes.py @@ -7,6 +7,7 @@ import os import re import shlex import shutil +import subprocess import sys import uuid from pathlib import Path @@ -17,6 +18,15 @@ from src.auth_helpers import require_user from pydantic import BaseModel from core.middleware import require_admin +from core.platform_compat import ( + IS_WINDOWS, + detached_popen_kwargs, + find_bash, + kill_process_tree, + pid_alive, + safe_chmod, + which_tool, +) from routes.shell_routes import TMUX_LOG_DIR logger = logging.getLogger(__name__) @@ -208,16 +218,20 @@ def setup_cookbook_routes() -> APIRouter: if not _cookbook_state_path.exists(): return "" try: - state = json.loads(_cookbook_state_path.read_text()) + state = json.loads(_cookbook_state_path.read_text(encoding="utf-8")) env = state.get("env") if isinstance(state, dict) else {} return _decrypt_secret(env.get("hfToken") if isinstance(env, dict) else "") except Exception: return "" def _cookbook_ssh_dir() -> Path: - app_ssh = Path("/app/.ssh") - if Path("/app").exists(): - return app_ssh + # The Docker image keeps cookbook keys under /app/.ssh; that path only + # exists inside the container. On Windows (and any non-container host) + # fall back to the user profile's ~/.ssh, which OpenSSH on Win10+ uses. + if not IS_WINDOWS: + app_ssh = Path("/app/.ssh") + if Path("/app").exists(): + return app_ssh return Path.home() / ".ssh" def _cookbook_ssh_key_path() -> Path: @@ -244,13 +258,15 @@ def setup_cookbook_routes() -> APIRouter: ssh_dir = _cookbook_ssh_dir() key_path = _cookbook_ssh_key_path() ssh_dir.mkdir(parents=True, exist_ok=True) - try: - os.chmod(ssh_dir, 0o700) - except Exception: - pass + # safe_chmod no-ops on Windows (~/.ssh is already ACL-restricted to the + # user profile); applies 0o700 on POSIX. + safe_chmod(ssh_dir, 0o700) if not key_path.exists(): + # ssh-keygen ships with the OpenSSH client on Win10+; resolve it via + # which_tool so the .exe is found even when PATHEXT is unusual. + ssh_keygen = which_tool("ssh-keygen") or "ssh-keygen" proc = await asyncio.create_subprocess_exec( - "ssh-keygen", "-t", "ed25519", "-N", "", "-C", "odysseus-cookbook", "-f", str(key_path), + ssh_keygen, "-t", "ed25519", "-N", "", "-C", "odysseus-cookbook", "-f", str(key_path), stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, ) @@ -258,11 +274,8 @@ def setup_cookbook_routes() -> APIRouter: if proc.returncode != 0: detail = (stderr or stdout).decode("utf-8", errors="replace").strip()[-500:] return {"ok": False, "error": detail or "Failed to generate SSH key"} - try: - os.chmod(key_path, 0o600) - os.chmod(key_path.with_suffix(".pub"), 0o644) - except Exception: - pass + safe_chmod(key_path, 0o600) + safe_chmod(key_path.with_suffix(".pub"), 0o644) return {"ok": True, "public_key": _read_cookbook_public_key()} def _user_shell_path_bootstrap() -> list[str]: @@ -314,6 +327,56 @@ def setup_cookbook_routes() -> APIRouter: return await _remote_binary_available(remote, ssh_port, binary, windows=windows) return shutil.which(binary) is not None + def _launch_local_detached(session_id: str, bash_lines: list[str]) -> dict: + """Windows-native stand-in for a LOCAL tmux session (tmux doesn't exist + on Windows). Mirrors shell_routes._generate_win_detached / bg_jobs.launch: + runs the wrapper detached so it survives a browser/SSE disconnect (the + whole point of the tmux feature for long downloads/serves), writing a + .log the status poller tails and a .pid for liveness. + + `bash_lines` is the same bash wrapper used on POSIX. Prefers Git Bash + for full command-syntax parity; falls back to a cmd.exe wrapper that + runs the script through whatever bash is reachable, else best-effort + directly (simple commands only). Returns the launched job record.""" + log_path = TMUX_LOG_DIR / f"{session_id}.log" + pid_path = TMUX_LOG_DIR / f"{session_id}.pid" + bash = find_bash() + if bash: + # Run the existing bash wrapper verbatim through Git Bash, redirecting + # all output to the log the poller reads. Paths handed to bash use + # POSIX form + shell-quoting so drive paths / spaces survive. + inner = TMUX_LOG_DIR / f"{session_id}_run.sh" + inner.write_text("\n".join(bash_lines) + "\n", encoding="utf-8") + lp = shlex.quote(log_path.as_posix()) + ip = shlex.quote(inner.as_posix()) + script_path = TMUX_LOG_DIR / f"{session_id}.sh" + script_path.write_text( + f"bash {ip} > {lp} 2>&1\n", + encoding="utf-8", + ) + argv = [bash, str(script_path)] + else: + # No bash on this Windows host: the bash wrapper can't run. Fall back + # to a cmd.exe wrapper that just records a clear error to the log so + # the UI surfaces "install Git Bash" instead of silently hanging. + script_path = TMUX_LOG_DIR / f"{session_id}.cmd" + script_path.write_text( + "@echo off\r\n" + f'echo Cookbook LOCAL execution on Windows needs Git Bash ^(bash.exe^) on PATH. > "{log_path}" 2>&1\r\n' + f'echo Install Git for Windows, then retry. >> "{log_path}"\r\n', + encoding="utf-8", + ) + argv = [os.environ.get("ComSpec", "cmd.exe"), "/c", str(script_path)] + proc = subprocess.Popen( + argv, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + stdin=subprocess.DEVNULL, + **detached_popen_kwargs(), + ) + pid_path.write_text(str(proc.pid), encoding="utf-8") + return {"pid": proc.pid, "log_path": str(log_path)} + @router.post("/api/model/download") async def model_download(request: Request, req: ModelDownloadRequest): """Download a HuggingFace model in a tmux session. @@ -379,9 +442,12 @@ def setup_cookbook_routes() -> APIRouter: remote = req.remote_host # None for local is_windows = req.platform == "windows" + # LOCAL execution on a native-Windows host never uses tmux (it uses the + # detached-process path below), regardless of the UI-supplied platform. + local_windows = IS_WINDOWS and not remote logger.info(f"Download request: repo={req.repo_id}, remote={remote}, ssh_port={req.ssh_port}, platform={req.platform}") - if not is_windows and not await _binary_available("tmux", remote, req.ssh_port): + if not is_windows and not local_windows and not await _binary_available("tmux", remote, req.ssh_port): return { "ok": False, "error": _missing_binary_message("tmux", remote or "local server"), @@ -425,7 +491,7 @@ def setup_cookbook_routes() -> APIRouter: ps_lines.append('}}') ps_lines.append(f'Remove-Item -Force "$HOME\\{remote_runner}" -ErrorAction SilentlyContinue') runner_path = TMUX_LOG_DIR / f"{session_id}_run.ps1" - runner_path.write_text("\r\n".join(ps_lines) + "\r\n") + runner_path.write_text("\r\n".join(ps_lines) + "\r\n", encoding="utf-8") # scp the .ps1 script, then launch it as a detached process with log + pid files _port = req.ssh_port @@ -492,8 +558,10 @@ def setup_cookbook_routes() -> APIRouter: runner_lines.append(f"rm -f {remote_runner}") runner_lines.append('exec "${SHELL:-/bin/bash}"') runner_path = TMUX_LOG_DIR / f"{session_id}_run.sh" - runner_path.write_text("\n".join(runner_lines) + "\n") - runner_path.chmod(0o755) + runner_path.write_text("\n".join(runner_lines) + "\n", encoding="utf-8") + # Local temp file is scp'd then chmod'd on the remote; the local bit + # is irrelevant (no-op on Windows). + safe_chmod(runner_path, 0o755) # scp the runner script, then create tmux session on the remote _port = req.ssh_port @@ -504,7 +572,8 @@ def setup_cookbook_routes() -> APIRouter: f"ssh {_spf}{remote} 'chmod +x {remote_runner} && tmux new-session -d -s {session_id} \"./{remote_runner}\"'" ) else: - # Local: run hf download in a local tmux session + # Local: run hf download in the background (tmux on POSIX, a detached + # process + logfile on Windows where tmux doesn't exist). if req.env_prefix: lines.append(_safe_env_prefix(req.env_prefix)) else: @@ -512,29 +581,43 @@ def setup_cookbook_routes() -> APIRouter: # Show whether the HF token reached this run (masked) — tells a gated # "not authorized" failure apart from a missing token. lines.append(_HF_TOKEN_STATUS_SNIPPET) - # < /dev/null suppresses interactive "update available? [Y/n]" prompt - lines.append(f"{hf_cmd} < /dev/null") - lines.append('if [ $? -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $?)"; fi') - lines.append(f"rm -f '{wrapper_script}'") - lines.append('exec "${SHELL:-/bin/bash}"') - wrapper_script.write_text("\n".join(lines) + "\n") - wrapper_script.chmod(0o755) - setup_cmd = f"tmux new-session -d -s {session_id} {shlex.quote(str(wrapper_script))}" + if IS_WINDOWS: + # Detached path: no controlling TTY, so skip `< /dev/null` + # (handled by Popen stdin=DEVNULL) and don't keep a shell open. + lines.append(hf_cmd) + lines.append('if [ $? -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $?)"; fi') + else: + # < /dev/null suppresses interactive "update available? [Y/n]" prompt + lines.append(f"{hf_cmd} < /dev/null") + lines.append('if [ $? -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $?)"; fi') + lines.append(f"rm -f '{wrapper_script}'") + lines.append('exec "${SHELL:-/bin/bash}"') + wrapper_script.write_text("\n".join(lines) + "\n", encoding="utf-8") + wrapper_script.chmod(0o755) + setup_cmd = None if IS_WINDOWS else f"tmux new-session -d -s {session_id} {shlex.quote(str(wrapper_script))}" logger.info(f"Model download: {req.repo_id} (include={req.include}, session={session_id}, remote={remote})") logger.info(f"Download setup_cmd: {setup_cmd}") - proc = await asyncio.create_subprocess_shell( - setup_cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - await proc.wait() + if setup_cmd is None: + # LOCAL Windows: launch the bash wrapper detached; no tmux setup_cmd. + try: + _launch_local_detached(session_id, lines) + except Exception as e: + logger.error(f"Local detached download launch failed: {e}") + return {"ok": False, "error": str(e), "session_id": session_id} + else: + proc = await asyncio.create_subprocess_shell( + setup_cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + await proc.wait() - if proc.returncode != 0: - stderr = (await proc.stderr.read()).decode(errors="replace") - logger.error(f"Download failed (rc={proc.returncode}): {stderr}") - return {"ok": False, "error": stderr, "session_id": session_id} + if proc.returncode != 0: + stderr = (await proc.stderr.read()).decode(errors="replace") + logger.error(f"Download failed (rc={proc.returncode}): {stderr}") + return {"ok": False, "error": stderr, "session_id": session_id} # Log to assistant try: @@ -643,7 +726,7 @@ def setup_cookbook_routes() -> APIRouter: paths_code += "print(json.dumps(models))\n" scan_py = TMUX_LOG_DIR / "scan_cache.py" - scan_py.write_text(paths_code) + scan_py.write_text(paths_code, encoding="utf-8") if host: _pf = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else "" @@ -652,15 +735,27 @@ def setup_cookbook_routes() -> APIRouter: cmd = f'ssh {_pf}{host} "python -" < \'{scan_py}\'' else: cmd = f"ssh {_pf}{host} 'python3 -' < '{scan_py}'" + proc = await asyncio.create_subprocess_shell( + cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=str(Path.home()), + ) else: - cmd = f"python3 '{scan_py}'" - - proc = await asyncio.create_subprocess_shell( - cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - cwd=str(Path.home()), - ) + # LOCAL scan: run the interpreter directly. `python3` isn't a thing on + # Windows (it's `python`/`py`), and shell single-quoting of the path + # doesn't survive cmd.exe — so resolve the interpreter and exec it + # with the script path as an argv element (no shell quoting needed). + local_py = ( + which_tool("python3") or which_tool("python") + or which_tool("py") or "python" + ) + proc = await asyncio.create_subprocess_exec( + local_py, str(scan_py), + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + cwd=str(Path.home()), + ) stdout_b, stderr_b = await asyncio.wait_for(proc.communicate(), timeout=60) models = [] @@ -785,8 +880,11 @@ def setup_cookbook_routes() -> APIRouter: session_id = f"serve-{uuid.uuid4().hex[:8]}" remote = req.remote_host is_windows = req.platform == "windows" + # LOCAL execution on a native-Windows host never uses tmux (detached + # process path below), regardless of the UI-supplied platform. + local_windows = IS_WINDOWS and not remote - if not is_windows and not await _binary_available("tmux", remote, req.ssh_port): + if not is_windows and not local_windows and not await _binary_available("tmux", remote, req.ssh_port): return { "ok": False, "error": _missing_binary_message("tmux", remote or "local server"), @@ -832,7 +930,7 @@ def setup_cookbook_routes() -> APIRouter: ps_lines.append('Write-Host ""') ps_lines.append('Write-Host "=== Process exited with code $LASTEXITCODE ==="') runner_path = TMUX_LOG_DIR / f"{session_id}_run.ps1" - runner_path.write_text("\r\n".join(ps_lines) + "\r\n") + runner_path.write_text("\r\n".join(ps_lines) + "\r\n", encoding="utf-8") _port = req.ssh_port _Pf = f"-P {_port} " if _port and _port != "22" else "" @@ -956,14 +1054,24 @@ def setup_cookbook_routes() -> APIRouter: runner_lines.append('fi') runner_lines.append(req.cmd) - # Keep shell open after exit so user can see errors - runner_lines.append('echo ""; echo "=== Process exited with code $? ==="; exec "${SHELL:-/bin/bash}"') + if local_windows: + # Detached background process — no interactive shell to keep open. + # Print the exit marker the status poller looks for, then stop. + runner_lines.append('echo ""; echo "=== Process exited with code $? ==="') + else: + # Keep shell open after exit so user can see errors + runner_lines.append('echo ""; echo "=== Process exited with code $? ==="; exec "${SHELL:-/bin/bash}"') runner_path = TMUX_LOG_DIR / f"{session_id}_run.sh" - runner_path.write_text("\n".join(runner_lines) + "\n") - runner_path.chmod(0o755) + runner_path.write_text("\n".join(runner_lines) + "\n", encoding="utf-8") + # chmod is a no-op on Windows; bash on Windows runs the script + # regardless of the executable bit. + safe_chmod(runner_path, 0o755) - if remote: + if local_windows: + # LOCAL Windows: launch the bash runner detached (tmux replacement). + setup_cmd = None + elif remote: remote_runner = f".{session_id}_run.sh" # If command references scripts/, scp those too scp_extras = "" @@ -976,9 +1084,10 @@ def setup_cookbook_routes() -> APIRouter: if diff_script.exists(): scp_extras = f"scp -O {_Pf}-q '{diff_script}' {remote}:.diffusion_server.py && " runner_path.write_text( - runner_path.read_text().replace( + runner_path.read_text(encoding="utf-8").replace( "scripts/diffusion_server.py", ".diffusion_server.py" - ) + ), + encoding="utf-8", ) setup_cmd = ( f"{scp_extras}" @@ -988,16 +1097,24 @@ def setup_cookbook_routes() -> APIRouter: else: setup_cmd = f"tmux new-session -d -s {session_id} {shlex.quote(str(runner_path))}" - proc = await asyncio.create_subprocess_shell( - setup_cmd, - stdout=asyncio.subprocess.PIPE, - stderr=asyncio.subprocess.PIPE, - ) - await proc.wait() + if setup_cmd is None: + # LOCAL Windows: launch the bash runner detached; no tmux setup_cmd. + try: + _launch_local_detached(session_id, runner_lines) + except Exception as e: + logger.error(f"Local detached serve launch failed: {e}") + return {"ok": False, "error": str(e), "session_id": session_id} + else: + proc = await asyncio.create_subprocess_shell( + setup_cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + await proc.wait() - if proc.returncode != 0: - stderr = (await proc.stderr.read()).decode(errors="replace") - return {"ok": False, "error": stderr, "session_id": session_id} + if proc.returncode != 0: + stderr = (await proc.stderr.read()).decode(errors="replace") + return {"ok": False, "error": stderr, "session_id": session_id} # Auto-register as model endpoint if serving a diffusion model endpoint_id = None @@ -1404,6 +1521,16 @@ def setup_cookbook_routes() -> APIRouter: proc = await asyncio.create_subprocess_shell( cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE ) + elif IS_WINDOWS: + # No `kill` binary / POSIX signals on Windows. taskkill /F /T tears + # down the PID and its children. There's no graceful-vs-force + # distinction, so TERM/KILL/INT all map to the same forced kill. + # NB: never use os.kill(pid, 0) to probe here — on Windows that + # routes to TerminateProcess and would kill the process. + if not pid_alive(req.pid): + return {"ok": False, "error": f"PID {req.pid} is not running"} + await asyncio.to_thread(kill_process_tree, req.pid) + return {"ok": True, "pid": req.pid, "signal": sig} else: proc = await asyncio.create_subprocess_exec( "kill", f"-{sig}", str(req.pid), @@ -1427,7 +1554,7 @@ def setup_cookbook_routes() -> APIRouter: require_admin(request) if _cookbook_state_path.exists(): try: - return _state_for_client(json.loads(_cookbook_state_path.read_text())) + return _state_for_client(json.loads(_cookbook_state_path.read_text(encoding="utf-8"))) except Exception: return {} return {} @@ -1456,7 +1583,7 @@ def setup_cookbook_routes() -> APIRouter: data = {} try: if _cookbook_state_path.exists(): - on_disk = json.loads(_cookbook_state_path.read_text()) + on_disk = json.loads(_cookbook_state_path.read_text(encoding="utf-8")) else: on_disk = {} except Exception: @@ -1636,7 +1763,7 @@ def setup_cookbook_routes() -> APIRouter: tasks = [] if _cookbook_state_path.exists(): try: - state = json.loads(_cookbook_state_path.read_text()) + state = json.loads(_cookbook_state_path.read_text(encoding="utf-8")) saved_tasks = state.get("tasks", []) if isinstance(saved_tasks, list): tasks = saved_tasks @@ -1705,26 +1832,36 @@ def setup_cookbook_routes() -> APIRouter: ssh_base.extend(["-p", str(_tport)]) check_cmd = ssh_base + [remote, "tmux", "has-session", "-t", session_id] capture_cmd = ssh_base + [remote, "tmux", "capture-pane", "-t", session_id, "-p", "-S", "-50"] + elif IS_WINDOWS: + # LOCAL Windows task: launched as a detached process (no tmux). + # Liveness comes from the .pid file, output from the + # .log file the wrapper redirects into. No subprocess. + check_cmd = None + capture_cmd = None else: check_cmd = ["tmux", "has-session", "-t", session_id] capture_cmd = ["tmux", "capture-pane", "-t", session_id, "-p", "-S", "-50"] - try: - alive = subprocess.run(check_cmd, timeout=10, capture_output=True) - is_alive = alive.returncode == 0 - except Exception: - is_alive = False + local_win_task = (not remote) and IS_WINDOWS - # Capture last lines for progress. Prefer the "Downloading" line - # (real aggregate bytes) over "Fetching N files" (whole-file count that - # lags with hf_transfer). Falls back to the true last line otherwise. progress_text = "" full_snapshot = "" - if is_alive: + + if local_win_task: + # File-based liveness + output for the detached-process model. + pid_path = TMUX_LOG_DIR / f"{session_id}.pid" + log_path = TMUX_LOG_DIR / f"{session_id}.log" + task_pid = None try: - cap = subprocess.run(capture_cmd, timeout=10, capture_output=True, text=True) - if cap.returncode == 0: - full_snapshot = cap.stdout.strip() + task_pid = int(pid_path.read_text(encoding="utf-8").strip()) + except Exception: + task_pid = None + is_alive = pid_alive(task_pid) + try: + if log_path.exists(): + full_snapshot = log_path.read_text( + encoding="utf-8", errors="replace" + ).strip()[-12000:] lines = [l.strip() for l in full_snapshot.split('\n') if l.strip()] downloading_lines = [l for l in lines if l.startswith("Downloading")] if downloading_lines: @@ -1733,10 +1870,36 @@ def setup_cookbook_routes() -> APIRouter: progress_text = lines[-1] except Exception: pass + else: + try: + alive = subprocess.run(check_cmd, timeout=10, capture_output=True) + is_alive = alive.returncode == 0 + except Exception: + is_alive = False - # Determine status + # Capture last lines for progress. Prefer the "Downloading" line + # (real aggregate bytes) over "Fetching N files" (whole-file count that + # lags with hf_transfer). Falls back to the true last line otherwise. + if is_alive: + try: + cap = subprocess.run(capture_cmd, timeout=10, capture_output=True, text=True) + if cap.returncode == 0: + full_snapshot = cap.stdout.strip() + lines = [l.strip() for l in full_snapshot.split('\n') if l.strip()] + downloading_lines = [l for l in lines if l.startswith("Downloading")] + if downloading_lines: + progress_text = downloading_lines[-1] + elif lines: + progress_text = lines[-1] + except Exception: + pass + + # Determine status. For the local-Windows detached model the log file + # persists after the process exits, so a finished download still has a + # snapshot to classify (DOWNLOAD_OK / exit marker) — evaluate it even + # when the PID is gone instead of blindly reporting "stopped". status = "unknown" - if is_alive: + if is_alive or (local_win_task and full_snapshot): lower = full_snapshot.lower() has_exit = "=== process exited with code" in lower has_error = "error" in lower or "failed" in lower or "traceback" in lower @@ -1754,6 +1917,9 @@ def setup_cookbook_routes() -> APIRouter: status = "completed" elif "application startup complete" in lower: status = "ready" + elif not is_alive: + # local-Windows: process gone, log has no success/ready marker. + status = "stopped" else: status = "running" else: diff --git a/routes/document_helpers.py b/routes/document_helpers.py index b60ad94..4db04cd 100644 --- a/routes/document_helpers.py +++ b/routes/document_helpers.py @@ -148,7 +148,7 @@ def _locate_upload(upload_dir: str, file_id: str): try: idx_path = os.path.join(upload_dir, "uploads.json") if os.path.exists(idx_path): - with open(idx_path, "r") as f: + with open(idx_path, "r", encoding="utf-8") as f: idx = _json.load(f) for meta in (idx.values() if isinstance(idx, dict) else []): if meta.get("id") == file_id: diff --git a/routes/email_helpers.py b/routes/email_helpers.py index 4be3118..0315f06 100644 --- a/routes/email_helpers.py +++ b/routes/email_helpers.py @@ -444,7 +444,7 @@ _init_scheduled_db() def _load_settings(): if SETTINGS_FILE.exists(): - return json.loads(SETTINGS_FILE.read_text()) + return json.loads(SETTINGS_FILE.read_text(encoding="utf-8")) return {} diff --git a/routes/email_routes.py b/routes/email_routes.py index 4243209..f39fa11 100644 --- a/routes/email_routes.py +++ b/routes/email_routes.py @@ -2834,7 +2834,7 @@ def setup_email_routes(): if not path.exists(): return {"total_unread": 0, "total_urgent": 0, "max_score": 0, "per_uid": {}} try: - data = _json.loads(path.read_text()) + data = _json.loads(path.read_text(encoding="utf-8")) except Exception: return {"total_unread": 0, "total_urgent": 0, "max_score": 0, "per_uid": {}} # Drop `notified_uids` from the payload — it's an internal scheduler diff --git a/routes/embedding_routes.py b/routes/embedding_routes.py index ecdbfe0..bcf63d6 100644 --- a/routes/embedding_routes.py +++ b/routes/embedding_routes.py @@ -86,7 +86,7 @@ def _load_custom_endpoint() -> dict: """Load the saved custom embedding endpoint, if any.""" try: if os.path.exists(_ENDPOINT_FILE): - return json.loads(Path(_ENDPOINT_FILE).read_text()) + return json.loads(Path(_ENDPOINT_FILE).read_text(encoding="utf-8")) except Exception: pass return {} @@ -94,7 +94,7 @@ def _load_custom_endpoint() -> dict: def _save_custom_endpoint(data: dict): Path(_ENDPOINT_FILE).parent.mkdir(parents=True, exist_ok=True) - Path(_ENDPOINT_FILE).write_text(json.dumps(data, indent=2)) + Path(_ENDPOINT_FILE).write_text(json.dumps(data, indent=2), encoding="utf-8") def setup_embedding_routes(): diff --git a/routes/mcp_routes.py b/routes/mcp_routes.py index 8faf790..5b1a51d 100644 --- a/routes/mcp_routes.py +++ b/routes/mcp_routes.py @@ -141,7 +141,7 @@ def setup_mcp_routes(mcp_manager: McpManager): } } filepath = os.path.join(oauth_dir, oauth_filename) - with open(filepath, "w") as f: + with open(filepath, "w", encoding="utf-8") as f: json.dump(creds, f, indent=2) logger.info(f"Wrote OAuth credentials to {filepath}") parsed_env.pop("GOOGLE_CLIENT_ID", None) @@ -354,7 +354,7 @@ def setup_mcp_routes(mcp_manager: McpManager): if not keys_file or not os.path.exists(keys_file): raise HTTPException(400, "OAuth keys file not found") - with open(keys_file) as f: + with open(keys_file, encoding="utf-8") as f: keys_data = json.load(f) keys = keys_data.get("installed") or keys_data.get("web") if not keys: @@ -427,7 +427,7 @@ def setup_mcp_routes(mcp_manager: McpManager): keys_file = os.path.expanduser(oauth_cfg.get("keys_file", "")) token_file = os.path.expanduser(oauth_cfg.get("token_file", "")) - with open(keys_file) as f: + with open(keys_file, encoding="utf-8") as f: keys_data = json.load(f) keys = keys_data.get("installed") or keys_data.get("web") client_id = keys["client_id"] @@ -457,7 +457,7 @@ def setup_mcp_routes(mcp_manager: McpManager): # Save tokens to the file the MCP package expects os.makedirs(os.path.dirname(token_file), exist_ok=True) - with open(token_file, "w") as f: + with open(token_file, "w", encoding="utf-8") as f: json.dump(tokens, f, indent=2) logger.info(f"Saved OAuth tokens to {token_file}") diff --git a/routes/note_routes.py b/routes/note_routes.py index 5510087..925b4fb 100644 --- a/routes/note_routes.py +++ b/routes/note_routes.py @@ -145,7 +145,7 @@ async def dispatch_reminder( _slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default")) cache_path = _P(f"data/note_pings_{_slug}.json") if cache_path.exists(): - cache = _json.loads(cache_path.read_text()) + cache = _json.loads(cache_path.read_text(encoding="utf-8")) last = cache.get(cache_key) if last: last_channel = None @@ -428,7 +428,7 @@ async def dispatch_reminder( _STATE = _P(f"data/note_pings_{_slug}.json") _STATE.parent.mkdir(parents=True, exist_ok=True) try: - _cache = cache or (_json.loads(_STATE.read_text()) if _STATE.exists() else {}) + _cache = cache or (_json.loads(_STATE.read_text(encoding="utf-8")) if _STATE.exists() else {}) except Exception: _cache = {} sent_channel = "email" if email_sent else "ntfy" if ntfy_sent else "browser" @@ -436,7 +436,7 @@ async def dispatch_reminder( "at": _dt.now(_tz.utc).isoformat(), "channel": sent_channel, } - _STATE.write_text(_json.dumps(_cache)) + _STATE.write_text(_json.dumps(_cache), encoding="utf-8") except Exception as _e: logger.debug(f"dispatch_reminder: cache write failed: {_e}") diff --git a/routes/prefs_routes.py b/routes/prefs_routes.py index aa2b213..65f56a7 100644 --- a/routes/prefs_routes.py +++ b/routes/prefs_routes.py @@ -11,7 +11,7 @@ PREFS_FILE = os.path.join("data", "user_prefs.json") def _load(): """Load the raw prefs file (internal use only).""" try: - with open(PREFS_FILE, "r") as f: + with open(PREFS_FILE, "r", encoding="utf-8") as f: return json.load(f) except (FileNotFoundError, json.JSONDecodeError): return {} @@ -19,7 +19,7 @@ def _load(): def _save(prefs): os.makedirs(os.path.dirname(PREFS_FILE), exist_ok=True) - with open(PREFS_FILE, "w") as f: + with open(PREFS_FILE, "w", encoding="utf-8") as f: json.dump(prefs, f, indent=2) diff --git a/routes/research_routes.py b/routes/research_routes.py index 233cc82..4def1dd 100644 --- a/routes/research_routes.py +++ b/routes/research_routes.py @@ -69,7 +69,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: if not path.exists(): return False try: - return json.loads(path.read_text()).get("owner") == user + return json.loads(path.read_text(encoding="utf-8")).get("owner") == user except Exception: return False @@ -130,7 +130,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: if not path.exists(): raise HTTPException(404, "Research not found") try: - owner = json.loads(path.read_text()).get("owner") + owner = json.loads(path.read_text(encoding="utf-8")).get("owner") except Exception: raise HTTPException(404, "Research not found") if owner != user: @@ -190,7 +190,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: items = [] for p in data_dir.glob("*.json"): try: - d = json.loads(p.read_text()) + d = json.loads(p.read_text(encoding="utf-8")) # SECURITY: only show research belonging to this user. Legacy # JSONs without an `owner` field are hidden — auth was the only # gate before, so every user saw every other user's reports. @@ -239,7 +239,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: if not path.exists(): raise HTTPException(404, "Research not found") try: - data = json.loads(path.read_text()) + data = json.loads(path.read_text(encoding="utf-8")) except Exception as e: raise HTTPException(500, f"Failed to read research: {e}") # SECURITY: 404 (not 403) so we don't leak that the report exists. @@ -255,11 +255,11 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: if not path.exists(): raise HTTPException(404, "Research not found") try: - data = json.loads(path.read_text()) + data = json.loads(path.read_text(encoding="utf-8")) if data.get("owner") != user: raise HTTPException(404, "Research not found") data["archived"] = bool(archived) - path.write_text(json.dumps(data)) + path.write_text(json.dumps(data), encoding="utf-8") except HTTPException: raise except Exception as e: @@ -276,7 +276,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: if json_path.exists(): # SECURITY: verify ownership before letting the caller delete it. try: - data = json.loads(json_path.read_text()) + data = json.loads(json_path.read_text(encoding="utf-8")) if data.get("owner") != user: raise HTTPException(404, "Research not found") except HTTPException: @@ -452,7 +452,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: if result is None: p = Path("data/deep_research") / f"{session_id}.json" if p.exists(): - d = json.loads(p.read_text()) + d = json.loads(p.read_text(encoding="utf-8")) return { "result": d.get("result", ""), "sources": d.get("sources", []), @@ -486,7 +486,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter: path = Path("data/deep_research") / f"{session_id}.json" if path.exists(): try: - disk = json.loads(path.read_text()) + disk = json.loads(path.read_text(encoding="utf-8")) if not result: result = disk.get("result") if not sources: diff --git a/routes/shell_routes.py b/routes/shell_routes.py index f367b09..a29ccd3 100644 --- a/routes/shell_routes.py +++ b/routes/shell_routes.py @@ -6,11 +6,17 @@ import logging import os import shlex import shutil +import subprocess import uuid import tempfile from pathlib import Path from typing import Dict, Any +# POSIX-only: `pty`/`fcntl` transitively import `termios`, which does NOT exist +# on Windows, so importing them unconditionally crashed app startup there +# (ModuleNotFoundError: termios — issues #140/#92/#63/#149/#150). The PTY code +# path is only reachable on POSIX; Windows uses pipe streaming + a detached-job +# fallback for the tmux feature (see _generate_win_detached). try: import fcntl import pty @@ -25,6 +31,12 @@ from fastapi import APIRouter, Request, HTTPException from fastapi.responses import StreamingResponse from pydantic import BaseModel +from core.platform_compat import ( + IS_WINDOWS, + detached_popen_kwargs, + find_bash, +) + def _require_admin(request: Request): """Reject non-admin callers. Shell exec is admin-only — never expose to @@ -78,11 +90,25 @@ class ShellExecRequest(BaseModel): use_tmux: bool = False # run in tmux session (survives browser disconnect) +async def _create_shell(command: str, **kwargs): + """Spawn a shell subprocess for `command`. + + POSIX: /bin/sh via create_subprocess_shell (unchanged behaviour). + Windows: prefer a real bash (Git Bash/WSL) so bash-syntax commands behave + the same as on Linux; fall back to cmd.exe when no bash is installed. + """ + if IS_WINDOWS: + bash = find_bash() + if bash: + return await asyncio.create_subprocess_exec(bash, "-c", command, **kwargs) + return await asyncio.create_subprocess_shell(command, **kwargs) + + async def _exec_shell(command: str, timeout: int = EXEC_TIMEOUT) -> Dict[str, Any]: """Run a shell command and return stdout/stderr/exit_code.""" proc = None try: - proc = await asyncio.create_subprocess_shell( + proc = await _create_shell( command, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, @@ -355,6 +381,93 @@ async def _generate_tmux(cmd: str, request: Request): pass +async def _generate_win_detached(cmd: str, request: Request): + """Windows stand-in for the tmux path (issues #84/#162). + + tmux doesn't exist on Windows, so we run the command in a *detached* child + (DETACHED_PROCESS — survives browser disconnect, same as the tmux session) + that writes output to a log file, and tail that log over SSE. Prefers bash + (Git Bash) for command-syntax parity; falls back to cmd.exe. There's no + `tmux attach` equivalent, but the "keeps running if you disconnect" contract + holds, which is the point of the feature for long Cookbook downloads.""" + TMUX_LOG_DIR.mkdir(parents=True, exist_ok=True) + session_id = f"cookbook-{uuid.uuid4().hex[:8]}" + log_path = TMUX_LOG_DIR / f"{session_id}.log" + exit_path = TMUX_LOG_DIR / f"{session_id}.exit" + + bash = find_bash() + if bash: + script_path = TMUX_LOG_DIR / f"{session_id}.sh" + script_path.write_text( + f"{cmd} > {shlex.quote(str(log_path))} 2>&1\n" + f"echo $? > {shlex.quote(str(exit_path))}\n", + encoding="utf-8", + ) + argv = [bash, str(script_path)] + else: + script_path = TMUX_LOG_DIR / f"{session_id}.cmd" + # cmd.exe wrapper: run, redirect all output to the log, record exit code. + script_path.write_text( + "@echo off\r\n" + f'call {cmd} > "{log_path}" 2>&1\r\n' + f'echo %ERRORLEVEL%> "{exit_path}"\r\n', + encoding="utf-8", + ) + argv = [os.environ.get("ComSpec", "cmd.exe"), "/c", str(script_path)] + + try: + subprocess.Popen( + argv, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + stdin=subprocess.DEVNULL, + **detached_popen_kwargs(), + ) + except Exception as e: + yield f"data: {json.dumps({'stream': 'stderr', 'data': f'Failed to launch background job: {e}'})}\n\n" + yield f"data: {json.dumps({'exit_code': -1})}\n\n" + return + + yield f"data: {json.dumps({'stream': 'stdout', 'data': f'Started background job: {session_id}'})}\n\n" + + lines_sent = 0 + exit_code = None + while True: + if await request.is_disconnected(): + yield f"data: {json.dumps({'stream': 'stdout', 'data': f'Disconnected. Background job {session_id} continues running.'})}\n\n" + return + try: + if log_path.exists(): + lines = log_path.read_text(encoding="utf-8", errors="replace").splitlines() + for line in lines[lines_sent:]: + yield f"data: {json.dumps({'stream': 'stdout', 'data': line})}\n\n" + lines_sent = len(lines) + except Exception as e: + logger.debug("win detached log read error: %s", e) + + if exit_path.exists(): + # Drain any final lines, then read the recorded exit code. + await asyncio.sleep(0.3) + try: + if log_path.exists(): + lines = log_path.read_text(encoding="utf-8", errors="replace").splitlines() + for line in lines[lines_sent:]: + yield f"data: {json.dumps({'stream': 'stdout', 'data': line})}\n\n" + lines_sent = len(lines) + exit_code = int((exit_path.read_text(encoding="utf-8", errors="replace").strip() or "0")) + except Exception: + exit_code = 0 + break + await asyncio.sleep(1.0) + + yield f"data: {json.dumps({'exit_code': exit_code})}\n\n" + for p in (log_path, exit_path, script_path): + try: + p.unlink(missing_ok=True) + except Exception: + pass + + def setup_shell_routes() -> APIRouter: router = APIRouter(tags=["shell"]) @@ -393,22 +506,24 @@ def setup_shell_routes() -> APIRouter: ) if use_tmux: - return StreamingResponse( - _generate_tmux(cmd, request), - media_type="text/event-stream", - ) + # tmux is POSIX-only; Windows uses a detached-process + logfile tail + # that preserves the "survives disconnect" behaviour. + gen = _generate_win_detached(cmd, request) if IS_WINDOWS else _generate_tmux(cmd, request) + return StreamingResponse(gen, media_type="text/event-stream") - if use_pty: + if use_pty and not IS_WINDOWS: return StreamingResponse( _generate_pty(cmd, timeout, request), media_type="text/event-stream", ) + # Windows has no PTY; fall through to pipe streaming below (output still + # streams line-by-line, just without live in-place progress-bar redraws). async def generate(): proc = None reader_tasks = [] try: - proc = await asyncio.create_subprocess_shell( + proc = await _create_shell( cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, diff --git a/routes/upload_routes.py b/routes/upload_routes.py index efaff7e..8572d47 100644 --- a/routes/upload_routes.py +++ b/routes/upload_routes.py @@ -105,7 +105,7 @@ def setup_upload_routes(upload_handler): info = None uploads_db = os.path.join(UPLOAD_DIR, "uploads.json") if os.path.exists(uploads_db): - with open(uploads_db) as f: + with open(uploads_db, encoding="utf-8") as f: db = json.load(f) info = next((fi for fi in db.values() if fi["id"] == file_id), None) if info: @@ -153,7 +153,7 @@ def setup_upload_routes(upload_handler): info = None uploads_db = os.path.join(UPLOAD_DIR, "uploads.json") if os.path.exists(uploads_db): - with open(uploads_db) as f: + with open(uploads_db, encoding="utf-8") as f: db = json.load(f) info = next((fi for fi in db.values() if fi["id"] == file_id), None) return info @@ -199,7 +199,7 @@ def setup_upload_routes(upload_handler): cache_path = _vision_cache_path(file_id) if not force and os.path.exists(cache_path): try: - with open(cache_path) as f: + with open(cache_path, encoding="utf-8") as f: return {"text": f.read(), "cached": True} except Exception as e: logger.warning(f"Vision cache read failed for {file_id}: {e}") @@ -210,7 +210,7 @@ def setup_upload_routes(upload_handler): logger.error(f"Vision analysis failed for {file_id}: {e}") raise HTTPException(500, f"Vision analysis failed: {e}") try: - with open(cache_path, "w") as f: + with open(cache_path, "w", encoding="utf-8") as f: f.write(text) except Exception as e: logger.warning(f"Vision cache write failed for {file_id}: {e}") @@ -238,7 +238,7 @@ def setup_upload_routes(upload_handler): text = (body or {}).get("text", "") if not isinstance(text, str): raise HTTPException(400, "text must be a string") - with open(_vision_cache_path(file_id), "w") as f: + with open(_vision_cache_path(file_id), "w", encoding="utf-8") as f: f.write(text) return {"ok": True} diff --git a/routes/vault_routes.py b/routes/vault_routes.py index e7c755d..e41c92f 100644 --- a/routes/vault_routes.py +++ b/routes/vault_routes.py @@ -16,6 +16,7 @@ from fastapi import APIRouter, Request from pydantic import BaseModel from core.middleware import require_admin +from core.platform_compat import IS_WINDOWS, safe_chmod, which_tool logger = logging.getLogger(__name__) @@ -23,10 +24,23 @@ VAULT_FILE = Path("data/vault.json") def _find_bw() -> str: - """Locate the bw binary, checking PATH and common npm-global locations.""" - p = shutil.which("bw") + """Locate the bw binary, checking PATH and common npm-global locations. + + On Windows the Bitwarden CLI shim is `bw.cmd`/`bw.exe`, resolved by + which_tool via PATHEXT. + """ + p = which_tool("bw") if p: return p + if IS_WINDOWS: + appdata = os.environ.get("APPDATA", os.path.expanduser("~")) + for candidate in ( + os.path.join(appdata, "npm", "bw.cmd"), + os.path.join(appdata, "npm", "bw.exe"), + ): + if os.path.isfile(candidate): + return candidate + return "bw" home = os.path.expanduser("~") for candidate in ( f"{home}/.npm-global/bin/bw", @@ -47,7 +61,7 @@ def _find_bw() -> str: def _load_config() -> dict: if VAULT_FILE.exists(): try: - return json.loads(VAULT_FILE.read_text()) + return json.loads(VAULT_FILE.read_text(encoding="utf-8")) except Exception: pass return {} @@ -55,11 +69,10 @@ def _load_config() -> dict: def _save_config(cfg: dict): VAULT_FILE.parent.mkdir(parents=True, exist_ok=True) - VAULT_FILE.write_text(json.dumps(cfg, indent=2)) - try: - os.chmod(str(VAULT_FILE), 0o600) - except Exception: - pass + VAULT_FILE.write_text(json.dumps(cfg, indent=2), encoding="utf-8") + # POSIX: restrict the BW_SESSION store to 0o600. Windows: no-op (profile dir + # is ACL-restricted already). + safe_chmod(str(VAULT_FILE), 0o600) async def _run_bw(args: list, session: str = None, input_text: str = None) -> tuple: diff --git a/scripts/add_hwfit_models.py b/scripts/add_hwfit_models.py index 6bd4e2d..2d7129c 100644 --- a/scripts/add_hwfit_models.py +++ b/scripts/add_hwfit_models.py @@ -173,7 +173,7 @@ def _entry_from_modelinfo(mi, overrides): def main(): - with open(DATA_PATH) as f: + with open(DATA_PATH, encoding="utf-8") as f: catalog = json.load(f) by_name = {m["name"]: m for m in catalog} existing = set(by_name) @@ -214,12 +214,12 @@ def main(): return # Backup + merge - with open(DATA_PATH + ".bak", "w") as f: + with open(DATA_PATH + ".bak", "w", encoding="utf-8") as f: json.dump(catalog, f, indent=2) for name, entry in to_add.items(): by_name[name] = entry merged = list(by_name.values()) - with open(DATA_PATH, "w") as f: + with open(DATA_PATH, "w", encoding="utf-8") as f: json.dump(merged, f, indent=2) print(f"\nAdded/updated {len(to_add)} models. Catalog now {len(merged)} (was {len(catalog)}).") diff --git a/scripts/claim_ownerless.py b/scripts/claim_ownerless.py index 3925a8c..ad8e5b5 100644 --- a/scripts/claim_ownerless.py +++ b/scripts/claim_ownerless.py @@ -29,7 +29,7 @@ def main(): if not os.path.exists(path): print(f" {label}: not found, skipping") continue - with open(path, "r") as f: + with open(path, "r", encoding="utf-8") as f: entries = json.load(f) count = 0 for e in entries: @@ -37,7 +37,7 @@ def main(): e["owner"] = owner count += 1 if count: - with open(path, "w") as f: + with open(path, "w", encoding="utf-8") as f: json.dump(entries, f, ensure_ascii=False, indent=2) print(f" {label}: claimed {count} entries") diff --git a/scripts/diffusion_server.py b/scripts/diffusion_server.py index a8c0008..4c3d5d0 100644 --- a/scripts/diffusion_server.py +++ b/scripts/diffusion_server.py @@ -117,7 +117,7 @@ def load_model(): cls_name_from_index = "" if model_index.exists(): try: - idx = json.loads(model_index.read_text()) + idx = json.loads(model_index.read_text(encoding="utf-8")) cls_name_from_index = idx.get("_class_name", "") if hasattr(diffusers, cls_name_from_index): pipeline_cls = getattr(diffusers, cls_name_from_index) diff --git a/scripts/migrate_faiss_to_chroma.py b/scripts/migrate_faiss_to_chroma.py index 375222c..255be0a 100644 --- a/scripts/migrate_faiss_to_chroma.py +++ b/scripts/migrate_faiss_to_chroma.py @@ -39,7 +39,7 @@ def migrate_memories(): logger.info("No memory FAISS index found, skipping memory migration") return - ids = json.loads(open(ids_path).read()) + ids = json.loads(open(ids_path, encoding="utf-8").read()) if not ids: logger.info("Memory FAISS index is empty, skipping") return @@ -47,7 +47,7 @@ def migrate_memories(): # Load memory texts memories = {} if os.path.exists(memory_path): - for mem in json.loads(open(memory_path).read()): + for mem in json.loads(open(memory_path, encoding="utf-8").read()): memories[mem.get("id", "")] = mem embed = get_embedding_client() @@ -97,7 +97,7 @@ def migrate_rag(): logger.info("No RAG DocStore found, skipping RAG migration") return - data = json.loads(open(docs_path).read()) + data = json.loads(open(docs_path, encoding="utf-8").read()) ids = data.get("ids", []) documents = data.get("documents", []) metadatas = data.get("metadatas", []) diff --git a/services/hwfit/hardware.py b/services/hwfit/hardware.py index c5ff486..ff545a1 100644 --- a/services/hwfit/hardware.py +++ b/services/hwfit/hardware.py @@ -1,5 +1,6 @@ import os import platform +import shutil import subprocess import time @@ -138,7 +139,7 @@ def _detect_amd(): val = _run(["cat", path]) return val.strip() if val else None try: - with open(path) as f: + with open(path, encoding="utf-8", errors="replace") as f: return f.read().strip() except Exception: return None @@ -285,7 +286,7 @@ def _read_file(path): if _remote_host: return _run(["cat", path]) try: - with open(path) as f: + with open(path, encoding="utf-8", errors="replace") as f: return f.read() except Exception: return None @@ -314,7 +315,9 @@ def _get_ram_gb(): if "MemTotal" in meminfo: return meminfo["MemTotal"] / (1024**2) - if not _remote_host: + # os.sysconf only exists on Unix; on Windows it's absent (AttributeError) + # and these constants aren't defined — guard so this never raises there. + if not _remote_host and hasattr(os, "sysconf") and "SC_PHYS_PAGES" in getattr(os, "sysconf_names", {}): try: pages = os.sysconf("SC_PHYS_PAGES") page_size = os.sysconf("SC_PAGE_SIZE") @@ -375,8 +378,20 @@ def _get_cpu_count(): return os.cpu_count() or 1 +def _powershell_exe(): + """Pick the best PowerShell executable for LOCAL execution: prefer pwsh + (PowerShell 7+), fall back to Windows PowerShell 5.1. Returns an absolute + path so we don't depend on a particular PATH ordering.""" + return shutil.which("pwsh") or shutil.which("powershell") or "powershell" + + def _detect_windows(): - """Detect Windows hardware in a single SSH call using PowerShell.""" + """Detect Windows hardware via PowerShell/WMI. + + Works for BOTH local (host="") and remote (SSH) detection: + * remote -> `_run` ships the string to the host over SSH. + * local -> `_run` executes a list argv directly (no shell quoting hell). + """ # Single PowerShell command that gathers all hardware info at once ps_cmd = ( "$r = @{}; " @@ -413,22 +428,43 @@ def _detect_windows(): "}; " "$r | ConvertTo-Json -Compress" ) - out = _run(f'powershell -Command "{ps_cmd}"') + if _remote_host: + # Remote: ship a single command string over SSH. The remote shell parses + # the quoting; PowerShell on the far side runs the -Command payload. + out = _run(f'powershell -Command "{ps_cmd}"') + else: + # Local: pass a LIST argv straight to subprocess so the OS hands ps_cmd + # to PowerShell verbatim — no fragile string-level quote escaping. Prefer + # pwsh (PS7), else Windows PowerShell 5.1. + out = _run([_powershell_exe(), "-NoProfile", "-NonInteractive", "-Command", ps_cmd]) if not out: return None import json as _json try: d = _json.loads(out) + # PowerShell's Measure-Object .Sum / .Count come back as JSON numbers and + # decode to float; the Linux path returns plain ints for these — coerce + # so the dict shape (and downstream int math) matches across platforms. + def _as_int(v, default): + try: + return int(v) + except (TypeError, ValueError): + return default + _cpu_name = (d.get("cpu_name") or "unknown") + if isinstance(_cpu_name, str): + _cpu_name = _cpu_name.strip() or "unknown" result = { "total_ram_gb": d.get("ram_gb", 0), "available_ram_gb": d.get("avail_gb", 0), - "cpu_cores": d.get("cpu_cores", 1), - "cpu_name": d.get("cpu_name", "unknown"), + "cpu_cores": _as_int(d.get("cpu_cores"), 1), + "cpu_name": _cpu_name, "has_gpu": bool(d.get("gpu_name")), "gpu_name": d.get("gpu_name"), "gpu_vram_gb": d.get("gpu_vram_gb"), - "gpu_count": d.get("gpu_count", 0), + "gpu_count": _as_int(d.get("gpu_count"), 0), "backend": d.get("gpu_backend", "cpu_x86"), + "homogeneous": True, + "gpu_error": None, } # PowerShell only reports aggregate GPU info, not per-card detail, so we # can't tell a mixed box from a uniform one here — assume one homogeneous @@ -490,6 +526,18 @@ def detect_system(host="", ssh_port="", platform="", fresh=False): _cache_by_host[cache_key] = (now, result) return result + # Local Windows: the Linux /proc + /sys + os.sysconf path returns 0 GB RAM, + # "unknown" CPU and no GPU on Windows (and os.sysconf doesn't even exist), + # so detect locally via PowerShell/WMI instead. _detect_windows() runs the + # same probe used for remote Windows, but _run() executes it locally. + if not _remote_host and os.name == "nt": + result = _detect_windows() + if result: + _cache_by_host[cache_key] = (now, result) + return result + # PowerShell probe failed entirely — fall through to the generic path + # below so we at least return a well-shaped dict rather than crashing. + # Linux/Termux: existing multi-command detection total_ram = round(_get_ram_gb(), 1) # If remote host returns 0 RAM, connection likely failed diff --git a/services/hwfit/models.py b/services/hwfit/models.py index 43cb036..642983d 100644 --- a/services/hwfit/models.py +++ b/services/hwfit/models.py @@ -166,7 +166,7 @@ def get_models(): if _models_cache is None: data_path = os.path.join(os.path.dirname(__file__), "data", "hf_models.json") try: - with open(data_path) as f: + with open(data_path, encoding="utf-8") as f: _models_cache = json.load(f) except (FileNotFoundError, json.JSONDecodeError): _models_cache = [] diff --git a/services/memory/memory_extractor.py b/services/memory/memory_extractor.py index 02258c0..eea652a 100644 --- a/services/memory/memory_extractor.py +++ b/services/memory/memory_extractor.py @@ -45,7 +45,7 @@ def _fingerprint_entries(entries) -> str: def _load_tidy_state(memory_manager) -> dict: path = _tidy_state_path(memory_manager) try: - with open(path, "r") as f: + with open(path, "r", encoding="utf-8") as f: data = json.load(f) return data if isinstance(data, dict) else {} except (FileNotFoundError, json.JSONDecodeError): @@ -57,7 +57,7 @@ def _save_tidy_state(memory_manager, owner: Optional[str], fingerprint: str) -> state = _load_tidy_state(memory_manager) state[owner or ""] = {"fingerprint": fingerprint} try: - with open(path, "w") as f: + with open(path, "w", encoding="utf-8") as f: json.dump(state, f, indent=2) except OSError as e: logger.warning(f"Could not persist tidy fingerprint: {e}") diff --git a/services/memory/skills.py b/services/memory/skills.py index 784b2ef..74a3917 100644 --- a/services/memory/skills.py +++ b/services/memory/skills.py @@ -89,7 +89,7 @@ class SkillsManager: if not os.path.exists(self.usage_file): return {} try: - with open(self.usage_file) as f: + with open(self.usage_file, encoding="utf-8") as f: d = json.load(f) return d if isinstance(d, dict) else {} except Exception: @@ -101,7 +101,7 @@ class SkillsManager: atomic_write_json(self.usage_file, usage, indent=2) except Exception: tmp = self.usage_file + ".tmp" - with open(tmp, "w") as f: + with open(tmp, "w", encoding="utf-8") as f: json.dump(usage, f, indent=2) os.replace(tmp, self.usage_file) @@ -148,7 +148,7 @@ class SkillsManager: def _read_skill(self, path: str) -> Optional[Skill]: try: - with open(path) as f: + with open(path, encoding="utf-8") as f: text = f.read() return Skill.from_markdown(text, path=path) except Exception as e: @@ -221,7 +221,7 @@ class SkillsManager: # Legacy JSON entries — surfaced as draft, not editable from new flow if os.path.exists(self.legacy_file): try: - with open(self.legacy_file) as f: + with open(self.legacy_file, encoding="utf-8") as f: legacy = json.load(f) if isinstance(legacy, list): for row in legacy: @@ -461,7 +461,7 @@ class SkillsManager: sk = self._read_skill(path) if sk and sk.name == name: try: - with open(path) as f: + with open(path, encoding="utf-8") as f: return f.read() except Exception: return None @@ -481,7 +481,7 @@ class SkillsManager: if not os.path.isfile(target): return None try: - with open(target) as f: + with open(target, encoding="utf-8") as f: return f.read() except Exception: return None diff --git a/services/research/research_handler.py b/services/research/research_handler.py index 6b0d3b5..77863b8 100644 --- a/services/research/research_handler.py +++ b/services/research/research_handler.py @@ -114,7 +114,7 @@ class ResearchHandler: path = RESEARCH_DATA_DIR / f"{session_id}.json" if path.exists(): try: - data = json.loads(path.read_text()) + data = json.loads(path.read_text(encoding="utf-8")) return { "status": data.get("status", "done"), "progress": {}, @@ -151,7 +151,7 @@ class ResearchHandler: path = RESEARCH_DATA_DIR / f"{session_id}.json" if path.exists(): try: - data = json.loads(path.read_text()) + data = json.loads(path.read_text(encoding="utf-8")) return data.get("result") except Exception: pass @@ -171,7 +171,7 @@ class ResearchHandler: path = RESEARCH_DATA_DIR / f"{session_id}.json" if path.exists(): try: - data = json.loads(path.read_text()) + data = json.loads(path.read_text(encoding="utf-8")) return data.get("sources") except Exception: pass @@ -219,7 +219,7 @@ class ResearchHandler: "started_at": entry["started_at"], "completed_at": time.time(), } - path.write_text(json.dumps(data)) + path.write_text(json.dumps(data), encoding="utf-8") logger.info(f"Research result saved to {path}") except Exception as e: logger.error(f"Failed to save research result: {e}") diff --git a/setup.py b/setup.py index fb5ba04..358d4b4 100644 --- a/setup.py +++ b/setup.py @@ -65,7 +65,7 @@ def create_default_admin(): } } } - with open(auth_path, "w") as f: + with open(auth_path, "w", encoding="utf-8") as f: json.dump(auth_data, f, indent=2) print(f" [ok] Initial admin user created ({username})") print(f" Temporary password: {password}") diff --git a/src/api_key_manager.py b/src/api_key_manager.py index 22e18c9..6bf3a6d 100644 --- a/src/api_key_manager.py +++ b/src/api_key_manager.py @@ -38,14 +38,14 @@ class APIKeyManager: """Save encrypted API key to file""" keys = self.load() keys[provider] = self.encrypt_api_key(api_key) - with open(self.api_keys_file, 'w') as f: + with open(self.api_keys_file, 'w', encoding="utf-8") as f: json.dump(keys, f) def load(self) -> Dict[str, str]: """Load and decrypt API keys""" if not os.path.exists(self.api_keys_file): return {} - with open(self.api_keys_file, 'r') as f: + with open(self.api_keys_file, 'r', encoding="utf-8") as f: encrypted_keys = json.load(f) return { provider: self.decrypt_api_key(key) diff --git a/src/bg_jobs.py b/src/bg_jobs.py index 35863de..a770f11 100644 --- a/src/bg_jobs.py +++ b/src/bg_jobs.py @@ -22,7 +22,7 @@ from __future__ import annotations import json import os -import signal +import shlex import subprocess import time import uuid @@ -30,6 +30,12 @@ from pathlib import Path from typing import Any, Dict, List, Optional from core.atomic_io import atomic_write_json +from core.platform_compat import ( + detached_popen_kwargs, + find_bash, + kill_process_tree, + pid_alive, +) _DATA_DIR = Path(os.environ.get("DATA_DIR", "data")) _JOBS_DIR = _DATA_DIR / "bg_jobs" @@ -49,7 +55,7 @@ _RETENTION_S = 3600 # 1 hour after follow-up def _load() -> Dict[str, Dict[str, Any]]: try: if _STORE.exists(): - return json.loads(_STORE.read_text()) or {} + return json.loads(_STORE.read_text(encoding="utf-8")) or {} except Exception: pass return {} @@ -60,13 +66,11 @@ def _save(jobs: Dict[str, Dict[str, Any]]) -> None: def _pid_alive(pid: Optional[int]) -> bool: - if not pid: - return False - try: - os.kill(pid, 0) - return True - except (OSError, ProcessLookupError): - return False + # Delegates to the platform-safe probe. NB: a bare os.kill(pid, 0) is unsafe + # on Windows — CPython routes it to TerminateProcess, which would KILL the + # job we're only trying to check. core.platform_compat.pid_alive handles + # both OSes correctly. + return pid_alive(pid) def launch(command: str, session_id: str, cwd: Optional[str] = None, @@ -88,22 +92,46 @@ def launch(command: str, session_id: str, cwd: Optional[str] = None, # command in `( … )` — the wrapper can't be broken by an unbalanced paren or # a trailing line-continuation in the command. `$?` is the child's real # exit status. - cmd_path = _JOBS_DIR / f"{job_id}.cmd.sh" - cmd_path.write_text(command + "\n") - wrapper = ( - f"bash {cmd_path} > {log_path} 2>&1\n" - f"echo $? > {exit_path}\n" - ) - script_path = _JOBS_DIR / f"{job_id}.sh" - script_path.write_text(wrapper) + bash = find_bash() + if bash: + # POSIX, or Windows with Git Bash/WSL. The user command goes in its OWN + # script file, run as a child `bash` — an `exit` inside it only ends + # that child (so the wrapper still records the exit code), and an + # unbalanced paren / trailing line-continuation in the command can't + # break the wrapper. `$?` is the child's real exit status. Paths are + # emitted as POSIX (forward-slash) + shell-quoted so Git Bash on Windows + # handles drive paths and spaces correctly. + cmd_path = _JOBS_DIR / f"{job_id}.cmd.sh" + cmd_path.write_text(command + "\n", encoding="utf-8") + lp, xp, cp = (shlex.quote(p.as_posix()) for p in (log_path, exit_path, cmd_path)) + script_path = _JOBS_DIR / f"{job_id}.sh" + script_path.write_text( + f"bash {cp} > {lp} 2>&1\n" + f"echo $? > {xp}\n", + encoding="utf-8", + ) + argv = [bash, str(script_path)] + else: + # Windows without any bash installed: cmd.exe wrapper. The command runs + # in its own child .cmd so %ERRORLEVEL% is the command's real exit code. + child_path = _JOBS_DIR / f"{job_id}.child.cmd" + child_path.write_text("@echo off\r\n" + command + "\r\n", encoding="utf-8") + script_path = _JOBS_DIR / f"{job_id}.cmd" + script_path.write_text( + "@echo off\r\n" + f'call "{child_path}" > "{log_path}" 2>&1\r\n' + f'echo %ERRORLEVEL%> "{exit_path}"\r\n', + encoding="utf-8", + ) + argv = [os.environ.get("ComSpec", "cmd.exe"), "/c", str(script_path)] proc = subprocess.Popen( - ["bash", str(script_path)], + argv, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, stdin=subprocess.DEVNULL, cwd=cwd or None, - start_new_session=True, # setsid — detach from the request lifecycle + **detached_popen_kwargs(), # detach from the request lifecycle (setsid / DETACHED_PROCESS) ) rec = { @@ -128,7 +156,7 @@ def launch(command: str, session_id: str, cwd: Optional[str] = None, def _read_output(rec: Dict[str, Any]) -> str: try: - txt = Path(rec["log_path"]).read_text(errors="replace") + txt = Path(rec["log_path"]).read_text(encoding="utf-8", errors="replace") except Exception: return "" if len(txt) > _MAX_OUTPUT_CHARS: @@ -198,15 +226,8 @@ def refresh() -> Dict[str, Dict[str, Any]]: def _kill(pid: Optional[int]) -> None: - if not pid: - return - try: - os.killpg(os.getpgid(pid), signal.SIGTERM) - except Exception: - try: - os.kill(pid, signal.SIGTERM) - except Exception: - pass + # Cross-platform process-tree teardown (POSIX killpg / Windows taskkill /T). + kill_process_tree(pid) def pending_followups() -> List[Dict[str, Any]]: diff --git a/src/builtin_actions.py b/src/builtin_actions.py index 3b83110..2ac90ed 100644 --- a/src/builtin_actions.py +++ b/src/builtin_actions.py @@ -11,6 +11,7 @@ from datetime import datetime from typing import Tuple from src.auth_helpers import owner_filter +from core.platform_compat import IS_WINDOWS, find_bash logger = logging.getLogger(__name__) @@ -266,6 +267,11 @@ async def action_ssh_command(owner: str, command: str = "", host: str = "localho if not command: return "No command specified", False if host in ("localhost", "127.0.0.1", "local"): + if IS_WINDOWS: + bash = find_bash() + if bash: + return await _run_subprocess([bash, "-c", command], timeout=120, label="Command") + return await _run_subprocess(command, shell=True, timeout=120, label="Command") return await _run_subprocess(["bash", "-c", command], timeout=120, label="Command") return await _run_subprocess( ["ssh", "-o", "ConnectTimeout=10", host, command], timeout=120, label="Command", @@ -278,6 +284,8 @@ async def action_run_script(owner: str, script: str = "", host: str = "", **kwar return "No script specified", False target_host = (host or os.getenv("ODYSSEUS_SCRIPT_HOST", "localhost")).strip() if target_host in ("", "localhost", "127.0.0.1", "local"): + if IS_WINDOWS and find_bash(): + return await _run_subprocess([find_bash(), "-c", script], timeout=300, label="Script") return await _run_subprocess(script, shell=True, timeout=300, label="Script") return await _run_subprocess(["ssh", target_host, script], timeout=300, label="Script") @@ -286,6 +294,8 @@ async def action_run_local(owner: str, script: str = "", **kwargs) -> Tuple[str, """Run a script locally (no SSH).""" if not script: return "No script specified", False + if IS_WINDOWS and find_bash(): + return await _run_subprocess([find_bash(), "-c", script], timeout=300, label="Script") return await _run_subprocess(script, shell=True, timeout=300, label="Script") diff --git a/src/builtin_mcp.py b/src/builtin_mcp.py index c570044..fb9a878 100644 --- a/src/builtin_mcp.py +++ b/src/builtin_mcp.py @@ -11,15 +11,36 @@ import shutil import sys import asyncio +from core.platform_compat import IS_WINDOWS, which_tool + logger = logging.getLogger(__name__) def _find_npx() -> str: - """Find npx binary, checking common locations if not on PATH.""" - npx = shutil.which("npx") + """Find the npx binary, checking common locations if not on PATH. + + On Windows the shim is `npx.cmd`, which `which_tool` resolves via PATHEXT. + """ + npx = which_tool("npx") if npx: return npx - # Common locations when PATH is minimal (e.g. systemd) + if IS_WINDOWS: + # Minimal-PATH fallbacks: npm's global bin lives under %APPDATA%\npm, + # and node's installer dir carries npx.cmd alongside node.exe. + appdata = os.environ.get("APPDATA", os.path.expanduser("~")) + for candidate in ( + os.path.join(appdata, "npm", "npx.cmd"), + r"C:\Program Files\nodejs\npx.cmd", + ): + if os.path.isfile(candidate): + return candidate + node = which_tool("node") + if node: + cand = os.path.join(os.path.dirname(node), "npx.cmd") + if os.path.isfile(cand): + return cand + return "npx.cmd" # fallback, will fail with a clear error + # Common POSIX locations when PATH is minimal (e.g. systemd) for candidate in [ os.path.expanduser("~/.npm-global/bin/npx"), os.path.expanduser("~/.local/bin/npx"), diff --git a/src/chat_handler.py b/src/chat_handler.py index 01daa52..c7af61a 100644 --- a/src/chat_handler.py +++ b/src/chat_handler.py @@ -154,7 +154,7 @@ class ChatHandler: if att_ids: uploads_db_path = os.path.join(UPLOAD_DIR, "uploads.json") try: - with open(uploads_db_path, "r") as f: + with open(uploads_db_path, "r", encoding="utf-8") as f: _all_files = json.load(f) files_by_id = {fi["id"]: fi for fi in _all_files.values() if "id" in fi} except (FileNotFoundError, json.JSONDecodeError): @@ -193,7 +193,7 @@ class ChatHandler: _vcache = os.path.join(UPLOAD_DIR, ".vision", att_id + ".txt") if os.path.exists(_vcache): try: - with open(_vcache) as _vf: + with open(_vcache, encoding="utf-8") as _vf: _vtext = _vf.read().strip() if _vtext: enhanced_message += f"\n[User-corrected caption / OCR for this image — treat as authoritative]:\n{_vtext}" @@ -212,7 +212,7 @@ class ChatHandler: vl_model = get_setting("vision_model", "") or "" if os.path.exists(_vcache): try: - with open(_vcache) as _vf: + with open(_vcache, encoding="utf-8") as _vf: cached_desc = _vf.read().strip() if cached_desc and not cached_desc.startswith("["): vl_desc = cached_desc @@ -225,7 +225,7 @@ class ChatHandler: if vl_desc and not vl_desc.startswith("["): try: os.makedirs(os.path.join(UPLOAD_DIR, ".vision"), exist_ok=True) - with open(_vcache, "w") as _vf: + with open(_vcache, "w", encoding="utf-8") as _vf: _vf.write(vl_desc) except Exception: pass diff --git a/src/config.py b/src/config.py index 376bee1..58a5c46 100644 --- a/src/config.py +++ b/src/config.py @@ -1,8 +1,19 @@ +import os from pathlib import Path from typing import List, Optional from pydantic_settings import BaseSettings, SettingsConfigDict from pydantic import Field, field_validator +# Cross-platform OS flag, exposed here so callers can `from src.config import +# IS_WINDOWS`. Defined locally (a trivial `os.name == "nt"`) rather than imported +# from core.platform_compat, to keep this dependency-light config module from +# dragging in the whole core/__init__ + llm_core import chain. The platform +# *helper functions* (safe_chmod, pid_alive, find_bash, ...) live solely in +# core.platform_compat — that remains their single source of truth. Keep platform +# branches as small inline `if IS_WINDOWS:` deltas (never parallel *_windows.py +# files) so they stay easy to integrate with upstream changes. +IS_WINDOWS = os.name == "nt" + class DataConfig(BaseSettings): """Configuration for data storage and file handling.""" # Base directory diff --git a/src/embeddings.py b/src/embeddings.py index 664c33f..67cfd86 100644 --- a/src/embeddings.py +++ b/src/embeddings.py @@ -13,6 +13,17 @@ Set EMBEDDING_URL in .env, e.g.: """ import os + +# Windows: force HuggingFace/fastembed to COPY model files rather than symlink +# them. On a network-share/UNC cache dir Windows can't follow HF's symlinks +# ([WinError 1463] "symbolic link cannot be followed"), so ONNX fails to load the +# model and semantic memory dies. huggingface_hub reads this flag at import time, +# so it must be set before huggingface_hub is first imported — hence module-top. +# (app.py sets the same guard for the server entrypoint.) +if os.name == "nt": + os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS", "1") + os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS_WARNING", "1") + import logging import numpy as np import httpx @@ -109,6 +120,35 @@ class FastEmbedClient: "data", "fastembed_cache", ) os.makedirs(cache_dir, exist_ok=True) + # Windows self-heal: the HuggingFace-hub cache stores model files as + # symlinks (snapshots//model.onnx -> ../../blobs/). On a + # network-share / UNC data dir Windows refuses to follow them + # ([WinError 1463] "symbolic link cannot be followed because its type is + # disabled"), and a cache copied between machines can carry dead symlinks + # too. Either way fastembed tries to load a broken symlink and fails + # *without* re-downloading, leaving semantic memory degraded. Detect a + # broken-symlink model in the cache and drop the contaminated hub dir so + # fastembed re-fetches (it falls back to its CDN tarball of real files, + # which load fine). Best-effort; only ever removes a verifiably dead link. + if os.name == "nt": + try: + import glob, shutil + for _onnx in glob.glob(os.path.join(cache_dir, "**", "*.onnx"), recursive=True): + if os.path.islink(_onnx) and not os.path.exists(_onnx): + _root = _onnx + while os.path.basename(_root) and not os.path.basename(_root).startswith("models--"): + _parent = os.path.dirname(_root) + if _parent == _root: + break + _root = _parent + if os.path.basename(_root).startswith("models--"): + logger.warning( + "Embedding cache has a broken symlink (%s); clearing %s " + "so fastembed re-downloads real files", _onnx, _root, + ) + shutil.rmtree(_root, ignore_errors=True) + except Exception as _e: + logger.debug("embedding cache symlink-heal skipped: %s", _e) kwargs = {"model_name": self.model, "cache_dir": cache_dir} self._embedding = TextEmbedding(**kwargs) self._dim: Optional[int] = None @@ -152,7 +192,7 @@ def _load_persisted_endpoint() -> dict: ) if os.path.exists(endpoint_file): import json - data = json.loads(open(endpoint_file).read()) + data = json.loads(open(endpoint_file, encoding="utf-8").read()) if data.get("url"): return data except Exception: diff --git a/src/integrations.py b/src/integrations.py index 968c758..27e356e 100644 --- a/src/integrations.py +++ b/src/integrations.py @@ -148,7 +148,7 @@ def load_integrations() -> List[Dict[str, Any]]: if not os.path.exists(DATA_FILE): return [] try: - with open(DATA_FILE, "r") as f: + with open(DATA_FILE, "r", encoding="utf-8") as f: return json.load(f) except (json.JSONDecodeError, IOError) as exc: log.error("Failed to load integrations: %s", exc) @@ -158,7 +158,7 @@ def load_integrations() -> List[Dict[str, Any]]: def save_integrations(integrations: List[Dict[str, Any]]) -> None: """Persist integrations list to disk.""" _ensure_data_dir() - with open(DATA_FILE, "w") as f: + with open(DATA_FILE, "w", encoding="utf-8") as f: json.dump(integrations, f, indent=2) @@ -409,7 +409,7 @@ def migrate_from_settings() -> None: return try: - with open(settings_path, "r") as f: + with open(settings_path, "r", encoding="utf-8") as f: settings = json.load(f) except (json.JSONDecodeError, IOError): return @@ -436,7 +436,7 @@ def migrate_from_settings() -> None: # Clear migrated keys settings.pop("miniflux_url", None) settings.pop("miniflux_api_key", None) - with open(settings_path, "w") as f: + with open(settings_path, "w", encoding="utf-8") as f: json.dump(settings, f, indent=2) log.info("Migrated Miniflux integration from settings.json") diff --git a/src/pdf_form_doc.py b/src/pdf_form_doc.py index a0891de..9552aca 100644 --- a/src/pdf_form_doc.py +++ b/src/pdf_form_doc.py @@ -142,7 +142,7 @@ def save_field_sidecar(pdf_path: str, fields: list[dict[str, Any]]) -> str: """Persist the field schema next to its source PDF. Returns the sidecar path.""" path = sidecar_path(pdf_path) try: - with open(path, "w") as f: + with open(path, "w", encoding="utf-8") as f: json.dump(fields, f, indent=2) except Exception as e: logger.warning(f"Failed to write field sidecar {path}: {e}") @@ -155,7 +155,7 @@ def load_field_sidecar(pdf_path: str) -> Optional[list[dict[str, Any]]]: if not os.path.exists(path): return None try: - with open(path) as f: + with open(path, encoding="utf-8") as f: return json.load(f) except Exception as e: logger.warning(f"Failed to read field sidecar {path}: {e}") diff --git a/src/personal_docs.py b/src/personal_docs.py index 9fc9d2d..2183ee7 100644 --- a/src/personal_docs.py +++ b/src/personal_docs.py @@ -178,7 +178,7 @@ class PersonalDocsManager: """Load the list of indexed directories from persistent storage.""" try: if os.path.exists(self.directories_file): - with open(self.directories_file, 'r') as f: + with open(self.directories_file, 'r', encoding="utf-8") as f: self.indexed_directories = json.load(f) logger.info(f"Loaded {len(self.indexed_directories)} indexed directories") else: @@ -190,7 +190,7 @@ class PersonalDocsManager: def save_directories(self): """Save the list of indexed directories to persistent storage.""" try: - with open(self.directories_file, 'w') as f: + with open(self.directories_file, 'w', encoding="utf-8") as f: json.dump(self.indexed_directories, f, indent=2) logger.info(f"Saved {len(self.indexed_directories)} indexed directories") except Exception as e: @@ -200,7 +200,7 @@ class PersonalDocsManager: """Load the set of excluded file paths from persistent storage.""" try: if os.path.exists(self._excluded_file): - with open(self._excluded_file, 'r') as f: + with open(self._excluded_file, 'r', encoding="utf-8") as f: self.excluded_files = set(json.load(f)) else: self.excluded_files = set() @@ -210,7 +210,7 @@ class PersonalDocsManager: def _save_excluded(self): try: - with open(self._excluded_file, 'w') as f: + with open(self._excluded_file, 'w', encoding="utf-8") as f: json.dump(list(self.excluded_files), f) except Exception as e: logger.error(f"Error saving excluded files: {e}") diff --git a/src/preset_manager.py b/src/preset_manager.py index a417ee0..c694ca1 100644 --- a/src/preset_manager.py +++ b/src/preset_manager.py @@ -75,7 +75,7 @@ Use precise language. Show causal relationships explicitly. Quantify uncertainty return self.DEFAULT_PRESETS.copy() try: - with open(self.presets_file, 'r') as f: + with open(self.presets_file, 'r', encoding="utf-8") as f: presets = json.load(f) custom = presets.get("custom") if isinstance(presets, dict) else None if isinstance(custom, dict) and "enabled" not in custom: @@ -101,7 +101,7 @@ Use precise language. Show causal relationships explicitly. Quantify uncertainty """Save presets to file""" try: os.makedirs(os.path.dirname(self.presets_file), exist_ok=True) - with open(self.presets_file, 'w') as f: + with open(self.presets_file, 'w', encoding="utf-8") as f: json.dump(presets, f, indent=2) self.presets = presets return True diff --git a/src/research_handler.py b/src/research_handler.py index 1a69e08..4a64ac7 100644 --- a/src/research_handler.py +++ b/src/research_handler.py @@ -299,7 +299,7 @@ class ResearchHandler: path = RESEARCH_DATA_DIR / f"{session_id}.json" if path.exists(): try: - data = json.loads(path.read_text()) + data = json.loads(path.read_text(encoding="utf-8")) if data.get("consumed"): return None return { @@ -338,7 +338,7 @@ class ResearchHandler: path = RESEARCH_DATA_DIR / f"{session_id}.json" if path.exists(): try: - data = json.loads(path.read_text()) + data = json.loads(path.read_text(encoding="utf-8")) if data.get("consumed"): return None return data.get("result") @@ -360,7 +360,7 @@ class ResearchHandler: path = RESEARCH_DATA_DIR / f"{session_id}.json" if path.exists(): try: - data = json.loads(path.read_text()) + data = json.loads(path.read_text(encoding="utf-8")) return data.get("sources") except Exception: pass @@ -377,7 +377,7 @@ class ResearchHandler: path = RESEARCH_DATA_DIR / f"{session_id}.json" if path.exists(): try: - data = json.loads(path.read_text()) + data = json.loads(path.read_text(encoding="utf-8")) return data.get("raw_findings") except Exception as e: logger.warning(f"Failed to read raw findings for {session_id}: {e}") @@ -425,7 +425,7 @@ class ResearchHandler: try: for p in RESEARCH_DATA_DIR.glob("*.json"): try: - data = json.loads(p.read_text()) + data = json.loads(p.read_text(encoding="utf-8")) if data.get("status") == "done": started = data.get("started_at", 0) completed = data.get("completed_at", 0) @@ -448,9 +448,9 @@ class ResearchHandler: path = RESEARCH_DATA_DIR / f"{session_id}.json" if path.exists(): try: - data = json.loads(path.read_text()) + data = json.loads(path.read_text(encoding="utf-8")) data["consumed"] = True - path.write_text(json.dumps(data)) + path.write_text(json.dumps(data), encoding="utf-8") except Exception: pass @@ -481,7 +481,7 @@ class ResearchHandler: # SECURITY: stamp owner so route handlers can filter by user. "owner": entry.get("owner", ""), } - path.write_text(json.dumps(data)) + path.write_text(json.dumps(data), encoding="utf-8") logger.info(f"Research result saved to {path}") try: from src.event_bus import fire_event @@ -496,7 +496,7 @@ class ResearchHandler: path = RESEARCH_DATA_DIR / f"{session_id}.json" if path.exists(): try: - return json.loads(path.read_text()) + return json.loads(path.read_text(encoding="utf-8")) except Exception: pass return None @@ -511,7 +511,7 @@ class ResearchHandler: try: from src.visual_report import generate_visual_report - data = json.loads(json_path.read_text()) + data = json.loads(json_path.read_text(encoding="utf-8")) report_md = data.get("raw_report") or data.get("result", "") html_content = generate_visual_report( question=data.get("query", ""), @@ -534,12 +534,12 @@ class ResearchHandler: if not path.exists(): return False try: - data = json.loads(path.read_text()) + data = json.loads(path.read_text(encoding="utf-8")) hidden = data.get("hidden_images") or [] if image_url not in hidden: hidden.append(image_url) data["hidden_images"] = hidden - path.write_text(json.dumps(data)) + path.write_text(json.dumps(data), encoding="utf-8") logger.info(f"Hid image {image_url[:80]} for research {session_id}") return True except Exception as e: @@ -552,9 +552,9 @@ class ResearchHandler: if not path.exists(): return False try: - data = json.loads(path.read_text()) + data = json.loads(path.read_text(encoding="utf-8")) data["hidden_images"] = [] - path.write_text(json.dumps(data)) + path.write_text(json.dumps(data), encoding="utf-8") logger.info(f"Cleared hidden_images for research {session_id}") return True except Exception as e: diff --git a/src/secret_storage.py b/src/secret_storage.py index 58db9ed..15f02f2 100644 --- a/src/secret_storage.py +++ b/src/secret_storage.py @@ -24,6 +24,8 @@ from pathlib import Path from cryptography.fernet import Fernet, InvalidToken +from core.platform_compat import safe_chmod + logger = logging.getLogger(__name__) _KEY_PATH = Path(__file__).resolve().parent.parent / "data" / ".app_key" @@ -37,10 +39,9 @@ def _load_or_create_key() -> bytes: _KEY_PATH.parent.mkdir(parents=True, exist_ok=True) key = Fernet.generate_key() _KEY_PATH.write_bytes(key) - try: - os.chmod(_KEY_PATH, 0o600) - except Exception: - pass + # POSIX: lock the key to 0o600. Windows: no-op (the user-profile data dir is + # already ACL-restricted); safe_chmod swallows both cases. + safe_chmod(_KEY_PATH, 0o600) logger.info(f"Generated new app key at {_KEY_PATH}") return key diff --git a/src/settings.py b/src/settings.py index 4ef068b..7da1e73 100644 --- a/src/settings.py +++ b/src/settings.py @@ -140,7 +140,7 @@ def load_settings() -> dict: if _settings_cache and (now - _settings_cache[0]) < _CACHE_TTL: return _settings_cache[1] try: - with open(SETTINGS_FILE, "r") as f: + with open(SETTINGS_FILE, "r", encoding="utf-8") as f: saved = json.load(f) merged = {**DEFAULT_SETTINGS, **saved} except (FileNotFoundError, json.JSONDecodeError): @@ -205,7 +205,7 @@ def load_features() -> dict: if _features_cache and (now - _features_cache[0]) < _CACHE_TTL: return _features_cache[1] try: - with open(FEATURES_FILE, "r") as f: + with open(FEATURES_FILE, "r", encoding="utf-8") as f: saved = json.load(f) merged = {**DEFAULT_FEATURES, **saved} except (FileNotFoundError, json.JSONDecodeError): diff --git a/src/task_scheduler.py b/src/task_scheduler.py index 4bdb1ef..4268b96 100644 --- a/src/task_scheduler.py +++ b/src/task_scheduler.py @@ -1013,7 +1013,7 @@ class TaskScheduler: from pathlib import Path as _P integrations_file = _P("data/integrations.json") if integrations_file.exists(): - integrations = json.loads(integrations_file.read_text()) + integrations = json.loads(integrations_file.read_text(encoding="utf-8")) for integ in integrations: if not integ.get("enabled"): continue @@ -1616,7 +1616,7 @@ class TaskScheduler: "task_id": task.id, "task_name": task.name, } - (RESEARCH_DATA_DIR / f"{session_id}.json").write_text(json.dumps(payload)) + (RESEARCH_DATA_DIR / f"{session_id}.json").write_text(json.dumps(payload), encoding="utf-8") try: from src.event_bus import fire_event fire_event("research_completed", task.owner or None) diff --git a/src/tool_execution.py b/src/tool_execution.py index c8075da..21ab553 100644 --- a/src/tool_execution.py +++ b/src/tool_execution.py @@ -12,6 +12,7 @@ import collections import json import logging import os +import sys import time from typing import Any, Awaitable, Callable, Dict, Optional, Tuple @@ -348,7 +349,9 @@ async def _direct_fallback( # can't take the whole server down. -I = isolated mode (skip # user site, no PYTHONPATH inheritance) for hygiene. proc = await asyncio.create_subprocess_exec( - "python3", "-I", "-c", content, + # Use the running interpreter — there is no `python3.exe` on + # Windows, which made the agent's `python` tool fail there. + (sys.executable or "python"), "-I", "-c", content, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, env=_subproc_env, diff --git a/src/tool_implementations.py b/src/tool_implementations.py index f569926..5871dea 100644 --- a/src/tool_implementations.py +++ b/src/tool_implementations.py @@ -3639,7 +3639,7 @@ async def do_manage_research(content: str, owner: Optional[str] = None) -> Dict: def _load(p): try: - return _json.loads(p.read_text()) + return _json.loads(p.read_text(encoding="utf-8")) except Exception: return None @@ -3874,7 +3874,7 @@ def _load_vault_config() -> Dict: p = Path("data/vault.json") if p.exists(): try: - return json.loads(p.read_text()) + return json.loads(p.read_text(encoding="utf-8")) except Exception: pass return {} @@ -4027,13 +4027,13 @@ async def do_vault_unlock(content: str, owner: Optional[str] = None) -> Dict: cfg = {} if p.exists(): try: - cfg = json.loads(p.read_text()) + cfg = json.loads(p.read_text(encoding="utf-8")) except Exception: pass cfg["session"] = session from datetime import datetime as _dt cfg["unlocked_at"] = _dt.utcnow().isoformat() - p.write_text(json.dumps(cfg, indent=2)) + p.write_text(json.dumps(cfg, indent=2), encoding="utf-8") try: import os as _os _os.chmod(str(p), 0o600) diff --git a/src/upload_handler.py b/src/upload_handler.py index f1e1dd1..75c25b1 100644 --- a/src/upload_handler.py +++ b/src/upload_handler.py @@ -269,7 +269,7 @@ class UploadHandler: uploads_db_path = os.path.join(self.upload_dir, "uploads.json") if os.path.exists(uploads_db_path): - with open(uploads_db_path, "r") as f: + with open(uploads_db_path, "r", encoding="utf-8") as f: files = json.load(f) total_files = len(files) @@ -352,7 +352,7 @@ class UploadHandler: if os.path.exists(uploads_db_path): try: - with open(uploads_db_path, "r") as f: + with open(uploads_db_path, "r", encoding="utf-8") as f: existing_files = json.load(f) except Exception as e: logger.warning(f"Failed to read uploads database: {e}") @@ -374,7 +374,7 @@ class UploadHandler: existing_files[existing_key] = existing_file try: - with open(uploads_db_path, "w") as f: + with open(uploads_db_path, "w", encoding="utf-8") as f: json.dump(existing_files, f, indent=2) except Exception as e: logger.warning(f"Failed to update uploads database: {e}") @@ -439,7 +439,7 @@ class UploadHandler: try: if os.path.exists(uploads_db_path): try: - with open(uploads_db_path, "r") as f: + with open(uploads_db_path, "r", encoding="utf-8") as f: all_files = json.load(f) except Exception: all_files = {} @@ -449,7 +449,7 @@ class UploadHandler: storage_key = f"{owner}:{file_hash}" if owner else file_hash all_files[storage_key] = file_metadata - with open(uploads_db_path, "w") as f: + with open(uploads_db_path, "w", encoding="utf-8") as f: json.dump(all_files, f, indent=2) except Exception as e: diff --git a/tests/test_auth_regressions.py b/tests/test_auth_regressions.py index c468edb..d9939c8 100644 --- a/tests/test_auth_regressions.py +++ b/tests/test_auth_regressions.py @@ -226,7 +226,7 @@ def test_admin_only_actions_set_contains_shell_runners(): # `_ADMIN_ONLY_ACTIONS` is a closure constant. Easiest pin: re-read # the source and check for the three risky entries + the admin gate # wording. - src = open(task_routes.__file__).read() + src = open(task_routes.__file__, encoding="utf-8").read() assert '"run_local"' in src assert '"run_script"' in src assert '"ssh_command"' in src @@ -249,8 +249,8 @@ def test_ship_paused_housekeeping_stays_paused_by_default(): from routes import task_routes from src import task_scheduler - route_src = open(task_routes.__file__).read() - scheduler_src = open(task_scheduler.__file__).read() + route_src = open(task_routes.__file__, encoding="utf-8").read() + scheduler_src = open(task_scheduler.__file__, encoding="utf-8").read() assert '"ship_paused": True' in scheduler_src assert 'defs.get("ship_paused")' in scheduler_src assert 'defs.get("ship_paused")' in route_src @@ -259,5 +259,5 @@ def test_ship_paused_housekeeping_stays_paused_by_default(): def test_task_payload_exposes_crew_member_id_for_ui_category(): from routes import task_routes - src = open(task_routes.__file__).read() + src = open(task_routes.__file__, encoding="utf-8").read() assert '"crew_member_id"' in src diff --git a/tests/test_security_regressions.py b/tests/test_security_regressions.py index 631172d..bddf74f 100644 --- a/tests/test_security_regressions.py +++ b/tests/test_security_regressions.py @@ -96,6 +96,11 @@ def test_secret_storage_corrupt_token_returns_empty(tmp_path, monkeypatch): assert ss.decrypt("enc:not-a-valid-fernet-token") == "" +@pytest.mark.skipif( + sys.platform == "win32", + reason="POSIX mode bits (0o600) don't exist on Windows; the key file is " + "protected by the user-profile NTFS ACL instead, and safe_chmod no-ops there.", +) def test_secret_storage_key_created_with_safe_mode(tmp_path, monkeypatch): """The auto-generated key file must be mode 0o600 — anyone who can read it can decrypt every stored secret."""