Add native Windows compatibility layer

This commit is contained in:
pewdiepie-archdaemon
2026-06-01 15:09:47 +09:00
parent ead7c01822
commit 0888a3b3e6
54 changed files with 1104 additions and 267 deletions

27
.gitattributes vendored
View File

@@ -1,2 +1,27 @@
*.sh text eol=lf
# Normalize line endings so a Windows checkout (git core.autocrlf=true) can't
# corrupt shell-script shebangs. A CRLF `#!/bin/sh\r` makes the kernel look for
# an interpreter literally named "/bin/sh\r", producing the Docker startup error
# "exec /usr/local/bin/entrypoint.sh: no such file or directory" (issues #150, #77).
* text=auto
# Shell scripts must stay LF on every platform (run by sh/bash, incl. in Docker).
*.sh text eol=lf
*.bash text eol=lf
entrypoint.sh text eol=lf
docker/entrypoint.sh text eol=lf
# Windows-native scripts stay CRLF.
*.ps1 text eol=crlf
*.cmd text eol=crlf
*.bat text eol=crlf
# Binary assets — never normalize.
*.png binary
*.jpg binary
*.jpeg binary
*.gif binary
*.webp binary
*.pdf binary
*.ico binary
*.woff binary
*.woff2 binary

2
.gitignore vendored
View File

@@ -76,6 +76,8 @@ research_data/
# Internal dev/review notes — not for public repo
dev-docs/
# Windows-port working docs (local only, not for public repo)
docs/windows-port/
# Local config
compound.config.json

View File

@@ -152,6 +152,40 @@ do not run on macOS. MLX-only models are not served by Odysseus.
</details>
### Native Windows
**One-command launcher** (creates the venv, installs deps, runs setup, starts the
server; safe to re-run):
```powershell
git clone https://github.com/pewdiepie-archdaemon/odysseus.git
cd odysseus
powershell -ExecutionPolicy Bypass -File .\launch-windows.ps1
```
Or do it by hand:
```powershell
git clone https://github.com/pewdiepie-archdaemon/odysseus.git
cd odysseus
python -m venv venv
venv\Scripts\Activate.ps1
pip install -r requirements.txt
python setup.py
python -m uvicorn app:app --host 127.0.0.1 --port 7000
```
**Requirements:** Python 3.11+. The core app (chat, agent, memory, documents,
email, calendar, deep research) runs fully native. For full **Cookbook** background
model downloads and the agent shell tool, also install
[Git for Windows](https://git-scm.com/download/win) (provides `bash.exe`).
Local GPU *serving* of vLLM/SGLang needs Linux/WSL2; for a local model on Windows,
[Ollama](https://ollama.com/download) is the easiest path — point Odysseus at
`http://localhost:11434/v1` in Settings.
Open `http://localhost:7000`, log in with the generated admin password,
and configure everything else inside **Settings**.
## Security Notes
Odysseus is a self-hosted workspace with powerful local tools: shell access, file uploads, model downloads, web research, email/calendar integrations, and API tokens. Treat it like an admin console.

64
app.py
View File

@@ -1,7 +1,22 @@
# app.py — slim orchestrator
from dotenv import load_dotenv
load_dotenv()
import os
# Windows: force HuggingFace/fastembed to COPY model files instead of symlinking.
# On a network-share/UNC data dir Windows can't follow HF's symlinks ([WinError
# 1463]), so the ONNX embedding model fails to load. huggingface_hub reads this
# at import time, so set it before anything pulls it in. (Mirrored in
# src/embeddings.py for non-server entrypoints.)
if os.name == "nt":
os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS", "1")
os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS_WARNING", "1")
from dotenv import load_dotenv
# encoding="utf-8-sig" tolerates a UTF-8 BOM in .env — a common Windows gotcha
# when the file is saved from Notepad. Without this, the first key parses as
# "AUTH_ENABLED" instead of "AUTH_ENABLED", so AUTH_ENABLED=false (etc.)
# is silently ignored and the user is unexpectedly forced to log in (issue #142).
# utf-8-sig reads plain UTF-8 (no BOM) identically, so this is safe everywhere.
load_dotenv(encoding="utf-8-sig")
import uuid
import asyncio
@@ -170,6 +185,31 @@ if AUTH_ENABLED:
_token_cache.update(new_map)
app.state._token_cache_dirty = False
# Headers that prove a request was forwarded by a proxy/tunnel (cloudflared,
# nginx, Caddy, Tailscale Funnel, …). cloudflared connects to the app FROM
# 127.0.0.1, so without this check every tunneled request would look like
# loopback and could bypass auth.
_PROXY_FWD_HEADERS = (
"cf-connecting-ip", "cf-ray", "cf-visitor",
"x-forwarded-for", "x-forwarded-host", "x-real-ip", "forwarded",
)
def _is_trusted_loopback(request: Request) -> bool:
"""True ONLY for a DIRECT loopback connection with no proxy/tunnel
forwarding headers. A bare ``client.host in ('127.0.0.1','::1')`` check is
unsafe behind a Cloudflare tunnel / reverse proxy: those connect from
loopback, so a remote visitor would otherwise inherit local trust and
slip past LOCALHOST_BYPASS or spoof the internal-tool path. Odysseus's own
in-process agent loopback calls carry none of these headers, so they still
qualify."""
host = request.client.host if request.client else None
if host not in ("127.0.0.1", "::1"):
return False
for _h in _PROXY_FWD_HEADERS:
if request.headers.get(_h):
return False
return True
class AuthMiddleware(BaseHTTPMiddleware):
async def dispatch(self, request: Request, call_next):
path = request.url.path
@@ -182,8 +222,7 @@ if AUTH_ENABLED:
try:
from core.middleware import INTERNAL_TOOL_HEADER, INTERNAL_TOOL_TOKEN as _ITT
_hdr = request.headers.get(INTERNAL_TOOL_HEADER)
_client_host = request.client.host if request.client else None
if _hdr and _hdr == _ITT and _client_host in ("127.0.0.1", "::1"):
if _hdr and _hdr == _ITT and _is_trusted_loopback(request):
# Impersonation: when the agent's loopback call sets
# X-Odysseus-Owner, attribute the request to that
# user so notes/calendar/etc. land in their account
@@ -196,12 +235,13 @@ if AUTH_ENABLED:
return await call_next(request)
except Exception:
pass
# Allow localhost requests (internal service calls from heartbeats etc.)
# Disable with LOCALHOST_BYPASS=false when exposing via reverse proxy / Tailscale Funnel
if LOCALHOST_BYPASS:
client_host = request.client.host if request.client else None
if client_host in ("127.0.0.1", "::1"):
return await call_next(request)
# Allow DIRECT localhost requests (internal service calls from
# heartbeats etc.). Tunnel/proxy-forwarded requests are excluded by
# _is_trusted_loopback so LOCALHOST_BYPASS can't be abused over a
# Cloudflare tunnel / reverse proxy. Keep LOCALHOST_BYPASS=false for
# network-exposed deployments regardless.
if LOCALHOST_BYPASS and _is_trusted_loopback(request):
return await call_next(request)
if not auth_manager.is_configured:
# No users yet — redirect to login for first-time setup
if not path.startswith("/api/"):
@@ -819,7 +859,7 @@ async def startup_event():
try:
import json as _json
auth_path = "data/auth.json"
with open(auth_path) as f:
with open(auth_path, encoding="utf-8") as f:
users = _json.load(f).get("users", {})
owners.update(users.keys())
except Exception as e:
@@ -866,7 +906,7 @@ async def startup_event():
try:
import json as _json
auth_path = "data/auth.json"
with open(auth_path) as f:
with open(auth_path, encoding="utf-8") as f:
users = _json.load(f).get("users", {})
primary_owner = None
for uname, udata in users.items():

View File

@@ -26,7 +26,7 @@ def atomic_write_json(path: str, data: Any, *, indent: Optional[int] = None) ->
"""
os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
tmp = f"{path}.tmp.{os.getpid()}"
with open(tmp, "w") as f:
with open(tmp, "w", encoding="utf-8") as f:
json.dump(data, f, indent=indent)
f.flush()
os.fsync(f.fileno())
@@ -36,7 +36,7 @@ def atomic_write_json(path: str, data: Any, *, indent: Optional[int] = None) ->
def atomic_write_text(path: str, text: str) -> None:
os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
tmp = f"{path}.tmp.{os.getpid()}"
with open(tmp, "w") as f:
with open(tmp, "w", encoding="utf-8") as f:
f.write(text)
f.flush()
os.fsync(f.fileno())

View File

@@ -68,7 +68,7 @@ class AuthManager:
def _load(self):
try:
if os.path.exists(self.auth_path):
with open(self.auth_path, "r") as f:
with open(self.auth_path, "r", encoding="utf-8") as f:
self._config = json.load(f)
logger.info("Auth config loaded")
else:
@@ -82,7 +82,7 @@ class AuthManager:
"""Load persisted session tokens from disk, pruning expired ones."""
try:
if os.path.exists(self._sessions_path):
with open(self._sessions_path, "r") as f:
with open(self._sessions_path, "r", encoding="utf-8") as f:
data = json.load(f)
now = time.time()
self._sessions = {k: v for k, v in data.items() if v.get("expiry", 0) > now}

View File

@@ -996,7 +996,7 @@ def _migrate_assign_legacy_owner():
auth_path = os.path.join("data", "auth.json")
admin_user = None
try:
with open(auth_path, "r") as f:
with open(auth_path, "r", encoding="utf-8") as f:
auth_data = _json.load(f)
users = auth_data.get("users", {})
if users:
@@ -1067,12 +1067,12 @@ def _migrate_assign_legacy_owner():
prefs_path = os.path.join("data", "user_prefs.json")
try:
if os.path.exists(prefs_path):
with open(prefs_path, "r") as f:
with open(prefs_path, "r", encoding="utf-8") as f:
prefs = _json.load(f)
if "_users" not in prefs and prefs:
# Flat format → nest under admin user
new_prefs = {"_users": {admin_user: prefs}}
with open(prefs_path, "w") as f:
with open(prefs_path, "w", encoding="utf-8") as f:
_json.dump(new_prefs, f, indent=2)
logger.info(f"Migrated user_prefs.json to per-user format under '{admin_user}'")
except Exception as e:
@@ -1437,7 +1437,7 @@ def _migrate_seed_email_account():
if not settings_file.exists():
return
try:
s = _json.loads(settings_file.read_text())
s = _json.loads(settings_file.read_text(encoding="utf-8"))
except Exception:
return

203
core/platform_compat.py Normal file
View File

@@ -0,0 +1,203 @@
"""Cross-platform OS compatibility helpers.
Odysseus began as a Linux/macOS/Docker-only app. This module centralizes the
small set of OS differences needed to run it *natively* on Windows so the rest
of the codebase can stay platform-agnostic. Import from here instead of
sprinkling ``os.name == "nt"`` checks (and POSIX-only calls) across modules.
Design rules:
* Stdlib + ctypes only — no new third-party deps (no psutil/pywinpty).
* POSIX behaviour is unchanged; Windows gets a faithful equivalent or a
safe, documented no-op.
"""
from __future__ import annotations
import os
import shutil
import subprocess
from pathlib import Path
from typing import List, Optional
IS_WINDOWS = os.name == "nt"
IS_POSIX = not IS_WINDOWS
# ── File permissions ────────────────────────────────────────────────────────
def safe_chmod(path, mode: int) -> bool:
"""``os.chmod`` that is a harmless no-op on Windows.
On POSIX we apply the mode — used to lock secret/key files down to 0o600.
Windows has no POSIX permission bits; files under the user profile are
already ACL-restricted to that user, so we skip rather than raise. Returns
True when the mode was actually applied.
"""
if IS_WINDOWS:
return False
try:
os.chmod(path, mode)
return True
except OSError:
return False
# ── Process detach / liveness / teardown ────────────────────────────────────
def detached_popen_kwargs() -> dict:
"""Keyword args for :class:`subprocess.Popen` that fully detach a child so
it outlives the request/stream that launched it.
POSIX: ``start_new_session=True`` (setsid) — new session + process group.
Windows: ``CREATE_NEW_PROCESS_GROUP | DETACHED_PROCESS`` — the child gets
its own process group (so it isn't killed when the parent's console closes)
and is detached from any console.
"""
if IS_WINDOWS:
flags = (
getattr(subprocess, "CREATE_NEW_PROCESS_GROUP", 0x00000200)
| getattr(subprocess, "DETACHED_PROCESS", 0x00000008)
)
return {"creationflags": flags}
return {"start_new_session": True}
def pid_alive(pid: Optional[int]) -> bool:
"""True if a process with ``pid`` is currently running.
POSIX uses the classic ``os.kill(pid, 0)`` probe. That is **unsafe on
Windows**: CPython's ``os.kill`` calls ``TerminateProcess(handle, sig)`` for
any signal other than CTRL_C/CTRL_BREAK, so ``os.kill(pid, 0)`` would *kill*
the process it is checking. We instead open the process and read its exit
code via the Win32 API.
"""
if not pid:
return False
if IS_WINDOWS:
import ctypes
from ctypes import wintypes
PROCESS_QUERY_LIMITED_INFORMATION = 0x1000
STILL_ACTIVE = 259
kernel32 = ctypes.windll.kernel32
handle = kernel32.OpenProcess(
PROCESS_QUERY_LIMITED_INFORMATION, False, int(pid)
)
if not handle:
return False
try:
code = wintypes.DWORD()
if kernel32.GetExitCodeProcess(handle, ctypes.byref(code)):
return code.value == STILL_ACTIVE
return False
finally:
kernel32.CloseHandle(handle)
try:
os.kill(pid, 0)
return True
except (OSError, ProcessLookupError):
return False
def kill_process_tree(pid: Optional[int]) -> None:
"""Terminate ``pid`` and all of its descendants.
POSIX: signal the whole process group (``killpg``), falling back to a plain
``kill`` if the pid isn't a group leader.
Windows: ``taskkill /T /F`` walks and kills the child tree (there is no
process-group signalling).
"""
if not pid:
return
if IS_WINDOWS:
try:
subprocess.run(
["taskkill", "/F", "/T", "/PID", str(pid)],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
creationflags=getattr(subprocess, "CREATE_NO_WINDOW", 0),
)
except Exception:
pass
return
import signal
try:
os.killpg(os.getpgid(pid), signal.SIGTERM)
except Exception:
try:
os.kill(pid, signal.SIGTERM)
except Exception:
pass
# ── Shell / executable resolution ───────────────────────────────────────────
_BASH_CACHE: Optional[str] = None
_BASH_PROBED = False
# Common Git-for-Windows install locations to probe when bash isn't on PATH.
_WINDOWS_BASH_FALLBACKS = (
r"C:\Program Files\Git\bin\bash.exe",
r"C:\Program Files\Git\usr\bin\bash.exe",
r"C:\Program Files (x86)\Git\bin\bash.exe",
)
def find_bash() -> Optional[str]:
"""Locate a real ``bash`` interpreter, or None.
On Windows this is typically Git Bash / WSL. Many Odysseus features (the
agent ``bash`` tool, background jobs, Cookbook scripts) emit bash syntax, so
when a bash is present we use it and keep full parity with POSIX. Result is
cached.
"""
global _BASH_CACHE, _BASH_PROBED
if _BASH_PROBED:
return _BASH_CACHE
_BASH_PROBED = True
found = shutil.which("bash")
if not found and IS_WINDOWS:
for cand in _WINDOWS_BASH_FALLBACKS:
if os.path.exists(cand):
found = cand
break
_BASH_CACHE = found
return found
def has_bash() -> bool:
return find_bash() is not None
def which_tool(name: str) -> Optional[str]:
"""``shutil.which`` that also tries Windows executable suffixes.
On Windows, Node/npm shims are ``npx.cmd``/``npm.cmd`` and binaries end in
``.exe``; a bare ``which("npx")`` can miss them depending on PATHEXT. We try
the bare name first, then the common suffixes.
"""
found = shutil.which(name)
if found:
return found
if IS_WINDOWS:
for ext in (".cmd", ".exe", ".bat"):
found = shutil.which(name + ext)
if found:
return found
return None
def run_script_argv(script_path) -> List[str]:
"""argv to execute a shell *script file*.
Prefers bash (so existing ``.sh`` wrappers work verbatim, including on
Windows via Git Bash). On Windows with no bash available, falls back to
``cmd.exe /c`` — simple commands still run, but bash-specific syntax won't.
Callers that need guaranteed bash should check :func:`has_bash` first and
surface a clear "install Git Bash" message.
"""
bash = find_bash()
if bash:
return [bash, str(script_path)]
if IS_WINDOWS:
comspec = os.environ.get("ComSpec", "cmd.exe")
return [comspec, "/c", str(script_path)]
return ["sh", str(script_path)]

79
launch-windows.ps1 Normal file
View File

@@ -0,0 +1,79 @@
#Requires -Version 5.1
<#
Odysseus - native Windows launcher (no Docker).
One command to: create a virtualenv, install dependencies, run first-time
setup (prints an admin password on first run), and start the server.
Safe to re-run - it skips whatever already exists.
Usage:
powershell -ExecutionPolicy Bypass -File .\launch-windows.ps1
powershell -ExecutionPolicy Bypass -File .\launch-windows.ps1 -Port 7000 -BindHost 127.0.0.1
Tip: bind 127.0.0.1 (default) for local-only use. Use 0.0.0.0 only when you
intentionally want other devices on your LAN to reach it.
#>
param(
[int]$Port = 7000,
[string]$BindHost = "127.0.0.1"
)
$ErrorActionPreference = "Stop"
Set-Location -Path $PSScriptRoot
function Write-Step($msg) { Write-Host ""; Write-Host ("==> " + $msg) -ForegroundColor Cyan }
function Fail($msg) {
Write-Host ""
Write-Host ("ERROR: " + $msg) -ForegroundColor Red
Write-Host ""
Read-Host "Press Enter to exit"
exit 1
}
# 1. Locate a Python interpreter (3.11+ recommended)
Write-Step "Checking for Python"
$pyExe = $null
foreach ($c in @("python", "py")) {
$cmd = Get-Command $c -ErrorAction SilentlyContinue
if ($cmd) { $pyExe = $cmd.Source; break }
}
if (-not $pyExe) {
Fail "Python not found on PATH. Install Python 3.11+ from https://www.python.org/downloads/ (check 'Add to PATH'), then re-run this script."
}
Write-Host ("Using Python: " + $pyExe)
# 2. Create the virtualenv if missing
$venvPy = Join-Path $PSScriptRoot "venv\Scripts\python.exe"
if (-not (Test-Path $venvPy)) {
Write-Step "Creating virtual environment (venv)"
& $pyExe -m venv venv
if ($LASTEXITCODE -ne 0 -or -not (Test-Path $venvPy)) { Fail "Failed to create the virtual environment." }
} else {
Write-Host "venv already exists - skipping creation."
}
# 3. Install / update dependencies
Write-Step "Installing dependencies (first run can take a few minutes)"
& $venvPy -m pip install --upgrade pip --quiet
& $venvPy -m pip install -r requirements.txt
if ($LASTEXITCODE -ne 0) { Fail "Dependency install failed. Scroll up for the pip error." }
# 4. First-time setup (creates data dirs, DB, .env, admin user)
Write-Step "Running first-time setup"
& $venvPy setup.py
if ($LASTEXITCODE -ne 0) { Fail "setup.py failed." }
# 5. Friendly note about Git Bash (full Cookbook / agent-shell parity)
if (-not (Get-Command bash -ErrorAction SilentlyContinue)) {
Write-Host ""
Write-Host "NOTE: Git Bash (bash.exe) was not found on PATH." -ForegroundColor Yellow
Write-Host " The core app works without it. For full Cookbook background" -ForegroundColor Yellow
Write-Host " downloads and the agent shell tool, install Git for Windows:" -ForegroundColor Yellow
Write-Host " https://git-scm.com/download/win" -ForegroundColor Yellow
}
# 6. Start the server (use `python -m uvicorn` - bare `uvicorn` may not be on PATH)
Write-Step ("Starting Odysseus at http://{0}:{1}" -f $BindHost, $Port)
Write-Host "Press Ctrl+C to stop."
Write-Host ""
& $venvPy -m uvicorn app:app --host $BindHost --port $Port

View File

@@ -197,7 +197,7 @@ def _load_config(account: str | None = None) -> dict:
try:
settings_path = Path(__file__).resolve().parent.parent / "data" / "settings.json"
if settings_path.exists():
settings = json.loads(settings_path.read_text())
settings = json.loads(settings_path.read_text(encoding="utf-8"))
for key in (
"imap_host", "imap_port", "imap_user", "imap_password",
"smtp_host", "smtp_port", "smtp_user", "smtp_password",

View File

@@ -44,7 +44,7 @@ def _wipe_memory_files():
continue
try:
if name == "memory.json":
with open(p, "w") as f:
with open(p, "w", encoding="utf-8") as f:
json.dump([], f)
else:
os.remove(p)

View File

@@ -29,7 +29,7 @@ LOCAL_CONTACTS_FILE = DATA_DIR / "contacts.json"
def _load_settings():
if SETTINGS_FILE.exists():
return json.loads(SETTINGS_FILE.read_text())
return json.loads(SETTINGS_FILE.read_text(encoding="utf-8"))
return {}
@@ -79,7 +79,7 @@ def _load_local_contacts() -> List[Dict]:
try:
if not LOCAL_CONTACTS_FILE.exists():
return []
data = json.loads(LOCAL_CONTACTS_FILE.read_text())
data = json.loads(LOCAL_CONTACTS_FILE.read_text(encoding="utf-8"))
rows = data.get("contacts", data) if isinstance(data, dict) else data
return [_normalize_contact(c) for c in (rows or []) if isinstance(c, dict)]
except Exception as e:

View File

@@ -7,6 +7,7 @@ import os
import re
import shlex
import shutil
import subprocess
import sys
import uuid
from pathlib import Path
@@ -17,6 +18,15 @@ from src.auth_helpers import require_user
from pydantic import BaseModel
from core.middleware import require_admin
from core.platform_compat import (
IS_WINDOWS,
detached_popen_kwargs,
find_bash,
kill_process_tree,
pid_alive,
safe_chmod,
which_tool,
)
from routes.shell_routes import TMUX_LOG_DIR
logger = logging.getLogger(__name__)
@@ -208,16 +218,20 @@ def setup_cookbook_routes() -> APIRouter:
if not _cookbook_state_path.exists():
return ""
try:
state = json.loads(_cookbook_state_path.read_text())
state = json.loads(_cookbook_state_path.read_text(encoding="utf-8"))
env = state.get("env") if isinstance(state, dict) else {}
return _decrypt_secret(env.get("hfToken") if isinstance(env, dict) else "")
except Exception:
return ""
def _cookbook_ssh_dir() -> Path:
app_ssh = Path("/app/.ssh")
if Path("/app").exists():
return app_ssh
# The Docker image keeps cookbook keys under /app/.ssh; that path only
# exists inside the container. On Windows (and any non-container host)
# fall back to the user profile's ~/.ssh, which OpenSSH on Win10+ uses.
if not IS_WINDOWS:
app_ssh = Path("/app/.ssh")
if Path("/app").exists():
return app_ssh
return Path.home() / ".ssh"
def _cookbook_ssh_key_path() -> Path:
@@ -244,13 +258,15 @@ def setup_cookbook_routes() -> APIRouter:
ssh_dir = _cookbook_ssh_dir()
key_path = _cookbook_ssh_key_path()
ssh_dir.mkdir(parents=True, exist_ok=True)
try:
os.chmod(ssh_dir, 0o700)
except Exception:
pass
# safe_chmod no-ops on Windows (~/.ssh is already ACL-restricted to the
# user profile); applies 0o700 on POSIX.
safe_chmod(ssh_dir, 0o700)
if not key_path.exists():
# ssh-keygen ships with the OpenSSH client on Win10+; resolve it via
# which_tool so the .exe is found even when PATHEXT is unusual.
ssh_keygen = which_tool("ssh-keygen") or "ssh-keygen"
proc = await asyncio.create_subprocess_exec(
"ssh-keygen", "-t", "ed25519", "-N", "", "-C", "odysseus-cookbook", "-f", str(key_path),
ssh_keygen, "-t", "ed25519", "-N", "", "-C", "odysseus-cookbook", "-f", str(key_path),
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
@@ -258,11 +274,8 @@ def setup_cookbook_routes() -> APIRouter:
if proc.returncode != 0:
detail = (stderr or stdout).decode("utf-8", errors="replace").strip()[-500:]
return {"ok": False, "error": detail or "Failed to generate SSH key"}
try:
os.chmod(key_path, 0o600)
os.chmod(key_path.with_suffix(".pub"), 0o644)
except Exception:
pass
safe_chmod(key_path, 0o600)
safe_chmod(key_path.with_suffix(".pub"), 0o644)
return {"ok": True, "public_key": _read_cookbook_public_key()}
def _user_shell_path_bootstrap() -> list[str]:
@@ -314,6 +327,56 @@ def setup_cookbook_routes() -> APIRouter:
return await _remote_binary_available(remote, ssh_port, binary, windows=windows)
return shutil.which(binary) is not None
def _launch_local_detached(session_id: str, bash_lines: list[str]) -> dict:
"""Windows-native stand-in for a LOCAL tmux session (tmux doesn't exist
on Windows). Mirrors shell_routes._generate_win_detached / bg_jobs.launch:
runs the wrapper detached so it survives a browser/SSE disconnect (the
whole point of the tmux feature for long downloads/serves), writing a
<session>.log the status poller tails and a <session>.pid for liveness.
`bash_lines` is the same bash wrapper used on POSIX. Prefers Git Bash
for full command-syntax parity; falls back to a cmd.exe wrapper that
runs the script through whatever bash is reachable, else best-effort
directly (simple commands only). Returns the launched job record."""
log_path = TMUX_LOG_DIR / f"{session_id}.log"
pid_path = TMUX_LOG_DIR / f"{session_id}.pid"
bash = find_bash()
if bash:
# Run the existing bash wrapper verbatim through Git Bash, redirecting
# all output to the log the poller reads. Paths handed to bash use
# POSIX form + shell-quoting so drive paths / spaces survive.
inner = TMUX_LOG_DIR / f"{session_id}_run.sh"
inner.write_text("\n".join(bash_lines) + "\n", encoding="utf-8")
lp = shlex.quote(log_path.as_posix())
ip = shlex.quote(inner.as_posix())
script_path = TMUX_LOG_DIR / f"{session_id}.sh"
script_path.write_text(
f"bash {ip} > {lp} 2>&1\n",
encoding="utf-8",
)
argv = [bash, str(script_path)]
else:
# No bash on this Windows host: the bash wrapper can't run. Fall back
# to a cmd.exe wrapper that just records a clear error to the log so
# the UI surfaces "install Git Bash" instead of silently hanging.
script_path = TMUX_LOG_DIR / f"{session_id}.cmd"
script_path.write_text(
"@echo off\r\n"
f'echo Cookbook LOCAL execution on Windows needs Git Bash ^(bash.exe^) on PATH. > "{log_path}" 2>&1\r\n'
f'echo Install Git for Windows, then retry. >> "{log_path}"\r\n',
encoding="utf-8",
)
argv = [os.environ.get("ComSpec", "cmd.exe"), "/c", str(script_path)]
proc = subprocess.Popen(
argv,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
stdin=subprocess.DEVNULL,
**detached_popen_kwargs(),
)
pid_path.write_text(str(proc.pid), encoding="utf-8")
return {"pid": proc.pid, "log_path": str(log_path)}
@router.post("/api/model/download")
async def model_download(request: Request, req: ModelDownloadRequest):
"""Download a HuggingFace model in a tmux session.
@@ -379,9 +442,12 @@ def setup_cookbook_routes() -> APIRouter:
remote = req.remote_host # None for local
is_windows = req.platform == "windows"
# LOCAL execution on a native-Windows host never uses tmux (it uses the
# detached-process path below), regardless of the UI-supplied platform.
local_windows = IS_WINDOWS and not remote
logger.info(f"Download request: repo={req.repo_id}, remote={remote}, ssh_port={req.ssh_port}, platform={req.platform}")
if not is_windows and not await _binary_available("tmux", remote, req.ssh_port):
if not is_windows and not local_windows and not await _binary_available("tmux", remote, req.ssh_port):
return {
"ok": False,
"error": _missing_binary_message("tmux", remote or "local server"),
@@ -425,7 +491,7 @@ def setup_cookbook_routes() -> APIRouter:
ps_lines.append('}}')
ps_lines.append(f'Remove-Item -Force "$HOME\\{remote_runner}" -ErrorAction SilentlyContinue')
runner_path = TMUX_LOG_DIR / f"{session_id}_run.ps1"
runner_path.write_text("\r\n".join(ps_lines) + "\r\n")
runner_path.write_text("\r\n".join(ps_lines) + "\r\n", encoding="utf-8")
# scp the .ps1 script, then launch it as a detached process with log + pid files
_port = req.ssh_port
@@ -492,8 +558,10 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines.append(f"rm -f {remote_runner}")
runner_lines.append('exec "${SHELL:-/bin/bash}"')
runner_path = TMUX_LOG_DIR / f"{session_id}_run.sh"
runner_path.write_text("\n".join(runner_lines) + "\n")
runner_path.chmod(0o755)
runner_path.write_text("\n".join(runner_lines) + "\n", encoding="utf-8")
# Local temp file is scp'd then chmod'd on the remote; the local bit
# is irrelevant (no-op on Windows).
safe_chmod(runner_path, 0o755)
# scp the runner script, then create tmux session on the remote
_port = req.ssh_port
@@ -504,7 +572,8 @@ def setup_cookbook_routes() -> APIRouter:
f"ssh {_spf}{remote} 'chmod +x {remote_runner} && tmux new-session -d -s {session_id} \"./{remote_runner}\"'"
)
else:
# Local: run hf download in a local tmux session
# Local: run hf download in the background (tmux on POSIX, a detached
# process + logfile on Windows where tmux doesn't exist).
if req.env_prefix:
lines.append(_safe_env_prefix(req.env_prefix))
else:
@@ -512,29 +581,43 @@ def setup_cookbook_routes() -> APIRouter:
# Show whether the HF token reached this run (masked) — tells a gated
# "not authorized" failure apart from a missing token.
lines.append(_HF_TOKEN_STATUS_SNIPPET)
# < /dev/null suppresses interactive "update available? [Y/n]" prompt
lines.append(f"{hf_cmd} < /dev/null")
lines.append('if [ $? -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $?)"; fi')
lines.append(f"rm -f '{wrapper_script}'")
lines.append('exec "${SHELL:-/bin/bash}"')
wrapper_script.write_text("\n".join(lines) + "\n")
wrapper_script.chmod(0o755)
setup_cmd = f"tmux new-session -d -s {session_id} {shlex.quote(str(wrapper_script))}"
if IS_WINDOWS:
# Detached path: no controlling TTY, so skip `< /dev/null`
# (handled by Popen stdin=DEVNULL) and don't keep a shell open.
lines.append(hf_cmd)
lines.append('if [ $? -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $?)"; fi')
else:
# < /dev/null suppresses interactive "update available? [Y/n]" prompt
lines.append(f"{hf_cmd} < /dev/null")
lines.append('if [ $? -eq 0 ]; then echo ""; echo "DOWNLOAD_OK"; else echo ""; echo "DOWNLOAD_FAILED (exit $?)"; fi')
lines.append(f"rm -f '{wrapper_script}'")
lines.append('exec "${SHELL:-/bin/bash}"')
wrapper_script.write_text("\n".join(lines) + "\n", encoding="utf-8")
wrapper_script.chmod(0o755)
setup_cmd = None if IS_WINDOWS else f"tmux new-session -d -s {session_id} {shlex.quote(str(wrapper_script))}"
logger.info(f"Model download: {req.repo_id} (include={req.include}, session={session_id}, remote={remote})")
logger.info(f"Download setup_cmd: {setup_cmd}")
proc = await asyncio.create_subprocess_shell(
setup_cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
await proc.wait()
if setup_cmd is None:
# LOCAL Windows: launch the bash wrapper detached; no tmux setup_cmd.
try:
_launch_local_detached(session_id, lines)
except Exception as e:
logger.error(f"Local detached download launch failed: {e}")
return {"ok": False, "error": str(e), "session_id": session_id}
else:
proc = await asyncio.create_subprocess_shell(
setup_cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
await proc.wait()
if proc.returncode != 0:
stderr = (await proc.stderr.read()).decode(errors="replace")
logger.error(f"Download failed (rc={proc.returncode}): {stderr}")
return {"ok": False, "error": stderr, "session_id": session_id}
if proc.returncode != 0:
stderr = (await proc.stderr.read()).decode(errors="replace")
logger.error(f"Download failed (rc={proc.returncode}): {stderr}")
return {"ok": False, "error": stderr, "session_id": session_id}
# Log to assistant
try:
@@ -643,7 +726,7 @@ def setup_cookbook_routes() -> APIRouter:
paths_code += "print(json.dumps(models))\n"
scan_py = TMUX_LOG_DIR / "scan_cache.py"
scan_py.write_text(paths_code)
scan_py.write_text(paths_code, encoding="utf-8")
if host:
_pf = f"-p {ssh_port} " if ssh_port and ssh_port != "22" else ""
@@ -652,15 +735,27 @@ def setup_cookbook_routes() -> APIRouter:
cmd = f'ssh {_pf}{host} "python -" < \'{scan_py}\''
else:
cmd = f"ssh {_pf}{host} 'python3 -' < '{scan_py}'"
proc = await asyncio.create_subprocess_shell(
cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
cwd=str(Path.home()),
)
else:
cmd = f"python3 '{scan_py}'"
proc = await asyncio.create_subprocess_shell(
cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
cwd=str(Path.home()),
)
# LOCAL scan: run the interpreter directly. `python3` isn't a thing on
# Windows (it's `python`/`py`), and shell single-quoting of the path
# doesn't survive cmd.exe — so resolve the interpreter and exec it
# with the script path as an argv element (no shell quoting needed).
local_py = (
which_tool("python3") or which_tool("python")
or which_tool("py") or "python"
)
proc = await asyncio.create_subprocess_exec(
local_py, str(scan_py),
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
cwd=str(Path.home()),
)
stdout_b, stderr_b = await asyncio.wait_for(proc.communicate(), timeout=60)
models = []
@@ -785,8 +880,11 @@ def setup_cookbook_routes() -> APIRouter:
session_id = f"serve-{uuid.uuid4().hex[:8]}"
remote = req.remote_host
is_windows = req.platform == "windows"
# LOCAL execution on a native-Windows host never uses tmux (detached
# process path below), regardless of the UI-supplied platform.
local_windows = IS_WINDOWS and not remote
if not is_windows and not await _binary_available("tmux", remote, req.ssh_port):
if not is_windows and not local_windows and not await _binary_available("tmux", remote, req.ssh_port):
return {
"ok": False,
"error": _missing_binary_message("tmux", remote or "local server"),
@@ -832,7 +930,7 @@ def setup_cookbook_routes() -> APIRouter:
ps_lines.append('Write-Host ""')
ps_lines.append('Write-Host "=== Process exited with code $LASTEXITCODE ==="')
runner_path = TMUX_LOG_DIR / f"{session_id}_run.ps1"
runner_path.write_text("\r\n".join(ps_lines) + "\r\n")
runner_path.write_text("\r\n".join(ps_lines) + "\r\n", encoding="utf-8")
_port = req.ssh_port
_Pf = f"-P {_port} " if _port and _port != "22" else ""
@@ -956,14 +1054,24 @@ def setup_cookbook_routes() -> APIRouter:
runner_lines.append('fi')
runner_lines.append(req.cmd)
# Keep shell open after exit so user can see errors
runner_lines.append('echo ""; echo "=== Process exited with code $? ==="; exec "${SHELL:-/bin/bash}"')
if local_windows:
# Detached background process — no interactive shell to keep open.
# Print the exit marker the status poller looks for, then stop.
runner_lines.append('echo ""; echo "=== Process exited with code $? ==="')
else:
# Keep shell open after exit so user can see errors
runner_lines.append('echo ""; echo "=== Process exited with code $? ==="; exec "${SHELL:-/bin/bash}"')
runner_path = TMUX_LOG_DIR / f"{session_id}_run.sh"
runner_path.write_text("\n".join(runner_lines) + "\n")
runner_path.chmod(0o755)
runner_path.write_text("\n".join(runner_lines) + "\n", encoding="utf-8")
# chmod is a no-op on Windows; bash on Windows runs the script
# regardless of the executable bit.
safe_chmod(runner_path, 0o755)
if remote:
if local_windows:
# LOCAL Windows: launch the bash runner detached (tmux replacement).
setup_cmd = None
elif remote:
remote_runner = f".{session_id}_run.sh"
# If command references scripts/, scp those too
scp_extras = ""
@@ -976,9 +1084,10 @@ def setup_cookbook_routes() -> APIRouter:
if diff_script.exists():
scp_extras = f"scp -O {_Pf}-q '{diff_script}' {remote}:.diffusion_server.py && "
runner_path.write_text(
runner_path.read_text().replace(
runner_path.read_text(encoding="utf-8").replace(
"scripts/diffusion_server.py", ".diffusion_server.py"
)
),
encoding="utf-8",
)
setup_cmd = (
f"{scp_extras}"
@@ -988,16 +1097,24 @@ def setup_cookbook_routes() -> APIRouter:
else:
setup_cmd = f"tmux new-session -d -s {session_id} {shlex.quote(str(runner_path))}"
proc = await asyncio.create_subprocess_shell(
setup_cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
await proc.wait()
if setup_cmd is None:
# LOCAL Windows: launch the bash runner detached; no tmux setup_cmd.
try:
_launch_local_detached(session_id, runner_lines)
except Exception as e:
logger.error(f"Local detached serve launch failed: {e}")
return {"ok": False, "error": str(e), "session_id": session_id}
else:
proc = await asyncio.create_subprocess_shell(
setup_cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
await proc.wait()
if proc.returncode != 0:
stderr = (await proc.stderr.read()).decode(errors="replace")
return {"ok": False, "error": stderr, "session_id": session_id}
if proc.returncode != 0:
stderr = (await proc.stderr.read()).decode(errors="replace")
return {"ok": False, "error": stderr, "session_id": session_id}
# Auto-register as model endpoint if serving a diffusion model
endpoint_id = None
@@ -1404,6 +1521,16 @@ def setup_cookbook_routes() -> APIRouter:
proc = await asyncio.create_subprocess_shell(
cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
)
elif IS_WINDOWS:
# No `kill` binary / POSIX signals on Windows. taskkill /F /T tears
# down the PID and its children. There's no graceful-vs-force
# distinction, so TERM/KILL/INT all map to the same forced kill.
# NB: never use os.kill(pid, 0) to probe here — on Windows that
# routes to TerminateProcess and would kill the process.
if not pid_alive(req.pid):
return {"ok": False, "error": f"PID {req.pid} is not running"}
await asyncio.to_thread(kill_process_tree, req.pid)
return {"ok": True, "pid": req.pid, "signal": sig}
else:
proc = await asyncio.create_subprocess_exec(
"kill", f"-{sig}", str(req.pid),
@@ -1427,7 +1554,7 @@ def setup_cookbook_routes() -> APIRouter:
require_admin(request)
if _cookbook_state_path.exists():
try:
return _state_for_client(json.loads(_cookbook_state_path.read_text()))
return _state_for_client(json.loads(_cookbook_state_path.read_text(encoding="utf-8")))
except Exception:
return {}
return {}
@@ -1456,7 +1583,7 @@ def setup_cookbook_routes() -> APIRouter:
data = {}
try:
if _cookbook_state_path.exists():
on_disk = json.loads(_cookbook_state_path.read_text())
on_disk = json.loads(_cookbook_state_path.read_text(encoding="utf-8"))
else:
on_disk = {}
except Exception:
@@ -1636,7 +1763,7 @@ def setup_cookbook_routes() -> APIRouter:
tasks = []
if _cookbook_state_path.exists():
try:
state = json.loads(_cookbook_state_path.read_text())
state = json.loads(_cookbook_state_path.read_text(encoding="utf-8"))
saved_tasks = state.get("tasks", [])
if isinstance(saved_tasks, list):
tasks = saved_tasks
@@ -1705,26 +1832,36 @@ def setup_cookbook_routes() -> APIRouter:
ssh_base.extend(["-p", str(_tport)])
check_cmd = ssh_base + [remote, "tmux", "has-session", "-t", session_id]
capture_cmd = ssh_base + [remote, "tmux", "capture-pane", "-t", session_id, "-p", "-S", "-50"]
elif IS_WINDOWS:
# LOCAL Windows task: launched as a detached process (no tmux).
# Liveness comes from the <session>.pid file, output from the
# <session>.log file the wrapper redirects into. No subprocess.
check_cmd = None
capture_cmd = None
else:
check_cmd = ["tmux", "has-session", "-t", session_id]
capture_cmd = ["tmux", "capture-pane", "-t", session_id, "-p", "-S", "-50"]
try:
alive = subprocess.run(check_cmd, timeout=10, capture_output=True)
is_alive = alive.returncode == 0
except Exception:
is_alive = False
local_win_task = (not remote) and IS_WINDOWS
# Capture last lines for progress. Prefer the "Downloading" line
# (real aggregate bytes) over "Fetching N files" (whole-file count that
# lags with hf_transfer). Falls back to the true last line otherwise.
progress_text = ""
full_snapshot = ""
if is_alive:
if local_win_task:
# File-based liveness + output for the detached-process model.
pid_path = TMUX_LOG_DIR / f"{session_id}.pid"
log_path = TMUX_LOG_DIR / f"{session_id}.log"
task_pid = None
try:
cap = subprocess.run(capture_cmd, timeout=10, capture_output=True, text=True)
if cap.returncode == 0:
full_snapshot = cap.stdout.strip()
task_pid = int(pid_path.read_text(encoding="utf-8").strip())
except Exception:
task_pid = None
is_alive = pid_alive(task_pid)
try:
if log_path.exists():
full_snapshot = log_path.read_text(
encoding="utf-8", errors="replace"
).strip()[-12000:]
lines = [l.strip() for l in full_snapshot.split('\n') if l.strip()]
downloading_lines = [l for l in lines if l.startswith("Downloading")]
if downloading_lines:
@@ -1733,10 +1870,36 @@ def setup_cookbook_routes() -> APIRouter:
progress_text = lines[-1]
except Exception:
pass
else:
try:
alive = subprocess.run(check_cmd, timeout=10, capture_output=True)
is_alive = alive.returncode == 0
except Exception:
is_alive = False
# Determine status
# Capture last lines for progress. Prefer the "Downloading" line
# (real aggregate bytes) over "Fetching N files" (whole-file count that
# lags with hf_transfer). Falls back to the true last line otherwise.
if is_alive:
try:
cap = subprocess.run(capture_cmd, timeout=10, capture_output=True, text=True)
if cap.returncode == 0:
full_snapshot = cap.stdout.strip()
lines = [l.strip() for l in full_snapshot.split('\n') if l.strip()]
downloading_lines = [l for l in lines if l.startswith("Downloading")]
if downloading_lines:
progress_text = downloading_lines[-1]
elif lines:
progress_text = lines[-1]
except Exception:
pass
# Determine status. For the local-Windows detached model the log file
# persists after the process exits, so a finished download still has a
# snapshot to classify (DOWNLOAD_OK / exit marker) — evaluate it even
# when the PID is gone instead of blindly reporting "stopped".
status = "unknown"
if is_alive:
if is_alive or (local_win_task and full_snapshot):
lower = full_snapshot.lower()
has_exit = "=== process exited with code" in lower
has_error = "error" in lower or "failed" in lower or "traceback" in lower
@@ -1754,6 +1917,9 @@ def setup_cookbook_routes() -> APIRouter:
status = "completed"
elif "application startup complete" in lower:
status = "ready"
elif not is_alive:
# local-Windows: process gone, log has no success/ready marker.
status = "stopped"
else:
status = "running"
else:

View File

@@ -148,7 +148,7 @@ def _locate_upload(upload_dir: str, file_id: str):
try:
idx_path = os.path.join(upload_dir, "uploads.json")
if os.path.exists(idx_path):
with open(idx_path, "r") as f:
with open(idx_path, "r", encoding="utf-8") as f:
idx = _json.load(f)
for meta in (idx.values() if isinstance(idx, dict) else []):
if meta.get("id") == file_id:

View File

@@ -444,7 +444,7 @@ _init_scheduled_db()
def _load_settings():
if SETTINGS_FILE.exists():
return json.loads(SETTINGS_FILE.read_text())
return json.loads(SETTINGS_FILE.read_text(encoding="utf-8"))
return {}

View File

@@ -2834,7 +2834,7 @@ def setup_email_routes():
if not path.exists():
return {"total_unread": 0, "total_urgent": 0, "max_score": 0, "per_uid": {}}
try:
data = _json.loads(path.read_text())
data = _json.loads(path.read_text(encoding="utf-8"))
except Exception:
return {"total_unread": 0, "total_urgent": 0, "max_score": 0, "per_uid": {}}
# Drop `notified_uids` from the payload — it's an internal scheduler

View File

@@ -86,7 +86,7 @@ def _load_custom_endpoint() -> dict:
"""Load the saved custom embedding endpoint, if any."""
try:
if os.path.exists(_ENDPOINT_FILE):
return json.loads(Path(_ENDPOINT_FILE).read_text())
return json.loads(Path(_ENDPOINT_FILE).read_text(encoding="utf-8"))
except Exception:
pass
return {}
@@ -94,7 +94,7 @@ def _load_custom_endpoint() -> dict:
def _save_custom_endpoint(data: dict):
Path(_ENDPOINT_FILE).parent.mkdir(parents=True, exist_ok=True)
Path(_ENDPOINT_FILE).write_text(json.dumps(data, indent=2))
Path(_ENDPOINT_FILE).write_text(json.dumps(data, indent=2), encoding="utf-8")
def setup_embedding_routes():

View File

@@ -141,7 +141,7 @@ def setup_mcp_routes(mcp_manager: McpManager):
}
}
filepath = os.path.join(oauth_dir, oauth_filename)
with open(filepath, "w") as f:
with open(filepath, "w", encoding="utf-8") as f:
json.dump(creds, f, indent=2)
logger.info(f"Wrote OAuth credentials to {filepath}")
parsed_env.pop("GOOGLE_CLIENT_ID", None)
@@ -354,7 +354,7 @@ def setup_mcp_routes(mcp_manager: McpManager):
if not keys_file or not os.path.exists(keys_file):
raise HTTPException(400, "OAuth keys file not found")
with open(keys_file) as f:
with open(keys_file, encoding="utf-8") as f:
keys_data = json.load(f)
keys = keys_data.get("installed") or keys_data.get("web")
if not keys:
@@ -427,7 +427,7 @@ def setup_mcp_routes(mcp_manager: McpManager):
keys_file = os.path.expanduser(oauth_cfg.get("keys_file", ""))
token_file = os.path.expanduser(oauth_cfg.get("token_file", ""))
with open(keys_file) as f:
with open(keys_file, encoding="utf-8") as f:
keys_data = json.load(f)
keys = keys_data.get("installed") or keys_data.get("web")
client_id = keys["client_id"]
@@ -457,7 +457,7 @@ def setup_mcp_routes(mcp_manager: McpManager):
# Save tokens to the file the MCP package expects
os.makedirs(os.path.dirname(token_file), exist_ok=True)
with open(token_file, "w") as f:
with open(token_file, "w", encoding="utf-8") as f:
json.dump(tokens, f, indent=2)
logger.info(f"Saved OAuth tokens to {token_file}")

View File

@@ -145,7 +145,7 @@ async def dispatch_reminder(
_slug = "".join(c if (c.isalnum() or c in "-_.@") else "_" for c in (owner or "default"))
cache_path = _P(f"data/note_pings_{_slug}.json")
if cache_path.exists():
cache = _json.loads(cache_path.read_text())
cache = _json.loads(cache_path.read_text(encoding="utf-8"))
last = cache.get(cache_key)
if last:
last_channel = None
@@ -428,7 +428,7 @@ async def dispatch_reminder(
_STATE = _P(f"data/note_pings_{_slug}.json")
_STATE.parent.mkdir(parents=True, exist_ok=True)
try:
_cache = cache or (_json.loads(_STATE.read_text()) if _STATE.exists() else {})
_cache = cache or (_json.loads(_STATE.read_text(encoding="utf-8")) if _STATE.exists() else {})
except Exception:
_cache = {}
sent_channel = "email" if email_sent else "ntfy" if ntfy_sent else "browser"
@@ -436,7 +436,7 @@ async def dispatch_reminder(
"at": _dt.now(_tz.utc).isoformat(),
"channel": sent_channel,
}
_STATE.write_text(_json.dumps(_cache))
_STATE.write_text(_json.dumps(_cache), encoding="utf-8")
except Exception as _e:
logger.debug(f"dispatch_reminder: cache write failed: {_e}")

View File

@@ -11,7 +11,7 @@ PREFS_FILE = os.path.join("data", "user_prefs.json")
def _load():
"""Load the raw prefs file (internal use only)."""
try:
with open(PREFS_FILE, "r") as f:
with open(PREFS_FILE, "r", encoding="utf-8") as f:
return json.load(f)
except (FileNotFoundError, json.JSONDecodeError):
return {}
@@ -19,7 +19,7 @@ def _load():
def _save(prefs):
os.makedirs(os.path.dirname(PREFS_FILE), exist_ok=True)
with open(PREFS_FILE, "w") as f:
with open(PREFS_FILE, "w", encoding="utf-8") as f:
json.dump(prefs, f, indent=2)

View File

@@ -69,7 +69,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
if not path.exists():
return False
try:
return json.loads(path.read_text()).get("owner") == user
return json.loads(path.read_text(encoding="utf-8")).get("owner") == user
except Exception:
return False
@@ -130,7 +130,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
if not path.exists():
raise HTTPException(404, "Research not found")
try:
owner = json.loads(path.read_text()).get("owner")
owner = json.loads(path.read_text(encoding="utf-8")).get("owner")
except Exception:
raise HTTPException(404, "Research not found")
if owner != user:
@@ -190,7 +190,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
items = []
for p in data_dir.glob("*.json"):
try:
d = json.loads(p.read_text())
d = json.loads(p.read_text(encoding="utf-8"))
# SECURITY: only show research belonging to this user. Legacy
# JSONs without an `owner` field are hidden — auth was the only
# gate before, so every user saw every other user's reports.
@@ -239,7 +239,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
if not path.exists():
raise HTTPException(404, "Research not found")
try:
data = json.loads(path.read_text())
data = json.loads(path.read_text(encoding="utf-8"))
except Exception as e:
raise HTTPException(500, f"Failed to read research: {e}")
# SECURITY: 404 (not 403) so we don't leak that the report exists.
@@ -255,11 +255,11 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
if not path.exists():
raise HTTPException(404, "Research not found")
try:
data = json.loads(path.read_text())
data = json.loads(path.read_text(encoding="utf-8"))
if data.get("owner") != user:
raise HTTPException(404, "Research not found")
data["archived"] = bool(archived)
path.write_text(json.dumps(data))
path.write_text(json.dumps(data), encoding="utf-8")
except HTTPException:
raise
except Exception as e:
@@ -276,7 +276,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
if json_path.exists():
# SECURITY: verify ownership before letting the caller delete it.
try:
data = json.loads(json_path.read_text())
data = json.loads(json_path.read_text(encoding="utf-8"))
if data.get("owner") != user:
raise HTTPException(404, "Research not found")
except HTTPException:
@@ -452,7 +452,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
if result is None:
p = Path("data/deep_research") / f"{session_id}.json"
if p.exists():
d = json.loads(p.read_text())
d = json.loads(p.read_text(encoding="utf-8"))
return {
"result": d.get("result", ""),
"sources": d.get("sources", []),
@@ -486,7 +486,7 @@ def setup_research_routes(research_handler, session_manager=None) -> APIRouter:
path = Path("data/deep_research") / f"{session_id}.json"
if path.exists():
try:
disk = json.loads(path.read_text())
disk = json.loads(path.read_text(encoding="utf-8"))
if not result:
result = disk.get("result")
if not sources:

View File

@@ -6,11 +6,17 @@ import logging
import os
import shlex
import shutil
import subprocess
import uuid
import tempfile
from pathlib import Path
from typing import Dict, Any
# POSIX-only: `pty`/`fcntl` transitively import `termios`, which does NOT exist
# on Windows, so importing them unconditionally crashed app startup there
# (ModuleNotFoundError: termios — issues #140/#92/#63/#149/#150). The PTY code
# path is only reachable on POSIX; Windows uses pipe streaming + a detached-job
# fallback for the tmux feature (see _generate_win_detached).
try:
import fcntl
import pty
@@ -25,6 +31,12 @@ from fastapi import APIRouter, Request, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from core.platform_compat import (
IS_WINDOWS,
detached_popen_kwargs,
find_bash,
)
def _require_admin(request: Request):
"""Reject non-admin callers. Shell exec is admin-only — never expose to
@@ -78,11 +90,25 @@ class ShellExecRequest(BaseModel):
use_tmux: bool = False # run in tmux session (survives browser disconnect)
async def _create_shell(command: str, **kwargs):
"""Spawn a shell subprocess for `command`.
POSIX: /bin/sh via create_subprocess_shell (unchanged behaviour).
Windows: prefer a real bash (Git Bash/WSL) so bash-syntax commands behave
the same as on Linux; fall back to cmd.exe when no bash is installed.
"""
if IS_WINDOWS:
bash = find_bash()
if bash:
return await asyncio.create_subprocess_exec(bash, "-c", command, **kwargs)
return await asyncio.create_subprocess_shell(command, **kwargs)
async def _exec_shell(command: str, timeout: int = EXEC_TIMEOUT) -> Dict[str, Any]:
"""Run a shell command and return stdout/stderr/exit_code."""
proc = None
try:
proc = await asyncio.create_subprocess_shell(
proc = await _create_shell(
command,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
@@ -355,6 +381,93 @@ async def _generate_tmux(cmd: str, request: Request):
pass
async def _generate_win_detached(cmd: str, request: Request):
"""Windows stand-in for the tmux path (issues #84/#162).
tmux doesn't exist on Windows, so we run the command in a *detached* child
(DETACHED_PROCESS — survives browser disconnect, same as the tmux session)
that writes output to a log file, and tail that log over SSE. Prefers bash
(Git Bash) for command-syntax parity; falls back to cmd.exe. There's no
`tmux attach` equivalent, but the "keeps running if you disconnect" contract
holds, which is the point of the feature for long Cookbook downloads."""
TMUX_LOG_DIR.mkdir(parents=True, exist_ok=True)
session_id = f"cookbook-{uuid.uuid4().hex[:8]}"
log_path = TMUX_LOG_DIR / f"{session_id}.log"
exit_path = TMUX_LOG_DIR / f"{session_id}.exit"
bash = find_bash()
if bash:
script_path = TMUX_LOG_DIR / f"{session_id}.sh"
script_path.write_text(
f"{cmd} > {shlex.quote(str(log_path))} 2>&1\n"
f"echo $? > {shlex.quote(str(exit_path))}\n",
encoding="utf-8",
)
argv = [bash, str(script_path)]
else:
script_path = TMUX_LOG_DIR / f"{session_id}.cmd"
# cmd.exe wrapper: run, redirect all output to the log, record exit code.
script_path.write_text(
"@echo off\r\n"
f'call {cmd} > "{log_path}" 2>&1\r\n'
f'echo %ERRORLEVEL%> "{exit_path}"\r\n',
encoding="utf-8",
)
argv = [os.environ.get("ComSpec", "cmd.exe"), "/c", str(script_path)]
try:
subprocess.Popen(
argv,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
stdin=subprocess.DEVNULL,
**detached_popen_kwargs(),
)
except Exception as e:
yield f"data: {json.dumps({'stream': 'stderr', 'data': f'Failed to launch background job: {e}'})}\n\n"
yield f"data: {json.dumps({'exit_code': -1})}\n\n"
return
yield f"data: {json.dumps({'stream': 'stdout', 'data': f'Started background job: {session_id}'})}\n\n"
lines_sent = 0
exit_code = None
while True:
if await request.is_disconnected():
yield f"data: {json.dumps({'stream': 'stdout', 'data': f'Disconnected. Background job {session_id} continues running.'})}\n\n"
return
try:
if log_path.exists():
lines = log_path.read_text(encoding="utf-8", errors="replace").splitlines()
for line in lines[lines_sent:]:
yield f"data: {json.dumps({'stream': 'stdout', 'data': line})}\n\n"
lines_sent = len(lines)
except Exception as e:
logger.debug("win detached log read error: %s", e)
if exit_path.exists():
# Drain any final lines, then read the recorded exit code.
await asyncio.sleep(0.3)
try:
if log_path.exists():
lines = log_path.read_text(encoding="utf-8", errors="replace").splitlines()
for line in lines[lines_sent:]:
yield f"data: {json.dumps({'stream': 'stdout', 'data': line})}\n\n"
lines_sent = len(lines)
exit_code = int((exit_path.read_text(encoding="utf-8", errors="replace").strip() or "0"))
except Exception:
exit_code = 0
break
await asyncio.sleep(1.0)
yield f"data: {json.dumps({'exit_code': exit_code})}\n\n"
for p in (log_path, exit_path, script_path):
try:
p.unlink(missing_ok=True)
except Exception:
pass
def setup_shell_routes() -> APIRouter:
router = APIRouter(tags=["shell"])
@@ -393,22 +506,24 @@ def setup_shell_routes() -> APIRouter:
)
if use_tmux:
return StreamingResponse(
_generate_tmux(cmd, request),
media_type="text/event-stream",
)
# tmux is POSIX-only; Windows uses a detached-process + logfile tail
# that preserves the "survives disconnect" behaviour.
gen = _generate_win_detached(cmd, request) if IS_WINDOWS else _generate_tmux(cmd, request)
return StreamingResponse(gen, media_type="text/event-stream")
if use_pty:
if use_pty and not IS_WINDOWS:
return StreamingResponse(
_generate_pty(cmd, timeout, request),
media_type="text/event-stream",
)
# Windows has no PTY; fall through to pipe streaming below (output still
# streams line-by-line, just without live in-place progress-bar redraws).
async def generate():
proc = None
reader_tasks = []
try:
proc = await asyncio.create_subprocess_shell(
proc = await _create_shell(
cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,

View File

@@ -105,7 +105,7 @@ def setup_upload_routes(upload_handler):
info = None
uploads_db = os.path.join(UPLOAD_DIR, "uploads.json")
if os.path.exists(uploads_db):
with open(uploads_db) as f:
with open(uploads_db, encoding="utf-8") as f:
db = json.load(f)
info = next((fi for fi in db.values() if fi["id"] == file_id), None)
if info:
@@ -153,7 +153,7 @@ def setup_upload_routes(upload_handler):
info = None
uploads_db = os.path.join(UPLOAD_DIR, "uploads.json")
if os.path.exists(uploads_db):
with open(uploads_db) as f:
with open(uploads_db, encoding="utf-8") as f:
db = json.load(f)
info = next((fi for fi in db.values() if fi["id"] == file_id), None)
return info
@@ -199,7 +199,7 @@ def setup_upload_routes(upload_handler):
cache_path = _vision_cache_path(file_id)
if not force and os.path.exists(cache_path):
try:
with open(cache_path) as f:
with open(cache_path, encoding="utf-8") as f:
return {"text": f.read(), "cached": True}
except Exception as e:
logger.warning(f"Vision cache read failed for {file_id}: {e}")
@@ -210,7 +210,7 @@ def setup_upload_routes(upload_handler):
logger.error(f"Vision analysis failed for {file_id}: {e}")
raise HTTPException(500, f"Vision analysis failed: {e}")
try:
with open(cache_path, "w") as f:
with open(cache_path, "w", encoding="utf-8") as f:
f.write(text)
except Exception as e:
logger.warning(f"Vision cache write failed for {file_id}: {e}")
@@ -238,7 +238,7 @@ def setup_upload_routes(upload_handler):
text = (body or {}).get("text", "")
if not isinstance(text, str):
raise HTTPException(400, "text must be a string")
with open(_vision_cache_path(file_id), "w") as f:
with open(_vision_cache_path(file_id), "w", encoding="utf-8") as f:
f.write(text)
return {"ok": True}

View File

@@ -16,6 +16,7 @@ from fastapi import APIRouter, Request
from pydantic import BaseModel
from core.middleware import require_admin
from core.platform_compat import IS_WINDOWS, safe_chmod, which_tool
logger = logging.getLogger(__name__)
@@ -23,10 +24,23 @@ VAULT_FILE = Path("data/vault.json")
def _find_bw() -> str:
"""Locate the bw binary, checking PATH and common npm-global locations."""
p = shutil.which("bw")
"""Locate the bw binary, checking PATH and common npm-global locations.
On Windows the Bitwarden CLI shim is `bw.cmd`/`bw.exe`, resolved by
which_tool via PATHEXT.
"""
p = which_tool("bw")
if p:
return p
if IS_WINDOWS:
appdata = os.environ.get("APPDATA", os.path.expanduser("~"))
for candidate in (
os.path.join(appdata, "npm", "bw.cmd"),
os.path.join(appdata, "npm", "bw.exe"),
):
if os.path.isfile(candidate):
return candidate
return "bw"
home = os.path.expanduser("~")
for candidate in (
f"{home}/.npm-global/bin/bw",
@@ -47,7 +61,7 @@ def _find_bw() -> str:
def _load_config() -> dict:
if VAULT_FILE.exists():
try:
return json.loads(VAULT_FILE.read_text())
return json.loads(VAULT_FILE.read_text(encoding="utf-8"))
except Exception:
pass
return {}
@@ -55,11 +69,10 @@ def _load_config() -> dict:
def _save_config(cfg: dict):
VAULT_FILE.parent.mkdir(parents=True, exist_ok=True)
VAULT_FILE.write_text(json.dumps(cfg, indent=2))
try:
os.chmod(str(VAULT_FILE), 0o600)
except Exception:
pass
VAULT_FILE.write_text(json.dumps(cfg, indent=2), encoding="utf-8")
# POSIX: restrict the BW_SESSION store to 0o600. Windows: no-op (profile dir
# is ACL-restricted already).
safe_chmod(str(VAULT_FILE), 0o600)
async def _run_bw(args: list, session: str = None, input_text: str = None) -> tuple:

View File

@@ -173,7 +173,7 @@ def _entry_from_modelinfo(mi, overrides):
def main():
with open(DATA_PATH) as f:
with open(DATA_PATH, encoding="utf-8") as f:
catalog = json.load(f)
by_name = {m["name"]: m for m in catalog}
existing = set(by_name)
@@ -214,12 +214,12 @@ def main():
return
# Backup + merge
with open(DATA_PATH + ".bak", "w") as f:
with open(DATA_PATH + ".bak", "w", encoding="utf-8") as f:
json.dump(catalog, f, indent=2)
for name, entry in to_add.items():
by_name[name] = entry
merged = list(by_name.values())
with open(DATA_PATH, "w") as f:
with open(DATA_PATH, "w", encoding="utf-8") as f:
json.dump(merged, f, indent=2)
print(f"\nAdded/updated {len(to_add)} models. Catalog now {len(merged)} (was {len(catalog)}).")

View File

@@ -29,7 +29,7 @@ def main():
if not os.path.exists(path):
print(f" {label}: not found, skipping")
continue
with open(path, "r") as f:
with open(path, "r", encoding="utf-8") as f:
entries = json.load(f)
count = 0
for e in entries:
@@ -37,7 +37,7 @@ def main():
e["owner"] = owner
count += 1
if count:
with open(path, "w") as f:
with open(path, "w", encoding="utf-8") as f:
json.dump(entries, f, ensure_ascii=False, indent=2)
print(f" {label}: claimed {count} entries")

View File

@@ -117,7 +117,7 @@ def load_model():
cls_name_from_index = ""
if model_index.exists():
try:
idx = json.loads(model_index.read_text())
idx = json.loads(model_index.read_text(encoding="utf-8"))
cls_name_from_index = idx.get("_class_name", "")
if hasattr(diffusers, cls_name_from_index):
pipeline_cls = getattr(diffusers, cls_name_from_index)

View File

@@ -39,7 +39,7 @@ def migrate_memories():
logger.info("No memory FAISS index found, skipping memory migration")
return
ids = json.loads(open(ids_path).read())
ids = json.loads(open(ids_path, encoding="utf-8").read())
if not ids:
logger.info("Memory FAISS index is empty, skipping")
return
@@ -47,7 +47,7 @@ def migrate_memories():
# Load memory texts
memories = {}
if os.path.exists(memory_path):
for mem in json.loads(open(memory_path).read()):
for mem in json.loads(open(memory_path, encoding="utf-8").read()):
memories[mem.get("id", "")] = mem
embed = get_embedding_client()
@@ -97,7 +97,7 @@ def migrate_rag():
logger.info("No RAG DocStore found, skipping RAG migration")
return
data = json.loads(open(docs_path).read())
data = json.loads(open(docs_path, encoding="utf-8").read())
ids = data.get("ids", [])
documents = data.get("documents", [])
metadatas = data.get("metadatas", [])

View File

@@ -1,5 +1,6 @@
import os
import platform
import shutil
import subprocess
import time
@@ -138,7 +139,7 @@ def _detect_amd():
val = _run(["cat", path])
return val.strip() if val else None
try:
with open(path) as f:
with open(path, encoding="utf-8", errors="replace") as f:
return f.read().strip()
except Exception:
return None
@@ -285,7 +286,7 @@ def _read_file(path):
if _remote_host:
return _run(["cat", path])
try:
with open(path) as f:
with open(path, encoding="utf-8", errors="replace") as f:
return f.read()
except Exception:
return None
@@ -314,7 +315,9 @@ def _get_ram_gb():
if "MemTotal" in meminfo:
return meminfo["MemTotal"] / (1024**2)
if not _remote_host:
# os.sysconf only exists on Unix; on Windows it's absent (AttributeError)
# and these constants aren't defined — guard so this never raises there.
if not _remote_host and hasattr(os, "sysconf") and "SC_PHYS_PAGES" in getattr(os, "sysconf_names", {}):
try:
pages = os.sysconf("SC_PHYS_PAGES")
page_size = os.sysconf("SC_PAGE_SIZE")
@@ -375,8 +378,20 @@ def _get_cpu_count():
return os.cpu_count() or 1
def _powershell_exe():
"""Pick the best PowerShell executable for LOCAL execution: prefer pwsh
(PowerShell 7+), fall back to Windows PowerShell 5.1. Returns an absolute
path so we don't depend on a particular PATH ordering."""
return shutil.which("pwsh") or shutil.which("powershell") or "powershell"
def _detect_windows():
"""Detect Windows hardware in a single SSH call using PowerShell."""
"""Detect Windows hardware via PowerShell/WMI.
Works for BOTH local (host="") and remote (SSH) detection:
* remote -> `_run` ships the string to the host over SSH.
* local -> `_run` executes a list argv directly (no shell quoting hell).
"""
# Single PowerShell command that gathers all hardware info at once
ps_cmd = (
"$r = @{}; "
@@ -413,22 +428,43 @@ def _detect_windows():
"}; "
"$r | ConvertTo-Json -Compress"
)
out = _run(f'powershell -Command "{ps_cmd}"')
if _remote_host:
# Remote: ship a single command string over SSH. The remote shell parses
# the quoting; PowerShell on the far side runs the -Command payload.
out = _run(f'powershell -Command "{ps_cmd}"')
else:
# Local: pass a LIST argv straight to subprocess so the OS hands ps_cmd
# to PowerShell verbatim — no fragile string-level quote escaping. Prefer
# pwsh (PS7), else Windows PowerShell 5.1.
out = _run([_powershell_exe(), "-NoProfile", "-NonInteractive", "-Command", ps_cmd])
if not out:
return None
import json as _json
try:
d = _json.loads(out)
# PowerShell's Measure-Object .Sum / .Count come back as JSON numbers and
# decode to float; the Linux path returns plain ints for these — coerce
# so the dict shape (and downstream int math) matches across platforms.
def _as_int(v, default):
try:
return int(v)
except (TypeError, ValueError):
return default
_cpu_name = (d.get("cpu_name") or "unknown")
if isinstance(_cpu_name, str):
_cpu_name = _cpu_name.strip() or "unknown"
result = {
"total_ram_gb": d.get("ram_gb", 0),
"available_ram_gb": d.get("avail_gb", 0),
"cpu_cores": d.get("cpu_cores", 1),
"cpu_name": d.get("cpu_name", "unknown"),
"cpu_cores": _as_int(d.get("cpu_cores"), 1),
"cpu_name": _cpu_name,
"has_gpu": bool(d.get("gpu_name")),
"gpu_name": d.get("gpu_name"),
"gpu_vram_gb": d.get("gpu_vram_gb"),
"gpu_count": d.get("gpu_count", 0),
"gpu_count": _as_int(d.get("gpu_count"), 0),
"backend": d.get("gpu_backend", "cpu_x86"),
"homogeneous": True,
"gpu_error": None,
}
# PowerShell only reports aggregate GPU info, not per-card detail, so we
# can't tell a mixed box from a uniform one here — assume one homogeneous
@@ -490,6 +526,18 @@ def detect_system(host="", ssh_port="", platform="", fresh=False):
_cache_by_host[cache_key] = (now, result)
return result
# Local Windows: the Linux /proc + /sys + os.sysconf path returns 0 GB RAM,
# "unknown" CPU and no GPU on Windows (and os.sysconf doesn't even exist),
# so detect locally via PowerShell/WMI instead. _detect_windows() runs the
# same probe used for remote Windows, but _run() executes it locally.
if not _remote_host and os.name == "nt":
result = _detect_windows()
if result:
_cache_by_host[cache_key] = (now, result)
return result
# PowerShell probe failed entirely — fall through to the generic path
# below so we at least return a well-shaped dict rather than crashing.
# Linux/Termux: existing multi-command detection
total_ram = round(_get_ram_gb(), 1)
# If remote host returns 0 RAM, connection likely failed

View File

@@ -166,7 +166,7 @@ def get_models():
if _models_cache is None:
data_path = os.path.join(os.path.dirname(__file__), "data", "hf_models.json")
try:
with open(data_path) as f:
with open(data_path, encoding="utf-8") as f:
_models_cache = json.load(f)
except (FileNotFoundError, json.JSONDecodeError):
_models_cache = []

View File

@@ -45,7 +45,7 @@ def _fingerprint_entries(entries) -> str:
def _load_tidy_state(memory_manager) -> dict:
path = _tidy_state_path(memory_manager)
try:
with open(path, "r") as f:
with open(path, "r", encoding="utf-8") as f:
data = json.load(f)
return data if isinstance(data, dict) else {}
except (FileNotFoundError, json.JSONDecodeError):
@@ -57,7 +57,7 @@ def _save_tidy_state(memory_manager, owner: Optional[str], fingerprint: str) ->
state = _load_tidy_state(memory_manager)
state[owner or ""] = {"fingerprint": fingerprint}
try:
with open(path, "w") as f:
with open(path, "w", encoding="utf-8") as f:
json.dump(state, f, indent=2)
except OSError as e:
logger.warning(f"Could not persist tidy fingerprint: {e}")

View File

@@ -89,7 +89,7 @@ class SkillsManager:
if not os.path.exists(self.usage_file):
return {}
try:
with open(self.usage_file) as f:
with open(self.usage_file, encoding="utf-8") as f:
d = json.load(f)
return d if isinstance(d, dict) else {}
except Exception:
@@ -101,7 +101,7 @@ class SkillsManager:
atomic_write_json(self.usage_file, usage, indent=2)
except Exception:
tmp = self.usage_file + ".tmp"
with open(tmp, "w") as f:
with open(tmp, "w", encoding="utf-8") as f:
json.dump(usage, f, indent=2)
os.replace(tmp, self.usage_file)
@@ -148,7 +148,7 @@ class SkillsManager:
def _read_skill(self, path: str) -> Optional[Skill]:
try:
with open(path) as f:
with open(path, encoding="utf-8") as f:
text = f.read()
return Skill.from_markdown(text, path=path)
except Exception as e:
@@ -221,7 +221,7 @@ class SkillsManager:
# Legacy JSON entries — surfaced as draft, not editable from new flow
if os.path.exists(self.legacy_file):
try:
with open(self.legacy_file) as f:
with open(self.legacy_file, encoding="utf-8") as f:
legacy = json.load(f)
if isinstance(legacy, list):
for row in legacy:
@@ -461,7 +461,7 @@ class SkillsManager:
sk = self._read_skill(path)
if sk and sk.name == name:
try:
with open(path) as f:
with open(path, encoding="utf-8") as f:
return f.read()
except Exception:
return None
@@ -481,7 +481,7 @@ class SkillsManager:
if not os.path.isfile(target):
return None
try:
with open(target) as f:
with open(target, encoding="utf-8") as f:
return f.read()
except Exception:
return None

View File

@@ -114,7 +114,7 @@ class ResearchHandler:
path = RESEARCH_DATA_DIR / f"{session_id}.json"
if path.exists():
try:
data = json.loads(path.read_text())
data = json.loads(path.read_text(encoding="utf-8"))
return {
"status": data.get("status", "done"),
"progress": {},
@@ -151,7 +151,7 @@ class ResearchHandler:
path = RESEARCH_DATA_DIR / f"{session_id}.json"
if path.exists():
try:
data = json.loads(path.read_text())
data = json.loads(path.read_text(encoding="utf-8"))
return data.get("result")
except Exception:
pass
@@ -171,7 +171,7 @@ class ResearchHandler:
path = RESEARCH_DATA_DIR / f"{session_id}.json"
if path.exists():
try:
data = json.loads(path.read_text())
data = json.loads(path.read_text(encoding="utf-8"))
return data.get("sources")
except Exception:
pass
@@ -219,7 +219,7 @@ class ResearchHandler:
"started_at": entry["started_at"],
"completed_at": time.time(),
}
path.write_text(json.dumps(data))
path.write_text(json.dumps(data), encoding="utf-8")
logger.info(f"Research result saved to {path}")
except Exception as e:
logger.error(f"Failed to save research result: {e}")

View File

@@ -65,7 +65,7 @@ def create_default_admin():
}
}
}
with open(auth_path, "w") as f:
with open(auth_path, "w", encoding="utf-8") as f:
json.dump(auth_data, f, indent=2)
print(f" [ok] Initial admin user created ({username})")
print(f" Temporary password: {password}")

View File

@@ -38,14 +38,14 @@ class APIKeyManager:
"""Save encrypted API key to file"""
keys = self.load()
keys[provider] = self.encrypt_api_key(api_key)
with open(self.api_keys_file, 'w') as f:
with open(self.api_keys_file, 'w', encoding="utf-8") as f:
json.dump(keys, f)
def load(self) -> Dict[str, str]:
"""Load and decrypt API keys"""
if not os.path.exists(self.api_keys_file):
return {}
with open(self.api_keys_file, 'r') as f:
with open(self.api_keys_file, 'r', encoding="utf-8") as f:
encrypted_keys = json.load(f)
return {
provider: self.decrypt_api_key(key)

View File

@@ -22,7 +22,7 @@ from __future__ import annotations
import json
import os
import signal
import shlex
import subprocess
import time
import uuid
@@ -30,6 +30,12 @@ from pathlib import Path
from typing import Any, Dict, List, Optional
from core.atomic_io import atomic_write_json
from core.platform_compat import (
detached_popen_kwargs,
find_bash,
kill_process_tree,
pid_alive,
)
_DATA_DIR = Path(os.environ.get("DATA_DIR", "data"))
_JOBS_DIR = _DATA_DIR / "bg_jobs"
@@ -49,7 +55,7 @@ _RETENTION_S = 3600 # 1 hour after follow-up
def _load() -> Dict[str, Dict[str, Any]]:
try:
if _STORE.exists():
return json.loads(_STORE.read_text()) or {}
return json.loads(_STORE.read_text(encoding="utf-8")) or {}
except Exception:
pass
return {}
@@ -60,13 +66,11 @@ def _save(jobs: Dict[str, Dict[str, Any]]) -> None:
def _pid_alive(pid: Optional[int]) -> bool:
if not pid:
return False
try:
os.kill(pid, 0)
return True
except (OSError, ProcessLookupError):
return False
# Delegates to the platform-safe probe. NB: a bare os.kill(pid, 0) is unsafe
# on Windows — CPython routes it to TerminateProcess, which would KILL the
# job we're only trying to check. core.platform_compat.pid_alive handles
# both OSes correctly.
return pid_alive(pid)
def launch(command: str, session_id: str, cwd: Optional[str] = None,
@@ -88,22 +92,46 @@ def launch(command: str, session_id: str, cwd: Optional[str] = None,
# command in `( … )` — the wrapper can't be broken by an unbalanced paren or
# a trailing line-continuation in the command. `$?` is the child's real
# exit status.
cmd_path = _JOBS_DIR / f"{job_id}.cmd.sh"
cmd_path.write_text(command + "\n")
wrapper = (
f"bash {cmd_path} > {log_path} 2>&1\n"
f"echo $? > {exit_path}\n"
)
script_path = _JOBS_DIR / f"{job_id}.sh"
script_path.write_text(wrapper)
bash = find_bash()
if bash:
# POSIX, or Windows with Git Bash/WSL. The user command goes in its OWN
# script file, run as a child `bash` — an `exit` inside it only ends
# that child (so the wrapper still records the exit code), and an
# unbalanced paren / trailing line-continuation in the command can't
# break the wrapper. `$?` is the child's real exit status. Paths are
# emitted as POSIX (forward-slash) + shell-quoted so Git Bash on Windows
# handles drive paths and spaces correctly.
cmd_path = _JOBS_DIR / f"{job_id}.cmd.sh"
cmd_path.write_text(command + "\n", encoding="utf-8")
lp, xp, cp = (shlex.quote(p.as_posix()) for p in (log_path, exit_path, cmd_path))
script_path = _JOBS_DIR / f"{job_id}.sh"
script_path.write_text(
f"bash {cp} > {lp} 2>&1\n"
f"echo $? > {xp}\n",
encoding="utf-8",
)
argv = [bash, str(script_path)]
else:
# Windows without any bash installed: cmd.exe wrapper. The command runs
# in its own child .cmd so %ERRORLEVEL% is the command's real exit code.
child_path = _JOBS_DIR / f"{job_id}.child.cmd"
child_path.write_text("@echo off\r\n" + command + "\r\n", encoding="utf-8")
script_path = _JOBS_DIR / f"{job_id}.cmd"
script_path.write_text(
"@echo off\r\n"
f'call "{child_path}" > "{log_path}" 2>&1\r\n'
f'echo %ERRORLEVEL%> "{exit_path}"\r\n',
encoding="utf-8",
)
argv = [os.environ.get("ComSpec", "cmd.exe"), "/c", str(script_path)]
proc = subprocess.Popen(
["bash", str(script_path)],
argv,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
stdin=subprocess.DEVNULL,
cwd=cwd or None,
start_new_session=True, # setsid — detach from the request lifecycle
**detached_popen_kwargs(), # detach from the request lifecycle (setsid / DETACHED_PROCESS)
)
rec = {
@@ -128,7 +156,7 @@ def launch(command: str, session_id: str, cwd: Optional[str] = None,
def _read_output(rec: Dict[str, Any]) -> str:
try:
txt = Path(rec["log_path"]).read_text(errors="replace")
txt = Path(rec["log_path"]).read_text(encoding="utf-8", errors="replace")
except Exception:
return ""
if len(txt) > _MAX_OUTPUT_CHARS:
@@ -198,15 +226,8 @@ def refresh() -> Dict[str, Dict[str, Any]]:
def _kill(pid: Optional[int]) -> None:
if not pid:
return
try:
os.killpg(os.getpgid(pid), signal.SIGTERM)
except Exception:
try:
os.kill(pid, signal.SIGTERM)
except Exception:
pass
# Cross-platform process-tree teardown (POSIX killpg / Windows taskkill /T).
kill_process_tree(pid)
def pending_followups() -> List[Dict[str, Any]]:

View File

@@ -11,6 +11,7 @@ from datetime import datetime
from typing import Tuple
from src.auth_helpers import owner_filter
from core.platform_compat import IS_WINDOWS, find_bash
logger = logging.getLogger(__name__)
@@ -266,6 +267,11 @@ async def action_ssh_command(owner: str, command: str = "", host: str = "localho
if not command:
return "No command specified", False
if host in ("localhost", "127.0.0.1", "local"):
if IS_WINDOWS:
bash = find_bash()
if bash:
return await _run_subprocess([bash, "-c", command], timeout=120, label="Command")
return await _run_subprocess(command, shell=True, timeout=120, label="Command")
return await _run_subprocess(["bash", "-c", command], timeout=120, label="Command")
return await _run_subprocess(
["ssh", "-o", "ConnectTimeout=10", host, command], timeout=120, label="Command",
@@ -278,6 +284,8 @@ async def action_run_script(owner: str, script: str = "", host: str = "", **kwar
return "No script specified", False
target_host = (host or os.getenv("ODYSSEUS_SCRIPT_HOST", "localhost")).strip()
if target_host in ("", "localhost", "127.0.0.1", "local"):
if IS_WINDOWS and find_bash():
return await _run_subprocess([find_bash(), "-c", script], timeout=300, label="Script")
return await _run_subprocess(script, shell=True, timeout=300, label="Script")
return await _run_subprocess(["ssh", target_host, script], timeout=300, label="Script")
@@ -286,6 +294,8 @@ async def action_run_local(owner: str, script: str = "", **kwargs) -> Tuple[str,
"""Run a script locally (no SSH)."""
if not script:
return "No script specified", False
if IS_WINDOWS and find_bash():
return await _run_subprocess([find_bash(), "-c", script], timeout=300, label="Script")
return await _run_subprocess(script, shell=True, timeout=300, label="Script")

View File

@@ -11,15 +11,36 @@ import shutil
import sys
import asyncio
from core.platform_compat import IS_WINDOWS, which_tool
logger = logging.getLogger(__name__)
def _find_npx() -> str:
"""Find npx binary, checking common locations if not on PATH."""
npx = shutil.which("npx")
"""Find the npx binary, checking common locations if not on PATH.
On Windows the shim is `npx.cmd`, which `which_tool` resolves via PATHEXT.
"""
npx = which_tool("npx")
if npx:
return npx
# Common locations when PATH is minimal (e.g. systemd)
if IS_WINDOWS:
# Minimal-PATH fallbacks: npm's global bin lives under %APPDATA%\npm,
# and node's installer dir carries npx.cmd alongside node.exe.
appdata = os.environ.get("APPDATA", os.path.expanduser("~"))
for candidate in (
os.path.join(appdata, "npm", "npx.cmd"),
r"C:\Program Files\nodejs\npx.cmd",
):
if os.path.isfile(candidate):
return candidate
node = which_tool("node")
if node:
cand = os.path.join(os.path.dirname(node), "npx.cmd")
if os.path.isfile(cand):
return cand
return "npx.cmd" # fallback, will fail with a clear error
# Common POSIX locations when PATH is minimal (e.g. systemd)
for candidate in [
os.path.expanduser("~/.npm-global/bin/npx"),
os.path.expanduser("~/.local/bin/npx"),

View File

@@ -154,7 +154,7 @@ class ChatHandler:
if att_ids:
uploads_db_path = os.path.join(UPLOAD_DIR, "uploads.json")
try:
with open(uploads_db_path, "r") as f:
with open(uploads_db_path, "r", encoding="utf-8") as f:
_all_files = json.load(f)
files_by_id = {fi["id"]: fi for fi in _all_files.values() if "id" in fi}
except (FileNotFoundError, json.JSONDecodeError):
@@ -193,7 +193,7 @@ class ChatHandler:
_vcache = os.path.join(UPLOAD_DIR, ".vision", att_id + ".txt")
if os.path.exists(_vcache):
try:
with open(_vcache) as _vf:
with open(_vcache, encoding="utf-8") as _vf:
_vtext = _vf.read().strip()
if _vtext:
enhanced_message += f"\n[User-corrected caption / OCR for this image — treat as authoritative]:\n{_vtext}"
@@ -212,7 +212,7 @@ class ChatHandler:
vl_model = get_setting("vision_model", "") or ""
if os.path.exists(_vcache):
try:
with open(_vcache) as _vf:
with open(_vcache, encoding="utf-8") as _vf:
cached_desc = _vf.read().strip()
if cached_desc and not cached_desc.startswith("["):
vl_desc = cached_desc
@@ -225,7 +225,7 @@ class ChatHandler:
if vl_desc and not vl_desc.startswith("["):
try:
os.makedirs(os.path.join(UPLOAD_DIR, ".vision"), exist_ok=True)
with open(_vcache, "w") as _vf:
with open(_vcache, "w", encoding="utf-8") as _vf:
_vf.write(vl_desc)
except Exception:
pass

View File

@@ -1,8 +1,19 @@
import os
from pathlib import Path
from typing import List, Optional
from pydantic_settings import BaseSettings, SettingsConfigDict
from pydantic import Field, field_validator
# Cross-platform OS flag, exposed here so callers can `from src.config import
# IS_WINDOWS`. Defined locally (a trivial `os.name == "nt"`) rather than imported
# from core.platform_compat, to keep this dependency-light config module from
# dragging in the whole core/__init__ + llm_core import chain. The platform
# *helper functions* (safe_chmod, pid_alive, find_bash, ...) live solely in
# core.platform_compat — that remains their single source of truth. Keep platform
# branches as small inline `if IS_WINDOWS:` deltas (never parallel *_windows.py
# files) so they stay easy to integrate with upstream changes.
IS_WINDOWS = os.name == "nt"
class DataConfig(BaseSettings):
"""Configuration for data storage and file handling."""
# Base directory

View File

@@ -13,6 +13,17 @@ Set EMBEDDING_URL in .env, e.g.:
"""
import os
# Windows: force HuggingFace/fastembed to COPY model files rather than symlink
# them. On a network-share/UNC cache dir Windows can't follow HF's symlinks
# ([WinError 1463] "symbolic link cannot be followed"), so ONNX fails to load the
# model and semantic memory dies. huggingface_hub reads this flag at import time,
# so it must be set before huggingface_hub is first imported — hence module-top.
# (app.py sets the same guard for the server entrypoint.)
if os.name == "nt":
os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS", "1")
os.environ.setdefault("HF_HUB_DISABLE_SYMLINKS_WARNING", "1")
import logging
import numpy as np
import httpx
@@ -109,6 +120,35 @@ class FastEmbedClient:
"data", "fastembed_cache",
)
os.makedirs(cache_dir, exist_ok=True)
# Windows self-heal: the HuggingFace-hub cache stores model files as
# symlinks (snapshots/<rev>/model.onnx -> ../../blobs/<hash>). On a
# network-share / UNC data dir Windows refuses to follow them
# ([WinError 1463] "symbolic link cannot be followed because its type is
# disabled"), and a cache copied between machines can carry dead symlinks
# too. Either way fastembed tries to load a broken symlink and fails
# *without* re-downloading, leaving semantic memory degraded. Detect a
# broken-symlink model in the cache and drop the contaminated hub dir so
# fastembed re-fetches (it falls back to its CDN tarball of real files,
# which load fine). Best-effort; only ever removes a verifiably dead link.
if os.name == "nt":
try:
import glob, shutil
for _onnx in glob.glob(os.path.join(cache_dir, "**", "*.onnx"), recursive=True):
if os.path.islink(_onnx) and not os.path.exists(_onnx):
_root = _onnx
while os.path.basename(_root) and not os.path.basename(_root).startswith("models--"):
_parent = os.path.dirname(_root)
if _parent == _root:
break
_root = _parent
if os.path.basename(_root).startswith("models--"):
logger.warning(
"Embedding cache has a broken symlink (%s); clearing %s "
"so fastembed re-downloads real files", _onnx, _root,
)
shutil.rmtree(_root, ignore_errors=True)
except Exception as _e:
logger.debug("embedding cache symlink-heal skipped: %s", _e)
kwargs = {"model_name": self.model, "cache_dir": cache_dir}
self._embedding = TextEmbedding(**kwargs)
self._dim: Optional[int] = None
@@ -152,7 +192,7 @@ def _load_persisted_endpoint() -> dict:
)
if os.path.exists(endpoint_file):
import json
data = json.loads(open(endpoint_file).read())
data = json.loads(open(endpoint_file, encoding="utf-8").read())
if data.get("url"):
return data
except Exception:

View File

@@ -148,7 +148,7 @@ def load_integrations() -> List[Dict[str, Any]]:
if not os.path.exists(DATA_FILE):
return []
try:
with open(DATA_FILE, "r") as f:
with open(DATA_FILE, "r", encoding="utf-8") as f:
return json.load(f)
except (json.JSONDecodeError, IOError) as exc:
log.error("Failed to load integrations: %s", exc)
@@ -158,7 +158,7 @@ def load_integrations() -> List[Dict[str, Any]]:
def save_integrations(integrations: List[Dict[str, Any]]) -> None:
"""Persist integrations list to disk."""
_ensure_data_dir()
with open(DATA_FILE, "w") as f:
with open(DATA_FILE, "w", encoding="utf-8") as f:
json.dump(integrations, f, indent=2)
@@ -409,7 +409,7 @@ def migrate_from_settings() -> None:
return
try:
with open(settings_path, "r") as f:
with open(settings_path, "r", encoding="utf-8") as f:
settings = json.load(f)
except (json.JSONDecodeError, IOError):
return
@@ -436,7 +436,7 @@ def migrate_from_settings() -> None:
# Clear migrated keys
settings.pop("miniflux_url", None)
settings.pop("miniflux_api_key", None)
with open(settings_path, "w") as f:
with open(settings_path, "w", encoding="utf-8") as f:
json.dump(settings, f, indent=2)
log.info("Migrated Miniflux integration from settings.json")

View File

@@ -142,7 +142,7 @@ def save_field_sidecar(pdf_path: str, fields: list[dict[str, Any]]) -> str:
"""Persist the field schema next to its source PDF. Returns the sidecar path."""
path = sidecar_path(pdf_path)
try:
with open(path, "w") as f:
with open(path, "w", encoding="utf-8") as f:
json.dump(fields, f, indent=2)
except Exception as e:
logger.warning(f"Failed to write field sidecar {path}: {e}")
@@ -155,7 +155,7 @@ def load_field_sidecar(pdf_path: str) -> Optional[list[dict[str, Any]]]:
if not os.path.exists(path):
return None
try:
with open(path) as f:
with open(path, encoding="utf-8") as f:
return json.load(f)
except Exception as e:
logger.warning(f"Failed to read field sidecar {path}: {e}")

View File

@@ -178,7 +178,7 @@ class PersonalDocsManager:
"""Load the list of indexed directories from persistent storage."""
try:
if os.path.exists(self.directories_file):
with open(self.directories_file, 'r') as f:
with open(self.directories_file, 'r', encoding="utf-8") as f:
self.indexed_directories = json.load(f)
logger.info(f"Loaded {len(self.indexed_directories)} indexed directories")
else:
@@ -190,7 +190,7 @@ class PersonalDocsManager:
def save_directories(self):
"""Save the list of indexed directories to persistent storage."""
try:
with open(self.directories_file, 'w') as f:
with open(self.directories_file, 'w', encoding="utf-8") as f:
json.dump(self.indexed_directories, f, indent=2)
logger.info(f"Saved {len(self.indexed_directories)} indexed directories")
except Exception as e:
@@ -200,7 +200,7 @@ class PersonalDocsManager:
"""Load the set of excluded file paths from persistent storage."""
try:
if os.path.exists(self._excluded_file):
with open(self._excluded_file, 'r') as f:
with open(self._excluded_file, 'r', encoding="utf-8") as f:
self.excluded_files = set(json.load(f))
else:
self.excluded_files = set()
@@ -210,7 +210,7 @@ class PersonalDocsManager:
def _save_excluded(self):
try:
with open(self._excluded_file, 'w') as f:
with open(self._excluded_file, 'w', encoding="utf-8") as f:
json.dump(list(self.excluded_files), f)
except Exception as e:
logger.error(f"Error saving excluded files: {e}")

View File

@@ -75,7 +75,7 @@ Use precise language. Show causal relationships explicitly. Quantify uncertainty
return self.DEFAULT_PRESETS.copy()
try:
with open(self.presets_file, 'r') as f:
with open(self.presets_file, 'r', encoding="utf-8") as f:
presets = json.load(f)
custom = presets.get("custom") if isinstance(presets, dict) else None
if isinstance(custom, dict) and "enabled" not in custom:
@@ -101,7 +101,7 @@ Use precise language. Show causal relationships explicitly. Quantify uncertainty
"""Save presets to file"""
try:
os.makedirs(os.path.dirname(self.presets_file), exist_ok=True)
with open(self.presets_file, 'w') as f:
with open(self.presets_file, 'w', encoding="utf-8") as f:
json.dump(presets, f, indent=2)
self.presets = presets
return True

View File

@@ -299,7 +299,7 @@ class ResearchHandler:
path = RESEARCH_DATA_DIR / f"{session_id}.json"
if path.exists():
try:
data = json.loads(path.read_text())
data = json.loads(path.read_text(encoding="utf-8"))
if data.get("consumed"):
return None
return {
@@ -338,7 +338,7 @@ class ResearchHandler:
path = RESEARCH_DATA_DIR / f"{session_id}.json"
if path.exists():
try:
data = json.loads(path.read_text())
data = json.loads(path.read_text(encoding="utf-8"))
if data.get("consumed"):
return None
return data.get("result")
@@ -360,7 +360,7 @@ class ResearchHandler:
path = RESEARCH_DATA_DIR / f"{session_id}.json"
if path.exists():
try:
data = json.loads(path.read_text())
data = json.loads(path.read_text(encoding="utf-8"))
return data.get("sources")
except Exception:
pass
@@ -377,7 +377,7 @@ class ResearchHandler:
path = RESEARCH_DATA_DIR / f"{session_id}.json"
if path.exists():
try:
data = json.loads(path.read_text())
data = json.loads(path.read_text(encoding="utf-8"))
return data.get("raw_findings")
except Exception as e:
logger.warning(f"Failed to read raw findings for {session_id}: {e}")
@@ -425,7 +425,7 @@ class ResearchHandler:
try:
for p in RESEARCH_DATA_DIR.glob("*.json"):
try:
data = json.loads(p.read_text())
data = json.loads(p.read_text(encoding="utf-8"))
if data.get("status") == "done":
started = data.get("started_at", 0)
completed = data.get("completed_at", 0)
@@ -448,9 +448,9 @@ class ResearchHandler:
path = RESEARCH_DATA_DIR / f"{session_id}.json"
if path.exists():
try:
data = json.loads(path.read_text())
data = json.loads(path.read_text(encoding="utf-8"))
data["consumed"] = True
path.write_text(json.dumps(data))
path.write_text(json.dumps(data), encoding="utf-8")
except Exception:
pass
@@ -481,7 +481,7 @@ class ResearchHandler:
# SECURITY: stamp owner so route handlers can filter by user.
"owner": entry.get("owner", ""),
}
path.write_text(json.dumps(data))
path.write_text(json.dumps(data), encoding="utf-8")
logger.info(f"Research result saved to {path}")
try:
from src.event_bus import fire_event
@@ -496,7 +496,7 @@ class ResearchHandler:
path = RESEARCH_DATA_DIR / f"{session_id}.json"
if path.exists():
try:
return json.loads(path.read_text())
return json.loads(path.read_text(encoding="utf-8"))
except Exception:
pass
return None
@@ -511,7 +511,7 @@ class ResearchHandler:
try:
from src.visual_report import generate_visual_report
data = json.loads(json_path.read_text())
data = json.loads(json_path.read_text(encoding="utf-8"))
report_md = data.get("raw_report") or data.get("result", "")
html_content = generate_visual_report(
question=data.get("query", ""),
@@ -534,12 +534,12 @@ class ResearchHandler:
if not path.exists():
return False
try:
data = json.loads(path.read_text())
data = json.loads(path.read_text(encoding="utf-8"))
hidden = data.get("hidden_images") or []
if image_url not in hidden:
hidden.append(image_url)
data["hidden_images"] = hidden
path.write_text(json.dumps(data))
path.write_text(json.dumps(data), encoding="utf-8")
logger.info(f"Hid image {image_url[:80]} for research {session_id}")
return True
except Exception as e:
@@ -552,9 +552,9 @@ class ResearchHandler:
if not path.exists():
return False
try:
data = json.loads(path.read_text())
data = json.loads(path.read_text(encoding="utf-8"))
data["hidden_images"] = []
path.write_text(json.dumps(data))
path.write_text(json.dumps(data), encoding="utf-8")
logger.info(f"Cleared hidden_images for research {session_id}")
return True
except Exception as e:

View File

@@ -24,6 +24,8 @@ from pathlib import Path
from cryptography.fernet import Fernet, InvalidToken
from core.platform_compat import safe_chmod
logger = logging.getLogger(__name__)
_KEY_PATH = Path(__file__).resolve().parent.parent / "data" / ".app_key"
@@ -37,10 +39,9 @@ def _load_or_create_key() -> bytes:
_KEY_PATH.parent.mkdir(parents=True, exist_ok=True)
key = Fernet.generate_key()
_KEY_PATH.write_bytes(key)
try:
os.chmod(_KEY_PATH, 0o600)
except Exception:
pass
# POSIX: lock the key to 0o600. Windows: no-op (the user-profile data dir is
# already ACL-restricted); safe_chmod swallows both cases.
safe_chmod(_KEY_PATH, 0o600)
logger.info(f"Generated new app key at {_KEY_PATH}")
return key

View File

@@ -140,7 +140,7 @@ def load_settings() -> dict:
if _settings_cache and (now - _settings_cache[0]) < _CACHE_TTL:
return _settings_cache[1]
try:
with open(SETTINGS_FILE, "r") as f:
with open(SETTINGS_FILE, "r", encoding="utf-8") as f:
saved = json.load(f)
merged = {**DEFAULT_SETTINGS, **saved}
except (FileNotFoundError, json.JSONDecodeError):
@@ -205,7 +205,7 @@ def load_features() -> dict:
if _features_cache and (now - _features_cache[0]) < _CACHE_TTL:
return _features_cache[1]
try:
with open(FEATURES_FILE, "r") as f:
with open(FEATURES_FILE, "r", encoding="utf-8") as f:
saved = json.load(f)
merged = {**DEFAULT_FEATURES, **saved}
except (FileNotFoundError, json.JSONDecodeError):

View File

@@ -1013,7 +1013,7 @@ class TaskScheduler:
from pathlib import Path as _P
integrations_file = _P("data/integrations.json")
if integrations_file.exists():
integrations = json.loads(integrations_file.read_text())
integrations = json.loads(integrations_file.read_text(encoding="utf-8"))
for integ in integrations:
if not integ.get("enabled"):
continue
@@ -1616,7 +1616,7 @@ class TaskScheduler:
"task_id": task.id,
"task_name": task.name,
}
(RESEARCH_DATA_DIR / f"{session_id}.json").write_text(json.dumps(payload))
(RESEARCH_DATA_DIR / f"{session_id}.json").write_text(json.dumps(payload), encoding="utf-8")
try:
from src.event_bus import fire_event
fire_event("research_completed", task.owner or None)

View File

@@ -12,6 +12,7 @@ import collections
import json
import logging
import os
import sys
import time
from typing import Any, Awaitable, Callable, Dict, Optional, Tuple
@@ -348,7 +349,9 @@ async def _direct_fallback(
# can't take the whole server down. -I = isolated mode (skip
# user site, no PYTHONPATH inheritance) for hygiene.
proc = await asyncio.create_subprocess_exec(
"python3", "-I", "-c", content,
# Use the running interpreter — there is no `python3.exe` on
# Windows, which made the agent's `python` tool fail there.
(sys.executable or "python"), "-I", "-c", content,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
env=_subproc_env,

View File

@@ -3639,7 +3639,7 @@ async def do_manage_research(content: str, owner: Optional[str] = None) -> Dict:
def _load(p):
try:
return _json.loads(p.read_text())
return _json.loads(p.read_text(encoding="utf-8"))
except Exception:
return None
@@ -3874,7 +3874,7 @@ def _load_vault_config() -> Dict:
p = Path("data/vault.json")
if p.exists():
try:
return json.loads(p.read_text())
return json.loads(p.read_text(encoding="utf-8"))
except Exception:
pass
return {}
@@ -4027,13 +4027,13 @@ async def do_vault_unlock(content: str, owner: Optional[str] = None) -> Dict:
cfg = {}
if p.exists():
try:
cfg = json.loads(p.read_text())
cfg = json.loads(p.read_text(encoding="utf-8"))
except Exception:
pass
cfg["session"] = session
from datetime import datetime as _dt
cfg["unlocked_at"] = _dt.utcnow().isoformat()
p.write_text(json.dumps(cfg, indent=2))
p.write_text(json.dumps(cfg, indent=2), encoding="utf-8")
try:
import os as _os
_os.chmod(str(p), 0o600)

View File

@@ -269,7 +269,7 @@ class UploadHandler:
uploads_db_path = os.path.join(self.upload_dir, "uploads.json")
if os.path.exists(uploads_db_path):
with open(uploads_db_path, "r") as f:
with open(uploads_db_path, "r", encoding="utf-8") as f:
files = json.load(f)
total_files = len(files)
@@ -352,7 +352,7 @@ class UploadHandler:
if os.path.exists(uploads_db_path):
try:
with open(uploads_db_path, "r") as f:
with open(uploads_db_path, "r", encoding="utf-8") as f:
existing_files = json.load(f)
except Exception as e:
logger.warning(f"Failed to read uploads database: {e}")
@@ -374,7 +374,7 @@ class UploadHandler:
existing_files[existing_key] = existing_file
try:
with open(uploads_db_path, "w") as f:
with open(uploads_db_path, "w", encoding="utf-8") as f:
json.dump(existing_files, f, indent=2)
except Exception as e:
logger.warning(f"Failed to update uploads database: {e}")
@@ -439,7 +439,7 @@ class UploadHandler:
try:
if os.path.exists(uploads_db_path):
try:
with open(uploads_db_path, "r") as f:
with open(uploads_db_path, "r", encoding="utf-8") as f:
all_files = json.load(f)
except Exception:
all_files = {}
@@ -449,7 +449,7 @@ class UploadHandler:
storage_key = f"{owner}:{file_hash}" if owner else file_hash
all_files[storage_key] = file_metadata
with open(uploads_db_path, "w") as f:
with open(uploads_db_path, "w", encoding="utf-8") as f:
json.dump(all_files, f, indent=2)
except Exception as e:

View File

@@ -226,7 +226,7 @@ def test_admin_only_actions_set_contains_shell_runners():
# `_ADMIN_ONLY_ACTIONS` is a closure constant. Easiest pin: re-read
# the source and check for the three risky entries + the admin gate
# wording.
src = open(task_routes.__file__).read()
src = open(task_routes.__file__, encoding="utf-8").read()
assert '"run_local"' in src
assert '"run_script"' in src
assert '"ssh_command"' in src
@@ -249,8 +249,8 @@ def test_ship_paused_housekeeping_stays_paused_by_default():
from routes import task_routes
from src import task_scheduler
route_src = open(task_routes.__file__).read()
scheduler_src = open(task_scheduler.__file__).read()
route_src = open(task_routes.__file__, encoding="utf-8").read()
scheduler_src = open(task_scheduler.__file__, encoding="utf-8").read()
assert '"ship_paused": True' in scheduler_src
assert 'defs.get("ship_paused")' in scheduler_src
assert 'defs.get("ship_paused")' in route_src
@@ -259,5 +259,5 @@ def test_ship_paused_housekeeping_stays_paused_by_default():
def test_task_payload_exposes_crew_member_id_for_ui_category():
from routes import task_routes
src = open(task_routes.__file__).read()
src = open(task_routes.__file__, encoding="utf-8").read()
assert '"crew_member_id"' in src

View File

@@ -96,6 +96,11 @@ def test_secret_storage_corrupt_token_returns_empty(tmp_path, monkeypatch):
assert ss.decrypt("enc:not-a-valid-fernet-token") == ""
@pytest.mark.skipif(
sys.platform == "win32",
reason="POSIX mode bits (0o600) don't exist on Windows; the key file is "
"protected by the user-profile NTFS ACL instead, and safe_chmod no-ops there.",
)
def test_secret_storage_key_created_with_safe_mode(tmp_path, monkeypatch):
"""The auto-generated key file must be mode 0o600 — anyone who can
read it can decrypt every stored secret."""