diff --git a/app.py b/app.py index 0ff6e42..d45161e 100644 --- a/app.py +++ b/app.py @@ -21,6 +21,7 @@ import uuid import asyncio import logging +import secrets from datetime import datetime from typing import Dict @@ -222,7 +223,7 @@ if AUTH_ENABLED: try: from core.middleware import INTERNAL_TOOL_HEADER, INTERNAL_TOOL_TOKEN as _ITT _hdr = request.headers.get(INTERNAL_TOOL_HEADER) - if _hdr and _hdr == _ITT and _is_trusted_loopback(request): + if _hdr and secrets.compare_digest(_hdr, _ITT) and _is_trusted_loopback(request): # Impersonation: when the agent's loopback call sets # X-Odysseus-Owner, attribute the request to that user only # if they exist. Authorization checks remain separate; this diff --git a/core/middleware.py b/core/middleware.py index a3e9e9a..82d1d03 100644 --- a/core/middleware.py +++ b/core/middleware.py @@ -27,7 +27,8 @@ def require_admin(request: Request): # (b) the auth middleware already validated the token and stamped # request.state.current_user = "internal-tool". try: - if request.headers.get(INTERNAL_TOOL_HEADER) == INTERNAL_TOOL_TOKEN: + hdr = request.headers.get(INTERNAL_TOOL_HEADER) + if hdr and secrets.compare_digest(hdr, INTERNAL_TOOL_TOKEN): return if getattr(request.state, "current_user", None) == "internal-tool": return diff --git a/routes/diagnostics_routes.py b/routes/diagnostics_routes.py index 8f3a915..daebef8 100644 --- a/routes/diagnostics_routes.py +++ b/routes/diagnostics_routes.py @@ -3,10 +3,11 @@ import logging from typing import Dict, Any -from fastapi import APIRouter, HTTPException, Form +from fastapi import APIRouter, HTTPException, Form, Request from services.youtube.youtube_handler import extract_youtube_id, extract_transcript_async from core.constants import DEFAULT_HOST +from core.middleware import require_admin logger = logging.getLogger(__name__) @@ -19,7 +20,8 @@ def setup_diagnostics_routes( router = APIRouter(tags=["diagnostics"]) @router.get("/api/db/stats") - async def get_database_stats() -> Dict[str, Any]: + async def get_database_stats(request: Request) -> Dict[str, Any]: + require_admin(request) try: from core.database import get_detailed_stats return get_detailed_stats() @@ -28,13 +30,15 @@ def setup_diagnostics_routes( raise HTTPException(500, "Failed to retrieve database statistics") @router.get("/api/rag/stats") - async def get_rag_stats() -> Dict[str, Any]: + async def get_rag_stats(request: Request) -> Dict[str, Any]: + require_admin(request) if rag_available and rag_manager: return rag_manager.get_stats() return {"error": "RAG system not available"} @router.get("/api/test/youtube") - async def test_youtube(url: str) -> Dict[str, Any]: + async def test_youtube(request: Request, url: str) -> Dict[str, Any]: + require_admin(request) try: video_id = extract_youtube_id(url) if not video_id: @@ -54,7 +58,8 @@ def setup_diagnostics_routes( return {"error": str(e)} @router.post("/api/test-research") - async def test_research(query: str = Form("What is machine learning?")) -> Dict[str, Any]: + async def test_research(request: Request, query: str = Form("What is machine learning?")) -> Dict[str, Any]: + require_admin(request) try: endpoint = f"http://{DEFAULT_HOST}:8000/v1/chat/completions" model = "gpt-oss-120b" diff --git a/routes/email_helpers.py b/routes/email_helpers.py index 27d7338..c14fd8c 100644 --- a/routes/email_helpers.py +++ b/routes/email_helpers.py @@ -269,6 +269,20 @@ COMPOSE_UPLOADS_DIR.mkdir(parents=True, exist_ok=True) SCHEDULED_DB = DATA_DIR / "scheduled_emails.db" +def attachment_extract_dir(folder: str, uid: str) -> Path: + """Containment-safe extraction directory for an attachment. + + `folder` and `uid` are user-controlled (query/path params). Flatten them to + a single safe path segment so a value like folder='../../tmp' can't escape + ATTACHMENTS_DIR, then assert containment as belt-and-suspenders.""" + key = re.sub(r"[^A-Za-z0-9._-]", "_", f"{folder}_{uid}") or "_" + target = (ATTACHMENTS_DIR / key).resolve() + base = ATTACHMENTS_DIR.resolve() + if target != base and base not in target.parents: + raise HTTPException(400, "Invalid attachment location") + return target + + def _init_scheduled_db(): import sqlite3 conn = sqlite3.connect(SCHEDULED_DB) diff --git a/routes/email_routes.py b/routes/email_routes.py index 94ce9dc..8b82aa5 100644 --- a/routes/email_routes.py +++ b/routes/email_routes.py @@ -48,6 +48,7 @@ from routes.email_helpers import ( _EMAIL_REPLY_SYS_PROMPT_BASE, _POOL_HOOKS, SendEmailRequest, ExtractStyleRequest, ATTACHMENTS_DIR, COMPOSE_UPLOADS_DIR, SCHEDULED_DB, + attachment_extract_dir, ) from routes.email_pollers import _start_poller @@ -1390,7 +1391,7 @@ def setup_email_routes(): msg = email_mod.message_from_bytes(raw) # Extract to a per-email folder - target_dir = ATTACHMENTS_DIR / f"{folder}_{uid}" + target_dir = attachment_extract_dir(folder, uid) filepath = _extract_attachment_to_disk(msg, index, target_dir) if not filepath: return {"error": f"Attachment index {index} not found"} @@ -1425,7 +1426,7 @@ def setup_email_routes(): raw = msg_data[0][1] msg = email_mod.message_from_bytes(raw) - target_dir = ATTACHMENTS_DIR / f"{folder}_{uid}" + target_dir = attachment_extract_dir(folder, uid) filepath = _extract_attachment_to_disk(msg, index, target_dir) if not filepath: return {"error": f"Attachment index {index} not found"} @@ -1633,7 +1634,7 @@ def setup_email_routes(): raw = msg_data[0][1] msg = email_mod.message_from_bytes(raw) - target_dir = ATTACHMENTS_DIR / f"{folder}_{uid}" + target_dir = attachment_extract_dir(folder, uid) filepath = _extract_attachment_to_disk(msg, index, target_dir) if not filepath: return {"error": f"Attachment index {index} not found"} diff --git a/routes/mcp_routes.py b/routes/mcp_routes.py index 5b1a51d..c09108f 100644 --- a/routes/mcp_routes.py +++ b/routes/mcp_routes.py @@ -499,6 +499,11 @@ def setup_mcp_routes(mcp_manager: McpManager): def _oauth_authorize_page(auth_url: str, server_id: str, host: str) -> str: """Page with Google sign-in link and URL paste-back form for remote access.""" + # Escape values interpolated into the page: `host` comes from the request + # Host header and `server_id` from the OAuth state — neither is trusted. + auth_url = html.escape(auth_url, quote=True) + server_id = html.escape(server_id, quote=True) + host = html.escape(host, quote=True) return f""" Authorize — Odysseus diff --git a/routes/session_routes.py b/routes/session_routes.py index 3372e2e..5caf7d5 100644 --- a/routes/session_routes.py +++ b/routes/session_routes.py @@ -1,5 +1,6 @@ # routes/session_routes.py import re +import html import json import uuid from datetime import datetime @@ -587,15 +588,16 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_ ) if fmt == "html": + safe_title = html.escape(session.name or "") html_parts = [ "", - f"{session.name}", + f"{safe_title}", "", - f"

{session.name}

", + f"

{safe_title}

", ] for m in session.history: cls = "user" if m.role == "user" else "ai" diff --git a/static/js/emailLibrary.js b/static/js/emailLibrary.js index b391f7e..7880848 100644 --- a/static/js/emailLibrary.js +++ b/static/js/emailLibrary.js @@ -2469,7 +2469,7 @@ function _renderTurnsAsBubbles(turns, data) { + (isMine ? '' : avatar) + `
` + head - + `
${t.body_html || ''}
` + + `
${_sanitizeHtml(t.body_html || '')}
` + `
` + (isMine ? avatar : '') + `` @@ -2499,7 +2499,7 @@ function _renderTurnsFromServer(turns) { const w = wrap(top); if (stack.length) stack[stack.length - 1].html += w; else out += w; } - out += t.body_html || ''; + out += _sanitizeHtml(t.body_html || ''); } else { while (stack.length && stack[stack.length - 1].level > t.level) { const top = stack.pop(); @@ -2507,9 +2507,9 @@ function _renderTurnsFromServer(turns) { if (stack.length) stack[stack.length - 1].html += w; else out += w; } if (!stack.length || stack[stack.length - 1].level < t.level) { - stack.push({ level: t.level, meta: t.meta, html: t.body_html || '' }); + stack.push({ level: t.level, meta: t.meta, html: _sanitizeHtml(t.body_html || '') }); } else { - stack[stack.length - 1].html += t.body_html || ''; + stack[stack.length - 1].html += _sanitizeHtml(t.body_html || ''); if (t.meta && !stack[stack.length - 1].meta) { stack[stack.length - 1].meta = t.meta; } diff --git a/tests/test_security_regressions.py b/tests/test_security_regressions.py index 59e6f68..93ac0dc 100644 --- a/tests/test_security_regressions.py +++ b/tests/test_security_regressions.py @@ -622,3 +622,71 @@ def test_web_fetch_guard_blocks_redirect_into_private(monkeypatch): with _pytest.raises(httpx.RequestError) as exc: content._get_public_url("http://public.example/start", headers={}, timeout=5) assert "non-public" in str(exc.value) + + +# ── audit fixes (2026-06-01): email XSS, attachment traversal, authz ── + +def _import_attachment_extract_dir(): + sys.modules.pop("routes.email_helpers", None) + from routes.email_helpers import attachment_extract_dir, ATTACHMENTS_DIR + return attachment_extract_dir, ATTACHMENTS_DIR + + +@pytest.mark.parametrize("folder,uid", [ + ("../../../../tmp/evil", "1"), + ("INBOX", "../../etc/cron.d/x"), + ("a/../../b", "x"), + ("..", ".."), + ("/abs/path", "2"), +]) +def test_attachment_extract_dir_stays_contained(folder, uid): + """User-controlled folder/uid must never escape ATTACHMENTS_DIR — pins the + fix for the attachment-extraction path traversal.""" + aed, base = _import_attachment_extract_dir() + target = aed(folder, uid) + base_r = base.resolve() + assert target == base_r or base_r in target.parents + # exactly one extra path segment, and no `..` component survived + rel = target.relative_to(base_r) + assert ".." not in rel.parts + + +def test_attachment_extract_dir_normal_inputs_unchanged(): + aed, base = _import_attachment_extract_dir() + assert aed("INBOX", "123") == base.resolve() / "INBOX_123" + + +def test_diagnostics_routes_are_admin_gated(): + """db/rag stats + test endpoints must require admin (they relied only on + the global session check before).""" + src = Path(__file__).resolve().parents[1] / "routes" / "diagnostics_routes.py" + text = src.read_text() + for handler in ("get_database_stats", "get_rag_stats", "test_youtube", "test_research"): + assert f"def {handler}(request: Request" in text, handler + assert text.count("require_admin(request)") >= 4 + + +def test_email_thread_rendering_sanitizes_body_html(): + """Both threaded render paths must run server-parsed body_html through the + allowlist sanitizer (the flat path already did).""" + src = Path(__file__).resolve().parents[1] / "static" / "js" / "emailLibrary.js" + text = src.read_text() + # every `t.body_html` reference is wrapped by _sanitizeHtml(...) + assert text.count("t.body_html") == text.count("_sanitizeHtml(t.body_html") + assert "t.body_html" in text # guard against the file being refactored away + + +def test_session_html_export_escapes_name(): + src = Path(__file__).resolve().parents[1] / "routes" / "session_routes.py" + text = src.read_text() + assert "safe_title = html.escape(session.name" in text + assert "{session.name}" not in text + assert "<h1>{session.name}</h1>" not in text + + +def test_mcp_oauth_page_escapes_reflected_values(): + src = Path(__file__).resolve().parents[1] / "routes" / "mcp_routes.py" + text = src.read_text() + body = text.split("def _oauth_authorize_page(", 1)[1].split("return f", 1)[0] + for var in ("auth_url", "server_id", "host"): + assert f"{var} = html.escape({var}" in body, var