Fix email-thread HTML injection, attachment path traversal, and missing authz (#475)
Hardens issues found in a security review of the current tree (separate from
the cookbook SSH PR):
- Email thread rendering (static/js/emailLibrary.js): the flat read path runs
inbound HTML through the allowlist sanitizer, but the two threaded paths
(_renderTurnsAsBubbles / _renderTurnsFromServer — the default view) injected
server-parsed `body_html` raw into the DOM. A crafted inbound email could
inject arbitrary markup (phishing/form/credential-capture/tracking; full XSS
if a deployment relaxes the script CSP). Now sanitized on all paths.
- Attachment extraction (routes/email_routes.py, routes/email_helpers.py): the
on-disk extraction dir was `ATTACHMENTS_DIR / f"{folder}_{uid}"` with
user-controlled folder/uid and no containment, so a folder like `../../tmp`
could escape ATTACHMENTS_DIR. New attachment_extract_dir() flattens both to a
single safe segment and asserts containment.
- Diagnostics routes (routes/diagnostics_routes.py): /api/db/stats,
/api/rag/stats, /api/test/youtube, /api/test-research relied only on the
global session check (any logged-in user). Now require_admin-gated.
- Defense-in-depth HTML escaping: session HTML export escapes the session name
(routes/session_routes.py); the MCP OAuth page escapes the reflected Host
header / server_id (routes/mcp_routes.py).
- Internal-tool token now compared with secrets.compare_digest (constant time)
in core/middleware.py and app.py.
Adds regression tests in tests/test_security_regressions.py.
This commit is contained in:
committed by
GitHub
parent
9e8de43f25
commit
171c29dcf3
3
app.py
3
app.py
@@ -21,6 +21,7 @@ import uuid
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import secrets
|
||||
from datetime import datetime
|
||||
from typing import Dict
|
||||
|
||||
@@ -222,7 +223,7 @@ if AUTH_ENABLED:
|
||||
try:
|
||||
from core.middleware import INTERNAL_TOOL_HEADER, INTERNAL_TOOL_TOKEN as _ITT
|
||||
_hdr = request.headers.get(INTERNAL_TOOL_HEADER)
|
||||
if _hdr and _hdr == _ITT and _is_trusted_loopback(request):
|
||||
if _hdr and secrets.compare_digest(_hdr, _ITT) and _is_trusted_loopback(request):
|
||||
# Impersonation: when the agent's loopback call sets
|
||||
# X-Odysseus-Owner, attribute the request to that user only
|
||||
# if they exist. Authorization checks remain separate; this
|
||||
|
||||
@@ -27,7 +27,8 @@ def require_admin(request: Request):
|
||||
# (b) the auth middleware already validated the token and stamped
|
||||
# request.state.current_user = "internal-tool".
|
||||
try:
|
||||
if request.headers.get(INTERNAL_TOOL_HEADER) == INTERNAL_TOOL_TOKEN:
|
||||
hdr = request.headers.get(INTERNAL_TOOL_HEADER)
|
||||
if hdr and secrets.compare_digest(hdr, INTERNAL_TOOL_TOKEN):
|
||||
return
|
||||
if getattr(request.state, "current_user", None) == "internal-tool":
|
||||
return
|
||||
|
||||
@@ -3,10 +3,11 @@
|
||||
import logging
|
||||
from typing import Dict, Any
|
||||
|
||||
from fastapi import APIRouter, HTTPException, Form
|
||||
from fastapi import APIRouter, HTTPException, Form, Request
|
||||
|
||||
from services.youtube.youtube_handler import extract_youtube_id, extract_transcript_async
|
||||
from core.constants import DEFAULT_HOST
|
||||
from core.middleware import require_admin
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -19,7 +20,8 @@ def setup_diagnostics_routes(
|
||||
router = APIRouter(tags=["diagnostics"])
|
||||
|
||||
@router.get("/api/db/stats")
|
||||
async def get_database_stats() -> Dict[str, Any]:
|
||||
async def get_database_stats(request: Request) -> Dict[str, Any]:
|
||||
require_admin(request)
|
||||
try:
|
||||
from core.database import get_detailed_stats
|
||||
return get_detailed_stats()
|
||||
@@ -28,13 +30,15 @@ def setup_diagnostics_routes(
|
||||
raise HTTPException(500, "Failed to retrieve database statistics")
|
||||
|
||||
@router.get("/api/rag/stats")
|
||||
async def get_rag_stats() -> Dict[str, Any]:
|
||||
async def get_rag_stats(request: Request) -> Dict[str, Any]:
|
||||
require_admin(request)
|
||||
if rag_available and rag_manager:
|
||||
return rag_manager.get_stats()
|
||||
return {"error": "RAG system not available"}
|
||||
|
||||
@router.get("/api/test/youtube")
|
||||
async def test_youtube(url: str) -> Dict[str, Any]:
|
||||
async def test_youtube(request: Request, url: str) -> Dict[str, Any]:
|
||||
require_admin(request)
|
||||
try:
|
||||
video_id = extract_youtube_id(url)
|
||||
if not video_id:
|
||||
@@ -54,7 +58,8 @@ def setup_diagnostics_routes(
|
||||
return {"error": str(e)}
|
||||
|
||||
@router.post("/api/test-research")
|
||||
async def test_research(query: str = Form("What is machine learning?")) -> Dict[str, Any]:
|
||||
async def test_research(request: Request, query: str = Form("What is machine learning?")) -> Dict[str, Any]:
|
||||
require_admin(request)
|
||||
try:
|
||||
endpoint = f"http://{DEFAULT_HOST}:8000/v1/chat/completions"
|
||||
model = "gpt-oss-120b"
|
||||
|
||||
@@ -269,6 +269,20 @@ COMPOSE_UPLOADS_DIR.mkdir(parents=True, exist_ok=True)
|
||||
SCHEDULED_DB = DATA_DIR / "scheduled_emails.db"
|
||||
|
||||
|
||||
def attachment_extract_dir(folder: str, uid: str) -> Path:
|
||||
"""Containment-safe extraction directory for an attachment.
|
||||
|
||||
`folder` and `uid` are user-controlled (query/path params). Flatten them to
|
||||
a single safe path segment so a value like folder='../../tmp' can't escape
|
||||
ATTACHMENTS_DIR, then assert containment as belt-and-suspenders."""
|
||||
key = re.sub(r"[^A-Za-z0-9._-]", "_", f"{folder}_{uid}") or "_"
|
||||
target = (ATTACHMENTS_DIR / key).resolve()
|
||||
base = ATTACHMENTS_DIR.resolve()
|
||||
if target != base and base not in target.parents:
|
||||
raise HTTPException(400, "Invalid attachment location")
|
||||
return target
|
||||
|
||||
|
||||
def _init_scheduled_db():
|
||||
import sqlite3
|
||||
conn = sqlite3.connect(SCHEDULED_DB)
|
||||
|
||||
@@ -48,6 +48,7 @@ from routes.email_helpers import (
|
||||
_EMAIL_REPLY_SYS_PROMPT_BASE, _POOL_HOOKS,
|
||||
SendEmailRequest, ExtractStyleRequest,
|
||||
ATTACHMENTS_DIR, COMPOSE_UPLOADS_DIR, SCHEDULED_DB,
|
||||
attachment_extract_dir,
|
||||
)
|
||||
from routes.email_pollers import _start_poller
|
||||
|
||||
@@ -1390,7 +1391,7 @@ def setup_email_routes():
|
||||
msg = email_mod.message_from_bytes(raw)
|
||||
|
||||
# Extract to a per-email folder
|
||||
target_dir = ATTACHMENTS_DIR / f"{folder}_{uid}"
|
||||
target_dir = attachment_extract_dir(folder, uid)
|
||||
filepath = _extract_attachment_to_disk(msg, index, target_dir)
|
||||
if not filepath:
|
||||
return {"error": f"Attachment index {index} not found"}
|
||||
@@ -1425,7 +1426,7 @@ def setup_email_routes():
|
||||
raw = msg_data[0][1]
|
||||
msg = email_mod.message_from_bytes(raw)
|
||||
|
||||
target_dir = ATTACHMENTS_DIR / f"{folder}_{uid}"
|
||||
target_dir = attachment_extract_dir(folder, uid)
|
||||
filepath = _extract_attachment_to_disk(msg, index, target_dir)
|
||||
if not filepath:
|
||||
return {"error": f"Attachment index {index} not found"}
|
||||
@@ -1633,7 +1634,7 @@ def setup_email_routes():
|
||||
raw = msg_data[0][1]
|
||||
msg = email_mod.message_from_bytes(raw)
|
||||
|
||||
target_dir = ATTACHMENTS_DIR / f"{folder}_{uid}"
|
||||
target_dir = attachment_extract_dir(folder, uid)
|
||||
filepath = _extract_attachment_to_disk(msg, index, target_dir)
|
||||
if not filepath:
|
||||
return {"error": f"Attachment index {index} not found"}
|
||||
|
||||
@@ -499,6 +499,11 @@ def setup_mcp_routes(mcp_manager: McpManager):
|
||||
|
||||
def _oauth_authorize_page(auth_url: str, server_id: str, host: str) -> str:
|
||||
"""Page with Google sign-in link and URL paste-back form for remote access."""
|
||||
# Escape values interpolated into the page: `host` comes from the request
|
||||
# Host header and `server_id` from the OAuth state — neither is trusted.
|
||||
auth_url = html.escape(auth_url, quote=True)
|
||||
server_id = html.escape(server_id, quote=True)
|
||||
host = html.escape(host, quote=True)
|
||||
return f"""<!DOCTYPE html>
|
||||
<html><head>
|
||||
<meta charset="UTF-8"><title>Authorize — Odysseus</title>
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
# routes/session_routes.py
|
||||
import re
|
||||
import html
|
||||
import json
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
@@ -587,15 +588,16 @@ def setup_session_routes(session_manager: SessionManager, config: dict, webhook_
|
||||
)
|
||||
|
||||
if fmt == "html":
|
||||
safe_title = html.escape(session.name or "")
|
||||
html_parts = [
|
||||
"<!DOCTYPE html><html><head>",
|
||||
f"<meta charset='utf-8'><title>{session.name}</title>",
|
||||
f"<meta charset='utf-8'><title>{safe_title}</title>",
|
||||
"<style>body{font-family:monospace;max-width:800px;margin:2rem auto;padding:0 1rem;background:#111;color:#ddd}",
|
||||
".msg{margin:1rem 0;padding:0.8rem;border-radius:6px;border:1px solid #333}",
|
||||
".user{background:#1a1a2e}.ai{background:#1a2e1a}",
|
||||
".role{font-weight:bold;margin-bottom:0.4rem;opacity:0.7;text-transform:uppercase;font-size:0.85em}",
|
||||
"pre{background:#000;padding:0.5rem;border-radius:4px;overflow-x:auto}</style></head><body>",
|
||||
f"<h1>{session.name}</h1>",
|
||||
f"<h1>{safe_title}</h1>",
|
||||
]
|
||||
for m in session.history:
|
||||
cls = "user" if m.role == "user" else "ai"
|
||||
|
||||
@@ -2469,7 +2469,7 @@ function _renderTurnsAsBubbles(turns, data) {
|
||||
+ (isMine ? '' : avatar)
|
||||
+ `<div class="email-bubble">`
|
||||
+ head
|
||||
+ `<div class="email-bubble-body">${t.body_html || ''}</div>`
|
||||
+ `<div class="email-bubble-body">${_sanitizeHtml(t.body_html || '')}</div>`
|
||||
+ `</div>`
|
||||
+ (isMine ? avatar : '')
|
||||
+ `</div>`
|
||||
@@ -2499,7 +2499,7 @@ function _renderTurnsFromServer(turns) {
|
||||
const w = wrap(top);
|
||||
if (stack.length) stack[stack.length - 1].html += w; else out += w;
|
||||
}
|
||||
out += t.body_html || '';
|
||||
out += _sanitizeHtml(t.body_html || '');
|
||||
} else {
|
||||
while (stack.length && stack[stack.length - 1].level > t.level) {
|
||||
const top = stack.pop();
|
||||
@@ -2507,9 +2507,9 @@ function _renderTurnsFromServer(turns) {
|
||||
if (stack.length) stack[stack.length - 1].html += w; else out += w;
|
||||
}
|
||||
if (!stack.length || stack[stack.length - 1].level < t.level) {
|
||||
stack.push({ level: t.level, meta: t.meta, html: t.body_html || '' });
|
||||
stack.push({ level: t.level, meta: t.meta, html: _sanitizeHtml(t.body_html || '') });
|
||||
} else {
|
||||
stack[stack.length - 1].html += t.body_html || '';
|
||||
stack[stack.length - 1].html += _sanitizeHtml(t.body_html || '');
|
||||
if (t.meta && !stack[stack.length - 1].meta) {
|
||||
stack[stack.length - 1].meta = t.meta;
|
||||
}
|
||||
|
||||
@@ -622,3 +622,71 @@ def test_web_fetch_guard_blocks_redirect_into_private(monkeypatch):
|
||||
with _pytest.raises(httpx.RequestError) as exc:
|
||||
content._get_public_url("http://public.example/start", headers={}, timeout=5)
|
||||
assert "non-public" in str(exc.value)
|
||||
|
||||
|
||||
# ── audit fixes (2026-06-01): email XSS, attachment traversal, authz ──
|
||||
|
||||
def _import_attachment_extract_dir():
|
||||
sys.modules.pop("routes.email_helpers", None)
|
||||
from routes.email_helpers import attachment_extract_dir, ATTACHMENTS_DIR
|
||||
return attachment_extract_dir, ATTACHMENTS_DIR
|
||||
|
||||
|
||||
@pytest.mark.parametrize("folder,uid", [
|
||||
("../../../../tmp/evil", "1"),
|
||||
("INBOX", "../../etc/cron.d/x"),
|
||||
("a/../../b", "x"),
|
||||
("..", ".."),
|
||||
("/abs/path", "2"),
|
||||
])
|
||||
def test_attachment_extract_dir_stays_contained(folder, uid):
|
||||
"""User-controlled folder/uid must never escape ATTACHMENTS_DIR — pins the
|
||||
fix for the attachment-extraction path traversal."""
|
||||
aed, base = _import_attachment_extract_dir()
|
||||
target = aed(folder, uid)
|
||||
base_r = base.resolve()
|
||||
assert target == base_r or base_r in target.parents
|
||||
# exactly one extra path segment, and no `..` component survived
|
||||
rel = target.relative_to(base_r)
|
||||
assert ".." not in rel.parts
|
||||
|
||||
|
||||
def test_attachment_extract_dir_normal_inputs_unchanged():
|
||||
aed, base = _import_attachment_extract_dir()
|
||||
assert aed("INBOX", "123") == base.resolve() / "INBOX_123"
|
||||
|
||||
|
||||
def test_diagnostics_routes_are_admin_gated():
|
||||
"""db/rag stats + test endpoints must require admin (they relied only on
|
||||
the global session check before)."""
|
||||
src = Path(__file__).resolve().parents[1] / "routes" / "diagnostics_routes.py"
|
||||
text = src.read_text()
|
||||
for handler in ("get_database_stats", "get_rag_stats", "test_youtube", "test_research"):
|
||||
assert f"def {handler}(request: Request" in text, handler
|
||||
assert text.count("require_admin(request)") >= 4
|
||||
|
||||
|
||||
def test_email_thread_rendering_sanitizes_body_html():
|
||||
"""Both threaded render paths must run server-parsed body_html through the
|
||||
allowlist sanitizer (the flat path already did)."""
|
||||
src = Path(__file__).resolve().parents[1] / "static" / "js" / "emailLibrary.js"
|
||||
text = src.read_text()
|
||||
# every `t.body_html` reference is wrapped by _sanitizeHtml(...)
|
||||
assert text.count("t.body_html") == text.count("_sanitizeHtml(t.body_html")
|
||||
assert "t.body_html" in text # guard against the file being refactored away
|
||||
|
||||
|
||||
def test_session_html_export_escapes_name():
|
||||
src = Path(__file__).resolve().parents[1] / "routes" / "session_routes.py"
|
||||
text = src.read_text()
|
||||
assert "safe_title = html.escape(session.name" in text
|
||||
assert "<title>{session.name}" not in text
|
||||
assert "<h1>{session.name}</h1>" not in text
|
||||
|
||||
|
||||
def test_mcp_oauth_page_escapes_reflected_values():
|
||||
src = Path(__file__).resolve().parents[1] / "routes" / "mcp_routes.py"
|
||||
text = src.read_text()
|
||||
body = text.split("def _oauth_authorize_page(", 1)[1].split("return f", 1)[0]
|
||||
for var in ("auth_url", "server_id", "host"):
|
||||
assert f"{var} = html.escape({var}" in body, var
|
||||
|
||||
Reference in New Issue
Block a user