diff --git a/app.py b/app.py index 0ff6e42..d45161e 100644 --- a/app.py +++ b/app.py @@ -21,6 +21,7 @@ import uuid import asyncio import logging +import secrets from datetime import datetime from typing import Dict @@ -222,7 +223,7 @@ if AUTH_ENABLED: try: from core.middleware import INTERNAL_TOOL_HEADER, INTERNAL_TOOL_TOKEN as _ITT _hdr = request.headers.get(INTERNAL_TOOL_HEADER) - if _hdr and _hdr == _ITT and _is_trusted_loopback(request): + if _hdr and secrets.compare_digest(_hdr, _ITT) and _is_trusted_loopback(request): # Impersonation: when the agent's loopback call sets # X-Odysseus-Owner, attribute the request to that user only # if they exist. Authorization checks remain separate; this diff --git a/core/middleware.py b/core/middleware.py index a3e9e9a..82d1d03 100644 --- a/core/middleware.py +++ b/core/middleware.py @@ -27,7 +27,8 @@ def require_admin(request: Request): # (b) the auth middleware already validated the token and stamped # request.state.current_user = "internal-tool". try: - if request.headers.get(INTERNAL_TOOL_HEADER) == INTERNAL_TOOL_TOKEN: + hdr = request.headers.get(INTERNAL_TOOL_HEADER) + if hdr and secrets.compare_digest(hdr, INTERNAL_TOOL_TOKEN): return if getattr(request.state, "current_user", None) == "internal-tool": return diff --git a/routes/diagnostics_routes.py b/routes/diagnostics_routes.py index 8f3a915..daebef8 100644 --- a/routes/diagnostics_routes.py +++ b/routes/diagnostics_routes.py @@ -3,10 +3,11 @@ import logging from typing import Dict, Any -from fastapi import APIRouter, HTTPException, Form +from fastapi import APIRouter, HTTPException, Form, Request from services.youtube.youtube_handler import extract_youtube_id, extract_transcript_async from core.constants import DEFAULT_HOST +from core.middleware import require_admin logger = logging.getLogger(__name__) @@ -19,7 +20,8 @@ def setup_diagnostics_routes( router = APIRouter(tags=["diagnostics"]) @router.get("/api/db/stats") - async def get_database_stats() -> Dict[str, Any]: + async def get_database_stats(request: Request) -> Dict[str, Any]: + require_admin(request) try: from core.database import get_detailed_stats return get_detailed_stats() @@ -28,13 +30,15 @@ def setup_diagnostics_routes( raise HTTPException(500, "Failed to retrieve database statistics") @router.get("/api/rag/stats") - async def get_rag_stats() -> Dict[str, Any]: + async def get_rag_stats(request: Request) -> Dict[str, Any]: + require_admin(request) if rag_available and rag_manager: return rag_manager.get_stats() return {"error": "RAG system not available"} @router.get("/api/test/youtube") - async def test_youtube(url: str) -> Dict[str, Any]: + async def test_youtube(request: Request, url: str) -> Dict[str, Any]: + require_admin(request) try: video_id = extract_youtube_id(url) if not video_id: @@ -54,7 +58,8 @@ def setup_diagnostics_routes( return {"error": str(e)} @router.post("/api/test-research") - async def test_research(query: str = Form("What is machine learning?")) -> Dict[str, Any]: + async def test_research(request: Request, query: str = Form("What is machine learning?")) -> Dict[str, Any]: + require_admin(request) try: endpoint = f"http://{DEFAULT_HOST}:8000/v1/chat/completions" model = "gpt-oss-120b" diff --git a/routes/email_helpers.py b/routes/email_helpers.py index 27d7338..c14fd8c 100644 --- a/routes/email_helpers.py +++ b/routes/email_helpers.py @@ -269,6 +269,20 @@ COMPOSE_UPLOADS_DIR.mkdir(parents=True, exist_ok=True) SCHEDULED_DB = DATA_DIR / "scheduled_emails.db" +def attachment_extract_dir(folder: str, uid: str) -> Path: + """Containment-safe extraction directory for an attachment. + + `folder` and `uid` are user-controlled (query/path params). Flatten them to + a single safe path segment so a value like folder='../../tmp' can't escape + ATTACHMENTS_DIR, then assert containment as belt-and-suspenders.""" + key = re.sub(r"[^A-Za-z0-9._-]", "_", f"{folder}_{uid}") or "_" + target = (ATTACHMENTS_DIR / key).resolve() + base = ATTACHMENTS_DIR.resolve() + if target != base and base not in target.parents: + raise HTTPException(400, "Invalid attachment location") + return target + + def _init_scheduled_db(): import sqlite3 conn = sqlite3.connect(SCHEDULED_DB) diff --git a/routes/email_routes.py b/routes/email_routes.py index 94ce9dc..8b82aa5 100644 --- a/routes/email_routes.py +++ b/routes/email_routes.py @@ -48,6 +48,7 @@ from routes.email_helpers import ( _EMAIL_REPLY_SYS_PROMPT_BASE, _POOL_HOOKS, SendEmailRequest, ExtractStyleRequest, ATTACHMENTS_DIR, COMPOSE_UPLOADS_DIR, SCHEDULED_DB, + attachment_extract_dir, ) from routes.email_pollers import _start_poller @@ -1390,7 +1391,7 @@ def setup_email_routes(): msg = email_mod.message_from_bytes(raw) # Extract to a per-email folder - target_dir = ATTACHMENTS_DIR / f"{folder}_{uid}" + target_dir = attachment_extract_dir(folder, uid) filepath = _extract_attachment_to_disk(msg, index, target_dir) if not filepath: return {"error": f"Attachment index {index} not found"} @@ -1425,7 +1426,7 @@ def setup_email_routes(): raw = msg_data[0][1] msg = email_mod.message_from_bytes(raw) - target_dir = ATTACHMENTS_DIR / f"{folder}_{uid}" + target_dir = attachment_extract_dir(folder, uid) filepath = _extract_attachment_to_disk(msg, index, target_dir) if not filepath: return {"error": f"Attachment index {index} not found"} @@ -1633,7 +1634,7 @@ def setup_email_routes(): raw = msg_data[0][1] msg = email_mod.message_from_bytes(raw) - target_dir = ATTACHMENTS_DIR / f"{folder}_{uid}" + target_dir = attachment_extract_dir(folder, uid) filepath = _extract_attachment_to_disk(msg, index, target_dir) if not filepath: return {"error": f"Attachment index {index} not found"} diff --git a/routes/mcp_routes.py b/routes/mcp_routes.py index 5b1a51d..c09108f 100644 --- a/routes/mcp_routes.py +++ b/routes/mcp_routes.py @@ -499,6 +499,11 @@ def setup_mcp_routes(mcp_manager: McpManager): def _oauth_authorize_page(auth_url: str, server_id: str, host: str) -> str: """Page with Google sign-in link and URL paste-back form for remote access.""" + # Escape values interpolated into the page: `host` comes from the request + # Host header and `server_id` from the OAuth state — neither is trusted. + auth_url = html.escape(auth_url, quote=True) + server_id = html.escape(server_id, quote=True) + host = html.escape(host, quote=True) return f"""