diff --git a/routes/personal_routes.py b/routes/personal_routes.py index 98be74e..220c6aa 100644 --- a/routes/personal_routes.py +++ b/routes/personal_routes.py @@ -2,7 +2,8 @@ """Routes for personal documents management.""" import os import logging -from typing import List +import uuid +from typing import List, Tuple from fastapi import APIRouter, HTTPException, Query, Request, UploadFile, File, Depends from src.request_models import DirectoryRequest from core.constants import BASE_DIR, PERSONAL_DIR @@ -12,9 +13,39 @@ from core.middleware import require_admin from src.upload_handler import secure_filename UPLOADS_DIR = os.path.join(BASE_DIR, "data", "personal_uploads") +MAX_PERSONAL_UPLOAD_BYTES = int( + os.getenv("ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES", str(25 * 1024 * 1024)) +) logger = logging.getLogger(__name__) + +def _personal_upload_dir_for_owner(owner: str | None) -> str: + """Return the per-owner upload directory used for direct RAG uploads.""" + owner_segment = secure_filename((owner or "local").strip())[:80] or "local" + upload_dir = os.path.abspath(os.path.join(UPLOADS_DIR, owner_segment)) + base_abs = os.path.abspath(UPLOADS_DIR) + if os.path.commonpath([upload_dir, base_abs]) != base_abs: + raise ValueError("Unsafe upload owner path") + os.makedirs(upload_dir, exist_ok=True) + return upload_dir + + +def _unique_personal_upload_path(upload_dir: str, original_name: str | None) -> Tuple[str, str, str]: + """Build a collision-resistant upload path while preserving a display name.""" + safe_name = secure_filename(os.path.basename(original_name or "upload")) + if not safe_name or safe_name.startswith("."): + safe_name = "upload" + + stem, ext = os.path.splitext(safe_name) + stem = (stem or "upload")[:80] + filename = f"{stem}-{uuid.uuid4().hex[:10]}{ext.lower()}" + file_path = os.path.abspath(os.path.join(upload_dir, filename)) + upload_abs = os.path.abspath(upload_dir) + if os.path.commonpath([file_path, upload_abs]) != upload_abs: + raise ValueError("Unsafe upload filename") + return file_path, filename, safe_name + def setup_personal_routes(personal_docs_manager, rag_manager, rag_available): """ Setup personal documents related routes. @@ -165,7 +196,7 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available): if not rag: raise HTTPException(503, "RAG system is not available — is the embedding service running?") - os.makedirs(UPLOADS_DIR, exist_ok=True) + upload_dir = _personal_upload_dir_for_owner(user) total_indexed = 0 total_failed = 0 @@ -173,18 +204,12 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available): for upload in files: try: - # Sanitize filename — strip directory components and unsafe chars - safe_name = secure_filename(os.path.basename(upload.filename or "upload")) - if not safe_name or safe_name.startswith("."): - safe_name = f"upload_{total_indexed + total_failed}" - file_path = os.path.join(UPLOADS_DIR, safe_name) - # Defense-in-depth: ensure resolved path stays under UPLOADS_DIR - base_abs = os.path.abspath(UPLOADS_DIR) - if os.path.commonpath([os.path.abspath(file_path), base_abs]) != base_abs: - logger.warning(f"Rejected unsafe upload path: {upload.filename!r}") + file_path, stored_name, safe_name = _unique_personal_upload_path(upload_dir, upload.filename) + content_bytes = await upload.read(MAX_PERSONAL_UPLOAD_BYTES + 1) + if len(content_bytes) > MAX_PERSONAL_UPLOAD_BYTES: + logger.warning(f"Rejected oversized personal upload: {upload.filename!r}") total_failed += 1 continue - content_bytes = await upload.read() with open(file_path, "wb") as f: f.write(content_bytes) @@ -205,7 +230,8 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available): metadata = { "source": file_path, "filename": safe_name, - "directory": UPLOADS_DIR, + "stored_filename": stored_name, + "directory": upload_dir, "type": ext, "chunk_id": i, } @@ -223,7 +249,7 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available): # Track uploads directory if uploaded_files and hasattr(personal_docs_manager, "add_directory"): - personal_docs_manager.add_directory(UPLOADS_DIR, index=False) + personal_docs_manager.add_directory(upload_dir, index=False) return { "success": True, diff --git a/tests/test_personal_upload_isolation.py b/tests/test_personal_upload_isolation.py new file mode 100644 index 0000000..8bfabf4 --- /dev/null +++ b/tests/test_personal_upload_isolation.py @@ -0,0 +1,44 @@ +import os +from pathlib import Path + +from routes import personal_routes + + +def test_personal_upload_paths_are_owner_scoped_and_unique(tmp_path, monkeypatch): + monkeypatch.setattr(personal_routes, "UPLOADS_DIR", str(tmp_path)) + + alice_dir = personal_routes._personal_upload_dir_for_owner("alice") + bob_dir = personal_routes._personal_upload_dir_for_owner("bob") + + assert Path(alice_dir).parent == tmp_path + assert Path(bob_dir).parent == tmp_path + assert alice_dir != bob_dir + + first_path, first_stored, first_display = personal_routes._unique_personal_upload_path( + alice_dir, + "notes.txt", + ) + second_path, second_stored, second_display = personal_routes._unique_personal_upload_path( + alice_dir, + "notes.txt", + ) + + assert first_display == second_display == "notes.txt" + assert first_stored != second_stored + assert first_path != second_path + assert Path(first_path).parent == Path(alice_dir) + assert Path(second_path).parent == Path(alice_dir) + + +def test_personal_upload_paths_stay_under_upload_root(tmp_path, monkeypatch): + monkeypatch.setattr(personal_routes, "UPLOADS_DIR", str(tmp_path)) + + upload_dir = personal_routes._personal_upload_dir_for_owner("../alice") + file_path, stored_name, display_name = personal_routes._unique_personal_upload_path( + upload_dir, + "../../.env", + ) + + assert os.path.commonpath([file_path, upload_dir]) == upload_dir + assert Path(file_path).name == stored_name + assert display_name == "env"