Scope personal RAG uploads by owner (#446)
This commit is contained in:
@@ -2,7 +2,8 @@
|
||||
"""Routes for personal documents management."""
|
||||
import os
|
||||
import logging
|
||||
from typing import List
|
||||
import uuid
|
||||
from typing import List, Tuple
|
||||
from fastapi import APIRouter, HTTPException, Query, Request, UploadFile, File, Depends
|
||||
from src.request_models import DirectoryRequest
|
||||
from core.constants import BASE_DIR, PERSONAL_DIR
|
||||
@@ -12,9 +13,39 @@ from core.middleware import require_admin
|
||||
from src.upload_handler import secure_filename
|
||||
|
||||
UPLOADS_DIR = os.path.join(BASE_DIR, "data", "personal_uploads")
|
||||
MAX_PERSONAL_UPLOAD_BYTES = int(
|
||||
os.getenv("ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES", str(25 * 1024 * 1024))
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _personal_upload_dir_for_owner(owner: str | None) -> str:
|
||||
"""Return the per-owner upload directory used for direct RAG uploads."""
|
||||
owner_segment = secure_filename((owner or "local").strip())[:80] or "local"
|
||||
upload_dir = os.path.abspath(os.path.join(UPLOADS_DIR, owner_segment))
|
||||
base_abs = os.path.abspath(UPLOADS_DIR)
|
||||
if os.path.commonpath([upload_dir, base_abs]) != base_abs:
|
||||
raise ValueError("Unsafe upload owner path")
|
||||
os.makedirs(upload_dir, exist_ok=True)
|
||||
return upload_dir
|
||||
|
||||
|
||||
def _unique_personal_upload_path(upload_dir: str, original_name: str | None) -> Tuple[str, str, str]:
|
||||
"""Build a collision-resistant upload path while preserving a display name."""
|
||||
safe_name = secure_filename(os.path.basename(original_name or "upload"))
|
||||
if not safe_name or safe_name.startswith("."):
|
||||
safe_name = "upload"
|
||||
|
||||
stem, ext = os.path.splitext(safe_name)
|
||||
stem = (stem or "upload")[:80]
|
||||
filename = f"{stem}-{uuid.uuid4().hex[:10]}{ext.lower()}"
|
||||
file_path = os.path.abspath(os.path.join(upload_dir, filename))
|
||||
upload_abs = os.path.abspath(upload_dir)
|
||||
if os.path.commonpath([file_path, upload_abs]) != upload_abs:
|
||||
raise ValueError("Unsafe upload filename")
|
||||
return file_path, filename, safe_name
|
||||
|
||||
def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
|
||||
"""
|
||||
Setup personal documents related routes.
|
||||
@@ -165,7 +196,7 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
|
||||
if not rag:
|
||||
raise HTTPException(503, "RAG system is not available — is the embedding service running?")
|
||||
|
||||
os.makedirs(UPLOADS_DIR, exist_ok=True)
|
||||
upload_dir = _personal_upload_dir_for_owner(user)
|
||||
|
||||
total_indexed = 0
|
||||
total_failed = 0
|
||||
@@ -173,18 +204,12 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
|
||||
|
||||
for upload in files:
|
||||
try:
|
||||
# Sanitize filename — strip directory components and unsafe chars
|
||||
safe_name = secure_filename(os.path.basename(upload.filename or "upload"))
|
||||
if not safe_name or safe_name.startswith("."):
|
||||
safe_name = f"upload_{total_indexed + total_failed}"
|
||||
file_path = os.path.join(UPLOADS_DIR, safe_name)
|
||||
# Defense-in-depth: ensure resolved path stays under UPLOADS_DIR
|
||||
base_abs = os.path.abspath(UPLOADS_DIR)
|
||||
if os.path.commonpath([os.path.abspath(file_path), base_abs]) != base_abs:
|
||||
logger.warning(f"Rejected unsafe upload path: {upload.filename!r}")
|
||||
file_path, stored_name, safe_name = _unique_personal_upload_path(upload_dir, upload.filename)
|
||||
content_bytes = await upload.read(MAX_PERSONAL_UPLOAD_BYTES + 1)
|
||||
if len(content_bytes) > MAX_PERSONAL_UPLOAD_BYTES:
|
||||
logger.warning(f"Rejected oversized personal upload: {upload.filename!r}")
|
||||
total_failed += 1
|
||||
continue
|
||||
content_bytes = await upload.read()
|
||||
with open(file_path, "wb") as f:
|
||||
f.write(content_bytes)
|
||||
|
||||
@@ -205,7 +230,8 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
|
||||
metadata = {
|
||||
"source": file_path,
|
||||
"filename": safe_name,
|
||||
"directory": UPLOADS_DIR,
|
||||
"stored_filename": stored_name,
|
||||
"directory": upload_dir,
|
||||
"type": ext,
|
||||
"chunk_id": i,
|
||||
}
|
||||
@@ -223,7 +249,7 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
|
||||
|
||||
# Track uploads directory
|
||||
if uploaded_files and hasattr(personal_docs_manager, "add_directory"):
|
||||
personal_docs_manager.add_directory(UPLOADS_DIR, index=False)
|
||||
personal_docs_manager.add_directory(upload_dir, index=False)
|
||||
|
||||
return {
|
||||
"success": True,
|
||||
|
||||
44
tests/test_personal_upload_isolation.py
Normal file
44
tests/test_personal_upload_isolation.py
Normal file
@@ -0,0 +1,44 @@
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from routes import personal_routes
|
||||
|
||||
|
||||
def test_personal_upload_paths_are_owner_scoped_and_unique(tmp_path, monkeypatch):
|
||||
monkeypatch.setattr(personal_routes, "UPLOADS_DIR", str(tmp_path))
|
||||
|
||||
alice_dir = personal_routes._personal_upload_dir_for_owner("alice")
|
||||
bob_dir = personal_routes._personal_upload_dir_for_owner("bob")
|
||||
|
||||
assert Path(alice_dir).parent == tmp_path
|
||||
assert Path(bob_dir).parent == tmp_path
|
||||
assert alice_dir != bob_dir
|
||||
|
||||
first_path, first_stored, first_display = personal_routes._unique_personal_upload_path(
|
||||
alice_dir,
|
||||
"notes.txt",
|
||||
)
|
||||
second_path, second_stored, second_display = personal_routes._unique_personal_upload_path(
|
||||
alice_dir,
|
||||
"notes.txt",
|
||||
)
|
||||
|
||||
assert first_display == second_display == "notes.txt"
|
||||
assert first_stored != second_stored
|
||||
assert first_path != second_path
|
||||
assert Path(first_path).parent == Path(alice_dir)
|
||||
assert Path(second_path).parent == Path(alice_dir)
|
||||
|
||||
|
||||
def test_personal_upload_paths_stay_under_upload_root(tmp_path, monkeypatch):
|
||||
monkeypatch.setattr(personal_routes, "UPLOADS_DIR", str(tmp_path))
|
||||
|
||||
upload_dir = personal_routes._personal_upload_dir_for_owner("../alice")
|
||||
file_path, stored_name, display_name = personal_routes._unique_personal_upload_path(
|
||||
upload_dir,
|
||||
"../../.env",
|
||||
)
|
||||
|
||||
assert os.path.commonpath([file_path, upload_dir]) == upload_dir
|
||||
assert Path(file_path).name == stored_name
|
||||
assert display_name == "env"
|
||||
Reference in New Issue
Block a user