Scope personal RAG uploads by owner (#446)

This commit is contained in:
red person
2026-06-01 16:36:53 +03:00
committed by GitHub
parent d36896c5f7
commit b2e8d692a4
2 changed files with 84 additions and 14 deletions

View File

@@ -2,7 +2,8 @@
"""Routes for personal documents management."""
import os
import logging
from typing import List
import uuid
from typing import List, Tuple
from fastapi import APIRouter, HTTPException, Query, Request, UploadFile, File, Depends
from src.request_models import DirectoryRequest
from core.constants import BASE_DIR, PERSONAL_DIR
@@ -12,9 +13,39 @@ from core.middleware import require_admin
from src.upload_handler import secure_filename
UPLOADS_DIR = os.path.join(BASE_DIR, "data", "personal_uploads")
MAX_PERSONAL_UPLOAD_BYTES = int(
os.getenv("ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES", str(25 * 1024 * 1024))
)
logger = logging.getLogger(__name__)
def _personal_upload_dir_for_owner(owner: str | None) -> str:
"""Return the per-owner upload directory used for direct RAG uploads."""
owner_segment = secure_filename((owner or "local").strip())[:80] or "local"
upload_dir = os.path.abspath(os.path.join(UPLOADS_DIR, owner_segment))
base_abs = os.path.abspath(UPLOADS_DIR)
if os.path.commonpath([upload_dir, base_abs]) != base_abs:
raise ValueError("Unsafe upload owner path")
os.makedirs(upload_dir, exist_ok=True)
return upload_dir
def _unique_personal_upload_path(upload_dir: str, original_name: str | None) -> Tuple[str, str, str]:
"""Build a collision-resistant upload path while preserving a display name."""
safe_name = secure_filename(os.path.basename(original_name or "upload"))
if not safe_name or safe_name.startswith("."):
safe_name = "upload"
stem, ext = os.path.splitext(safe_name)
stem = (stem or "upload")[:80]
filename = f"{stem}-{uuid.uuid4().hex[:10]}{ext.lower()}"
file_path = os.path.abspath(os.path.join(upload_dir, filename))
upload_abs = os.path.abspath(upload_dir)
if os.path.commonpath([file_path, upload_abs]) != upload_abs:
raise ValueError("Unsafe upload filename")
return file_path, filename, safe_name
def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
"""
Setup personal documents related routes.
@@ -165,7 +196,7 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
if not rag:
raise HTTPException(503, "RAG system is not available — is the embedding service running?")
os.makedirs(UPLOADS_DIR, exist_ok=True)
upload_dir = _personal_upload_dir_for_owner(user)
total_indexed = 0
total_failed = 0
@@ -173,18 +204,12 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
for upload in files:
try:
# Sanitize filename — strip directory components and unsafe chars
safe_name = secure_filename(os.path.basename(upload.filename or "upload"))
if not safe_name or safe_name.startswith("."):
safe_name = f"upload_{total_indexed + total_failed}"
file_path = os.path.join(UPLOADS_DIR, safe_name)
# Defense-in-depth: ensure resolved path stays under UPLOADS_DIR
base_abs = os.path.abspath(UPLOADS_DIR)
if os.path.commonpath([os.path.abspath(file_path), base_abs]) != base_abs:
logger.warning(f"Rejected unsafe upload path: {upload.filename!r}")
file_path, stored_name, safe_name = _unique_personal_upload_path(upload_dir, upload.filename)
content_bytes = await upload.read(MAX_PERSONAL_UPLOAD_BYTES + 1)
if len(content_bytes) > MAX_PERSONAL_UPLOAD_BYTES:
logger.warning(f"Rejected oversized personal upload: {upload.filename!r}")
total_failed += 1
continue
content_bytes = await upload.read()
with open(file_path, "wb") as f:
f.write(content_bytes)
@@ -205,7 +230,8 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
metadata = {
"source": file_path,
"filename": safe_name,
"directory": UPLOADS_DIR,
"stored_filename": stored_name,
"directory": upload_dir,
"type": ext,
"chunk_id": i,
}
@@ -223,7 +249,7 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
# Track uploads directory
if uploaded_files and hasattr(personal_docs_manager, "add_directory"):
personal_docs_manager.add_directory(UPLOADS_DIR, index=False)
personal_docs_manager.add_directory(upload_dir, index=False)
return {
"success": True,

View File

@@ -0,0 +1,44 @@
import os
from pathlib import Path
from routes import personal_routes
def test_personal_upload_paths_are_owner_scoped_and_unique(tmp_path, monkeypatch):
monkeypatch.setattr(personal_routes, "UPLOADS_DIR", str(tmp_path))
alice_dir = personal_routes._personal_upload_dir_for_owner("alice")
bob_dir = personal_routes._personal_upload_dir_for_owner("bob")
assert Path(alice_dir).parent == tmp_path
assert Path(bob_dir).parent == tmp_path
assert alice_dir != bob_dir
first_path, first_stored, first_display = personal_routes._unique_personal_upload_path(
alice_dir,
"notes.txt",
)
second_path, second_stored, second_display = personal_routes._unique_personal_upload_path(
alice_dir,
"notes.txt",
)
assert first_display == second_display == "notes.txt"
assert first_stored != second_stored
assert first_path != second_path
assert Path(first_path).parent == Path(alice_dir)
assert Path(second_path).parent == Path(alice_dir)
def test_personal_upload_paths_stay_under_upload_root(tmp_path, monkeypatch):
monkeypatch.setattr(personal_routes, "UPLOADS_DIR", str(tmp_path))
upload_dir = personal_routes._personal_upload_dir_for_owner("../alice")
file_path, stored_name, display_name = personal_routes._unique_personal_upload_path(
upload_dir,
"../../.env",
)
assert os.path.commonpath([file_path, upload_dir]) == upload_dir
assert Path(file_path).name == stored_name
assert display_name == "env"