Scope personal RAG uploads by owner (#446)
This commit is contained in:
@@ -2,7 +2,8 @@
|
|||||||
"""Routes for personal documents management."""
|
"""Routes for personal documents management."""
|
||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
from typing import List
|
import uuid
|
||||||
|
from typing import List, Tuple
|
||||||
from fastapi import APIRouter, HTTPException, Query, Request, UploadFile, File, Depends
|
from fastapi import APIRouter, HTTPException, Query, Request, UploadFile, File, Depends
|
||||||
from src.request_models import DirectoryRequest
|
from src.request_models import DirectoryRequest
|
||||||
from core.constants import BASE_DIR, PERSONAL_DIR
|
from core.constants import BASE_DIR, PERSONAL_DIR
|
||||||
@@ -12,9 +13,39 @@ from core.middleware import require_admin
|
|||||||
from src.upload_handler import secure_filename
|
from src.upload_handler import secure_filename
|
||||||
|
|
||||||
UPLOADS_DIR = os.path.join(BASE_DIR, "data", "personal_uploads")
|
UPLOADS_DIR = os.path.join(BASE_DIR, "data", "personal_uploads")
|
||||||
|
MAX_PERSONAL_UPLOAD_BYTES = int(
|
||||||
|
os.getenv("ODYSSEUS_PERSONAL_UPLOAD_MAX_BYTES", str(25 * 1024 * 1024))
|
||||||
|
)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _personal_upload_dir_for_owner(owner: str | None) -> str:
|
||||||
|
"""Return the per-owner upload directory used for direct RAG uploads."""
|
||||||
|
owner_segment = secure_filename((owner or "local").strip())[:80] or "local"
|
||||||
|
upload_dir = os.path.abspath(os.path.join(UPLOADS_DIR, owner_segment))
|
||||||
|
base_abs = os.path.abspath(UPLOADS_DIR)
|
||||||
|
if os.path.commonpath([upload_dir, base_abs]) != base_abs:
|
||||||
|
raise ValueError("Unsafe upload owner path")
|
||||||
|
os.makedirs(upload_dir, exist_ok=True)
|
||||||
|
return upload_dir
|
||||||
|
|
||||||
|
|
||||||
|
def _unique_personal_upload_path(upload_dir: str, original_name: str | None) -> Tuple[str, str, str]:
|
||||||
|
"""Build a collision-resistant upload path while preserving a display name."""
|
||||||
|
safe_name = secure_filename(os.path.basename(original_name or "upload"))
|
||||||
|
if not safe_name or safe_name.startswith("."):
|
||||||
|
safe_name = "upload"
|
||||||
|
|
||||||
|
stem, ext = os.path.splitext(safe_name)
|
||||||
|
stem = (stem or "upload")[:80]
|
||||||
|
filename = f"{stem}-{uuid.uuid4().hex[:10]}{ext.lower()}"
|
||||||
|
file_path = os.path.abspath(os.path.join(upload_dir, filename))
|
||||||
|
upload_abs = os.path.abspath(upload_dir)
|
||||||
|
if os.path.commonpath([file_path, upload_abs]) != upload_abs:
|
||||||
|
raise ValueError("Unsafe upload filename")
|
||||||
|
return file_path, filename, safe_name
|
||||||
|
|
||||||
def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
|
def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
|
||||||
"""
|
"""
|
||||||
Setup personal documents related routes.
|
Setup personal documents related routes.
|
||||||
@@ -165,7 +196,7 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
|
|||||||
if not rag:
|
if not rag:
|
||||||
raise HTTPException(503, "RAG system is not available — is the embedding service running?")
|
raise HTTPException(503, "RAG system is not available — is the embedding service running?")
|
||||||
|
|
||||||
os.makedirs(UPLOADS_DIR, exist_ok=True)
|
upload_dir = _personal_upload_dir_for_owner(user)
|
||||||
|
|
||||||
total_indexed = 0
|
total_indexed = 0
|
||||||
total_failed = 0
|
total_failed = 0
|
||||||
@@ -173,18 +204,12 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
|
|||||||
|
|
||||||
for upload in files:
|
for upload in files:
|
||||||
try:
|
try:
|
||||||
# Sanitize filename — strip directory components and unsafe chars
|
file_path, stored_name, safe_name = _unique_personal_upload_path(upload_dir, upload.filename)
|
||||||
safe_name = secure_filename(os.path.basename(upload.filename or "upload"))
|
content_bytes = await upload.read(MAX_PERSONAL_UPLOAD_BYTES + 1)
|
||||||
if not safe_name or safe_name.startswith("."):
|
if len(content_bytes) > MAX_PERSONAL_UPLOAD_BYTES:
|
||||||
safe_name = f"upload_{total_indexed + total_failed}"
|
logger.warning(f"Rejected oversized personal upload: {upload.filename!r}")
|
||||||
file_path = os.path.join(UPLOADS_DIR, safe_name)
|
|
||||||
# Defense-in-depth: ensure resolved path stays under UPLOADS_DIR
|
|
||||||
base_abs = os.path.abspath(UPLOADS_DIR)
|
|
||||||
if os.path.commonpath([os.path.abspath(file_path), base_abs]) != base_abs:
|
|
||||||
logger.warning(f"Rejected unsafe upload path: {upload.filename!r}")
|
|
||||||
total_failed += 1
|
total_failed += 1
|
||||||
continue
|
continue
|
||||||
content_bytes = await upload.read()
|
|
||||||
with open(file_path, "wb") as f:
|
with open(file_path, "wb") as f:
|
||||||
f.write(content_bytes)
|
f.write(content_bytes)
|
||||||
|
|
||||||
@@ -205,7 +230,8 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
|
|||||||
metadata = {
|
metadata = {
|
||||||
"source": file_path,
|
"source": file_path,
|
||||||
"filename": safe_name,
|
"filename": safe_name,
|
||||||
"directory": UPLOADS_DIR,
|
"stored_filename": stored_name,
|
||||||
|
"directory": upload_dir,
|
||||||
"type": ext,
|
"type": ext,
|
||||||
"chunk_id": i,
|
"chunk_id": i,
|
||||||
}
|
}
|
||||||
@@ -223,7 +249,7 @@ def setup_personal_routes(personal_docs_manager, rag_manager, rag_available):
|
|||||||
|
|
||||||
# Track uploads directory
|
# Track uploads directory
|
||||||
if uploaded_files and hasattr(personal_docs_manager, "add_directory"):
|
if uploaded_files and hasattr(personal_docs_manager, "add_directory"):
|
||||||
personal_docs_manager.add_directory(UPLOADS_DIR, index=False)
|
personal_docs_manager.add_directory(upload_dir, index=False)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"success": True,
|
"success": True,
|
||||||
|
|||||||
44
tests/test_personal_upload_isolation.py
Normal file
44
tests/test_personal_upload_isolation.py
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
import os
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from routes import personal_routes
|
||||||
|
|
||||||
|
|
||||||
|
def test_personal_upload_paths_are_owner_scoped_and_unique(tmp_path, monkeypatch):
|
||||||
|
monkeypatch.setattr(personal_routes, "UPLOADS_DIR", str(tmp_path))
|
||||||
|
|
||||||
|
alice_dir = personal_routes._personal_upload_dir_for_owner("alice")
|
||||||
|
bob_dir = personal_routes._personal_upload_dir_for_owner("bob")
|
||||||
|
|
||||||
|
assert Path(alice_dir).parent == tmp_path
|
||||||
|
assert Path(bob_dir).parent == tmp_path
|
||||||
|
assert alice_dir != bob_dir
|
||||||
|
|
||||||
|
first_path, first_stored, first_display = personal_routes._unique_personal_upload_path(
|
||||||
|
alice_dir,
|
||||||
|
"notes.txt",
|
||||||
|
)
|
||||||
|
second_path, second_stored, second_display = personal_routes._unique_personal_upload_path(
|
||||||
|
alice_dir,
|
||||||
|
"notes.txt",
|
||||||
|
)
|
||||||
|
|
||||||
|
assert first_display == second_display == "notes.txt"
|
||||||
|
assert first_stored != second_stored
|
||||||
|
assert first_path != second_path
|
||||||
|
assert Path(first_path).parent == Path(alice_dir)
|
||||||
|
assert Path(second_path).parent == Path(alice_dir)
|
||||||
|
|
||||||
|
|
||||||
|
def test_personal_upload_paths_stay_under_upload_root(tmp_path, monkeypatch):
|
||||||
|
monkeypatch.setattr(personal_routes, "UPLOADS_DIR", str(tmp_path))
|
||||||
|
|
||||||
|
upload_dir = personal_routes._personal_upload_dir_for_owner("../alice")
|
||||||
|
file_path, stored_name, display_name = personal_routes._unique_personal_upload_path(
|
||||||
|
upload_dir,
|
||||||
|
"../../.env",
|
||||||
|
)
|
||||||
|
|
||||||
|
assert os.path.commonpath([file_path, upload_dir]) == upload_dir
|
||||||
|
assert Path(file_path).name == stored_name
|
||||||
|
assert display_name == "env"
|
||||||
Reference in New Issue
Block a user