Enforce owner checks for upload attachments
This commit is contained in:
@@ -1,7 +1,6 @@
|
||||
# src/chat_handler.py
|
||||
"""Handler for chat endpoint operations."""
|
||||
import os
|
||||
import json
|
||||
import asyncio
|
||||
import logging
|
||||
from typing import Dict, List, Optional, Any
|
||||
@@ -149,23 +148,22 @@ class ChatHandler:
|
||||
vision_enabled = get_setting("vision_enabled", True)
|
||||
main_is_vision = is_vision_model(sess.model or "")
|
||||
|
||||
# Read uploads DB once and index by id (was read twice + linear-scanned per attachment)
|
||||
# Resolve uploads once with the session owner. Attachment IDs are
|
||||
# bearer-like references; never trust them without an owner check.
|
||||
files_by_id: Dict[str, Dict] = {}
|
||||
owner = getattr(sess, "owner", None)
|
||||
if att_ids:
|
||||
uploads_db_path = os.path.join(UPLOAD_DIR, "uploads.json")
|
||||
try:
|
||||
with open(uploads_db_path, "r", encoding="utf-8") as f:
|
||||
_all_files = json.load(f)
|
||||
files_by_id = {fi["id"]: fi for fi in _all_files.values() if "id" in fi}
|
||||
except (FileNotFoundError, json.JSONDecodeError):
|
||||
pass
|
||||
for att_id in att_ids:
|
||||
fi = self.upload_handler.resolve_upload(att_id, owner=owner)
|
||||
if fi:
|
||||
files_by_id[att_id] = fi
|
||||
|
||||
for att_id in att_ids:
|
||||
fi = files_by_id.get(att_id)
|
||||
if fi:
|
||||
attachment_meta.append({
|
||||
"id": fi["id"],
|
||||
"name": fi["name"],
|
||||
"name": fi.get("name") or fi.get("original_name") or fi["id"],
|
||||
"mime": fi.get("mime", ""),
|
||||
"size": fi.get("size", 0),
|
||||
"width": fi.get("width"),
|
||||
@@ -242,6 +240,8 @@ class ChatHandler:
|
||||
enhanced_message, att_ids, UPLOAD_DIR, self.upload_handler,
|
||||
session_id=getattr(sess, "id", None),
|
||||
auto_opened_docs=auto_opened_docs,
|
||||
owner=owner,
|
||||
resolved_uploads=files_by_id,
|
||||
)
|
||||
|
||||
# Strip image_url entries for text-only models (VL description is already in the text)
|
||||
|
||||
@@ -257,6 +257,8 @@ def build_user_content(
|
||||
upload_handler,
|
||||
session_id: str | None = None,
|
||||
auto_opened_docs: list[Dict[str, Any]] | None = None,
|
||||
owner: str | None = None,
|
||||
resolved_uploads: dict[str, Dict[str, Any]] | None = None,
|
||||
) -> str | List[Dict[str, Any]]:
|
||||
"""Build user content with attachments (text, images, audio, documents).
|
||||
|
||||
@@ -268,33 +270,30 @@ def build_user_content(
|
||||
"""
|
||||
content = [{"type": "text", "text": text}]
|
||||
|
||||
for fid in attachment_ids:
|
||||
if not upload_handler.validate_upload_id(fid):
|
||||
logger.warning(f"Invalid attachment ID format: {fid}")
|
||||
for fid in attachment_ids or []:
|
||||
upload_info = (resolved_uploads or {}).get(fid)
|
||||
if upload_info is None and hasattr(upload_handler, "resolve_upload"):
|
||||
upload_info = upload_handler.resolve_upload(fid, owner=owner)
|
||||
if upload_info is None:
|
||||
logger.warning(f"Attachment {fid} not found or not authorized")
|
||||
continue
|
||||
|
||||
path = os.path.join(upload_dir, fid)
|
||||
if not (upload_handler.inside_base_dir(path) and os.path.exists(path)):
|
||||
found = False
|
||||
for root, dirs, files in os.walk(upload_dir):
|
||||
if fid in files and not fid.endswith(".json"):
|
||||
path = os.path.join(root, fid)
|
||||
if upload_handler.inside_base_dir(path):
|
||||
found = True
|
||||
logger.info(f"Found attachment {fid} at {path}")
|
||||
break
|
||||
if not found:
|
||||
logger.warning(f"Attachment {fid} not found in upload directories")
|
||||
continue
|
||||
|
||||
if not upload_handler.inside_base_dir(path):
|
||||
path = upload_info.get("path")
|
||||
if not path or not os.path.exists(path):
|
||||
logger.warning(f"Attachment {fid} path is missing")
|
||||
continue
|
||||
if hasattr(upload_handler, "_inside_upload_dir") and not upload_handler._inside_upload_dir(path):
|
||||
logger.warning(f"Attachment {fid} path is outside upload directory: {path}")
|
||||
continue
|
||||
if not hasattr(upload_handler, "_inside_upload_dir") and not upload_handler.inside_base_dir(path):
|
||||
logger.warning(f"Attachment {fid} path is outside base directory: {path}")
|
||||
continue
|
||||
|
||||
_, ext = os.path.splitext(path.lower())
|
||||
mime = mimetypes.guess_type(path)[0] or "application/octet-stream"
|
||||
mime = upload_info.get("mime") or mimetypes.guess_type(path)[0] or "application/octet-stream"
|
||||
display_name = upload_info.get("name") or upload_info.get("original_name") or path
|
||||
|
||||
if upload_handler.is_image_file(path, mime):
|
||||
if upload_handler.is_image_file(display_name, mime):
|
||||
try:
|
||||
with open(path, "rb") as image_file:
|
||||
encoded_string = base64.b64encode(image_file.read()).decode("utf-8")
|
||||
@@ -310,7 +309,7 @@ def build_user_content(
|
||||
else:
|
||||
content.insert(0, {"type": "text", "text": "[Image attached but could not be processed]"})
|
||||
|
||||
elif upload_handler.is_audio_file(path, mime):
|
||||
elif upload_handler.is_audio_file(display_name, mime):
|
||||
try:
|
||||
with open(path, "rb") as audio_file:
|
||||
encoded_string = base64.b64encode(audio_file.read()).decode("utf-8")
|
||||
@@ -326,7 +325,7 @@ def build_user_content(
|
||||
else:
|
||||
content.insert(0, {"type": "text", "text": "[Audio attached but could not be processed]"})
|
||||
|
||||
elif upload_handler.is_document_file(path, mime):
|
||||
elif upload_handler.is_document_file(display_name, mime):
|
||||
if mime == "application/pdf":
|
||||
extracted_text = None
|
||||
if session_id:
|
||||
|
||||
@@ -8,7 +8,7 @@ import hashlib
|
||||
import mimetypes
|
||||
import threading
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Dict, Any
|
||||
from typing import Dict, Any, Optional
|
||||
from fastapi import HTTPException, UploadFile
|
||||
def secure_filename(filename: str) -> str:
|
||||
"""Sanitize a filename (replaces werkzeug.utils.secure_filename)."""
|
||||
@@ -225,6 +225,106 @@ class UploadHandler:
|
||||
"""Validate that the upload ID matches the expected pattern."""
|
||||
pattern = r'^[0-9a-fA-F]{32}\.[A-Za-z0-9]+$'
|
||||
return re.fullmatch(pattern, upload_id) is not None
|
||||
|
||||
def _inside_upload_dir(self, path: str) -> bool:
|
||||
"""Check if path is inside the upload directory."""
|
||||
base = os.path.realpath(self.upload_dir)
|
||||
p = os.path.realpath(path)
|
||||
try:
|
||||
return os.path.commonpath([base, p]) == base
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
def _load_upload_index(self) -> Dict[str, Any]:
|
||||
uploads_db_path = os.path.join(self.upload_dir, "uploads.json")
|
||||
if not os.path.exists(uploads_db_path):
|
||||
return {}
|
||||
try:
|
||||
with open(uploads_db_path, "r") as f:
|
||||
data = json.load(f)
|
||||
return data if isinstance(data, dict) else {}
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to read uploads database: {e}")
|
||||
return {}
|
||||
|
||||
def get_upload_info(self, upload_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""Return the uploads.json metadata row for an upload ID, if present."""
|
||||
if not self.validate_upload_id(upload_id):
|
||||
return None
|
||||
for info in self._load_upload_index().values():
|
||||
if isinstance(info, dict) and info.get("id") == upload_id:
|
||||
return dict(info)
|
||||
return None
|
||||
|
||||
def _find_upload_path(self, upload_id: str) -> Optional[str]:
|
||||
"""Find an upload file by ID while staying inside upload_dir."""
|
||||
if not self.validate_upload_id(upload_id):
|
||||
return None
|
||||
|
||||
direct = os.path.join(self.upload_dir, upload_id)
|
||||
if os.path.exists(direct) and self._inside_upload_dir(direct):
|
||||
return direct
|
||||
|
||||
for root, _dirs, files in os.walk(self.upload_dir, followlinks=False):
|
||||
if upload_id in files:
|
||||
path = os.path.join(root, upload_id)
|
||||
if self._inside_upload_dir(path):
|
||||
return path
|
||||
return None
|
||||
|
||||
def resolve_upload(
|
||||
self,
|
||||
upload_id: str,
|
||||
owner: Optional[str] = None,
|
||||
auth_manager: Any = None,
|
||||
allow_admin: bool = True,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Resolve an upload ID to metadata only if the caller may read it.
|
||||
|
||||
This is the owner-aware lookup used by internal processors. Public
|
||||
download routes already perform owner checks; chat/document paths must
|
||||
do the same before reading file bytes server-side.
|
||||
"""
|
||||
if not self.validate_upload_id(upload_id):
|
||||
logger.warning(f"Invalid upload ID format: {upload_id}")
|
||||
return None
|
||||
|
||||
auth_configured = bool(auth_manager and getattr(auth_manager, "is_configured", False))
|
||||
if auth_configured and not owner:
|
||||
return None
|
||||
|
||||
info = self.get_upload_info(upload_id) or {}
|
||||
is_admin = False
|
||||
if allow_admin and owner and auth_manager and hasattr(auth_manager, "is_admin"):
|
||||
try:
|
||||
is_admin = bool(auth_manager.is_admin(owner))
|
||||
except Exception:
|
||||
is_admin = False
|
||||
|
||||
if owner and not is_admin:
|
||||
if info.get("owner") != owner:
|
||||
logger.warning("Upload %s denied for owner %s", upload_id, owner)
|
||||
return None
|
||||
if not owner and info.get("owner") is not None:
|
||||
logger.warning("Upload %s denied without an authenticated owner", upload_id)
|
||||
return None
|
||||
|
||||
path = info.get("path")
|
||||
if not path or not os.path.exists(path) or not self._inside_upload_dir(path):
|
||||
path = self._find_upload_path(upload_id)
|
||||
if not path:
|
||||
return None
|
||||
if not self._inside_upload_dir(path):
|
||||
logger.warning(f"Upload path outside upload directory: {path}")
|
||||
return None
|
||||
|
||||
resolved = dict(info)
|
||||
resolved.setdefault("id", upload_id)
|
||||
resolved["path"] = path
|
||||
resolved.setdefault("name", os.path.basename(path))
|
||||
resolved.setdefault("original_name", resolved["name"])
|
||||
resolved.setdefault("mime", mimetypes.guess_type(path)[0] or "application/octet-stream")
|
||||
return resolved
|
||||
|
||||
def cleanup_rate_limits(self):
|
||||
"""Remove stale entries from upload_rate_log."""
|
||||
|
||||
Reference in New Issue
Block a user