252 lines
11 KiB
Python
252 lines
11 KiB
Python
# routes/upload_routes.py
|
|
import os
|
|
import time
|
|
import json
|
|
import asyncio
|
|
from fastapi import APIRouter, Request, File, UploadFile, HTTPException
|
|
from typing import List
|
|
import logging
|
|
from core.middleware import require_admin
|
|
from src.auth_helpers import get_current_user
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Router shared by every endpoint in this module; each route registered on it
# is served under the /api/upload prefix and tagged "upload".
router = APIRouter(prefix="/api/upload", tags=["upload"])
|
|
|
|
def setup_upload_routes(upload_handler):
    """Register the upload endpoints on the module-level ``router``.

    Every endpoint and helper below is a closure over ``upload_handler``.
    Calling this function again registers the same routes a second time on
    the shared router.

    Args:
        upload_handler: Backend used by the routes. The code below calls its
            ``save_upload``, ``cleanup_old_uploads``, ``get_upload_stats``,
            ``validate_upload_id``, ``inside_base_dir`` and
            ``cleanup_rate_limits`` methods and reads its ``upload_rate_log``
            and ``max_concurrent_uploads`` attributes.

    Returns:
        tuple: ``(router, periodic_rate_limit_cleanup)`` -- the router with
        the routes attached and a coroutine function that runs the hourly
        rate-limit cleanup loop.
    """
    import mimetypes

    from fastapi.responses import FileResponse

    # ------------------------------------------------------------------
    # Helpers shared by the per-file endpoints
    # ------------------------------------------------------------------

    def _resolve_upload_path(file_id: str) -> str:
        """Return the on-disk path of ``file_id`` inside UPLOAD_DIR.

        Raises:
            HTTPException: 404 when no file with that name exists directly in
                UPLOAD_DIR or in any directory below it; 403 when the handler
                reports the resolved path as outside its base directory.
        """
        from src.constants import UPLOAD_DIR

        path = os.path.join(UPLOAD_DIR, file_id)
        if not os.path.exists(path):
            # Not at the top level: search the directories below UPLOAD_DIR.
            for root, _dirs, names in os.walk(UPLOAD_DIR):
                if file_id in names:
                    path = os.path.join(root, file_id)
                    break
            else:
                raise HTTPException(404, "File not found")
        if not upload_handler.inside_base_dir(path):
            raise HTTPException(403, "Access denied")
        return path

    def _load_upload_info(file_id: str):
        """Return the uploads.json record whose ``id`` equals ``file_id``.

        Returns ``None`` when the index file is missing or cannot be parsed,
        or when no record matches. No ownership check is performed here; see
        ``_check_file_access``.
        """
        from src.constants import UPLOAD_DIR

        uploads_db = os.path.join(UPLOAD_DIR, "uploads.json")
        try:
            with open(uploads_db, encoding="utf-8") as f:
                db = json.load(f)
        except FileNotFoundError:
            return None
        except (OSError, ValueError) as e:
            # json.JSONDecodeError is a ValueError. A damaged index is treated
            # as "no record" instead of failing the request with a 500.
            logger.warning("Could not read upload index %s: %s", uploads_db, e)
            return None
        if not isinstance(db, dict):
            return None
        return next(
            (
                record
                for record in db.values()
                if isinstance(record, dict) and record.get("id") == file_id
            ),
            None,
        )

    def _check_file_access(request: Request, info) -> None:
        """Enforce per-file ownership when an auth manager is configured.

        Does nothing when ``request.app.state`` has no ``auth_manager`` or the
        manager's ``is_configured`` is falsy.

        Raises:
            HTTPException: 403 when there is no current user; 404 when the
                current user is neither the recorded owner nor an admin.
        """
        auth_mgr = getattr(request.app.state, "auth_manager", None)
        if not (auth_mgr and auth_mgr.is_configured):
            return
        current_user = get_current_user(request)
        if not current_user:
            raise HTTPException(403, "Access denied")
        file_owner = info.get("owner") if info else None
        if file_owner != current_user and not auth_mgr.is_admin(current_user):
            # Same response as for a missing file -- presumably so the reply
            # does not confirm that someone else's file exists.
            raise HTTPException(404, "File not found")

    def _vision_cache_path(file_id: str) -> str:
        """Return UPLOAD_DIR/.vision/{file_id}.txt, creating the directory."""
        from src.constants import UPLOAD_DIR

        cache_dir = os.path.join(UPLOAD_DIR, ".vision")
        os.makedirs(cache_dir, exist_ok=True)
        return os.path.join(cache_dir, file_id + ".txt")

    def _build_thumbnail(path: str, file_id: str) -> str:
        """Return the path of the cached JPEG thumbnail, generating it if needed.

        The thumbnail lives at UPLOAD_DIR/.thumbs/{file_id}.jpg and is rebuilt
        when it is missing or older than the source file. Any failure (Pillow
        not installed, unreadable image, ...) propagates to the caller.
        """
        from PIL import Image, ImageOps
        from src.constants import UPLOAD_DIR

        thumb_dir = os.path.join(UPLOAD_DIR, ".thumbs")
        os.makedirs(thumb_dir, exist_ok=True)
        thumb_path = os.path.join(thumb_dir, file_id + ".jpg")
        is_fresh = (
            os.path.exists(thumb_path)
            and os.path.getmtime(thumb_path) >= os.path.getmtime(path)
        )
        if is_fresh:
            return thumb_path

        # The context manager closes the source file handle even when one of
        # the steps below raises.
        with Image.open(path) as source:
            # iPhone / camera JPEGs encode rotation in EXIF rather than the
            # pixel data. Browsers honour that on the original via
            # image-orientation:from-image, but PIL strips EXIF when it saves
            # the JPEG thumb, leaving the pixels sideways. Bake the rotation
            # into the pixels before thumbnailing.
            im = ImageOps.exif_transpose(source)
            im.thumbnail((320, 320))
            if im.mode not in ("RGB", "L"):
                im = im.convert("RGB")
            im.save(thumb_path, "JPEG", quality=80)
        return thumb_path

    # ------------------------------------------------------------------
    # Routes. /cleanup and /stats are registered before /{file_id}, as in the
    # original file, so the parameterised route is matched last.
    # ------------------------------------------------------------------

    @router.post("")
    async def api_upload(request: Request, files: List[UploadFile] = File(...)):
        """Upload files with enhanced security and organization.

        Returns ``{"files": [...]}`` with one metadata dict per file that was
        saved. A file whose save raises a non-HTTP exception is logged and
        skipped.

        Raises:
            HTTPException: 400 when no files were sent, 429 when the per-IP
                limit check trips, 500 when every file failed; any
                HTTPException raised by the handler is passed through.
        """
        if not files:
            raise HTTPException(400, "No files uploaded")

        client_ip = request.client.host if request.client else "unknown"

        # Limit concurrent uploads per IP. The whole batch is counted when any
        # of this client's last len(files) logged timestamps is less than ten
        # seconds old, and nothing is counted otherwise. The condition does
        # not depend on the individual file, so it is evaluated once.
        # NOTE(review): this does not count uploads that are actually in
        # flight, and it assumes upload_rate_log maps client IP to a sequence
        # of time.time() stamps -- confirm against the handler.
        rate_log = upload_handler.upload_rate_log
        cutoff = time.time() - 10
        recently_active = client_ip in rate_log and any(
            stamp > cutoff for stamp in rate_log[client_ip][-len(files):]
        )
        ip_upload_count = len(files) if recently_active else 0

        if ip_upload_count >= upload_handler.max_concurrent_uploads:
            raise HTTPException(
                status_code=429,
                detail=f"Maximum concurrent uploads ({upload_handler.max_concurrent_uploads}) exceeded"
            )

        # The owner is the same for every file in the request.
        owner = get_current_user(request)
        out = []
        for u in files:
            try:
                meta = upload_handler.save_upload(u, client_ip, owner=owner)
                out.append({
                    "id": meta["id"],
                    "name": meta["name"],
                    "mime": meta["mime"],
                    "size": meta["size"],
                    "hash": meta["hash"],
                    "uploaded_at": meta["uploaded_at"],
                    "width": meta.get("width"),
                    "height": meta.get("height"),
                    "is_duplicate": meta.get("is_duplicate", False),
                })
            except HTTPException:
                raise
            except Exception as e:
                # One bad file must not fail the rest of the batch.
                logger.error("Failed to process upload %s: %s", u.filename, e)
                continue

        if not out:
            raise HTTPException(500, "All file uploads failed")

        return {"files": out}

    @router.post("/cleanup")
    async def manual_cleanup(request: Request):
        """Manually trigger cleanup of old uploads (admin only)."""
        require_admin(request)
        cleaned_count = upload_handler.cleanup_old_uploads()
        return {"status": "success", "files_cleaned": cleaned_count}

    @router.get("/stats")
    async def upload_stats(request: Request):
        """Get statistics about uploaded files (admin only).

        Raises:
            HTTPException: 500 when the handler fails to produce the stats.
        """
        require_admin(request)
        try:
            return upload_handler.get_upload_stats()
        except Exception as e:
            logger.error("Failed to get upload stats: %s", e)
            raise HTTPException(500, "Failed to get upload statistics") from e

    @router.get("/{file_id}")
    async def download_file(request: Request, file_id: str, thumb: int = 0):
        """Serve an uploaded file by its ID. `?thumb=1` returns a small cached
        JPEG thumbnail for images (used by chat attachment previews) so the
        client isn't downloading the full-resolution photo just to show it tiny.

        Raises:
            HTTPException: 400 for an invalid ID, 404 when the file is missing
                or belongs to someone else, 403 when the path is outside the
                handler's base directory or no user is logged in.
        """
        if not upload_handler.validate_upload_id(file_id):
            raise HTTPException(400, "Invalid file ID")
        path = _resolve_upload_path(file_id)

        # Look up original filename and owner from uploads.json
        info = _load_upload_info(file_id)
        original_name = info.get("name", file_id) if info else file_id
        _check_file_access(request, info)

        mime = mimetypes.guess_type(path)[0] or "application/octet-stream"

        # Downscaled thumbnail for image previews -- generated once and cached.
        if thumb and mime.startswith("image/"):
            try:
                thumb_path = _build_thumbnail(path, file_id)
                return FileResponse(thumb_path, media_type="image/jpeg")
            except Exception as e:
                logger.warning("Thumbnail generation failed for %s: %s", file_id, e)
                # Fall through to the full image.

        return FileResponse(path, media_type=mime, filename=original_name)

    @router.get("/{file_id}/vision")
    async def get_vision_text(request: Request, file_id: str, force: int = 0):
        """Return the vision-model OCR/description for an uploaded image.

        Cached under UPLOAD_DIR/.vision/{file_id}.txt -- first call computes,
        subsequent loads are instant. Pass force=1 to recompute.

        Returns:
            dict: ``{"text": <str>, "cached": <bool>}``.

        Raises:
            HTTPException: 400 for an invalid ID or a non-image file, 404/403
                as for ``download_file``, 500 when the analysis itself fails.
        """
        if not upload_handler.validate_upload_id(file_id):
            raise HTTPException(400, "Invalid file ID")
        path = _resolve_upload_path(file_id)
        _check_file_access(request, _load_upload_info(file_id))

        mime = mimetypes.guess_type(path)[0] or ""
        if not mime.startswith("image/"):
            raise HTTPException(400, "Not an image")

        cache_path = _vision_cache_path(file_id)
        if not force and os.path.exists(cache_path):
            try:
                with open(cache_path, encoding="utf-8") as f:
                    return {"text": f.read(), "cached": True}
            except Exception as e:
                # An unreadable cache entry is not fatal: recompute below.
                logger.warning("Vision cache read failed for %s: %s", file_id, e)

        from src.document_processor import analyze_image_with_vl

        try:
            # NOTE(review): this call is synchronous, so the event loop is
            # blocked while it runs; consider offloading it to a worker thread
            # if the analysis is slow.
            text = analyze_image_with_vl(path) or ""
        except Exception as e:
            logger.error("Vision analysis failed for %s: %s", file_id, e)
            raise HTTPException(500, f"Vision analysis failed: {e}") from e

        try:
            with open(cache_path, "w", encoding="utf-8") as f:
                f.write(text)
        except Exception as e:
            # Caching is best-effort; the freshly computed text is still returned.
            logger.warning("Vision cache write failed for %s: %s", file_id, e)

        return {"text": text, "cached": False}

    @router.put("/{file_id}/vision")
    async def put_vision_text(request: Request, file_id: str):
        """Persist a user-edited vision/OCR text for an attachment. Stored in
        the same cache file so the chat send picks it up as the override.

        The request body is a JSON object; its ``"text"`` value (default
        ``""``) replaces the cached text.

        Raises:
            HTTPException: 400 for an invalid ID, a body that is not valid
                JSON or not an object, or a non-string ``text``; 404 when the
                file has no uploads.json record or belongs to someone else;
                403 when no user is logged in.
        """
        if not upload_handler.validate_upload_id(file_id):
            raise HTTPException(400, "Invalid file ID")
        info = _load_upload_info(file_id)
        if not info:
            raise HTTPException(404, "File not found")
        _check_file_access(request, info)

        try:
            body = await request.json()
        except ValueError as e:
            # Malformed or empty JSON is a client error, not a server fault.
            raise HTTPException(400, "Request body must be valid JSON") from e
        payload = body or {}
        if not isinstance(payload, dict):
            raise HTTPException(400, "Request body must be a JSON object")

        text = payload.get("text", "")
        if not isinstance(text, str):
            raise HTTPException(400, "text must be a string")

        with open(_vision_cache_path(file_id), "w", encoding="utf-8") as f:
            f.write(text)
        return {"ok": True}

    async def periodic_rate_limit_cleanup():
        """Background task: call the handler's rate-limit cleanup once an hour.

        Runs until cancelled. A failing cleanup is logged and the loop keeps
        going, so one error does not end the task for good.
        """
        while True:
            await asyncio.sleep(3600)
            try:
                upload_handler.cleanup_rate_limits()
            except Exception:
                logger.exception("Rate limit cleanup failed")

    return router, periodic_rate_limit_cleanup
|