diff --git a/routes/upload_routes.py b/routes/upload_routes.py index 8572d47..d45ff5c 100644 --- a/routes/upload_routes.py +++ b/routes/upload_routes.py @@ -8,6 +8,7 @@ from typing import List import logging from core.middleware import require_admin from src.auth_helpers import get_current_user +from src.upload_handler import count_recent_uploads logger = logging.getLogger(__name__) @@ -24,15 +25,18 @@ def setup_upload_routes(upload_handler): client_ip = request.client.host if request.client else "unknown" out = [] - - # Limit concurrent uploads per IP - ip_upload_count = sum( - 1 for f in files - if client_ip in upload_handler.upload_rate_log and - any(now > time.time() - 10 for now in upload_handler.upload_rate_log[client_ip][-len(files):]) + + # Limit concurrent uploads per IP. Count genuine recent upload events — + # NOT the number of files in this batch. The previous check summed over + # `files`, so a single multi-file request counted itself as N concurrent + # uploads and tripped the limit (issue #1346: "attach more than one file + # → the model doesn't even see them"). save_upload still enforces the + # per-minute sliding-window rate limit per file. + recent_uploads = count_recent_uploads( + upload_handler.upload_rate_log.get(client_ip, []), time.time() ) - - if ip_upload_count >= upload_handler.max_concurrent_uploads: + + if recent_uploads >= upload_handler.max_concurrent_uploads: raise HTTPException( status_code=429, detail=f"Maximum concurrent uploads ({upload_handler.max_concurrent_uploads}) exceeded" diff --git a/src/upload_handler.py b/src/upload_handler.py index 7396a6a..61df395 100644 --- a/src/upload_handler.py +++ b/src/upload_handler.py @@ -43,6 +43,18 @@ def is_valid_upload_id(upload_id: str) -> bool: return UPLOAD_ID_RE.fullmatch(upload_id or "") is not None +def count_recent_uploads(timestamps, now: float, window: float = 10.0) -> int: + """Number of upload events in *timestamps* within the last *window* seconds. + + Used by the per-IP concurrency guard. The count is of genuine prior upload + events — it must NOT scale with how many files are in the *current* request, + or a single multi-file batch would reject itself (issue #1346).""" + if not timestamps: + return 0 + cutoff = now - window + return sum(1 for t in timestamps if t > cutoff) + + class UploadHandler: def __init__(self, base_dir: str, upload_dir: str): self.base_dir = base_dir @@ -50,7 +62,13 @@ class UploadHandler: self.max_upload_size = 10 * 1024 * 1024 # 10MB self.max_concurrent_uploads = 3 self.cleanup_days = 30 - self.upload_rate_limit = 5 # Max 5 uploads per minute per IP + # Per-IP per-minute cap. save_upload() counts EACH file, and the chat + # composer lets a user attach up to MAX_FILES (10, static/js/fileHandler.js) + # in one batch — so this must comfortably exceed 10, or a single 6+ file + # attach is rejected mid-batch (issue #1346: "5 work, 6 fail"). Burst abuse + # is separately bounded by max_concurrent_uploads. Headroom for a few full + # batches per minute. + self.upload_rate_limit = 60 # max 60 file-uploads per minute per IP self.upload_rate_window = 60 # 60 seconds # Track upload rates diff --git a/tests/test_upload_multifile.py b/tests/test_upload_multifile.py new file mode 100644 index 0000000..ef2e435 --- /dev/null +++ b/tests/test_upload_multifile.py @@ -0,0 +1,165 @@ +"""Regression tests for issue #1346 — attaching more than one file at once made +the model "not even see" the attachments. + +Root cause: the per-IP concurrency guard in routes/upload_routes.py summed its +condition over `files`, and the condition didn't depend on the loop variable, so +it collapsed to `len(files)` whenever the IP had any recent upload. A multi-file +batch sent right after a single upload (the reporter's exact flow) therefore +counted itself as N concurrent uploads and tripped `max_concurrent_uploads`, +returning 429. The browser swallowed the 429 (no `files` in the body) and sent +the chat message with no attachments. + +The fix counts genuine recent upload *events*, independent of the current +batch's file count. save_upload still enforces the per-minute rate limit. +""" +import io +import re +import types +from pathlib import Path + +import pytest +from fastapi import APIRouter + +from src.upload_handler import count_recent_uploads, UploadHandler +import routes.upload_routes as up + +_REPO = Path(__file__).resolve().parent.parent + + +def test_count_recent_uploads_ignores_batch_size(): + now = 1_000.0 + # No prior uploads -> zero, regardless of how big the incoming batch is. + assert count_recent_uploads([], now) == 0 + # Only events inside the window are counted. + assert count_recent_uploads([now - 1, now - 2, now - 3], now, window=10) == 3 + assert count_recent_uploads([now - 1, now - 50], now, window=10) == 1 + assert count_recent_uploads([now - 11], now, window=10) == 0 + + +def _fake_handler(): + h = types.SimpleNamespace() + h.upload_rate_log = {} + h.max_concurrent_uploads = 3 + + def save_upload(u, client_ip, owner=None): + # Mimic the real handler: every saved file logs a timestamp. + h.upload_rate_log.setdefault(client_ip, []).append(_NOW) + name = getattr(u, "filename", "f") + return { + "id": "0" * 32 + "." + "txt", + "name": name, + "mime": "text/plain", + "size": 1, + "hash": "h", + "uploaded_at": "now", + "width": None, + "height": None, + "is_duplicate": False, + } + + h.save_upload = save_upload + return h + + +_NOW = 5_000.0 + + +def _endpoint(router): + for r in router.routes: + if getattr(r, "path", None) == "/api/upload" and "POST" in getattr(r, "methods", set()): + return r.endpoint + raise AssertionError("upload endpoint not found") + + +def _request(ip="1.2.3.4", user="tester"): + return types.SimpleNamespace( + client=types.SimpleNamespace(host=ip), + state=types.SimpleNamespace(current_user=user), + ) + + +def _files(n): + return [types.SimpleNamespace(filename=f"f{i}.txt") for i in range(n)] + + +@pytest.fixture(autouse=True) +def _reset_router(monkeypatch): + # Module-level router accumulates routes across setup calls; reset it. + monkeypatch.setattr(up, "router", APIRouter(prefix="/api/upload", tags=["upload"])) + # Freeze time so the seeded "recent upload" is deterministic. + monkeypatch.setattr(up.time, "time", lambda: _NOW) + + +async def test_multifile_after_a_recent_upload_is_not_rejected(): + """The bug: one prior upload + a 3-file batch -> 429. Must now succeed.""" + h = _fake_handler() + h.upload_rate_log["1.2.3.4"] = [_NOW - 1] # step 1: a single file moments ago + up.setup_upload_routes(h) + endpoint = _endpoint(up.router) + + result = await endpoint(_request(), _files(3)) + + assert [f["name"] for f in result["files"]] == ["f0.txt", "f1.txt", "f2.txt"] + + +async def test_fresh_multifile_upload_succeeds(): + h = _fake_handler() + up.setup_upload_routes(h) + endpoint = _endpoint(up.router) + + result = await endpoint(_request(), _files(5)) + + assert len(result["files"]) == 5 + + +async def test_genuine_recent_volume_still_throttled(): + """The guard is preserved: enough genuine recent uploads still 429s.""" + from fastapi import HTTPException + + h = _fake_handler() + h.upload_rate_log["1.2.3.4"] = [_NOW - 1, _NOW - 2, _NOW - 3] # 3 recent events + up.setup_upload_routes(h) + endpoint = _endpoint(up.router) + + with pytest.raises(HTTPException) as ei: + await endpoint(_request(), _files(1)) + assert ei.value.status_code == 429 + + +# ── #1346 follow-up: the per-minute rate limit must not reject a single +# full multi-file batch. The reporter found "5 attachments work, 6 fail": +# save_upload() counts each file against upload_rate_limit, which was 5 while +# the composer allows MAX_FILES=10. ────────────────────────────────────────── + +def _max_files_from_frontend() -> int: + src = (_REPO / "static/js/fileHandler.js").read_text(encoding="utf-8") + m = re.search(r"MAX_FILES\s*=\s*(\d+)", src) + assert m, "MAX_FILES not found in fileHandler.js" + return int(m.group(1)) + + +def test_rate_limit_accommodates_a_full_batch(): + # The per-minute file cap must comfortably exceed the frontend batch cap, + # or a single legitimate multi-file attach trips it (issue #1346). + h = UploadHandler.__new__(UploadHandler) + UploadHandler.__init__(h, base_dir="/tmp", upload_dir="/tmp/_odysseus_test_uploads_cfg") + assert h.upload_rate_limit >= _max_files_from_frontend() + + +def test_six_file_batch_is_not_rate_limited(tmp_path): + from fastapi import HTTPException + + h = UploadHandler(base_dir=str(tmp_path), upload_dir=str(tmp_path / "uploads")) + saved = 0 + for i in range(6): + u = types.SimpleNamespace( + file=io.BytesIO(f"file number {i} unique content".encode()), + filename=f"f{i}.txt", + ) + try: + meta = h.save_upload(u, client_ip="9.9.9.9", owner="tester") + except HTTPException as e: + raise AssertionError(f"file {i} rejected with {e.status_code}: {e.detail}") + assert meta and meta.get("id") + saved += 1 + assert saved == 6