fix: uploads with _ or - in the extension become permanently unreadable (#1756)
This commit is contained in:
@@ -43,6 +43,20 @@ def is_valid_upload_id(upload_id: str) -> bool:
|
||||
return UPLOAD_ID_RE.fullmatch(upload_id or "") is not None
|
||||
|
||||
|
||||
def _build_upload_id(safe_filename: str) -> str:
|
||||
"""Build a unique upload id whose extension matches UPLOAD_ID_RE.
|
||||
|
||||
secure_filename keeps '_' and '-', so an extension like '.jpg-1' (the
|
||||
suffix browsers append to duplicate downloads) or '.v1_final' produced an
|
||||
id that failed is_valid_upload_id, making the saved file permanently
|
||||
unreadable (every read path gates on validate_upload_id). Sanitize the
|
||||
extension to the single-alnum shape the id contract requires.
|
||||
"""
|
||||
_, ext = os.path.splitext(safe_filename or "")
|
||||
ext = re.sub(r"[^A-Za-z0-9]", "", ext)
|
||||
return uuid.uuid4().hex + (("." + ext) if ext else "")
|
||||
|
||||
|
||||
def count_recent_uploads(timestamps, now: float, window: float = 10.0) -> int:
|
||||
"""Number of upload events in *timestamps* within the last *window* seconds.
|
||||
|
||||
@@ -596,8 +610,7 @@ class UploadHandler:
|
||||
}
|
||||
|
||||
# Generate unique ID and determine save location
|
||||
_, ext = os.path.splitext(safe_filename)
|
||||
file_id = f"{uuid.uuid4().hex}{ext}"
|
||||
file_id = _build_upload_id(safe_filename)
|
||||
|
||||
# Create date-based directory structure
|
||||
upload_dir = self.get_upload_dir()
|
||||
|
||||
37
tests/test_upload_id_extension.py
Normal file
37
tests/test_upload_id_extension.py
Normal file
@@ -0,0 +1,37 @@
|
||||
"""Upload ids must satisfy UPLOAD_ID_RE for every accepted filename.
|
||||
|
||||
secure_filename keeps '_' and '-', so a filename whose final extension
|
||||
contains them (e.g. "photo.jpg-1" — the suffix browsers add to duplicate
|
||||
downloads, or "doc.v1_final") produced an id like "<hex>.jpg-1" that fails
|
||||
is_valid_upload_id. Since every read path (download, resolve, vision)
|
||||
validates the id first, the saved bytes became permanently unreachable.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
from src.upload_handler import _build_upload_id, is_valid_upload_id
|
||||
|
||||
|
||||
@pytest.mark.parametrize("name", [
|
||||
"photo.jpg-1",
|
||||
"doc.v1_final",
|
||||
"invoice.2024-01",
|
||||
"file.JPG_backup",
|
||||
"report.pdf",
|
||||
"image.png",
|
||||
"noextension",
|
||||
"",
|
||||
])
|
||||
def test_built_id_is_always_valid(name):
|
||||
fid = _build_upload_id(name)
|
||||
assert is_valid_upload_id(fid), (name, fid)
|
||||
|
||||
|
||||
def test_normal_extension_is_preserved():
|
||||
assert _build_upload_id("photo.png").endswith(".png")
|
||||
assert _build_upload_id("doc.pdf").endswith(".pdf")
|
||||
|
||||
|
||||
def test_problem_extension_is_sanitized_not_dropped_to_invalid():
|
||||
fid = _build_upload_id("photo.jpg-1")
|
||||
assert is_valid_upload_id(fid)
|
||||
assert fid.endswith(".jpg1") # the '-' is stripped, alnum kept
|
||||
Reference in New Issue
Block a user