fix: uploads with _ or - in the extension become permanently unreadable (#1756)
This commit is contained in:
@@ -43,6 +43,20 @@ def is_valid_upload_id(upload_id: str) -> bool:
|
|||||||
return UPLOAD_ID_RE.fullmatch(upload_id or "") is not None
|
return UPLOAD_ID_RE.fullmatch(upload_id or "") is not None
|
||||||
|
|
||||||
|
|
||||||
|
def _build_upload_id(safe_filename: str) -> str:
|
||||||
|
"""Build a unique upload id whose extension matches UPLOAD_ID_RE.
|
||||||
|
|
||||||
|
secure_filename keeps '_' and '-', so an extension like '.jpg-1' (the
|
||||||
|
suffix browsers append to duplicate downloads) or '.v1_final' produced an
|
||||||
|
id that failed is_valid_upload_id, making the saved file permanently
|
||||||
|
unreadable (every read path gates on validate_upload_id). Sanitize the
|
||||||
|
extension to the single-alnum shape the id contract requires.
|
||||||
|
"""
|
||||||
|
_, ext = os.path.splitext(safe_filename or "")
|
||||||
|
ext = re.sub(r"[^A-Za-z0-9]", "", ext)
|
||||||
|
return uuid.uuid4().hex + (("." + ext) if ext else "")
|
||||||
|
|
||||||
|
|
||||||
def count_recent_uploads(timestamps, now: float, window: float = 10.0) -> int:
|
def count_recent_uploads(timestamps, now: float, window: float = 10.0) -> int:
|
||||||
"""Number of upload events in *timestamps* within the last *window* seconds.
|
"""Number of upload events in *timestamps* within the last *window* seconds.
|
||||||
|
|
||||||
@@ -596,8 +610,7 @@ class UploadHandler:
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Generate unique ID and determine save location
|
# Generate unique ID and determine save location
|
||||||
_, ext = os.path.splitext(safe_filename)
|
file_id = _build_upload_id(safe_filename)
|
||||||
file_id = f"{uuid.uuid4().hex}{ext}"
|
|
||||||
|
|
||||||
# Create date-based directory structure
|
# Create date-based directory structure
|
||||||
upload_dir = self.get_upload_dir()
|
upload_dir = self.get_upload_dir()
|
||||||
|
|||||||
37
tests/test_upload_id_extension.py
Normal file
37
tests/test_upload_id_extension.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
"""Upload ids must satisfy UPLOAD_ID_RE for every accepted filename.
|
||||||
|
|
||||||
|
secure_filename keeps '_' and '-', so a filename whose final extension
|
||||||
|
contains them (e.g. "photo.jpg-1" — the suffix browsers add to duplicate
|
||||||
|
downloads, or "doc.v1_final") produced an id like "<hex>.jpg-1" that fails
|
||||||
|
is_valid_upload_id. Since every read path (download, resolve, vision)
|
||||||
|
validates the id first, the saved bytes became permanently unreachable.
|
||||||
|
"""
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from src.upload_handler import _build_upload_id, is_valid_upload_id
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("name", [
|
||||||
|
"photo.jpg-1",
|
||||||
|
"doc.v1_final",
|
||||||
|
"invoice.2024-01",
|
||||||
|
"file.JPG_backup",
|
||||||
|
"report.pdf",
|
||||||
|
"image.png",
|
||||||
|
"noextension",
|
||||||
|
"",
|
||||||
|
])
|
||||||
|
def test_built_id_is_always_valid(name):
|
||||||
|
fid = _build_upload_id(name)
|
||||||
|
assert is_valid_upload_id(fid), (name, fid)
|
||||||
|
|
||||||
|
|
||||||
|
def test_normal_extension_is_preserved():
|
||||||
|
assert _build_upload_id("photo.png").endswith(".png")
|
||||||
|
assert _build_upload_id("doc.pdf").endswith(".pdf")
|
||||||
|
|
||||||
|
|
||||||
|
def test_problem_extension_is_sanitized_not_dropped_to_invalid():
|
||||||
|
fid = _build_upload_id("photo.jpg-1")
|
||||||
|
assert is_valid_upload_id(fid)
|
||||||
|
assert fid.endswith(".jpg1") # the '-' is stripped, alnum kept
|
||||||
Reference in New Issue
Block a user