Ignore stale duplicate upload rows (#1256)
This commit is contained in:
@@ -512,11 +512,23 @@ class UploadHandler:
|
|||||||
existing_key = None
|
existing_key = None
|
||||||
with self._index_lock:
|
with self._index_lock:
|
||||||
existing_files = self._load_upload_index()
|
existing_files = self._load_upload_index()
|
||||||
|
stale_keys = []
|
||||||
for key, info in existing_files.items():
|
for key, info in existing_files.items():
|
||||||
if info.get("hash") == file_hash and info.get("owner") == owner:
|
if info.get("hash") == file_hash and info.get("owner") == owner:
|
||||||
existing_key = key
|
stored_path = info.get("path")
|
||||||
existing_file = info
|
if stored_path and os.path.exists(stored_path) and self._inside_upload_dir(stored_path):
|
||||||
break
|
existing_key = key
|
||||||
|
existing_file = info
|
||||||
|
break
|
||||||
|
stale_keys.append(key)
|
||||||
|
if stale_keys:
|
||||||
|
for key in stale_keys:
|
||||||
|
existing_files.pop(key, None)
|
||||||
|
try:
|
||||||
|
self._atomic_write_json(uploads_db_path, existing_files)
|
||||||
|
logger.info("Removed %d stale upload index entries for missing duplicates", len(stale_keys))
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to remove stale upload index entries: {e}")
|
||||||
if existing_file:
|
if existing_file:
|
||||||
logger.info(f"Duplicate file upload detected: {original_filename} -> {existing_file['id']}")
|
logger.info(f"Duplicate file upload detected: {original_filename} -> {existing_file['id']}")
|
||||||
|
|
||||||
|
|||||||
@@ -339,6 +339,37 @@ def test_smoke_duplicate_upload(tmp_path):
|
|||||||
assert len(final) == 1, f"Duplicate upload should not add a new row, got {len(final)}"
|
assert len(final) == 1, f"Duplicate upload should not add a new row, got {len(final)}"
|
||||||
|
|
||||||
|
|
||||||
|
def test_duplicate_upload_ignores_stale_missing_file(tmp_path):
    """Re-uploading identical content after its stored file was deleted must
    produce a fresh upload instead of resolving to the stale uploads.json row
    whose file no longer exists on disk."""
    handler = _make_handler(tmp_path)
    # NOTE(review): presumably keeps the rate limiter from interfering with
    # two back-to-back uploads from the same address -- confirm.
    handler.upload_rate_limit = 100
    payload = b"same-content-after-cleanup"

    def upload_once():
        # Same filename, bytes, client address and owner on every call, so
        # the second upload matches the first one's index row.
        return handler.save_upload(
            SimpleNamespace(filename="cleanup.txt", file=io.BytesIO(payload)),
            "127.0.0.1",
            "owner_a",
        )

    original = upload_once()
    # Simulate cleanup deleting the file while its index row stays behind.
    os.remove(original["path"])
    replacement = upload_once()

    # The second upload must be treated as new and must exist on disk.
    assert replacement.get("is_duplicate") is not True
    assert replacement["id"] != original["id"]
    assert os.path.exists(replacement["path"])

    # The persisted index must have dropped the stale row and kept the new one.
    with open(_db_path(handler), "r", encoding="utf-8") as index_file:
        index_rows = json.load(index_file)
    indexed_ids = {row.get("id") for row in index_rows.values()}
    assert original["id"] not in indexed_ids
    assert replacement["id"] in indexed_ids
|
||||||
|
|
||||||
|
|
||||||
def test_smoke_info_lookup_after_bak_recovery(tmp_path):
|
def test_smoke_info_lookup_after_bak_recovery(tmp_path):
|
||||||
"""Smoke test: after a torn write is recovered from the ``.bak`` sibling,
|
"""Smoke test: after a torn write is recovered from the ``.bak`` sibling,
|
||||||
``get_upload_info`` still finds the original entry by id."""
|
``get_upload_info`` still finds the original entry by id."""
|
||||||
|
|||||||
Reference in New Issue
Block a user