diff --git a/src/upload_handler.py b/src/upload_handler.py
index ba54219..0937a14 100644
--- a/src/upload_handler.py
+++ b/src/upload_handler.py
@@ -512,11 +512,23 @@ class UploadHandler:
         existing_key = None
         with self._index_lock:
             existing_files = self._load_upload_index()
+            stale_keys = []
             for key, info in existing_files.items():
                 if info.get("hash") == file_hash and info.get("owner") == owner:
-                    existing_key = key
-                    existing_file = info
-                    break
+                    stored_path = info.get("path")
+                    if stored_path and os.path.exists(stored_path) and self._inside_upload_dir(stored_path):
+                        existing_key = key
+                        existing_file = info
+                        break
+                    stale_keys.append(key)
+            if stale_keys:
+                for key in stale_keys:
+                    existing_files.pop(key, None)
+                try:
+                    self._atomic_write_json(uploads_db_path, existing_files)
+                    logger.info("Removed %d stale upload index entries for missing duplicates", len(stale_keys))
+                except Exception as e:
+                    logger.warning(f"Failed to remove stale upload index entries: {e}")
 
         if existing_file:
             logger.info(f"Duplicate file upload detected: {original_filename} -> {existing_file['id']}")
diff --git a/tests/test_upload_handler_atomicity.py b/tests/test_upload_handler_atomicity.py
index ceea9f0..73cf279 100644
--- a/tests/test_upload_handler_atomicity.py
+++ b/tests/test_upload_handler_atomicity.py
@@ -339,6 +339,37 @@ def test_smoke_duplicate_upload(tmp_path):
     assert len(final) == 1, f"Duplicate upload should not add a new row, got {len(final)}"
 
 
+def test_duplicate_upload_ignores_stale_missing_file(tmp_path):
+    """A stale uploads.json row should not make a new upload point at a
+    file that cleanup already removed from disk."""
+    handler = _make_handler(tmp_path)
+    handler.upload_rate_limit = 100
+    content = b"same-content-after-cleanup"
+
+    first = handler.save_upload(
+        SimpleNamespace(filename="cleanup.txt", file=io.BytesIO(content)),
+        "127.0.0.1",
+        "owner_a",
+    )
+    os.remove(first["path"])
+
+    second = handler.save_upload(
+        SimpleNamespace(filename="cleanup.txt", file=io.BytesIO(content)),
+        "127.0.0.1",
+        "owner_a",
+    )
+
+    assert second.get("is_duplicate") is not True
+    assert second["id"] != first["id"]
+    assert os.path.exists(second["path"])
+
+    with open(_db_path(handler), "r", encoding="utf-8") as f:
+        final = json.load(f)
+    ids = {row.get("id") for row in final.values()}
+    assert first["id"] not in ids
+    assert second["id"] in ids
+
+
 def test_smoke_info_lookup_after_bak_recovery(tmp_path):
     """Smoke test: after a torn write is recovered from the ``.bak`` sibling,
     ``get_upload_info`` still finds the original entry by id."""