From 8bc16ef245db1f8bb3a40de9acc3c83375416689 Mon Sep 17 00:00:00 2001 From: Alexandre Teixeira <111787685+alteixeira20@users.noreply.github.com> Date: Thu, 4 Jun 2026 18:11:42 +0100 Subject: [PATCH] fix(tests): use non-repeating split chunk fixture Updates the split_chunks containment regression test to use deterministic non-repeating records instead of a repeating fixture that could produce accidental substring matches. Restores one focused part of the Python CI baseline tracked in #2580. --- tests/test_split_chunks_no_duplicate_tail.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_split_chunks_no_duplicate_tail.py b/tests/test_split_chunks_no_duplicate_tail.py index a7fc32d..7d2f1d1 100644 --- a/tests/test_split_chunks_no_duplicate_tail.py +++ b/tests/test_split_chunks_no_duplicate_tail.py @@ -14,7 +14,10 @@ def test_no_duplicate_tail_chunk(): def test_no_chunk_is_contained_in_another(): - text = "".join(chr(33 + (k % 90)) for k in range(2000)) + text = "\n".join( + f"unique-line-{k:04d}-square-{k * k:08d}-cube-{k * k * k:012d}" + for k in range(300) + ) chunks = split_chunks(text, size=1000, overlap=200) # The buggy version produced a final 200-char chunk fully inside the prior one. for a in range(len(chunks)):