fix(tests): use non-repeating split chunk fixture
Updates the split_chunks containment regression test to use deterministic non-repeating records instead of a repeating fixture that could produce accidental substring matches. Restores one focused part of the Python CI baseline tracked in #2580.
This commit is contained in:
committed by
GitHub
parent
050283c145
commit
8bc16ef245
@@ -14,7 +14,10 @@ def test_no_duplicate_tail_chunk():
|
|||||||
|
|
||||||
|
|
||||||
def test_no_chunk_is_contained_in_another():
|
def test_no_chunk_is_contained_in_another():
|
||||||
text = "".join(chr(33 + (k % 90)) for k in range(2000))
|
text = "\n".join(
|
||||||
|
f"unique-line-{k:04d}-square-{k * k:08d}-cube-{k * k * k:012d}"
|
||||||
|
for k in range(300)
|
||||||
|
)
|
||||||
chunks = split_chunks(text, size=1000, overlap=200)
|
chunks = split_chunks(text, size=1000, overlap=200)
|
||||||
# The buggy version produced a final 200-char chunk fully inside the prior one.
|
# The buggy version produced a final 200-char chunk fully inside the prior one.
|
||||||
for a in range(len(chunks)):
|
for a in range(len(chunks)):
|
||||||
|
|||||||
Reference in New Issue
Block a user