fix: is_markitdown_format crashes on a non-string path (#1618)
This commit is contained in:
@@ -26,6 +26,8 @@ MARKITDOWN_EXTS = frozenset({".docx", ".pptx", ".xlsx", ".xls", ".epub"})
|
|||||||
|
|
||||||
def is_markitdown_format(path: str) -> bool:
    """True if the file extension is one we route through markitdown.

    ``path`` is normally a ``str``; ``os.PathLike`` objects (for example
    ``pathlib.Path``) are accepted as well. Anything that is not a text
    path -- ``None``, numbers, lists, bytes paths -- yields ``False``
    instead of raising.

    The extension comparison is case-insensitive.
    """
    try:
        # os.fspath passes str/bytes through unchanged, unwraps os.PathLike
        # objects, and raises TypeError for everything else.
        fs_path = os.fspath(path)
    except TypeError:
        return False
    if not isinstance(fs_path, str):
        # A bytes path produces a bytes extension, which can never match
        # the str entries in MARKITDOWN_EXTS.
        return False
    return os.path.splitext(fs_path)[1].lower() in MARKITDOWN_EXTS
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
16
tests/test_markitdown_format_nonstring.py
Normal file
16
tests/test_markitdown_format_nonstring.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
"""Regression: is_markitdown_format must tolerate a non-string path.
|
||||||
|
|
||||||
|
It did `os.path.splitext(path)`, which raises TypeError on None / non-string.
|
||||||
|
"""
|
||||||
|
from src.markitdown_runtime import is_markitdown_format
|
||||||
|
|
||||||
|
|
||||||
|
def test_non_string_returns_false():
    """Non-string inputs must be reported as not-markitdown, never raise."""
    for bad_path in (None, 123, ["a.docx"]):
        assert is_markitdown_format(bad_path) is False
|
||||||
|
|
||||||
|
|
||||||
|
def test_valid_extension_detected():
    """A markitdown extension is recognised; an unrelated one is not."""
    expectations = {"report.docx": True, "notes.txt": False}
    for filename, expected in expectations.items():
        assert is_markitdown_format(filename) is expected
|
||||||
Reference in New Issue
Block a user