fix: is_markitdown_format crashes on a non-string path (#1618)
This commit is contained in:
@@ -26,6 +26,8 @@ MARKITDOWN_EXTS = frozenset({".docx", ".pptx", ".xlsx", ".xls", ".epub"})
|
||||
|
||||
def is_markitdown_format(path: str) -> bool:
    """Return True if the file extension is one we route through markitdown.

    Args:
        path: File path or name. ``str`` values and ``os.PathLike`` objects
            that resolve to ``str`` (for example ``pathlib.Path``) are
            inspected. Anything else (``None``, numbers, lists, bytes
            paths) is treated as "not a markitdown format".

    Returns:
        True when the lower-cased extension of ``path`` is a member of
        ``MARKITDOWN_EXTS``; False otherwise, including for non-path input.
    """
    try:
        # os.fspath returns str/bytes unchanged and unwraps os.PathLike
        # objects; every other type raises TypeError, which we map to False
        # instead of letting os.path.splitext crash.
        name = os.fspath(path)
    except TypeError:
        return False
    # MARKITDOWN_EXTS contains str extensions, so a bytes path cannot match.
    if not isinstance(name, str):
        return False
    return os.path.splitext(name)[1].lower() in MARKITDOWN_EXTS
|
||||
|
||||
|
||||
|
||||
16
tests/test_markitdown_format_nonstring.py
Normal file
16
tests/test_markitdown_format_nonstring.py
Normal file
@@ -0,0 +1,16 @@
|
||||
"""Regression: is_markitdown_format must tolerate a non-string path.
|
||||
|
||||
Previously the function passed `path` straight to `os.path.splitext`, which raises TypeError for None and other non-path values.
|
||||
"""
|
||||
from src.markitdown_runtime import is_markitdown_format
|
||||
|
||||
|
||||
def test_non_string_returns_false():
    """None, an int, and a list must each be reported as not routed."""
    for bogus in (None, 123, ["a.docx"]):
        assert is_markitdown_format(bogus) is False
|
||||
|
||||
|
||||
def test_valid_extension_detected():
    """A .docx name is routed through markitdown; a .txt name is not."""
    expectations = {"report.docx": True, "notes.txt": False}
    for filename, routed in expectations.items():
        assert is_markitdown_format(filename) is routed
|
||||
Reference in New Issue
Block a user