fix: is_markitdown_format crashes on a non-string path (#1618)

This commit is contained in:
Afonso Coutinho
2026-06-03 01:00:10 +01:00
committed by GitHub
parent fc220f760f
commit aa5e3f6884
2 changed files with 18 additions and 0 deletions

View File

@@ -26,6 +26,8 @@ MARKITDOWN_EXTS = frozenset({".docx", ".pptx", ".xlsx", ".xls", ".epub"})
def is_markitdown_format(path: str) -> bool:
    """Return True if the file extension is one we route through markitdown.

    Args:
        path: Filesystem path to classify. A ``str`` is expected, but any
            ``os.PathLike`` object that resolves to a ``str`` (for example
            ``pathlib.Path``) is accepted as well.

    Returns:
        True when the lower-cased extension of ``path`` is a member of
        ``MARKITDOWN_EXTS``. False otherwise, including when ``path`` is not
        path-like at all (``None``, an int, a list, ...) or resolves to
        ``bytes``; this function never raises for a bad input type.
    """
    try:
        # os.fspath returns str/bytes unchanged, unwraps os.PathLike objects,
        # and raises TypeError for anything else (None, int, list, ...).
        fs_path = os.fspath(path)
    except TypeError:
        return False
    # Byte paths yield a bytes extension, which can never match the str
    # entries being compared against, so reject them explicitly.
    if not isinstance(fs_path, str):
        return False
    extension = os.path.splitext(fs_path)[1].lower()
    return extension in MARKITDOWN_EXTS

View File

@@ -0,0 +1,16 @@
"""Regression: is_markitdown_format must tolerate a non-string path.
Previously it called `os.path.splitext(path)` unguarded, which raises TypeError on None / non-string.
"""
from src.markitdown_runtime import is_markitdown_format
def test_non_string_returns_false():
    """Non-string inputs must yield False instead of raising."""
    for bad_path in (None, 123, ["a.docx"]):
        assert is_markitdown_format(bad_path) is False
def test_valid_extension_detected():
    """String paths are still classified by their extension."""
    expectations = (
        ("report.docx", True),
        ("notes.txt", False),
    )
    for candidate, expected in expectations:
        assert is_markitdown_format(candidate) is expected