From aa5e3f6884a91b0fd67fae0f861a51d6a534e3bd Mon Sep 17 00:00:00 2001
From: Afonso Coutinho
Date: Wed, 3 Jun 2026 01:00:10 +0100
Subject: [PATCH] fix: is_markitdown_format crashes on a non-string path (#1618)

os.path.splitext() raises TypeError when handed None or another
non-path value, so is_markitdown_format() now returns False for such
input instead of crashing.

The guard accepts os.PathLike as well as str: os.path.splitext() already
supports path-like objects, so pathlib.Path callers must keep getting a
real answer rather than a silent False.
---
 src/markitdown_runtime.py                 |  4 ++++
 tests/test_markitdown_format_nonstring.py | 24 ++++++++++++++++++++++++
 2 files changed, 28 insertions(+)
 create mode 100644 tests/test_markitdown_format_nonstring.py

diff --git a/src/markitdown_runtime.py b/src/markitdown_runtime.py
index f8338d3..ff30b01 100644
--- a/src/markitdown_runtime.py
+++ b/src/markitdown_runtime.py
@@ -26,6 +26,10 @@ MARKITDOWN_EXTS = frozenset({".docx", ".pptx", ".xlsx", ".xls", ".epub"})
 
 def is_markitdown_format(path: str) -> bool:
     """True if the file extension is one we route through markitdown."""
+    # Reject None / ints / lists up front instead of letting splitext raise.
+    # os.PathLike stays allowed: splitext runs os.fspath() on its argument.
+    if not isinstance(path, (str, os.PathLike)):
+        return False
     return os.path.splitext(path)[1].lower() in MARKITDOWN_EXTS
 
 
diff --git a/tests/test_markitdown_format_nonstring.py b/tests/test_markitdown_format_nonstring.py
new file mode 100644
index 0000000..2641948
--- /dev/null
+++ b/tests/test_markitdown_format_nonstring.py
@@ -0,0 +1,24 @@
+"""Regression: is_markitdown_format must tolerate a non-string path.
+
+It did `os.path.splitext(path)`, which raises TypeError on None / non-string.
+Path-like objects were always accepted by splitext and must keep working.
+"""
+from pathlib import Path
+
+from src.markitdown_runtime import is_markitdown_format
+
+
+def test_non_string_returns_false():
+    assert is_markitdown_format(None) is False
+    assert is_markitdown_format(123) is False
+    assert is_markitdown_format(["a.docx"]) is False
+
+
+def test_pathlike_still_supported():
+    assert is_markitdown_format(Path("report.docx")) is True
+    assert is_markitdown_format(Path("notes.txt")) is False
+
+
+def test_valid_extension_detected():
+    assert is_markitdown_format("report.docx") is True
+    assert is_markitdown_format("notes.txt") is False