Ignore non-string email thread bodies (#1654)
This commit is contained in:
@@ -605,10 +605,10 @@ def _parse_html(html: str) -> list[dict[str, Any]] | None:
|
||||
def parse_thread(body_html: str | None, body_text: str | None) -> list[dict[str, Any]] | None:
|
||||
"""Public entry point. Prefer HTML when available, else plaintext.
|
||||
Returns None if no quoted material found (caller renders flat)."""
|
||||
if body_html:
|
||||
if isinstance(body_html, str) and body_html:
|
||||
out = _parse_html(body_html)
|
||||
if out:
|
||||
return out
|
||||
if body_text:
|
||||
if isinstance(body_text, str) and body_text:
|
||||
return _parse_plaintext(body_text)
|
||||
return None
|
||||
|
||||
13
tests/test_email_thread_parser_nonstring.py
Normal file
13
tests/test_email_thread_parser_nonstring.py
Normal file
@@ -0,0 +1,13 @@
|
||||
from src.email_thread_parser import parse_thread
|
||||
|
||||
|
||||
def test_parse_thread_ignores_non_string_bodies():
    """parse_thread returns None when given bodies that are not strings."""
    # (body_html, body_text) pairs in which no argument is a str.
    non_string_cases = (
        (123, {"bad": True}),
        (["<blockquote>bad</blockquote>"], None),
    )
    for html_arg, text_arg in non_string_cases:
        assert parse_thread(html_arg, text_arg) is None
|
||||
|
||||
|
||||
def test_parse_thread_still_handles_plaintext_quotes():
    """A plaintext body with a quoted reply still yields turns."""
    plaintext_body = "hi\n\nOn Tue, Alice wrote:\n> older"
    turns = parse_thread(None, plaintext_body)

    # A non-empty result is expected, and its first entry sits at level 0.
    assert turns
    first_turn = turns[0]
    assert first_turn["level"] == 0
|
||||
Reference in New Issue
Block a user