Ignore non-string email thread bodies (#1654)
This commit is contained in:
@@ -605,10 +605,10 @@ def _parse_html(html: str) -> list[dict[str, Any]] | None:
|
|||||||
def parse_thread(body_html: str | None, body_text: str | None) -> list[dict[str, Any]] | None:
|
def parse_thread(body_html: str | None, body_text: str | None) -> list[dict[str, Any]] | None:
|
||||||
"""Public entry point. Prefer HTML when available, else plaintext.
|
"""Public entry point. Prefer HTML when available, else plaintext.
|
||||||
Returns None if no quoted material found (caller renders flat)."""
|
Returns None if no quoted material found (caller renders flat)."""
|
||||||
if body_html:
|
if isinstance(body_html, str) and body_html:
|
||||||
out = _parse_html(body_html)
|
out = _parse_html(body_html)
|
||||||
if out:
|
if out:
|
||||||
return out
|
return out
|
||||||
if body_text:
|
if isinstance(body_text, str) and body_text:
|
||||||
return _parse_plaintext(body_text)
|
return _parse_plaintext(body_text)
|
||||||
return None
|
return None
|
||||||
|
|||||||
13
tests/test_email_thread_parser_nonstring.py
Normal file
13
tests/test_email_thread_parser_nonstring.py
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
from src.email_thread_parser import parse_thread
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_thread_ignores_non_string_bodies():
|
||||||
|
assert parse_thread(123, {"bad": True}) is None
|
||||||
|
assert parse_thread(["<blockquote>bad</blockquote>"], None) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_thread_still_handles_plaintext_quotes():
|
||||||
|
turns = parse_thread(None, "hi\n\nOn Tue, Alice wrote:\n> older")
|
||||||
|
|
||||||
|
assert turns
|
||||||
|
assert turns[0]["level"] == 0
|
||||||
Reference in New Issue
Block a user