Ignore non-string email thread bodies (#1654)

This commit is contained in:
red person
2026-06-03 08:06:31 +03:00
committed by GitHub
parent a54d34149a
commit 8af1f85665
2 changed files with 15 additions and 2 deletions

View File

@@ -605,10 +605,10 @@ def _parse_html(html: str) -> list[dict[str, Any]] | None:
def parse_thread(body_html: str | None, body_text: str | None) -> list[dict[str, Any]] | None:
"""Public entry point. Prefer HTML when available, else plaintext.
Returns None if no quoted material found (caller renders flat)."""
if body_html:
if isinstance(body_html, str) and body_html:
out = _parse_html(body_html)
if out:
return out
if body_text:
if isinstance(body_text, str) and body_text:
return _parse_plaintext(body_text)
return None

View File

@@ -0,0 +1,13 @@
from src.email_thread_parser import parse_thread
def test_parse_thread_ignores_non_string_bodies():
    """parse_thread returns None when the supplied bodies are not strings."""
    # Each pair is (body_html, body_text); none contains a str body.
    non_string_cases = (
        (123, {"bad": True}),
        (["<blockquote>bad</blockquote>"], None),
    )
    for html_arg, text_arg in non_string_cases:
        assert parse_thread(html_arg, text_arg) is None
def test_parse_thread_still_handles_plaintext_quotes():
    """A plaintext body containing a quoted reply still yields parsed turns."""
    plaintext_body = "hi\n\nOn Tue, Alice wrote:\n> older"
    turns = parse_thread(None, plaintext_body)
    # The result must be non-empty before the first entry is inspected.
    assert turns
    first_turn = turns[0]
    assert first_turn["level"] == 0