Ignore non-string email thread bodies (#1654)

This commit is contained in:
red person
2026-06-03 08:06:31 +03:00
committed by GitHub
parent a54d34149a
commit 8af1f85665
2 changed files with 15 additions and 2 deletions

View File

@@ -605,10 +605,10 @@ def _parse_html(html: str) -> list[dict[str, Any]] | None:
def parse_thread(body_html: str | None, body_text: str | None) -> list[dict[str, Any]] | None: def parse_thread(body_html: str | None, body_text: str | None) -> list[dict[str, Any]] | None:
"""Public entry point. Prefer HTML when available, else plaintext. """Public entry point. Prefer HTML when available, else plaintext.
Returns None if no quoted material found (caller renders flat).""" Returns None if no quoted material found (caller renders flat)."""
if body_html: if isinstance(body_html, str) and body_html:
out = _parse_html(body_html) out = _parse_html(body_html)
if out: if out:
return out return out
if body_text: if isinstance(body_text, str) and body_text:
return _parse_plaintext(body_text) return _parse_plaintext(body_text)
return None return None

View File

@@ -0,0 +1,13 @@
from src.email_thread_parser import parse_thread
def test_parse_thread_ignores_non_string_bodies():
    """Non-string bodies must yield None rather than reach the parsers."""
    assert parse_thread(123, {"bad": True}) is None
    assert parse_thread(["<blockquote>bad</blockquote>"], None) is None
def test_parse_thread_still_handles_plaintext_quotes():
    """A plaintext body with a quoted reply is still split into turns."""
    turns = parse_thread(None, "hi\n\nOn Tue, Alice wrote:\n> older")
    assert turns
    # The first turn is expected to carry nesting level 0.
    assert turns[0]["level"] == 0