Decode email headers without injected spaces

Use email.header.make_header for MIME header decoding so adjacent encoded/plain header parts preserve RFC spacing, with regression coverage.
This commit is contained in:
Afonso Coutinho
2026-06-03 05:45:33 +01:00
committed by GitHub
parent f29c827e6e
commit 46999debdb
2 changed files with 53 additions and 8 deletions

View File

@@ -337,14 +337,25 @@ def _decode_header(raw):
"""Decode MIME encoded header.""" """Decode MIME encoded header."""
if not raw: if not raw:
return "" return ""
parts = email.header.decode_header(raw) try:
decoded = [] # make_header concatenates per RFC 2047: no spurious space between an
for data, charset in parts: # encoded-word and adjacent plain text (plain runs keep their own
if isinstance(data, bytes): # whitespace), and whitespace between two adjacent encoded-words is
decoded.append(data.decode(charset or "utf-8", errors="replace")) # dropped. The old " ".join produced "Re: Jose" style double spaces
else: # on every non-ASCII subject or sender.
decoded.append(data) return str(email.header.make_header(email.header.decode_header(raw)))
return " ".join(decoded) except Exception:
# Malformed header or unknown charset: lossy per-part decode
decoded = []
for data, charset in email.header.decode_header(raw):
if isinstance(data, bytes):
try:
decoded.append(data.decode(charset or "utf-8", errors="replace"))
except LookupError:
decoded.append(data.decode("utf-8", errors="replace"))
else:
decoded.append(data)
return "".join(decoded)
def _extract_text(msg): def _extract_text(msg):

View File

@@ -0,0 +1,34 @@
"""mcp email server _decode_header must not inject spaces between parts.
email.header.decode_header returns plain-text runs WITH their surrounding
whitespace (e.g. (b"Re: ", None)), so joining parts with " " produced a
double space after "Re:" on every non-ASCII subject, a spurious space in
"Name <addr>" senders, and violated RFC 2047 6.2 which requires whitespace
between two adjacent encoded-words to be dropped.
"""
import pytest
pytest.importorskip("mcp")
import mcp_servers.email_server as es
def test_prefix_then_encoded_word_single_space():
    """A plain "Re: " prefix followed by an encoded word decodes with one space."""
    decoded = es._decode_header("Re: =?utf-8?b?SsOzc2U=?=")
    assert decoded == "Re: J\u00f3se"
def test_encoded_word_then_plain_text():
    """An encoded word followed by plain text keeps the single separating space."""
    decoded = es._decode_header("=?utf-8?b?SsOzc2U=?= Smith")
    assert decoded == "J\u00f3se Smith"
def test_adjacent_encoded_words_join_without_space():
    """Whitespace between two adjacent encoded words is dropped on decode."""
    header = "=?iso-8859-1?q?Caf=E9?= =?utf-8?b?5pel5pys?="
    expected = "Caf\u00e9\u65e5\u672c"
    assert es._decode_header(header) == expected
def test_plain_ascii_header_unchanged():
    """A header with no encoded words is returned as-is."""
    header = "Weekly report"
    assert es._decode_header(header) == "Weekly report"
def test_empty_header():
    """An empty header value decodes to an empty string."""
    decoded = es._decode_header("")
    assert decoded == ""