From 46999debdb623fec36248128cc95059830403ad3 Mon Sep 17 00:00:00 2001 From: Afonso Coutinho Date: Wed, 3 Jun 2026 05:45:33 +0100 Subject: [PATCH] Decode email headers without injected spaces Use email.header.make_header for MIME header decoding so adjacent encoded/plain header parts preserve RFC spacing, with regression coverage. --- mcp_servers/email_server.py | 27 +++++++++++----- tests/test_mcp_email_decode_header_spaces.py | 34 ++++++++++++++++++++ 2 files changed, 53 insertions(+), 8 deletions(-) create mode 100644 tests/test_mcp_email_decode_header_spaces.py diff --git a/mcp_servers/email_server.py b/mcp_servers/email_server.py index 354e592..8438577 100644 --- a/mcp_servers/email_server.py +++ b/mcp_servers/email_server.py @@ -337,14 +337,25 @@ def _decode_header(raw): """Decode MIME encoded header.""" if not raw: return "" - parts = email.header.decode_header(raw) - decoded = [] - for data, charset in parts: - if isinstance(data, bytes): - decoded.append(data.decode(charset or "utf-8", errors="replace")) - else: - decoded.append(data) - return " ".join(decoded) + try: + # make_header concatenates per RFC 2047: no spurious space between an + # encoded-word and adjacent plain text (plain runs keep their own + # whitespace), and whitespace between two adjacent encoded-words is + # dropped. The old " ".join produced "Re: Jose" style double spaces + # on every non-ASCII subject or sender. + return str(email.header.make_header(email.header.decode_header(raw))) + except Exception: + # Malformed header or unknown charset: lossy per-part decode + decoded = [] + for data, charset in email.header.decode_header(raw): + if isinstance(data, bytes): + try: + decoded.append(data.decode(charset or "utf-8", errors="replace")) + except LookupError: + decoded.append(data.decode("utf-8", errors="replace")) + else: + decoded.append(data) + return "".join(decoded) def _extract_text(msg): diff --git a/tests/test_mcp_email_decode_header_spaces.py b/tests/test_mcp_email_decode_header_spaces.py new file mode 100644 index 0000000..5ce7009 --- /dev/null +++ b/tests/test_mcp_email_decode_header_spaces.py @@ -0,0 +1,34 @@ +"""mcp email server _decode_header must not inject spaces between parts. + +email.header.decode_header returns plain-text runs WITH their surrounding +whitespace (e.g. (b"Re: ", None)), so joining parts with " " produced a +double space after "Re:" on every non-ASCII subject, a spurious space in +"Name " senders, and violated RFC 2047 6.2 which requires whitespace +between two adjacent encoded-words to be dropped. +""" +import pytest + +pytest.importorskip("mcp") + +import mcp_servers.email_server as es + + +def test_prefix_then_encoded_word_single_space(): + assert es._decode_header("Re: =?utf-8?b?SsOzc2U=?=") == "Re: J\u00f3se" + + +def test_encoded_word_then_plain_text(): + assert es._decode_header("=?utf-8?b?SsOzc2U=?= Smith") == "J\u00f3se Smith" + + +def test_adjacent_encoded_words_join_without_space(): + out = es._decode_header("=?iso-8859-1?q?Caf=E9?= =?utf-8?b?5pel5pys?=") + assert out == "Caf\u00e9\u65e5\u672c" + + +def test_plain_ascii_header_unchanged(): + assert es._decode_header("Weekly report") == "Weekly report" + + +def test_empty_header(): + assert es._decode_header("") == ""