From a54d34149ad9fabddfd65dae9b72f2008c7767ee Mon Sep 17 00:00:00 2001 From: Afonso Coutinho Date: Wed, 3 Jun 2026 05:45:56 +0100 Subject: [PATCH] Parse standard Gmail quote attribution dates Allow Gmail quote attribution parsing to handle standard US weekday/month/day/year comma patterns while preserving existing formats, with JS regression coverage. --- static/js/emailLibrary/signatureFold.js | 6 ++- tests/test_gmail_quote_attribution_js.py | 64 ++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 tests/test_gmail_quote_attribution_js.py diff --git a/static/js/emailLibrary/signatureFold.js b/static/js/emailLibrary/signatureFold.js index 375cbc0..0dda486 100644 --- a/static/js/emailLibrary/signatureFold.js +++ b/static/js/emailLibrary/signatureFold.js @@ -154,7 +154,11 @@ export function _extractQuoteMeta(html) { let date = sentMatch ? sentMatch[1].trim() : ''; if (!from && !date) { - const gmail = txt.match(/On\s+([^,]+?,[^,]+?\d{4}[^,]*),?\s+(.+?)\s+wrote\s*:/i); + // The date may carry up to three commas before the year: the standard + // US Gmail attribution is "On Mon, Apr 18, 2026 at 9:31 AM, Jane wrote:" + // (weekday and day-of-month each add one). A single-comma pattern never + // reached the year there, so the fold lost its sender/date headline. + const gmail = txt.match(/On\s+((?:[^,]*,){0,3}?[^,]*?\d{4}[^,]*),?\s+(.+?)\s+wrote\s*:/i); if (gmail) { date = gmail[1].trim(); from = gmail[2].trim(); } } diff --git a/tests/test_gmail_quote_attribution_js.py b/tests/test_gmail_quote_attribution_js.py new file mode 100644 index 0000000..81d7c01 --- /dev/null +++ b/tests/test_gmail_quote_attribution_js.py @@ -0,0 +1,64 @@ +"""Pin _extractQuoteMeta's Gmail attribution parsing (static/js/emailLibrary/signatureFold.js). + +Driven through `node --input-type=module` (same approach as test_hex_to_rgb_js.py); +skips when `node` is not installed. + +Regression: the Gmail-fallback date pattern allowed only ONE comma before the +4-digit year, but the standard US Gmail attribution +"On Mon, Apr 18, 2026 at 9:31 AM, Jane Doe wrote:" carries +TWO (after the weekday and after the day-of-month). The match failed, so the +collapsed "Earlier thread"/"Earlier reply" fold rendered without its +sender/date headline for the most common Gmail reply format. +""" +import json +import shutil +import subprocess +from pathlib import Path + +import pytest + +_REPO = Path(__file__).resolve().parent.parent +_HELPER = _REPO / "static" / "js" / "emailLibrary" / "signatureFold.js" +_HAS_NODE = shutil.which("node") is not None + + +def _meta(html: str) -> str: + js = ( + # _esc in the module touches `document` lazily; stub it so the module + # can be exercised outside a browser. + "globalThis.document = { createElement() { return {" + " set textContent(v) { this._t = v; }," + " get innerHTML() { return this._t || ''; } }; } };" + f"const {{ _extractQuoteMeta }} = await import('{_HELPER.as_posix()}');" + f"console.log(JSON.stringify(_extractQuoteMeta({json.dumps(html)})));" + ) + proc = subprocess.run( + ["node", "--input-type=module"], + input=js, capture_output=True, text=True, cwd=str(_REPO), timeout=30, + ) + assert proc.returncode == 0, proc.stderr + return json.loads(proc.stdout.strip()) + + +@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH") +def test_us_gmail_attribution_with_weekday_extracts_sender_and_date(): + meta = _meta("On Mon, Apr 18, 2026 at 9:31 AM, Jane Doe <jane@example.com> wrote:") + # date is clamped to 28 chars by the helper; sender must be present. + assert meta.startswith("Jane Doe jane@example.com") + assert "Mon, Apr 18, 2026" in meta + + +@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH") +def test_gmail_attribution_without_time_extracts_sender(): + meta = _meta("On Wed, Jan 1, 2025, Jane wrote:") + assert meta == "Jane · Wed, Jan 1, 2025" + + +@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH") +def test_previously_working_formats_still_match(): + # No weekday (single comma before the year). + meta = _meta("On Apr 18, 2026 at 9:31 AM, Jane Doe wrote:") + assert meta.startswith("Jane Doe · Apr 18, 2026") + # UK/intl day-before-month order. + meta = _meta("On Mon, 18 Apr 2026 at 09:31, Jane Doe <jane@example.com> wrote:") + assert meta.startswith("Jane Doe jane@example.com")