From 49bf73b22861e50cd9097db082c5bdada475c009 Mon Sep 17 00:00:00 2001 From: Shaw Date: Wed, 3 Jun 2026 01:24:07 -0400 Subject: [PATCH] fix(forms): keep PDF-form export from dropping values when the label has '*' (#1407) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit parse_markdown_to_values — the read-back path for export-pdf, the export preview, and prepare-signed-reply — matched the bold field label with [^*]+, so it could not match a label containing '*' (the near-universal required-field marker: "Email *", "State *", "Signature *"). The value then stayed empty, so the exported PDF and the signed-reply attachment came out blank for that field with no error — a whole form of required fields could export completely empty. Match the label non-greedily (.+?) so '*' in labels is tolerated while still splitting at the first ':**' / '**[', which also preserves a value that itself contains ':**'. Adds tests/test_form_markdown_roundtrip.py (render -> parse roundtrip): asterisk text/choice/signature labels survive (fail before, pass after); plain labels and colon-bearing values are unaffected. Co-authored-by: NubsCarson --- src/pdf_form_doc.py | 9 ++++-- tests/test_form_markdown_roundtrip.py | 40 +++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 tests/test_form_markdown_roundtrip.py diff --git a/src/pdf_form_doc.py b/src/pdf_form_doc.py index 5158459..47183b3 100644 --- a/src/pdf_form_doc.py +++ b/src/pdf_form_doc.py @@ -126,8 +126,13 @@ def _decode_name(enc: str) -> str: """Inverse of _encode_name.""" import urllib.parse return urllib.parse.unquote(enc or "") -_TEXT_VALUE_RE = re.compile(r'\*\*[^*]+:\*\*\s*(?P.*)$') -_CHOICE_VALUE_RE = re.compile(r'\*\*[^*]+\*\*\s*\[[^\]]*\]\s*:\s*(?P.*)$') +# Label segment is non-greedy (.+?) so labels containing '*' — the near-universal +# required-field marker, e.g. "Email *" — are tolerated, while still splitting at +# the FIRST ':**' / '**[' so a value that itself contains ':**' is preserved. +# (The old [^*]+ refused to match any label with an asterisk and silently +# dropped that field's value on export.) +_TEXT_VALUE_RE = re.compile(r'\*\*.+?:\*\*\s*(?P.*)$') +_CHOICE_VALUE_RE = re.compile(r'\*\*.+?\*\*\s*\[[^\]]*\]\s*:\s*(?P.*)$') _CHECKBOX_VALUE_RE = re.compile(r'^\s*\[(?P[xX ])\]') _PLACEHOLDERS = {"_(empty)_", "_(not selected)_", "_(empty)_.", "_(unsigned)_"} diff --git a/tests/test_form_markdown_roundtrip.py b/tests/test_form_markdown_roundtrip.py new file mode 100644 index 0000000..94d4ae5 --- /dev/null +++ b/tests/test_form_markdown_roundtrip.py @@ -0,0 +1,40 @@ +"""Regression: PDF-form markdown export must not drop values whose label +contains an asterisk. + +`parse_markdown_to_values` is the read-back path for GET .../export-pdf, the +export preview, and prepare-signed-reply. Its bullet regexes matched the bold +label with `[^*]+`, so they could not match a label like "Email *" / "State *" +/ "Signature *" — the near-universal required-field marker. The value then +stayed empty and the exported PDF (and signed-reply attachment) came out blank +for that field, with no error. +""" +from src.pdf_form_doc import render_form_as_markdown, parse_markdown_to_values + + +def test_asterisk_label_value_survives_export_roundtrip(): + fields = [ + {"name": "email", "label": "Email Address *", "type": "text", + "value": "me@x.com", "page": 1}, + {"name": "state", "label": "State *", "type": "choice", + "options": ["CA", "NY"], "value": "NY", "page": 1}, + {"name": "sign", "label": "Signature *", "type": "signature", + "value": "signature:s1", "page": 1}, + ] + md = render_form_as_markdown(fields, "u", "F") + vals = parse_markdown_to_values(md) + assert vals["email"] == "me@x.com" + assert vals["state"] == "NY" + assert vals["sign"] == "signature:s1" + + +def test_plain_labels_and_colon_values_unaffected(): + fields = [ + {"name": "name", "label": "Full Name", "type": "text", + "value": "Alice", "page": 1}, + {"name": "time", "label": "Start Time", "type": "text", + "value": "9:00 sharp", "page": 1}, + ] + md = render_form_as_markdown(fields, "u", "F") + vals = parse_markdown_to_values(md) + assert vals["name"] == "Alice" + assert vals["time"] == "9:00 sharp"