diff --git a/src/pdf_form_doc.py b/src/pdf_form_doc.py
index 5158459..47183b3 100644
--- a/src/pdf_form_doc.py
+++ b/src/pdf_form_doc.py
@@ -126,8 +126,17 @@ def _decode_name(enc: str) -> str:
     """Inverse of _encode_name."""
     import urllib.parse
     return urllib.parse.unquote(enc or "")
-_TEXT_VALUE_RE = re.compile(r'\*\*[^*]+:\*\*\s*(?P<value>.*)$')
-_CHOICE_VALUE_RE = re.compile(r'\*\*[^*]+\*\*\s*\[[^\]]*\]\s*:\s*(?P<value>.*)$')
+# Label segment is lazy (.+?) instead of [^*]+ so labels that contain '*'
+# (the common required-field marker, e.g. "Email *") still match. Laziness
+# makes the text pattern split at the FIRST ':**', so a value that itself
+# contains ':**' is captured whole; the choice pattern likewise stops at the
+# first '**' that is followed by a bracketed '[...]' segment and ':'.
+# The previous [^*]+ could not match a label holding an asterisk, and that
+# field's value was then silently dropped on export.
+# NOTE(review): group name "value" is reconstructed in these patterns --
+# confirm it is the name parse_markdown_to_values reads.
+_TEXT_VALUE_RE = re.compile(r'\*\*.+?:\*\*\s*(?P<value>.*)$')
+_CHOICE_VALUE_RE = re.compile(r'\*\*.+?\*\*\s*\[[^\]]*\]\s*:\s*(?P<value>.*)$')
 _CHECKBOX_VALUE_RE = re.compile(r'^\s*\[(?P<value>[xX ])\]')
 _PLACEHOLDERS = {"_(empty)_", "_(not selected)_", "_(empty)_.", "_(unsigned)_"}
 
diff --git a/tests/test_form_markdown_roundtrip.py b/tests/test_form_markdown_roundtrip.py
new file mode 100644
index 0000000..94d4ae5
--- /dev/null
+++ b/tests/test_form_markdown_roundtrip.py
@@ -0,0 +1,40 @@
+"""Regression: PDF-form markdown export must not drop values whose label
+contains an asterisk.
+
+`parse_markdown_to_values` is the read-back path for GET .../export-pdf, the
+export preview, and prepare-signed-reply. Its bullet regexes matched the bold
+label with `[^*]+`, so they could not match a label like "Email *" / "State *"
+/ "Signature *" — the near-universal required-field marker. The value then
+stayed empty and the exported PDF (and signed-reply attachment) came out blank
+for that field, with no error.
+"""
+from src.pdf_form_doc import render_form_as_markdown, parse_markdown_to_values
+
+
+def test_asterisk_label_value_survives_export_roundtrip():
+    """Text, choice and signature values under '... *' labels round-trip."""
+    rows = [
+        {"name": "email", "label": "Email Address *", "type": "text",
+         "value": "me@x.com", "page": 1},
+        {"name": "state", "label": "State *", "type": "choice",
+         "options": ["CA", "NY"], "value": "NY", "page": 1},
+        {"name": "sign", "label": "Signature *", "type": "signature",
+         "value": "signature:s1", "page": 1},
+    ]
+    expected = {"email": "me@x.com", "state": "NY", "sign": "signature:s1"}
+    parsed = parse_markdown_to_values(render_form_as_markdown(rows, "u", "F"))
+    for field_name, want in expected.items():
+        assert parsed[field_name] == want
+
+
+def test_plain_labels_and_colon_values_unaffected():
+    """Asterisk-free labels and values containing ':' still round-trip."""
+    rows = [
+        {"name": "name", "label": "Full Name", "type": "text",
+         "value": "Alice", "page": 1},
+        {"name": "time", "label": "Start Time", "type": "text",
+         "value": "9:00 sharp", "page": 1},
+    ]
+    parsed = parse_markdown_to_values(render_form_as_markdown(rows, "u", "F"))
+    for field_name, want in {"name": "Alice", "time": "9:00 sharp"}.items():
+        assert parsed[field_name] == want