fix(forms): keep PDF-form export from dropping values when the label has '*' (#1407)

parse_markdown_to_values — the read-back path for export-pdf, the export
preview, and prepare-signed-reply — matched the bold field label with [^*]+, so
it could not match a label containing '*' (the near-universal required-field
marker: "Email *", "State *", "Signature *"). The value then stayed empty, so
the exported PDF and the signed-reply attachment came out blank for that field
with no error — a whole form of required fields could export completely empty.

Match the label non-greedily (.+?) so '*' in labels is tolerated while still
splitting at the first ':**' (text fields) or the first '**' followed by
optional whitespace and '[' (choice fields), which also preserves a value that
itself contains ':**'.

Adds tests/test_form_markdown_roundtrip.py (render -> parse roundtrip): asterisk
text/choice/signature labels survive (fail before, pass after); plain labels and
colon-bearing values are unaffected.

Co-authored-by: NubsCarson <nubs@nubs.site>
This commit is contained in:
Shaw
2026-06-03 01:24:07 -04:00
committed by GitHub
parent 43ed3f7148
commit 49bf73b228
2 changed files with 47 additions and 2 deletions

View File

@@ -126,8 +126,13 @@ def _decode_name(enc: str) -> str:
"""Inverse of _encode_name."""
import urllib.parse
return urllib.parse.unquote(enc or "")
# Pre-change patterns: the label segment is [^*]+, which cannot match a label
# that itself contains '*'. Both names are rebound by the definitions below,
# so these two bindings do not survive past this section.
# NOTE(review): this listing looks like a flattened diff hunk, in which case
# these two lines are the removed side of the change — confirm.
_TEXT_VALUE_RE = re.compile(r'\*\*[^*]+:\*\*\s*(?P<value>.*)$')
_CHOICE_VALUE_RE = re.compile(r'\*\*[^*]+\*\*\s*\[[^\]]*\]\s*:\s*(?P<value>.*)$')
# Label segment is non-greedy (.+?) so labels containing '*' — the near-universal
# required-field marker, e.g. "Email *" — are tolerated, while still splitting at
# the FIRST ':**' / '**[' so a value that itself contains ':**' is preserved.
# (The old [^*]+ refused to match any label with an asterisk and silently
# dropped that field's value on export.)
# Text field shape: '**<label>:**', optional whitespace, then everything up to
# the end of the string is captured as the named group `value`.
_TEXT_VALUE_RE = re.compile(r'\*\*.+?:\*\*\s*(?P<value>.*)$')
# Choice field shape: '**<label>**', optional whitespace, a bracketed segment
# '[...]' containing no ']', optional whitespace, ':', optional whitespace,
# then the remainder captured as `value`.
_CHOICE_VALUE_RE = re.compile(r'\*\*.+?\*\*\s*\[[^\]]*\]\s*:\s*(?P<value>.*)$')
# Checkbox marker anchored at the start of the string (leading whitespace
# allowed): '[x]', '[X]' or '[ ]'; the single character is captured as `state`.
_CHECKBOX_VALUE_RE = re.compile(r'^\s*\[(?P<state>[xX ])\]')
# Literal placeholder strings; presumably what the renderer emits for unset
# fields so the parser can treat them as "no value" — TODO confirm against
# parse_markdown_to_values. Note "_(empty)_." (with trailing period) is listed
# in addition to "_(empty)_".
_PLACEHOLDERS = {"_(empty)_", "_(not selected)_", "_(empty)_.", "_(unsigned)_"}

View File

@@ -0,0 +1,40 @@
"""Regression: PDF-form markdown export must not drop values whose label
contains an asterisk.
`parse_markdown_to_values` is the read-back path for GET .../export-pdf, the
export preview, and prepare-signed-reply. Its bullet regexes matched the bold
label with `[^*]+`, so they could not match a label like "Email *" / "State *"
/ "Signature *" — the near-universal required-field marker. The value then
stayed empty and the exported PDF (and signed-reply attachment) came out blank
for that field, with no error.
"""
from src.pdf_form_doc import render_form_as_markdown, parse_markdown_to_values
def test_asterisk_label_value_survives_export_roundtrip():
    """Render -> parse roundtrip for fields whose labels contain an asterisk.

    One text, one choice and one signature field are rendered to markdown and
    parsed back; every parsed value must equal the value that was rendered.
    """
    required_fields = [
        {
            "name": "email",
            "label": "Email Address *",
            "type": "text",
            "value": "me@x.com",
            "page": 1,
        },
        {
            "name": "state",
            "label": "State *",
            "type": "choice",
            "options": ["CA", "NY"],
            "value": "NY",
            "page": 1,
        },
        {
            "name": "sign",
            "label": "Signature *",
            "type": "signature",
            "value": "signature:s1",
            "page": 1,
        },
    ]
    markdown = render_form_as_markdown(required_fields, "u", "F")
    parsed = parse_markdown_to_values(markdown)

    # Checked in the same order as the fields were declared.
    expected = {
        "email": "me@x.com",
        "state": "NY",
        "sign": "signature:s1",
    }
    for field_name, wanted in expected.items():
        assert parsed[field_name] == wanted
def test_plain_labels_and_colon_values_unaffected():
    """Render -> parse roundtrip for asterisk-free labels.

    Covers a plain text value and a value that contains a colon
    ("9:00 sharp"); both must come back unchanged.
    """
    plain_fields = [
        {
            "name": "name",
            "label": "Full Name",
            "type": "text",
            "value": "Alice",
            "page": 1,
        },
        {
            "name": "time",
            "label": "Start Time",
            "type": "text",
            "value": "9:00 sharp",
            "page": 1,
        },
    ]
    markdown = render_form_as_markdown(plain_fields, "u", "F")
    parsed = parse_markdown_to_values(markdown)

    expected = {"name": "Alice", "time": "9:00 sharp"}
    for field_name, wanted in expected.items():
        assert parsed[field_name] == wanted