Harden email HTML URL sanitization (#2496)

This commit is contained in:
Vykos
2026-06-04 20:47:47 +02:00
committed by GitHub
parent 01c99c3990
commit e113c10d01
2 changed files with 141 additions and 12 deletions

View File

@@ -30,6 +30,28 @@ export function _esc(text) {
return div.innerHTML; return div.innerHTML;
} }
function _attrEsc(text) {
return String(text ?? '')
.replace(/"/g, '"')
.replace(/'/g, ''')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/`/g, '&#96;');
}
function _compactUrlSchemeValue(value) {
return String(value || '').replace(/[\u0000-\u0020\u007f-\u009f]+/g, '').toLowerCase();
}
function _isDangerousUrl(value) {
const compact = _compactUrlSchemeValue(value);
return compact.startsWith('javascript:') || compact.startsWith('vbscript:') || compact.startsWith('data:');
}
function _isDangerousSrcset(value) {
return String(value || '').split(',').some(candidate => _isDangerousUrl(candidate));
}
// Escape + linkify URLs and email addresses. Returns innerHTML-safe markup. // Escape + linkify URLs and email addresses. Returns innerHTML-safe markup.
export function _escLinkify(text) { export function _escLinkify(text) {
const escaped = _esc(text); const escaped = _esc(text);
@@ -39,9 +61,9 @@ export function _escLinkify(text) {
return escaped return escaped
.replace(urlRe, (m) => { .replace(urlRe, (m) => {
const href = m.startsWith('www.') ? `https://${m}` : m; const href = m.startsWith('www.') ? `https://${m}` : m;
return `<a href="${href}" target="_blank" rel="noopener noreferrer">${m}</a>`; return `<a href="${_attrEsc(href)}" target="_blank" rel="noopener noreferrer">${m}</a>`;
}) })
.replace(mailRe, (m) => `<a href="mailto:${m}">${m}</a>`); .replace(mailRe, (m) => `<a href="${_attrEsc(`mailto:${m}`)}">${m}</a>`);
} }
// Pull display name out of "Name <email@x>"; fallback to local-part of // Pull display name out of "Name <email@x>"; fallback to local-part of
@@ -133,19 +155,14 @@ export function _initials(s) {
// `data:` URLs on every known URL attribute, scrubs inline colour/font/ // `data:` URLs on every known URL attribute, scrubs inline colour/font/
// position styles so the theme can take over, and wraps highlight-bearing // position styles so the theme can take over, and wraps highlight-bearing
// inline tags in <mark> so they render legibly across themes. // inline tags in <mark> so they render legibly across themes.
export function _sanitizeHtml(html) { function _sanitizeHtmlOnce(html) {
const doc = new DOMParser().parseFromString(html, 'text/html'); const doc = new DOMParser().parseFromString(html, 'text/html');
doc.querySelectorAll( doc.querySelectorAll(
'script, iframe, object, embed, form, style, link, ' + 'script, iframe, object, embed, form, style, link, ' +
'svg, math, base, meta, noscript, frame, frameset, applet, portal' 'svg, math, base, meta, noscript, frame, frameset, applet, portal'
).forEach(el => el.remove()); ).forEach(el => el.remove());
const URL_ATTRS = ['href', 'src', 'srcset', 'action', 'formaction', 'background', 'poster', 'data']; const URL_ATTRS = ['href', 'src', 'xlink:href', 'srcset', 'action', 'formaction', 'background', 'poster', 'data'];
const isDangerousUrl = (val) => {
if (!val) return false;
const v = val.trim().toLowerCase();
return v.startsWith('javascript:') || v.startsWith('vbscript:') || v.startsWith('data:');
};
const STRIP_CSS_PROPS = ['color', 'background', 'background-color', const STRIP_CSS_PROPS = ['color', 'background', 'background-color',
'font-family', 'font', '-webkit-text-fill-color', 'font-family', 'font', '-webkit-text-fill-color',
@@ -160,7 +177,7 @@ export function _sanitizeHtml(html) {
const name = attr.name.toLowerCase(); const name = attr.name.toLowerCase();
if (name.startsWith('on')) { el.removeAttribute(attr.name); continue; } if (name.startsWith('on')) { el.removeAttribute(attr.name); continue; }
if (name === 'srcdoc') { el.removeAttribute(attr.name); continue; } if (name === 'srcdoc') { el.removeAttribute(attr.name); continue; }
if (URL_ATTRS.includes(name) && isDangerousUrl(attr.value)) { if (URL_ATTRS.includes(name) && (name === 'srcset' ? _isDangerousSrcset(attr.value) : _isDangerousUrl(attr.value))) {
el.removeAttribute(attr.name); el.removeAttribute(attr.name);
continue; continue;
} }
@@ -177,8 +194,8 @@ export function _sanitizeHtml(html) {
if (style) { if (style) {
const kept = style.split(';').map(s => s.trim()).filter(decl => { const kept = style.split(';').map(s => s.trim()).filter(decl => {
if (!decl) return false; if (!decl) return false;
const lower = decl.toLowerCase(); const lower = _compactUrlSchemeValue(decl);
if (lower.includes('javascript:') || lower.includes('expression(')) return false; if (lower.includes('javascript:') || lower.includes('vbscript:') || lower.includes('data:') || lower.includes('expression(')) return false;
const prop = decl.split(':', 1)[0].trim().toLowerCase(); const prop = decl.split(':', 1)[0].trim().toLowerCase();
return !STRIP_CSS_PROPS.includes(prop); return !STRIP_CSS_PROPS.includes(prop);
}); });
@@ -200,3 +217,13 @@ export function _sanitizeHtml(html) {
return doc.body.innerHTML; return doc.body.innerHTML;
} }
export function _sanitizeHtml(html) {
let out = String(html ?? '');
for (let i = 0; i < 4; i++) {
const next = _sanitizeHtmlOnce(out);
if (next === out) break;
out = next;
}
return out;
}

View File

@@ -0,0 +1,102 @@
"""DOM-XSS regressions for email plain-text linkification helpers."""
import json
import shutil
import subprocess
import textwrap
from pathlib import Path
import pytest
_REPO = Path(__file__).resolve().parent.parent
_HELPER = _REPO / "static" / "js" / "emailLibrary" / "utils.js"
_HAS_NODE = shutil.which("node") is not None
def _run(js: str) -> str:
proc = subprocess.run(
["node", "--input-type=module"],
input=js,
capture_output=True,
text=True,
cwd=str(_REPO),
timeout=30,
)
assert proc.returncode == 0, proc.stderr
return proc.stdout.strip()
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_plain_text_linkify_escapes_href_attribute_without_double_escaping():
js = textwrap.dedent(
f"""
globalThis.document = {{
createElement() {{
return {{
set textContent(v) {{
this._t = String(v ?? '')
.replace(/&/g, '&amp;')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#39;');
}},
get innerHTML() {{ return this._t || ''; }}
}};
}}
}};
const {{ _escLinkify }} = await import('{_HELPER.as_posix()}');
const out = _escLinkify('See https://example.test/path?a=1&b=2 and www.example.test/a`b');
console.log(JSON.stringify(out));
"""
)
html = json.loads(_run(js))
assert 'href="https://example.test/path?a=1&amp;b=2"' in html
assert "amp;amp" not in html
assert 'href="https://www.example.test/a&#96;b"' in html
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_email_url_scheme_checks_strip_embedded_controls():
js = textwrap.dedent(
f"""
import fs from 'node:fs';
let source = fs.readFileSync('{_HELPER.as_posix()}', 'utf8');
source = source
.replace('function _compactUrlSchemeValue', 'export function _compactUrlSchemeValue')
.replace('function _isDangerousUrl', 'export function _isDangerousUrl')
.replace('function _isDangerousSrcset', 'export function _isDangerousSrcset');
const mod = await import('data:text/javascript;base64,' + Buffer.from(source).toString('base64'));
const checks = {{
compact: mod._compactUrlSchemeValue('java\\n script:\\talert(1)'),
jsUrl: mod._isDangerousUrl('java\\n script:\\talert(1)'),
vbUrl: mod._isDangerousUrl('vb\\rscript:msgbox(1)'),
dataUrl: mod._isDangerousUrl(' data:text/html,<script>alert(1)</script>'),
httpUrl: mod._isDangerousUrl('https://example.test/?q=javascript:alert(1)'),
srcset: mod._isDangerousSrcset('https://safe.test/a.png 1x, java\\nscript:alert(1) 2x'),
}};
console.log(JSON.stringify(checks));
"""
)
checks = json.loads(_run(js))
assert checks["compact"] == "javascript:alert(1)"
assert checks["jsUrl"] is True
assert checks["vbUrl"] is True
assert checks["dataUrl"] is True
assert checks["httpUrl"] is False
assert checks["srcset"] is True
def test_email_html_sanitizer_runs_to_fixpoint():
source = _HELPER.read_text(encoding="utf-8")
assert "function _sanitizeHtmlOnce(html)" in source
assert "for (let i = 0; i < 4; i++)" in source
assert "const next = _sanitizeHtmlOnce(out);" in source
assert "if (next === out) break;" in source