Harden markdown raw HTML sanitization (#2497)

This commit is contained in:
Vykos
2026-06-04 20:46:10 +02:00
committed by GitHub
parent 3ae89599f3
commit 01c99c3990
2 changed files with 46 additions and 3 deletions

View File

@@ -60,9 +60,21 @@ const _ALLOWED_HTML_BAD_TAGS = new Set([
'SVG', 'MATH', 'SVG', 'MATH',
]); ]);
const _ALLOWED_HTML_URL_ATTRS = new Set([ const _ALLOWED_HTML_URL_ATTRS = new Set([
'href', 'src', 'xlink:href', 'action', 'formaction', 'background', 'poster', 'href', 'src', 'srcset', 'xlink:href', 'action', 'formaction', 'background', 'poster',
]); ]);
function _compactUrlSchemeValue(value) {
return String(value || '').replace(/[\u0000-\u0020\u007f-\u009f]+/g, '').toLowerCase();
}
function _isDangerousUrl(value) {
return /^(javascript|vbscript|data):/.test(_compactUrlSchemeValue(value));
}
function _isDangerousSrcset(value) {
return String(value || '').split(',').some(candidate => _isDangerousUrl(candidate));
}
function _cleanAllowedHtmlOnce(htmlString) { function _cleanAllowedHtmlOnce(htmlString) {
const tpl = document.createElement('template'); const tpl = document.createElement('template');
tpl.innerHTML = htmlString; tpl.innerHTML = htmlString;
@@ -82,11 +94,17 @@ function _cleanAllowedHtmlOnce(htmlString) {
el.removeAttribute(attr.name); el.removeAttribute(attr.name);
continue; continue;
} }
if (name === 'style') {
const value = _compactUrlSchemeValue(attr.value);
if (/javascript:|vbscript:|data:|expression\(/.test(value)) {
el.removeAttribute(attr.name);
}
continue;
}
// Neutralize javascript:/vbscript:/data: in URL-bearing attributes. // Neutralize javascript:/vbscript:/data: in URL-bearing attributes.
// Strip control/space chars first so e.g. "java\tscript:" can't slip by. // Strip control/space chars first so e.g. "java\tscript:" can't slip by.
if (_ALLOWED_HTML_URL_ATTRS.has(name)) { if (_ALLOWED_HTML_URL_ATTRS.has(name)) {
const value = (attr.value || '').replace(/[\x00-\x20]+/g, '').toLowerCase(); if (name === 'srcset' ? _isDangerousSrcset(attr.value) : _isDangerousUrl(attr.value)) {
if (/^(javascript|vbscript|data):/.test(value)) {
el.removeAttribute(attr.name); el.removeAttribute(attr.name);
} }
} }

View File

@@ -0,0 +1,25 @@
"""Regression guards for markdown raw-HTML sanitizer helpers."""
from pathlib import Path
_REPO = Path(__file__).resolve().parent.parent
def test_markdown_raw_html_sanitizer_checks_url_attr_edge_cases():
src = (_REPO / "static" / "js" / "markdown.js").read_text(encoding="utf-8")
assert "function _compactUrlSchemeValue(value)" in src
assert "function _isDangerousUrl(value)" in src
assert "function _isDangerousSrcset(value)" in src
assert "'srcset'" in src
assert "candidate => _isDangerousUrl(candidate)" in src
assert "name === 'srcset' ? _isDangerousSrcset(attr.value) : _isDangerousUrl(attr.value)" in src
def test_markdown_raw_html_sanitizer_strips_scriptable_css():
src = (_REPO / "static" / "js" / "markdown.js").read_text(encoding="utf-8")
assert "if (name === 'style')" in src
assert r"javascript:|vbscript:|data:|expression\(" in src
assert "el.removeAttribute(attr.name);" in src