Harden markdown raw HTML sanitization (#2497)
This commit is contained in:
@@ -60,9 +60,21 @@ const _ALLOWED_HTML_BAD_TAGS = new Set([
|
||||
'SVG', 'MATH',
|
||||
]);
|
||||
const _ALLOWED_HTML_URL_ATTRS = new Set([
|
||||
'href', 'src', 'xlink:href', 'action', 'formaction', 'background', 'poster',
|
||||
'href', 'src', 'srcset', 'xlink:href', 'action', 'formaction', 'background', 'poster',
|
||||
]);
|
||||
|
||||
function _compactUrlSchemeValue(value) {
|
||||
return String(value || '').replace(/[\u0000-\u0020\u007f-\u009f]+/g, '').toLowerCase();
|
||||
}
|
||||
|
||||
function _isDangerousUrl(value) {
|
||||
return /^(javascript|vbscript|data):/.test(_compactUrlSchemeValue(value));
|
||||
}
|
||||
|
||||
function _isDangerousSrcset(value) {
|
||||
return String(value || '').split(',').some(candidate => _isDangerousUrl(candidate));
|
||||
}
|
||||
|
||||
function _cleanAllowedHtmlOnce(htmlString) {
|
||||
const tpl = document.createElement('template');
|
||||
tpl.innerHTML = htmlString;
|
||||
@@ -82,11 +94,17 @@ function _cleanAllowedHtmlOnce(htmlString) {
|
||||
el.removeAttribute(attr.name);
|
||||
continue;
|
||||
}
|
||||
if (name === 'style') {
|
||||
const value = _compactUrlSchemeValue(attr.value);
|
||||
if (/javascript:|vbscript:|data:|expression\(/.test(value)) {
|
||||
el.removeAttribute(attr.name);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
// Neutralize javascript:/vbscript:/data: in URL-bearing attributes.
|
||||
// Strip control/space chars first so e.g. "java\tscript:" can't slip by.
|
||||
if (_ALLOWED_HTML_URL_ATTRS.has(name)) {
|
||||
const value = (attr.value || '').replace(/[\x00-\x20]+/g, '').toLowerCase();
|
||||
if (/^(javascript|vbscript|data):/.test(value)) {
|
||||
if (name === 'srcset' ? _isDangerousSrcset(attr.value) : _isDangerousUrl(attr.value)) {
|
||||
el.removeAttribute(attr.name);
|
||||
}
|
||||
}
|
||||
|
||||
25
tests/test_markdown_dom_xss_helpers.py
Normal file
25
tests/test_markdown_dom_xss_helpers.py
Normal file
@@ -0,0 +1,25 @@
|
||||
"""Regression guards for markdown raw-HTML sanitizer helpers."""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
_REPO = Path(__file__).resolve().parent.parent
|
||||
|
||||
|
||||
def test_markdown_raw_html_sanitizer_checks_url_attr_edge_cases():
|
||||
src = (_REPO / "static" / "js" / "markdown.js").read_text(encoding="utf-8")
|
||||
|
||||
assert "function _compactUrlSchemeValue(value)" in src
|
||||
assert "function _isDangerousUrl(value)" in src
|
||||
assert "function _isDangerousSrcset(value)" in src
|
||||
assert "'srcset'" in src
|
||||
assert "candidate => _isDangerousUrl(candidate)" in src
|
||||
assert "name === 'srcset' ? _isDangerousSrcset(attr.value) : _isDangerousUrl(attr.value)" in src
|
||||
|
||||
|
||||
def test_markdown_raw_html_sanitizer_strips_scriptable_css():
|
||||
src = (_REPO / "static" / "js" / "markdown.js").read_text(encoding="utf-8")
|
||||
|
||||
assert "if (name === 'style')" in src
|
||||
assert r"javascript:|vbscript:|data:|expression\(" in src
|
||||
assert "el.removeAttribute(attr.name);" in src
|
||||
Reference in New Issue
Block a user