# Patch summary (free-text preamble; not part of any hunk).
#
# static/js/markdown.js
#   * Adds 'srcset' to _ALLOWED_HTML_URL_ATTRS.
#   * Adds _compactUrlSchemeValue(value): coerces to a string, removes every
#     character in U+0000-U+0020 and U+007F-U+009F, then lower-cases.
#   * Adds _isDangerousUrl(value): true when the compacted value starts with
#     "javascript:", "vbscript:" or "data:".
#   * Adds _isDangerousSrcset(value): splits on "," and returns true when any
#     candidate is flagged by _isDangerousUrl.
#   * In _cleanAllowedHtmlOnce: a `style` attribute is removed when its
#     compacted value contains "javascript:", "vbscript:", "data:" or
#     "expression(" anywhere (the pattern is not anchored), and the loop then
#     continues to the next attribute. The URL-attribute check now delegates
#     to _isDangerousSrcset for `srcset` and _isDangerousUrl otherwise.
#
# tests/test_markdown_dom_xss_helpers.py (new file)
#   * Reads markdown.js as text and asserts that specific source substrings
#     are present. The tests do not execute the JavaScript.
#
# NOTE(review): because the `style` pattern is unanchored, any style value
#   containing "data:" (for example a url(data:...) background) has the whole
#   attribute removed - confirm this is intended.
# NOTE(review): the assertion on "el.removeAttribute(attr.name);" also matches
#   an unchanged context line of the second hunk, so it holds without this
#   patch applied.
# NOTE(review): line breaks were restored from a whitespace-collapsed copy.
#   The line structure matches every hunk count, but the indentation inside
#   the hunks is reconstructed and presumably needs checking against the
#   real file (or applying with whitespace ignored) - TODO confirm.
#
diff --git a/static/js/markdown.js b/static/js/markdown.js
index bdbaff4..a2cfba0 100644
--- a/static/js/markdown.js
+++ b/static/js/markdown.js
@@ -60,9 +60,21 @@ const _ALLOWED_HTML_BAD_TAGS = new Set([
   'SVG', 'MATH',
 ]);
 const _ALLOWED_HTML_URL_ATTRS = new Set([
-  'href', 'src', 'xlink:href', 'action', 'formaction', 'background', 'poster',
+  'href', 'src', 'srcset', 'xlink:href', 'action', 'formaction', 'background', 'poster',
 ]);
 
+function _compactUrlSchemeValue(value) {
+  return String(value || '').replace(/[\u0000-\u0020\u007f-\u009f]+/g, '').toLowerCase();
+}
+
+function _isDangerousUrl(value) {
+  return /^(javascript|vbscript|data):/.test(_compactUrlSchemeValue(value));
+}
+
+function _isDangerousSrcset(value) {
+  return String(value || '').split(',').some(candidate => _isDangerousUrl(candidate));
+}
+
 function _cleanAllowedHtmlOnce(htmlString) {
   const tpl = document.createElement('template');
   tpl.innerHTML = htmlString;
@@ -82,11 +94,17 @@ function _cleanAllowedHtmlOnce(htmlString) {
         el.removeAttribute(attr.name);
         continue;
       }
+      if (name === 'style') {
+        const value = _compactUrlSchemeValue(attr.value);
+        if (/javascript:|vbscript:|data:|expression\(/.test(value)) {
+          el.removeAttribute(attr.name);
+        }
+        continue;
+      }
       // Neutralize javascript:/vbscript:/data: in URL-bearing attributes.
       // Strip control/space chars first so e.g. "java\tscript:" can't slip by.
       if (_ALLOWED_HTML_URL_ATTRS.has(name)) {
-        const value = (attr.value || '').replace(/[\x00-\x20]+/g, '').toLowerCase();
-        if (/^(javascript|vbscript|data):/.test(value)) {
+        if (name === 'srcset' ? _isDangerousSrcset(attr.value) : _isDangerousUrl(attr.value)) {
           el.removeAttribute(attr.name);
         }
       }
diff --git a/tests/test_markdown_dom_xss_helpers.py b/tests/test_markdown_dom_xss_helpers.py
new file mode 100644
index 0000000..25b1841
--- /dev/null
+++ b/tests/test_markdown_dom_xss_helpers.py
@@ -0,0 +1,25 @@
+"""Regression guards for markdown raw-HTML sanitizer helpers."""
+
+from pathlib import Path
+
+
+_REPO = Path(__file__).resolve().parent.parent
+
+
+def test_markdown_raw_html_sanitizer_checks_url_attr_edge_cases():
+    src = (_REPO / "static" / "js" / "markdown.js").read_text(encoding="utf-8")
+
+    assert "function _compactUrlSchemeValue(value)" in src
+    assert "function _isDangerousUrl(value)" in src
+    assert "function _isDangerousSrcset(value)" in src
+    assert "'srcset'" in src
+    assert "candidate => _isDangerousUrl(candidate)" in src
+    assert "name === 'srcset' ? _isDangerousSrcset(attr.value) : _isDangerousUrl(attr.value)" in src
+
+
+def test_markdown_raw_html_sanitizer_strips_scriptable_css():
+    src = (_REPO / "static" / "js" / "markdown.js").read_text(encoding="utf-8")
+
+    assert "if (name === 'style')" in src
+    assert r"javascript:|vbscript:|data:|expression\(" in src
+    assert "el.removeAttribute(attr.name);" in src