From b9a0586edcbdc1bff5a3fdffd3c7f2a8b876a160 Mon Sep 17 00:00:00 2001 From: nubs Date: Fri, 5 Jun 2026 00:57:20 +0000 Subject: [PATCH] fix(markdown): avoid autolinking dotted imports (#2295) --- static/js/markdown.js | 6 ++++-- tests/test_markdown_rendering_js.py | 26 ++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/static/js/markdown.js b/static/js/markdown.js index df92721..fdbd10a 100644 --- a/static/js/markdown.js +++ b/static/js/markdown.js @@ -524,9 +524,11 @@ export function mdToHtml(src, opts) { // allowlist keeps it from matching file names / versions ("package.json", // "node.js", "v1.2.3"); the required start/[\s(<] prefix means domains // already inside an http link (preceded by "//") or an email ("@") are - // skipped. Trailing sentence punctuation is kept outside the link. + // skipped. Require the TLD to end at a real domain boundary so dotted code + // identifiers like `sklearn.metrics` do not link `sklearn.me` and leave + // placeholder fragments in the remaining text. s = s.replace( - /(^|[\s(<])((?:www\.)?[a-z0-9](?:[a-z0-9-]*[a-z0-9])?(?:\.[a-z0-9-]+)*\.(?:com|org|net|io|ai|co|dev|app|gov|edu|news|info|tech|xyz|me)(?:\/[^\s<>"'`\])]*)?)/gi, + /(^|[\s(<])((?:www\.)?[a-z0-9](?:[a-z0-9-]*[a-z0-9])?(?:\.[a-z0-9-]+)*\.(?:com|org|net|io|ai|co|dev|app|gov|edu|news|info|tech|xyz|me)(?=$|[\/\s<>"'`\]).,;:!?])(?:\/[^\s<>"'`\])]*)?)/gi, (match, prefix, domain) => { const trail = (domain.match(/[.,;:!?)]+$/) || [''])[0]; const core = trail ? domain.slice(0, -trail.length) : domain; diff --git a/tests/test_markdown_rendering_js.py b/tests/test_markdown_rendering_js.py index 7cfd3b5..70c7d3b 100644 --- a/tests/test_markdown_rendering_js.py +++ b/tests/test_markdown_rendering_js.py @@ -27,6 +27,15 @@ def _run_markdown_case(markdown: str, render_expr: str = "mod.mdToHtml(input)"): globalThis.document = { readyState: 'loading', addEventListener() {}, + createElement(tag) { + if (tag !== 'template') throw new Error(`unsupported element: ${tag}`); + return { + _html: '', + content: { querySelectorAll() { return []; } }, + set innerHTML(value) { this._html = value; }, + get innerHTML() { return this._html; }, + }; + }, }; globalThis.MutationObserver = class { observe() {} }; @@ -159,3 +168,20 @@ def test_extract_thinking_blocks_handles_thought_tag(node_available): assert result["thinkingBlocks"] == ["internal reasoning"] assert result["content"] == "Final answer." + + +def test_dotted_python_import_paths_are_not_autolinked(node_available): + html = _run_markdown_case( + "from imblearn.combine import SMOTETomek\n" + "from sklearn.metrics import f1_score\n" + "from sklearn.compose import ColumnTransformer\n\n" + "See example.com/docs for normal domain autolinking." + ) + + assert "___ALLOWED_HTML_" not in html + assert "imblearn.combine" in html + assert "sklearn.metrics" in html + assert "sklearn.compose" in html + assert 'href="https://imblearn.com' not in html + assert 'href="https://sklearn.me' not in html + assert 'href="https://example.com/docs"' in html