fix(markdown): avoid autolinking dotted imports (#2295)

This commit is contained in:
nubs
2026-06-05 00:57:20 +00:00
committed by GitHub
parent 19a3fc59c9
commit b9a0586edc
2 changed files with 30 additions and 2 deletions

View File

@@ -524,9 +524,11 @@ export function mdToHtml(src, opts) {
// allowlist keeps it from matching file names / versions ("package.json", // allowlist keeps it from matching file names / versions ("package.json",
// "node.js", "v1.2.3"); the required start/[\s(<] prefix means domains // "node.js", "v1.2.3"); the required start/[\s(<] prefix means domains
// already inside an http link (preceded by "//") or an email ("@") are // already inside an http link (preceded by "//") or an email ("@") are
// skipped. Trailing sentence punctuation is kept outside the link. // skipped. Require the TLD to end at a real domain boundary so dotted code
// identifiers like `sklearn.metrics` do not link `sklearn.me` and leave
// placeholder fragments in the remaining text.
s = s.replace( s = s.replace(
/(^|[\s(<])((?:www\.)?[a-z0-9](?:[a-z0-9-]*[a-z0-9])?(?:\.[a-z0-9-]+)*\.(?:com|org|net|io|ai|co|dev|app|gov|edu|news|info|tech|xyz|me)(?:\/[^\s<>"'`\])]*)?)/gi, /(^|[\s(<])((?:www\.)?[a-z0-9](?:[a-z0-9-]*[a-z0-9])?(?:\.[a-z0-9-]+)*\.(?:com|org|net|io|ai|co|dev|app|gov|edu|news|info|tech|xyz|me)(?=$|[\/\s<>"'`\]).,;:!?])(?:\/[^\s<>"'`\])]*)?)/gi,
(match, prefix, domain) => { (match, prefix, domain) => {
const trail = (domain.match(/[.,;:!?)]+$/) || [''])[0]; const trail = (domain.match(/[.,;:!?)]+$/) || [''])[0];
const core = trail ? domain.slice(0, -trail.length) : domain; const core = trail ? domain.slice(0, -trail.length) : domain;

View File

@@ -27,6 +27,15 @@ def _run_markdown_case(markdown: str, render_expr: str = "mod.mdToHtml(input)"):
globalThis.document = { globalThis.document = {
readyState: 'loading', readyState: 'loading',
addEventListener() {}, addEventListener() {},
createElement(tag) {
if (tag !== 'template') throw new Error(`unsupported element: ${tag}`);
return {
_html: '',
content: { querySelectorAll() { return []; } },
set innerHTML(value) { this._html = value; },
get innerHTML() { return this._html; },
};
},
}; };
globalThis.MutationObserver = class { observe() {} }; globalThis.MutationObserver = class { observe() {} };
@@ -159,3 +168,20 @@ def test_extract_thinking_blocks_handles_thought_tag(node_available):
assert result["thinkingBlocks"] == ["internal reasoning"] assert result["thinkingBlocks"] == ["internal reasoning"]
assert result["content"] == "Final answer." assert result["content"] == "Final answer."
def test_dotted_python_import_paths_are_not_autolinked(node_available):
    """Dotted import paths stay plain text while a real bare domain still links."""
    markdown_source = (
        "from imblearn.combine import SMOTETomek\n"
        "from sklearn.metrics import f1_score\n"
        "from sklearn.compose import ColumnTransformer\n\n"
        "See example.com/docs for normal domain autolinking."
    )
    rendered = _run_markdown_case(markdown_source)

    # No placeholder token may leak into the rendered output.
    assert "___ALLOWED_HTML_" not in rendered

    # Every dotted module path must appear intact in the output ...
    for module_path in ("imblearn.combine", "sklearn.metrics", "sklearn.compose"):
        assert module_path in rendered

    # ... and no link may be built from a TLD-looking prefix of one.
    for bogus_href in ('href="https://imblearn.com', 'href="https://sklearn.me'):
        assert bogus_href not in rendered

    # A genuine bare domain followed by a path is still autolinked.
    assert 'href="https://example.com/docs"' in rendered