fix(markdown): avoid autolinking dotted imports (#2295)
This commit is contained in:
@@ -524,9 +524,11 @@ export function mdToHtml(src, opts) {
|
||||
// allowlist keeps it from matching file names / versions ("package.json",
|
||||
// "node.js", "v1.2.3"); the required start/[\s(<] prefix means domains
|
||||
// already inside an http link (preceded by "//") or an email ("@") are
|
||||
// skipped. Trailing sentence punctuation is kept outside the link.
|
||||
// skipped. Require the TLD to end at a real domain boundary so dotted code
|
||||
// identifiers like `sklearn.metrics` do not link `sklearn.me` and leave
|
||||
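// placeholder fragments in the remaining text. Trailing sentence punctuation
// is still split off the match (see `trail` / `core` below) so it stays
// outside the link.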
|
||||
s = s.replace(
|
||||
/(^|[\s(<])((?:www\.)?[a-z0-9](?:[a-z0-9-]*[a-z0-9])?(?:\.[a-z0-9-]+)*\.(?:com|org|net|io|ai|co|dev|app|gov|edu|news|info|tech|xyz|me)(?:\/[^\s<>"'`\])]*)?)/gi,
|
||||
/(^|[\s(<])((?:www\.)?[a-z0-9](?:[a-z0-9-]*[a-z0-9])?(?:\.[a-z0-9-]+)*\.(?:com|org|net|io|ai|co|dev|app|gov|edu|news|info|tech|xyz|me)(?=$|[\/\s<>"'`\]).,;:!?])(?:\/[^\s<>"'`\])]*)?)/gi,
|
||||
(match, prefix, domain) => {
|
||||
const trail = (domain.match(/[.,;:!?)]+$/) || [''])[0];
|
||||
const core = trail ? domain.slice(0, -trail.length) : domain;
|
||||
|
||||
@@ -27,6 +27,15 @@ def _run_markdown_case(markdown: str, render_expr: str = "mod.mdToHtml(input)"):
|
||||
globalThis.document = {
|
||||
readyState: 'loading',
|
||||
addEventListener() {},
|
||||
createElement(tag) {
|
||||
if (tag !== 'template') throw new Error(`unsupported element: ${tag}`);
|
||||
return {
|
||||
_html: '',
|
||||
content: { querySelectorAll() { return []; } },
|
||||
set innerHTML(value) { this._html = value; },
|
||||
get innerHTML() { return this._html; },
|
||||
};
|
||||
},
|
||||
};
|
||||
globalThis.MutationObserver = class { observe() {} };
|
||||
|
||||
@@ -159,3 +168,20 @@ def test_extract_thinking_blocks_handles_thought_tag(node_available):
|
||||
|
||||
assert result["thinkingBlocks"] == ["internal reasoning"]
|
||||
assert result["content"] == "Final answer."
|
||||
|
||||
|
||||
def test_dotted_python_import_paths_are_not_autolinked(node_available):
    """Dotted Python import paths must render as plain text, not as links.

    Renders a snippet that mixes `from pkg.module import name` lines with one
    bare domain, then checks that no placeholder token leaks into the output,
    that each dotted path survives intact, that no link is created from a
    prefix of a dotted path, and that the bare domain is still autolinked.
    """
    markdown_source = (
        "from imblearn.combine import SMOTETomek\n"
        "from sklearn.metrics import f1_score\n"
        "from sklearn.compose import ColumnTransformer\n\n"
        "See example.com/docs for normal domain autolinking."
    )
    rendered = _run_markdown_case(markdown_source)

    # A leaked placeholder means a partial match split the surrounding text.
    assert "___ALLOWED_HTML_" not in rendered

    # Each dotted module path must still appear verbatim in the output.
    for dotted_path in ("imblearn.combine", "sklearn.metrics", "sklearn.compose"):
        assert dotted_path in rendered

    # No link may be built from a TLD-looking prefix of a dotted path.
    for bogus_href in ('href="https://imblearn.com', 'href="https://sklearn.me'):
        assert bogus_href not in rendered

    # A real domain followed by a path is still autolinked.
    assert 'href="https://example.com/docs"' in rendered
|
||||
|
||||
Reference in New Issue
Block a user