fix(markdown): avoid autolinking dotted imports (#2295)
This commit is contained in:
@@ -524,9 +524,11 @@ export function mdToHtml(src, opts) {
|
|||||||
// allowlist keeps it from matching file names / versions ("package.json",
|
// allowlist keeps it from matching file names / versions ("package.json",
|
||||||
// "node.js", "v1.2.3"); the required start/[\s(<] prefix means domains
|
// "node.js", "v1.2.3"); the required start/[\s(<] prefix means domains
|
||||||
// already inside an http link (preceded by "//") or an email ("@") are
|
// already inside an http link (preceded by "//") or an email ("@") are
|
||||||
// skipped. Trailing sentence punctuation is kept outside the link.
|
// skipped. Require the TLD to end at a real domain boundary so dotted code
|
||||||
|
// identifiers like `sklearn.metrics` do not link `sklearn.me` and leave
|
||||||
|
// placeholder fragments in the remaining text.
|
||||||
s = s.replace(
|
s = s.replace(
|
||||||
/(^|[\s(<])((?:www\.)?[a-z0-9](?:[a-z0-9-]*[a-z0-9])?(?:\.[a-z0-9-]+)*\.(?:com|org|net|io|ai|co|dev|app|gov|edu|news|info|tech|xyz|me)(?:\/[^\s<>"'`\])]*)?)/gi,
|
/(^|[\s(<])((?:www\.)?[a-z0-9](?:[a-z0-9-]*[a-z0-9])?(?:\.[a-z0-9-]+)*\.(?:com|org|net|io|ai|co|dev|app|gov|edu|news|info|tech|xyz|me)(?=$|[\/\s<>"'`\]).,;:!?])(?:\/[^\s<>"'`\])]*)?)/gi,
|
||||||
(match, prefix, domain) => {
|
(match, prefix, domain) => {
|
||||||
const trail = (domain.match(/[.,;:!?)]+$/) || [''])[0];
|
const trail = (domain.match(/[.,;:!?)]+$/) || [''])[0];
|
||||||
const core = trail ? domain.slice(0, -trail.length) : domain;
|
const core = trail ? domain.slice(0, -trail.length) : domain;
|
||||||
|
|||||||
@@ -27,6 +27,15 @@ def _run_markdown_case(markdown: str, render_expr: str = "mod.mdToHtml(input)"):
|
|||||||
globalThis.document = {
|
globalThis.document = {
|
||||||
readyState: 'loading',
|
readyState: 'loading',
|
||||||
addEventListener() {},
|
addEventListener() {},
|
||||||
|
createElement(tag) {
|
||||||
|
if (tag !== 'template') throw new Error(`unsupported element: ${tag}`);
|
||||||
|
return {
|
||||||
|
_html: '',
|
||||||
|
content: { querySelectorAll() { return []; } },
|
||||||
|
set innerHTML(value) { this._html = value; },
|
||||||
|
get innerHTML() { return this._html; },
|
||||||
|
};
|
||||||
|
},
|
||||||
};
|
};
|
||||||
globalThis.MutationObserver = class { observe() {} };
|
globalThis.MutationObserver = class { observe() {} };
|
||||||
|
|
||||||
@@ -159,3 +168,20 @@ def test_extract_thinking_blocks_handles_thought_tag(node_available):
|
|||||||
|
|
||||||
assert result["thinkingBlocks"] == ["internal reasoning"]
|
assert result["thinkingBlocks"] == ["internal reasoning"]
|
||||||
assert result["content"] == "Final answer."
|
assert result["content"] == "Final answer."
|
||||||
|
|
||||||
|
|
||||||
|
def test_dotted_python_import_paths_are_not_autolinked(node_available):
|
||||||
|
html = _run_markdown_case(
|
||||||
|
"from imblearn.combine import SMOTETomek\n"
|
||||||
|
"from sklearn.metrics import f1_score\n"
|
||||||
|
"from sklearn.compose import ColumnTransformer\n\n"
|
||||||
|
"See example.com/docs for normal domain autolinking."
|
||||||
|
)
|
||||||
|
|
||||||
|
assert "___ALLOWED_HTML_" not in html
|
||||||
|
assert "imblearn.combine" in html
|
||||||
|
assert "sklearn.metrics" in html
|
||||||
|
assert "sklearn.compose" in html
|
||||||
|
assert 'href="https://imblearn.com' not in html
|
||||||
|
assert 'href="https://sklearn.me' not in html
|
||||||
|
assert 'href="https://example.com/docs"' in html
|
||||||
|
|||||||
Reference in New Issue
Block a user