Files
odysseus/tests/test_markdown_rendering_js.py

188 lines
6.7 KiB
Python

"""Regression coverage for the browser markdown renderer."""
import json
import shutil
import subprocess
import textwrap
from pathlib import Path
import pytest
_REPO = Path(__file__).resolve().parent.parent
_HAS_NODE = shutil.which("node") is not None
@pytest.fixture(scope="module")
def node_available():
if not _HAS_NODE:
pytest.skip("node binary not on PATH")
def _run_markdown_case(markdown: str, render_expr: str = "mod.mdToHtml(input)"):
script = textwrap.dedent(
r"""
import fs from 'node:fs';
globalThis.window = { location: { origin: 'http://localhost' }, katex: null };
globalThis.document = {
readyState: 'loading',
addEventListener() {},
createElement(tag) {
if (tag !== 'template') throw new Error(`unsupported element: ${tag}`);
return {
_html: '',
content: { querySelectorAll() { return []; } },
set innerHTML(value) { this._html = value; },
get innerHTML() { return this._html; },
};
},
};
globalThis.MutationObserver = class { observe() {} };
let source = fs.readFileSync('./static/js/markdown.js', 'utf8');
source = source.replace(
/import uiModule from ['"]\.\/ui\.js['"];/,
''
);
source = source.replace(
/import \{ splitTableRow \} from ['"]\.\/markdown\/tableRow\.js['"];/,
`function splitTableRow(row) {
return (row || '').replace(/^\\s*\\|/, '').replace(/\\|\\s*$/, '').split('|').map(c => c.trim());
}`
);
// markdown.js imports the emoji-shortcode helpers relatively (issue #345),
// which a data: URL module can't resolve. Inline the REAL helpers (minus
// their export keywords) so the renderer's shortcode pass behaves exactly
// as it does in the browser.
const emojiSource = fs.readFileSync('./static/js/emojiShortcodes.js', 'utf8')
.replace(/^export default .*$/m, '')
.replace(/export const /g, 'const ')
.replace(/export function /g, 'function ');
source = source.replace(
/import \{ replaceEmojiShortcodes, hasEmojiShortcode \} from ['"]\.\/emojiShortcodes\.js['"];/,
() => emojiSource
);
source = source.replace(
/var escapeHtml = uiModule\.esc;/,
`var escapeHtml = (value) => String(value ?? '')
.replace(/&/g, '&')
.replace(/</g, '&lt;')
.replace(/>/g, '&gt;')
.replace(/"/g, '&quot;')
.replace(/'/g, '&#39;');`
);
const moduleUrl = 'data:text/javascript;base64,' + Buffer.from(source).toString('base64');
const mod = await import(moduleUrl);
const input = JSON.parse(process.argv[1]);
console.log(JSON.stringify({ html: __RENDER_EXPR__ }));
"""
).replace("__RENDER_EXPR__", render_expr)
result = subprocess.run(
["node", "--input-type=module", "-e", script, json.dumps(markdown)],
cwd=_REPO,
capture_output=True,
timeout=15,
text=True,
)
if result.returncode != 0:
raise AssertionError(f"node failed:\nSTDERR:\n{result.stderr}\nSTDOUT:\n{result.stdout}")
return json.loads(result.stdout.splitlines()[-1])["html"]
def test_ordered_lists_render_as_one_unwrapped_ol(node_available):
html = _run_markdown_case(
"Before\n\n"
"1. **Check against the home page** — that's the visual reference for how things should feel.\n"
"2. **Open DevTools** and inspect the element — check fonts, colors, and spacing against this guide.\n"
"3. **Flag it** — note the page, the section, what's wrong, and what CSS rule you suspect.\n"
"4. **Small fixes** — if you know the fix (e.g. wrong CSS variable, wrong font), go ahead and change it in the CSS Module file.\n"
"5. **Big changes** — Talk it through before making wide changes across many pages.\n\n"
"After"
)
assert html.count("<ol>") == 1
assert html.count("</ol>") == 1
assert html.count("<li>") == 5
assert "<ul>" not in html
assert "<oli>" not in html
assert "<uli>" not in html
assert "<p><ol>" not in html
assert "<p><li>" not in html
assert "<p>Before</p>" in html
assert "<p>After</p>" in html
def test_table_separator_row_not_rendered_as_data(node_available):
html = _run_markdown_case("| A | B |\n|---|---|\n| 1 | 2 |")
assert html.count("<tr>") == 2
assert "<th" in html
assert "<td" in html
assert "---" not in html
def test_process_with_thinking_handles_gemma4_thought_channel(node_available):
html = _run_markdown_case(
"<|channel>thought\ninternal reasoning<channel|>Final answer.",
"mod.processWithThinking(input)",
)
assert "thinking-section" in html
assert "internal reasoning" in html
assert "Final answer." in html
assert "&lt;|channel&gt;" not in html
assert "<|channel>" not in html
def test_process_with_thinking_strips_empty_gemma4_thought_channel(node_available):
html = _run_markdown_case(
"<|channel>thought\n<channel|>Final answer.",
"mod.processWithThinking(input)",
)
assert "thinking-section" not in html
assert "Final answer." in html
assert "&lt;|channel&gt;" not in html
assert "<|channel>" not in html
def test_process_with_thinking_unwraps_gemma4_response_channel(node_available):
html = _run_markdown_case(
"<|channel>thought\ninternal reasoning<channel|><|channel>response\nFinal answer.<channel|>",
"mod.processWithThinking(input)",
)
assert "thinking-section" in html
assert "internal reasoning" in html
assert "Final answer." in html
assert "&lt;|channel&gt;" not in html
assert "<|channel>" not in html
def test_extract_thinking_blocks_handles_thought_tag(node_available):
result = _run_markdown_case(
"<thought>internal reasoning</thought>Final answer.",
"mod.extractThinkingBlocks(input)",
)
assert result["thinkingBlocks"] == ["internal reasoning"]
assert result["content"] == "Final answer."
def test_dotted_python_import_paths_are_not_autolinked(node_available):
html = _run_markdown_case(
"from imblearn.combine import SMOTETomek\n"
"from sklearn.metrics import f1_score\n"
"from sklearn.compose import ColumnTransformer\n\n"
"See example.com/docs for normal domain autolinking."
)
assert "___ALLOWED_HTML_" not in html
assert "imblearn.combine" in html
assert "sklearn.metrics" in html
assert "sklearn.compose" in html
assert 'href="https://imblearn.com' not in html
assert 'href="https://sklearn.me' not in html
assert 'href="https://example.com/docs"' in html