fix(markdown): keep allowed-html placeholders out of fenced code (#1788)
This commit is contained in:
@@ -372,10 +372,46 @@ export function processWithThinking(text) {
|
||||
* Convert markdown to HTML
|
||||
*/
|
||||
export function mdToHtml(src) {
|
||||
// CRITICAL: Extract allowed HTML blocks first (details/summary)
|
||||
const allowedHtmlBlocks = [];
|
||||
const codeBlocks = [];
|
||||
const mermaidBlocks = [];
|
||||
let s = (src ?? '');
|
||||
|
||||
// Extract fenced code blocks before any markdown/HTML preservation passes.
|
||||
// Otherwise placeholders from the allowed-HTML sanitizer (e.g.
|
||||
// ___ALLOWED_HTML_0___) can leak into quoted HTML/JS samples, because the
|
||||
// placeholder gets captured as literal code content and never restored inside
|
||||
// the final <pre><code> block.
|
||||
s = s.replace(/```(\w+)?\n([\s\S]*?)```/g, (_, lang, code) => {
|
||||
const cleaned = code
|
||||
.replace(/\r\n/g, '\n')
|
||||
.replace(/[ \t]+$/gm, '')
|
||||
.replace(/^\s*\n+/, '')
|
||||
.replace(/\n+\s*$/g, '');
|
||||
|
||||
// Mermaid diagrams: render as diagram instead of code block
|
||||
if (lang && lang.toLowerCase() === 'mermaid') {
|
||||
const mermaidId = 'mermaid-' + Date.now() + '-' + mermaidBlocks.length;
|
||||
const raw = cleaned.replace(/</g, '<').replace(/>/g, '>').replace(/&/g, '&');
|
||||
const placeholder = `___MERMAID_BLOCK_${mermaidBlocks.length}___`;
|
||||
mermaidBlocks.push(`<div class="mermaid-container"><pre class="mermaid" id="${mermaidId}">${escapeHtml(raw)}</pre></div>`);
|
||||
return placeholder;
|
||||
}
|
||||
|
||||
const escaped = cleaned.replace(/</g, '<').replace(/>/g, '>').replace(/&/g, '&');
|
||||
const placeholder = `___CODE_BLOCK_${codeBlocks.length}___`;
|
||||
|
||||
const langClass = lang ? ` class="language-${lang}"` : '';
|
||||
const runnableLangs = ['python','py','javascript','js','html','bash','sh','shell','zsh'];
|
||||
const runBtn = (lang && runnableLangs.includes(lang.toLowerCase()))
|
||||
? `<button type="button" class="run-code" data-code="${escapeHtml(escaped)}" data-lang="${lang}" title="Run code"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><polygon points="5 3 19 12 5 21 5 3"/></svg></button>`
|
||||
: '';
|
||||
const editBtn = `<button type="button" class="edit-code" title="Edit"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"/><path d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"/></svg></button>`;
|
||||
codeBlocks.push(`<pre><code${langClass} data-lang="${lang || ''}">${escapeHtml(escaped)}</code>${runBtn}${editBtn}<button type="button" class="copy-code" data-code="${escapeHtml(escaped)}"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2"/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg></button></pre>`);
|
||||
|
||||
return placeholder;
|
||||
});
|
||||
|
||||
// Repair common ways the agent mangles the entity-anchor convention
|
||||
// (`[Name](#kind-<id>)`). Models reliably get the single-link case
|
||||
// right but slip into other formats when listing many in a table.
|
||||
@@ -450,39 +486,6 @@ export function mdToHtml(src) {
|
||||
|
||||
s = s.replace(/\n{3,}/g, '\n\n');
|
||||
|
||||
// CRITICAL: Extract code blocks and replace with placeholders
|
||||
const codeBlocks = [];
|
||||
const mermaidBlocks = [];
|
||||
s = s.replace(/```(\w+)?\n([\s\S]*?)```/g, (_, lang, code) => {
|
||||
const cleaned = code
|
||||
.replace(/\r\n/g, '\n')
|
||||
.replace(/[ \t]+$/gm, '')
|
||||
.replace(/^\s*\n+/, '')
|
||||
.replace(/\n+\s*$/g, '');
|
||||
|
||||
// Mermaid diagrams: render as diagram instead of code block
|
||||
if (lang && lang.toLowerCase() === 'mermaid') {
|
||||
const mermaidId = 'mermaid-' + Date.now() + '-' + mermaidBlocks.length;
|
||||
const raw = cleaned.replace(/</g, '<').replace(/>/g, '>').replace(/&/g, '&');
|
||||
const placeholder = `___MERMAID_BLOCK_${mermaidBlocks.length}___`;
|
||||
mermaidBlocks.push(`<div class="mermaid-container"><pre class="mermaid" id="${mermaidId}">${escapeHtml(raw)}</pre></div>`);
|
||||
return placeholder;
|
||||
}
|
||||
|
||||
const escaped = cleaned.replace(/</g, '<').replace(/>/g, '>').replace(/&/g, '&');
|
||||
const placeholder = `___CODE_BLOCK_${codeBlocks.length}___`;
|
||||
|
||||
const langClass = lang ? ` class="language-${lang}"` : '';
|
||||
const runnableLangs = ['python','py','javascript','js','html','bash','sh','shell','zsh'];
|
||||
const runBtn = (lang && runnableLangs.includes(lang.toLowerCase()))
|
||||
? `<button type="button" class="run-code" data-code="${escapeHtml(escaped)}" data-lang="${lang}" title="Run code"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><polygon points="5 3 19 12 5 21 5 3"/></svg></button>`
|
||||
: '';
|
||||
const editBtn = `<button type="button" class="edit-code" title="Edit"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"/><path d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"/></svg></button>`;
|
||||
codeBlocks.push(`<pre><code${langClass} data-lang="${lang || ''}">${escapeHtml(escaped)}</code>${runBtn}${editBtn}<button type="button" class="copy-code" data-code="${escapeHtml(escaped)}"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2"/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg></button></pre>`);
|
||||
|
||||
return placeholder;
|
||||
});
|
||||
|
||||
// KaTeX math rendering (after code blocks are extracted, so math in code is safe)
|
||||
const mathBlocks = [];
|
||||
if (window.katex) {
|
||||
|
||||
65
tests/markdown_codefence_placeholder_regression.mjs
Normal file
65
tests/markdown_codefence_placeholder_regression.mjs
Normal file
@@ -0,0 +1,65 @@
|
||||
import assert from 'node:assert/strict';
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import vm from 'node:vm';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
// Regression test: allowed-HTML placeholders (___ALLOWED_HTML_N___) must not
// leak into the rendered output when HTML appears inside a fenced code block.
//
// markdown.js is a browser ES module, so it is loaded as text, its imports
// and exports are rewritten into plain declarations, and the result is
// evaluated inside a vm sandbox that provides minimal browser-like globals.

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const markdownPath = path.join(__dirname, '..', 'static', 'js', 'markdown.js');
let src = fs.readFileSync(markdownPath, 'utf8');

// Replace the ui.js import with a self-contained HTML-escaping stub.
// The entity names must be spelled out: a stub whose replacements are the raw
// characters themselves would be a no-op for &, <, > and a syntax error for ".
src = src.replace(
  /import uiModule from '\.\/ui\.js';/,
  'const uiModule = { esc: (s) => String(s).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/\\"/g, "&quot;") };'
);
// Replace the table-row helper import with a simplified pipe splitter.
src = src.replace(
  /import \{ splitTableRow \} from '\.\/markdown\/tableRow\.js';/,
  'const splitTableRow = (row) => row.split("|").filter((cell) => cell.trim() !== "");'
);
// Strip module syntax so the source can run as a classic script in the vm.
src = src.replace(/export function /g, 'function ');
src = src.replace(/export const /g, 'const ');
src = src.replace(/export default markdownModule;?/g, '');
// Expose the function under test on the sandbox's global object.
src += '\nthis.__mdToHtml = mdToHtml;';

// No-op stand-in for the browser MutationObserver.
class MutationObserver {
  observe() {}
  disconnect() {}
}

// Minimal browser-like globals handed to the evaluated module source.
const sandbox = {
  console,
  URL,
  MutationObserver,
  localStorage: { getItem() { return '[]'; }, setItem() {} },
  document: {
    body: { classList: { contains() { return true; } } },
    addEventListener() {},
    querySelectorAll() { return []; },
    getElementById() { return null; },
    contains() { return true; },
  },
  window: {
    location: { origin: 'http://localhost' },
    // Null disables the KaTeX branch guarded by `if (window.katex)`.
    katex: null,
    mermaid: null,
  },
};

vm.createContext(sandbox);
vm.runInContext(src, sandbox, { filename: markdownPath });

// Fail with a clear message if the rewrite above did not expose the entry point.
assert.equal(typeof sandbox.__mdToHtml, 'function', 'mdToHtml was not exported into the sandbox');

// A blockquoted fenced code block whose body contains raw HTML/JS.
const input = [
  '> ```html',
  '> <script>',
  '> newWindow.addEventListener(\'click\', () => {',
  '> desktop.appendChild(newWindow);',
  '> });',
  '> </script>',
  '> ```',
].join('\n');

const html = sandbox.__mdToHtml(input);

// No allowed-HTML placeholder may survive into the output...
assert.equal(html.includes('___ALLOWED_HTML_'), false, html);
// ...and the code sample's content must still be present.
assert.equal(html.includes('appendChild'), true, html);

console.log('ok');
|
||||
Reference in New Issue
Block a user