fix(markdown): keep allowed-html placeholders out of fenced code (#1788)

This commit is contained in:
ooovenenoso
2026-06-03 00:25:26 -04:00
committed by GitHub
parent 2fa4d50115
commit 12696a05ae
2 changed files with 102 additions and 34 deletions

View File

@@ -372,10 +372,46 @@ export function processWithThinking(text) {
* Convert markdown to HTML
*/
export function mdToHtml(src) {
// CRITICAL: Allowed HTML blocks (details/summary) are extracted early, but only after the fenced code blocks below have been pulled out.
const allowedHtmlBlocks = [];
const codeBlocks = [];
const mermaidBlocks = [];
let s = (src ?? '');
// Extract fenced code blocks before any markdown/HTML preservation passes.
// Otherwise placeholders from the allowed-HTML sanitizer (e.g.
// ___ALLOWED_HTML_0___) can leak into quoted HTML/JS samples, because the
// placeholder gets captured as literal code content and never restored inside
// the final <pre><code> block.
// The opening fence tolerates trailing spaces/tabs and a CRLF line ending after
// the optional language tag; the callback already normalises CRLF inside the
// body, so the fence line itself must not be the one thing that rejects it.
s = s.replace(/```(\w+)?[ \t]*\r?\n([\s\S]*?)```/g, (_, lang, code) => {
  // Normalise the body: LF newlines only, no trailing spaces/tabs on any line,
  // and no blank lines at the very start or end of the block.
  const cleaned = code
    .replace(/\r\n/g, '\n')
    .replace(/[ \t]+$/gm, '')
    .replace(/^\s*\n+/, '')
    .replace(/\n+\s*$/g, '');
  // Undo one level of entity encoding. `&amp;` is decoded last so that
  // `&amp;lt;` ends up as the literal text `&lt;` rather than `<`.
  const decoded = cleaned.replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&amp;/g, '&');
  // Escape once and reuse the result for the element body and the data-code
  // attributes (assumes escapeHtml is a pure string -> string helper; it is
  // defined outside this block).
  const safeCode = escapeHtml(decoded);
  const langKey = lang ? lang.toLowerCase() : '';

  // Mermaid diagrams: render as diagram instead of code block
  if (langKey === 'mermaid') {
    const mermaidIndex = mermaidBlocks.length;
    // Timestamp plus per-call index keeps ids distinct within one render pass.
    const mermaidId = 'mermaid-' + Date.now() + '-' + mermaidIndex;
    mermaidBlocks.push(`<div class="mermaid-container"><pre class="mermaid" id="${mermaidId}">${safeCode}</pre></div>`);
    return `___MERMAID_BLOCK_${mermaidIndex}___`;
  }

  const placeholder = `___CODE_BLOCK_${codeBlocks.length}___`;
  const langClass = lang ? ` class="language-${lang}"` : '';
  // Only these languages get a "Run code" button; an untagged fence never does.
  const runnableLangs = ['python','py','javascript','js','html','bash','sh','shell','zsh'];
  const runBtn = runnableLangs.includes(langKey)
    ? `<button type="button" class="run-code" data-code="${safeCode}" data-lang="${lang}" title="Run code"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><polygon points="5 3 19 12 5 21 5 3"/></svg></button>`
    : '';
  const editBtn = `<button type="button" class="edit-code" title="Edit"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"/><path d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"/></svg></button>`;
  const copyBtn = `<button type="button" class="copy-code" data-code="${safeCode}"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2"/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg></button>`;
  // The finished markup is parked in codeBlocks; only the placeholder stays in
  // the text that the later passes operate on.
  codeBlocks.push(`<pre><code${langClass} data-lang="${lang || ''}">${safeCode}</code>${runBtn}${editBtn}${copyBtn}</pre>`);
  return placeholder;
});
// Repair common ways the agent mangles the entity-anchor convention
// (`[Name](#kind-<id>)`). Models reliably get the single-link case
// right but slip into other formats when listing many in a table.
@@ -450,39 +486,6 @@ export function mdToHtml(src) {
s = s.replace(/\n{3,}/g, '\n\n');
// CRITICAL: Extract code blocks and replace with placeholders
const codeBlocks = [];
const mermaidBlocks = [];
s = s.replace(/```(\w+)?\n([\s\S]*?)```/g, (_, lang, code) => {
const cleaned = code
.replace(/\r\n/g, '\n')
.replace(/[ \t]+$/gm, '')
.replace(/^\s*\n+/, '')
.replace(/\n+\s*$/g, '');
// Mermaid diagrams: render as diagram instead of code block
if (lang && lang.toLowerCase() === 'mermaid') {
const mermaidId = 'mermaid-' + Date.now() + '-' + mermaidBlocks.length;
const raw = cleaned.replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&amp;/g, '&');
const placeholder = `___MERMAID_BLOCK_${mermaidBlocks.length}___`;
mermaidBlocks.push(`<div class="mermaid-container"><pre class="mermaid" id="${mermaidId}">${escapeHtml(raw)}</pre></div>`);
return placeholder;
}
const escaped = cleaned.replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&amp;/g, '&');
const placeholder = `___CODE_BLOCK_${codeBlocks.length}___`;
const langClass = lang ? ` class="language-${lang}"` : '';
const runnableLangs = ['python','py','javascript','js','html','bash','sh','shell','zsh'];
const runBtn = (lang && runnableLangs.includes(lang.toLowerCase()))
? `<button type="button" class="run-code" data-code="${escapeHtml(escaped)}" data-lang="${lang}" title="Run code"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><polygon points="5 3 19 12 5 21 5 3"/></svg></button>`
: '';
const editBtn = `<button type="button" class="edit-code" title="Edit"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M11 4H4a2 2 0 0 0-2 2v14a2 2 0 0 0 2 2h14a2 2 0 0 0 2-2v-7"/><path d="M18.5 2.5a2.121 2.121 0 0 1 3 3L12 15l-4 1 1-4 9.5-9.5z"/></svg></button>`;
codeBlocks.push(`<pre><code${langClass} data-lang="${lang || ''}">${escapeHtml(escaped)}</code>${runBtn}${editBtn}<button type="button" class="copy-code" data-code="${escapeHtml(escaped)}"><svg width="14" height="14" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect x="9" y="9" width="13" height="13" rx="2"/><path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1"/></svg></button></pre>`);
return placeholder;
});
// KaTeX math rendering (after code blocks are extracted, so math in code is safe)
const mathBlocks = [];
if (window.katex) {

View File

@@ -0,0 +1,65 @@
import assert from 'node:assert/strict';
import fs from 'node:fs';
import path from 'node:path';
import vm from 'node:vm';
import { fileURLToPath } from 'node:url';
/*
 * Regression test: a fenced HTML sample quoted inside a blockquote must render
 * without any `___ALLOWED_HTML_` placeholder text in the output, and the code
 * inside the fence must still be present.
 *
 * markdown.js is loaded as text, its ES-module syntax is rewritten so it can
 * run as a plain script, and it is then evaluated in a vm context populated
 * with minimal stand-ins for the browser globals listed below.
 */
const testDir = path.dirname(fileURLToPath(import.meta.url));
const markdownPath = path.join(testDir, '..', 'static', 'js', 'markdown.js');

// [pattern, replacement] pairs, applied in order. The two imports are swapped
// for inline stubs; the export keywords are dropped so the declarations become
// ordinary script-level bindings.
const moduleRewrites = [
  [
    /import uiModule from '\.\/ui\.js';/,
    'const uiModule = { esc: (s) => String(s).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/\\"/g, "&quot;") };',
  ],
  [
    /import \{ splitTableRow \} from '\.\/markdown\/tableRow\.js';/,
    'const splitTableRow = (row) => row.split("|").filter((cell) => cell.trim() !== "");',
  ],
  [/export function /g, 'function '],
  [/export const /g, 'const '],
  [/export default markdownModule;?/g, ''],
];

const rewrittenSource = moduleRewrites.reduce(
  (text, [pattern, replacement]) => text.replace(pattern, replacement),
  fs.readFileSync(markdownPath, 'utf8'),
);
// Expose the renderer on the context's global object so the test can call it.
const scriptSource = rewrittenSource + '\nthis.__mdToHtml = mdToHtml;';

// Stand-ins for browser globals (presumably touched by markdown.js when it is
// evaluated or when mdToHtml runs -- only the members below are provided).
const fakeLocalStorage = {
  getItem() { return '[]'; },
  setItem() {},
};
const fakeDocument = {
  body: { classList: { contains() { return true; } } },
  addEventListener() {},
  querySelectorAll() { return []; },
  getElementById() { return null; },
  contains() { return true; },
};
const fakeWindow = {
  location: { origin: 'http://localhost' },
  // KaTeX and Mermaid are explicitly absent in this environment.
  katex: null,
  mermaid: null,
};

const context = vm.createContext({
  console,
  URL,
  MutationObserver: class MutationObserver {
    observe() {}
    disconnect() {}
  },
  localStorage: fakeLocalStorage,
  document: fakeDocument,
  window: fakeWindow,
});
vm.runInContext(scriptSource, context, { filename: markdownPath });

// A blockquote whose content is a fenced HTML block containing a <script> tag.
const quotedFence = [
  '> ```html',
  '> <script>',
  "> newWindow.addEventListener('click', () => {",
  '> desktop.appendChild(newWindow);',
  '> });',
  '> </script>',
  '> ```',
].join('\n');

const rendered = context.__mdToHtml(quotedFence);
// The rendered HTML is passed as the failure message to make a red run debuggable.
assert.strictEqual(rendered.includes('___ALLOWED_HTML_'), false, rendered);
assert.strictEqual(rendered.includes('appendChild'), true, rendered);
console.log('ok');