From 12696a05aeb6327028656a2219a915d2f4476fb3 Mon Sep 17 00:00:00 2001 From: ooovenenoso <120500656+ooovenenoso@users.noreply.github.com> Date: Wed, 3 Jun 2026 00:25:26 -0400 Subject: [PATCH] fix(markdown): keep allowed-html placeholders out of fenced code (#1788) --- static/js/markdown.js | 71 ++++++++++--------- ...kdown_codefence_placeholder_regression.mjs | 65 +++++++++++++++++ 2 files changed, 102 insertions(+), 34 deletions(-) create mode 100644 tests/markdown_codefence_placeholder_regression.mjs diff --git a/static/js/markdown.js b/static/js/markdown.js index 7895a13..b158220 100644 --- a/static/js/markdown.js +++ b/static/js/markdown.js @@ -372,10 +372,46 @@ export function processWithThinking(text) { * Convert markdown to HTML */ export function mdToHtml(src) { - // CRITICAL: Extract allowed HTML blocks first (details/summary) const allowedHtmlBlocks = []; + const codeBlocks = []; + const mermaidBlocks = []; let s = (src ?? ''); + // Extract fenced code blocks before any markdown/HTML preservation passes. + // Otherwise placeholders from the allowed-HTML sanitizer (e.g. + // ___ALLOWED_HTML_0___) can leak into quoted HTML/JS samples, because the + // placeholder gets captured as literal code content and never restored inside + // the final
 block.
+  s = s.replace(/```(\w+)?\n([\s\S]*?)```/g, (_, lang, code) => {
+    const cleaned = code
+      .replace(/\r\n/g, '\n')
+      .replace(/[ \t]+$/gm, '')
+      .replace(/^\s*\n+/, '')
+      .replace(/\n+\s*$/g, '');
+
+    // Mermaid diagrams: render as diagram instead of code block
+    if (lang && lang.toLowerCase() === 'mermaid') {
+      const mermaidId = 'mermaid-' + Date.now() + '-' + mermaidBlocks.length;
+      const raw = cleaned.replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&amp;/g, '&');
+      const placeholder = `___MERMAID_BLOCK_${mermaidBlocks.length}___`;
+      mermaidBlocks.push(`
${escapeHtml(raw)}
`); + return placeholder; + } + + const escaped = cleaned.replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&amp;/g, '&'); + const placeholder = `___CODE_BLOCK_${codeBlocks.length}___`; + + const langClass = lang ? ` class="language-${lang}"` : ''; + const runnableLangs = ['python','py','javascript','js','html','bash','sh','shell','zsh']; + const runBtn = (lang && runnableLangs.includes(lang.toLowerCase())) + ? `` + : ''; + const editBtn = ``; + codeBlocks.push(`
${escapeHtml(escaped)}${runBtn}${editBtn}
`); + + return placeholder; + }); + // Repair common ways the agent mangles the entity-anchor convention // (`[Name](#kind-)`). Models reliably get the single-link case // right but slip into other formats when listing many in a table. @@ -450,39 +486,6 @@ export function mdToHtml(src) { s = s.replace(/\n{3,}/g, '\n\n'); - // CRITICAL: Extract code blocks and replace with placeholders - const codeBlocks = []; - const mermaidBlocks = []; - s = s.replace(/```(\w+)?\n([\s\S]*?)```/g, (_, lang, code) => { - const cleaned = code - .replace(/\r\n/g, '\n') - .replace(/[ \t]+$/gm, '') - .replace(/^\s*\n+/, '') - .replace(/\n+\s*$/g, ''); - - // Mermaid diagrams: render as diagram instead of code block - if (lang && lang.toLowerCase() === 'mermaid') { - const mermaidId = 'mermaid-' + Date.now() + '-' + mermaidBlocks.length; - const raw = cleaned.replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&amp;/g, '&'); - const placeholder = `___MERMAID_BLOCK_${mermaidBlocks.length}___`; - mermaidBlocks.push(`
${escapeHtml(raw)}
`); - return placeholder; - } - - const escaped = cleaned.replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&amp;/g, '&'); - const placeholder = `___CODE_BLOCK_${codeBlocks.length}___`; - - const langClass = lang ? ` class="language-${lang}"` : ''; - const runnableLangs = ['python','py','javascript','js','html','bash','sh','shell','zsh']; - const runBtn = (lang && runnableLangs.includes(lang.toLowerCase())) - ? `` - : ''; - const editBtn = ``; - codeBlocks.push(`
${escapeHtml(escaped)}${runBtn}${editBtn}
`); - - return placeholder; - }); - // KaTeX math rendering (after code blocks are extracted, so math in code is safe) const mathBlocks = []; if (window.katex) { diff --git a/tests/markdown_codefence_placeholder_regression.mjs b/tests/markdown_codefence_placeholder_regression.mjs new file mode 100644 index 0000000..a57cabe --- /dev/null +++ b/tests/markdown_codefence_placeholder_regression.mjs @@ -0,0 +1,65 @@ +import assert from 'node:assert/strict'; +import fs from 'node:fs'; +import path from 'node:path'; +import vm from 'node:vm'; +import { fileURLToPath } from 'node:url'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const markdownPath = path.join(__dirname, '..', 'static', 'js', 'markdown.js'); +let src = fs.readFileSync(markdownPath, 'utf8'); + +src = src.replace( + /import uiModule from '\.\/ui\.js';/, + 'const uiModule = { esc: (s) => String(s).replace(/&/g, "&").replace(//g, ">").replace(/\\"/g, """) };' +); +src = src.replace( + /import \{ splitTableRow \} from '\.\/markdown\/tableRow\.js';/, + 'const splitTableRow = (row) => row.split("|").filter((cell) => cell.trim() !== "");' +); +src = src.replace(/export function /g, 'function '); +src = src.replace(/export const /g, 'const '); +src = src.replace(/export default markdownModule;?/g, ''); +src += '\nthis.__mdToHtml = mdToHtml;'; + +class MutationObserver { + observe() {} + disconnect() {} +} + +const sandbox = { + console, + URL, + MutationObserver, + localStorage: { getItem() { return '[]'; }, setItem() {} }, + document: { + body: { classList: { contains() { return true; } } }, + addEventListener() {}, + querySelectorAll() { return []; }, + getElementById() { return null; }, + contains() { return true; }, + }, + window: { + location: { origin: 'http://localhost' }, + katex: null, + mermaid: null, + }, +}; + +vm.createContext(sandbox); +vm.runInContext(src, sandbox, { filename: markdownPath }); + +const input = [ + '> ```html', + '> ', + '> ```', +].join('\n'); + +const html = 
sandbox.__mdToHtml(input); +assert.equal(html.includes('___ALLOWED_HTML_'), false, html); +assert.equal(html.includes('appendChild'), true, html); + +console.log('ok');