From 12696a05aeb6327028656a2219a915d2f4476fb3 Mon Sep 17 00:00:00 2001 From: ooovenenoso <120500656+ooovenenoso@users.noreply.github.com> Date: Wed, 3 Jun 2026 00:25:26 -0400 Subject: [PATCH] fix(markdown): keep allowed-html placeholders out of fenced code (#1788) --- static/js/markdown.js | 71 ++++++++++--------- ...kdown_codefence_placeholder_regression.mjs | 65 +++++++++++++++++ 2 files changed, 102 insertions(+), 34 deletions(-) create mode 100644 tests/markdown_codefence_placeholder_regression.mjs diff --git a/static/js/markdown.js b/static/js/markdown.js index 7895a13..b158220 100644 --- a/static/js/markdown.js +++ b/static/js/markdown.js @@ -372,10 +372,46 @@ export function processWithThinking(text) { * Convert markdown to HTML */ export function mdToHtml(src) { - // CRITICAL: Extract allowed HTML blocks first (details/summary) const allowedHtmlBlocks = []; + const codeBlocks = []; + const mermaidBlocks = []; let s = (src ?? ''); + // Extract fenced code blocks before any markdown/HTML preservation passes. + // Otherwise placeholders from the allowed-HTML sanitizer (e.g. + // ___ALLOWED_HTML_0___) can leak into quoted HTML/JS samples, because the + // placeholder gets captured as literal code content and never restored inside + // the final
block.
+ s = s.replace(/```(\w+)?\n([\s\S]*?)```/g, (_, lang, code) => {
+ const cleaned = code
+ .replace(/\r\n/g, '\n')
+ .replace(/[ \t]+$/gm, '')
+ .replace(/^\s*\n+/, '')
+ .replace(/\n+\s*$/g, '');
+
+ // Mermaid diagrams: render as diagram instead of code block
+ if (lang && lang.toLowerCase() === 'mermaid') {
+ const mermaidId = 'mermaid-' + Date.now() + '-' + mermaidBlocks.length;
+ const raw = cleaned.replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&amp;/g, '&');
+ const placeholder = `___MERMAID_BLOCK_${mermaidBlocks.length}___`;
+ mermaidBlocks.push(`${escapeHtml(raw)}`);
+ return placeholder;
+ }
+
+ const escaped = cleaned.replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&amp;/g, '&');
+ const placeholder = `___CODE_BLOCK_${codeBlocks.length}___`;
+
+ const langClass = lang ? ` class="language-${lang}"` : '';
+ const runnableLangs = ['python','py','javascript','js','html','bash','sh','shell','zsh'];
+ const runBtn = (lang && runnableLangs.includes(lang.toLowerCase()))
+ ? ``
+ : '';
+ const editBtn = ``;
+ codeBlocks.push(`${escapeHtml(escaped)}${runBtn}${editBtn}
`);
+
+ return placeholder;
+ });
+
// Repair common ways the agent mangles the entity-anchor convention
// (`[Name](#kind-)`). Models reliably get the single-link case
// right but slip into other formats when listing many in a table.
@@ -450,39 +486,6 @@ export function mdToHtml(src) {
s = s.replace(/\n{3,}/g, '\n\n');
- // CRITICAL: Extract code blocks and replace with placeholders
- const codeBlocks = [];
- const mermaidBlocks = [];
- s = s.replace(/```(\w+)?\n([\s\S]*?)```/g, (_, lang, code) => {
- const cleaned = code
- .replace(/\r\n/g, '\n')
- .replace(/[ \t]+$/gm, '')
- .replace(/^\s*\n+/, '')
- .replace(/\n+\s*$/g, '');
-
- // Mermaid diagrams: render as diagram instead of code block
- if (lang && lang.toLowerCase() === 'mermaid') {
- const mermaidId = 'mermaid-' + Date.now() + '-' + mermaidBlocks.length;
- const raw = cleaned.replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&amp;/g, '&');
- const placeholder = `___MERMAID_BLOCK_${mermaidBlocks.length}___`;
- mermaidBlocks.push(`${escapeHtml(raw)}`);
- return placeholder;
- }
-
- const escaped = cleaned.replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&amp;/g, '&');
- const placeholder = `___CODE_BLOCK_${codeBlocks.length}___`;
-
- const langClass = lang ? ` class="language-${lang}"` : '';
- const runnableLangs = ['python','py','javascript','js','html','bash','sh','shell','zsh'];
- const runBtn = (lang && runnableLangs.includes(lang.toLowerCase()))
- ? ``
- : '';
- const editBtn = ``;
- codeBlocks.push(`${escapeHtml(escaped)}${runBtn}${editBtn}
`);
-
- return placeholder;
- });
-
// KaTeX math rendering (after code blocks are extracted, so math in code is safe)
const mathBlocks = [];
if (window.katex) {
diff --git a/tests/markdown_codefence_placeholder_regression.mjs b/tests/markdown_codefence_placeholder_regression.mjs
new file mode 100644
index 0000000..a57cabe
--- /dev/null
+++ b/tests/markdown_codefence_placeholder_regression.mjs
@@ -0,0 +1,65 @@
+import assert from 'node:assert/strict';
+import fs from 'node:fs';
+import path from 'node:path';
+import vm from 'node:vm';
+import { fileURLToPath } from 'node:url';
+
+const __dirname = path.dirname(fileURLToPath(import.meta.url));
+const markdownPath = path.join(__dirname, '..', 'static', 'js', 'markdown.js');
+let src = fs.readFileSync(markdownPath, 'utf8');
+
+src = src.replace(
+ /import uiModule from '\.\/ui\.js';/,
+ 'const uiModule = { esc: (s) => String(s).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;").replace(/\\"/g, "&quot;") };'
+);
+src = src.replace(
+ /import \{ splitTableRow \} from '\.\/markdown\/tableRow\.js';/,
+ 'const splitTableRow = (row) => row.split("|").filter((cell) => cell.trim() !== "");'
+);
+src = src.replace(/export function /g, 'function ');
+src = src.replace(/export const /g, 'const ');
+src = src.replace(/export default markdownModule;?/g, '');
+src += '\nthis.__mdToHtml = mdToHtml;';
+
+class MutationObserver {
+ observe() {}
+ disconnect() {}
+}
+
+const sandbox = {
+ console,
+ URL,
+ MutationObserver,
+ localStorage: { getItem() { return '[]'; }, setItem() {} },
+ document: {
+ body: { classList: { contains() { return true; } } },
+ addEventListener() {},
+ querySelectorAll() { return []; },
+ getElementById() { return null; },
+ contains() { return true; },
+ },
+ window: {
+ location: { origin: 'http://localhost' },
+ katex: null,
+ mermaid: null,
+ },
+};
+
+vm.createContext(sandbox);
+vm.runInContext(src, sandbox, { filename: markdownPath });
+
+const input = [
+ '> ```html',
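+ '> <script>',
+ '> const el = document.createElement("div");',
+ '> el.innerHTML = "<details><summary>More</summary>Body</details>";',
+ '> document.body.appendChild(el);',
+ '> </script>',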
+ '> ```',
+].join('\n');
+
+const html = sandbox.__mdToHtml(input);
+assert.equal(html.includes('___ALLOWED_HTML_'), false, html);
+assert.equal(html.includes('appendChild'), true, html);
+
+console.log('ok');