// static/js/markdown.js
/**
* Markdown rendering and content processing utilities
*/
import uiModule from './ui.js';
import { splitTableRow } from './markdown/tableRow.js';
import { replaceEmojiShortcodes, hasEmojiShortcode } from './emojiShortcodes.js';
// Local alias for the UI module's `esc` helper; used below to escape text
// before it is placed into generated HTML strings.
var escapeHtml = uiModule.esc;
/**
 * Validate a link target and return a form that is safe to use as an `href`.
 *
 * Accepted inputs:
 *  - in-page anchors (`#id`) whose id uses only `[A-Za-z0-9_-]`, returned as-is;
 *  - absolute or relative URLs that resolve (against the page origin) to
 *    `http:` or `https:`, returned as the normalized `href`.
 *
 * Everything else — empty/missing input, unparseable URLs, and any other
 * scheme such as `javascript:` or `data:` — yields the empty string.
 *
 * @param {*} rawUrl - Candidate URL; falsy values are treated as empty.
 * @returns {string} The safe URL, or `''` when the input must not be linked.
 */
function safeLinkUrl(rawUrl) {
  const url = String(rawUrl || '').trim();

  // A missing target must not become a link: `new URL('', base)` would
  // otherwise resolve to the base origin itself.
  if (url === '') {
    return '';
  }

  if (url.startsWith('#')) {
    return /^#[A-Za-z0-9_-]*$/.test(url) ? url : '';
  }

  try {
    const parsed = new URL(url, window.location.origin);
    if (parsed.protocol === 'http:' || parsed.protocol === 'https:') {
      return parsed.href;
    }
  } catch (_) {
    // Deliberate best-effort: anything the URL parser rejects is unsafe.
    return '';
  }
  return '';
}
/**
 * Render a markdown link as an HTML string.
 *
 * The URL is vetted by `safeLinkUrl`; when it is rejected the link degrades
 * to its escaped text so no unsafe target ever reaches the DOM.
 *
 * @param {string} text - Visible link text (escaped before output).
 * @param {string} url - Raw link target from the source text.
 * @returns {string} An `<a>` element for a safe URL, otherwise the escaped text.
 */
function linkHtml(text, url) {
  const safeUrl = safeLinkUrl(url);
  const safeText = escapeHtml(text);
  if (!safeUrl) return safeText;

  // Escape the href as well: a serialized URL may still contain characters
  // such as `&` or `"` that are significant inside an attribute value.
  // NOTE(review): assumes `escapeHtml` escapes double quotes — confirm in ui.js.
  const href = escapeHtml(safeUrl);

  if (safeUrl.startsWith('#')) {
    // In-page anchors stay in the current browsing context.
    return `<a href="${href}">${safeText}</a>`;
  }
  // External targets open in a new tab without access to `window.opener`.
  return `<a href="${href}" target="_blank" rel="noopener noreferrer">${safeText}</a>`;
}
/**
* Sanitize the raw-HTML fragments that mdToHtml deliberately preserves from
* the source text — `<details>` blocks (collapsible agent output) and `<a>`
* tags (emitted by the markdown link pass). Those fragments are later restored
* verbatim into innerHTML, so without scrubbing them a model — or any content
* routed through here — could smuggle in a `<script>`, an `<iframe>`, an
* `onmouseover=` handler, etc. and execute
* script in the authenticated page (DOM XSS).
*
* Parsing into a `<template>` is inert: assigning to template.innerHTML neither
* fetches resources nor runs scripts, so we can walk the resulting tree,
* drop script-capable elements, and strip event-handler attributes and
* dangerous URL schemes before the (now safe) fragment is handed back.
*/
const _ALLOWED_HTML_BAD_TAGS = new Set([
'SCRIPT', 'IFRAME', 'OBJECT', 'EMBED', 'LINK', 'META',
'STYLE', 'BASE', 'FORM', 'NOSCRIPT', 'TEMPLATE',
// Foreign-content roots. SVG/MathML have their own parser rules and are a
// classic mutation-XSS vehicle — e.g. an SVG-namespaced