Odysseus v1.0

This commit is contained in:
pewdiepie-archdaemon
2026-05-31 23:58:26 +09:00
commit e5c99a5eee
421 changed files with 271349 additions and 0 deletions

View File

@@ -0,0 +1,327 @@
// static/js/emailLibrary/signatureFold.js
//
// Heuristics that turn raw HTML email bodies into folded structures —
// "Earlier reply" details collapsing the quoted history, and "Signature"
// details collapsing the trailing corporate disclaimer / boilerplate.
//
// All pure functions of HTML strings (and one DOM-mutating exception:
// `_harvestAttribution` peels nodes off a container). No module state,
// no fetch, no globals. The icons (`_SIG_ICON`, `_QUOTE_ICON`) live here
// since `_foldSummary` is the only caller and other modules pass them in
// via that helper.
import {
_TALON_WROTE, _TALON_FROM, _TALON_SENT, _TALON_ORIG_RE,
_SIG_BLOAT_MIN_CHARS,
} from './utils.js';
// No leading icon on the signature fold — the user explicitly does not
// want a star/emoji-style glyph in this header.
// Icon markup for the signature fold header; intentionally the empty string.
export const _SIG_ICON = '';
// Inline SVG markup (11x11, stroked with `currentColor`) — presumably the icon
// for the quoted-reply ("Earlier reply") fold header; confirm against callers.
export const _QUOTE_ICON = '<svg width="11" height="11" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><polyline points="9 17 4 12 9 7"/><path d="M20 18v-2a4 4 0 0 0-4-4H4"/></svg>';
// HTML-escape used by `_extractQuoteMeta`. This is a local duplicate of the
// `_esc` helper exported by utils.js (this module already imports constants
// from there), so keep the two copies in sync.
/**
 * HTML-escape a string by letting the DOM serializer do the work: the value is
 * assigned as plain text to a detached element and read back as markup.
 * Falsy input (null, undefined, '', 0) yields the empty string.
 *
 * @param {*} text value to escape
 * @returns {string} innerHTML-safe text
 */
function _esc(text) {
  const scratch = document.createElement('div');
  scratch.textContent = text || '';
  const { innerHTML: escaped } = scratch;
  return escaped;
}
// Looks like a signature / corporate disclaimer rather than a quoted email.
// Heuristic: scores known "this is a disclaimer" tells against
// "this is a real email" tells. 3+ disclaimer hits with ≤1 conversational
// hit → signature.
/**
 * Decide whether an HTML fragment reads like a signature / corporate
 * disclaimer rather than a quoted email.
 *
 * The fragment is reduced to whitespace-collapsed plain text, then scored
 * against two pattern lists: "disclaimer" tells and "conversational" tells.
 * It counts as a signature when at least 3 disclaimer tells hit and at most
 * 1 conversational tell hits.
 *
 * @param {*} html fragment to inspect (coerced with String())
 * @returns {boolean} true when the fragment looks like signature boilerplate
 */
export function _looksLikeSignature(html) {
  if (!html) return false;
  const txt = String(html).replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
  if (!txt) return false;

  const SIG_TELLS = [
    /\bregistered\s+in\b/i,
    /\blimited\s+liability\s+partnership\b/i,
    /\b(Pte\.?\s*Ltd|GmbH|S\.A\.|S\.A\.S|LLC|LLP|Inc\.?)\b/,
    /\bintended\s+solely\s+for\b/i,
    /\bconfidential(?:ity)?\s+(?:notice|information)\b/i,
    /\b(?:disclaimer|please\s+(?:notify|delete))\b/i,
    /\bunsubscribe\b/i,
    /\bUEN\b\s*\w/i,
    // Phone number. `+` is a non-word character, so a leading `\b` would only
    // match when a word character sits directly before it ("tel+65…") and
    // would miss the normal "Tel: +65 …" form. Anchor on start-of-text or any
    // non-word character instead.
    /(?:^|\W)\+\d[\d\s().-]{6,}\b/,
  ];
  const PRIOR_TELLS = [
    /\bHi\s+[A-Z][a-z]+\b/,
    /\bDear\s+[A-Z][a-z]+\b/,
    /\bRegards\b/i,
    /\?\s*$/,
  ];

  // None of the patterns carry the `g` flag, so `test` is stateless here.
  const countHits = (patterns) => patterns.filter((re) => re.test(txt)).length;
  return countHits(SIG_TELLS) >= 3 && countHits(PRIOR_TELLS) <= 1;
}
// Look for an "On <date>, <addr> wrote:" line at the END of a fragment
// and remove it (returning the captured meta string, or null). Also
// handles Outlook-style "From: ... Sent: ... Subject: ..." blocks.
/**
 * Detect a reply attribution near the end of `container`, peel the trailing
 * nodes off it, and return the sender/date string extracted from them.
 *
 * Detection passes when any of these holds (checked in this order):
 *   1. one of the last 3 lines of the trimmed text ends with a "wrote:" marker
 *      (or has one followed by `<`);
 *   2. the last 12 lines contain a "From: … Sent:" header pair;
 *   3. the text contains an "Original Message" style separator.
 *
 * MUTATES `container`: child nodes are collected from the end until the
 * collected text exceeds 600 characters (or the children run out), and every
 * collected node is removed.
 *
 * @param {Node} container element whose trailing children may be removed
 * @returns {string|null} extracted meta string, or null when nothing was
 *   detected or nothing could be extracted
 */
export function _harvestAttribution(container) {
  const fullText = container.textContent || '';
  const lastLinesOf = (str, count) => str.split('\n').slice(-count).join('\n');

  const wroteRe = new RegExp(`${_TALON_WROTE}\\s*:\\s*$|${_TALON_WROTE}\\s*:\\s*<`, 'i');
  const endsWithWroteLine = wroteRe.test(lastLinesOf(fullText.trim(), 3));
  // Built lazily so the header regex is only constructed when the first check fails.
  const hasOutlookHeader = () =>
    new RegExp(`${_TALON_FROM}\\s*:.*?${_TALON_SENT}\\s*:`, 'is').test(lastLinesOf(fullText, 12));
  if (!endsWithWroteLine && !hasOutlookHeader() && !_TALON_ORIG_RE.test(fullText)) {
    return null;
  }

  const textOf = (node) => node.textContent || '';
  const peeled = [];
  for (let idx = container.childNodes.length - 1; idx >= 0; idx -= 1) {
    const node = container.childNodes[idx];
    peeled.unshift(node);
    // Blank nodes are always taken; the size cap is only checked on nodes with text.
    const isBlank = textOf(node).trim() === '';
    if (!isBlank && peeled.map((n) => textOf(n)).join('\n').length > 600) break;
  }

  // Extract the meta before detaching anything.
  const markup = peeled.map((node) => node.outerHTML || node.textContent || '').join('');
  const meta = _extractQuoteMeta(markup);
  for (const node of peeled) {
    try { container.removeChild(node); } catch {}
  }
  return meta || null;
}
/**
 * Extract the sender/date meta string from the start of a blockquote.
 * Only the first 2000 characters of the element's markup are inspected.
 *
 * @param {{innerHTML: string}} bq blockquote element
 * @returns {string|null} meta string, or null when none was found
 */
export function _extractTurnMetaFromBlockquote(bq) {
  const head = bq.innerHTML.slice(0, 2000);
  return _extractQuoteMeta(head) || null;
}
// "Earlier reply" / "Signature" summary header — caller supplies the
// label string + icon SVG. `meta`, when present, is split on " · " to
// promote the sender's name to the headline.
/**
 * Build the `<summary>` header markup for a fold.
 *
 * When `meta` contains " · " after its first character, the part before it
 * becomes the headline and the rest the secondary text. Otherwise a `meta` of
 * at most 80 characters that does not start with a digit replaces the
 * headline outright; any other `meta` is shown as secondary text under
 * `label`. `label`, `iconSvg` and `meta` are inserted as-is (no escaping).
 *
 * @param {string} label fallback headline
 * @param {string} iconSvg markup placed before the headline
 * @param {string} [meta] optional "name · date" style string
 * @returns {string} `<summary>` markup
 */
export function _foldSummary(label, iconSvg, meta) {
  const SEPARATOR = ' · ';
  let headline = label;
  let detail = meta || '';

  if (meta) {
    const cut = meta.indexOf(SEPARATOR);
    const isShortNonDate = meta.length <= 80 && !/^\d/.test(meta);
    if (cut > 0) {
      headline = meta.slice(0, cut);
      detail = meta.slice(cut + SEPARATOR.length);
    } else if (isShortNonDate) {
      headline = meta;
      detail = '';
    }
  }

  let detailSpan = '';
  if (detail) {
    detailSpan = `<span class="email-fold-summary-meta">${detail}</span>`;
  }
  const nameSpan = `<span class="email-fold-summary-name">${headline}</span>`;
  const chevron = '<svg class="email-summary-chevron" width="10" height="10" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2.5" stroke-linecap="round" stroke-linejoin="round" style="margin-left:auto;transition:transform .15s ease;"><polyline points="6 9 12 15 18 9"/></svg>';

  return '<summary class="email-fold-summary">' + iconSvg + nameSpan + detailSpan + chevron + '</summary>';
}
// Extract sender + date from a quoted email block. Tries Outlook-style
// "From: X · Sent: Y" header first, falls back to Gmail-style
// "On <date>, <addr> wrote:". Returns a display string like
// "Jane Doe · Mon, Apr 18, 2026 at 9:31 AM" or `''`.
/**
 * Extract a "sender · date" display string from a quoted email block.
 *
 * The markup is reduced to plain text (style blocks and tags removed, a small
 * set of entities decoded, whitespace collapsed, capped at 1500 characters).
 * A "From: … / Sent: …" header is tried first; if neither field is found, an
 * "On <date>, <sender> wrote:" line is tried. The sender is capped at 60
 * characters and the date at 28, each truncated with an ellipsis.
 *
 * @param {string} html markup of the quoted block
 * @returns {string} HTML-escaped "sender · date", just one of the two, or ''
 */
export function _extractQuoteMeta(html) {
  if (!html) return '';
  const txt = String(html)
    .replace(/<style[\s\S]*?<\/style>/gi, '')
    .replace(/<[^>]+>/g, ' ')
    .replace(/&nbsp;/gi, ' ')
    .replace(/&lt;/gi, '<')
    .replace(/&gt;/gi, '>')
    .replace(/&quot;/gi, '"')
    // `&amp;` must be decoded LAST. Decoding it first turns literal text such
    // as "&amp;lt;" into "&lt;", which the next step would decode a second
    // time into "<".
    .replace(/&amp;/gi, '&')
    .replace(/\s+/g, ' ')
    .slice(0, 1500);

  const FROM = '(?:From|Från|Von|De|Da|От|Od|Van)';
  const SENT = '(?:Sent|Skickat|Gesendet|Envoyé|Inviato|Enviado|Verzonden|Отправлено|Wysłane|Date)';
  // A header value runs until the next recognised "Label:" token.
  const STOP = `(?=\\s+(?:To|Cc|Bcc|Subject|Ämne|Betreff|Objet|Oggetto|Asunto|Onderwerp|Тема|Temat|${SENT})\\s*:)`;
  const fromMatch = txt.match(new RegExp(`${FROM}\\s*:\\s*(.+?)${STOP}`, 'i'));
  const sentMatch = txt.match(new RegExp(`${SENT}\\s*:\\s*([^\\n]+?)(?=\\s+(?:To|Cc|Bcc|Subject|Ämne|Betreff|Objet|Oggetto|Asunto|Onderwerp|Тема|Temat)\\s*:)`, 'i'));
  let from = fromMatch ? fromMatch[1].trim() : '';
  let date = sentMatch ? sentMatch[1].trim() : '';

  if (!from && !date) {
    const gmail = txt.match(/On\s+([^,]+?,[^,]+?\d{4}[^,]*),?\s+(.+?)\s+wrote\s*:/i);
    if (gmail) {
      date = gmail[1].trim();
      from = gmail[2].trim();
    }
  }

  // Drop the angle brackets around an address: "Jane <j@x>" -> "Jane j@x".
  from = from.replace(/[<>]/g, '').replace(/\s+/g, ' ').trim();
  date = date.replace(/\s+/g, ' ').trim();
  if (from.length > 60) from = from.slice(0, 57) + '…';
  if (date.length > 28) date = date.slice(0, 25) + '…';

  if (from && date) return `${_esc(from)} · ${_esc(date)}`;
  if (from) return _esc(from);
  if (date) return _esc(date);
  return '';
}
// Peel the first non-empty line off the signature tail. That line is
// usually the signer's name — keep it inline so "Kind regards, / Bob"
// reads naturally. Returns `{ preBloat, bloat }` — `bloat` is what
// should go into the fold; `preBloat` stays visible above it.
/**
 * Split the markup that follows a closing phrase into the part that stays
 * visible (`preBloat`) and the part that goes into the fold (`bloat`).
 *
 * Lines are delimited by `<br>`, `</p>`, `</div>` or a newline. The first line
 * with visible text decides the split:
 *   - if it looks like contact boilerplate (contains `@`, "tel:", a URL, a
 *     bare phone number, "sent from", …) the fold starts AT that line;
 *   - otherwise it is treated as the signer's name and the fold starts right
 *     AFTER that line's terminator.
 * If no terminated line has visible text, everything stays visible.
 *
 * @param {string} htmlAfterClosing markup following the closing phrase
 * @returns {{preBloat: string, bloat: string}}
 */
export function _peelSigNameLine(htmlAfterClosing) {
  if (!htmlAfterClosing) return { preBloat: '', bloat: '' };

  const splitAt = (offset) => ({
    preBloat: htmlAfterClosing.slice(0, offset),
    bloat: htmlAfterClosing.slice(offset),
  });
  const lineBreak = /<br\s*\/?>|<\/p>|<\/div>|\n/gi;
  const contactTell = /[@]|tel\.?:|mobile:|phone:|www\.|https?:\/\/|sent from|^\+?\d[\d \-().]{6,}$/i;

  let lineStart = 0;
  for (let hit = lineBreak.exec(htmlAfterClosing); hit !== null; hit = lineBreak.exec(htmlAfterClosing)) {
    const lineEnd = hit.index + hit[0].length;
    const visible = htmlAfterClosing
      .slice(lineStart, hit.index)
      .replace(/<[^>]+>/g, '')
      .replace(/&nbsp;/gi, ' ')
      .trim();
    if (visible.length > 0) {
      return splitAt(contactTell.test(visible) ? lineStart : lineEnd);
    }
    lineStart = lineEnd;
  }
  return { preBloat: htmlAfterClosing, bloat: '' };
}
/**
 * Report whether a signature fragment has enough visible text to be worth
 * folding. Style/script blocks and tags are removed, a few entities are
 * decoded, whitespace is collapsed, and the remaining length is compared
 * against `_SIG_BLOAT_MIN_CHARS`.
 *
 * @param {string} htmlFragment candidate signature markup
 * @returns {boolean} true when the plain-text length reaches the threshold
 */
export function _isBloatedSig(htmlFragment) {
  if (!htmlFragment) return false;
  // Applied strictly in this order.
  const steps = [
    [/<style[\s\S]*?<\/style>/gi, ''],
    [/<script[\s\S]*?<\/script>/gi, ''],
    [/<[^>]+>/g, ' '],
    [/&nbsp;/gi, ' '],
    [/&amp;/g, '&'],
    [/&lt;/g, '<'],
    [/&gt;/g, '>'],
    [/&quot;/g, '"'],
    [/\s+/g, ' '],
  ];
  const visible = steps
    .reduce((acc, [pattern, replacement]) => acc.replace(pattern, replacement), htmlFragment)
    .trim();
  return visible.length >= _SIG_BLOAT_MIN_CHARS;
}
// Try folding using a per-sender cached signature (built by the
// `learn_sender_signatures` action). When the cached text is found near
// the end of `html`, slice there and wrap the tail in a details fold.
// Returns the wrapped HTML or null when the hint doesn't apply.
/**
 * Fold the signature using a known signature text for this sender.
 *
 * An anchor line is picked from `hintSig`: the first line of at least 8
 * characters that is not just a closing phrase ("Kind regards," …), else the
 * first line. `html` is projected to plain text (tags dropped, `<br>` turned
 * into a newline, six common entities decoded) while recording which html
 * offset produced each character. The LAST occurrence of the anchor in that
 * text marks where the signature starts; the html from there on is wrapped in
 * a `<details class="email-sig-fold">` provided `_isBloatedSig` accepts it.
 *
 * @param {string} html message body markup
 * @param {string} hintSig known signature text (at least 20 characters)
 * @returns {string|null} wrapped html, or null when the hint does not apply
 */
export function _tryFoldHintSig(html, hintSig) {
  const hintUsable = typeof hintSig === 'string' && hintSig.length >= 20;
  if (!html || !hintUsable) return null;

  const closingOnly = /^(?:Best regards|Best wishes|Kind regards|Yours (?:truly|sincerely|faithfully)|Sincerely|Cheers|Thanks|Thank you|Regards|Warm regards|Many thanks|Take care)[,!.\s]*$/i;
  const hintLines = hintSig.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
  const distinctive = hintLines.find((line) => line.length >= 8 && !closingOnly.test(line));
  const anchor = (distinctive || hintLines[0] || '').trim();
  if (anchor.length < 8) return null;

  // Plain-text projection; offsets[k] is the html index that produced text[k].
  let text = '';
  const offsets = [];
  const emit = (ch, at) => {
    text += ch;
    offsets.push(at);
  };

  let pos = 0;
  while (pos < html.length) {
    const ch = html[pos];
    if (ch === '<') {
      const isLineBreak = /^<br\s*\/?\s*>/i.test(html.slice(pos, pos + 6));
      const close = html.indexOf('>', pos);
      if (isLineBreak) {
        emit('\n', pos);
      } else if (close < 0) {
        // Unterminated tag: stop projecting.
        break;
      }
      pos = close + 1;
      continue;
    }
    if (ch === '&') {
      const semi = html.indexOf(';', pos);
      const decoded = semi > 0 && semi - pos < 8
        ? ({nbsp: ' ', amp: '&', lt: '<', gt: '>', quot: '"', apos: "'"})[html.slice(pos + 1, semi)]
        : undefined;
      if (decoded !== undefined) {
        emit(decoded, pos);
        pos = semi + 1;
        continue;
      }
    }
    emit(ch, pos);
    pos += 1;
  }

  const hit = text.lastIndexOf(anchor);
  if (hit < 0) return null;
  const cut = offsets[hit];
  if (cut == null) return null;

  const head = html.slice(0, cut);
  const tail = html.slice(cut);
  if (!_isBloatedSig(tail)) return null;
  return head + '<details class="email-sig-fold">'
    + _foldSummary('Signature', _SIG_ICON)
    + tail + '</details>';
}
// Top-level signature fold — runs through several detection strategies
// in priority order. Returns the original html unchanged when no
// strategy fires.
/**
 * Wrap the trailing signature of an HTML email body in a
 * `<details class="email-sig-fold">` element.
 *
 * Strategies are tried in a fixed order and the first one whose pattern
 * matches decides the outcome — if its tail is not "bloated" per
 * `_isBloatedSig`, the original html is returned and later strategies are
 * NOT tried. The one exception is the per-sender hint, which falls through
 * to the remaining strategies when it does not apply.
 *
 * @param {string} html message body markup
 * @param {string} [hintSig] known signature text for this sender, if any
 * @returns {string} html with the signature folded, or the input unchanged
 */
export function _foldSignature(html, hintSig) {
// Non-strings and empty input are handed back untouched.
if (!html || typeof html !== 'string') return html;
// Bodies over 80000 characters are not scanned at all.
if (html.length > 80000) return html;
// Strategy 0: per-sender hint; `null` means "did not apply", so keep going.
if (hintSig) {
const wrapped = _tryFoldHintSig(html, hintSig);
if (wrapped !== null) return wrapped;
}
// Shared wrapper: `before` stays as-is, `marker` (the matched boundary, if
// any) is re-emitted outside the fold, `rest` goes inside it. Returns the
// original html when `rest` is too short to be worth folding.
const wrap = (before, marker, rest) => {
if (!_isBloatedSig(rest)) return html;
return before + (marker || '') + '<details class="email-sig-fold">'
+ _foldSummary('Signature', _SIG_ICON) + rest + '</details>';
};
// Strategy 1: a <div> whose class list contains `gmail_signature`; folds
// from that tag to the end of the body.
let m = html.match(/<div[^>]*class=["'][^"']*\bgmail_signature\b[^"']*["'][\s\S]*$/i);
if (m) return wrap(html.slice(0, html.length - m[0].length), '', m[0]);
// Strategy 2: a <div> with data-smartmail="gmail_signature".
m = html.match(/<div[^>]*data-smartmail=["']gmail_signature["'][\s\S]*$/i);
if (m) return wrap(html.slice(0, html.length - m[0].length), '', m[0]);
// Strategy 3: a <div> with id Signature / signature / divRplyFwdMsg.
m = html.match(/<div[^>]*id=["'](?:Signature|signature|divRplyFwdMsg)["'][\s\S]*$/i);
if (m) return wrap(html.slice(0, html.length - m[0].length), '', m[0]);
// Strategy 4: a "--" delimiter line bounded by `<br>` or newline. Only the
// literal `<br>` spelling is recognised here (not `<br/>` or `<br />`).
m = html.match(/(<br>|\n)\s*--\s*(<br>|\n)([\s\S]*)$/i);
if (m) {
// Every match below is anchored to the end of the string, so m[0] is a
// suffix of html and lastIndexOf locates where that suffix begins.
const idx = html.lastIndexOf(m[0]);
return wrap(html.slice(0, idx), m[1], m[3]);
}
// Strategy 5: a closing phrase ("Kind regards", "Thanks," …) right after a
// block boundary. The closing and the first following line (see
// `_peelSigNameLine`) stay visible; only the remainder is folded.
const blockBoundary = '(?:<br\\s*/?>|<\\/p>|<\\/div>|<\\/li>|<p[^>]*>|<div[^>]*>|<span[^>]*>|\\n)';
const closings = '(?:Best regards|Best wishes|Kind regards|Yours truly|Yours sincerely|Yours faithfully|Best,|Best\\s|Cheers,|Cheers\\s|Thanks,|Thanks\\s|Thank you,|Regards,|Regards\\s|Sincerely[, ]|Warm regards|Many thanks|Talk soon|Take care)';
m = html.match(new RegExp(`(${blockBoundary})\\s*(${closings})([\\s\\S]+)$`, 'i'));
if (m) {
const idx = html.lastIndexOf(m[0]);
const boundary = m[1];
const closing = m[2];
const after = m[3];
const { preBloat, bloat } = _peelSigNameLine(after);
if (!_isBloatedSig(bloat)) return html;
return html.slice(0, idx) + boundary + closing + preBloat
+ '<details class="email-sig-fold">' + _foldSummary('Signature', _SIG_ICON)
+ bloat + '</details>';
}
// Strategy 6: mobile-client footers ("Sent from my iPhone", "Get Outlook for iOS", …).
m = html.match(new RegExp(`(${blockBoundary})\\s*((?:Sent from my (?:iPhone|iPad|Android|Galaxy|Pixel|phone|mobile)|Get Outlook for (?:iOS|Android))[\\s\\S]*)$`, 'i'));
if (m) {
const idx = html.lastIndexOf(m[0]);
return wrap(html.slice(0, idx), m[1], m[2]);
}
// Strategy 7: legal boilerplate openers ("CONFIDENTIALITY NOTICE", "DISCLAIMER", …).
m = html.match(new RegExp(`(${blockBoundary})\\s*((?:CONFIDENTIALITY NOTICE|DISCLAIMER|This e-?mail (?:is confidential|may contain confidential)|The information (?:contained )?in this e-?mail|This message and any attachments)[\\s\\S]*)$`, 'i'));
if (m) {
const idx = html.lastIndexOf(m[0]);
return wrap(html.slice(0, idx), m[1], m[2]);
}
// No strategy matched.
return html;
}

View File

@@ -0,0 +1,34 @@
// static/js/emailLibrary/state.js
//
// Shared mutable state for the email-library popup. Keeping these on a
// single exported object lets sibling modules (utils, signatureFold,
// future render/menu/composer splits) read and write the same values
// without each one importing 19 `let` bindings — which ES modules
// don't allow from outside the defining module anyway.
//
// Writes look like `state._libOpen = true` everywhere; reads look like
// `state._libOpen`. The names match the originals so the refactor is a
// pure rename, not a semantic change.
// Single exported object holding the popup's mutable state. The binding is
// `const`, but its fields are meant to be reassigned in place by importers.
// Field meanings beyond the inline notes are inferred from names only —
// confirm against the modules that read them.
export const state = {
// Popup visibility flags.
_libOpen: false,
_libJustOpened: false,
// Current listing; presumably the loaded page plus total count and paging
// offset — TODO confirm.
_libEmails: [],
_libTotal: 0,
_libOffset: 0,
// Folder / account selection.
_libFolder: 'INBOX',
_libFolders: [],
_libAccountId: null, // null = backend default account
_libAccounts: [], // list of accounts for the chip strip
_libPendingExpandUid: null,
// Search, filter and sort controls.
_libSearch: '',
_libFilter: 'all', // all, unread, unanswered
_libSort: 'recent', // recent, unread, favorites
_libHasAttachments: false,
_libLoading: false,
// Collaborators / handlers; all start out unset (null).
_docModule: null,
_onEmailClick: null,
_libEscHandler: null,
// Multi-select mode and the set of selected message UIDs.
_selectMode: false,
_selectedUids: new Set(),
};

View File

@@ -0,0 +1,202 @@
// static/js/emailLibrary/utils.js
//
// Pure helpers extracted from emailLibrary.js. No DOM state, no fetch,
// no shared mutable references — safe to import anywhere.
// ── Talon-inspired multilingual quote-detection regexes ───────────
// Borrowed (loosely) from Mailgun's `talon` library. These are partial
// regex source strings — combined with surrounding patterns by callers.
// Multilingual on purpose: a typed "wrote:" line is locale-bound, and
// people forward / reply across language settings all the time.
// Verb of the attribution line ("… wrote:") in several languages.
// NOTE(review): `napisao`, `написа` and `написала` are each listed twice;
// harmless for matching, but the duplicates can be dropped.
export const _TALON_WROTE = '(?:wrote|écrit|escribió|scrisse|schrieb|skrev|schreef|napisał|написал|napsal|написа|έγραψε|katselivat|napisao|написав|napisała|napisali|hat geschrieben|kirjoitti|написала|escreveu|napisao|написа|написала)';
// "From" header label. Non-capturing group with no anchors or word
// boundaries — callers supply the surrounding pattern (e.g. `\s*:`).
export const _TALON_FROM = '(?:From|Från|Von|De|Da|От|Od|Van|差出人|发件人|寄件人|Ut|Frá|Lähettäjä|Avsender|Pošiljatelj|Од|Від|Posiljatelj|Frå)';
// "Sent" / "Date" header label. NOTE(review): `Datum` is listed twice.
export const _TALON_SENT = '(?:Sent|Skickat|Gesendet|Envoy[ée]|Inviato|Enviado|Verzonden|Отправлено|Wysłane|Date|送信日時|发送时间|寄件日期|Sendt|Lähetetty|Tarih|Datum|Data|Datum)';
// "Subject" header label. NOTE(review): `Onderwerp` is listed twice.
export const _TALON_SUBJ = '(?:Subject|Ämne|Betreff|Objet|Oggetto|Asunto|Onderwerp|Тема|Temat|件名|主题|主旨|Emne|Aihe|Onderwerp|Konu)';
// "To" header label.
export const _TALON_TO = '(?:To|Till|An|À|A|Voor|Para|Naar|Кому|Do|宛先|收件人|Emri|Komu)';
// Complete regex (not a fragment) for an "----- Original Message -----" style
// separator: optional leading whitespace / `>` quote marks, a run of 3+ of
// `-`, `_` or `=`, a localized label, then another such run.
// NOTE(review): `d[']origine` and `Vor[ ]asal[a]` look like damaged
// alternatives — verify against the intended phrases.
export const _TALON_ORIG_RE = /(?:^|\n)[\s>]*[-_=]{3,}\s*(?:Original\s+Message|Ursprüngliche\s+Nachricht|Mensaje\s+original|Messaggio\s+originale|Message\s+d[']origine|Oorspronkelijk\s+bericht|Original\s+meddelande|Vor[ ]asal[a]\s+meddelande|原文|原始邮件|転送)\s*[-_=]{3,}/i;
// Minimum plain-text length of a "signature" before we bother folding it.
// Short closings ("Cheers, John") stay inline — folding them would add
// a click for two bytes of saving.
export const _SIG_BLOAT_MIN_CHARS = 200;
// HTML-escape a string by round-tripping through a detached div. Cheap
// and correct (handles all the entities that matter for innerHTML).
/**
 * HTML-escape a value via a detached `<div>`: set it as text content, then
 * read the serialized markup back. Falsy input (null, undefined, '', 0)
 * yields the empty string.
 *
 * @param {*} text value to escape
 * @returns {string} innerHTML-safe text
 */
export function _esc(text) {
  const holder = document.createElement('div');
  const value = text || '';
  holder.textContent = value;
  return holder.innerHTML;
}
// Escape + linkify URLs and email addresses. Returns innerHTML-safe markup.
/**
 * Escape `text` for innerHTML and turn URLs (`http(s)://…`, `www.…`) and
 * email addresses into links.
 *
 * Matching runs on the RAW text in a single pass, and each piece is escaped
 * as it is emitted. This avoids two defects of escaping first and then
 * running two sequential replaces:
 *   - an address inside a URL ("http://a.com/bob@x.com") is no longer
 *     re-linkified inside the already generated `<a href="…">`;
 *   - entity text produced by escaping (e.g. the "&gt" of "&gt;") can no
 *     longer be swallowed into a URL that was followed by `>`.
 *
 * @param {*} text raw text; falsy input yields ''
 * @returns {string} innerHTML-safe markup
 */
export function _escLinkify(text) {
  const raw = String(text || '');
  // Group 1 = URL, group 2 = email address. The URL alternative comes first
  // so that a URL containing an `@` is consumed as one URL.
  const tokenRe = /\b((?:https?:\/\/|www\.)[^\s<>"']+[^\s<>"'.,;:!?)\]])|\b([A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,})\b/g;

  let out = '';
  let cursor = 0;
  let m;
  while ((m = tokenRe.exec(raw)) !== null) {
    const [token, url] = m;
    out += _esc(raw.slice(cursor, m.index));
    const shown = _esc(token);
    if (url !== undefined) {
      const href = token.startsWith('www.') ? `https://${token}` : token;
      out += `<a href="${_esc(href)}" target="_blank" rel="noopener noreferrer">${shown}</a>`;
    } else {
      out += `<a href="mailto:${shown}">${shown}</a>`;
    }
    cursor = m.index + token.length;
  }
  return out + _esc(raw.slice(cursor));
}
// Pull display name out of "Name <email@x>"; fallback to local-part of
// the email; final fallback to the input string.
/**
 * Derive a short display name from an address string.
 *
 * Order of preference:
 *   1. the display name of `Name <email@x>` (surrounding quotes removed);
 *   2. the local part (text before `@`) of the address, angle brackets
 *      stripped — also used when the display name is blank;
 *   3. the address itself.
 *
 * null / undefined are treated as the empty string instead of throwing.
 *
 * @param {*} addr address such as `"Doe, Jane" <j@x.com>` or `bob@x.com`
 * @returns {string} display name, or '' for empty input
 */
export function _extractName(addr) {
  const raw = addr == null ? '' : String(addr);
  const m = raw.match(/^"?([^"<]+?)"?\s*<([^>]+)>\s*$/);
  if (m) {
    const display = m[1].trim();
    if (display) return display;
  }
  // A bare "<foo@x.com>" does not match the pattern above (it requires at
  // least one character before `<`), so drop the brackets before splitting.
  const bare = (m ? m[2] : raw).replace(/[<>]/g, '').trim();
  const localPart = bare.split('@')[0];
  return localPart || bare || raw;
}
// Parse the "Author <email> · Date" metadata string emitted by the
// server-side thread parser.
/**
 * Split an "Author <email> · Date" string into its parts.
 *
 * The email is the first `<…@…>` group, else the first bare address, and is
 * lower-cased. The string is split on " · " / " • "; the first piece (with
 * `<…>` groups removed) is the author and the remaining pieces, re-joined
 * with " · ", are the date. Without a separator the whole string is the
 * author.
 *
 * @param {*} meta metadata string; falsy input yields all-empty fields
 * @returns {{author: string, email: string, date: string}}
 */
export function _parseTurnMeta(meta) {
  if (!meta) return { author: '', email: '', date: '' };

  const text = String(meta);
  const withoutBrackets = (s) => s.replace(/<[^>]+>/g, '').trim();

  const bracketed = text.match(/<([^<>\s]+@[^<>\s]+)>/);
  const found = bracketed || text.match(/\b([\w.+-]+@[\w.-]+\.[A-Za-z]{2,})\b/);
  const email = found ? found[1].toLowerCase().trim() : '';

  const [head, ...rest] = text.split(/\s+[·•]\s+/);
  if (rest.length === 0) {
    return { author: withoutBrackets(text), email, date: '' };
  }
  return { author: withoutBrackets(head), email, date: rest.join(' · ').trim() };
}
// Short, locale-aware display string for a chat-bubble timestamp.
// Returns '' for invalid / empty input.
/**
 * Format a timestamp as a short string in the runtime's default locale
 * (abbreviated month, day, two-digit hour and minute).
 *
 * @param {*} iso anything accepted by `new Date(...)`
 * @returns {string} formatted date, or '' for empty / unparseable input or
 *   when formatting throws
 */
export function _formatBubbleDate(iso) {
  if (!iso) return '';
  const when = new Date(iso);
  if (isNaN(when.getTime())) return '';
  const shape = {
    month: 'short',
    day: 'numeric',
    hour: '2-digit',
    minute: '2-digit',
  };
  try {
    return when.toLocaleString(undefined, shape);
  } catch (_) {
    return '';
  }
}
// Format a raw "to" address string ("Foo <foo@x.com>, bar@y.com") into a
// short, readable list — display names when present, just the local part
// of the email otherwise, and ", +N" once there are more than 2 recipients.
// Split an address list on commas that are NOT inside a double-quoted display
// name, so `"Doe, Jane" <j@x.com>` stays one entry. If the quotes turn out to
// be unbalanced the quote tracking cannot be trusted, so fall back to a plain
// comma split.
function _splitAddressList(list) {
  const pieces = [];
  let current = '';
  let inQuotes = false;
  for (let i = 0; i < list.length; i++) {
    const ch = list[i];
    if (inQuotes && ch === '\\' && i + 1 < list.length) {
      // Backslash escape inside a quoted string: keep both characters as-is.
      current += ch + list[i + 1];
      i++;
      continue;
    }
    if (ch === '"') inQuotes = !inQuotes;
    if (ch === ',' && !inQuotes) {
      pieces.push(current);
      current = '';
      continue;
    }
    current += ch;
  }
  pieces.push(current);
  return inQuotes ? list.split(',') : pieces;
}

/**
 * Format a raw recipient string ("Foo <foo@x.com>, bar@y.com") as a short
 * readable list: display names where present, otherwise the local part of
 * the address; more than two recipients collapse to "A, B +N".
 *
 * Commas inside quoted display names do not split an entry.
 *
 * @param {*} raw comma-separated address list
 * @returns {string} short recipient summary, or '' for empty input
 */
export function _formatRecipients(raw) {
  if (!raw) return '';
  const addrs = _splitAddressList(String(raw)).map((s) => s.trim()).filter(Boolean);
  if (!addrs.length) return '';
  const friendly = addrs.map((a) => {
    const m = a.match(/^\s*"?([^"<]+?)"?\s*<[^>]+>\s*$/);
    if (m && m[1].trim()) return m[1].trim();
    const em = a.replace(/[<>]/g, '').trim();
    return em.split('@')[0] || em;
  });
  if (friendly.length <= 2) return friendly.join(', ');
  return `${friendly.slice(0, 2).join(', ')} +${friendly.length - 2}`;
}
// Deterministic per-sender colour. Same hashing as
// emailInbox.js#_senderColor so a sender's avatar / name colour matches
// across the list view and the bubble reader.
/**
 * Deterministic colour for a sender. The lower-cased name is hashed over its
 * UTF-16 code units with a 32-bit `hash * 31 + unit` rolling hash, and the
 * hash is mapped to a hue in [0, 360) at fixed saturation and lightness.
 *
 * @param {*} name sender name; falsy input yields the default colour
 * @returns {string} CSS `hsl(...)` colour
 */
export function _senderColor(name) {
  if (!name) return 'hsl(220, 55%, 65%)';
  // split('') walks UTF-16 code units, matching an index-based charCodeAt loop.
  const units = String(name).toLowerCase().split('');
  const hash = units.reduce(
    (acc, unit) => ((acc << 5) - acc + unit.charCodeAt(0)) | 0,
    0
  );
  // `%` can be negative for a negative hash; fold it back into [0, 360).
  const hue = ((hash % 360) + 360) % 360;
  return `hsl(${hue}, 55%, 65%)`;
}
// 1- or 2-letter initials for an avatar bubble. Unicode-friendly.
/**
 * One- or two-letter initials for an avatar bubble.
 *
 * `<…>` groups are removed, every character that is neither a Unicode letter
 * nor whitespace becomes a space, and the first letters of the first and last
 * remaining words are upper-cased and joined.
 *
 * Letters are taken by code point rather than by UTF-16 code unit, so a name
 * starting with a letter outside the Basic Multilingual Plane yields that
 * letter instead of half of a surrogate pair.
 *
 * @param {*} s name or address string
 * @returns {string} initials, or '?' when no letter is found
 */
export function _initials(s) {
  if (!s) return '?';
  const clean = String(s).replace(/<[^>]+>/g, '').replace(/[^\p{L}\s]/gu, ' ').trim();
  const parts = clean.split(/\s+/).filter(Boolean);
  if (!parts.length) return '?';
  const firstLetter = (word) => String.fromCodePoint(word.codePointAt(0));
  const first = firstLetter(parts[0]);
  const last = parts.length > 1 ? firstLetter(parts[parts.length - 1]) : '';
  return (first + last).toUpperCase();
}
// HTML sanitizer for rendering remote email bodies. Strips script/iframe/
// form/style/etc., kills `on*` handlers, blocks `javascript:`/`vbscript:`/
// `data:` URLs on every known URL attribute, scrubs inline colour/font/
// position styles so the theme can take over, and wraps highlight-bearing
// inline tags in <mark> so they render legibly across themes.
/**
 * Sanitize a remote email body for rendering.
 *
 * Steps, in order:
 *   1. parse into a detached document and remove script/iframe/form/style/…
 *      elements outright;
 *   2. on every remaining element drop `on*` handlers and `srcdoc`, and drop
 *      URL-bearing attributes whose value is a `javascript:`, `vbscript:` or
 *      `data:` URL;
 *   3. remove `color` / `bgcolor` / `face` attributes and the colour, font and
 *      positioning declarations from inline styles;
 *   4. force links to open in a new tab with `rel="noopener noreferrer"`;
 *   5. wrap the contents of inline elements that carried a background colour
 *      in `<mark>`.
 *
 * @param {string} html untrusted markup
 * @returns {string} sanitized markup (the parsed body's innerHTML)
 */
export function _sanitizeHtml(html) {
  const doc = new DOMParser().parseFromString(html, 'text/html');
  doc.querySelectorAll(
    'script, iframe, object, embed, form, style, link, ' +
    'svg, math, base, meta, noscript, frame, frameset, applet, portal'
  ).forEach((el) => el.remove());

  const URL_ATTRS = ['href', 'src', 'srcset', 'action', 'formaction', 'background', 'poster', 'data'];
  const isDangerousUrl = (val) => {
    if (!val) return false;
    // URL parsers ignore leading C0-control/space characters and drop
    // tab/CR/LF anywhere in the input, so "java\tscript:…" or "\x01javascript:…"
    // still resolves to a javascript: URL. Strip every character at or below
    // U+0020 before testing the scheme; `trim()` alone misses those forms.
    const v = val.replace(/[\u0000-\u0020]+/g, '').toLowerCase();
    return v.startsWith('javascript:') || v.startsWith('vbscript:') || v.startsWith('data:');
  };
  const STRIP_CSS_PROPS = ['color', 'background', 'background-color',
    'font-family', 'font', '-webkit-text-fill-color',
    'position', 'z-index'];
  const HIGHLIGHT_INLINE_TAGS = new Set(['SPAN', 'FONT', 'EM', 'B', 'I',
    'STRONG', 'SMALL', 'U']);
  const HAS_BG_COLOR = /background(?:-color)?\s*:\s*(?!\s*(?:transparent|none|inherit|initial)\b)[^;]+/i;
  const _markedForHighlight = [];

  doc.querySelectorAll('*').forEach((el) => {
    // Iterate over a copy: attributes are removed while looping.
    for (const attr of [...el.attributes]) {
      const name = attr.name.toLowerCase();
      if (name.startsWith('on')) { el.removeAttribute(attr.name); continue; }
      if (name === 'srcdoc') { el.removeAttribute(attr.name); continue; }
      if (URL_ATTRS.includes(name) && isDangerousUrl(attr.value)) {
        el.removeAttribute(attr.name);
        continue;
      }
    }

    el.removeAttribute('color');
    // Read bgcolor before removing it; it feeds the highlight check below.
    const bgcolor = el.getAttribute('bgcolor');
    el.removeAttribute('bgcolor');
    el.removeAttribute('face');

    // The highlight check uses the ORIGINAL style string, before scrubbing.
    const style = el.getAttribute('style');
    const hadHighlight =
      HIGHLIGHT_INLINE_TAGS.has(el.tagName) &&
      ((style && HAS_BG_COLOR.test(style)) || (bgcolor && bgcolor !== 'transparent'));
    if (hadHighlight) _markedForHighlight.push(el);

    if (style) {
      const kept = style.split(';').map((s) => s.trim()).filter((decl) => {
        if (!decl) return false;
        const lower = decl.toLowerCase();
        if (lower.includes('javascript:') || lower.includes('expression(')) return false;
        const prop = decl.split(':', 1)[0].trim().toLowerCase();
        return !STRIP_CSS_PROPS.includes(prop);
      });
      if (kept.length) el.setAttribute('style', kept.join('; '));
      else el.removeAttribute('style');
    }

    if (el.tagName === 'A') {
      el.setAttribute('target', '_blank');
      el.setAttribute('rel', 'noopener noreferrer');
    }
  });

  // Done after the walk so the <mark> wrappers are not themselves visited.
  _markedForHighlight.forEach((el) => {
    if (el.tagName === 'MARK' || !el.firstChild) return;
    const mark = doc.createElement('mark');
    while (el.firstChild) mark.appendChild(el.firstChild);
    el.appendChild(mark);
  });

  return doc.body.innerHTML;
}