diff --git a/static/js/document.js b/static/js/document.js
index 1d38121..87ad298 100644
--- a/static/js/document.js
+++ b/static/js/document.js
@@ -2246,7 +2246,9 @@ import * as Modals from './modalManager.js';
// WYSIWYG body — use it verbatim. (Checking a leading '<' isn't enough: a
// rich body often starts with plain text, e.g. "Hi there".)
if (/<\/?(b|i|u|s|strong|em|del|strike|a|p|div|br|ul|ol|li|h[1-3]|blockquote|span|code|pre)\b[^>]*>/i.test(t)) return t;
- try { return markdownModule.mdToHtml(text); }
+ // Email body: keep author-typed `:shortcode:` text literal. Issue #345
+ // (shortcode → emoji) is scoped to chat; do not rewrite colons in mail.
+ try { return markdownModule.mdToHtml(text, { shortcodes: false }); }
catch (_) {
const d = document.createElement('div'); d.textContent = text;
return d.innerHTML.replace(/\n/g, '
');
@@ -8386,7 +8388,7 @@ import * as Modals from './modalManager.js';
const text = textarea.value || '';
let body;
if (lang === 'markdown' && markdownModule?.mdToHtml) {
- body = markdownModule.mdToHtml(text);
+ body = markdownModule.mdToHtml(text, { shortcodes: false }); // export: keep :shortcodes: literal
} else {
body = '
' +
text.replace(/&/g,'&').replace(//g,'>') + '';
@@ -8417,7 +8419,7 @@ import * as Modals from './modalManager.js';
// Render content as HTML for PDF
let html;
if (lang === 'markdown' && markdownModule?.mdToHtml) {
- html = markdownModule.mdToHtml(text);
+ html = markdownModule.mdToHtml(text, { shortcodes: false }); // export: keep :shortcodes: literal
} else {
html = '' +
text.replace(/&/g,'&').replace(//g,'>') + '';
@@ -8547,7 +8549,7 @@ import * as Modals from './modalManager.js';
if (active) {
const md = textarea.value || '';
if (markdownModule && markdownModule.mdToHtml) {
- preview.innerHTML = markdownModule.mdToHtml(md);
+ preview.innerHTML = markdownModule.mdToHtml(md, { shortcodes: false }); // doc preview: keep :shortcodes: literal
} else {
preview.innerHTML = md.replace(/&/g,'&').replace(//g,'>').replace(/\n/g, '
');
}
diff --git a/static/js/emojiShortcodes.js b/static/js/emojiShortcodes.js
new file mode 100644
index 0000000..a51a64e
--- /dev/null
+++ b/static/js/emojiShortcodes.js
@@ -0,0 +1,458 @@
+// static/js/emojiShortcodes.js
+//
+// Emoji shortcode → Unicode conversion (issue #345).
+//
+// Chat models frequently emit GitHub/Slack-style `:shortcode:` text — e.g.
+// `:blush:`, `:fire:`, `:microphone:` — instead of the actual emoji character.
+// Nothing in the render pipeline used to translate these, so they showed up as
+// literal `:blush:` text in the chat bubble.
+//
+// This module turns the common shortcode set into the real Unicode emoji. The
+// chat renderer (markdown.js → svgifyEmoji) runs this BEFORE its existing
+// Unicode-emoji → monochrome-SVG pass, so a converted `:blush:` renders as the
+// same theme-tinted single-color line icon as any other emoji (project rule:
+// never colorful emoji), not as a colored system glyph.
+//
+// Pure and browser-free on purpose: no DOM, no imports, so it can be unit
+// tested with plain `node` (see tests/test_emoji_shortcodes_js.py).
+
+// Canonical map of common shortcode → Unicode emoji. Names follow the GitHub
+// convention (lowercase, underscore-separated); keys must stay lowercase because
+// lookups lowercase the name first. A handful of well-known aliases (`+1`,
+// `thumbsup`, `grinning_face`, …) share a glyph so frequent model spellings resolve.
+export const EMOJI_SHORTCODES = {
+ // ── Smileys & emotion ──
+ grinning: '😀', grinning_face: '😀',
+ smiley: '😃', smiley_face: '😃',
+ smile: '😄',
+ grin: '😁',
+ laughing: '😆', satisfied: '😆',
+ sweat_smile: '😅',
+ rofl: '🤣', rolling_on_the_floor_laughing: '🤣',
+ joy: '😂',
+ slightly_smiling_face: '🙂', slight_smile: '🙂',
+ upside_down_face: '🙃', upside_down: '🙃',
+ wink: '😉', winking_face: '😉',
+ blush: '😊', smiling_face_with_smiling_eyes: '😊',
+ innocent: '😇',
+ smiling_face_with_three_hearts: '🥰',
+ heart_eyes: '😍', heart_eyes_face: '😍',
+ star_struck: '🤩',
+ kissing_heart: '😘',
+ kissing: '😗',
+ kissing_closed_eyes: '😚',
+ kissing_smiling_eyes: '😙',
+ yum: '😋',
+ stuck_out_tongue: '😛',
+ stuck_out_tongue_winking_eye: '😜',
+ zany_face: '🤪',
+ stuck_out_tongue_closed_eyes: '😝',
+ money_mouth_face: '🤑',
+ hugs: '🤗', hugging_face: '🤗',
+ hand_over_mouth: '🤭',
+ shushing_face: '🤫',
+ thinking: '🤔', thinking_face: '🤔',
+ zipper_mouth_face: '🤐',
+ raised_eyebrow: '🤨',
+ neutral_face: '😐',
+ expressionless: '😑',
+ no_mouth: '😶',
+ smirk: '😏', smirk_face: '😏',
+ unamused: '😒',
+ roll_eyes: '🙄', face_with_rolling_eyes: '🙄',
+ grimacing: '😬',
+ lying_face: '🤥',
+ relieved: '😌',
+ pensive: '😔',
+ sleepy: '😪',
+ drooling_face: '🤤',
+ sleeping: '😴',
+ mask: '😷',
+ face_with_thermometer: '🤒',
+ face_with_head_bandage: '🤕',
+ nauseated_face: '🤢',
+ vomiting_face: '🤮',
+ sneezing_face: '🤧',
+ hot_face: '🥵',
+ cold_face: '🥶',
+ woozy_face: '🥴',
+ dizzy_face: '😵',
+ exploding_head: '🤯',
+ cowboy_hat_face: '🤠',
+ partying_face: '🥳',
+ sunglasses: '😎',
+ nerd_face: '🤓',
+ monocle_face: '🧐',
+ confused: '😕',
+ worried: '😟',
+ slightly_frowning_face: '🙁',
+ frowning_face: '☹️',
+ open_mouth: '😮',
+ hushed: '😯',
+ astonished: '😲',
+ flushed: '😳',
+ pleading_face: '🥺',
+ frowning: '😦',
+ anguished: '😧',
+ fearful: '😨',
+ cold_sweat: '😰',
+ disappointed_relieved: '😥',
+ cry: '😢',
+ sob: '😭',
+ scream: '😱',
+ confounded: '😖',
+ persevere: '😣',
+ disappointed: '😞',
+ sweat: '😓',
+ weary: '😩',
+ tired_face: '😫',
+ yawning_face: '🥱',
+ triumph: '😤',
+ rage: '😡', pout: '😡', pouting_face: '😡',
+ angry: '😠',
+ cursing_face: '🤬',
+ smiling_imp: '😈',
+ imp: '👿',
+ skull: '💀',
+ skull_and_crossbones: '☠️',
+ hankey: '💩', poop: '💩', shit: '💩',
+ clown_face: '🤡',
+ japanese_ogre: '👹',
+ japanese_goblin: '👺',
+ ghost: '👻',
+ alien: '👽',
+ space_invader: '👾',
+ robot: '🤖', robot_face: '🤖',
+ // ── Cats ──
+ smiley_cat: '😺',
+ smile_cat: '😸',
+ joy_cat: '😹',
+ heart_eyes_cat: '😻',
+ smirk_cat: '😼',
+ kissing_cat: '😽',
+ scream_cat: '🙀',
+ crying_cat_face: '😿',
+ pouting_cat: '😾',
+ see_no_evil: '🙈',
+ hear_no_evil: '🙉',
+ speak_no_evil: '🙊',
+ // ── Hands & body ──
+ wave: '👋', wave_hand: '👋',
+ raised_back_of_hand: '🤚',
+ raised_hand_with_fingers_splayed: '🖐️',
+ hand: '✋', raised_hand: '✋',
+ vulcan_salute: '🖖',
+ ok_hand: '👌',
+ pinched_fingers: '🤌',
+ pinching_hand: '🤏',
+ v: '✌️', victory_hand: '✌️',
+ crossed_fingers: '🤞',
+ love_you_gesture: '🤟',
+ metal: '🤘',
+ call_me_hand: '🤙',
+ point_left: '👈',
+ point_right: '👉',
+ point_up_2: '👆',
+ middle_finger: '🖕', fu: '🖕',
+ point_down: '👇',
+ point_up: '☝️',
+ '+1': '👍', thumbsup: '👍', thumbup: '👍', thumbs_up: '👍',
+ '-1': '👎', thumbsdown: '👎', thumbdown: '👎', thumbs_down: '👎',
+ fist_raised: '✊', fist: '✊',
+ fist_oncoming: '👊', facepunch: '👊', punch: '👊',
+ fist_left: '🤛',
+ fist_right: '🤜',
+ clap: '👏', clapping_hands: '👏',
+ raised_hands: '🙌',
+ open_hands: '👐',
+ palms_up_together: '🤲',
+ handshake: '🤝',
+ pray: '🙏', folded_hands: '🙏',
+ writing_hand: '✍️',
+ nail_care: '💅',
+ selfie: '🤳',
+ muscle: '💪', flexed_biceps: '💪',
+ // ── Hearts & symbols of feeling ──
+ heart: '❤️', red_heart: '❤️',
+ orange_heart: '🧡',
+ yellow_heart: '💛',
+ green_heart: '💚',
+ blue_heart: '💙',
+ purple_heart: '💜',
+ black_heart: '🖤',
+ white_heart: '🤍',
+ brown_heart: '🤎',
+ broken_heart: '💔',
+ heart_on_fire: '❤️🔥',
+ two_hearts: '💕',
+ revolving_hearts: '💞',
+ heartbeat: '💓',
+ heartpulse: '💗',
+ sparkling_heart: '💖',
+ cupid: '💘',
+ gift_heart: '💝',
+ heart_decoration: '💟',
+ heavy_heart_exclamation: '❣️',
+ // ── Celebration & misc objects ──
+ fire: '🔥', flame: '🔥',
+ '100': '💯', hundred: '💯',
+ sparkles: '✨',
+ star: '⭐',
+ star2: '🌟', glowing_star: '🌟',
+ dizzy: '💫',
+ boom: '💥', collision: '💥',
+ anger: '💢',
+ sweat_drops: '💦',
+ dash: '💨',
+ zzz: '💤',
+ tada: '🎉', party_popper: '🎉',
+ confetti_ball: '🎊',
+ balloon: '🎈',
+ gift: '🎁',
+ trophy: '🏆',
+ '1st_place_medal': '🥇',
+ '2nd_place_medal': '🥈',
+ '3rd_place_medal': '🥉',
+ medal_sports: '🏅',
+ zap: '⚡', lightning: '⚡',
+ bulb: '💡', light_bulb: '💡',
+ key: '🔑',
+ lock: '🔒',
+ unlock: '🔓',
+ bell: '🔔',
+ no_bell: '🔕',
+ loudspeaker: '📢',
+ mega: '📣', megaphone: '📣',
+ speech_balloon: '💬',
+ thought_balloon: '💭',
+ white_check_mark: '✅',
+ heavy_check_mark: '✔️', check_mark: '✔️',
+ ballot_box_with_check: '☑️',
+ x: '❌', cross_mark: '❌',
+ negative_squared_cross_mark: '❎',
+ question: '❓',
+ grey_question: '❔',
+ exclamation: '❗', heavy_exclamation_mark: '❗',
+ grey_exclamation: '❕',
+ warning: '⚠️',
+ no_entry: '⛔',
+ no_entry_sign: '🚫',
+ red_circle: '🔴',
+ green_circle: '🟢',
+ large_blue_circle: '🔵',
+ yellow_circle: '🟡',
+ white_circle: '⚪',
+ black_circle: '⚫',
+ orange_circle: '🟠',
+ purple_circle: '🟣',
+ brown_circle: '🟤',
+ // ── Tech, work, study ──
+ rocket: '🚀',
+ eyes: '👀',
+ eye: '👁️',
+ brain: '🧠',
+ books: '📚',
+ book: '📖', open_book: '📖',
+ memo: '📝', pencil: '📝',
+ pencil2: '✏️',
+ page_facing_up: '📄',
+ paperclip: '📎',
+ pushpin: '📌',
+ round_pushpin: '📍',
+ link: '🔗',
+ bar_chart: '📊',
+ chart_with_upwards_trend: '📈',
+ chart_with_downwards_trend: '📉',
+ mag: '🔍',
+ mag_right: '🔎',
+ globe_with_meridians: '🌐',
+ earth_africa: '🌍',
+ earth_americas: '🌎',
+ earth_asia: '🌏',
+ alarm_clock: '⏰',
+ hourglass_flowing_sand: '⏳',
+ hourglass: '⌛',
+ microphone: '🎤', mic: '🎤',
+ musical_note: '🎵',
+ notes: '🎶', musical_notes: '🎶',
+ headphones: '🎧',
+ camera: '📷',
+ camera_flash: '📸',
+ clapper: '🎬',
+ tv: '📺',
+ computer: '💻', laptop: '💻',
+ desktop_computer: '🖥️',
+ iphone: '📱', mobile_phone: '📱',
+ telephone: '☎️',
+ wrench: '🔧',
+ hammer: '🔨',
+ gear: '⚙️',
+ nut_and_bolt: '🔩',
+ magnet: '🧲',
+ test_tube: '🧪',
+ microscope: '🔬',
+ dart: '🎯', bullseye: '🎯',
+ game_die: '🎲',
+ jigsaw: '🧩',
+ // ── Food & drink ──
+ pizza: '🍕',
+ hamburger: '🍔',
+ fries: '🍟',
+ taco: '🌮',
+ sushi: '🍣',
+ doughnut: '🍩', donut: '🍩',
+ coffee: '☕',
+ beer: '🍺',
+ wine_glass: '🍷',
+ // ── Animals & nature ──
+ dog: '🐶',
+ cat: '🐱',
+ mouse: '🐭',
+ hamster: '🐹',
+ rabbit: '🐰',
+ fox_face: '🦊',
+ bear: '🐻',
+ panda_face: '🐼',
+ koala: '🐨',
+ tiger: '🐯',
+ lion: '🦁',
+ cow: '🐮',
+ pig: '🐷',
+ frog: '🐸',
+ monkey_face: '🐵',
+ chicken: '🐔',
+ penguin: '🐧',
+ bird: '🐦',
+ eagle: '🦅',
+ duck: '🦆',
+ owl: '🦉',
+ wolf: '🐺',
+ horse: '🐴',
+ unicorn: '🦄',
+ bee: '🐝', honeybee: '🐝',
+ bug: '🐛',
+ butterfly: '🦋',
+ snail: '🐌',
+ lady_beetle: '🐞',
+ snake: '🐍',
+ turtle: '🐢',
+ octopus: '🐙',
+ crab: '🦀',
+ tropical_fish: '🐠',
+ whale: '🐳',
+ shark: '🦈',
+ cherry_blossom: '🌸',
+ rose: '🌹',
+ sunflower: '🌻',
+ hibiscus: '🌺',
+ tulip: '🌷',
+ seedling: '🌱',
+ evergreen_tree: '🌲',
+ deciduous_tree: '🌳',
+ four_leaf_clover: '🍀',
+ apple: '🍎',
+ green_apple: '🍏',
+ pear: '🍐',
+ tangerine: '🍊',
+ lemon: '🍋',
+ banana: '🍌',
+ watermelon: '🍉',
+ grapes: '🍇',
+ strawberry: '🍓',
+ blueberries: '🫐',
+ peach: '🍑',
+ rainbow: '🌈',
+ sunny: '☀️', sun: '☀️',
+ partly_sunny: '⛅',
+ cloud: '☁️',
+ snowflake: '❄️',
+ ocean: '🌊',
+ // ── Arrows & signs ──
+ arrow_right: '➡️',
+ arrow_left: '⬅️',
+ arrow_up: '⬆️',
+ arrow_down: '⬇️',
+ arrow_upper_right: '↗️',
+ arrow_lower_right: '↘️',
+ arrow_lower_left: '↙️',
+ arrow_upper_left: '↖️',
+ leftwards_arrow_with_hook: '↩️',
+ arrow_right_hook: '↪️',
+ arrows_counterclockwise: '🔄',
+ arrows_clockwise: '🔃',
+ heavy_plus_sign: '➕',
+ heavy_minus_sign: '➖',
+ heavy_division_sign: '➗',
+ heavy_multiplication_x: '✖️',
+ infinity: '♾️',
+ copyright: '©️',
+ registered: '®️',
+ tm: '™️',
+ recycle: '♻️',
+ checkered_flag: '🏁',
+ triangular_flag_on_post: '🚩',
+ white_flag: '🏳️',
+ black_flag: '🏴',
+ // ── People & wearables ──
+ baby: '👶',
+ boy: '👦',
+ girl: '👧',
+ man: '👨',
+ woman: '👩',
+ older_man: '👴',
+ older_woman: '👵',
+ crown: '👑',
+ gem: '💎',
+ graduation_cap: '🎓', mortar_board: '🎓',
+};
+
+// Matches `:name:` where name is 1–40 letters/digits/`_`/`+`/`-` (so `:+1:`,
+// `:-1:` and `:100:` qualify); the name is captured for the map lookup. Global
+// + case-insensitive for use with replace(); hasEmojiShortcode() uses its own
+// non-global literal, so there is no shared lastIndex state to reset.
+const SHORTCODE_RE = /:([a-z0-9_+-]{1,40}):/gi;
+
+/** Cheap pre-check so callers can skip the replace pass on shortcode-free text.
+ * @param {string} text - Text to scan; any falsy value yields false.
+ * @returns {boolean} True when a `:name:`-shaped run (known emoji or not) occurs.
+ */
+export function hasEmojiShortcode(text) {
+ return !!text && text.indexOf(':') !== -1 && /:[a-z0-9_+-]{1,40}:/i.test(text); // indexOf short-circuits before the regex on colon-free text
+}
+
+// A shortcode must stand on its own — flanked by whitespace, punctuation, a
+// string edge, or markup, never glued to an ASCII word character. Without this
+// guard, real `:name:` shortcodes that happen to sit inside a longer run of
+// digits/letters get converted by mistake and mangle perfectly literal text:
+// "1:100:2" → the `:100:` would become 💯 ("1💯2")
+// "host:fire:port", URL authorities, `key:value:` pairs, etc.
+// Chat models always emit shortcodes delimited by spaces/punctuation (":fire:",
+// "**:microphone:**", "nice :tada:!"), so requiring a boundary keeps every real
+// shortcode working while leaving embedded colon runs untouched. `_` counts as a
+// word char too (identifier-like), but `+`/`-` do not, so "C++ :fire:" still works.
+const _WORDISH = /[A-Za-z0-9_]/;
+function _boundedOnBothSides(str, start, end) { // [start, end) is the matched `:name:` span within str
+ const before = start > 0 ? str[start - 1] : ''; // '' when the match begins the string
+ const after = end < str.length ? str[end] : ''; // '' when the match ends the string
+ return !_WORDISH.test(before) && !_WORDISH.test(after); // '' never matches, so string edges count as boundaries
+}
+
+/**
+ * Replace each known, standalone `:shortcode:` in `text` with its Unicode emoji; unknown names
+ * (`:definitely_not_emoji:`), plain colon runs (`10:30:45`, `16:9`) and known names glued to a word or number (`1:100:2`) are left as-is.
+ * @param {string} text - Input text; falsy or colon-free input is returned unchanged.
+ * @returns {string} The text with every qualifying shortcode swapped for its emoji.
+ */
+export function replaceEmojiShortcodes(text) {
+ if (!text || text.indexOf(':') === -1) return text;
+ return text.replace(SHORTCODE_RE, (whole, name, offset, str) => {
+ const key = name.toLowerCase(); // the regex is case-insensitive; map keys are lowercase
+ if (!Object.prototype.hasOwnProperty.call(EMOJI_SHORTCODES, key)) return whole; // own keys only, so inherited names like `constructor` never match
+ // Only convert when the `:shortcode:` is a standalone token, not glued to a
+ // surrounding word/number (which would mean it's literal text, not an emoji).
+ if (!_boundedOnBothSides(str, offset, offset + whole.length)) return whole;
+ return EMOJI_SHORTCODES[key];
+ });
+}
+
+export default { EMOJI_SHORTCODES, replaceEmojiShortcodes, hasEmojiShortcode };
diff --git a/static/js/markdown.js b/static/js/markdown.js
index a2cfba0..df92721 100644
--- a/static/js/markdown.js
+++ b/static/js/markdown.js
@@ -6,6 +6,7 @@
import uiModule from './ui.js';
import { splitTableRow } from './markdown/tableRow.js';
+import { replaceEmojiShortcodes, hasEmojiShortcode } from './emojiShortcodes.js';
var escapeHtml = uiModule.esc;
@@ -366,8 +367,19 @@ function _useSvgEmoji() {
return typeof document === 'undefined' || !document.body?.classList.contains('text-emojis');
}
-export function svgifyEmoji(html) {
- if (!_useSvgEmoji() || !html || !_EMOJI_RE.test(html)) return html;
+// `opts.shortcodes` (default true) controls the issue-#345 `:name:` → emoji
+// expansion. Chat passes it through as true; document/email body renderers pass
+// false so author-typed `:shortcode:` text stays literal (see mdToHtml callers).
+// The Unicode-emoji → monochrome-SVG pass always runs regardless, so a real 😀
+// in a document still renders as the themed line icon as it always has.
+export function svgifyEmoji(html, opts) {
+ if (!_useSvgEmoji() || !html) return html;
+ const allowShortcodes = !opts || opts.shortcodes !== false;
+ // Two reasons to walk the HTML: real Unicode emoji to turn into SVG icons,
+ // or `:shortcode:` text the model emitted instead of an emoji (issue #345).
+ const hasUnicode = _EMOJI_RE.test(html);
+ const hasShortcode = allowShortcodes && hasEmojiShortcode(html);
+ if (!hasUnicode && !hasShortcode) return html;
const parts = html.split(/(<[^>]*>)/); // odd indices = tags
let codeDepth = 0;
for (let i = 0; i < parts.length; i++) {
@@ -377,7 +389,13 @@ export function svgifyEmoji(html) {
else if (/^<\/(pre|code)\s*>/.test(t)) codeDepth = Math.max(0, codeDepth - 1);
continue;
}
- if (codeDepth === 0 && _EMOJI_RE.test(parts[i])) parts[i] = _svgifyText(parts[i]);
+ if (codeDepth !== 0) continue;
+ let seg = parts[i];
+ // Expand shortcodes to Unicode first, then both they and any pre-existing
+ // Unicode emoji get rendered as the same monochrome line icons below.
+ if (hasShortcode) seg = replaceEmojiShortcodes(seg);
+ if (_EMOJI_RE.test(seg)) seg = _svgifyText(seg);
+ parts[i] = seg;
}
return parts.join('');
}
@@ -421,7 +439,7 @@ export function processWithThinking(text) {
/**
* Convert markdown to HTML
*/
-export function mdToHtml(src) {
+export function mdToHtml(src, opts) {
const allowedHtmlBlocks = [];
const codeBlocks = [];
const mermaidBlocks = [];
@@ -678,7 +696,7 @@ export function mdToHtml(src) {
s = s.replace(`___CODE_BLOCK_${index}___`, block);
});
- return _useSvgEmoji() ? svgifyEmoji(s) : s;
+ return _useSvgEmoji() ? svgifyEmoji(s, opts) : s;
}
/**
diff --git a/tests/markdown_codefence_placeholder_regression.mjs b/tests/markdown_codefence_placeholder_regression.mjs
index a57cabe..aaaa50c 100644
--- a/tests/markdown_codefence_placeholder_regression.mjs
+++ b/tests/markdown_codefence_placeholder_regression.mjs
@@ -16,6 +16,10 @@ src = src.replace(
/import \{ splitTableRow \} from '\.\/markdown\/tableRow\.js';/,
'const splitTableRow = (row) => row.split("|").filter((cell) => cell.trim() !== "");'
);
+src = src.replace(
+ /import \{ replaceEmojiShortcodes, hasEmojiShortcode \} from '\.\/emojiShortcodes\.js';/,
+ 'const hasEmojiShortcode = (t) => !!t && t.indexOf(":") !== -1 && /:[a-z0-9_+-]{1,40}:/i.test(t); const replaceEmojiShortcodes = (t) => t;'
+);
src = src.replace(/export function /g, 'function ');
src = src.replace(/export const /g, 'const ');
src = src.replace(/export default markdownModule;?/g, '');
diff --git a/tests/test_emoji_shortcodes_js.py b/tests/test_emoji_shortcodes_js.py
new file mode 100644
index 0000000..72f8e1e
--- /dev/null
+++ b/tests/test_emoji_shortcodes_js.py
@@ -0,0 +1,101 @@
+"""Pin the pure emoji shortcode → Unicode helpers in emojiShortcodes.js.
+
+Driven through `node --input-type=module` so we exercise the real JS without a
+full Vitest/Jest setup (same approach as test_reply_recipients_js.py / test_compare_js.py).
+Skips when `node` is not installed rather than failing.
+
+Regression for issue #345: chat models emit GitHub-style :shortcode: text
+(e.g. :blush:, :microphone:) instead of the actual emoji, and nothing in the
+render pipeline translated them, so they showed up as literal ":blush:" text.
+"""
+import json
+import shutil
+import subprocess
+from pathlib import Path
+
+import pytest
+
+_REPO = Path(__file__).resolve().parent.parent  # two levels up from this test file; used as node's working directory
+_HELPER = _REPO / "static" / "js" / "emojiShortcodes.js"  # the JS module under test
+_HAS_NODE = shutil.which("node") is not None  # drives the skipif markers below
+
+
+def _run(js: str) -> str:  # run `js` through node as an ES module and return its stripped stdout
+ proc = subprocess.run(
+ ["node", "--input-type=module"],  # the script arrives on stdin and is treated as ESM
+ input=js, capture_output=True, text=True, cwd=str(_REPO), timeout=30,
+ )
+ assert proc.returncode == 0, proc.stderr  # a non-zero exit fails the test with node's stderr as the message
+ return proc.stdout.strip()
+
+
+def _replace(text: str) -> str:
+ js = f"""
+ import {{ replaceEmojiShortcodes }} from '{_HELPER.as_posix()}';
+ console.log(JSON.stringify(replaceEmojiShortcodes({json.dumps(text)})));
+ """
+ return json.loads(_run(js))
+
+
+@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
+def test_issue_345_examples_convert():
+ # The exact shortcodes the issue reported as showing up as literal text.
+ assert _replace("visit today? :blush:") == "visit today? \U0001f60a"  # U+1F60A is the `blush` glyph
+ assert _replace("hobbies? **:microphone:**") == "hobbies? **\U0001f3a4**"  # markdown `**` markers count as a boundary
+
+
+@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
+def test_common_shortcodes_and_aliases():
+ assert _replace(":fire:") == "\U0001f525"  # bare shortcode: both string edges count as boundaries
+ assert _replace(":tada:") == "\U0001f389"
+ assert _replace(":thinking:") == "\U0001f914"
+ # +1 / thumbsup are aliases for the same glyph.
+ assert _replace(":+1:") == "\U0001f44d"  # `+` is allowed inside a shortcode name
+ assert _replace(":thumbsup:") == "\U0001f44d"
+ # Multiple in one string, mixed with surrounding text.
+ assert _replace("nice :fire: work :100:") == "nice \U0001f525 work \U0001f4af"  # an all-digit name converts when space-delimited
+
+
+@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
+def test_unknown_and_nonshortcodes_untouched():
+ # Unknown shortcode left verbatim (incl. the :emoji: placeholder).
+ assert _replace(":definitely_not_an_emoji:") == ":definitely_not_an_emoji:"
+ assert _replace(":emoji:") == ":emoji:"  # shortcode-shaped, but `emoji` is not a key in the map
+ # Time ranges / ratios must not be mangled.
+ assert _replace("meet at 10:30:45 today") == "meet at 10:30:45 today"  # `:30:` has the shortcode shape but is not a known name
+ assert _replace("ratio 16:9 vs 4:3") == "ratio 16:9 vs 4:3"  # no `:name:` pair forms here at all
+ # No colons at all → returned as-is.
+ assert _replace("plain text") == "plain text"
+
+
+@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
+def test_known_shortcode_embedded_in_token_is_not_converted():
+ # Regression: a KNOWN shortcode that happens to sit inside a longer run of
+ # digits/letters is literal text, not an emoji. The classic trap is a numeric
+ # range whose middle segment spells a real shortcode (`:100:` → 💯):
+ assert _replace("1:100:2") == "1:100:2"
+ assert _replace("scale 3:100:7 ok") == "scale 3:100:7 ok"
+ # Glued to a word on either side → left alone (e.g. `key:value:` style text,
+ # URL authorities like `host:fire:port`).
+ assert _replace("host:fire:port") == "host:fire:port"
+ assert _replace("status:fire:") == "status:fire:"  # word character directly before the opening colon
+ assert _replace(":fire:done") == ":fire:done"  # word character directly after the closing colon
+ # But a standalone shortcode flanked by whitespace/punctuation still converts,
+ # including back-to-back shortcodes and the leading `:100:` once delimited.
+ assert _replace("we hit :100: today") == "we hit \U0001f4af today"
+ assert _replace("see :fire:!") == "see \U0001f525!"
+ assert _replace(":fire::tada:") == "\U0001f525\U0001f389"  # the neighbouring `:` is not a word character
+
+
+@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
+def test_has_emoji_shortcode_detector():
+ js = f"""
+ import {{ hasEmojiShortcode }} from '{_HELPER.as_posix()}';
+ const out = [
+ hasEmojiShortcode(':blush:'),
+ hasEmojiShortcode('no shortcodes here'),
+ hasEmojiShortcode('a single : colon'),
+ ];
+ console.log(JSON.stringify(out));
+ """
+ assert json.loads(_run(js)) == [True, False, False]
diff --git a/tests/test_markdown_rendering_js.py b/tests/test_markdown_rendering_js.py
index 4f36528..7cfd3b5 100644
--- a/tests/test_markdown_rendering_js.py
+++ b/tests/test_markdown_rendering_js.py
@@ -41,6 +41,18 @@ def _run_markdown_case(markdown: str, render_expr: str = "mod.mdToHtml(input)"):
return (row || '').replace(/^\\s*\\|/, '').replace(/\\|\\s*$/, '').split('|').map(c => c.trim());
}`
);
+ // markdown.js imports the emoji-shortcode helpers relatively (issue #345),
+ // which a data: URL module can't resolve. Inline the REAL helpers (minus
+ // their export keywords) so the renderer's shortcode pass behaves exactly
+ // as it does in the browser.
+ const emojiSource = fs.readFileSync('./static/js/emojiShortcodes.js', 'utf8')
+ .replace(/^export default .*$/m, '')
+ .replace(/export const /g, 'const ')
+ .replace(/export function /g, 'function ');
+ source = source.replace(
+ /import \{ replaceEmojiShortcodes, hasEmojiShortcode \} from ['"]\.\/emojiShortcodes\.js['"];/,
+ () => emojiSource
+ );
source = source.replace(
/var escapeHtml = uiModule\.esc;/,
`var escapeHtml = (value) => String(value ?? '')