Render emoji shortcodes as icons in chat (#345) (#629)

Chat models often emit GitHub/Slack-style :shortcode: text (e.g. `:blush:`,
`:microphone:`) instead of the actual emoji. The renderer only converted real
Unicode emoji to the monochrome line icons, so shortcodes rendered as literal
text.

Add a pure, browser-free shortcode->Unicode map (emojiShortcodes.js) and run it
inside svgifyEmoji ahead of the existing Unicode->SVG pass, skipping <code>/<pre>
so code stays literal. Covers ~430 common shortcodes plus common aliases
(+1/thumbsup, etc.).

Keep the conversion from touching anything it shouldn't:
* Scope it to chat. mdToHtml/svgifyEmoji take a { shortcodes } option (default
  on); document and email body rendering (compose, export, preview) pass it as
  false so author-typed :shortcode: text stays literal. The Unicode->SVG pass
  still runs there exactly as before.
* Only convert a :shortcode: that stands on its own. A word-boundary guard
  leaves embedded colon runs alone, so "1:100:2", "10:30:45", "16:9" and
  host:fire:port are never rewritten.

Tests: extend the node-driven unit test with the boundary/false-positive cases,
and fix the markdown-rendering test loader to resolve the new emojiShortcodes
import.
This commit is contained in:
Zeus-Deus
2026-06-05 02:28:42 +02:00
committed by GitHub
parent f9c81f3c8d
commit 85334e8f3d
6 changed files with 604 additions and 9 deletions

View File

@@ -2246,7 +2246,9 @@ import * as Modals from './modalManager.js';
// WYSIWYG body — use it verbatim. (Checking a leading '<' isn't enough: a // WYSIWYG body — use it verbatim. (Checking a leading '<' isn't enough: a
// rich body often starts with plain text, e.g. "Hi <b>there</b>".) // rich body often starts with plain text, e.g. "Hi <b>there</b>".)
if (/<\/?(b|i|u|s|strong|em|del|strike|a|p|div|br|ul|ol|li|h[1-3]|blockquote|span|code|pre)\b[^>]*>/i.test(t)) return t; if (/<\/?(b|i|u|s|strong|em|del|strike|a|p|div|br|ul|ol|li|h[1-3]|blockquote|span|code|pre)\b[^>]*>/i.test(t)) return t;
try { return markdownModule.mdToHtml(text); } // Email body: keep author-typed `:shortcode:` text literal. Issue #345
// (shortcode → emoji) is scoped to chat; do not rewrite colons in mail.
try { return markdownModule.mdToHtml(text, { shortcodes: false }); }
catch (_) { catch (_) {
const d = document.createElement('div'); d.textContent = text; const d = document.createElement('div'); d.textContent = text;
return d.innerHTML.replace(/\n/g, '<br>'); return d.innerHTML.replace(/\n/g, '<br>');
@@ -8386,7 +8388,7 @@ import * as Modals from './modalManager.js';
const text = textarea.value || ''; const text = textarea.value || '';
let body; let body;
if (lang === 'markdown' && markdownModule?.mdToHtml) { if (lang === 'markdown' && markdownModule?.mdToHtml) {
body = markdownModule.mdToHtml(text); body = markdownModule.mdToHtml(text, { shortcodes: false }); // export: keep :shortcodes: literal
} else { } else {
body = '<pre style="white-space:pre-wrap;font-size:12px;font-family:monospace;">' + body = '<pre style="white-space:pre-wrap;font-size:12px;font-family:monospace;">' +
text.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;') + '</pre>'; text.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;') + '</pre>';
@@ -8417,7 +8419,7 @@ import * as Modals from './modalManager.js';
// Render content as HTML for PDF // Render content as HTML for PDF
let html; let html;
if (lang === 'markdown' && markdownModule?.mdToHtml) { if (lang === 'markdown' && markdownModule?.mdToHtml) {
html = markdownModule.mdToHtml(text); html = markdownModule.mdToHtml(text, { shortcodes: false }); // export: keep :shortcodes: literal
} else { } else {
html = '<pre style="white-space:pre-wrap;font-size:11px;font-family:monospace;color:#000;background:#fff;">' + html = '<pre style="white-space:pre-wrap;font-size:11px;font-family:monospace;color:#000;background:#fff;">' +
text.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;') + '</pre>'; text.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;') + '</pre>';
@@ -8547,7 +8549,7 @@ import * as Modals from './modalManager.js';
if (active) { if (active) {
const md = textarea.value || ''; const md = textarea.value || '';
if (markdownModule && markdownModule.mdToHtml) { if (markdownModule && markdownModule.mdToHtml) {
preview.innerHTML = markdownModule.mdToHtml(md); preview.innerHTML = markdownModule.mdToHtml(md, { shortcodes: false }); // doc preview: keep :shortcodes: literal
} else { } else {
preview.innerHTML = md.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/\n/g, '<br>'); preview.innerHTML = md.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/\n/g, '<br>');
} }

View File

@@ -0,0 +1,458 @@
// static/js/emojiShortcodes.js
//
// Emoji shortcode → Unicode conversion (issue #345).
//
// Chat models frequently emit GitHub/Slack-style `:shortcode:` text — e.g.
// `:blush:`, `:fire:`, `:microphone:` — instead of the actual emoji character.
// Nothing in the render pipeline used to translate these, so they showed up as
// literal `:blush:` text in the chat bubble.
//
// This module turns the common shortcode set into the real Unicode emoji. The
// chat renderer (markdown.js → svgifyEmoji) runs this BEFORE its existing
// Unicode-emoji → monochrome-SVG pass, so a converted `:blush:` renders as the
// same theme-tinted single-color line icon as any other emoji (project rule:
// never colorful emoji), not as a colored system glyph.
//
// Pure and browser-free on purpose: no DOM, no imports, so it can be unit
// tested with plain `node` (see tests/test_emoji_shortcodes_js.py).
// Canonical map of common shortcode → Unicode emoji. Keys are lowercase and
// follow the GitHub convention (underscore-separated); replaceEmojiShortcodes
// lowercases the captured name before looking it up here. A handful of
// well-known aliases (`+1`, `thumbsup`, `grinning_face`, …) point at the same
// glyph so the most frequent model spellings all resolve.
//
// Every value must be a non-empty emoji string: the value is what takes the
// place of the `:name:` text in the rendered output.
export const EMOJI_SHORTCODES = {
  // ── Smileys & emotion ──
  grinning: '😀', grinning_face: '😀',
  smiley: '😃', smiley_face: '😃',
  smile: '😄',
  grin: '😁',
  laughing: '😆', satisfied: '😆',
  sweat_smile: '😅',
  rofl: '🤣', rolling_on_the_floor_laughing: '🤣',
  joy: '😂',
  slightly_smiling_face: '🙂', slight_smile: '🙂',
  upside_down_face: '🙃', upside_down: '🙃',
  wink: '😉', winking_face: '😉',
  blush: '😊', smiling_face_with_smiling_eyes: '😊',
  innocent: '😇',
  smiling_face_with_three_hearts: '🥰',
  heart_eyes: '😍', heart_eyes_face: '😍',
  star_struck: '🤩',
  kissing_heart: '😘',
  kissing: '😗',
  kissing_closed_eyes: '😚',
  kissing_smiling_eyes: '😙',
  yum: '😋',
  stuck_out_tongue: '😛',
  stuck_out_tongue_winking_eye: '😜',
  zany_face: '🤪',
  stuck_out_tongue_closed_eyes: '😝',
  money_mouth_face: '🤑',
  hugs: '🤗', hugging_face: '🤗',
  hand_over_mouth: '🤭',
  shushing_face: '🤫',
  thinking: '🤔', thinking_face: '🤔',
  zipper_mouth_face: '🤐',
  raised_eyebrow: '🤨',
  neutral_face: '😐',
  expressionless: '😑',
  no_mouth: '😶',
  smirk: '😏', smirk_face: '😏',
  unamused: '😒',
  roll_eyes: '🙄', face_with_rolling_eyes: '🙄',
  grimacing: '😬',
  lying_face: '🤥',
  relieved: '😌',
  pensive: '😔',
  sleepy: '😪',
  drooling_face: '🤤',
  sleeping: '😴',
  mask: '😷',
  face_with_thermometer: '🤒',
  face_with_head_bandage: '🤕',
  nauseated_face: '🤢',
  vomiting_face: '🤮',
  sneezing_face: '🤧',
  hot_face: '🥵',
  cold_face: '🥶',
  woozy_face: '🥴',
  dizzy_face: '😵',
  exploding_head: '🤯',
  cowboy_hat_face: '🤠',
  partying_face: '🥳',
  sunglasses: '😎',
  nerd_face: '🤓',
  monocle_face: '🧐',
  confused: '😕',
  worried: '😟',
  slightly_frowning_face: '🙁',
  frowning_face: '☹️',
  open_mouth: '😮',
  hushed: '😯',
  astonished: '😲',
  flushed: '😳',
  pleading_face: '🥺',
  frowning: '😦',
  anguished: '😧',
  fearful: '😨',
  cold_sweat: '😰',
  disappointed_relieved: '😥',
  cry: '😢',
  sob: '😭',
  scream: '😱',
  confounded: '😖',
  persevere: '😣',
  disappointed: '😞',
  sweat: '😓',
  weary: '😩',
  tired_face: '😫',
  yawning_face: '🥱',
  triumph: '😤',
  rage: '😡', pout: '😡', pouting_face: '😡',
  angry: '😠',
  cursing_face: '🤬',
  smiling_imp: '😈',
  imp: '👿',
  skull: '💀',
  skull_and_crossbones: '☠️',
  hankey: '💩', poop: '💩', shit: '💩',
  clown_face: '🤡',
  japanese_ogre: '👹',
  japanese_goblin: '👺',
  ghost: '👻',
  alien: '👽',
  space_invader: '👾',
  robot: '🤖', robot_face: '🤖',
  // ── Cats ──
  smiley_cat: '😺',
  smile_cat: '😸',
  joy_cat: '😹',
  heart_eyes_cat: '😻',
  smirk_cat: '😼',
  kissing_cat: '😽',
  scream_cat: '🙀',
  crying_cat_face: '😿',
  pouting_cat: '😾',
  see_no_evil: '🙈',
  hear_no_evil: '🙉',
  speak_no_evil: '🙊',
  // ── Hands & body ──
  wave: '👋', wave_hand: '👋',
  raised_back_of_hand: '🤚',
  raised_hand_with_fingers_splayed: '🖐️',
  hand: '✋', raised_hand: '✋',
  vulcan_salute: '🖖',
  ok_hand: '👌',
  pinched_fingers: '🤌',
  pinching_hand: '🤏',
  v: '✌️', victory_hand: '✌️',
  crossed_fingers: '🤞',
  love_you_gesture: '🤟',
  metal: '🤘',
  call_me_hand: '🤙',
  point_left: '👈',
  point_right: '👉',
  point_up_2: '👆',
  middle_finger: '🖕', fu: '🖕',
  point_down: '👇',
  point_up: '☝️',
  '+1': '👍', thumbsup: '👍', thumbup: '👍', thumbs_up: '👍',
  '-1': '👎', thumbsdown: '👎', thumbdown: '👎', thumbs_down: '👎',
  fist_raised: '✊', fist: '✊',
  fist_oncoming: '👊', facepunch: '👊', punch: '👊',
  fist_left: '🤛',
  fist_right: '🤜',
  clap: '👏', clapping_hands: '👏',
  raised_hands: '🙌',
  open_hands: '👐',
  palms_up_together: '🤲',
  handshake: '🤝',
  pray: '🙏', folded_hands: '🙏',
  writing_hand: '✍️',
  nail_care: '💅',
  selfie: '🤳',
  muscle: '💪', flexed_biceps: '💪',
  // ── Hearts & symbols of feeling ──
  heart: '❤️', red_heart: '❤️',
  orange_heart: '🧡',
  yellow_heart: '💛',
  green_heart: '💚',
  blue_heart: '💙',
  purple_heart: '💜',
  black_heart: '🖤',
  white_heart: '🤍',
  brown_heart: '🤎',
  broken_heart: '💔',
  heart_on_fire: '❤️‍🔥',
  two_hearts: '💕',
  revolving_hearts: '💞',
  heartbeat: '💓',
  heartpulse: '💗',
  sparkling_heart: '💖',
  cupid: '💘',
  gift_heart: '💝',
  heart_decoration: '💟',
  heavy_heart_exclamation: '❣️',
  // ── Celebration & misc objects ──
  fire: '🔥', flame: '🔥',
  '100': '💯', hundred: '💯',
  sparkles: '✨',
  star: '⭐',
  star2: '🌟', glowing_star: '🌟',
  dizzy: '💫',
  boom: '💥', collision: '💥',
  anger: '💢',
  sweat_drops: '💦',
  dash: '💨',
  zzz: '💤',
  tada: '🎉', party_popper: '🎉',
  confetti_ball: '🎊',
  balloon: '🎈',
  gift: '🎁',
  trophy: '🏆',
  '1st_place_medal': '🥇',
  '2nd_place_medal': '🥈',
  '3rd_place_medal': '🥉',
  medal_sports: '🏅',
  zap: '⚡', lightning: '⚡',
  bulb: '💡', light_bulb: '💡',
  key: '🔑',
  lock: '🔒',
  unlock: '🔓',
  bell: '🔔',
  no_bell: '🔕',
  loudspeaker: '📢',
  mega: '📣', megaphone: '📣',
  speech_balloon: '💬',
  thought_balloon: '💭',
  white_check_mark: '✅',
  heavy_check_mark: '✔️', check_mark: '✔️',
  ballot_box_with_check: '☑️',
  x: '❌', cross_mark: '❌',
  negative_squared_cross_mark: '❎',
  question: '❓',
  grey_question: '❔',
  exclamation: '❗', heavy_exclamation_mark: '❗',
  grey_exclamation: '❕',
  warning: '⚠️',
  no_entry: '⛔',
  no_entry_sign: '🚫',
  red_circle: '🔴',
  green_circle: '🟢',
  large_blue_circle: '🔵',
  yellow_circle: '🟡',
  white_circle: '⚪',
  black_circle: '⚫',
  orange_circle: '🟠',
  purple_circle: '🟣',
  brown_circle: '🟤',
  // ── Tech, work, study ──
  rocket: '🚀',
  eyes: '👀',
  eye: '👁️',
  brain: '🧠',
  books: '📚',
  book: '📖', open_book: '📖',
  memo: '📝', pencil: '📝',
  pencil2: '✏️',
  page_facing_up: '📄',
  paperclip: '📎',
  pushpin: '📌',
  round_pushpin: '📍',
  link: '🔗',
  bar_chart: '📊',
  chart_with_upwards_trend: '📈',
  chart_with_downwards_trend: '📉',
  mag: '🔍',
  mag_right: '🔎',
  globe_with_meridians: '🌐',
  earth_africa: '🌍',
  earth_americas: '🌎',
  earth_asia: '🌏',
  alarm_clock: '⏰',
  hourglass_flowing_sand: '⏳',
  hourglass: '⌛',
  microphone: '🎤', mic: '🎤',
  musical_note: '🎵',
  notes: '🎶', musical_notes: '🎶',
  headphones: '🎧',
  camera: '📷',
  camera_flash: '📸',
  clapper: '🎬',
  tv: '📺',
  computer: '💻', laptop: '💻',
  desktop_computer: '🖥️',
  iphone: '📱', mobile_phone: '📱',
  telephone: '☎️',
  wrench: '🔧',
  hammer: '🔨',
  gear: '⚙️',
  nut_and_bolt: '🔩',
  magnet: '🧲',
  test_tube: '🧪',
  microscope: '🔬',
  dart: '🎯', bullseye: '🎯',
  game_die: '🎲',
  jigsaw: '🧩',
  // ── Food & drink ──
  pizza: '🍕',
  hamburger: '🍔',
  fries: '🍟',
  taco: '🌮',
  sushi: '🍣',
  doughnut: '🍩', donut: '🍩',
  coffee: '☕',
  beer: '🍺',
  wine_glass: '🍷',
  // ── Animals & nature ──
  dog: '🐶',
  cat: '🐱',
  mouse: '🐭',
  hamster: '🐹',
  rabbit: '🐰',
  fox_face: '🦊',
  bear: '🐻',
  panda_face: '🐼',
  koala: '🐨',
  tiger: '🐯',
  lion: '🦁',
  cow: '🐮',
  pig: '🐷',
  frog: '🐸',
  monkey_face: '🐵',
  chicken: '🐔',
  penguin: '🐧',
  bird: '🐦',
  eagle: '🦅',
  duck: '🦆',
  owl: '🦉',
  wolf: '🐺',
  horse: '🐴',
  unicorn: '🦄',
  bee: '🐝', honeybee: '🐝',
  bug: '🐛',
  butterfly: '🦋',
  snail: '🐌',
  lady_beetle: '🐞',
  snake: '🐍',
  turtle: '🐢',
  octopus: '🐙',
  crab: '🦀',
  tropical_fish: '🐠',
  whale: '🐳',
  shark: '🦈',
  cherry_blossom: '🌸',
  rose: '🌹',
  sunflower: '🌻',
  hibiscus: '🌺',
  tulip: '🌷',
  seedling: '🌱',
  evergreen_tree: '🌲',
  deciduous_tree: '🌳',
  four_leaf_clover: '🍀',
  apple: '🍎',
  green_apple: '🍏',
  pear: '🍐',
  tangerine: '🍊',
  lemon: '🍋',
  banana: '🍌',
  watermelon: '🍉',
  grapes: '🍇',
  strawberry: '🍓',
  blueberries: '🫐',
  peach: '🍑',
  rainbow: '🌈',
  sunny: '☀️', sun: '☀️',
  partly_sunny: '⛅',
  cloud: '☁️',
  snowflake: '❄️',
  ocean: '🌊',
  // ── Arrows & signs ──
  arrow_right: '➡️',
  arrow_left: '⬅️',
  arrow_up: '⬆️',
  arrow_down: '⬇️',
  arrow_upper_right: '↗️',
  arrow_lower_right: '↘️',
  arrow_lower_left: '↙️',
  arrow_upper_left: '↖️',
  leftwards_arrow_with_hook: '↩️',
  arrow_right_hook: '↪️',
  arrows_counterclockwise: '🔄',
  arrows_clockwise: '🔃',
  // These two were blank strings, which made the shortcode disappear from the
  // text instead of rendering. Written as escapes: U+2795 / U+2796.
  heavy_plus_sign: '\u2795',
  heavy_minus_sign: '\u2796',
  heavy_division_sign: '➗',
  heavy_multiplication_x: '✖️',
  infinity: '♾️',
  copyright: '©️',
  registered: '®️',
  tm: '™️',
  recycle: '♻️',
  checkered_flag: '🏁',
  triangular_flag_on_post: '🚩',
  white_flag: '🏳️',
  black_flag: '🏴',
  // ── People & wearables ──
  baby: '👶',
  boy: '👦',
  girl: '👧',
  man: '👨',
  woman: '👩',
  older_man: '👴',
  older_woman: '👵',
  crown: '👑',
  gem: '💎',
  graduation_cap: '🎓', mortar_board: '🎓',
};
// Matches `:name:` where name is 1–40 characters drawn from letters, digits,
// `_`, `+` and `-`, capturing the name as group 1. `+`/`-` are in the class so
// `:+1:` and `:-1:` match, and the minimum length of 1 admits single-character
// names such as `:v:` and `:x:`. Global + case-insensitive for use with
// `String.prototype.replace`; hasEmojiShortcode uses its own non-global literal
// for the cheap presence check, so there's no shared lastIndex state to reset.
const SHORTCODE_RE = /:([a-z0-9_+-]{1,40}):/gi;
/**
 * Fast pre-check: could `text` hold at least one `:name:`-shaped token?
 *
 * Only the shape is tested (1–40 letters, digits, `_`, `+` or `-` between two
 * colons, case-insensitive). The name is not looked up in the shortcode table
 * and no word-boundary rule is applied, so `true` means "worth running the
 * replace pass", not "something will definitely be replaced".
 *
 * @param {string} text - Candidate text; any falsy value yields `false`.
 * @returns {boolean} `true` when a `:name:`-shaped run is present.
 */
export function hasEmojiShortcode(text) {
  if (!text) return false;
  // No colon at all → skip the regex entirely.
  if (!text.includes(':')) return false;
  // Fresh non-global literal, so `.test` carries no `lastIndex` between calls.
  return /:[a-z0-9_+-]{1,40}:/i.test(text);
}
// Standalone-token guard for shortcodes.
//
// A `:name:` only counts as an emoji request when neither neighbouring
// character is an ASCII letter, digit or underscore. The start/end of the
// string, whitespace, punctuation (including `+`, `-` and another `:`) and
// markup are all acceptable neighbours. This is what keeps colon runs that sit
// inside a longer token literal even when the middle segment is a real
// shortcode name:
//   "1:100:2"        → stays as-is (otherwise `:100:` would turn into 💯)
//   "host:fire:port" → stays as-is
// while ":fire:", "**:microphone:**", "nice :tada:!" and back-to-back
// ":fire::tada:" still convert.
const _WORDISH = /[A-Za-z0-9_]/;

/**
 * Report whether the span `[start, end)` of `str` has a non-word character
 * (or a string edge) immediately on each side.
 *
 * @param {string} str - Full text being scanned.
 * @param {number} start - Index of the first character of the span.
 * @param {number} end - Index just past the last character of the span.
 * @returns {boolean} `false` when either neighbour matches `_WORDISH`.
 */
function _boundedOnBothSides(str, start, end) {
  // Left neighbour exists only when the span does not begin the string.
  if (start > 0 && _WORDISH.test(str[start - 1])) return false;
  // Right neighbour exists only when the span does not end the string.
  if (end < str.length && _WORDISH.test(str[end])) return false;
  return true;
}
/**
 * Replace every known, standalone `:shortcode:` in `text` with its Unicode
 * emoji.
 *
 * Left exactly as written:
 *  - unknown names (`:definitely_not_emoji:`),
 *  - colon runs that don't spell a table entry (`10:30:45`, `16:9`),
 *  - known names glued to a word character on either side (`1:100:2`),
 *  - names whose table entry is blank — a missing glyph must never erase the
 *    text that was typed.
 *
 * Names are matched case-insensitively and lowercased before lookup.
 *
 * @param {string} text - Text to scan; falsy or colon-free input is returned
 *   unchanged.
 * @returns {string} `text` with each convertible shortcode swapped for its
 *   emoji.
 */
export function replaceEmojiShortcodes(text) {
  if (!text || text.indexOf(':') === -1) return text;
  return text.replace(SHORTCODE_RE, (whole, name, offset, str) => {
    const key = name.toLowerCase();
    // Own-property check so inherited names like `constructor` never resolve.
    if (!Object.prototype.hasOwnProperty.call(EMOJI_SHORTCODES, key)) return whole;
    const emoji = EMOJI_SHORTCODES[key];
    // A blank entry would otherwise replace the shortcode with nothing.
    if (!emoji) return whole;
    // Only convert when the `:shortcode:` is a standalone token, not glued to a
    // surrounding word/number (which would mean it's literal text, not an emoji).
    if (!_boundedOnBothSides(str, offset, offset + whole.length)) return whole;
    return emoji;
  });
}
// Default export: the same three named exports bundled into one object, for
// callers that import the module as a single value.
export default { EMOJI_SHORTCODES, replaceEmojiShortcodes, hasEmojiShortcode };

View File

@@ -6,6 +6,7 @@
import uiModule from './ui.js'; import uiModule from './ui.js';
import { splitTableRow } from './markdown/tableRow.js'; import { splitTableRow } from './markdown/tableRow.js';
import { replaceEmojiShortcodes, hasEmojiShortcode } from './emojiShortcodes.js';
var escapeHtml = uiModule.esc; var escapeHtml = uiModule.esc;
@@ -366,8 +367,19 @@ function _useSvgEmoji() {
return typeof document === 'undefined' || !document.body?.classList.contains('text-emojis'); return typeof document === 'undefined' || !document.body?.classList.contains('text-emojis');
} }
export function svgifyEmoji(html) { // `opts.shortcodes` (default true) controls the issue-#345 `:name:` → emoji
if (!_useSvgEmoji() || !html || !_EMOJI_RE.test(html)) return html; // expansion. Chat passes it through as true; document/email body renderers pass
// false so author-typed `:shortcode:` text stays literal (see mdToHtml callers).
// The Unicode-emoji → monochrome-SVG pass always runs regardless, so a real 😀
// in a document still renders as the themed line icon as it always has.
export function svgifyEmoji(html, opts) {
if (!_useSvgEmoji() || !html) return html;
const allowShortcodes = !opts || opts.shortcodes !== false;
// Two reasons to walk the HTML: real Unicode emoji to turn into SVG icons,
// or `:shortcode:` text the model emitted instead of an emoji (issue #345).
const hasUnicode = _EMOJI_RE.test(html);
const hasShortcode = allowShortcodes && hasEmojiShortcode(html);
if (!hasUnicode && !hasShortcode) return html;
const parts = html.split(/(<[^>]*>)/); // odd indices = tags const parts = html.split(/(<[^>]*>)/); // odd indices = tags
let codeDepth = 0; let codeDepth = 0;
for (let i = 0; i < parts.length; i++) { for (let i = 0; i < parts.length; i++) {
@@ -377,7 +389,13 @@ export function svgifyEmoji(html) {
else if (/^<\/(pre|code)\s*>/.test(t)) codeDepth = Math.max(0, codeDepth - 1); else if (/^<\/(pre|code)\s*>/.test(t)) codeDepth = Math.max(0, codeDepth - 1);
continue; continue;
} }
if (codeDepth === 0 && _EMOJI_RE.test(parts[i])) parts[i] = _svgifyText(parts[i]); if (codeDepth !== 0) continue;
let seg = parts[i];
// Expand shortcodes to Unicode first, then both they and any pre-existing
// Unicode emoji get rendered as the same monochrome line icons below.
if (hasShortcode) seg = replaceEmojiShortcodes(seg);
if (_EMOJI_RE.test(seg)) seg = _svgifyText(seg);
parts[i] = seg;
} }
return parts.join(''); return parts.join('');
} }
@@ -421,7 +439,7 @@ export function processWithThinking(text) {
/** /**
* Convert markdown to HTML * Convert markdown to HTML
*/ */
export function mdToHtml(src) { export function mdToHtml(src, opts) {
const allowedHtmlBlocks = []; const allowedHtmlBlocks = [];
const codeBlocks = []; const codeBlocks = [];
const mermaidBlocks = []; const mermaidBlocks = [];
@@ -678,7 +696,7 @@ export function mdToHtml(src) {
s = s.replace(`___CODE_BLOCK_${index}___`, block); s = s.replace(`___CODE_BLOCK_${index}___`, block);
}); });
return _useSvgEmoji() ? svgifyEmoji(s) : s; return _useSvgEmoji() ? svgifyEmoji(s, opts) : s;
} }
/** /**

View File

@@ -16,6 +16,10 @@ src = src.replace(
/import \{ splitTableRow \} from '\.\/markdown\/tableRow\.js';/, /import \{ splitTableRow \} from '\.\/markdown\/tableRow\.js';/,
'const splitTableRow = (row) => row.split("|").filter((cell) => cell.trim() !== "");' 'const splitTableRow = (row) => row.split("|").filter((cell) => cell.trim() !== "");'
); );
src = src.replace(
/import \{ replaceEmojiShortcodes, hasEmojiShortcode \} from '\.\/emojiShortcodes\.js';/,
'const hasEmojiShortcode = (t) => !!t && t.indexOf(":") !== -1 && /:[a-z0-9_+-]{1,40}:/i.test(t); const replaceEmojiShortcodes = (t) => t;'
);
src = src.replace(/export function /g, 'function '); src = src.replace(/export function /g, 'function ');
src = src.replace(/export const /g, 'const '); src = src.replace(/export const /g, 'const ');
src = src.replace(/export default markdownModule;?/g, ''); src = src.replace(/export default markdownModule;?/g, '');

View File

@@ -0,0 +1,101 @@
"""Pin the pure emoji shortcode → Unicode helpers in emojiShortcodes.js.
Driven through `node --input-type=module` so we exercise the real JS without a
full Vitest/Jest setup (same approach as test_reply_recipients_js.py / test_compare_js.py).
Skips when `node` is not installed rather than failing.
Regression for issue #345: chat models emit GitHub-style :shortcode: text
(e.g. :blush:, :microphone:) instead of the actual emoji, and nothing in the
render pipeline translated them, so they showed up as literal ":blush:" text.
"""
import json
import shutil
import subprocess
from pathlib import Path
import pytest
# Repository root: the parent of the directory that holds this test file.
_REPO = Path(__file__).resolve().parent.parent
# Absolute path of the JS module under test.
_HELPER = _REPO / "static" / "js" / "emojiShortcodes.js"
# True when a `node` executable is on PATH; the tests are skipped otherwise.
_HAS_NODE = shutil.which("node") is not None
def _run(js: str) -> str:
    """Execute ``js`` as an ES module under ``node`` and return its trimmed stdout.

    The script is piped on stdin to ``node --input-type=module`` with the
    repository root as the working directory and a 30 second timeout. A
    non-zero exit status fails the calling test, using node's stderr as the
    assertion message.
    """
    completed = subprocess.run(
        ["node", "--input-type=module"],
        input=js,
        capture_output=True,
        text=True,
        cwd=str(_REPO),
        timeout=30,
    )
    assert completed.returncode == 0, completed.stderr
    return completed.stdout.strip()
def _replace(text: str) -> str:
    """Run the real ``replaceEmojiShortcodes`` on ``text`` via node.

    The helper module is imported through a ``file:`` URL rather than a bare
    absolute path: Node's ESM loader rejects Windows drive paths such as
    ``C:/...`` as import specifiers. Both the specifier and the input text are
    JSON-encoded so quotes or backslashes in either cannot break the generated
    script. The JS result comes back as JSON on stdout and is decoded here.
    """
    module_url = json.dumps(_HELPER.as_uri())
    js = (
        f"import {{ replaceEmojiShortcodes }} from {module_url};\n"
        f"console.log(JSON.stringify(replaceEmojiShortcodes({json.dumps(text)})));\n"
    )
    return json.loads(_run(js))
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_issue_345_examples_convert():
    """The exact shortcodes issue #345 reported as literal text now convert."""
    reported = [
        ("visit today? :blush:", "visit today? \U0001f60a"),
        ("hobbies? **:microphone:**", "hobbies? **\U0001f3a4**"),
    ]
    for raw, rendered in reported:
        assert _replace(raw) == rendered
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_common_shortcodes_and_aliases():
    """Common names, alias spellings, and several shortcodes in one string."""
    expectations = [
        (":fire:", "\U0001f525"),
        (":tada:", "\U0001f389"),
        (":thinking:", "\U0001f914"),
        # `+1` and `thumbsup` are aliases for the same glyph.
        (":+1:", "\U0001f44d"),
        (":thumbsup:", "\U0001f44d"),
        # Multiple shortcodes in one string, mixed with surrounding text.
        ("nice :fire: work :100:", "nice \U0001f525 work \U0001f4af"),
    ]
    for raw, rendered in expectations:
        assert _replace(raw) == rendered
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_unknown_and_nonshortcodes_untouched():
    """Inputs with nothing convertible must come back byte-for-byte."""
    untouched = [
        # Unknown shortcodes stay verbatim (incl. the :emoji: placeholder).
        ":definitely_not_an_emoji:",
        ":emoji:",
        # Time ranges / ratios must not be mangled.
        "meet at 10:30:45 today",
        "ratio 16:9 vs 4:3",
        # No colons at all.
        "plain text",
    ]
    for literal in untouched:
        assert _replace(literal) == literal
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_known_shortcode_embedded_in_token_is_not_converted():
    """A known name glued to a word/number is literal text, not an emoji."""
    # Regression: the middle segment of a numeric range can spell a real
    # shortcode (`:100:`), and `key:value:` / `host:fire:port` style text can
    # contain one too. None of these may be rewritten.
    glued = [
        "1:100:2",
        "scale 3:100:7 ok",
        "host:fire:port",
        "status:fire:",
        ":fire:done",
    ]
    for literal in glued:
        assert _replace(literal) == literal

    # The same names still convert once delimited by whitespace/punctuation,
    # including back-to-back shortcodes.
    standalone = [
        ("we hit :100: today", "we hit \U0001f4af today"),
        ("see :fire:!", "see \U0001f525!"),
        (":fire::tada:", "\U0001f525\U0001f389"),
    ]
    for raw, rendered in standalone:
        assert _replace(raw) == rendered
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_has_emoji_shortcode_detector():
    """``hasEmojiShortcode`` is true only when a ``:name:``-shaped run exists."""
    # Import through a JSON-quoted file: URL: Node's ESM loader rejects Windows
    # drive paths (`C:/...`) as specifiers, and a quote in the path would
    # otherwise break the generated script.
    module_url = json.dumps(_HELPER.as_uri())
    js = f"""
import {{ hasEmojiShortcode }} from {module_url};
const out = [
  hasEmojiShortcode(':blush:'),
  hasEmojiShortcode('no shortcodes here'),
  hasEmojiShortcode('a single : colon'),
];
console.log(JSON.stringify(out));
"""
    assert json.loads(_run(js)) == [True, False, False]

View File

@@ -41,6 +41,18 @@ def _run_markdown_case(markdown: str, render_expr: str = "mod.mdToHtml(input)"):
return (row || '').replace(/^\\s*\\|/, '').replace(/\\|\\s*$/, '').split('|').map(c => c.trim()); return (row || '').replace(/^\\s*\\|/, '').replace(/\\|\\s*$/, '').split('|').map(c => c.trim());
}` }`
); );
// markdown.js imports the emoji-shortcode helpers relatively (issue #345),
// which a data: URL module can't resolve. Inline the REAL helpers (minus
// their export keywords) so the renderer's shortcode pass behaves exactly
// as it does in the browser.
const emojiSource = fs.readFileSync('./static/js/emojiShortcodes.js', 'utf8')
.replace(/^export default .*$/m, '')
.replace(/export const /g, 'const ')
.replace(/export function /g, 'function ');
source = source.replace(
/import \{ replaceEmojiShortcodes, hasEmojiShortcode \} from ['"]\.\/emojiShortcodes\.js['"];/,
() => emojiSource
);
source = source.replace( source = source.replace(
/var escapeHtml = uiModule\.esc;/, /var escapeHtml = uiModule\.esc;/,
`var escapeHtml = (value) => String(value ?? '') `var escapeHtml = (value) => String(value ?? '')