Chat models often emit GitHub/Slack-style :shortcode: text (e.g. `:blush:`, `:microphone:`) instead of the actual emoji. The renderer only converted real Unicode emoji to the monochrome line icons, so shortcodes rendered as literal text. Add a pure, browser-free shortcode->Unicode map (emojiShortcodes.js) and run it inside svgifyEmoji ahead of the existing Unicode->SVG pass, skipping <code>/<pre> so code stays literal. Covers ~430 common shortcodes plus common aliases (+1/thumbsup, etc.). Keep the conversion from touching anything it shouldn't: * Scope it to chat. mdToHtml/svgifyEmoji take a { shortcodes } option (default on); document and email body rendering (compose, export, preview) pass it as false so author-typed :shortcode: text stays literal. The Unicode->SVG pass still runs there exactly as before. * Only convert a :shortcode: that stands on its own. A word-boundary guard leaves embedded colon runs alone, so "1:100:2", "10:30:45", "16:9" and host:fire:port are never rewritten. Tests: extend the node-driven unit test with the boundary/false-positive cases, and fix the markdown-rendering test loader to resolve the new emojiShortcodes import.
459 lines
13 KiB
JavaScript
459 lines
13 KiB
JavaScript
// static/js/emojiShortcodes.js
//
// Emoji shortcode → Unicode conversion (issue #345).
//
// Chat models frequently emit GitHub/Slack-style `:shortcode:` text — e.g.
// `:blush:`, `:fire:`, `:microphone:` — instead of the actual emoji character.
// Nothing in the render pipeline used to translate these, so they showed up as
// literal `:blush:` text in the chat bubble.
//
// This module turns the common shortcode set into the real Unicode emoji. The
// chat renderer (markdown.js → svgifyEmoji) runs this BEFORE its existing
// Unicode-emoji → monochrome-SVG pass, so a converted `:blush:` renders as the
// same theme-tinted single-color line icon as any other emoji (project rule:
// never colorful emoji), not as a colored system glyph.
//
// Pure and browser-free on purpose: no DOM, no imports, so it can be unit
// tested with plain `node` (see tests/test_emoji_shortcodes_js.py).
|
||
|
||
// Canonical map of common shortcode → Unicode emoji. Names follow the GitHub
// convention (lowercase, underscore-separated). A handful of well-known aliases
// (`+1`, `thumbsup`, `grinning_face`, …) point at the same glyph so the most
// frequent model spellings all resolve.
//
// Keys are looked up after lowercasing, so every key here must be lowercase and
// consist only of letters, digits, `_`, `+` or `-` (1–40 characters); anything
// else can never be reached through the `:name:` pattern.
export const EMOJI_SHORTCODES = {
  // ── Smileys & emotion ──
  grinning: '😀', grinning_face: '😀',
  smiley: '😃', smiley_face: '😃',
  smile: '😄',
  grin: '😁',
  laughing: '😆', satisfied: '😆',
  sweat_smile: '😅',
  rofl: '🤣', rolling_on_the_floor_laughing: '🤣',
  joy: '😂',
  slightly_smiling_face: '🙂', slight_smile: '🙂',
  upside_down_face: '🙃', upside_down: '🙃',
  wink: '😉', winking_face: '😉',
  blush: '😊', smiling_face_with_smiling_eyes: '😊',
  innocent: '😇',
  smiling_face_with_three_hearts: '🥰',
  heart_eyes: '😍', heart_eyes_face: '😍',
  star_struck: '🤩',
  kissing_heart: '😘',
  kissing: '😗',
  kissing_closed_eyes: '😚',
  kissing_smiling_eyes: '😙',
  yum: '😋',
  stuck_out_tongue: '😛',
  stuck_out_tongue_winking_eye: '😜',
  zany_face: '🤪',
  stuck_out_tongue_closed_eyes: '😝',
  money_mouth_face: '🤑',
  hugs: '🤗', hugging_face: '🤗',
  hand_over_mouth: '🤭',
  shushing_face: '🤫',
  thinking: '🤔', thinking_face: '🤔',
  zipper_mouth_face: '🤐',
  raised_eyebrow: '🤨',
  neutral_face: '😐',
  expressionless: '😑',
  no_mouth: '😶',
  smirk: '😏', smirk_face: '😏',
  unamused: '😒',
  roll_eyes: '🙄', face_with_rolling_eyes: '🙄',
  grimacing: '😬',
  lying_face: '🤥',
  relieved: '😌',
  pensive: '😔',
  sleepy: '😪',
  drooling_face: '🤤',
  sleeping: '😴',
  mask: '😷',
  face_with_thermometer: '🤒',
  face_with_head_bandage: '🤕',
  nauseated_face: '🤢',
  vomiting_face: '🤮',
  sneezing_face: '🤧',
  hot_face: '🥵',
  cold_face: '🥶',
  woozy_face: '🥴',
  dizzy_face: '😵',
  exploding_head: '🤯',
  cowboy_hat_face: '🤠',
  partying_face: '🥳',
  sunglasses: '😎',
  nerd_face: '🤓',
  monocle_face: '🧐',
  confused: '😕',
  worried: '😟',
  slightly_frowning_face: '🙁',
  frowning_face: '☹️',
  open_mouth: '😮',
  hushed: '😯',
  astonished: '😲',
  flushed: '😳',
  pleading_face: '🥺',
  frowning: '😦',
  anguished: '😧',
  fearful: '😨',
  cold_sweat: '😰',
  disappointed_relieved: '😥',
  cry: '😢',
  sob: '😭',
  scream: '😱',
  confounded: '😖',
  persevere: '😣',
  disappointed: '😞',
  sweat: '😓',
  weary: '😩',
  tired_face: '😫',
  yawning_face: '🥱',
  triumph: '😤',
  rage: '😡', pout: '😡', pouting_face: '😡',
  angry: '😠',
  cursing_face: '🤬',
  smiling_imp: '😈',
  imp: '👿',
  skull: '💀',
  skull_and_crossbones: '☠️',
  hankey: '💩', poop: '💩', shit: '💩',
  clown_face: '🤡',
  japanese_ogre: '👹',
  japanese_goblin: '👺',
  ghost: '👻',
  alien: '👽',
  space_invader: '👾',
  robot: '🤖', robot_face: '🤖',
  // ── Cats & monkeys ──
  smiley_cat: '😺',
  smile_cat: '😸',
  joy_cat: '😹',
  heart_eyes_cat: '😻',
  smirk_cat: '😼',
  kissing_cat: '😽',
  scream_cat: '🙀',
  crying_cat_face: '😿',
  pouting_cat: '😾',
  see_no_evil: '🙈',
  hear_no_evil: '🙉',
  speak_no_evil: '🙊',
  // ── Hands & body ──
  wave: '👋', wave_hand: '👋',
  raised_back_of_hand: '🤚',
  raised_hand_with_fingers_splayed: '🖐️',
  hand: '✋', raised_hand: '✋',
  vulcan_salute: '🖖',
  ok_hand: '👌',
  pinched_fingers: '🤌',
  pinching_hand: '🤏',
  v: '✌️', victory_hand: '✌️',
  crossed_fingers: '🤞',
  love_you_gesture: '🤟',
  metal: '🤘',
  call_me_hand: '🤙',
  point_left: '👈',
  point_right: '👉',
  point_up_2: '👆',
  middle_finger: '🖕', fu: '🖕',
  point_down: '👇',
  point_up: '☝️',
  '+1': '👍', thumbsup: '👍', thumbup: '👍', thumbs_up: '👍',
  '-1': '👎', thumbsdown: '👎', thumbdown: '👎', thumbs_down: '👎',
  fist_raised: '✊', fist: '✊',
  fist_oncoming: '👊', facepunch: '👊', punch: '👊',
  fist_left: '🤛',
  fist_right: '🤜',
  clap: '👏', clapping_hands: '👏',
  raised_hands: '🙌',
  open_hands: '👐',
  palms_up_together: '🤲',
  handshake: '🤝',
  pray: '🙏', folded_hands: '🙏',
  writing_hand: '✍️',
  nail_care: '💅',
  selfie: '🤳',
  muscle: '💪', flexed_biceps: '💪',
  // ── Hearts & symbols of feeling ──
  heart: '❤️', red_heart: '❤️',
  orange_heart: '🧡',
  yellow_heart: '💛',
  green_heart: '💚',
  blue_heart: '💙',
  purple_heart: '💜',
  black_heart: '🖤',
  white_heart: '🤍',
  brown_heart: '🤎',
  broken_heart: '💔',
  // ZWJ sequence: U+2764 U+FE0F U+200D U+1F525. Written with escapes so the
  // invisible zero-width joiner cannot be dropped by editors or copy/paste,
  // which would silently degrade this to two separate glyphs (heart + fire).
  heart_on_fire: '\u2764\uFE0F\u200D\u{1F525}',
  two_hearts: '💕',
  revolving_hearts: '💞',
  heartbeat: '💓',
  heartpulse: '💗',
  sparkling_heart: '💖',
  cupid: '💘',
  gift_heart: '💝',
  heart_decoration: '💟',
  heavy_heart_exclamation: '❣️',
  // ── Celebration & misc objects ──
  fire: '🔥', flame: '🔥',
  '100': '💯', hundred: '💯',
  sparkles: '✨',
  star: '⭐',
  star2: '🌟', glowing_star: '🌟',
  dizzy: '💫',
  boom: '💥', collision: '💥',
  anger: '💢',
  sweat_drops: '💦',
  dash: '💨',
  zzz: '💤',
  tada: '🎉', party_popper: '🎉',
  confetti_ball: '🎊',
  balloon: '🎈',
  gift: '🎁',
  trophy: '🏆',
  '1st_place_medal': '🥇',
  '2nd_place_medal': '🥈',
  '3rd_place_medal': '🥉',
  medal_sports: '🏅',
  zap: '⚡', lightning: '⚡',
  bulb: '💡', light_bulb: '💡',
  key: '🔑',
  lock: '🔒',
  unlock: '🔓',
  bell: '🔔',
  no_bell: '🔕',
  loudspeaker: '📢',
  mega: '📣', megaphone: '📣',
  speech_balloon: '💬',
  thought_balloon: '💭',
  white_check_mark: '✅',
  heavy_check_mark: '✔️', check_mark: '✔️',
  ballot_box_with_check: '☑️',
  x: '❌', cross_mark: '❌',
  negative_squared_cross_mark: '❎',
  question: '❓',
  grey_question: '❔',
  exclamation: '❗', heavy_exclamation_mark: '❗',
  grey_exclamation: '❕',
  warning: '⚠️',
  no_entry: '⛔',
  no_entry_sign: '🚫',
  red_circle: '🔴',
  green_circle: '🟢',
  large_blue_circle: '🔵',
  yellow_circle: '🟡',
  white_circle: '⚪',
  black_circle: '⚫',
  orange_circle: '🟠',
  purple_circle: '🟣',
  brown_circle: '🟤',
  // ── Tech, work, study ──
  rocket: '🚀',
  eyes: '👀',
  eye: '👁️',
  brain: '🧠',
  books: '📚',
  book: '📖', open_book: '📖',
  memo: '📝', pencil: '📝',
  pencil2: '✏️',
  page_facing_up: '📄',
  paperclip: '📎',
  pushpin: '📌',
  round_pushpin: '📍',
  link: '🔗',
  bar_chart: '📊',
  chart_with_upwards_trend: '📈',
  chart_with_downwards_trend: '📉',
  mag: '🔍',
  mag_right: '🔎',
  globe_with_meridians: '🌐',
  earth_africa: '🌍',
  earth_americas: '🌎',
  earth_asia: '🌏',
  alarm_clock: '⏰',
  hourglass_flowing_sand: '⏳',
  hourglass: '⌛',
  microphone: '🎤', mic: '🎤',
  musical_note: '🎵',
  notes: '🎶', musical_notes: '🎶',
  headphones: '🎧',
  camera: '📷',
  camera_flash: '📸',
  clapper: '🎬',
  tv: '📺',
  computer: '💻', laptop: '💻',
  desktop_computer: '🖥️',
  iphone: '📱', mobile_phone: '📱',
  telephone: '☎️',
  wrench: '🔧',
  hammer: '🔨',
  gear: '⚙️',
  nut_and_bolt: '🔩',
  magnet: '🧲',
  test_tube: '🧪',
  microscope: '🔬',
  dart: '🎯', bullseye: '🎯',
  game_die: '🎲',
  jigsaw: '🧩',
  // ── Food & drink ──
  pizza: '🍕',
  hamburger: '🍔',
  fries: '🍟',
  taco: '🌮',
  sushi: '🍣',
  doughnut: '🍩', donut: '🍩',
  coffee: '☕',
  beer: '🍺',
  wine_glass: '🍷',
  // ── Animals & nature ──
  dog: '🐶',
  cat: '🐱',
  mouse: '🐭',
  hamster: '🐹',
  rabbit: '🐰',
  fox_face: '🦊',
  bear: '🐻',
  panda_face: '🐼',
  koala: '🐨',
  tiger: '🐯',
  lion: '🦁',
  cow: '🐮',
  pig: '🐷',
  frog: '🐸',
  monkey_face: '🐵',
  chicken: '🐔',
  penguin: '🐧',
  bird: '🐦',
  eagle: '🦅',
  duck: '🦆',
  owl: '🦉',
  wolf: '🐺',
  horse: '🐴',
  unicorn: '🦄',
  bee: '🐝', honeybee: '🐝',
  bug: '🐛',
  butterfly: '🦋',
  snail: '🐌',
  lady_beetle: '🐞',
  snake: '🐍',
  turtle: '🐢',
  octopus: '🐙',
  crab: '🦀',
  tropical_fish: '🐠',
  whale: '🐳',
  shark: '🦈',
  cherry_blossom: '🌸',
  rose: '🌹',
  sunflower: '🌻',
  hibiscus: '🌺',
  tulip: '🌷',
  seedling: '🌱',
  evergreen_tree: '🌲',
  deciduous_tree: '🌳',
  four_leaf_clover: '🍀',
  apple: '🍎',
  green_apple: '🍏',
  pear: '🍐',
  tangerine: '🍊',
  lemon: '🍋',
  banana: '🍌',
  watermelon: '🍉',
  grapes: '🍇',
  strawberry: '🍓',
  blueberries: '🫐',
  peach: '🍑',
  rainbow: '🌈',
  sunny: '☀️', sun: '☀️',
  partly_sunny: '⛅',
  cloud: '☁️',
  snowflake: '❄️',
  ocean: '🌊',
  // ── Arrows & signs ──
  arrow_right: '➡️',
  arrow_left: '⬅️',
  arrow_up: '⬆️',
  arrow_down: '⬇️',
  arrow_upper_right: '↗️',
  arrow_lower_right: '↘️',
  arrow_lower_left: '↙️',
  arrow_upper_left: '↖️',
  leftwards_arrow_with_hook: '↩️',
  arrow_right_hook: '↪️',
  arrows_counterclockwise: '🔄',
  arrows_clockwise: '🔃',
  heavy_plus_sign: '➕',
  heavy_minus_sign: '➖',
  heavy_division_sign: '➗',
  heavy_multiplication_x: '✖️',
  infinity: '♾️',
  copyright: '©️',
  registered: '®️',
  tm: '™️',
  recycle: '♻️',
  checkered_flag: '🏁',
  triangular_flag_on_post: '🚩',
  white_flag: '🏳️',
  black_flag: '🏴',
  // ── People & wearables ──
  baby: '👶',
  boy: '👦',
  girl: '👧',
  man: '👨',
  woman: '👩',
  older_man: '👴',
  older_woman: '👵',
  crown: '👑',
  gem: '💎',
  graduation_cap: '🎓', mortar_board: '🎓',
};
|
||
|
||
// Matches `:name:` where name is 1–40 characters drawn from letters, digits,
// `_`, `+` and `-` (so `:+1:` and `:-1:` match). Capture group 1 is the bare
// name without the colons. Global + case-insensitive because it drives a
// replace-all pass; the presence check uses its own non-global literal, so the
// stateful `lastIndex` of this global regex is never shared with `.test()`.
const SHORTCODE_RE = /:([a-z0-9_+-]{1,40}):/gi;
|
||
|
||
/**
 * Cheap test for whether `text` could contain any emoji shortcode at all.
 * Lets callers skip the replace pass entirely on the common no-shortcode path.
 *
 * This is a shape check only: it reports `true` for anything that looks like
 * `:name:` (1–40 letters/digits/`_`/`+`/`-` between two colons, any case). It
 * does not check that the name is a known shortcode or that it stands alone, so
 * false positives such as "10:30:45" are expected.
 *
 * @param {*} text - Candidate text; non-strings and empty strings yield `false`.
 * @returns {boolean} `true` when a `:name:`-shaped run is present.
 */
export function hasEmojiShortcode(text) {
  // Guard the type first so a truthy non-string (number, object) returns false
  // instead of throwing on a missing string method.
  if (typeof text !== 'string' || !text.includes(':')) return false;
  // Fresh non-global literal: `.test()` carries no `lastIndex` state.
  return /:[a-z0-9_+-]{1,40}:/i.test(text);
}
|
||
|
||
// A shortcode must stand on its own — flanked by whitespace, punctuation, a
// string edge, or markup, never glued to an ASCII word character. Without this
// guard, `:name:` runs that happen to sit inside a longer run of digits/letters
// get converted by mistake and mangle perfectly literal text:
//   "1:100:2"        → the `:100:` would become 💯 ("1💯2")
//   "host:fire:port", URL authorities, `key:value:` pairs, etc.
// Chat models normally emit shortcodes delimited by spaces/punctuation
// (":fire:", "**:microphone:**", "nice :tada:!"), so requiring a boundary keeps
// those working while leaving embedded colon runs untouched. `_` counts as a
// word char too (identifier-like), but `+`/`-` do not, so "C++:fire:" still
// converts. Only ASCII is considered: a non-ASCII letter next to the colon is
// treated as a boundary.
const _WORDISH = /[A-Za-z0-9_]/;

/**
 * Report whether the slice `str[start, end)` is free of ASCII word characters
 * on both sides.
 *
 * @param {string} str - Full text being scanned.
 * @param {number} start - Index of the first character of the candidate.
 * @param {number} end - Index one past the last character of the candidate.
 * @returns {boolean} `true` when neither neighbour is `[A-Za-z0-9_]`; a string
 *   edge counts as a boundary.
 */
function _boundedOnBothSides(str, start, end) {
  // `charAt` yields '' for out-of-range indices (start === 0, end === length),
  // and '' never matches _WORDISH, so string edges count as boundaries.
  const before = str.charAt(start - 1);
  const after = str.charAt(end);
  return !_WORDISH.test(before) && !_WORDISH.test(after);
}
|
||
|
||
/**
 * Replace every known `:shortcode:` in `text` with its Unicode emoji. Unknown
 * shortcodes (`:definitely_not_emoji:`), colon runs that don't form a shortcode
 * (`10:30:45`, `16:9`), and known shortcodes embedded mid-token (`1:100:2`) are
 * all left exactly as-is. Matching is case-insensitive (`:FIRE:` resolves like
 * `:fire:`).
 *
 * @param {*} text - Text to convert. Non-strings, empty strings and strings
 *   without a colon are returned unchanged.
 * @returns {*} The converted string, or `text` itself when there was nothing
 *   to do.
 */
export function replaceEmojiShortcodes(text) {
  // Type guard first: a truthy non-string has no string methods to call, so it
  // is handed back untouched, the same way a falsy input is.
  if (typeof text !== 'string' || !text.includes(':')) return text;
  return text.replace(SHORTCODE_RE, (whole, name, offset, str) => {
    const key = name.toLowerCase();
    // Own-property lookup only, so inherited names such as `:constructor:`
    // never resolve through Object.prototype.
    if (!Object.prototype.hasOwnProperty.call(EMOJI_SHORTCODES, key)) return whole;
    // Only convert when the `:shortcode:` is a standalone token, not glued to a
    // surrounding word/number (which would mean it's literal text, not an emoji).
    if (!_boundedOnBothSides(str, offset, offset + whole.length)) return whole;
    return EMOJI_SHORTCODES[key];
  });
}
|
||
|
||
// Default export: the same three named exports bundled into one object, for
// callers that import the module as a single value.
export default { EMOJI_SHORTCODES, replaceEmojiShortcodes, hasEmojiShortcode };
|