// static/js/emojiShortcodes.js // // Emoji shortcode → Unicode conversion (issue #345). // // Chat models frequently emit GitHub/Slack-style `:shortcode:` text — e.g. // `:blush:`, `:fire:`, `:microphone:` — instead of the actual emoji character. // Nothing in the render pipeline used to translate these, so they showed up as // literal `:blush:` text in the chat bubble. // // This module turns the common shortcode set into the real Unicode emoji. The // chat renderer (markdown.js → svgifyEmoji) runs this BEFORE its existing // Unicode-emoji → monochrome-SVG pass, so a converted `:blush:` renders as the // same theme-tinted single-color line icon as any other emoji (project rule: // never colorful emoji), not as a colored system glyph. // // Pure and browser-free on purpose: no DOM, no imports, so it can be unit // tested with plain `node` (see tests/test_emoji_shortcodes_js.py). // Canonical map of common shortcode → Unicode emoji. Names follow the GitHub // convention (lowercase, underscore-separated). A handful of well-known aliases // (`+1`, `thumbsup`, `grinning_face`, …) point at the same glyph so the most // frequent model spellings all resolve. 
/**
 * Canonical map of common shortcode → Unicode emoji.
 *
 * Keys are lowercase and underscore-separated (GitHub-style). Several aliases
 * (`+1`, `thumbsup`, `grinning_face`, …) deliberately point at the same glyph
 * so that the most frequent spellings all resolve.
 */
export const EMOJI_SHORTCODES = {
  // ── Smileys & emotion ──
  grinning: '😀', grinning_face: '😀', smiley: '😃', smiley_face: '😃',
  smile: '😄', grin: '😁', laughing: '😆', satisfied: '😆',
  sweat_smile: '😅', rofl: '🤣', rolling_on_the_floor_laughing: '🤣', joy: '😂',
  slightly_smiling_face: '🙂', slight_smile: '🙂',
  upside_down_face: '🙃', upside_down: '🙃',
  wink: '😉', winking_face: '😉',
  blush: '😊', smiling_face_with_smiling_eyes: '😊',
  innocent: '😇', smiling_face_with_three_hearts: '🥰',
  heart_eyes: '😍', heart_eyes_face: '😍', star_struck: '🤩',
  kissing_heart: '😘', kissing: '😗', kissing_closed_eyes: '😚', kissing_smiling_eyes: '😙',
  yum: '😋', stuck_out_tongue: '😛', stuck_out_tongue_winking_eye: '😜',
  zany_face: '🤪', stuck_out_tongue_closed_eyes: '😝', money_mouth_face: '🤑',
  hugs: '🤗', hugging_face: '🤗', hand_over_mouth: '🤭', shushing_face: '🤫',
  thinking: '🤔', thinking_face: '🤔', zipper_mouth_face: '🤐', raised_eyebrow: '🤨',
  neutral_face: '😐', expressionless: '😑', no_mouth: '😶',
  smirk: '😏', smirk_face: '😏', unamused: '😒',
  roll_eyes: '🙄', face_with_rolling_eyes: '🙄',
  grimacing: '😬', lying_face: '🤥', relieved: '😌', pensive: '😔',
  sleepy: '😪', drooling_face: '🤤', sleeping: '😴', mask: '😷',
  face_with_thermometer: '🤒', face_with_head_bandage: '🤕',
  nauseated_face: '🤢', vomiting_face: '🤮', sneezing_face: '🤧',
  hot_face: '🥵', cold_face: '🥶', woozy_face: '🥴', dizzy_face: '😵',
  exploding_head: '🤯', cowboy_hat_face: '🤠', partying_face: '🥳',
  sunglasses: '😎', nerd_face: '🤓', monocle_face: '🧐',
  confused: '😕', worried: '😟', slightly_frowning_face: '🙁', frowning_face: '☹️',
  open_mouth: '😮', hushed: '😯', astonished: '😲', flushed: '😳',
  pleading_face: '🥺', frowning: '😦', anguished: '😧', fearful: '😨',
  cold_sweat: '😰', disappointed_relieved: '😥', cry: '😢', sob: '😭',
  scream: '😱', confounded: '😖', persevere: '😣', disappointed: '😞',
  sweat: '😓', weary: '😩', tired_face: '😫', yawning_face: '🥱',
  triumph: '😤', rage: '😡', pout: '😡', pouting_face: '😡',
  angry: '😠', cursing_face: '🤬', smiling_imp: '😈', imp: '👿',
  skull: '💀', skull_and_crossbones: '☠️',
  hankey: '💩', poop: '💩', shit: '💩',
  clown_face: '🤡', japanese_ogre: '👹', japanese_goblin: '👺',
  ghost: '👻', alien: '👽', space_invader: '👾',
  robot: '🤖', robot_face: '🤖',

  // ── Cats ──
  smiley_cat: '😺', smile_cat: '😸', joy_cat: '😹', heart_eyes_cat: '😻',
  smirk_cat: '😼', kissing_cat: '😽', scream_cat: '🙀',
  crying_cat_face: '😿', pouting_cat: '😾',
  see_no_evil: '🙈', hear_no_evil: '🙉', speak_no_evil: '🙊',

  // ── Hands & body ──
  wave: '👋', wave_hand: '👋', raised_back_of_hand: '🤚',
  raised_hand_with_fingers_splayed: '🖐️',
  hand: '✋', raised_hand: '✋', vulcan_salute: '🖖', ok_hand: '👌',
  pinched_fingers: '🤌', pinching_hand: '🤏',
  v: '✌️', victory_hand: '✌️', crossed_fingers: '🤞', love_you_gesture: '🤟',
  metal: '🤘', call_me_hand: '🤙',
  point_left: '👈', point_right: '👉', point_up_2: '👆',
  middle_finger: '🖕', fu: '🖕', point_down: '👇', point_up: '☝️',
  '+1': '👍', thumbsup: '👍', thumbup: '👍', thumbs_up: '👍',
  '-1': '👎', thumbsdown: '👎', thumbdown: '👎', thumbs_down: '👎',
  fist_raised: '✊', fist: '✊',
  fist_oncoming: '👊', facepunch: '👊', punch: '👊',
  fist_left: '🤛', fist_right: '🤜',
  clap: '👏', clapping_hands: '👏', raised_hands: '🙌', open_hands: '👐',
  palms_up_together: '🤲', handshake: '🤝', pray: '🙏', folded_hands: '🙏',
  writing_hand: '✍️', nail_care: '💅', selfie: '🤳',
  muscle: '💪', flexed_biceps: '💪',

  // ── Hearts & symbols of feeling ──
  heart: '❤️', red_heart: '❤️', orange_heart: '🧡', yellow_heart: '💛',
  green_heart: '💚', blue_heart: '💙', purple_heart: '💜', black_heart: '🖤',
  white_heart: '🤍', brown_heart: '🤎', broken_heart: '💔', heart_on_fire: '❤️‍🔥',
  two_hearts: '💕', revolving_hearts: '💞', heartbeat: '💓', heartpulse: '💗',
  sparkling_heart: '💖', cupid: '💘', gift_heart: '💝', heart_decoration: '💟',
  heavy_heart_exclamation: '❣️',

  // ── Celebration & misc objects ──
  fire: '🔥', flame: '🔥', '100': '💯', hundred: '💯',
  sparkles: '✨', star: '⭐', star2: '🌟', glowing_star: '🌟',
  dizzy: '💫', boom: '💥', collision: '💥', anger: '💢',
  sweat_drops: '💦', dash: '💨', zzz: '💤',
  tada: '🎉', party_popper: '🎉', confetti_ball: '🎊', balloon: '🎈',
  gift: '🎁', trophy: '🏆',
  '1st_place_medal': '🥇', '2nd_place_medal': '🥈', '3rd_place_medal': '🥉',
  medal_sports: '🏅', zap: '⚡', lightning: '⚡',
  bulb: '💡', light_bulb: '💡', key: '🔑', lock: '🔒', unlock: '🔓',
  bell: '🔔', no_bell: '🔕', loudspeaker: '📢', mega: '📣', megaphone: '📣',
  speech_balloon: '💬', thought_balloon: '💭',
  white_check_mark: '✅', heavy_check_mark: '✔️', check_mark: '✔️',
  ballot_box_with_check: '☑️',
  x: '❌', cross_mark: '❌', negative_squared_cross_mark: '❎',
  question: '❓', grey_question: '❔',
  exclamation: '❗', heavy_exclamation_mark: '❗', grey_exclamation: '❕',
  warning: '⚠️', no_entry: '⛔', no_entry_sign: '🚫',
  red_circle: '🔴', green_circle: '🟢', large_blue_circle: '🔵',
  yellow_circle: '🟡', white_circle: '⚪', black_circle: '⚫',
  orange_circle: '🟠', purple_circle: '🟣', brown_circle: '🟤',

  // ── Tech, work, study ──
  rocket: '🚀', eyes: '👀', eye: '👁️', brain: '🧠',
  books: '📚', book: '📖', open_book: '📖',
  memo: '📝', pencil: '📝', pencil2: '✏️',
  page_facing_up: '📄', paperclip: '📎', pushpin: '📌', round_pushpin: '📍',
  link: '🔗', bar_chart: '📊',
  chart_with_upwards_trend: '📈', chart_with_downwards_trend: '📉',
  mag: '🔍', mag_right: '🔎', globe_with_meridians: '🌐',
  earth_africa: '🌍', earth_americas: '🌎', earth_asia: '🌏',
  alarm_clock: '⏰', hourglass_flowing_sand: '⏳', hourglass: '⌛',
  microphone: '🎤', mic: '🎤',
  musical_note: '🎵', notes: '🎶', musical_notes: '🎶', headphones: '🎧',
  camera: '📷', camera_flash: '📸', clapper: '🎬', tv: '📺',
  computer: '💻', laptop: '💻', desktop_computer: '🖥️',
  iphone: '📱', mobile_phone: '📱', telephone: '☎️',
  wrench: '🔧', hammer: '🔨', gear: '⚙️', nut_and_bolt: '🔩',
  magnet: '🧲', test_tube: '🧪', microscope: '🔬',
  dart: '🎯', bullseye: '🎯', game_die: '🎲', jigsaw: '🧩',

  // ── Food & drink ──
  pizza: '🍕', hamburger: '🍔', fries: '🍟', taco: '🌮', sushi: '🍣',
  doughnut: '🍩', donut: '🍩', coffee: '☕', beer: '🍺', wine_glass: '🍷',

  // ── Animals & nature ──
  dog: '🐶', cat: '🐱', mouse: '🐭', hamster: '🐹', rabbit: '🐰',
  fox_face: '🦊', bear: '🐻', panda_face: '🐼', koala: '🐨', tiger: '🐯',
  lion: '🦁', cow: '🐮', pig: '🐷', frog: '🐸', monkey_face: '🐵',
  chicken: '🐔', penguin: '🐧', bird: '🐦', eagle: '🦅', duck: '🦆',
  owl: '🦉', wolf: '🐺', horse: '🐴', unicorn: '🦄',
  bee: '🐝', honeybee: '🐝', bug: '🐛', butterfly: '🦋', snail: '🐌',
  lady_beetle: '🐞', snake: '🐍', turtle: '🐢', octopus: '🐙', crab: '🦀',
  tropical_fish: '🐠', whale: '🐳', shark: '🦈',
  cherry_blossom: '🌸', rose: '🌹', sunflower: '🌻', hibiscus: '🌺', tulip: '🌷',
  seedling: '🌱', evergreen_tree: '🌲', deciduous_tree: '🌳', four_leaf_clover: '🍀',
  apple: '🍎', green_apple: '🍏', pear: '🍐', tangerine: '🍊', lemon: '🍋',
  banana: '🍌', watermelon: '🍉', grapes: '🍇', strawberry: '🍓',
  blueberries: '🫐', peach: '🍑',
  rainbow: '🌈', sunny: '☀️', sun: '☀️', partly_sunny: '⛅',
  cloud: '☁️', snowflake: '❄️', ocean: '🌊',

  // ── Arrows & signs ──
  arrow_right: '➡️', arrow_left: '⬅️', arrow_up: '⬆️', arrow_down: '⬇️',
  arrow_upper_right: '↗️', arrow_lower_right: '↘️',
  arrow_lower_left: '↙️', arrow_upper_left: '↖️',
  leftwards_arrow_with_hook: '↩️', arrow_right_hook: '↪️',
  arrows_counterclockwise: '🔄', arrows_clockwise: '🔃',
  heavy_plus_sign: '➕', heavy_minus_sign: '➖',
  heavy_division_sign: '➗', heavy_multiplication_x: '✖️',
  infinity: '♾️', copyright: '©️', registered: '®️', tm: '™️', recycle: '♻️',
  checkered_flag: '🏁', triangular_flag_on_post: '🚩',
  white_flag: '🏳️', black_flag: '🏴',

  // ── People & wearables ──
  baby: '👶', boy: '👦', girl: '👧', man: '👨', woman: '👩',
  older_man: '👴', older_woman: '👵',
  crown: '👑', gem: '💎', graduation_cap: '🎓', mortar_board: '🎓',
};

// `:name:` where name is letters/digits/`_`/`+`/`-`. Length ≥1 so `:+1:` and
// `:-1:` match; capped at 40 so a stray colon cannot trigger a long scan.
// Both regexes below are built from this one pattern so they cannot drift.
const SHORTCODE_PATTERN = ':([a-z0-9_+-]{1,40}):';

// Global + case-insensitive scanner used by replaceEmojiShortcodes(). It is
// stateful (`lastIndex`); the scan loop resets it before every use.
const SHORTCODE_RE = new RegExp(SHORTCODE_PATTERN, 'gi');

// Non-global twin for the presence check: `.test()` on a non-global regex
// never touches `lastIndex`, so there is no shared state to reset.
const SHORTCODE_PROBE_RE = new RegExp(SHORTCODE_PATTERN, 'i');

/**
 * Cheap test for whether `text` could contain any emoji shortcode at all.
 * Lets callers skip the replace pass entirely on the common no-shortcode path.
 *
 * This only checks the `:name:` shape; it does not check that `name` is a
 * known shortcode, so `"10:30:45"` yields `true`.
 *
 * @param {*} text - Candidate text. Non-string values yield `false`.
 * @returns {boolean} `true` when `text` contains something shaped like `:name:`.
 */
export function hasEmojiShortcode(text) {
  return (
    typeof text === 'string' &&
    text.includes(':') &&
    SHORTCODE_PROBE_RE.test(text)
  );
}

// A shortcode must stand on its own — flanked by whitespace, punctuation, a
// string edge, or markup, never glued to an ASCII word character. Without this
// guard, `:name:` runs that sit inside a longer run of digits/letters would be
// converted by mistake and mangle perfectly literal text:
//   "1:100:2"        → the `:100:` would become 💯 ("1💯2")
//   "host:fire:port", URL authorities, `key:value:` pairs, etc.
// `_` counts as a word char too (identifier-like), but `+`/`-` do not, so
// "C++ :fire:" still works.
const _WORDISH = /[A-Za-z0-9_]/;

/**
 * Report whether the span `[start, end)` of `str` has no ASCII word character
 * immediately before or after it. A string edge counts as a boundary.
 *
 * @param {string} str - Full text being scanned.
 * @param {number} start - Index of the opening `:`.
 * @param {number} end - Index just past the closing `:`.
 * @returns {boolean} `true` when both neighbours are non-word (or absent).
 */
function _boundedOnBothSides(str, start, end) {
  const before = start > 0 ? str[start - 1] : '';
  const after = end < str.length ? str[end] : '';
  return !_WORDISH.test(before) && !_WORDISH.test(after);
}

/**
 * Replace every known `:shortcode:` in `text` with its Unicode emoji. Unknown
 * shortcodes (`:definitely_not_emoji:`), colon runs that don't form a shortcode
 * (`10:30:45`, `16:9`), and known shortcodes embedded mid-token (`1:100:2`) are
 * all left exactly as-is.
 *
 * @param {*} text - Text to convert. Non-string values (including `null` and
 *   `undefined`) are returned unchanged.
 * @returns {*} The converted string, or `text` itself when nothing was replaced.
 */
export function replaceEmojiShortcodes(text) {
  if (typeof text !== 'string' || !text.includes(':')) return text;

  let converted = '';
  let copiedUpTo = 0; // index in `text` up to which `converted` is complete
  let match;

  SHORTCODE_RE.lastIndex = 0;
  while ((match = SHORTCODE_RE.exec(text)) !== null) {
    const [whole, name] = match;
    const start = match.index;
    const end = start + whole.length;
    const key = name.toLowerCase();

    // Own-property check so inherited names (`constructor`, …) never resolve.
    const isKnown = Object.prototype.hasOwnProperty.call(EMOJI_SHORTCODES, key);

    // Only convert when the `:shortcode:` is a standalone token, not glued to a
    // surrounding word/number (which would mean it's literal text, not an emoji).
    if (isKnown && _boundedOnBothSides(text, start, end)) {
      converted += text.slice(copiedUpTo, start) + EMOJI_SHORTCODES[key];
      copiedUpTo = end;
    } else {
      // Rejected candidate: hand its closing colon back to the scanner. That
      // colon may open the next shortcode (":c++:fire:" → the `:fire:` part),
      // which a plain global replace would skip because the colon was consumed.
      SHORTCODE_RE.lastIndex = end - 1;
    }
  }

  // Any successful replacement moves `copiedUpTo` past index 0.
  return copiedUpTo === 0 ? text : converted + text.slice(copiedUpTo);
}

export default { EMOJI_SHORTCODES, replaceEmojiShortcodes, hasEmojiShortcode };