Files
odysseus/static/js/emojiShortcodes.js
Zeus-Deus 85334e8f3d Render emoji shortcodes as icons in chat (#345) (#629)
Chat models often emit GitHub/Slack-style :shortcode: text (e.g. `:blush:`,
`:microphone:`) instead of the actual emoji. The renderer only converted real
Unicode emoji to the monochrome line icons, so shortcodes rendered as literal
text.

Add a pure, browser-free shortcode->Unicode map (emojiShortcodes.js) and run it
inside svgifyEmoji ahead of the existing Unicode->SVG pass, skipping <code>/<pre>
so code stays literal. Covers ~430 common shortcodes plus common aliases
(+1/thumbsup, etc.).

Keep the conversion from touching anything it shouldn't:
* Scope it to chat. mdToHtml/svgifyEmoji take a { shortcodes } option (default
  on); document and email body rendering (compose, export, preview) pass it as
  false so author-typed :shortcode: text stays literal. The Unicode->SVG pass
  still runs there exactly as before.
* Only convert a :shortcode: that stands on its own. A word-boundary guard
  leaves embedded colon runs alone, so "1:100:2", "10:30:45", "16:9" and
  host:fire:port are never rewritten.

Tests: extend the node-driven unit test with the boundary/false-positive cases,
and fix the markdown-rendering test loader to resolve the new emojiShortcodes
import.
2026-06-05 02:28:42 +02:00

459 lines
13 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
// static/js/emojiShortcodes.js
//
// Emoji shortcode → Unicode conversion (issue #345).
//
// Chat models frequently emit GitHub/Slack-style `:shortcode:` text — e.g.
// `:blush:`, `:fire:`, `:microphone:` — instead of the actual emoji character.
// Nothing in the render pipeline used to translate these, so they showed up as
// literal `:blush:` text in the chat bubble.
//
// This module turns the common shortcode set into the real Unicode emoji. The
// chat renderer (markdown.js → svgifyEmoji) runs this BEFORE its existing
// Unicode-emoji → monochrome-SVG pass, so a converted `:blush:` renders as the
// same theme-tinted single-color line icon as any other emoji (project rule:
// never colorful emoji), not as a colored system glyph.
//
// Pure and browser-free on purpose: no DOM, no imports, so it can be unit
// tested with plain `node` (see tests/test_emoji_shortcodes_js.py).
// Canonical map of common shortcode → Unicode emoji. Names follow the GitHub
// convention (lowercase, underscore-separated). A handful of well-known aliases
// (`+1`, `thumbsup`, `grinning_face`, …) point at the same glyph so the most
// frequent model spellings all resolve.
//
// Every value must be a non-empty emoji string: an empty value would make the
// replace pass silently delete the shortcode text instead of rendering an icon.
export const EMOJI_SHORTCODES = {
  // ── Smileys & emotion ──
  grinning: '😀', grinning_face: '😀',
  smiley: '😃', smiley_face: '😃',
  smile: '😄',
  grin: '😁',
  laughing: '😆', satisfied: '😆',
  sweat_smile: '😅',
  rofl: '🤣', rolling_on_the_floor_laughing: '🤣',
  joy: '😂',
  slightly_smiling_face: '🙂', slight_smile: '🙂',
  upside_down_face: '🙃', upside_down: '🙃',
  wink: '😉', winking_face: '😉',
  blush: '😊', smiling_face_with_smiling_eyes: '😊',
  innocent: '😇',
  smiling_face_with_three_hearts: '🥰',
  heart_eyes: '😍', heart_eyes_face: '😍',
  star_struck: '🤩',
  kissing_heart: '😘',
  kissing: '😗',
  kissing_closed_eyes: '😚',
  kissing_smiling_eyes: '😙',
  yum: '😋',
  stuck_out_tongue: '😛',
  stuck_out_tongue_winking_eye: '😜',
  zany_face: '🤪',
  stuck_out_tongue_closed_eyes: '😝',
  money_mouth_face: '🤑',
  hugs: '🤗', hugging_face: '🤗',
  hand_over_mouth: '🤭',
  shushing_face: '🤫',
  thinking: '🤔', thinking_face: '🤔',
  zipper_mouth_face: '🤐',
  raised_eyebrow: '🤨',
  neutral_face: '😐',
  expressionless: '😑',
  no_mouth: '😶',
  smirk: '😏', smirk_face: '😏',
  unamused: '😒',
  roll_eyes: '🙄', face_with_rolling_eyes: '🙄',
  grimacing: '😬',
  lying_face: '🤥',
  relieved: '😌',
  pensive: '😔',
  sleepy: '😪',
  drooling_face: '🤤',
  sleeping: '😴',
  mask: '😷',
  face_with_thermometer: '🤒',
  face_with_head_bandage: '🤕',
  nauseated_face: '🤢',
  vomiting_face: '🤮',
  sneezing_face: '🤧',
  hot_face: '🥵',
  cold_face: '🥶',
  woozy_face: '🥴',
  dizzy_face: '😵',
  exploding_head: '🤯',
  cowboy_hat_face: '🤠',
  partying_face: '🥳',
  sunglasses: '😎',
  nerd_face: '🤓',
  monocle_face: '🧐',
  confused: '😕',
  worried: '😟',
  slightly_frowning_face: '🙁',
  frowning_face: '☹️',
  open_mouth: '😮',
  hushed: '😯',
  astonished: '😲',
  flushed: '😳',
  pleading_face: '🥺',
  frowning: '😦',
  anguished: '😧',
  fearful: '😨',
  cold_sweat: '😰',
  disappointed_relieved: '😥',
  cry: '😢',
  sob: '😭',
  scream: '😱',
  confounded: '😖',
  persevere: '😣',
  disappointed: '😞',
  sweat: '😓',
  weary: '😩',
  tired_face: '😫',
  yawning_face: '🥱',
  triumph: '😤',
  rage: '😡', pout: '😡', pouting_face: '😡',
  angry: '😠',
  cursing_face: '🤬',
  smiling_imp: '😈',
  imp: '👿',
  skull: '💀',
  skull_and_crossbones: '☠️',
  hankey: '💩', poop: '💩', shit: '💩',
  clown_face: '🤡',
  japanese_ogre: '👹',
  japanese_goblin: '👺',
  ghost: '👻',
  alien: '👽',
  space_invader: '👾',
  robot: '🤖', robot_face: '🤖',
  // ── Cats ──
  smiley_cat: '😺',
  smile_cat: '😸',
  joy_cat: '😹',
  heart_eyes_cat: '😻',
  smirk_cat: '😼',
  kissing_cat: '😽',
  scream_cat: '🙀',
  crying_cat_face: '😿',
  pouting_cat: '😾',
  see_no_evil: '🙈',
  hear_no_evil: '🙉',
  speak_no_evil: '🙊',
  // ── Hands & body ──
  wave: '👋', wave_hand: '👋',
  raised_back_of_hand: '🤚',
  raised_hand_with_fingers_splayed: '🖐️',
  hand: '✋', raised_hand: '✋',
  vulcan_salute: '🖖',
  ok_hand: '👌',
  pinched_fingers: '🤌',
  pinching_hand: '🤏',
  v: '✌️', victory_hand: '✌️',
  crossed_fingers: '🤞',
  love_you_gesture: '🤟',
  metal: '🤘',
  call_me_hand: '🤙',
  point_left: '👈',
  point_right: '👉',
  point_up_2: '👆',
  middle_finger: '🖕', fu: '🖕',
  point_down: '👇',
  point_up: '☝️',
  '+1': '👍', thumbsup: '👍', thumbup: '👍', thumbs_up: '👍',
  '-1': '👎', thumbsdown: '👎', thumbdown: '👎', thumbs_down: '👎',
  fist_raised: '✊', fist: '✊',
  fist_oncoming: '👊', facepunch: '👊', punch: '👊',
  fist_left: '🤛',
  fist_right: '🤜',
  clap: '👏', clapping_hands: '👏',
  raised_hands: '🙌',
  open_hands: '👐',
  palms_up_together: '🤲',
  handshake: '🤝',
  pray: '🙏', folded_hands: '🙏',
  writing_hand: '✍️',
  nail_care: '💅',
  selfie: '🤳',
  muscle: '💪', flexed_biceps: '💪',
  // ── Hearts & symbols of feeling ──
  heart: '❤️', red_heart: '❤️',
  orange_heart: '🧡',
  yellow_heart: '💛',
  green_heart: '💚',
  blue_heart: '💙',
  purple_heart: '💜',
  black_heart: '🖤',
  white_heart: '🤍',
  brown_heart: '🤎',
  broken_heart: '💔',
  heart_on_fire: '❤️‍🔥',
  two_hearts: '💕',
  revolving_hearts: '💞',
  heartbeat: '💓',
  heartpulse: '💗',
  sparkling_heart: '💖',
  cupid: '💘',
  gift_heart: '💝',
  heart_decoration: '💟',
  heavy_heart_exclamation: '❣️',
  // ── Celebration & misc objects ──
  fire: '🔥', flame: '🔥',
  '100': '💯', hundred: '💯',
  sparkles: '✨',
  star: '⭐',
  star2: '🌟', glowing_star: '🌟',
  dizzy: '💫',
  boom: '💥', collision: '💥',
  anger: '💢',
  sweat_drops: '💦',
  dash: '💨',
  zzz: '💤',
  tada: '🎉', party_popper: '🎉',
  confetti_ball: '🎊',
  balloon: '🎈',
  gift: '🎁',
  trophy: '🏆',
  '1st_place_medal': '🥇',
  '2nd_place_medal': '🥈',
  '3rd_place_medal': '🥉',
  medal_sports: '🏅',
  zap: '⚡', lightning: '⚡',
  bulb: '💡', light_bulb: '💡',
  key: '🔑',
  lock: '🔒',
  unlock: '🔓',
  bell: '🔔',
  no_bell: '🔕',
  loudspeaker: '📢',
  mega: '📣', megaphone: '📣',
  speech_balloon: '💬',
  thought_balloon: '💭',
  white_check_mark: '✅',
  heavy_check_mark: '✔️', check_mark: '✔️',
  ballot_box_with_check: '☑️',
  x: '❌', cross_mark: '❌',
  negative_squared_cross_mark: '❎',
  question: '❓',
  grey_question: '❔',
  exclamation: '❗', heavy_exclamation_mark: '❗',
  grey_exclamation: '❕',
  warning: '⚠️',
  no_entry: '⛔',
  no_entry_sign: '🚫',
  red_circle: '🔴',
  green_circle: '🟢',
  large_blue_circle: '🔵',
  yellow_circle: '🟡',
  white_circle: '⚪',
  black_circle: '⚫',
  orange_circle: '🟠',
  purple_circle: '🟣',
  brown_circle: '🟤',
  // ── Tech, work, study ──
  rocket: '🚀',
  eyes: '👀',
  eye: '👁️',
  brain: '🧠',
  books: '📚',
  book: '📖', open_book: '📖',
  memo: '📝', pencil: '📝',
  pencil2: '✏️',
  page_facing_up: '📄',
  paperclip: '📎',
  pushpin: '📌',
  round_pushpin: '📍',
  link: '🔗',
  bar_chart: '📊',
  chart_with_upwards_trend: '📈',
  chart_with_downwards_trend: '📉',
  mag: '🔍',
  mag_right: '🔎',
  globe_with_meridians: '🌐',
  earth_africa: '🌍',
  earth_americas: '🌎',
  earth_asia: '🌏',
  alarm_clock: '⏰',
  hourglass_flowing_sand: '⏳',
  hourglass: '⌛',
  microphone: '🎤', mic: '🎤',
  musical_note: '🎵',
  notes: '🎶', musical_notes: '🎶',
  headphones: '🎧',
  camera: '📷',
  camera_flash: '📸',
  clapper: '🎬',
  tv: '📺',
  computer: '💻', laptop: '💻',
  desktop_computer: '🖥️',
  iphone: '📱', mobile_phone: '📱',
  telephone: '☎️',
  wrench: '🔧',
  hammer: '🔨',
  gear: '⚙️',
  nut_and_bolt: '🔩',
  magnet: '🧲',
  test_tube: '🧪',
  microscope: '🔬',
  dart: '🎯', bullseye: '🎯',
  game_die: '🎲',
  jigsaw: '🧩',
  // ── Food & drink ──
  pizza: '🍕',
  hamburger: '🍔',
  fries: '🍟',
  taco: '🌮',
  sushi: '🍣',
  doughnut: '🍩', donut: '🍩',
  coffee: '☕',
  beer: '🍺',
  wine_glass: '🍷',
  // ── Animals & nature ──
  dog: '🐶',
  cat: '🐱',
  mouse: '🐭',
  hamster: '🐹',
  rabbit: '🐰',
  fox_face: '🦊',
  bear: '🐻',
  panda_face: '🐼',
  koala: '🐨',
  tiger: '🐯',
  lion: '🦁',
  cow: '🐮',
  pig: '🐷',
  frog: '🐸',
  monkey_face: '🐵',
  chicken: '🐔',
  penguin: '🐧',
  bird: '🐦',
  eagle: '🦅',
  duck: '🦆',
  owl: '🦉',
  wolf: '🐺',
  horse: '🐴',
  unicorn: '🦄',
  bee: '🐝', honeybee: '🐝',
  bug: '🐛',
  butterfly: '🦋',
  snail: '🐌',
  lady_beetle: '🐞',
  snake: '🐍',
  turtle: '🐢',
  octopus: '🐙',
  crab: '🦀',
  tropical_fish: '🐠',
  whale: '🐳',
  shark: '🦈',
  cherry_blossom: '🌸',
  rose: '🌹',
  sunflower: '🌻',
  hibiscus: '🌺',
  tulip: '🌷',
  seedling: '🌱',
  evergreen_tree: '🌲',
  deciduous_tree: '🌳',
  four_leaf_clover: '🍀',
  apple: '🍎',
  green_apple: '🍏',
  pear: '🍐',
  tangerine: '🍊',
  lemon: '🍋',
  banana: '🍌',
  watermelon: '🍉',
  grapes: '🍇',
  strawberry: '🍓',
  blueberries: '🫐',
  peach: '🍑',
  rainbow: '🌈',
  sunny: '☀️', sun: '☀️',
  partly_sunny: '⛅',
  cloud: '☁️',
  snowflake: '❄️',
  ocean: '🌊',
  // ── Arrows & signs ──
  arrow_right: '➡️',
  arrow_left: '⬅️',
  arrow_up: '⬆️',
  arrow_down: '⬇️',
  arrow_upper_right: '↗️',
  arrow_lower_right: '↘️',
  arrow_lower_left: '↙️',
  arrow_upper_left: '↖️',
  leftwards_arrow_with_hook: '↩️',
  arrow_right_hook: '↪️',
  arrows_counterclockwise: '🔄',
  arrows_clockwise: '🔃',
  // U+2795 / U+2796: these look like ASCII `+` / `-` and are easily stripped by
  // "confusable character" filters — keep them as real emoji, never empty.
  heavy_plus_sign: '➕',
  heavy_minus_sign: '➖',
  heavy_division_sign: '➗',
  heavy_multiplication_x: '✖️',
  infinity: '♾️',
  copyright: '©️',
  registered: '®️',
  tm: '™️',
  recycle: '♻️',
  checkered_flag: '🏁',
  triangular_flag_on_post: '🚩',
  white_flag: '🏳️',
  black_flag: '🏴',
  // ── People & wearables ──
  baby: '👶',
  boy: '👦',
  girl: '👧',
  man: '👨',
  woman: '👩',
  older_man: '👴',
  older_woman: '👵',
  crown: '👑',
  gem: '💎',
  graduation_cap: '🎓', mortar_board: '🎓',
};
// Matches `:name:` where name is letters/digits/`_`/`+`/`-`. Length ≥1 so `:+1:`
// and `:-1:` match; names are capped at 40 characters. Capture group 1 is the
// bare name without the surrounding colons. Global + case-insensitive for
// replace; a separate non-global literal is used for the cheap presence check
// so there's no shared lastIndex state to reset.
const SHORTCODE_RE = /:([a-z0-9_+-]{1,40}):/gi;
/**
 * Quick pre-check: does `text` contain anything shaped like `:name:`?
 *
 * This only tests the shape (a colon-delimited run of 1–40 name characters);
 * it does not look the name up, so unknown or embedded runs such as `1:100:2`
 * also report `true`. Callers use it to skip the replace pass when there is
 * clearly nothing to convert.
 *
 * @param {string} text - Candidate text; falsy values yield `false`.
 * @returns {boolean} `true` when a shortcode-shaped run is present.
 */
export function hasEmojiShortcode(text) {
  // Empty/missing input can never hold a shortcode.
  if (!text) return false;
  // No colon at all: bail out before paying for the regex.
  if (text.indexOf(':') === -1) return false;
  // Fresh non-global literal, so there is no lastIndex state between calls.
  return /:[a-z0-9_+-]{1,40}:/i.test(text);
}
// Boundary guard for shortcodes.
//
// A `:name:` run only counts as an emoji shortcode when it is a standalone
// token: the characters immediately before and after it must not be ASCII
// word characters (letters, digits, `_`). A string edge, whitespace,
// punctuation or markup all qualify as a boundary. This keeps literal colon
// runs intact, e.g.
//   "1:100:2"         → `:100:` is glued to digits, so it stays as-is
//   "host:fire:port"  → `:fire:` is glued to letters, so it stays as-is
// while ordinary usages (":fire:", "**:microphone:**", "nice :tada:!") still
// convert. `+` and `-` are deliberately NOT word characters here, so
// "C++ :fire:" and similar keep working.
const _WORDISH = /[A-Za-z0-9_]/;

/**
 * Report whether the span `[start, end)` of `str` has a non-word neighbour (or
 * a string edge) on both sides.
 *
 * @param {string} str - Full text being scanned.
 * @param {number} start - Index of the first character of the span.
 * @param {number} end - Index just past the last character of the span.
 * @returns {boolean} `true` when neither neighbour is an ASCII word character.
 */
function _boundedOnBothSides(str, start, end) {
  // Left neighbour; the empty string stands in for "start of text".
  const leftNeighbour = start > 0 ? str[start - 1] : '';
  if (_WORDISH.test(leftNeighbour)) return false;

  // Right neighbour; the empty string stands in for "end of text".
  const rightNeighbour = end < str.length ? str[end] : '';
  return !_WORDISH.test(rightNeighbour);
}
/**
 * Swap each recognised `:shortcode:` in `text` for its Unicode emoji.
 *
 * A match is converted only when both hold:
 *   1. its lower-cased name is an own key of `EMOJI_SHORTCODES`, and
 *   2. it is a standalone token (not glued to a letter, digit or `_`).
 * Everything else — unknown names (`:definitely_not_emoji:`), colon runs that
 * are not shortcodes (`10:30:45`, `16:9`) and known names embedded mid-token
 * (`1:100:2`) — is returned untouched.
 *
 * @param {string} text - Text to scan; falsy or colon-free input is returned as-is.
 * @returns {string} `text` with standalone known shortcodes replaced.
 */
export function replaceEmojiShortcodes(text) {
  const mightHoldShortcode = Boolean(text) && text.indexOf(':') !== -1;
  if (!mightHoldShortcode) return text;

  // Replacer: decide per match whether to emit the emoji or keep the literal.
  const toEmoji = (match, rawName, index, source) => {
    const key = rawName.toLowerCase();
    // Own-property check so inherited names (`constructor`, …) never resolve.
    const isKnown = Object.prototype.hasOwnProperty.call(EMOJI_SHORTCODES, key);
    // Glued to a surrounding word/number means literal text, not an emoji.
    const isStandalone =
      isKnown && _boundedOnBothSides(source, index, index + match.length);
    return isStandalone ? EMOJI_SHORTCODES[key] : match;
  };

  return text.replace(SHORTCODE_RE, toEmoji);
}
// Default export: the module's three named exports bundled into one object.
export default { EMOJI_SHORTCODES, replaceEmojiShortcodes, hasEmojiShortcode };