diff --git a/static/js/document.js b/static/js/document.js index 1d38121..87ad298 100644 --- a/static/js/document.js +++ b/static/js/document.js @@ -2246,7 +2246,9 @@ import * as Modals from './modalManager.js'; // WYSIWYG body — use it verbatim. (Checking a leading '<' isn't enough: a // rich body often starts with plain text, e.g. "Hi there".) if (/<\/?(b|i|u|s|strong|em|del|strike|a|p|div|br|ul|ol|li|h[1-3]|blockquote|span|code|pre)\b[^>]*>/i.test(t)) return t; - try { return markdownModule.mdToHtml(text); } + // Email body: keep author-typed `:shortcode:` text literal. Issue #345 + // (shortcode → emoji) is scoped to chat; do not rewrite colons in mail. + try { return markdownModule.mdToHtml(text, { shortcodes: false }); } catch (_) { const d = document.createElement('div'); d.textContent = text; return d.innerHTML.replace(/\n/g, '
'); @@ -8386,7 +8388,7 @@ import * as Modals from './modalManager.js'; const text = textarea.value || ''; let body; if (lang === 'markdown' && markdownModule?.mdToHtml) { - body = markdownModule.mdToHtml(text); + body = markdownModule.mdToHtml(text, { shortcodes: false }); // export: keep :shortcodes: literal } else { body = '
' +
         text.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;') + '
'; @@ -8417,7 +8419,7 @@ import * as Modals from './modalManager.js'; // Render content as HTML for PDF let html; if (lang === 'markdown' && markdownModule?.mdToHtml) { - html = markdownModule.mdToHtml(text); + html = markdownModule.mdToHtml(text, { shortcodes: false }); // export: keep :shortcodes: literal } else { html = '
' +
         text.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;') + '
'; @@ -8547,7 +8549,7 @@ import * as Modals from './modalManager.js'; if (active) { const md = textarea.value || ''; if (markdownModule && markdownModule.mdToHtml) { - preview.innerHTML = markdownModule.mdToHtml(md); + preview.innerHTML = markdownModule.mdToHtml(md, { shortcodes: false }); // doc preview: keep :shortcodes: literal } else { preview.innerHTML = md.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;').replace(/\n/g, '
'); } diff --git a/static/js/emojiShortcodes.js b/static/js/emojiShortcodes.js new file mode 100644 index 0000000..a51a64e --- /dev/null +++ b/static/js/emojiShortcodes.js @@ -0,0 +1,458 @@ +// static/js/emojiShortcodes.js +// +// Emoji shortcode → Unicode conversion (issue #345). +// +// Chat models frequently emit GitHub/Slack-style `:shortcode:` text — e.g. +// `:blush:`, `:fire:`, `:microphone:` — instead of the actual emoji character. +// Nothing in the render pipeline used to translate these, so they showed up as +// literal `:blush:` text in the chat bubble. +// +// This module turns the common shortcode set into the real Unicode emoji. The +// chat renderer (markdown.js → svgifyEmoji) runs this BEFORE its existing +// Unicode-emoji → monochrome-SVG pass, so a converted `:blush:` renders as the +// same theme-tinted single-color line icon as any other emoji (project rule: +// never colorful emoji), not as a colored system glyph. +// +// Pure and browser-free on purpose: no DOM, no imports, so it can be unit +// tested with plain `node` (see tests/test_emoji_shortcodes_js.py). + +// Canonical map of common shortcode → Unicode emoji. Names follow the GitHub +// convention (lowercase, underscore-separated). A handful of well-known aliases +// (`+1`, `thumbsup`, `grinning_face`, …) point at the same glyph so the most +// frequent model spellings all resolve. 
+export const EMOJI_SHORTCODES = { + // ── Smileys & emotion ── + grinning: '😀', grinning_face: '😀', + smiley: '😃', smiley_face: '😃', + smile: '😄', + grin: '😁', + laughing: '😆', satisfied: '😆', + sweat_smile: '😅', + rofl: '🤣', rolling_on_the_floor_laughing: '🤣', + joy: '😂', + slightly_smiling_face: '🙂', slight_smile: '🙂', + upside_down_face: '🙃', upside_down: '🙃', + wink: '😉', winking_face: '😉', + blush: '😊', smiling_face_with_smiling_eyes: '😊', + innocent: '😇', + smiling_face_with_three_hearts: '🥰', + heart_eyes: '😍', heart_eyes_face: '😍', + star_struck: '🤩', + kissing_heart: '😘', + kissing: '😗', + kissing_closed_eyes: '😚', + kissing_smiling_eyes: '😙', + yum: '😋', + stuck_out_tongue: '😛', + stuck_out_tongue_winking_eye: '😜', + zany_face: '🤪', + stuck_out_tongue_closed_eyes: '😝', + money_mouth_face: '🤑', + hugs: '🤗', hugging_face: '🤗', + hand_over_mouth: '🤭', + shushing_face: '🤫', + thinking: '🤔', thinking_face: '🤔', + zipper_mouth_face: '🤐', + raised_eyebrow: '🤨', + neutral_face: '😐', + expressionless: '😑', + no_mouth: '😶', + smirk: '😏', smirk_face: '😏', + unamused: '😒', + roll_eyes: '🙄', face_with_rolling_eyes: '🙄', + grimacing: '😬', + lying_face: '🤥', + relieved: '😌', + pensive: '😔', + sleepy: '😪', + drooling_face: '🤤', + sleeping: '😴', + mask: '😷', + face_with_thermometer: '🤒', + face_with_head_bandage: '🤕', + nauseated_face: '🤢', + vomiting_face: '🤮', + sneezing_face: '🤧', + hot_face: '🥵', + cold_face: '🥶', + woozy_face: '🥴', + dizzy_face: '😵', + exploding_head: '🤯', + cowboy_hat_face: '🤠', + partying_face: '🥳', + sunglasses: '😎', + nerd_face: '🤓', + monocle_face: '🧐', + confused: '😕', + worried: '😟', + slightly_frowning_face: '🙁', + frowning_face: '☹️', + open_mouth: '😮', + hushed: '😯', + astonished: '😲', + flushed: '😳', + pleading_face: '🥺', + frowning: '😦', + anguished: '😧', + fearful: '😨', + cold_sweat: '😰', + disappointed_relieved: '😥', + cry: '😢', + sob: '😭', + scream: '😱', + confounded: '😖', + persevere: '😣', + disappointed: '😞', + sweat: '😓', + weary: 
'😩', + tired_face: '😫', + yawning_face: '🥱', + triumph: '😤', + rage: '😡', pout: '😡', pouting_face: '😡', + angry: '😠', + cursing_face: '🤬', + smiling_imp: '😈', + imp: '👿', + skull: '💀', + skull_and_crossbones: '☠️', + hankey: '💩', poop: '💩', shit: '💩', + clown_face: '🤡', + japanese_ogre: '👹', + japanese_goblin: '👺', + ghost: '👻', + alien: '👽', + space_invader: '👾', + robot: '🤖', robot_face: '🤖', + // ── Cats ── + smiley_cat: '😺', + smile_cat: '😸', + joy_cat: '😹', + heart_eyes_cat: '😻', + smirk_cat: '😼', + kissing_cat: '😽', + scream_cat: '🙀', + crying_cat_face: '😿', + pouting_cat: '😾', + see_no_evil: '🙈', + hear_no_evil: '🙉', + speak_no_evil: '🙊', + // ── Hands & body ── + wave: '👋', wave_hand: '👋', + raised_back_of_hand: '🤚', + raised_hand_with_fingers_splayed: '🖐️', + hand: '✋', raised_hand: '✋', + vulcan_salute: '🖖', + ok_hand: '👌', + pinched_fingers: '🤌', + pinching_hand: '🤏', + v: '✌️', victory_hand: '✌️', + crossed_fingers: '🤞', + love_you_gesture: '🤟', + metal: '🤘', + call_me_hand: '🤙', + point_left: '👈', + point_right: '👉', + point_up_2: '👆', + middle_finger: '🖕', fu: '🖕', + point_down: '👇', + point_up: '☝️', + '+1': '👍', thumbsup: '👍', thumbup: '👍', thumbs_up: '👍', + '-1': '👎', thumbsdown: '👎', thumbdown: '👎', thumbs_down: '👎', + fist_raised: '✊', fist: '✊', + fist_oncoming: '👊', facepunch: '👊', punch: '👊', + fist_left: '🤛', + fist_right: '🤜', + clap: '👏', clapping_hands: '👏', + raised_hands: '🙌', + open_hands: '👐', + palms_up_together: '🤲', + handshake: '🤝', + pray: '🙏', folded_hands: '🙏', + writing_hand: '✍️', + nail_care: '💅', + selfie: '🤳', + muscle: '💪', flexed_biceps: '💪', + // ── Hearts & symbols of feeling ── + heart: '❤️', red_heart: '❤️', + orange_heart: '🧡', + yellow_heart: '💛', + green_heart: '💚', + blue_heart: '💙', + purple_heart: '💜', + black_heart: '🖤', + white_heart: '🤍', + brown_heart: '🤎', + broken_heart: '💔', + heart_on_fire: '❤️‍🔥', + two_hearts: '💕', + revolving_hearts: '💞', + heartbeat: '💓', + heartpulse: '💗', + sparkling_heart: '💖', + 
cupid: '💘', + gift_heart: '💝', + heart_decoration: '💟', + heavy_heart_exclamation: '❣️', + // ── Celebration & misc objects ── + fire: '🔥', flame: '🔥', + '100': '💯', hundred: '💯', + sparkles: '✨', + star: '⭐', + star2: '🌟', glowing_star: '🌟', + dizzy: '💫', + boom: '💥', collision: '💥', + anger: '💢', + sweat_drops: '💦', + dash: '💨', + zzz: '💤', + tada: '🎉', party_popper: '🎉', + confetti_ball: '🎊', + balloon: '🎈', + gift: '🎁', + trophy: '🏆', + '1st_place_medal': '🥇', + '2nd_place_medal': '🥈', + '3rd_place_medal': '🥉', + medal_sports: '🏅', + zap: '⚡', lightning: '⚡', + bulb: '💡', light_bulb: '💡', + key: '🔑', + lock: '🔒', + unlock: '🔓', + bell: '🔔', + no_bell: '🔕', + loudspeaker: '📢', + mega: '📣', megaphone: '📣', + speech_balloon: '💬', + thought_balloon: '💭', + white_check_mark: '✅', + heavy_check_mark: '✔️', check_mark: '✔️', + ballot_box_with_check: '☑️', + x: '❌', cross_mark: '❌', + negative_squared_cross_mark: '❎', + question: '❓', + grey_question: '❔', + exclamation: '❗', heavy_exclamation_mark: '❗', + grey_exclamation: '❕', + warning: '⚠️', + no_entry: '⛔', + no_entry_sign: '🚫', + red_circle: '🔴', + green_circle: '🟢', + large_blue_circle: '🔵', + yellow_circle: '🟡', + white_circle: '⚪', + black_circle: '⚫', + orange_circle: '🟠', + purple_circle: '🟣', + brown_circle: '🟤', + // ── Tech, work, study ── + rocket: '🚀', + eyes: '👀', + eye: '👁️', + brain: '🧠', + books: '📚', + book: '📖', open_book: '📖', + memo: '📝', pencil: '📝', + pencil2: '✏️', + page_facing_up: '📄', + paperclip: '📎', + pushpin: '📌', + round_pushpin: '📍', + link: '🔗', + bar_chart: '📊', + chart_with_upwards_trend: '📈', + chart_with_downwards_trend: '📉', + mag: '🔍', + mag_right: '🔎', + globe_with_meridians: '🌐', + earth_africa: '🌍', + earth_americas: '🌎', + earth_asia: '🌏', + alarm_clock: '⏰', + hourglass_flowing_sand: '⏳', + hourglass: '⌛', + microphone: '🎤', mic: '🎤', + musical_note: '🎵', + notes: '🎶', musical_notes: '🎶', + headphones: '🎧', + camera: '📷', + camera_flash: '📸', + clapper: '🎬', + tv: '📺', + 
computer: '💻', laptop: '💻', + desktop_computer: '🖥️', + iphone: '📱', mobile_phone: '📱', + telephone: '☎️', + wrench: '🔧', + hammer: '🔨', + gear: '⚙️', + nut_and_bolt: '🔩', + magnet: '🧲', + test_tube: '🧪', + microscope: '🔬', + dart: '🎯', bullseye: '🎯', + game_die: '🎲', + jigsaw: '🧩', + // ── Food & drink ── + pizza: '🍕', + hamburger: '🍔', + fries: '🍟', + taco: '🌮', + sushi: '🍣', + doughnut: '🍩', donut: '🍩', + coffee: '☕', + beer: '🍺', + wine_glass: '🍷', + // ── Animals & nature ── + dog: '🐶', + cat: '🐱', + mouse: '🐭', + hamster: '🐹', + rabbit: '🐰', + fox_face: '🦊', + bear: '🐻', + panda_face: '🐼', + koala: '🐨', + tiger: '🐯', + lion: '🦁', + cow: '🐮', + pig: '🐷', + frog: '🐸', + monkey_face: '🐵', + chicken: '🐔', + penguin: '🐧', + bird: '🐦', + eagle: '🦅', + duck: '🦆', + owl: '🦉', + wolf: '🐺', + horse: '🐴', + unicorn: '🦄', + bee: '🐝', honeybee: '🐝', + bug: '🐛', + butterfly: '🦋', + snail: '🐌', + lady_beetle: '🐞', + snake: '🐍', + turtle: '🐢', + octopus: '🐙', + crab: '🦀', + tropical_fish: '🐠', + whale: '🐳', + shark: '🦈', + cherry_blossom: '🌸', + rose: '🌹', + sunflower: '🌻', + hibiscus: '🌺', + tulip: '🌷', + seedling: '🌱', + evergreen_tree: '🌲', + deciduous_tree: '🌳', + four_leaf_clover: '🍀', + apple: '🍎', + green_apple: '🍏', + pear: '🍐', + tangerine: '🍊', + lemon: '🍋', + banana: '🍌', + watermelon: '🍉', + grapes: '🍇', + strawberry: '🍓', + blueberries: '🫐', + peach: '🍑', + rainbow: '🌈', + sunny: '☀️', sun: '☀️', + partly_sunny: '⛅', + cloud: '☁️', + snowflake: '❄️', + ocean: '🌊', + // ── Arrows & signs ── + arrow_right: '➡️', + arrow_left: '⬅️', + arrow_up: '⬆️', + arrow_down: '⬇️', + arrow_upper_right: '↗️', + arrow_lower_right: '↘️', + arrow_lower_left: '↙️', + arrow_upper_left: '↖️', + leftwards_arrow_with_hook: '↩️', + arrow_right_hook: '↪️', + arrows_counterclockwise: '🔄', + arrows_clockwise: '🔃', + heavy_plus_sign: '➕', + heavy_minus_sign: '➖', + heavy_division_sign: '➗', + heavy_multiplication_x: '✖️', + infinity: '♾️', + copyright: '©️', + registered: '®️', + tm: '™️', 
+ recycle: '♻️', + checkered_flag: '🏁', + triangular_flag_on_post: '🚩', + white_flag: '🏳️', + black_flag: '🏴', + // ── People & wearables ── + baby: '👶', + boy: '👦', + girl: '👧', + man: '👨', + woman: '👩', + older_man: '👴', + older_woman: '👵', + crown: '👑', + gem: '💎', + graduation_cap: '🎓', mortar_board: '🎓', +}; + +// `:name:` where name is letters/digits/`_`/`+`/`-`. Length ≥1 so `:+1:` and +// `:-1:` match. Global + case-insensitive for replace; a separate non-global +// literal is used for the cheap presence check so there's no shared lastIndex +// state to reset. +const SHORTCODE_RE = /:([a-z0-9_+-]{1,40}):/gi; + +/** + * Cheap test for whether `text` could contain any emoji shortcode at all. + * Lets callers skip the replace pass entirely on the common no-shortcode path. + */ +export function hasEmojiShortcode(text) { + return !!text && text.indexOf(':') !== -1 && /:[a-z0-9_+-]{1,40}:/i.test(text); +} + +// A shortcode must stand on its own — flanked by whitespace, punctuation, a +// string edge, or markup, never glued to an ASCII word character. Without this +// guard, real `:name:` shortcodes that happen to sit inside a longer run of +// digits/letters get converted by mistake and mangle perfectly literal text: +// "1:100:2" → the `:100:` would become 💯 ("1💯2") +// "host:fire:port", URL authorities, `key:value:` pairs, etc. +// Chat models always emit shortcodes delimited by spaces/punctuation (":fire:", +// "**:microphone:**", "nice :tada:!"), so requiring a boundary keeps every real +// shortcode working while leaving embedded colon runs untouched. `_` counts as a +// word char too (identifier-like), but `+`/`-` do not, so "C++ :fire:" still works. +const _WORDISH = /[A-Za-z0-9_]/; +function _boundedOnBothSides(str, start, end) { + const before = start > 0 ? str[start - 1] : ''; + const after = end < str.length ? 
str[end] : ''; + return !_WORDISH.test(before) && !_WORDISH.test(after); +} + +/** + * Replace every known `:shortcode:` in `text` with its Unicode emoji. Unknown + * shortcodes (`:definitely_not_emoji:`), colon runs that don't form a shortcode + * (`10:30:45`, `16:9`), and known shortcodes embedded mid-token (`1:100:2`) are + * all left exactly as-is. + */ +export function replaceEmojiShortcodes(text) { + if (!text || text.indexOf(':') === -1) return text; + return text.replace(SHORTCODE_RE, (whole, name, offset, str) => { + const key = name.toLowerCase(); + if (!Object.prototype.hasOwnProperty.call(EMOJI_SHORTCODES, key)) return whole; + // Only convert when the `:shortcode:` is a standalone token, not glued to a + // surrounding word/number (which would mean it's literal text, not an emoji). + if (!_boundedOnBothSides(str, offset, offset + whole.length)) return whole; + return EMOJI_SHORTCODES[key]; + }); +} + +export default { EMOJI_SHORTCODES, replaceEmojiShortcodes, hasEmojiShortcode }; diff --git a/static/js/markdown.js b/static/js/markdown.js index a2cfba0..df92721 100644 --- a/static/js/markdown.js +++ b/static/js/markdown.js @@ -6,6 +6,7 @@ import uiModule from './ui.js'; import { splitTableRow } from './markdown/tableRow.js'; +import { replaceEmojiShortcodes, hasEmojiShortcode } from './emojiShortcodes.js'; var escapeHtml = uiModule.esc; @@ -366,8 +367,19 @@ function _useSvgEmoji() { return typeof document === 'undefined' || !document.body?.classList.contains('text-emojis'); } -export function svgifyEmoji(html) { - if (!_useSvgEmoji() || !html || !_EMOJI_RE.test(html)) return html; +// `opts.shortcodes` (default true) controls the issue-#345 `:name:` → emoji +// expansion. Chat passes it through as true; document/email body renderers pass +// false so author-typed `:shortcode:` text stays literal (see mdToHtml callers). 
+// The Unicode-emoji → monochrome-SVG pass always runs regardless, so a real 😀 +// in a document still renders as the themed line icon as it always has. +export function svgifyEmoji(html, opts) { + if (!_useSvgEmoji() || !html) return html; + const allowShortcodes = !opts || opts.shortcodes !== false; + // Two reasons to walk the HTML: real Unicode emoji to turn into SVG icons, + // or `:shortcode:` text the model emitted instead of an emoji (issue #345). + const hasUnicode = _EMOJI_RE.test(html); + const hasShortcode = allowShortcodes && hasEmojiShortcode(html); + if (!hasUnicode && !hasShortcode) return html; const parts = html.split(/(<[^>]*>)/); // odd indices = tags let codeDepth = 0; for (let i = 0; i < parts.length; i++) { @@ -377,7 +389,13 @@ export function svgifyEmoji(html) { else if (/^<\/(pre|code)\s*>/.test(t)) codeDepth = Math.max(0, codeDepth - 1); continue; } - if (codeDepth === 0 && _EMOJI_RE.test(parts[i])) parts[i] = _svgifyText(parts[i]); + if (codeDepth !== 0) continue; + let seg = parts[i]; + // Expand shortcodes to Unicode first, then both they and any pre-existing + // Unicode emoji get rendered as the same monochrome line icons below. + if (hasShortcode) seg = replaceEmojiShortcodes(seg); + if (_EMOJI_RE.test(seg)) seg = _svgifyText(seg); + parts[i] = seg; } return parts.join(''); } @@ -421,7 +439,7 @@ export function processWithThinking(text) { /** * Convert markdown to HTML */ -export function mdToHtml(src) { +export function mdToHtml(src, opts) { const allowedHtmlBlocks = []; const codeBlocks = []; const mermaidBlocks = []; @@ -678,7 +696,7 @@ export function mdToHtml(src) { s = s.replace(`___CODE_BLOCK_${index}___`, block); }); - return _useSvgEmoji() ? svgifyEmoji(s) : s; + return _useSvgEmoji() ? 
svgifyEmoji(s, opts) : s; } /** diff --git a/tests/markdown_codefence_placeholder_regression.mjs b/tests/markdown_codefence_placeholder_regression.mjs index a57cabe..aaaa50c 100644 --- a/tests/markdown_codefence_placeholder_regression.mjs +++ b/tests/markdown_codefence_placeholder_regression.mjs @@ -16,6 +16,10 @@ src = src.replace( /import \{ splitTableRow \} from '\.\/markdown\/tableRow\.js';/, 'const splitTableRow = (row) => row.split("|").filter((cell) => cell.trim() !== "");' ); +src = src.replace( + /import \{ replaceEmojiShortcodes, hasEmojiShortcode \} from '\.\/emojiShortcodes\.js';/, + 'const hasEmojiShortcode = (t) => !!t && t.indexOf(":") !== -1 && /:[a-z0-9_+-]{1,40}:/i.test(t); const replaceEmojiShortcodes = (t) => t;' +); src = src.replace(/export function /g, 'function '); src = src.replace(/export const /g, 'const '); src = src.replace(/export default markdownModule;?/g, ''); diff --git a/tests/test_emoji_shortcodes_js.py b/tests/test_emoji_shortcodes_js.py new file mode 100644 index 0000000..72f8e1e --- /dev/null +++ b/tests/test_emoji_shortcodes_js.py @@ -0,0 +1,101 @@ +"""Pin the pure emoji shortcode → Unicode helpers in emojiShortcodes.js. + +Driven through `node --input-type=module` so we exercise the real JS without a +full Vitest/Jest setup (same approach as test_reply_recipients_js.py / test_compare_js.py). +Skips when `node` is not installed rather than failing. + +Regression for issue #345: chat models emit GitHub-style :shortcode: text +(e.g. :blush:, :microphone:) instead of the actual emoji, and nothing in the +render pipeline translated them, so they showed up as literal ":blush:" text. 
+""" +import json +import shutil +import subprocess +from pathlib import Path + +import pytest + +_REPO = Path(__file__).resolve().parent.parent +_HELPER = _REPO / "static" / "js" / "emojiShortcodes.js" +_HAS_NODE = shutil.which("node") is not None + + +def _run(js: str) -> str: + proc = subprocess.run( + ["node", "--input-type=module"], + input=js, capture_output=True, text=True, cwd=str(_REPO), timeout=30, + ) + assert proc.returncode == 0, proc.stderr + return proc.stdout.strip() + + +def _replace(text: str) -> str: + js = f""" + import {{ replaceEmojiShortcodes }} from '{_HELPER.as_posix()}'; + console.log(JSON.stringify(replaceEmojiShortcodes({json.dumps(text)}))); + """ + return json.loads(_run(js)) + + +@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH") +def test_issue_345_examples_convert(): + # The exact shortcodes the issue reported as showing up as literal text. + assert _replace("visit today? :blush:") == "visit today? \U0001f60a" + assert _replace("hobbies? **:microphone:**") == "hobbies? **\U0001f3a4**" + + +@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH") +def test_common_shortcodes_and_aliases(): + assert _replace(":fire:") == "\U0001f525" + assert _replace(":tada:") == "\U0001f389" + assert _replace(":thinking:") == "\U0001f914" + # +1 / thumbsup are aliases for the same glyph. + assert _replace(":+1:") == "\U0001f44d" + assert _replace(":thumbsup:") == "\U0001f44d" + # Multiple in one string, mixed with surrounding text. + assert _replace("nice :fire: work :100:") == "nice \U0001f525 work \U0001f4af" + + +@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH") +def test_unknown_and_nonshortcodes_untouched(): + # Unknown shortcode left verbatim (incl. the :emoji: placeholder). + assert _replace(":definitely_not_an_emoji:") == ":definitely_not_an_emoji:" + assert _replace(":emoji:") == ":emoji:" + # Time ranges / ratios must not be mangled. 
+ assert _replace("meet at 10:30:45 today") == "meet at 10:30:45 today" + assert _replace("ratio 16:9 vs 4:3") == "ratio 16:9 vs 4:3" + # No colons at all → returned as-is. + assert _replace("plain text") == "plain text" + + +@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH") +def test_known_shortcode_embedded_in_token_is_not_converted(): + # Regression: a KNOWN shortcode that happens to sit inside a longer run of + # digits/letters is literal text, not an emoji. The classic trap is a numeric + # range whose middle segment spells a real shortcode (`:100:` → 💯): + assert _replace("1:100:2") == "1:100:2" + assert _replace("scale 3:100:7 ok") == "scale 3:100:7 ok" + # Glued to a word on either side → left alone (e.g. `key:value:` style text, + # URL authorities like `host:fire:port`). + assert _replace("host:fire:port") == "host:fire:port" + assert _replace("status:fire:") == "status:fire:" + assert _replace(":fire:done") == ":fire:done" + # But a standalone shortcode flanked by whitespace/punctuation still converts, + # including back-to-back shortcodes and the leading `:100:` once delimited. + assert _replace("we hit :100: today") == "we hit \U0001f4af today" + assert _replace("see :fire:!") == "see \U0001f525!" 
+ assert _replace(":fire::tada:") == "\U0001f525\U0001f389" + + +@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH") +def test_has_emoji_shortcode_detector(): + js = f""" + import {{ hasEmojiShortcode }} from '{_HELPER.as_posix()}'; + const out = [ + hasEmojiShortcode(':blush:'), + hasEmojiShortcode('no shortcodes here'), + hasEmojiShortcode('a single : colon'), + ]; + console.log(JSON.stringify(out)); + """ + assert json.loads(_run(js)) == [True, False, False] diff --git a/tests/test_markdown_rendering_js.py b/tests/test_markdown_rendering_js.py index 4f36528..7cfd3b5 100644 --- a/tests/test_markdown_rendering_js.py +++ b/tests/test_markdown_rendering_js.py @@ -41,6 +41,18 @@ def _run_markdown_case(markdown: str, render_expr: str = "mod.mdToHtml(input)"): return (row || '').replace(/^\\s*\\|/, '').replace(/\\|\\s*$/, '').split('|').map(c => c.trim()); }` ); + // markdown.js imports the emoji-shortcode helpers relatively (issue #345), + // which a data: URL module can't resolve. Inline the REAL helpers (minus + // their export keywords) so the renderer's shortcode pass behaves exactly + // as it does in the browser. + const emojiSource = fs.readFileSync('./static/js/emojiShortcodes.js', 'utf8') + .replace(/^export default .*$/m, '') + .replace(/export const /g, 'const ') + .replace(/export function /g, 'function '); + source = source.replace( + /import \{ replaceEmojiShortcodes, hasEmojiShortcode \} from ['"]\.\/emojiShortcodes\.js['"];/, + () => emojiSource + ); source = source.replace( /var escapeHtml = uiModule\.esc;/, `var escapeHtml = (value) => String(value ?? '')