Chat models often emit GitHub/Slack-style :shortcode: text (e.g. `:blush:`, `:microphone:`) instead of the actual emoji. The renderer only converted real Unicode emoji to the monochrome line icons, so shortcodes rendered as literal text.

Add a pure, browser-free shortcode->Unicode map (emojiShortcodes.js) and run it inside svgifyEmoji ahead of the existing Unicode->SVG pass, skipping <code>/<pre> so code stays literal. Covers ~430 common shortcodes plus common aliases (+1/thumbsup, etc.).

Keep the conversion from touching anything it shouldn't:

* Scope it to chat. mdToHtml/svgifyEmoji take a { shortcodes } option (default on); document and email body rendering (compose, export, preview) pass it as false so author-typed :shortcode: text stays literal. The Unicode->SVG pass still runs there exactly as before.
* Only convert a :shortcode: that stands on its own. A word-boundary guard leaves embedded colon runs alone, so "1:100:2", "10:30:45", "16:9" and host:fire:port are never rewritten.

Tests: extend the node-driven unit test with the boundary/false-positive cases, and fix the markdown-rendering test loader to resolve the new emojiShortcodes import.
This commit is contained in:
@@ -16,6 +16,10 @@ src = src.replace(
|
||||
/import \{ splitTableRow \} from '\.\/markdown\/tableRow\.js';/,
|
||||
'const splitTableRow = (row) => row.split("|").filter((cell) => cell.trim() !== "");'
|
||||
);
|
||||
src = src.replace(
|
||||
/import \{ replaceEmojiShortcodes, hasEmojiShortcode \} from '\.\/emojiShortcodes\.js';/,
|
||||
'const hasEmojiShortcode = (t) => !!t && t.indexOf(":") !== -1 && /:[a-z0-9_+-]{1,40}:/i.test(t); const replaceEmojiShortcodes = (t) => t;'
|
||||
);
|
||||
src = src.replace(/export function /g, 'function ');
|
||||
src = src.replace(/export const /g, 'const ');
|
||||
src = src.replace(/export default markdownModule;?/g, '');
|
||||
|
||||
101
tests/test_emoji_shortcodes_js.py
Normal file
101
tests/test_emoji_shortcodes_js.py
Normal file
@@ -0,0 +1,101 @@
|
||||
"""Pin the pure emoji shortcode → Unicode helpers in emojiShortcodes.js.
|
||||
|
||||
Driven through `node --input-type=module` so we exercise the real JS without a
|
||||
full Vitest/Jest setup (same approach as test_reply_recipients_js.py / test_compare_js.py).
|
||||
Skips when `node` is not installed rather than failing.
|
||||
|
||||
Regression for issue #345: chat models emit GitHub-style :shortcode: text
|
||||
(e.g. :blush:, :microphone:) instead of the actual emoji, and nothing in the
|
||||
render pipeline translated them, so they showed up as literal ":blush:" text.
|
||||
"""
|
||||
import json
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
_REPO = Path(__file__).resolve().parent.parent
|
||||
_HELPER = _REPO / "static" / "js" / "emojiShortcodes.js"
|
||||
_HAS_NODE = shutil.which("node") is not None
|
||||
|
||||
|
||||
def _run(js: str) -> str:
    """Execute ``js`` as an ES module under node and return its trimmed stdout.

    The script is piped to ``node --input-type=module`` on stdin, with the
    repository root as the working directory. A non-zero exit status fails the
    calling test, using node's stderr as the assertion message.
    """
    proc = subprocess.run(
        ["node", "--input-type=module"],
        input=js,
        capture_output=True,
        text=True,
        # node writes UTF-8; decode it explicitly so the emoji in the output
        # survive on platforms whose locale encoding is not UTF-8.
        encoding="utf-8",
        cwd=str(_REPO),
        timeout=30,
    )
    assert proc.returncode == 0, proc.stderr
    return proc.stdout.strip()
|
||||
|
||||
|
||||
def _replace(text: str) -> str:
    """Return ``replaceEmojiShortcodes(text)`` as computed by the real JS helper.

    ``text`` is embedded in the generated script as a JSON string literal and the
    result is round-tripped through JSON, so arbitrary characters pass unharmed.
    """
    # Import via a JSON-encoded file:// URL rather than a raw path: a Windows
    # drive path ("C:/...") is not a valid ESM specifier, and a quote in the
    # path would otherwise terminate the JS string literal early.
    js = f"""
import {{ replaceEmojiShortcodes }} from {json.dumps(_HELPER.as_uri())};
console.log(JSON.stringify(replaceEmojiShortcodes({json.dumps(text)})));
"""
    return json.loads(_run(js))
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_issue_345_examples_convert():
    """The exact shortcodes issue #345 reported as literal text now convert."""
    reported = {
        "visit today? :blush:": "visit today? \U0001f60a",
        "hobbies? **:microphone:**": "hobbies? **\U0001f3a4**",
    }
    for raw, rendered in reported.items():
        assert _replace(raw) == rendered
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_common_shortcodes_and_aliases():
    """Common shortcodes, alias pairs, and several shortcodes in one string."""
    expectations = [
        (":fire:", "\U0001f525"),
        (":tada:", "\U0001f389"),
        (":thinking:", "\U0001f914"),
        # +1 / thumbsup are aliases for the same glyph.
        (":+1:", "\U0001f44d"),
        (":thumbsup:", "\U0001f44d"),
        # Multiple in one string, mixed with surrounding text.
        ("nice :fire: work :100:", "nice \U0001f525 work \U0001f4af"),
    ]
    for raw, rendered in expectations:
        assert _replace(raw) == rendered
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_unknown_and_nonshortcodes_untouched():
    """Inputs with nothing to convert must come back unchanged."""
    passthrough = [
        # Unknown shortcode left verbatim (incl. the :emoji: placeholder).
        ":definitely_not_an_emoji:",
        ":emoji:",
        # Time ranges / ratios must not be mangled.
        "meet at 10:30:45 today",
        "ratio 16:9 vs 4:3",
        # No colons at all → returned as-is.
        "plain text",
    ]
    for literal in passthrough:
        assert _replace(literal) == literal
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_known_shortcode_embedded_in_token_is_not_converted():
    """A known shortcode converts only when it stands on its own.

    Regression: a KNOWN shortcode sitting inside a longer run of digits/letters
    is literal text, not an emoji.
    """
    left_alone = [
        # The classic trap: a numeric range whose middle segment spells a real
        # shortcode (`:100:` → 💯).
        "1:100:2",
        "scale 3:100:7 ok",
        # Glued to a word on either side (e.g. `key:value:` style text, URL
        # authorities like `host:fire:port`).
        "host:fire:port",
        "status:fire:",
        ":fire:done",
    ]
    for literal in left_alone:
        assert _replace(literal) == literal

    # A standalone shortcode flanked by whitespace/punctuation still converts,
    # including back-to-back shortcodes and `:100:` once it is delimited.
    still_converted = [
        ("we hit :100: today", "we hit \U0001f4af today"),
        ("see :fire:!", "see \U0001f525!"),
        (":fire::tada:", "\U0001f525\U0001f389"),
    ]
    for raw, rendered in still_converted:
        assert _replace(raw) == rendered
|
||||
|
||||
|
||||
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_has_emoji_shortcode_detector():
    """``hasEmojiShortcode`` is true only for text containing a :shortcode: shape."""
    # Import via a JSON-encoded file:// URL rather than a raw path: a Windows
    # drive path ("C:/...") is not a valid ESM specifier, and a quote in the
    # path would otherwise terminate the JS string literal early.
    js = f"""
import {{ hasEmojiShortcode }} from {json.dumps(_HELPER.as_uri())};
const out = [
  hasEmojiShortcode(':blush:'),
  hasEmojiShortcode('no shortcodes here'),
  hasEmojiShortcode('a single : colon'),
];
console.log(JSON.stringify(out));
"""
    assert json.loads(_run(js)) == [True, False, False]
|
||||
@@ -41,6 +41,18 @@ def _run_markdown_case(markdown: str, render_expr: str = "mod.mdToHtml(input)"):
|
||||
return (row || '').replace(/^\\s*\\|/, '').replace(/\\|\\s*$/, '').split('|').map(c => c.trim());
|
||||
}`
|
||||
);
|
||||
// markdown.js imports the emoji-shortcode helpers relatively (issue #345),
|
||||
// which a data: URL module can't resolve. Inline the REAL helpers (minus
|
||||
// their export keywords) so the renderer's shortcode pass behaves exactly
|
||||
// as it does in the browser.
|
||||
const emojiSource = fs.readFileSync('./static/js/emojiShortcodes.js', 'utf8')
|
||||
.replace(/^export default .*$/m, '')
|
||||
.replace(/export const /g, 'const ')
|
||||
.replace(/export function /g, 'function ');
|
||||
source = source.replace(
|
||||
/import \{ replaceEmojiShortcodes, hasEmojiShortcode \} from ['"]\.\/emojiShortcodes\.js['"];/,
|
||||
() => emojiSource
|
||||
);
|
||||
source = source.replace(
|
||||
/var escapeHtml = uiModule\.esc;/,
|
||||
`var escapeHtml = (value) => String(value ?? '')
|
||||
|
||||
Reference in New Issue
Block a user