Chat models often emit GitHub/Slack-style :shortcode: text (e.g. :blush:, :microphone:) instead of the actual emoji. The renderer only converted real Unicode emoji to the monochrome line icons, so shortcodes rendered as literal text.

Add a pure, browser-free shortcode->Unicode map (emojiShortcodes.js) and run it inside svgifyEmoji ahead of the existing Unicode->SVG pass, skipping <code>/<pre> so code stays literal. Covers ~430 common shortcodes plus common aliases (+1/thumbsup, etc.).

Keep the conversion from touching anything it shouldn't:

* Scope it to chat. mdToHtml/svgifyEmoji take a { shortcodes } option (default on); document and email body rendering (compose, export, preview) pass it as false so author-typed :shortcode: text stays literal. The Unicode->SVG pass still runs there exactly as before.
* Only convert a :shortcode: that stands on its own. A word-boundary guard leaves embedded colon runs alone, so "1:100:2", "10:30:45", "16:9" and host:fire:port are never rewritten.

Tests: extend the node-driven unit test with the boundary/false-positive cases, and fix the markdown-rendering test loader to resolve the new emojiShortcodes import.
102 lines
4.2 KiB
Python
102 lines
4.2 KiB
Python
"""Pin the pure emoji shortcode → Unicode helpers in emojiShortcodes.js.

Driven through `node --input-type=module` so we exercise the real JS without a
full Vitest/Jest setup (same approach as test_reply_recipients_js.py / test_compare_js.py).
Skips when `node` is not installed rather than failing.

Regression for issue #345: chat models emit GitHub-style :shortcode: text
(e.g. :blush:, :microphone:) instead of the actual emoji, and nothing in the
render pipeline translated them, so they showed up as literal ":blush:" text.
"""
|
|
import json
|
|
import shutil
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
# Two directory levels above this file; used as the working directory for Node
# and as the base for locating the JS helper.
_REPO = Path(__file__).resolve().parent.parent
# The JS module under test, imported by the Node snippets below.
_HELPER = _REPO / "static" / "js" / "emojiShortcodes.js"
# Node-driven tests are skipped (not failed) when no `node` binary is on PATH.
_HAS_NODE = shutil.which("node") is not None
|
|
|
|
|
|
def _run(js: str) -> str:
    """Execute *js* as an ES module with Node and return its trimmed stdout.

    The script is piped to ``node --input-type=module`` on stdin, with the
    repository root as the working directory. A non-zero exit status fails the
    calling test, using Node's stderr as the assertion message.

    Args:
        js: JavaScript (ES module) source to execute.

    Returns:
        Whatever the script wrote to stdout, with surrounding whitespace
        stripped.

    Raises:
        subprocess.TimeoutExpired: if Node does not finish within 30 seconds.
    """
    proc = subprocess.run(
        ["node", "--input-type=module"],
        input=js,
        capture_output=True,
        text=True,
        # The scripts print raw emoji via JSON.stringify; decode as UTF-8
        # explicitly instead of relying on the locale's preferred encoding,
        # which may not be able to represent them.
        encoding="utf-8",
        cwd=str(_REPO),
        timeout=30,
    )
    assert proc.returncode == 0, proc.stderr
    return proc.stdout.strip()
|
|
|
|
|
|
def _replace(text: str) -> str:
    """Return ``replaceEmojiShortcodes(text)`` as computed by the real JS helper.

    Both the input and the result cross the Python/Node boundary as JSON, so
    arbitrary text (quotes, newlines, non-ASCII) round-trips safely.

    Args:
        text: The string to pass to ``replaceEmojiShortcodes``.

    Returns:
        The string produced by the JS function.
    """
    # Import through a file:// URL emitted as a JSON string literal rather
    # than splicing the raw path between single quotes: a bare absolute path
    # is not a valid ESM specifier on Windows, and quotes or URL-significant
    # characters ('#', '?', '%') in the checkout path would otherwise break
    # or redirect the import.
    specifier = json.dumps(_HELPER.as_uri())
    js = f"""
import {{ replaceEmojiShortcodes }} from {specifier};
console.log(JSON.stringify(replaceEmojiShortcodes({json.dumps(text)})));
"""
    return json.loads(_run(js))
|
|
|
|
|
|
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_issue_345_examples_convert():
    """The shortcodes reported in issue #345 are converted to their emoji."""
    # The exact shortcodes the issue reported as showing up as literal text.
    assert _replace("visit today? :blush:") == "visit today? \U0001f60a"
    # Surrounding markdown emphasis markers must survive the conversion.
    assert _replace("hobbies? **:microphone:**") == "hobbies? **\U0001f3a4**"
|
|
|
|
|
|
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_common_shortcodes_and_aliases():
    """Common shortcodes and their aliases map to the expected code points."""
    assert _replace(":fire:") == "\U0001f525"
    assert _replace(":tada:") == "\U0001f389"
    assert _replace(":thinking:") == "\U0001f914"
    # +1 / thumbsup are aliases for the same glyph.
    assert _replace(":+1:") == "\U0001f44d"
    assert _replace(":thumbsup:") == "\U0001f44d"
    # Multiple in one string, mixed with surrounding text.
    assert _replace("nice :fire: work :100:") == "nice \U0001f525 work \U0001f4af"
|
|
|
|
|
|
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_unknown_and_nonshortcodes_untouched():
    """Unknown shortcodes and ordinary colon-bearing text come back verbatim."""
    # Unknown shortcode left verbatim (incl. the :emoji: placeholder).
    assert _replace(":definitely_not_an_emoji:") == ":definitely_not_an_emoji:"
    assert _replace(":emoji:") == ":emoji:"
    # Time ranges / ratios must not be mangled.
    assert _replace("meet at 10:30:45 today") == "meet at 10:30:45 today"
    assert _replace("ratio 16:9 vs 4:3") == "ratio 16:9 vs 4:3"
    # No colons at all → returned as-is.
    assert _replace("plain text") == "plain text"
|
|
|
|
|
|
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_known_shortcode_embedded_in_token_is_not_converted():
    """A known shortcode converts only when it stands on its own."""
    # Regression: a KNOWN shortcode that happens to sit inside a longer run of
    # digits/letters is literal text, not an emoji. The classic trap is a numeric
    # range whose middle segment spells a real shortcode (`:100:` → 💯):
    assert _replace("1:100:2") == "1:100:2"
    assert _replace("scale 3:100:7 ok") == "scale 3:100:7 ok"
    # Glued to a word on either side → left alone (e.g. `key:value:` style text,
    # URL authorities like `host:fire:port`).
    assert _replace("host:fire:port") == "host:fire:port"
    assert _replace("status:fire:") == "status:fire:"
    assert _replace(":fire:done") == ":fire:done"
    # But a standalone shortcode flanked by whitespace/punctuation still converts,
    # including back-to-back shortcodes and the leading `:100:` once delimited.
    assert _replace("we hit :100: today") == "we hit \U0001f4af today"
    assert _replace("see :fire:!") == "see \U0001f525!"
    assert _replace(":fire::tada:") == "\U0001f525\U0001f389"
|
|
|
|
|
|
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_has_emoji_shortcode_detector():
    """``hasEmojiShortcode`` is true only for text containing a shortcode."""
    # Import through a file:// URL emitted as a JSON string literal: a raw
    # absolute path between single quotes is not a valid ESM specifier on
    # Windows and breaks on quotes or URL-significant characters in the path.
    specifier = json.dumps(_HELPER.as_uri())
    js = f"""
import {{ hasEmojiShortcode }} from {specifier};
const out = [
  hasEmojiShortcode(':blush:'),
  hasEmojiShortcode('no shortcodes here'),
  hasEmojiShortcode('a single : colon'),
];
console.log(JSON.stringify(out));
"""
    # One boolean per probe above, in order.
    assert json.loads(_run(js)) == [True, False, False]
|