Files
odysseus/tests/test_emoji_shortcodes_js.py
Zeus-Deus 85334e8f3d Render emoji shortcodes as icons in chat (#345) (#629)
Chat models often emit GitHub/Slack-style :shortcode: text (e.g. :blush:,
:microphone:) instead of the actual emoji. The renderer only converted real
Unicode emoji to the monochrome line icons, so shortcodes rendered as literal
text.

Add a pure, browser-free shortcode->Unicode map (emojiShortcodes.js) and run it
inside svgifyEmoji ahead of the existing Unicode->SVG pass, skipping <code>/<pre>
so code stays literal. Covers ~430 common shortcodes plus common aliases
(+1/thumbsup, etc.).

Keep the conversion from touching anything it shouldn't:
* Scope it to chat. mdToHtml/svgifyEmoji take a { shortcodes } option (default
  on); document and email body rendering (compose, export, preview) pass it as
  false so author-typed :shortcode: text stays literal. The Unicode->SVG pass
  still runs there exactly as before.
* Only convert a :shortcode: that stands on its own. A word-boundary guard
  leaves embedded colon runs alone, so "1:100:2", "10:30:45", "16:9" and
  host:fire:port are never rewritten.

Tests: extend the node-driven unit test with the boundary/false-positive cases,
and fix the markdown-rendering test loader to resolve the new emojiShortcodes
import.
2026-06-05 02:28:42 +02:00

102 lines
4.2 KiB
Python

"""Pin the pure emoji shortcode → Unicode helpers in emojiShortcodes.js.
Driven through `node --input-type=module` so we exercise the real JS without a
full Vitest/Jest setup (same approach as test_reply_recipients_js.py / test_compare_js.py).
Skips when `node` is not installed rather than failing.
Regression for issue #345: chat models emit GitHub-style :shortcode: text
(e.g. :blush:, :microphone:) instead of the actual emoji, and nothing in the
render pipeline translated them, so they showed up as literal ":blush:" text.
"""
import json
import shutil
import subprocess
from pathlib import Path
import pytest
# Parent of the directory that contains this test file; used as node's working
# directory and as the base for locating the JS helper.
_REPO = Path(__file__).resolve().parent.parent
# The ES module whose exports these tests exercise.
_HELPER = _REPO / "static" / "js" / "emojiShortcodes.js"
# True when a `node` executable is found on PATH; every test skips otherwise.
_HAS_NODE = shutil.which("node") is not None
def _run(js: str) -> str:
    """Execute *js* as an ES module under node and return its trimmed stdout.

    The script is piped on stdin to ``node --input-type=module`` with the
    working directory set to ``_REPO``.

    Args:
        js: JavaScript source for node to evaluate.

    Returns:
        Everything the script wrote to stdout, with surrounding whitespace
        stripped.

    Raises:
        AssertionError: node exited with a non-zero status; the assertion
            message is node's stderr.
        subprocess.TimeoutExpired: the script ran for longer than 30 seconds.
    """
    proc = subprocess.run(
        ["node", "--input-type=module"],
        input=js,
        capture_output=True,
        text=True,
        # Node reads stdin and writes piped stdout as UTF-8. With text=True
        # alone Python would use the locale's preferred encoding, which
        # garbles the emoji in the output on non-UTF-8 locales.
        encoding="utf-8",
        cwd=str(_REPO),
        timeout=30,
    )
    assert proc.returncode == 0, proc.stderr
    return proc.stdout.strip()
def _replace(text: str) -> str:
    """Return what the JS ``replaceEmojiShortcodes`` helper produces for *text*.

    Builds a tiny ES module that imports the helper, applies it to *text* and
    prints the result as JSON, runs it through ``_run`` and decodes the output.

    Args:
        text: Input string, embedded in the script as a JSON string literal.

    Returns:
        The JSON-decoded value printed by the script.
    """
    # Import via a file:// URL emitted as a JSON string literal rather than
    # splicing the raw path between single quotes: a path containing a quote
    # can no longer break the script, and Windows drive-letter paths (which
    # Node's ESM loader rejects as bare specifiers) resolve correctly.
    module_url = json.dumps(_HELPER.as_uri())
    js = (
        f"import {{ replaceEmojiShortcodes }} from {module_url};\n"
        f"console.log(JSON.stringify(replaceEmojiShortcodes({json.dumps(text)})));\n"
    )
    return json.loads(_run(js))
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_issue_345_examples_convert():
    """The shortcodes from the issue report are replaced by Unicode emoji."""
    # (input, expected) pairs: the exact strings that used to render literally.
    reported = (
        ("visit today? :blush:", "visit today? \U0001f60a"),
        ("hobbies? **:microphone:**", "hobbies? **\U0001f3a4**"),
    )
    for raw, rendered in reported:
        assert _replace(raw) == rendered
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_common_shortcodes_and_aliases():
    """Well-known shortcodes, alias pairs and multi-shortcode strings convert."""
    expectations = (
        (":fire:", "\U0001f525"),
        (":tada:", "\U0001f389"),
        (":thinking:", "\U0001f914"),
        # Two spellings of the same glyph: +1 and thumbsup.
        (":+1:", "\U0001f44d"),
        (":thumbsup:", "\U0001f44d"),
        # Several shortcodes interleaved with ordinary words.
        ("nice :fire: work :100:", "nice \U0001f525 work \U0001f4af"),
    )
    for raw, rendered in expectations:
        assert _replace(raw) == rendered
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_unknown_and_nonshortcodes_untouched():
    """Inputs with nothing to convert come back exactly as they went in."""
    passthrough = (
        # Not in the shortcode map (the :emoji: placeholder included).
        ":definitely_not_an_emoji:",
        ":emoji:",
        # Clock times and aspect ratios are colon-separated but not shortcodes.
        "meet at 10:30:45 today",
        "ratio 16:9 vs 4:3",
        # Contains no colon whatsoever.
        "plain text",
    )
    for literal in passthrough:
        assert _replace(literal) == literal
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_known_shortcode_embedded_in_token_is_not_converted():
    """A known shortcode only converts when it is not glued to letters/digits."""
    # Regression inputs that must stay literal even though the colon-delimited
    # middle part names a real shortcode.
    stays_literal = (
        # Numeric ranges whose inner segment reads `:100:` (otherwise 💯).
        "1:100:2",
        "scale 3:100:7 ok",
        # Attached to a word on one or both sides, e.g. `key:value:` text or a
        # URL authority such as `host:fire:port`.
        "host:fire:port",
        "status:fire:",
        ":fire:done",
    )
    for literal in stays_literal:
        assert _replace(literal) == literal

    # Delimited by whitespace or punctuation, the same shortcodes do convert,
    # as do two shortcodes written back to back.
    still_converts = (
        ("we hit :100: today", "we hit \U0001f4af today"),
        ("see :fire:!", "see \U0001f525!"),
        (":fire::tada:", "\U0001f525\U0001f389"),
    )
    for raw, rendered in still_converts:
        assert _replace(raw) == rendered
@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_has_emoji_shortcode_detector():
    """``hasEmojiShortcode`` is true for ':blush:' and false for colon-free or
    single-colon text."""
    # Import via a file:// URL emitted as a JSON string literal rather than
    # splicing the raw path between single quotes: a path containing a quote
    # can no longer break the script, and Windows drive-letter paths (which
    # Node's ESM loader rejects as bare specifiers) resolve correctly.
    module_url = json.dumps(_HELPER.as_uri())
    js = f"""
import {{ hasEmojiShortcode }} from {module_url};
const out = [
  hasEmojiShortcode(':blush:'),
  hasEmojiShortcode('no shortcodes here'),
  hasEmojiShortcode('a single : colon'),
];
console.log(JSON.stringify(out));
"""
    assert json.loads(_run(js)) == [True, False, False]