# odysseus/tests/test_emoji_shortcodes_js.py

"""Pin the pure emoji shortcode → Unicode helpers in emojiShortcodes.js.

Driven through `node --input-type=module` so we exercise the real JS without a
full Vitest/Jest setup (same approach as test_reply_recipients_js.py / test_compare_js.py).
Skips when `node` is not installed rather than failing.

Regression for issue #345: chat models emit GitHub-style :shortcode: text
(e.g. :blush:, :microphone:) instead of the actual emoji, and nothing in the
render pipeline translated them, so they showed up as literal ":blush:" text.
"""
import json
import shutil
import subprocess
from pathlib import Path

import pytest

# Directory that contains this test file's directory; used both as the base
# for locating the JS helper and as the working directory for `node`.
_REPO = Path(__file__).resolve().parents[1]
# The ES module whose exported helpers these tests exercise.
_HELPER = _REPO.joinpath("static", "js", "emojiShortcodes.js")
# True when a `node` executable can be found on PATH; tests skip otherwise.
_HAS_NODE = bool(shutil.which("node"))


def _run(js: str) -> str:
    """Execute *js* as an ES module under Node and return its trimmed stdout.

    The script is piped to ``node --input-type=module`` on stdin, with the
    working directory set to ``_REPO`` and a 30-second timeout.

    Both pipes are pinned to UTF-8 instead of the locale's preferred encoding:
    the scripts print raw (non-escaped) emoji via ``JSON.stringify``, so a
    non-UTF-8 locale codec would garble the output or fail to decode it.

    Raises:
        AssertionError: node exited non-zero; the message is node's stderr.
        subprocess.TimeoutExpired: the script ran longer than 30 seconds.
    """
    proc = subprocess.run(
        ["node", "--input-type=module"],
        input=js,
        capture_output=True,
        text=True,
        encoding="utf-8",
        cwd=str(_REPO),
        timeout=30,
    )
    assert proc.returncode == 0, proc.stderr
    return proc.stdout.strip()


def _replace(text: str) -> str:
    """Return ``replaceEmojiShortcodes(text)`` as computed by the real JS helper.

    *text* is embedded in the generated script as a JSON literal, and the
    result is round-tripped back through JSON so the Python side receives
    exactly the string the JS function returned.

    The helper module is imported through a ``file://`` URL rather than a bare
    filesystem path: Node's ESM loader rejects Windows drive-letter paths
    (``C:/...`` is read as an unknown URL scheme), and ``as_uri()`` also
    percent-encodes characters such as ``'``, ``#``, ``?`` and ``%`` that would
    otherwise corrupt the import specifier.
    """
    js = f"""
    import {{ replaceEmojiShortcodes }} from '{_HELPER.as_uri()}';
    console.log(JSON.stringify(replaceEmojiShortcodes({json.dumps(text)})));
    """
    return json.loads(_run(js))


@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_issue_345_examples_convert():
    """The shortcodes quoted in issue #345 are rendered as their emoji."""
    # (input, expected) pairs: the exact shortcodes the issue reported as
    # showing up as literal text.
    reported = (
        ("visit today? :blush:", "visit today? \U0001f60a"),
        ("hobbies? **:microphone:**", "hobbies? **\U0001f3a4**"),
    )
    for raw, rendered in reported:
        assert _replace(raw) == rendered


@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_common_shortcodes_and_aliases():
    """Frequently used shortcodes, and their aliases, map to the right glyph."""
    cases = (
        (":fire:", "\U0001f525"),
        (":tada:", "\U0001f389"),
        (":thinking:", "\U0001f914"),
        # +1 / thumbsup are aliases for the same glyph.
        (":+1:", "\U0001f44d"),
        (":thumbsup:", "\U0001f44d"),
        # Multiple in one string, mixed with surrounding text.
        ("nice :fire: work :100:", "nice \U0001f525 work \U0001f4af"),
    )
    for raw, rendered in cases:
        assert _replace(raw) == rendered


@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_unknown_and_nonshortcodes_untouched():
    """Text without a recognised shortcode comes back byte-for-byte unchanged."""
    passthrough = (
        # Unknown shortcode left verbatim (incl. the :emoji: placeholder).
        ":definitely_not_an_emoji:",
        ":emoji:",
        # Time ranges / ratios must not be mangled.
        "meet at 10:30:45 today",
        "ratio 16:9 vs 4:3",
        # No colons at all → returned as-is.
        "plain text",
    )
    for literal in passthrough:
        assert _replace(literal) == literal


@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_known_shortcode_embedded_in_token_is_not_converted():
    """A known shortcode glued to letters/digits is literal text, not an emoji.

    Regression: the classic trap is a numeric range whose middle segment
    spells a real shortcode (`:100:` → 💯).
    """
    stays_literal = (
        # Numeric ranges whose middle segment is a known shortcode.
        "1:100:2",
        "scale 3:100:7 ok",
        # Glued to a word on either side → left alone (e.g. `key:value:` style
        # text, URL authorities like `host:fire:port`).
        "host:fire:port",
        "status:fire:",
        ":fire:done",
    )
    for literal in stays_literal:
        assert _replace(literal) == literal

    # But a standalone shortcode flanked by whitespace/punctuation still
    # converts, including back-to-back shortcodes and `:100:` once delimited.
    still_converts = (
        ("we hit :100: today", "we hit \U0001f4af today"),
        ("see :fire:!", "see \U0001f525!"),
        (":fire::tada:", "\U0001f525\U0001f389"),
    )
    for raw, rendered in still_converts:
        assert _replace(raw) == rendered


@pytest.mark.skipif(not _HAS_NODE, reason="node binary not on PATH")
def test_has_emoji_shortcode_detector():
    """`hasEmojiShortcode` is true for a real shortcode and false otherwise.

    The three probes run in a single node invocation and come back as one JSON
    array, so the expected list below is positional.
    """
    # Import via a file:// URL: Node's ESM loader rejects Windows drive-letter
    # paths as unknown URL schemes, and as_uri() percent-encodes characters
    # that would otherwise break the quoted import specifier.
    js = f"""
    import {{ hasEmojiShortcode }} from '{_HELPER.as_uri()}';
    const out = [
      hasEmojiShortcode(':blush:'),
      hasEmojiShortcode('no shortcodes here'),
      hasEmojiShortcode('a single : colon'),
    ];
    console.log(JSON.stringify(out));
    """
    assert json.loads(_run(js)) == [True, False, False]