NOTE(review): this patch was re-flowed from a copy whose newlines and
indentation had been collapsed. Record order and hunk line counts were checked
against the @@ headers. Leading whitespace on context/removed lines inside
function bodies of src/builtin_actions.py is reconstructed (assumes the body of
action_learn_sender_signatures sits inside a try block, i.e. an 8-space base)
-- TODO confirm against the target file, or apply with
`git apply --ignore-whitespace`.

diff --git a/src/builtin_actions.py b/src/builtin_actions.py
index e54eb2f..c107bec 100644
--- a/src/builtin_actions.py
+++ b/src/builtin_actions.py
@@ -956,6 +956,17 @@ async def action_mark_email_boundaries(owner: str, **kwargs) -> Tuple[str, bool]
         return str(e), False
 
 
+# Sender local-parts (matched exactly or by prefix) whose mail never carries a
+# personal signature worth learning. These compare against the local-part
+# (before "@"), so role names must NOT include a trailing "@" — "support@" etc.
+# could never match a local-part of "support" and were silently dead.
+_SIG_SKIP_PREFIXES = (
+    "noreply", "no-reply", "donotreply", "do-not-reply",
+    "mailer-daemon", "notifications", "notification", "bounce",
+    "newsletter", "support", "info", "admin",
+)
+
+
 async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, bool]:
     """For each sender with ≥3 recent inbox emails, ask the LLM to extract the
     common signature block across their messages. The cached sig is
@@ -1013,16 +1024,11 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
             return "No emails to scan", True
 
         # 2. Group by sender; drop addresses that don't carry useful sigs.
-        SKIP_PREFIXES = (
-            "noreply", "no-reply", "donotreply", "do-not-reply",
-            "mailer-daemon", "notifications", "notification", "bounce",
-            "newsletter", "support@", "info@", "admin@",
-        )
         by_sender: dict[str, list[dict]] = {}
         for m in mails:
             addr = m["from_address"]
             local = addr.split("@", 1)[0]
-            if any(local == p or local.startswith(p) for p in SKIP_PREFIXES):
+            if any(local == p or local.startswith(p) for p in _SIG_SKIP_PREFIXES):
                 continue
             # Skip plus-aliases / list-style addresses too.
             if "+" in local or "-noreply" in addr or "-bounces" in addr:
diff --git a/tests/test_sender_signature_skip_roles.py b/tests/test_sender_signature_skip_roles.py
new file mode 100644
index 0000000..e7270a3
--- /dev/null
+++ b/tests/test_sender_signature_skip_roles.py
@@ -0,0 +1,35 @@
+"""Sender-signature learning must skip role addresses like support@/info@.
+
+The skip-list compares against the email local-part (before "@"), but the
+entries were written "support@", "info@", "admin@" — which can never equal or
+prefix a local-part of "support"/"info"/"admin", so those role senders were
+NOT skipped and the LLM wasted work learning signatures from them. The entries
+must omit the "@".
+"""
+from src.builtin_actions import _SIG_SKIP_PREFIXES
+
+
+def _skipped(addr):
+    local = addr.split("@", 1)[0]
+    return any(local == p or local.startswith(p) for p in _SIG_SKIP_PREFIXES)
+
+
+def test_role_addresses_are_skipped():
+    assert _skipped("support@vendor.com")
+    assert _skipped("info@company.com")
+    assert _skipped("admin@example.org")
+
+
+def test_noreply_style_still_skipped():
+    assert _skipped("noreply@x.com")
+    assert _skipped("mailer-daemon@x.com")
+    assert _skipped("newsletter@x.com")
+
+
+def test_real_person_is_not_skipped():
+    assert not _skipped("john.smith@x.com")
+    assert not _skipped("alice@x.com")
+
+
+def test_no_skip_entry_contains_at():
+    assert all("@" not in p for p in _SIG_SKIP_PREFIXES)