fix: signature learning never skips support@/info@/admin@ senders (#1773)

This commit is contained in:
Afonso Coutinho
2026-06-03 05:22:52 +01:00
committed by GitHub
parent 694647375c
commit db1596f3b4
2 changed files with 47 additions and 6 deletions

View File

@@ -956,6 +956,17 @@ async def action_mark_email_boundaries(owner: str, **kwargs) -> Tuple[str, bool]
return str(e), False return str(e), False
# Sender local-parts (matched exactly or by prefix) whose mail never carries a
# personal signature worth learning. These compare against the local-part
# (before "@"), so role names must NOT include a trailing "@" — "support@" etc.
# could never match a local-part of "support" and were silently dead.
_SIG_SKIP_PREFIXES = (
"noreply", "no-reply", "donotreply", "do-not-reply",
"mailer-daemon", "notifications", "notification", "bounce",
"newsletter", "support", "info", "admin",
)
async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, bool]: async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, bool]:
"""For each sender with ≥3 recent inbox emails, ask the LLM to extract """For each sender with ≥3 recent inbox emails, ask the LLM to extract
the common signature block across their messages. The cached sig is the common signature block across their messages. The cached sig is
@@ -1013,16 +1024,11 @@ async def action_learn_sender_signatures(owner: str, **kwargs) -> Tuple[str, boo
return "No emails to scan", True return "No emails to scan", True
# 2. Group by sender; drop addresses that don't carry useful sigs. # 2. Group by sender; drop addresses that don't carry useful sigs.
SKIP_PREFIXES = (
"noreply", "no-reply", "donotreply", "do-not-reply",
"mailer-daemon", "notifications", "notification", "bounce",
"newsletter", "support@", "info@", "admin@",
)
by_sender: dict[str, list[dict]] = {} by_sender: dict[str, list[dict]] = {}
for m in mails: for m in mails:
addr = m["from_address"] addr = m["from_address"]
local = addr.split("@", 1)[0] local = addr.split("@", 1)[0]
if any(local == p or local.startswith(p) for p in SKIP_PREFIXES): if any(local == p or local.startswith(p) for p in _SIG_SKIP_PREFIXES):
continue continue
# Skip plus-aliases / list-style addresses too. # Skip plus-aliases / list-style addresses too.
if "+" in local or "-noreply" in addr or "-bounces" in addr: if "+" in local or "-noreply" in addr or "-bounces" in addr:

View File

@@ -0,0 +1,35 @@
"""Sender-signature learning must skip role addresses like support@/info@.
The skip-list compares against the email local-part (before "@"), but the
entries were written "support@", "info@", "admin@" — which can never equal or
prefix a local-part of "support"/"info"/"admin", so those role senders were
NOT skipped and the LLM wasted work learning signatures from them. The entries
must omit the "@".
"""
from src.builtin_actions import _SIG_SKIP_PREFIXES
def _skipped(addr):
    """Return True if the local-part of *addr* starts with a skip-list entry.

    The local-part is everything before the first "@". An exact match is
    just a special case of a prefix match, so one ``startswith`` call with
    the whole tuple of prefixes covers both.
    """
    local_part, _, _ = addr.partition("@")
    return local_part.startswith(tuple(_SIG_SKIP_PREFIXES))
def test_role_addresses_are_skipped():
    """support@/info@/admin@ senders must be caught by the skip-list."""
    role_senders = (
        "support@vendor.com",
        "info@company.com",
        "admin@example.org",
    )
    for sender in role_senders:
        assert _skipped(sender)
def test_noreply_style_still_skipped():
    """Entries that never carried an "@" must keep matching as before."""
    automated_senders = (
        "noreply@x.com",
        "mailer-daemon@x.com",
        "newsletter@x.com",
    )
    for sender in automated_senders:
        assert _skipped(sender)
def test_real_person_is_not_skipped():
    """Ordinary personal addresses must not be caught by the skip-list."""
    personal_senders = ("john.smith@x.com", "alice@x.com")
    for sender in personal_senders:
        assert not _skipped(sender)
def test_no_skip_entry_contains_at():
    """Guard against regressions: no skip-list entry may contain "@"."""
    assert not any("@" in entry for entry in _SIG_SKIP_PREFIXES)