Skip invalid skill extractor rows (#1546)

This commit is contained in:
red person
2026-06-03 08:06:53 +03:00
committed by GitHub
parent 815bdf57d5
commit ee8c049f9e
2 changed files with 31 additions and 4 deletions

View File

@@ -48,6 +48,21 @@ MIN_CONFIDENCE = 0.6
CONTEXT_WINDOW = 12
def _skill_dicts(skills):
for skill in skills or []:
if isinstance(skill, dict):
yield skill
def _has_duplicate_title(skills, title: str) -> bool:
    """Return True if a valid row in *skills* already carries *title*.

    Rows are filtered through ``_skill_dicts`` so non-dict rows are ignored.
    A row whose ``"title"`` value is not a string never matches; a row with
    no ``"title"`` key is treated as having an empty title.

    Titles are compared with ``str.casefold()`` rather than ``str.lower()``
    so that caseless matching also works for non-ASCII titles (for example
    "Straße" and "STRASSE" are considered the same title).

    Args:
        skills: Iterable of stored skill rows (may be ``None`` or contain
            malformed, non-dict rows).
        title: Title of the candidate skill.

    Returns:
        ``True`` when a row with an equivalent title exists, else ``False``.
    """
    wanted = title.casefold()
    stored_titles = (row.get("title", "") for row in _skill_dicts(skills))
    return any(
        isinstance(stored, str) and stored.casefold() == wanted
        for stored in stored_titles
    )
async def maybe_extract_skill(
session,
skills_manager,
@@ -191,8 +206,7 @@ async def maybe_extract_skill(
# Check for duplicate skills
existing = skills_manager.load(owner=owner)
for sk in existing:
if sk.get("title", "").lower() == title.lower():
if _has_duplicate_title(existing, title):
logger.debug("[skill-extract] '%s' already exists — dropped as duplicate", title)
return None

View File

@@ -0,0 +1,13 @@
from services.memory import skill_extractor
def test_duplicate_title_skips_invalid_skill_rows():
    """Malformed rows are ignored while a valid row is still matched.

    The fixture mixes a bare string, ``None`` and a dict with a non-string
    title alongside one well-formed row; the lookup must neither raise on
    the bad rows nor report a title that is not present.
    """
    has_duplicate = skill_extractor._has_duplicate_title
    mixed_rows = ["bad-row", None, {"title": 123}, {"title": "Small PR workflow"}]

    assert has_duplicate(mixed_rows, "small pr workflow")
    assert not has_duplicate(mixed_rows, "release checklist")