Skip invalid skill extractor rows (#1546)
This commit is contained in:
@@ -48,6 +48,21 @@ MIN_CONFIDENCE = 0.6
|
|||||||
CONTEXT_WINDOW = 12
|
CONTEXT_WINDOW = 12
|
||||||
|
|
||||||
|
|
||||||
|
def _skill_dicts(skills):
|
||||||
|
for skill in skills or []:
|
||||||
|
if isinstance(skill, dict):
|
||||||
|
yield skill
|
||||||
|
|
||||||
|
|
||||||
|
def _has_duplicate_title(skills, title: str) -> bool:
    """Return whether any valid skill row already carries *title*.

    The comparison is caseless. Rows are filtered through ``_skill_dicts``,
    so non-dict rows are ignored, and a row whose ``"title"`` is missing or
    is not a string never matches.

    Args:
        skills: Collection of skill rows, possibly containing invalid rows.
        title: Title to look for.

    Returns:
        ``True`` if some dict row has a string title equal to *title*
        ignoring case, otherwise ``False``.
    """
    # casefold() rather than lower(): it is the str method intended for
    # caseless matching and also equates forms such as "ß" and "ss".
    wanted = title.casefold()
    candidates = (row.get("title", "") for row in _skill_dicts(skills))
    return any(
        isinstance(existing, str) and existing.casefold() == wanted
        for existing in candidates
    )
|
||||||
|
|
||||||
|
|
||||||
async def maybe_extract_skill(
|
async def maybe_extract_skill(
|
||||||
session,
|
session,
|
||||||
skills_manager,
|
skills_manager,
|
||||||
@@ -191,10 +206,9 @@ async def maybe_extract_skill(
|
|||||||
|
|
||||||
# Check for duplicate skills
|
# Check for duplicate skills
|
||||||
existing = skills_manager.load(owner=owner)
|
existing = skills_manager.load(owner=owner)
|
||||||
for sk in existing:
|
if _has_duplicate_title(existing, title):
|
||||||
if sk.get("title", "").lower() == title.lower():
|
logger.debug("[skill-extract] '%s' already exists — dropped as duplicate", title)
|
||||||
logger.debug("[skill-extract] '%s' already exists — dropped as duplicate", title)
|
return None
|
||||||
return None
|
|
||||||
|
|
||||||
entry = skills_manager.add_skill(
|
entry = skills_manager.add_skill(
|
||||||
title=title,
|
title=title,
|
||||||
|
|||||||
13
tests/test_skill_extractor_rows.py
Normal file
13
tests/test_skill_extractor_rows.py
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
from services.memory import skill_extractor
|
||||||
|
|
||||||
|
|
||||||
|
def test_duplicate_title_skips_invalid_skill_rows():
    """Invalid rows are ignored while a valid title still matches caselessly."""
    malformed = ["bad-row", None, {"title": 123}]
    stored = [*malformed, {"title": "Small PR workflow"}]

    has_duplicate = skill_extractor._has_duplicate_title

    # Same title in a different case counts as a duplicate.
    assert has_duplicate(stored, "small pr workflow")
    # An unrelated title is not reported, and the malformed rows do not raise.
    assert not has_duplicate(stored, "release checklist")
|
||||||
Reference in New Issue
Block a user