Skip invalid memory extractor rows (#1535)

This commit is contained in:
red person
2026-06-03 08:07:00 +03:00
committed by GitHub
parent d8f5c04340
commit d7a6cadbe2
2 changed files with 33 additions and 2 deletions

View File

@@ -34,7 +34,7 @@ def _fingerprint_entries(entries) -> str:
only on id+text+category. Any add/edit/delete invalidates it.""" only on id+text+category. Any add/edit/delete invalidates it."""
items = sorted( items = sorted(
(str(e.get("id", "")), e.get("text", ""), e.get("category", "")) (str(e.get("id", "")), e.get("text", ""), e.get("category", ""))
for e in entries for e in _memory_dicts(entries)
) )
h = hashlib.sha256() h = hashlib.sha256()
for triple in items: for triple in items:
@@ -42,6 +42,12 @@ def _fingerprint_entries(entries) -> str:
return h.hexdigest() return h.hexdigest()
def _memory_dicts(entries):
for entry in entries or []:
if isinstance(entry, dict):
yield entry
def _load_tidy_state(memory_manager) -> dict: def _load_tidy_state(memory_manager) -> dict:
path = _tidy_state_path(memory_manager) path = _tidy_state_path(memory_manager)
try: try:
@@ -211,7 +217,7 @@ def _is_text_duplicate(new_text: str, existing: list, threshold: float = 0.6) ->
new_tokens = set(new_text.lower().split()) new_tokens = set(new_text.lower().split())
if not new_tokens: if not new_tokens:
return False return False
for entry in existing: for entry in _memory_dicts(existing):
old_tokens = set(entry.get("text", "").lower().split()) old_tokens = set(entry.get("text", "").lower().split())
if not old_tokens: if not old_tokens:
continue continue

View File

@@ -0,0 +1,25 @@
from services.memory import memory_extractor
def test_fingerprint_entries_skips_invalid_rows():
    """Non-dict rows must not change the fingerprint of the valid rows."""
    fingerprint = memory_extractor._fingerprint_entries

    # Same valid row as the clean list, surrounded by rows that are not dicts.
    noisy_rows = [
        {"id": "1", "text": "User likes small PRs.", "category": "preference"},
        "bad-row",
        None,
    ]
    clean_rows = [
        {"id": "1", "text": "User likes small PRs.", "category": "preference"},
    ]

    noisy_digest = fingerprint(noisy_rows)
    clean_digest = fingerprint(clean_rows)

    assert noisy_digest == clean_digest
def test_duplicate_check_skips_invalid_rows():
    """A matching dict row is still found when non-dict rows surround it."""
    stored_rows = ["bad-row"]
    stored_rows.append({"text": "User likes small pull requests."})
    stored_rows.append(None)

    found = memory_extractor._is_text_duplicate(
        "User likes small pull requests.",
        stored_rows,
    )

    assert found