Skip invalid memory extractor rows (#1535)
This commit is contained in:
@@ -34,7 +34,7 @@ def _fingerprint_entries(entries) -> str:
|
|||||||
only on id+text+category. Any add/edit/delete invalidates it."""
|
only on id+text+category. Any add/edit/delete invalidates it."""
|
||||||
items = sorted(
|
items = sorted(
|
||||||
(str(e.get("id", "")), e.get("text", ""), e.get("category", ""))
|
(str(e.get("id", "")), e.get("text", ""), e.get("category", ""))
|
||||||
for e in entries
|
for e in _memory_dicts(entries)
|
||||||
)
|
)
|
||||||
h = hashlib.sha256()
|
h = hashlib.sha256()
|
||||||
for triple in items:
|
for triple in items:
|
||||||
@@ -42,6 +42,12 @@ def _fingerprint_entries(entries) -> str:
|
|||||||
return h.hexdigest()
|
return h.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
|
def _memory_dicts(entries):
|
||||||
|
for entry in entries or []:
|
||||||
|
if isinstance(entry, dict):
|
||||||
|
yield entry
|
||||||
|
|
||||||
|
|
||||||
def _load_tidy_state(memory_manager) -> dict:
|
def _load_tidy_state(memory_manager) -> dict:
|
||||||
path = _tidy_state_path(memory_manager)
|
path = _tidy_state_path(memory_manager)
|
||||||
try:
|
try:
|
||||||
@@ -211,7 +217,7 @@ def _is_text_duplicate(new_text: str, existing: list, threshold: float = 0.6) ->
|
|||||||
new_tokens = set(new_text.lower().split())
|
new_tokens = set(new_text.lower().split())
|
||||||
if not new_tokens:
|
if not new_tokens:
|
||||||
return False
|
return False
|
||||||
for entry in existing:
|
for entry in _memory_dicts(existing):
|
||||||
old_tokens = set(entry.get("text", "").lower().split())
|
old_tokens = set(entry.get("text", "").lower().split())
|
||||||
if not old_tokens:
|
if not old_tokens:
|
||||||
continue
|
continue
|
||||||
|
|||||||
25
tests/test_memory_extractor_rows.py
Normal file
25
tests/test_memory_extractor_rows.py
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
from services.memory import memory_extractor
|
||||||
|
|
||||||
|
|
||||||
|
def test_fingerprint_entries_skips_invalid_rows():
    """Fingerprint of a list with non-dict rows equals that of the dict rows alone."""
    valid_row = {"id": "1", "text": "User likes small PRs.", "category": "preference"}

    # Same valid row, once surrounded by junk (a string and None) and once alone.
    with_junk = memory_extractor._fingerprint_entries([dict(valid_row), "bad-row", None])
    clean = memory_extractor._fingerprint_entries([dict(valid_row)])

    assert with_junk == clean
|
||||||
|
|
||||||
|
|
||||||
|
def test_duplicate_check_skips_invalid_rows():
    """A matching dict row is still detected when non-dict rows surround it."""
    phrase = "User likes small pull requests."
    # The only valid row sits between a string and None, neither of which is a dict.
    rows = ["bad-row", {"text": phrase}, None]

    assert memory_extractor._is_text_duplicate(phrase, rows)
|
||||||
Reference in New Issue
Block a user