From d7a6cadbe2e2d69be9a67bb46e8150bad10207fc Mon Sep 17 00:00:00 2001
From: red person
Date: Wed, 3 Jun 2026 08:07:00 +0300
Subject: [PATCH] Skip invalid memory extractor rows (#1535)

---
Review notes (placed after the "---" separator, so they are not part of
the commit message):

- _memory_dicts() treats a None/empty `entries` as empty and yields only
  the items that are dict instances; every other row is skipped silently.
- _fingerprint_entries() and _is_text_duplicate() now iterate through
  _memory_dicts(), so non-dict rows no longer reach the .get() calls.
- Both call sites still read values with .get(key, ""), whose default only
  applies when the key is missing. A dict row such as {"text": None} would
  therefore still reach .lower() in _is_text_duplicate().
  NOTE(review): confirm whether such rows can occur; if so, that needs a
  follow-up change.

 services/memory/memory_extractor.py | 10 ++++++++--
 tests/test_memory_extractor_rows.py | 25 +++++++++++++++++++++++++
 2 files changed, 33 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_memory_extractor_rows.py

diff --git a/services/memory/memory_extractor.py b/services/memory/memory_extractor.py
index f31dc7c..32412e6 100644
--- a/services/memory/memory_extractor.py
+++ b/services/memory/memory_extractor.py
@@ -34,7 +34,7 @@ def _fingerprint_entries(entries) -> str:
     only on id+text+category. Any add/edit/delete invalidates it."""
     items = sorted(
         (str(e.get("id", "")), e.get("text", ""), e.get("category", ""))
-        for e in entries
+        for e in _memory_dicts(entries)
     )
     h = hashlib.sha256()
     for triple in items:
@@ -42,6 +42,12 @@ def _fingerprint_entries(entries) -> str:
     return h.hexdigest()
 
 
+def _memory_dicts(entries):
+    for entry in entries or []:
+        if isinstance(entry, dict):
+            yield entry
+
+
 def _load_tidy_state(memory_manager) -> dict:
     path = _tidy_state_path(memory_manager)
     try:
@@ -211,7 +217,7 @@ def _is_text_duplicate(new_text: str, existing: list, threshold: float = 0.6) ->
     new_tokens = set(new_text.lower().split())
     if not new_tokens:
         return False
-    for entry in existing:
+    for entry in _memory_dicts(existing):
         old_tokens = set(entry.get("text", "").lower().split())
         if not old_tokens:
             continue
diff --git a/tests/test_memory_extractor_rows.py b/tests/test_memory_extractor_rows.py
new file mode 100644
index 0000000..7ff8d47
--- /dev/null
+++ b/tests/test_memory_extractor_rows.py
@@ -0,0 +1,25 @@
+from services.memory import memory_extractor
+
+
+def test_fingerprint_entries_skips_invalid_rows():
+    value = memory_extractor._fingerprint_entries([
+        {"id": "1", "text": "User likes small PRs.", "category": "preference"},
+        "bad-row",
+        None,
+    ])
+
+    expected = memory_extractor._fingerprint_entries([
+        {"id": "1", "text": "User likes small PRs.", "category": "preference"},
+    ])
+
+    assert value == expected
+
+
+def test_duplicate_check_skips_invalid_rows():
+    existing = [
+        "bad-row",
+        {"text": "User likes small pull requests."},
+        None,
+    ]
+
+    assert memory_extractor._is_text_duplicate("User likes small pull requests.", existing)