Fix AttributeError on bullet lines in extract_memory_from_chat (#873)

The fallback memory extractor (used by routes/memory_routes.py when the LLM extractor fails) matched list items with `r'^[-*•]|\d+\.\s*(.*)'`. Alternation (`|`) has the lowest precedence in a regex, so that pattern parses as `(^[-*•]) | (\d+\.\s*(.*))` and the capture group exists only on the numbered-list branch. A bullet line ("- foo") matches the first branch, so the match succeeds (and passes the `if text_match:` check) but `group(1)` is None, and `text_match.group(1).strip()` raises AttributeError — crashing extraction for any assistant message that contains a bullet list (i.e. most of them). Numbered lists happened to work. Fix: group both markers — `r'^(?:[-*•]|\d+\.)\s*(.*)'` — so the capture applies to bullets and numbers alike. Adds tests/test_memory_bullet_extraction.py (red before, green after).
2026-06-02 05:46:06 +03:00
parent 2b39412355
commit f13d897093
2 changed files with 32 additions and 2 deletions
--- a/src/memory.py
+++ b/src/memory.py
@@ -59,8 +59,12 @@ class MemoryManager:
                    line = line.strip()
                    # Look for bullet points or numbered lists that might contain memories
                    if re.match(r'^[-*•]|\d+\.', line):
-                        # Extract the text after the bullet/number
-                        text_match = re.match(r'^[-*•]|\d+\.\s*(.*)', line)
+                        # Extract the text after the bullet/number. Group both
+                        # markers so the capture applies to either — the previous
+                        # `^[-*•]|\d+\.\s*(.*)` put the group on the numbered branch
+                        # only, so a bullet line matched with group(1)=None and
+                        # crashed on .strip().
+                        text_match = re.match(r'^(?:[-*•]|\d+\.)\s*(.*)', line)
                        if text_match:
                            text = text_match.group(1).strip()
                            if text:
--- a/tests/test_memory_bullet_extraction.py
+++ b/tests/test_memory_bullet_extraction.py
@@ -0,0 +1,26 @@
+"""Regression test: extract_memory_from_chat must not crash on bullet lines.
+
+The fallback memory extractor (invoked by routes/memory_routes.py when the LLM
+extractor fails) matched list items with ``r'^[-*•]|\\d+\\.\\s*(.*)'``. Because
+of alternation precedence that pattern is ``(^[-*•]) | (\\d+\\.\\s*(.*))`` — the
+capture group lives only in the numbered-list branch. A bullet line ("- ...")
+matches the first branch, so ``group(1)`` is ``None`` and ``.strip()`` raised
+``AttributeError``, crashing extraction for any assistant message that contains
+a bullet list (the dominant case).
+"""
+from src.memory import MemoryManager
+
+
+def test_extract_memory_from_chat_handles_bullets(tmp_path):
+    mgr = MemoryManager(str(tmp_path))  # tmp_path is presumably pytest's temp-dir fixture (confirm)
+    chat = [{  # one assistant message whose content holds three list items
+        "role": "assistant",
+        "content": "- User likes coffee\n* Prefers tea in winter\n1. Wakes at 6am",
+    }]
+    # Must return normally: before the fix the '-' and '*' lines raised AttributeError.
+    out = mgr.extract_memory_from_chat(chat)
+    texts = [m["text"] for m in out]  # compare on each returned entry's "text" field
+    # Each marker style should yield its text with the list marker stripped.
+    assert "User likes coffee" in texts       # '-' bullet (used to crash)
+    assert "Prefers tea in winter" in texts   # '*' bullet (used to crash)
+    assert "Wakes at 6am" in texts            # numbered list (already worked)