From f13d897093b99caff4af9776f83b716d7778aeca Mon Sep 17 00:00:00 2001 From: mist Date: Tue, 2 Jun 2026 05:46:06 +0300 Subject: [PATCH] Fix AttributeError on bullet lines in extract_memory_from_chat (#873) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fallback memory extractor (used by routes/memory_routes.py when the LLM extractor fails) matched list items with `r'^[-*•]|\d+\.\s*(.*)'`. Operator precedence makes that `(^[-*•]) | (\d+\.\s*(.*))`, so the capture group only exists on the numbered-list branch. A bullet line ("- foo") matches the first branch, so `group(1)` is None and `text_match.group(1).strip()` raises AttributeError — crashing extraction for any assistant message that contains a bullet list (i.e. most of them). Numbered lists happened to work. Group both markers — `r'^(?:[-*•]|\d+\.)\s*(.*)'` — so the capture applies to bullets and numbers alike. Adds tests/test_memory_bullet_extraction.py (red before, green after). --- src/memory.py | 8 ++++++-- tests/test_memory_bullet_extraction.py | 26 ++++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 2 deletions(-) create mode 100644 tests/test_memory_bullet_extraction.py diff --git a/src/memory.py b/src/memory.py index 4370f7b..2254c28 100644 --- a/src/memory.py +++ b/src/memory.py @@ -59,8 +59,12 @@ class MemoryManager: line = line.strip() # Look for bullet points or numbered lists that might contain memories if re.match(r'^[-*•]|\d+\.', line): - # Extract the text after the bullet/number - text_match = re.match(r'^[-*•]|\d+\.\s*(.*)', line) + # Extract the text after the bullet/number. Group both + # markers so the capture applies to either — the previous + # `^[-*•]|\d+\.\s*(.*)` put the group on the numbered branch + # only, so a bullet line matched with group(1)=None and + # crashed on .strip(). + text_match = re.match(r'^(?:[-*•]|\d+\.)\s*(.*)', line) if text_match: text = text_match.group(1).strip() if text: diff --git a/tests/test_memory_bullet_extraction.py b/tests/test_memory_bullet_extraction.py new file mode 100644 index 0000000..3c871ee --- /dev/null +++ b/tests/test_memory_bullet_extraction.py @@ -0,0 +1,26 @@ +"""Regression test: extract_memory_from_chat must not crash on bullet lines. + +The fallback memory extractor (invoked by routes/memory_routes.py when the LLM +extractor fails) matched list items with ``r'^[-*•]|\\d+\\.\\s*(.*)'``. Because +of alternation precedence that pattern is ``(^[-*•]) | (\\d+\\.\\s*(.*))`` — the +capture group lives only in the numbered-list branch. A bullet line ("- ...") +matches the first branch, so ``group(1)`` is ``None`` and ``.strip()`` raised +``AttributeError``, crashing extraction for any assistant message that contains +a bullet list (the dominant case). +""" +from src.memory import MemoryManager + + +def test_extract_memory_from_chat_handles_bullets(tmp_path): + mgr = MemoryManager(str(tmp_path)) + chat = [{ + "role": "assistant", + "content": "- User likes coffee\n* Prefers tea in winter\n1. Wakes at 6am", + }] + + out = mgr.extract_memory_from_chat(chat) + texts = [m["text"] for m in out] + + assert "User likes coffee" in texts # '-' bullet (used to crash) + assert "Prefers tea in winter" in texts # '*' bullet (used to crash) + assert "Wakes at 6am" in texts # numbered list (already worked)