Fix AttributeError on bullet lines in extract_memory_from_chat (#873)
The fallback memory extractor (used by routes/memory_routes.py when the LLM
extractor fails) matched list items with `r'^[-*•]|\d+\.\s*(.*)'`. Operator
precedence makes that `(^[-*•]) | (\d+\.\s*(.*))`, so the capture group only
exists on the numbered-list branch.
A bullet line ("- foo") matches the first branch, so `group(1)` is None and
`text_match.group(1).strip()` raises AttributeError — crashing extraction for
any assistant message that contains a bullet list (i.e. most of them). Numbered
lists happened to work.
Group both markers — `r'^(?:[-*•]|\d+\.)\s*(.*)'` — so the capture applies to
bullets and numbers alike.
Add tests/test_memory_bullet_extraction.py, a regression test that fails before this change and passes after it.
This commit is contained in:
@@ -59,8 +59,12 @@ class MemoryManager:
|
||||
line = line.strip()
|
||||
# Look for bullet points or numbered lists that might contain memories
|
||||
if re.match(r'^[-*•]|\d+\.', line):
|
||||
# Extract the text after the bullet/number
|
||||
text_match = re.match(r'^[-*•]|\d+\.\s*(.*)', line)
|
||||
# Extract the text after the bullet/number. Group both
|
||||
# markers so the capture applies to either — the previous
|
||||
# `^[-*•]|\d+\.\s*(.*)` put the group on the numbered branch
|
||||
# only, so a bullet line matched with group(1)=None and
|
||||
# crashed on .strip().
|
||||
text_match = re.match(r'^(?:[-*•]|\d+\.)\s*(.*)', line)
|
||||
if text_match:
|
||||
text = text_match.group(1).strip()
|
||||
if text:
|
||||
|
||||
26
tests/test_memory_bullet_extraction.py
Normal file
26
tests/test_memory_bullet_extraction.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""Regression test: extract_memory_from_chat must not crash on bullet lines.
|
||||
|
||||
The fallback memory extractor (invoked by routes/memory_routes.py when the LLM
|
||||
extractor fails) matched list items with ``r'^[-*•]|\\d+\\.\\s*(.*)'``. Because
|
||||
of alternation precedence that pattern is ``(^[-*•]) | (\\d+\\.\\s*(.*))`` — the
|
||||
capture group lives only in the numbered-list branch. A bullet line ("- ...")
|
||||
matches the first branch, so ``group(1)`` is ``None`` and ``.strip()`` raised
|
||||
``AttributeError``, crashing extraction for any assistant message that contains
|
||||
a bullet list (the dominant case).
|
||||
"""
|
||||
from src.memory import MemoryManager
|
||||
|
||||
|
||||
def test_extract_memory_from_chat_handles_bullets(tmp_path):
    """Check that '-', '*' and numbered list lines each yield a memory entry.

    The two bullet forms previously raised ``AttributeError`` inside
    ``extract_memory_from_chat``; the numbered form already worked and is
    kept here to guard against regressing it.
    """
    manager = MemoryManager(str(tmp_path))
    assistant_message = {
        "role": "assistant",
        "content": "- User likes coffee\n* Prefers tea in winter\n1. Wakes at 6am",
    }

    extracted = manager.extract_memory_from_chat([assistant_message])
    texts = [entry["text"] for entry in extracted]

    # Order: '-' bullet (used to crash), '*' bullet (used to crash),
    # numbered item (already worked).
    for expected in (
        "User likes coffee",
        "Prefers tea in winter",
        "Wakes at 6am",
    ):
        assert expected in texts
|
||||
Reference in New Issue
Block a user