Topics: hydrate session history before analysis
analyze_topics() iterates session_manager.sessions and reads
session_data.get("history", []) directly. But SessionManager.load_sessions
seeds sessions metadata-only with empty history — messages are loaded
lazily, only when get_session(session_id) is called. So analyze_topics saw
empty history for every session that hadn't been individually opened this
process lifetime and reported total_topics: 0, even when the database held
plenty of matching messages.
Hydrate each candidate session via session_manager.get_session(session_id)
(the existing lazy-load path) before reading its history, after the
owner/archived filters so skipped sessions aren't loaded. Falls back to the
raw cached history when the manager has no get_session (test stubs).
tests/test_topic_analyzer.py: new test_topic_analyzer_hydrates_sessions
seeds a real SQLite DB with a session + message, runs the real
SessionManager (asserting cached history starts empty), then asserts
analyze_topics finds the topic. Fails before this change. The existing
keyword tests now pass an explicit owner to satisfy the owner-required
early return.
This commit is contained in:
@@ -49,7 +49,15 @@ def analyze_topics(session_manager, owner: str = None) -> Dict[str, Any]:
|
||||
if sess_owner != owner:
|
||||
continue
|
||||
|
||||
for msg in session_data.get("history", []):
|
||||
# Hydrate session to load history from DB if needed
|
||||
if hasattr(session_manager, "get_session"):
|
||||
hydrated_session = session_manager.get_session(session_id)
|
||||
history = hydrated_session.history
|
||||
else:
|
||||
hydrated_session = session_data
|
||||
history = session_data.get("history", [])
|
||||
|
||||
for msg in history:
|
||||
content_raw = msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", None)
|
||||
if not content_raw:
|
||||
continue
|
||||
|
||||
@@ -1,12 +1,17 @@
|
||||
"""Tests for topic keyword matching (src/topic_analyzer.py)."""
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
from sqlalchemy import create_engine
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from core.database import Base, Session as DbSession, ChatMessage as DbChatMessage
|
||||
from core.session_manager import SessionManager
|
||||
from src.topic_analyzer import analyze_topics
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
def _sm(*messages):
|
||||
history = [{"role": "user", "content": c} for c in messages]
|
||||
return SimpleNamespace(sessions={"s1": {"owner": None, "name": "S", "history": history}})
|
||||
return SimpleNamespace(sessions={"s1": {"owner": "alice", "name": "S", "history": history}})
|
||||
|
||||
|
||||
def _freq(result):
|
||||
@@ -16,15 +21,76 @@ def _freq(result):
|
||||
def test_substring_does_not_false_match_technology():
|
||||
# Regression: "ai" matched inside "email"/"again"/"rain"/"wait", flagging
|
||||
# Technology for messages with no technical content at all.
|
||||
result = analyze_topics(_sm("Can you send me an email again about the rain? I will wait."))
|
||||
result = analyze_topics(_sm("Can you send me an email again about the rain? I will wait."), owner="alice")
|
||||
assert "Technology" not in _freq(result)
|
||||
|
||||
|
||||
def test_real_keywords_still_match():
|
||||
result = analyze_topics(_sm("I wrote some Python code to test the algorithm."))
|
||||
result = analyze_topics(_sm("I wrote some Python code to test the algorithm."), owner="alice")
|
||||
assert _freq(result).get("Technology", 0) >= 1
|
||||
|
||||
|
||||
def test_multiword_keyword_matches():
|
||||
result = analyze_topics(_sm("Can you explain how to set this up?"))
|
||||
result = analyze_topics(_sm("Can you explain how to set this up?"), owner="alice")
|
||||
assert "Learning" in _freq(result)
|
||||
|
||||
|
||||
def test_topic_analyzer_hydrates_sessions(monkeypatch):
|
||||
# 1. Create clean in-memory database
|
||||
engine = create_engine("sqlite:///:memory:")
|
||||
Base.metadata.create_all(bind=engine)
|
||||
|
||||
# 2. Create test session factory
|
||||
TestSessionLocal = sessionmaker(bind=engine)
|
||||
|
||||
# 3. Populate test database with a session and a message about Python
|
||||
db = TestSessionLocal()
|
||||
session_id = "session-1"
|
||||
|
||||
s = DbSession(
|
||||
id=session_id,
|
||||
name="Python chat",
|
||||
endpoint_url="http://localhost:8000",
|
||||
model="gpt-4",
|
||||
owner="alice",
|
||||
message_count=1,
|
||||
created_at=datetime.utcnow(),
|
||||
updated_at=datetime.utcnow()
|
||||
)
|
||||
m = DbChatMessage(
|
||||
id="msg-1",
|
||||
session_id=session_id,
|
||||
role="user",
|
||||
content="I love writing python code.",
|
||||
timestamp=datetime.utcnow()
|
||||
)
|
||||
|
||||
db.add(s)
|
||||
db.add(m)
|
||||
db.commit()
|
||||
db.close()
|
||||
|
||||
# 4. Patch SessionLocal to use our in-memory DB
|
||||
import core.session_manager
|
||||
import core.database
|
||||
monkeypatch.setattr(core.session_manager, "SessionLocal", TestSessionLocal)
|
||||
monkeypatch.setattr(core.database, "SessionLocal", TestSessionLocal)
|
||||
|
||||
# 5. Initialize the real SessionManager and load metadata (seeds sessions with empty history)
|
||||
sm = SessionManager()
|
||||
|
||||
# Verify that the session is in sm.sessions, and its history is currently empty
|
||||
assert session_id in sm.sessions
|
||||
assert len(sm.sessions[session_id].history) == 0
|
||||
|
||||
# 6. Execute the topic analysis
|
||||
res = analyze_topics(sm, owner="alice")
|
||||
|
||||
# 7. Assertions
|
||||
# There should be 1 topic found (Technology, since "python" / "code" are keywords)
|
||||
assert res["total_topics"] > 0
|
||||
|
||||
# Check that the topic is Technology
|
||||
tech_topic = next((t for t in res["topics"] if t["topic"] == "Technology"), None)
|
||||
assert tech_topic is not None
|
||||
assert tech_topic["frequency"] >= 1
|
||||
|
||||
Reference in New Issue
Block a user