* fix: extract full year in research query entities, not just the century * fix: same year capture-group bug in the services search copy * test: research query extracts the full year
22 lines
760 B
Python
22 lines
760 B
Python
"""Tests for research query entity extraction (src/search/query.py)."""
|
|
|
|
from src.search.query import _extract_entities
|
|
|
|
|
|
def test_extracts_full_four_digit_year():
    """A four-digit year in the query is reported whole, not as its century."""
    # Regression guard: the year pattern once wrapped the century in a
    # capturing group `(19|20)`, which made re.findall yield only the
    # century ("20") rather than the complete year.
    dates = _extract_entities("What happened to OpenAI in 2024")["dates"]
    assert "2024" in dates
    assert "20" not in dates
|
|
|
|
|
|
def test_extracts_multiple_years():
    """Both years named in the query come back, in query order."""
    expected_years = ["1999", "2008"]
    found = _extract_entities("Compare revenue in 1999 and 2008")
    assert found["dates"] == expected_years
|
|
|
|
|
|
def test_no_false_year_from_other_numbers():
    """A query whose only number is "50" yields no date entities."""
    query = "Top 50 albums of all time"
    assert _extract_entities(query)["dates"] == []
|