fix: _extract_entities crashes on a non-string query (#1724)
This commit is contained in:
@@ -27,6 +27,8 @@ def _detect_question_type(query: str) -> Optional[str]:
|
||||
|
||||
def _extract_entities(query: str) -> Dict[str, List[str]]:
|
||||
"""Lightweight entity extraction: capitalized words and date patterns."""
|
||||
if not isinstance(query, str):
|
||||
return {"names": [], "dates": []}
|
||||
entities: Dict[str, List[str]] = {"names": [], "dates": []}
|
||||
qtype = _detect_question_type(query)
|
||||
cleaned = query
|
||||
|
||||
15
tests/test_search_query_entities_nonstring.py
Normal file
15
tests/test_search_query_entities_nonstring.py
Normal file
@@ -0,0 +1,15 @@
|
||||
from services.search.query import _extract_entities
|
||||
|
||||
|
||||
def test_extract_entities_handles_non_string_query():
    """A non-str query must produce an empty entity mapping, not an error."""
    # _detect_question_type already guards non-strings, but the function then
    # runs re.findall over `query` directly, which raises TypeError on a
    # non-string. A non-str query should yield no entities.
    no_entities = {"names": [], "dates": []}
    for bad_query in (None, 123):
        assert _extract_entities(bad_query) == no_entities
|
||||
|
||||
|
||||
def test_extract_entities_still_finds_names_and_years():
    """A regular string query still yields its capitalized name and year."""
    found = _extract_entities("What did Alice do in 2024")
    # Checked in the same order as before: names first, then dates.
    for bucket, expected in (("names", "Alice"), ("dates", "2024")):
        assert expected in found[bucket]
|
||||
Reference in New Issue
Block a user