fix: _extract_entities crashes on a non-string query (#1724)
This commit is contained in:
@@ -27,6 +27,8 @@ def _detect_question_type(query: str) -> Optional[str]:
|
|||||||
|
|
||||||
def _extract_entities(query: str) -> Dict[str, List[str]]:
|
def _extract_entities(query: str) -> Dict[str, List[str]]:
|
||||||
"""Lightweight entity extraction: capitalized words and date patterns."""
|
"""Lightweight entity extraction: capitalized words and date patterns."""
|
||||||
|
if not isinstance(query, str):
|
||||||
|
return {"names": [], "dates": []}
|
||||||
entities: Dict[str, List[str]] = {"names": [], "dates": []}
|
entities: Dict[str, List[str]] = {"names": [], "dates": []}
|
||||||
qtype = _detect_question_type(query)
|
qtype = _detect_question_type(query)
|
||||||
cleaned = query
|
cleaned = query
|
||||||
|
|||||||
15
tests/test_search_query_entities_nonstring.py
Normal file
15
tests/test_search_query_entities_nonstring.py
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
from services.search.query import _extract_entities
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_entities_handles_non_string_query():
    """A non-string query must yield the empty entity mapping, not raise."""
    # _detect_question_type already tolerates non-strings; the risk is the
    # later re.findall over the raw query, which raises TypeError unless the
    # function bails out early for non-str input.
    no_entities = {"names": [], "dates": []}
    for bad_query in (None, 123):
        assert _extract_entities(bad_query) == no_entities
|
||||||
|
|
||||||
|
|
||||||
|
def test_extract_entities_still_finds_names_and_years():
    """A regular string query still reports its capitalized name and its year."""
    found = _extract_entities("What did Alice do in 2024")
    # Check the buckets in the same order as before: names first, then dates.
    for bucket, expected in (("names", "Alice"), ("dates", "2024")):
        assert expected in found[bucket]
|
||||||
Reference in New Issue
Block a user