fix: _extract_entities crashes on a non-string query (#1724)

This commit is contained in:
Afonso Coutinho
2026-06-03 05:30:28 +01:00
committed by GitHub
parent 0c37943267
commit c5bc39de88
2 changed files with 17 additions and 0 deletions

View File

@@ -27,6 +27,8 @@ def _detect_question_type(query: str) -> Optional[str]:
def _extract_entities(query: str) -> Dict[str, List[str]]: def _extract_entities(query: str) -> Dict[str, List[str]]:
"""Lightweight entity extraction: capitalized words and date patterns.""" """Lightweight entity extraction: capitalized words and date patterns."""
if not isinstance(query, str):
return {"names": [], "dates": []}
entities: Dict[str, List[str]] = {"names": [], "dates": []} entities: Dict[str, List[str]] = {"names": [], "dates": []}
qtype = _detect_question_type(query) qtype = _detect_question_type(query)
cleaned = query cleaned = query

View File

@@ -0,0 +1,15 @@
from services.search.query import _extract_entities
def test_extract_entities_handles_non_string_query():
    """A non-str query yields the empty entity mapping instead of raising."""
    # _detect_question_type already tolerates non-strings; the failure came
    # from the later re.findall call made on `query` itself, which raises
    # TypeError when handed anything that is not a string.
    no_entities = {"names": [], "dates": []}
    for bad_query in (None, 123):
        assert _extract_entities(bad_query) == no_entities
def test_extract_entities_still_finds_names_and_years():
    """An ordinary string query still produces its name and year entities."""
    found = _extract_entities("What did Alice do in 2024")
    # Capitalized word is reported as a name.
    assert "Alice" in found["names"]
    # Four-digit year is reported as a date.
    assert "2024" in found["dates"]