From c5bc39de88e22e81d475221cabf3cce7324cd5fb Mon Sep 17 00:00:00 2001
From: Afonso Coutinho
Date: Wed, 3 Jun 2026 05:30:28 +0100
Subject: [PATCH] fix: _extract_entities crashes on a non-string query (#1724)

---
Review notes (placed after the "---" separator, so not part of the commit message):

* The added guard is the first statement after the docstring. For any
  argument that is not a `str` instance it returns a fresh
  {"names": [], "dates": []} before `entities` is initialised and before
  `_detect_question_type(query)` is called.
* The returned literal repeats the initial value assigned to `entities`
  on the following line; the two must be kept in sync by hand.
* The new test module checks `None` and `123` for the empty result, and
  checks that "What did Alice do in 2024" still yields "Alice" in
  `names` and "2024" in `dates`.
* NOTE(review): the test comment says `_detect_question_type` already
  guards non-strings and that the function later runs `re.findall` on
  `query`. Neither is visible in this hunk -- confirm against the full
  file.

 services/search/query.py                      |  2 ++
 tests/test_search_query_entities_nonstring.py | 15 +++++++++++++++
 2 files changed, 17 insertions(+)
 create mode 100644 tests/test_search_query_entities_nonstring.py

diff --git a/services/search/query.py b/services/search/query.py
index 03596ed..3bb3984 100644
--- a/services/search/query.py
+++ b/services/search/query.py
@@ -27,6 +27,8 @@ def _detect_question_type(query: str) -> Optional[str]:
 
 def _extract_entities(query: str) -> Dict[str, List[str]]:
     """Lightweight entity extraction: capitalized words and date patterns."""
+    if not isinstance(query, str):
+        return {"names": [], "dates": []}
     entities: Dict[str, List[str]] = {"names": [], "dates": []}
     qtype = _detect_question_type(query)
     cleaned = query
diff --git a/tests/test_search_query_entities_nonstring.py b/tests/test_search_query_entities_nonstring.py
new file mode 100644
index 0000000..0c4f9b1
--- /dev/null
+++ b/tests/test_search_query_entities_nonstring.py
@@ -0,0 +1,15 @@
+from services.search.query import _extract_entities
+
+
+def test_extract_entities_handles_non_string_query():
+    # _detect_question_type already guards non-strings, but the function then
+    # runs re.findall over `query` directly, which raises TypeError on a
+    # non-string. A non-str query should yield no entities.
+    assert _extract_entities(None) == {"names": [], "dates": []}
+    assert _extract_entities(123) == {"names": [], "dates": []}
+
+
+def test_extract_entities_still_finds_names_and_years():
+    out = _extract_entities("What did Alice do in 2024")
+    assert "Alice" in out["names"]
+    assert "2024" in out["dates"]