fix: research query misclassifies 'whatsapp'/'however' as questions (#1247)

* fix: detect question words as whole words, not prefixes

* fix: same question-word prefix bug in the services search copy

* test: question-word detection rejects prefix lookalikes
This commit is contained in:
Afonso Coutinho
2026-06-02 17:10:06 +01:00
committed by GitHub
parent 311f226d44
commit f62d6ea3d7
3 changed files with 26 additions and 2 deletions

View File

@@ -0,0 +1,18 @@
"""Tests for question-word detection in research query enhancement."""
from src.search.query import _detect_question_type
def test_whole_word_questions_detected():
    """Queries led by a standalone question word are classified as that word."""
    expected_by_query = {
        "what is topological data analysis": "what",
        "how do transformers work": "how",
        # A query made up of nothing but the question word still counts.
        "why": "why",
    }
    for query, expected in expected_by_query.items():
        assert _detect_question_type(query) == expected
def test_prefix_lookalikes_not_misclassified():
    """Words that merely begin with a question word are not treated as questions."""
    # Regression guard: matching on a bare prefix used to flag these queries
    # as questions, which made enhance_query append spurious boost terms.
    lookalike_queries = (
        "whatsapp pricing",
        "however we proceed",
        "whole foods stock",
        "howard stern show",
    )
    for query in lookalike_queries:
        assert _detect_question_type(query) is None