fix: research query misclassifies 'whatsapp'/'however' as questions (#1247)
* fix: detect question words as whole words, not prefixes
* fix: same question-word prefix bug in the services search copy
* test: question-word detection rejects prefix lookalikes
This commit is contained in:
def _detect_question_type(query: str) -> Optional[str]:
    """Return the leading question word if present (who, what, when, where, why, how).

    The query is stripped and lower-cased before matching. A question word
    only counts when it stands alone at the start of the query: it is either
    the entire query or is followed by a character that cannot continue a
    word (whitespace, punctuation, an apostrophe, ...).

    Args:
        query: Raw query text.

    Returns:
        The matched question word in lower case, or ``None`` when the query
        does not open with one.
    """
    q = query.strip().lower()
    for word in ("who", "what", "when", "where", "why", "how"):
        if not q.startswith(word):
            continue
        # Require a whole-word match: a bare prefix mis-flags ordinary queries
        # like "whatsapp pricing" (-> what) or "however ..." (-> how), which
        # then get spurious boost terms OR-appended in enhance_query.
        # Checking only for a following space is too strict, though: it
        # misses "why?", "what's new" and tab/newline-separated queries.
        following = q[len(word):len(word) + 1]
        if not following or not (following.isalnum() or following == "_"):
            return word
    return None
|
||||
|
||||
|
||||
def _detect_question_type(query: str) -> Optional[str]:
    """Return the leading question word if present (who, what, when, where, why, how).

    The query is stripped and lower-cased before matching. A question word
    only counts when it stands alone at the start of the query: it is either
    the entire query or is followed by a character that cannot continue a
    word (whitespace, punctuation, an apostrophe, ...).

    Args:
        query: Raw query text.

    Returns:
        The matched question word in lower case, or ``None`` when the query
        does not open with one.
    """
    q = query.strip().lower()
    for word in ("who", "what", "when", "where", "why", "how"):
        if not q.startswith(word):
            continue
        # Require a whole-word match: a bare prefix mis-flags ordinary queries
        # like "whatsapp pricing" (-> what) or "however ..." (-> how), which
        # then get spurious boost terms OR-appended in enhance_query.
        # Checking only for a following space is too strict, though: it
        # misses "why?", "what's new" and tab/newline-separated queries.
        following = q[len(word):len(word) + 1]
        if not following or not (following.isalnum() or following == "_"):
            return word
    return None
|
||||
|
||||
|
||||
18
tests/test_question_type_detection.py
Normal file
18
tests/test_question_type_detection.py
Normal file
@@ -0,0 +1,18 @@
|
||||
"""Tests for question-word detection in research query enhancement."""
|
||||
|
||||
from src.search.query import _detect_question_type
|
||||
|
||||
|
||||
def test_whole_word_questions_detected():
    """A query that opens with a standalone question word reports that word."""
    cases = (
        ("what is topological data analysis", "what"),
        ("how do transformers work", "how"),
        # A query consisting of nothing but the question word still counts.
        ("why", "why"),
    )
    for query, expected in cases:
        assert _detect_question_type(query) == expected
|
||||
|
||||
|
||||
def test_prefix_lookalikes_not_misclassified():
    """Queries that merely begin with a question word's letters yield None."""
    # Regression: a bare prefix used to flag these as questions and append
    # spurious boost terms in enhance_query.
    lookalikes = (
        "whatsapp pricing",
        "however we proceed",
        "whole foods stock",
        "howard stern show",
    )
    for query in lookalikes:
        assert _detect_question_type(query) is None
|
||||
Reference in New Issue
Block a user