fix: research query misclassifies 'whatsapp'/'however' as questions (#1247)
* fix: detect question words as whole words, not prefixes
* fix: same question-word prefix bug in the services search copy
* test: question-word detection rejects prefix lookalikes
This commit is contained in:
@@ -15,7 +15,10 @@ def _detect_question_type(query: str) -> Optional[str]:
|
|||||||
"""Return the leading question word if present (who, what, when, where, why, how)."""
|
"""Return the leading question word if present (who, what, when, where, why, how)."""
|
||||||
q = query.strip().lower()
|
q = query.strip().lower()
|
||||||
for word in ("who", "what", "when", "where", "why", "how"):
|
for word in ("who", "what", "when", "where", "why", "how"):
|
||||||
if q.startswith(word):
|
# Require a whole-word match: a bare prefix mis-flags ordinary queries
|
||||||
|
# like "whatsapp pricing" (-> what) or "however ..." (-> how), which
|
||||||
|
# then get spurious boost terms OR-appended in enhance_query.
|
||||||
|
if q == word or q.startswith(word + " "):
|
||||||
return word
|
return word
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|||||||
@@ -15,7 +15,10 @@ def _detect_question_type(query: str) -> Optional[str]:
|
|||||||
"""Return the leading question word if present (who, what, when, where, why, how)."""
|
"""Return the leading question word if present (who, what, when, where, why, how)."""
|
||||||
q = query.strip().lower()
|
q = query.strip().lower()
|
||||||
for word in ("who", "what", "when", "where", "why", "how"):
|
for word in ("who", "what", "when", "where", "why", "how"):
|
||||||
if q.startswith(word):
|
# Require a whole-word match: a bare prefix mis-flags ordinary queries
|
||||||
|
# like "whatsapp pricing" (-> what) or "however ..." (-> how), which
|
||||||
|
# then get spurious boost terms OR-appended in enhance_query.
|
||||||
|
if q == word or q.startswith(word + " "):
|
||||||
return word
|
return word
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|||||||
18
tests/test_question_type_detection.py
Normal file
18
tests/test_question_type_detection.py
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
"""Tests for question-word detection in research query enhancement."""
|
||||||
|
|
||||||
|
from src.search.query import _detect_question_type
|
||||||
|
|
||||||
|
|
||||||
|
def test_whole_word_questions_detected():
|
||||||
|
assert _detect_question_type("what is topological data analysis") == "what"
|
||||||
|
assert _detect_question_type("how do transformers work") == "how"
|
||||||
|
assert _detect_question_type("why") == "why"
|
||||||
|
|
||||||
|
|
||||||
|
def test_prefix_lookalikes_not_misclassified():
|
||||||
|
# Regression: a bare prefix used to flag these as questions and append
|
||||||
|
# spurious boost terms in enhance_query.
|
||||||
|
assert _detect_question_type("whatsapp pricing") is None
|
||||||
|
assert _detect_question_type("however we proceed") is None
|
||||||
|
assert _detect_question_type("whole foods stock") is None
|
||||||
|
assert _detect_question_type("howard stern show") is None
|
||||||
Reference in New Issue
Block a user