diff --git a/src/tool_index.py b/src/tool_index.py
index f8e8fae..fa7ba31 100644
--- a/src/tool_index.py
+++ b/src/tool_index.py
@@ -431,10 +431,16 @@ class ToolIndex:
         base = set(always_include or ALWAYS_AVAILABLE)
         retrieved = self.retrieve(query, k=k)
         base.update(retrieved)
-        # Keyword-based force-include for common intents
+        # Keyword-based force-include for common intents. Match whole words,
+        # not raw substrings, so short hints like "fix", "line", "serve",
+        # "reply" or "unread" don't fire inside unrelated words ("prefix",
+        # "deadline"/"online", "observe"/"reserve", "replying", "unreadable").
+        # Same word-boundary rule used in topic_analyzer. Lookarounds replace
+        # \b so a hint that starts or ends with a non-word character (e.g.
+        # "c++") can still match; for plain-word hints both forms agree.
         ql = query.lower()
         for keywords, tools in self._KEYWORD_HINTS.items():
-            if any(kw in ql for kw in keywords):
+            if any(re.search(rf"(?<!\w){re.escape(kw)}(?!\w)", ql) for kw in keywords):
                 base.update(tools)
         # Structural scheduling-intent detection — typo-resilient (the literal
         # keyword "every day" misses "every dya"). Catches "every ",
diff --git a/tests/test_tool_index_keyword_boundaries.py b/tests/test_tool_index_keyword_boundaries.py
new file mode 100644
index 0000000..d1465e6
--- /dev/null
+++ b/tests/test_tool_index_keyword_boundaries.py
@@ -0,0 +1,71 @@
+"""Keyword-hint force-include must match on word boundaries, not substrings.
+
+`get_tools_for_query` force-includes whole tool families when a query mentions
+an intent keyword. The match used a raw substring test (`kw in ql`), so short
+hints fired inside unrelated words: "fix" in "prefix", "line" in "deadline"/
+"online", "serve" in "observe"/"reserve", "reply" in "replying", "unread" in
+"unreadable". That bloated the tool set with irrelevant email/document/serve
+tools for queries that have nothing to do with them. Same substring-vs-word
+pitfall already fixed in topic_analyzer.py.
+
+`retrieve` (which needs a chroma collection) is stubbed out so these tests
+exercise only the keyword-hint loop.
+"""
+from src.tool_index import ToolIndex
+
+
+def _index():
+    """Return a ToolIndex built without __init__, with `retrieve` stubbed out."""
+    ti = ToolIndex.__new__(ToolIndex)
+    ti.retrieve = lambda query, k=8: []  # no chroma; isolate the keyword loop
+    return ti
+
+
+def test_substring_inside_word_does_not_force_email_tools():
+    ti = _index()
+    # "replying" contains "reply"; "unreadable" contains "unread".
+    for q in ("i am replying to your github comment", "this document is unreadable"):
+        tools = ti.get_tools_for_query(q)
+        assert "send_email" not in tools, q
+        assert "reply_to_email" not in tools, q
+
+
+def test_substring_inside_word_does_not_force_document_tools():
+    ti = _index()
+    # "prefix" contains "fix"; "deadline"/"online" contain "line".
+    for q in ("prefix the output with a label", "the deadline is online already"):
+        tools = ti.get_tools_for_query(q)
+        assert "edit_document" not in tools, q
+        assert "update_document" not in tools, q
+
+
+def test_substring_inside_word_does_not_force_serve_tools():
+    ti = _index()
+    # "observe"/"reserve" contain "serve".
+    tools = ti.get_tools_for_query("please observe the reserve levels")
+    assert "serve_model" not in tools
+    assert "serve_preset" not in tools
+
+
+def test_genuine_keywords_still_force_include():
+    ti = _index()
+    assert "reply_to_email" in ti.get_tools_for_query("reply to this email")
+    assert "edit_document" in ti.get_tools_for_query("edit the document")
+    assert "serve_model" in ti.get_tools_for_query("serve the model")
+
+
+def test_case_and_adjacent_punctuation_still_force_include():
+    ti = _index()
+    # The query is lowercased before matching, and punctuation next to a
+    # keyword counts as a word edge.
+    assert "reply_to_email" in ti.get_tools_for_query("Reply to this EMAIL!")
+    assert "edit_document" in ti.get_tools_for_query("Edit the document.")
+    assert "serve_model" in ti.get_tools_for_query("(SERVE the model)")
+
+
+def test_hint_with_non_word_edges_matches_as_whole_token():
+    ti = _index()
+    # Instance-level override so the test does not depend on the real table.
+    ti._KEYWORD_HINTS = {("c++",): ("serve_model",)}
+    assert "serve_model" in ti.get_tools_for_query("build this c++ project")
+    assert "serve_model" not in ti.get_tools_for_query("abc++ is not a language")