diff --git a/src/tool_index.py b/src/tool_index.py
index f8e8fae..fa7ba31 100644
--- a/src/tool_index.py
+++ b/src/tool_index.py
@@ -431,10 +431,14 @@ class ToolIndex:
         base = set(always_include or ALWAYS_AVAILABLE)
         retrieved = self.retrieve(query, k=k)
         base.update(retrieved)
-        # Keyword-based force-include for common intents
+        # Keyword-based force-include for common intents. Match on word
+        # boundaries, not raw substrings, so short hints like "fix", "line",
+        # "serve", "reply" or "unread" don't fire inside unrelated words
+        # ("prefix", "deadline"/"online", "observe"/"reserve", "replying",
+        # "unreadable"). Same word-boundary matching used in topic_analyzer.
         ql = query.lower()
         for keywords, tools in self._KEYWORD_HINTS.items():
-            if any(kw in ql for kw in keywords):
+            if any(re.search(rf"\b{re.escape(kw)}\b", ql) for kw in keywords):
                 base.update(tools)
         # Structural scheduling-intent detection — typo-resilient (the literal
         # keyword "every day" misses "every dya"). Catches "every <word>",
diff --git a/tests/test_tool_index_keyword_boundaries.py b/tests/test_tool_index_keyword_boundaries.py
new file mode 100644
index 0000000..d1465e6
--- /dev/null
+++ b/tests/test_tool_index_keyword_boundaries.py
@@ -0,0 +1,53 @@
+"""Keyword-hint force-include must match on word boundaries, not substrings.
+
+`get_tools_for_query` force-includes whole tool families when a query mentions
+an intent keyword. The match used a raw substring test (`kw in ql`), so short
+hints fired inside unrelated words: "fix" in "prefix", "line" in "deadline"/
+"online", "serve" in "observe"/"reserve", "reply" in "replying", "unread" in
+"unreadable". That bloated the tool set with irrelevant email/document/serve
+tools for queries that have nothing to do with them. Same substring-vs-word
+pitfall already fixed in topic_analyzer.py.
+
+`retrieve` (which needs a chroma collection) is stubbed out so these tests
+exercise only the keyword-hint loop.
+"""
+from src.tool_index import ToolIndex
+
+
+def _index():  # bare ToolIndex: __init__ bypassed, chroma-backed retrieve stubbed
+    stub = ToolIndex.__new__(ToolIndex)
+    stub.retrieve = lambda query, k=8: []  # no chroma; isolate the keyword loop
+    return stub
+
+
+def test_substring_inside_word_does_not_force_email_tools():
+    """`reply` in "replying" / `unread` in "unreadable" must not force email tools."""
+    index = _index()
+    queries = ("i am replying to your github comment", "this document is unreadable")
+    for query in queries:
+        selected = index.get_tools_for_query(query)
+        assert not any(t in selected for t in ("send_email", "reply_to_email")), query
+
+
+def test_substring_inside_word_does_not_force_document_tools():
+    """`fix` in "prefix" / `line` in "deadline", "online" must not force document tools."""
+    index = _index()
+    queries = ("prefix the output with a label", "the deadline is online already")
+    for query in queries:
+        selected = index.get_tools_for_query(query)
+        assert not any(t in selected for t in ("edit_document", "update_document")), query
+
+
+def test_substring_inside_word_does_not_force_serve_tools():
+    """`serve` inside "observe"/"reserve" must not force the serve tools."""
+    # Both words embed the keyword, but neither is the standalone word "serve".
+    selected = _index().get_tools_for_query("please observe the reserve levels")
+    for tool in ("serve_model", "serve_preset"):
+        assert tool not in selected
+
+
+def test_genuine_keywords_still_force_include():
+    lookup = _index().get_tools_for_query  # whole-word keywords must still fire
+    assert "reply_to_email" in lookup("reply to this email")
+    assert "edit_document" in lookup("edit the document")
+    assert "serve_model" in lookup("serve the model")