diff --git a/src/chat_helpers.py b/src/chat_helpers.py
index 52c2246..1c8d1c9 100644
--- a/src/chat_helpers.py
+++ b/src/chat_helpers.py
@@ -40,12 +40,16 @@ _VISION_MODEL_KEYWORDS = (
     "internvl", "cogvlm", "qwen-vl", "qwen2-vl", "qwen3-vl", "qwen3vl",
     # multimodal families whose names don't contain "vision"/"vl" but DO accept
     # images — without these the image is silently dropped for common Ollama tags
-    # like gemma3:4b (issue #1274). Gemma 3 (4b+), Llama 4 (all), and Mistral
-    # Small 3.1/3.2 are vision-capable; per the err-toward-True policy (#124) a
-    # rare text-only tag (e.g. gemma3:1b) being treated as vision is the safer
-    # failure than dropping a real image.
-    "gemma-3", "gemma3", "llama-4", "llama4",
+    # like gemma3:4b or gemma4:12b (issue #1274). Gemma 3/4 (4b+), Llama 4 (all),
+    # Mistral Small 3.1/3.2, and Phi-4 multimodal are vision-capable; per the
+    # err-toward-True policy (#124), a rare text-only tag being treated as vision is
+    # a safer failure than silently dropping a real image.
+    "gemma-3", "gemma3", "gemma-4", "gemma4",
+    "llama-4", "llama4",
     "mistral-small-3.1", "mistral-small3.1", "mistral-small-3.2", "mistral-small3.2",
+    # Microsoft Phi-4 ships a dedicated multimodal variant ("phi-4-multimodal-instruct"),
+    # but users often load it under the bare "phi-4" or "phi4" Ollama tag, so both are listed.
+    "phi-4", "phi4",
     # zhipu / glm (glm-4.5v, glm-4.6v, glm-5v-turbo, etc.)
     "glm-4.5v", "glm-4.6v", "glm-5v",
 )