From cb3d86608c8c2cb56353eaaf1008f29b68e70924 Mon Sep 17 00:00:00 2001
From: Sirsyorrz <Sirsyorrz@gmail.com>
Date: Tue, 2 Jun 2026 12:47:15 +1000
Subject: [PATCH] Cookbook: pick the correct vLLM tool-call-parser for Qwen2.5
 (#580)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The model-name detector treated every Qwen model except Qwen3-Coder as
Qwen3, so they all fell through to the qwen3_xml parser:

    if (n.includes('qwen3') && n.includes('coder')) return 'qwen3_coder';
    if (n.includes('qwen')) return 'qwen3_xml';   // catches qwen2.5 too

qwen3_xml is the parser for Qwen3 reasoning/instruct models. Qwen2.5
(and Qwen2, Qwen1.5) ship with hermes-style tool calling, so the
qwen3_xml parser never recognises their tool calls — they leak through
as plain text in the assistant reply and the agent silently fails to
execute anything.

Reproduces with:
  vllm serve Qwen/Qwen2.5-Coder-14B-Instruct-AWQ ... \
    --enable-auto-tool-choice --tool-call-parser qwen3_xml
  → ask the agent to call any tool → JSON shows up in chat, no call runs.

Fix by adding an explicit qwen3 match and sending the remaining Qwen
models to hermes; the checks run in this order:
  qwen3 + coder → qwen3_coder
  qwen3         → qwen3_xml
  qwen          → hermes   (Qwen2.5 / Qwen2 / Qwen1.5)

Verified against the model matrix:

  Qwen2.5-Coder-14B-Instruct-AWQ → hermes
  Qwen2.5-7B-Instruct            → hermes
  Qwen3-8B                       → qwen3_xml
  Qwen3-32B                      → qwen3_xml
  Qwen3-Coder-30B-A3B            → qwen3_coder
  Qwen2-72B-Instruct             → hermes
  Qwen1.5-7B-Chat                → hermes
---
 static/js/cookbook.js | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/static/js/cookbook.js b/static/js/cookbook.js
index 8eb914a..cac5a90 100644
--- a/static/js/cookbook.js
+++ b/static/js/cookbook.js
@@ -223,11 +223,20 @@ function _detectModelOptimizations(modelName) {
   return opts;
 }
 
-/** Detect the right vLLM tool-call-parser based on model name */
+/** Detect the right vLLM tool-call-parser based on model name.
+ *  Qwen tool-call formats split by generation:
+ *   - Qwen3-Coder           → qwen3_coder  (XML <tool_call> with named params)
+ *   - Qwen3 (non-coder)     → qwen3_xml    (reasoning/instruct, XML wrapper)
+ *   - Qwen2.5 / Qwen2 / 1.5 → hermes       (Qwen2.5 was trained on Hermes format)
+ *  Catching "qwen" first and labelling everything qwen3_xml breaks tool
+ *  calls on the Qwen2.5 line (the model emits hermes-style which the
+ *  qwen3_xml parser doesn't recognise, so the call leaks through as text).
+ */
 export function _detectToolParser(modelName) {
   const n = (modelName || '').toLowerCase();
   if (n.includes('qwen3') && n.includes('coder')) return 'qwen3_coder';
-  if (n.includes('qwen')) return 'qwen3_xml';
+  if (n.includes('qwen3')) return 'qwen3_xml';
+  if (n.includes('qwen')) return 'hermes';   // Qwen2.5 / Qwen2 / Qwen1.5
   if (n.includes('llama-4') || n.includes('llama4')) return 'llama4_json';
   if (n.includes('llama') || n.includes('nemotron')) return 'llama3_json';
   if (n.includes('mistral') || n.includes('mixtral')) return 'mistral';