From cb3d86608c8c2cb56353eaaf1008f29b68e70924 Mon Sep 17 00:00:00 2001 From: Sirsyorrz Date: Tue, 2 Jun 2026 12:47:15 +1000 Subject: [PATCH] Cookbook: pick the correct vLLM tool-call-parser for Qwen2.5 (#580) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The model-name detector treated every Qwen model as a Qwen3, falling into the qwen3_xml parser: if (n.includes('qwen3') && n.includes('coder')) return 'qwen3_coder'; if (n.includes('qwen')) return 'qwen3_xml'; // catches qwen2.5 too qwen3_xml is the parser for Qwen3 reasoning/instruct models. Qwen2.5 (and Qwen2, Qwen1.5) ship with hermes-style tool calling, so the qwen3_xml parser never recognises their tool calls — they leak through as plain text in the assistant reply and the agent silently fails to execute anything. Reproduces with: vllm serve Qwen/Qwen2.5-Coder-14B-Instruct-AWQ ... \ --enable-auto-tool-choice --tool-call-parser qwen3_xml → ask the agent to call any tool → JSON shows up in chat, no call runs. Fix the ordering: qwen3 + coder → qwen3_coder qwen3 → qwen3_xml qwen → hermes (Qwen2.5 / Qwen2 / Qwen1.5) Verified against the model matrix: Qwen2.5-Coder-14B-Instruct-AWQ → hermes Qwen2.5-7B-Instruct → hermes Qwen3-8B → qwen3_xml Qwen3-32B → qwen3_xml Qwen3-Coder-30B-A3B → qwen3_coder Qwen2-72B-Instruct → hermes Qwen1.5-7B-Chat → hermes --- static/js/cookbook.js | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/static/js/cookbook.js b/static/js/cookbook.js index 8eb914a..cac5a90 100644 --- a/static/js/cookbook.js +++ b/static/js/cookbook.js @@ -223,11 +223,20 @@ function _detectModelOptimizations(modelName) { return opts; } -/** Detect the right vLLM tool-call-parser based on model name */ +/** Detect the right vLLM tool-call-parser based on model name. + * Qwen tool-call formats split by generation: + * - Qwen3-Coder → qwen3_coder (XML with named params) + * - Qwen3 (non-coder) → qwen3_xml (reasoning/instruct, XML wrapper) + * - Qwen2.5 / Qwen2 / 1.5 → hermes (Qwen2.5 was trained on Hermes format) + * Catching "qwen" first and labelling everything qwen3_xml breaks tool + * calls on the Qwen2.5 line (the model emits hermes-style which the + * qwen3_xml parser doesn't recognise, so the call leaks through as text). + */ export function _detectToolParser(modelName) { const n = (modelName || '').toLowerCase(); if (n.includes('qwen3') && n.includes('coder')) return 'qwen3_coder'; - if (n.includes('qwen')) return 'qwen3_xml'; + if (n.includes('qwen3')) return 'qwen3_xml'; + if (n.includes('qwen')) return 'hermes'; // Qwen2.5 / Qwen2 / Qwen1.5 if (n.includes('llama-4') || n.includes('llama4')) return 'llama4_json'; if (n.includes('llama') || n.includes('nemotron')) return 'llama3_json'; if (n.includes('mistral') || n.includes('mixtral')) return 'mistral';