From 39848a168b79eccfbe6703ff97f6d27c93abea67 Mon Sep 17 00:00:00 2001 From: danielroytel <107309800+danielroytel@users.noreply.github.com> Date: Wed, 3 Jun 2026 15:23:18 +1000 Subject: [PATCH] fix: recognize Gemma 4 as a thinking model and add context entry (#1642) Gemma 4 returns reasoning_content in streaming responses via llama-server, but the model wasn't listed in _THINKING_MODEL_PATTERNS, causing reasoning tokens to be mishandled. Add "gemma" to the pattern list and register Gemma 4's 256K (262144-token) context window in KNOWN_CONTEXT_WINDOWS so the agent loop budgets context correctly. Co-authored-by: Claude Opus 4.6 --- src/llm_core.py | 2 +- src/model_context.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/llm_core.py b/src/llm_core.py index 1cbe6e7..eb23057 100644 --- a/src/llm_core.py +++ b/src/llm_core.py @@ -420,7 +420,7 @@ def _restricts_temperature(model: str) -> bool: return any(m.startswith(p) or f"/{p}" in m for p in _FIXED_TEMPERATURE_MODELS) # Models that support structured thinking — may output without opening tag -_THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap") +_THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap", "gemma") def _supports_thinking(model: str) -> bool: """Check if model supports structured thinking output.""" diff --git a/src/model_context.py b/src/model_context.py index c812041..6fdd23e 100644 --- a/src/model_context.py +++ b/src/model_context.py @@ -83,6 +83,7 @@ KNOWN_CONTEXT_WINDOWS = { 'gemini-2.0-flash': 1048576, 'gemini-1.5-pro': 1048576, 'gemini-1.5-flash': 1048576, + 'gemma-4': 262144, 'gemma-3': 128000, 'gemma-2': 8192,