fix: recognize Gemma 4 as a thinking model and add context entry (#1642)

Gemma 4 returns reasoning_content in streaming responses via
llama-server, but the model wasn't listed in _THINKING_MODEL_PATTERNS,
causing reasoning tokens to be mishandled. Add "gemma" to the pattern
list and register Gemma 4's 256K (262,144-token) context window in
KNOWN_CONTEXT_WINDOWS so the agent loop budgets context correctly.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
danielroytel
2026-06-03 15:23:18 +10:00
committed by GitHub
parent b45611e9c5
commit 39848a168b
2 changed files with 2 additions and 1 deletions

View File

@@ -420,7 +420,7 @@ def _restricts_temperature(model: str) -> bool:
return any(m.startswith(p) or f"/{p}" in m for p in _FIXED_TEMPERATURE_MODELS)
# Models that support structured thinking — may output </think> without opening tag
_THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap")
_THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap", "gemma")
def _supports_thinking(model: str) -> bool:
"""Check if model supports structured thinking output."""

View File

@@ -83,6 +83,7 @@ KNOWN_CONTEXT_WINDOWS = {
'gemini-2.0-flash': 1048576,
'gemini-1.5-pro': 1048576,
'gemini-1.5-flash': 1048576,
'gemma-4': 262144,
'gemma-3': 128000,
'gemma-2': 8192,