fix: recognize Gemma 4 as a thinking model and add context entry (#1642)
Gemma 4 returns reasoning_content in streaming responses via llama-server, but the model wasn't listed in _THINKING_MODEL_PATTERNS, causing reasoning tokens to be mishandled. Add "gemma" to the pattern list and register Gemma 4's 256K (262,144-token) context window in KNOWN_CONTEXT_WINDOWS so the agent loop budgets context correctly. Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -420,7 +420,7 @@ def _restricts_temperature(model: str) -> bool:
|
||||
return any(m.startswith(p) or f"/{p}" in m for p in _FIXED_TEMPERATURE_MODELS)
|
||||
|
||||
# Models that support structured thinking — may output </think> without opening tag
|
||||
_THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap")
|
||||
_THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap", "gemma")
|
||||
|
||||
def _supports_thinking(model: str) -> bool:
|
||||
"""Check if model supports structured thinking output."""
|
||||
|
||||
@@ -83,6 +83,7 @@ KNOWN_CONTEXT_WINDOWS = {
|
||||
'gemini-2.0-flash': 1048576,
|
||||
'gemini-1.5-pro': 1048576,
|
||||
'gemini-1.5-flash': 1048576,
|
||||
'gemma-4': 262144,
|
||||
'gemma-3': 128000,
|
||||
'gemma-2': 8192,
|
||||
|
||||
|
||||
Reference in New Issue
Block a user