From 39848a168b79eccfbe6703ff97f6d27c93abea67 Mon Sep 17 00:00:00 2001
From: danielroytel <107309800+danielroytel@users.noreply.github.com>
Date: Wed, 3 Jun 2026 15:23:18 +1000
Subject: [PATCH] fix: recognize Gemma 4 as a thinking model and add context
 entry (#1642)

Gemma 4 returns reasoning_content in streaming responses via
llama-server, but the model wasn't listed in _THINKING_MODEL_PATTERNS,
causing reasoning tokens to be mishandled. Add "gemma" to the pattern
list and register Gemma 4's 256K (262144-token) context window in
KNOWN_CONTEXT_WINDOWS so the agent loop budgets context correctly.

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/llm_core.py      | 2 +-
 src/model_context.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/llm_core.py b/src/llm_core.py
index 1cbe6e7..eb23057 100644
--- a/src/llm_core.py
+++ b/src/llm_core.py
@@ -420,7 +420,7 @@ def _restricts_temperature(model: str) -> bool:
     return any(m.startswith(p) or f"/{p}" in m for p in _FIXED_TEMPERATURE_MODELS)
 
 # Models that support structured thinking — may output </think> without opening tag
-_THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap")
+_THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap", "gemma")
 
 def _supports_thinking(model: str) -> bool:
     """Check if model supports structured thinking output."""
diff --git a/src/model_context.py b/src/model_context.py
index c812041..6fdd23e 100644
--- a/src/model_context.py
+++ b/src/model_context.py
@@ -83,6 +83,7 @@ KNOWN_CONTEXT_WINDOWS = {
     'gemini-2.0-flash': 1048576,
     'gemini-1.5-pro': 1048576,
     'gemini-1.5-flash': 1048576,
+    'gemma-4': 262144,
     'gemma-3': 128000,
     'gemma-2': 8192,