diff --git a/routes/email_pollers.py b/routes/email_pollers.py
index ec8b1e1..a06cbdc 100644
--- a/routes/email_pollers.py
+++ b/routes/email_pollers.py
@@ -132,7 +132,7 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
     import sqlite3 as _sql3
     import requests as _req
     from src.endpoint_resolver import resolve_endpoint
-    from src.llm_core import _uses_max_completion_tokens
+    from src.llm_core import _uses_max_completion_tokens, _restricts_temperature
 
     settings = _load_settings()
     auto_sum = settings.get("email_auto_summarize", False)
@@ -355,6 +355,9 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
                         "temperature": 0.3,
                         "stream": False,
                     }
+                    # Reasoning models (o1/o3/o4/gpt-5) reject an explicit temperature.
+                    if _restricts_temperature(model):
+                        payload.pop("temperature", None)
                     try:
                         # Use to_thread so this sync HTTP call doesn't freeze
                         # the entire event loop while the LLM thinks (240s).
@@ -806,6 +809,9 @@ async def _auto_summarize_pass_single(days_back: int = 1, account_id: str | None
                             "temperature": 0.1,
                             "stream": False,
                         }
+                        # Reasoning models (o1/o3/o4/gpt-5) reject an explicit temperature.
+                        if _restricts_temperature(model):
+                            payload.pop("temperature", None)
                         # to_thread keeps the event loop responsive during the LLM call
                         resp = await asyncio.to_thread(
                             _req.post, url, json=payload, headers=req_headers, timeout=120
diff --git a/routes/email_routes.py b/routes/email_routes.py
index 24f085b..9870cb4 100644
--- a/routes/email_routes.py
+++ b/routes/email_routes.py
@@ -2419,7 +2419,7 @@ def setup_email_routes():
         """Generate a quick AI summary of an email body."""
         try:
             from src.endpoint_resolver import resolve_endpoint
-            from src.llm_core import _uses_max_completion_tokens
+            from src.llm_core import _uses_max_completion_tokens, _restricts_temperature
             import requests as _req
 
             body = data.get("body", "")
@@ -2476,6 +2476,9 @@ def setup_email_routes():
                 "temperature": 0.3,
                 "stream": False,
             }
+            # Reasoning models (o1/o3/o4/gpt-5) reject an explicit temperature.
+            if _restricts_temperature(model):
+                payload.pop("temperature", None)
             resp = await asyncio.to_thread(
                 _req.post, url, json=payload, headers=req_headers, timeout=180
             )
diff --git a/routes/gallery_routes.py b/routes/gallery_routes.py
index badc389..8ec2176 100644
--- a/routes/gallery_routes.py
+++ b/routes/gallery_routes.py
@@ -1707,7 +1707,7 @@ def setup_gallery_routes() -> APIRouter:
                 return {"error": "No vision-capable endpoint configured"}
 
             # Call vision model — format differs between Anthropic and OpenAI
-            from src.llm_core import _detect_provider
+            from src.llm_core import _detect_provider, _restricts_temperature, _uses_max_completion_tokens
             provider = _detect_provider(chat_url)
             tag_prompt = (
                 "Analyze this photo. Return ONLY a comma-separated list of tags. "
@@ -1732,6 +1732,7 @@ def setup_gallery_routes() -> APIRouter:
                     }],
                 }
             else:
+                _tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model_name) else "max_tokens"
                 payload = {
                     "model": model_name,
                     "messages": [{
@@ -1741,9 +1742,12 @@ def setup_gallery_routes() -> APIRouter:
                             {"type": "image_url", "image_url": {"url": f"data:{mime};base64,{b64}"}},
                         ],
                     }],
-                    "max_tokens": 200,
+                    _tok_key: 200,
                     "temperature": 0.3,
                 }
+                # Reasoning models (o1/o3/o4/gpt-5) reject an explicit temperature.
+                if _restricts_temperature(model_name):
+                    payload.pop("temperature", None)
 
             h = {"Content-Type": "application/json"}
             if headers:
diff --git a/routes/model_routes.py b/routes/model_routes.py
index f04f2f2..b0fd1f6 100644
--- a/routes/model_routes.py
+++ b/routes/model_routes.py
@@ -251,9 +251,13 @@ def _probe_single_model(base: str, api_key: str, model_id: str, timeout: int = 1
         target_url = build_chat_url(base)
         h = build_headers(api_key, base)
         h["Content-Type"] = "application/json"
-        from src.llm_core import _uses_max_completion_tokens
+        from src.llm_core import _uses_max_completion_tokens, _restricts_temperature
         _max_key = "max_completion_tokens" if _uses_max_completion_tokens(model_id) else "max_tokens"
-        payload = {"model": model_id, "messages": messages, _max_key: 5, "temperature": 0.0}
+        payload = {"model": model_id, "messages": messages, _max_key: 5}
+        # Reasoning models (o1/o3/o4/gpt-5) reject an explicit temperature, so a
+        # probe that hardcodes one falsely reports a working endpoint as failing.
+        if not _restricts_temperature(model_id):
+            payload["temperature"] = 0.0
         if _test_tools:
             payload["tools"] = _test_tools
 
diff --git a/src/llm_core.py b/src/llm_core.py
index a407f97..18ccba7 100644
--- a/src/llm_core.py
+++ b/src/llm_core.py
@@ -403,6 +403,22 @@ def _uses_max_completion_tokens(model: str) -> bool:
     m = model.lower()
     return any(m.startswith(p) or f"/{p}" in m for p in _MAX_COMPLETION_TOKENS_MODELS)
 
+# OpenAI reasoning models (o1, o3, o4, gpt-5 families) only accept the default
+# temperature. Sending any other value — even 0.0 — returns HTTP 400
+# ("Only the default (1) value is supported"). That otherwise breaks chat when a
+# preset sets a non-default temperature, and makes endpoint probing report a
+# perfectly good model as failing. For these models we omit the field and let
+# the API use its required default. (gpt-4.5 is intentionally excluded — it is
+# not a reasoning model and accepts temperature normally.)
+_FIXED_TEMPERATURE_MODELS = ("o1", "o3", "o4", "gpt-5")
+
+def _restricts_temperature(model: str) -> bool:
+    """Return True if *model* is in a family that only accepts the default temperature."""
+    if not model:  # None or "" never restricts
+        return False
+    m = model.lower()  # match case-insensitively
+    return any(m.startswith(p) or f"/{p}" in m for p in _FIXED_TEMPERATURE_MODELS)
+
 # Models that support structured thinking — may output </think> without opening tag
 _THINKING_MODEL_PATTERNS = ("qwen3", "qwq", "deepseek-r1", "deepseek-reasoner", "minimax", "m2-reap")
 
@@ -738,6 +754,8 @@ def llm_call(url: str, model: str, messages: List[Dict], temperature: float = LL
             "messages": messages_copy,
             "temperature": temperature,
         }
+        if _restricts_temperature(model):
+            payload.pop("temperature", None)
         if max_tokens and max_tokens > 0:
             tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model) else "max_tokens"
             payload[tok_key] = max_tokens
@@ -857,6 +875,8 @@ async def llm_call_async(
             "messages": messages_copy,
             "temperature": temperature,
         }
+        if _restricts_temperature(model):
+            payload.pop("temperature", None)
         if max_tokens and max_tokens > 0:
             tok_key = "max_completion_tokens" if _uses_max_completion_tokens(model) else "max_tokens"
             payload[tok_key] = max_tokens
@@ -958,6 +978,8 @@ async def stream_llm(url: str, model: str, messages: List[Dict], temperature: fl
             "temperature": temperature,
             "stream": True,
         }
+        if _restricts_temperature(model):
+            payload.pop("temperature", None)
         if provider not in {"openrouter", "groq"}:
             payload["stream_options"] = {"include_usage": True}
         if max_tokens and max_tokens > 0:
diff --git a/tests/test_llm_core_temperature.py b/tests/test_llm_core_temperature.py
new file mode 100644
index 0000000..09abf8a
--- /dev/null
+++ b/tests/test_llm_core_temperature.py
@@ -0,0 +1,68 @@
+"""Regression tests: OpenAI reasoning models reject a non-default temperature.
+
+o1/o3/o4/gpt-5 only accept the default temperature (1); sending any other
+value — even 0.0 — returns HTTP 400 "Only the default (1) value is supported".
+The OpenAI-compatible payload builders must omit the temperature field for these
+models so chat (with a non-default preset) and endpoint probing don't break.
+"""
+import httpx
+import pytest
+
+from src import llm_core
+
+
+@pytest.mark.parametrize(
+    "model",
+    ["o1", "o1-mini", "o3", "o3-mini", "o4-mini", "gpt-5", "gpt-5-mini",
+     "openrouter/openai/o3-mini", "OpenAI/GPT-5"],  # provider-prefixed and mixed-case ids
+)
+def test_reasoning_models_restrict_temperature(model):
+    assert llm_core._restricts_temperature(model) is True
+
+
+@pytest.mark.parametrize(
+    "model",
+    ["gpt-4o", "gpt-4.1", "gpt-3.5-turbo", "gpt-4.5-preview",
+     "claude-3-5-sonnet", "llama3.1", "", None],  # "" and None must return False, not raise
+)
+def test_normal_models_allow_temperature(model):
+    assert llm_core._restricts_temperature(model) is False
+
+
+def _capture_openai_payload(monkeypatch, model, temperature):
+    """Call ``llm_core.llm_call`` with ``httpx.post`` stubbed out; return the JSON body it posted."""
+    llm_core._response_cache.clear()  # presumably stops a cached reply skipping the POST — confirm
+    seen = {}
+
+    def fake_post(url, headers=None, json=None, timeout=None):
+        seen["json"] = json  # record the outgoing payload for the caller's assertions
+        request = httpx.Request("POST", url)
+        return httpx.Response(
+            200,
+            request=request,
+            json={"choices": [{"message": {"content": "OK"}}]},
+        )
+
+    monkeypatch.setattr(llm_core.httpx, "post", fake_post)  # no real network traffic
+    result = llm_core.llm_call(
+        "https://api.openai.com/v1/chat/completions",
+        model,
+        [{"role": "user", "content": "Say OK"}],
+        temperature=temperature,
+        max_tokens=5,
+    )
+    assert result == "OK"
+    return seen["json"]
+
+
+def test_reasoning_model_payload_omits_temperature(monkeypatch):
+    payload = _capture_openai_payload(monkeypatch, "o3-mini", 0.0)  # even 0.0 must be dropped
+    assert "temperature" not in payload
+    # Dropping temperature must leave the max_completion_tokens key intact.
+    assert payload["max_completion_tokens"] == 5
+
+
+def test_normal_model_payload_keeps_temperature(monkeypatch):
+    payload = _capture_openai_payload(monkeypatch, "gpt-4o", 0.2)
+    assert payload["temperature"] == 0.2  # caller's value passes through unchanged
+    assert payload["max_tokens"] == 5  # gpt-4o keeps the max_tokens key