fix: llm_call_async does not retry on HTTP 429/502/503/504 (#2364)

The retry loop raised immediately on any non-success HTTP response, regardless of attempt count. For transient upstream errors (429 rate limit, 502 bad gateway, 503 service unavailable, 504 gateway timeout) the function now sleeps for LLMConfig.RETRY_DELAY and retries within the existing attempt budget. It also lets ConnectError / ConnectTimeout retry when the host has not been cooled and attempts remain, instead of always raising on the first connect failure. Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-04 19:35:55 +02:00
parent c12c2aa233
commit ff8f9f2188
1 changed files with 6 additions and 1 deletions
--- a/src/llm_core.py
+++ b/src/llm_core.py
@@ -1088,6 +1088,9 @@ async def llm_call_async(
                    f"LLM async call to {target_url} failed in {duration:.2f}s "
                    f"(attempt {attempt}): HTTP {r.status_code} {friendly}"
                )
+                if r.status_code in (429, 502, 503, 504) and attempt < max_retries:
+                    await asyncio.sleep(LLMConfig.RETRY_DELAY)
+                    continue
                raise HTTPException(r.status_code, friendly)
            logger.info(f"LLM async call to {target_url} succeeded in {duration:.2f}s (attempt {attempt})")
            _clear_host_dead(target_url)
@@ -1109,7 +1112,9 @@ async def llm_call_async(
            duration = time.time() - start
            _tail = f" — host cooled for {DEAD_HOST_COOLDOWN:.0f}s" if _cooled else " — transient, will retry"
            logger.warning(f"LLM async connect to {target_url} failed after {duration:.2f}s: {e}{_tail}")
-            raise HTTPException(503, f"Cannot reach {_host_key(target_url)}: {e}")
+            if _cooled or attempt >= max_retries:
+                raise HTTPException(503, f"Cannot reach {_host_key(target_url)}: {e}")
+            await asyncio.sleep(LLMConfig.RETRY_DELAY)
        except (httpx.RequestError, httpx.HTTPStatusError) as e:
            duration = time.time() - start
            logger.warning(f"LLM async call attempt {attempt} failed after {duration:.2f}s: {e}")