From ff8f9f2188727f930a7fe67197b92da6bb9abf1e Mon Sep 17 00:00:00 2001
From: Giuseppe <peppecastellos245@icloud.com>
Date: Thu, 4 Jun 2026 19:35:55 +0200
Subject: [PATCH] fix: llm_call_async does not retry on HTTP 429/502/503/504
 (#2364)

The retry loop raised immediately for any non-success HTTP response,
regardless of attempt count. For transient upstream errors (429 rate
limit, 502 bad gateway, 503 service unavailable, 504 gateway timeout)
the function now sleeps for LLMConfig.RETRY_DELAY and retries while
attempts remain in the existing budget.

Also lets ConnectError / ConnectTimeout retry, after the same delay,
when the host has not been put on dead-host cooldown and attempts
remain, instead of always raising HTTP 503 on the first connect failure.

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 src/llm_core.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/llm_core.py b/src/llm_core.py
index a929edc..a155530 100644
--- a/src/llm_core.py
+++ b/src/llm_core.py
@@ -1088,6 +1088,9 @@ async def llm_call_async(
                     f"LLM async call to {target_url} failed in {duration:.2f}s "
                     f"(attempt {attempt}): HTTP {r.status_code} {friendly}"
                 )
+                if r.status_code in (429, 502, 503, 504) and attempt < max_retries:
+                    await asyncio.sleep(LLMConfig.RETRY_DELAY)
+                    continue
                 raise HTTPException(r.status_code, friendly)
             logger.info(f"LLM async call to {target_url} succeeded in {duration:.2f}s (attempt {attempt})")
             _clear_host_dead(target_url)
@@ -1109,7 +1112,9 @@ async def llm_call_async(
             duration = time.time() - start
             _tail = f" — host cooled for {DEAD_HOST_COOLDOWN:.0f}s" if _cooled else " — transient, will retry"
             logger.warning(f"LLM async connect to {target_url} failed after {duration:.2f}s: {e}{_tail}")
-            raise HTTPException(503, f"Cannot reach {_host_key(target_url)}: {e}")
+            if _cooled or attempt >= max_retries:
+                raise HTTPException(503, f"Cannot reach {_host_key(target_url)}: {e}")
+            await asyncio.sleep(LLMConfig.RETRY_DELAY)
         except (httpx.RequestError, httpx.HTTPStatusError) as e:
             duration = time.time() - start
             logger.warning(f"LLM async call attempt {attempt} failed after {duration:.2f}s: {e}")