From ff8f9f2188727f930a7fe67197b92da6bb9abf1e Mon Sep 17 00:00:00 2001 From: Giuseppe Date: Thu, 4 Jun 2026 19:35:55 +0200 Subject: [PATCH] fix: llm_call_async does not retry on HTTP 429/502/503/504 (#2364) The retry loop raised immediately for any non-success HTTP response regardless of attempt count. For transient upstream errors (429 rate limit, 502 bad gateway, 503 service unavailable, 504 gateway timeout) the function now sleeps for LLMConfig.RETRY_DELAY and retries within the existing attempt budget. Also lets ConnectError / ConnectTimeout retry when the host has not been cooled and attempts remain, instead of always raising on the first connect failure. Co-authored-by: Claude Sonnet 4.6 --- src/llm_core.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/llm_core.py b/src/llm_core.py index a929edc..a155530 100644 --- a/src/llm_core.py +++ b/src/llm_core.py @@ -1088,6 +1088,9 @@ async def llm_call_async( f"LLM async call to {target_url} failed in {duration:.2f}s " f"(attempt {attempt}): HTTP {r.status_code} {friendly}" ) + if r.status_code in (429, 502, 503, 504) and attempt < max_retries: + await asyncio.sleep(LLMConfig.RETRY_DELAY) + continue raise HTTPException(r.status_code, friendly) logger.info(f"LLM async call to {target_url} succeeded in {duration:.2f}s (attempt {attempt})") _clear_host_dead(target_url) @@ -1109,7 +1112,9 @@ async def llm_call_async( duration = time.time() - start _tail = f" — host cooled for {DEAD_HOST_COOLDOWN:.0f}s" if _cooled else " — transient, will retry" logger.warning(f"LLM async connect to {target_url} failed after {duration:.2f}s: {e}{_tail}") - raise HTTPException(503, f"Cannot reach {_host_key(target_url)}: {e}") + if _cooled or attempt >= max_retries: + raise HTTPException(503, f"Cannot reach {_host_key(target_url)}: {e}") + await asyncio.sleep(LLMConfig.RETRY_DELAY) except (httpx.RequestError, httpx.HTTPStatusError) as e: duration = time.time() - start logger.warning(f"LLM async call attempt {attempt} failed after {duration:.2f}s: {e}")