fix: llm_call_async does not retry on HTTP 429/502/503/504 (#2364)

The retry loop raised immediately for any non-success HTTP response
regardless of attempt count. For transient upstream errors (rate limit,
bad gateway, service unavailable, gateway timeout) the function should
back off and retry within the existing attempt budget.

Also lets ConnectError / ConnectTimeout retry when the host has not been
cooled and attempts remain, instead of always raising on the first
connect failure.

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Giuseppe
2026-06-04 19:35:55 +02:00
committed by GitHub
parent c12c2aa233
commit ff8f9f2188

View File

@@ -1088,6 +1088,9 @@ async def llm_call_async(
f"LLM async call to {target_url} failed in {duration:.2f}s "
f"(attempt {attempt}): HTTP {r.status_code} {friendly}"
)
if r.status_code in (429, 502, 503, 504) and attempt < max_retries:
await asyncio.sleep(LLMConfig.RETRY_DELAY)
continue
raise HTTPException(r.status_code, friendly)
logger.info(f"LLM async call to {target_url} succeeded in {duration:.2f}s (attempt {attempt})")
_clear_host_dead(target_url)
@@ -1109,7 +1112,9 @@ async def llm_call_async(
duration = time.time() - start
_tail = f" — host cooled for {DEAD_HOST_COOLDOWN:.0f}s" if _cooled else " — transient, will retry"
logger.warning(f"LLM async connect to {target_url} failed after {duration:.2f}s: {e}{_tail}")
raise HTTPException(503, f"Cannot reach {_host_key(target_url)}: {e}")
if _cooled or attempt >= max_retries:
raise HTTPException(503, f"Cannot reach {_host_key(target_url)}: {e}")
await asyncio.sleep(LLMConfig.RETRY_DELAY)
except (httpx.RequestError, httpx.HTTPStatusError) as e:
duration = time.time() - start
logger.warning(f"LLM async call attempt {attempt} failed after {duration:.2f}s: {e}")