diff --git a/src/llm_core.py b/src/llm_core.py index a929edc..a155530 100644 --- a/src/llm_core.py +++ b/src/llm_core.py @@ -1088,6 +1088,9 @@ async def llm_call_async( f"LLM async call to {target_url} failed in {duration:.2f}s " f"(attempt {attempt}): HTTP {r.status_code} {friendly}" ) + if r.status_code in (429, 502, 503, 504) and attempt < max_retries: + await asyncio.sleep(LLMConfig.RETRY_DELAY) + continue raise HTTPException(r.status_code, friendly) logger.info(f"LLM async call to {target_url} succeeded in {duration:.2f}s (attempt {attempt})") _clear_host_dead(target_url) @@ -1109,7 +1112,9 @@ async def llm_call_async( duration = time.time() - start _tail = f" — host cooled for {DEAD_HOST_COOLDOWN:.0f}s" if _cooled else " — transient, will retry" logger.warning(f"LLM async connect to {target_url} failed after {duration:.2f}s: {e}{_tail}") - raise HTTPException(503, f"Cannot reach {_host_key(target_url)}: {e}") + if _cooled or attempt >= max_retries: + raise HTTPException(503, f"Cannot reach {_host_key(target_url)}: {e}") + await asyncio.sleep(LLMConfig.RETRY_DELAY) except (httpx.RequestError, httpx.HTTPStatusError) as e: duration = time.time() - start logger.warning(f"LLM async call attempt {attempt} failed after {duration:.2f}s: {e}")