fix: improve request queue reliability and reduce stuck requests
This commit is contained in:
@@ -17,4 +17,4 @@ class LLMConfig:
|
||||
self.enable_prompt_caching = enable_prompt_caching
|
||||
self.prompt_modules = prompt_modules or []
|
||||
|
||||
- self.timeout = timeout or int(os.getenv("LLM_TIMEOUT", "600"))
|
||||
+ self.timeout = timeout or int(os.getenv("LLM_TIMEOUT", "300"))
|
||||
|
||||
@@ -27,7 +27,7 @@ def should_retry_exception(exception: Exception) -> bool:
|
||||
|
||||
|
||||
class LLMRequestQueue:
|
||||
- def __init__(self, max_concurrent: int = 6, delay_between_requests: float = 5.0):
|
||||
+ def __init__(self, max_concurrent: int = 1, delay_between_requests: float = 4.0):
|
||||
rate_limit_delay = os.getenv("LLM_RATE_LIMIT_DELAY")
|
||||
if rate_limit_delay:
|
||||
delay_between_requests = float(rate_limit_delay)
|
||||
@@ -61,8 +61,8 @@ class LLMRequestQueue:
|
||||
self._semaphore.release()
|
||||
|
||||
@retry( # type: ignore[misc]
|
||||
- stop=stop_after_attempt(7),
|
||||
- wait=wait_exponential(multiplier=6, min=12, max=150),
|
||||
+ stop=stop_after_attempt(3),
|
||||
+ wait=wait_exponential(multiplier=8, min=8, max=64),
|
||||
retry=retry_if_exception(should_retry_exception),
|
||||
reraise=True,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user