fix: improve request queue reliability and reduce stuck requests

This commit is contained in:
0xallam
2025-12-06 20:44:48 +02:00
parent b6cb1302ce
commit ab40dbc33a
2 changed files with 4 additions and 4 deletions

View File

@@ -17,4 +17,4 @@ class LLMConfig:
         self.enable_prompt_caching = enable_prompt_caching
         self.prompt_modules = prompt_modules or []
-        self.timeout = timeout or int(os.getenv("LLM_TIMEOUT", "600"))
+        self.timeout = timeout or int(os.getenv("LLM_TIMEOUT", "300"))

View File

@@ -27,7 +27,7 @@ def should_retry_exception(exception: Exception) -> bool:
 class LLMRequestQueue:
-    def __init__(self, max_concurrent: int = 6, delay_between_requests: float = 5.0):
+    def __init__(self, max_concurrent: int = 1, delay_between_requests: float = 4.0):
         rate_limit_delay = os.getenv("LLM_RATE_LIMIT_DELAY")
         if rate_limit_delay:
             delay_between_requests = float(rate_limit_delay)
@@ -61,8 +61,8 @@ class LLMRequestQueue:
             self._semaphore.release()
     @retry(  # type: ignore[misc]
-        stop=stop_after_attempt(7),
-        wait=wait_exponential(multiplier=6, min=12, max=150),
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=8, min=8, max=64),
         retry=retry_if_exception(should_retry_exception),
         reraise=True,
     )