From ab40dbc33aeea3a1a037bc36ad875dd86bcf343d Mon Sep 17 00:00:00 2001 From: 0xallam Date: Sat, 6 Dec 2025 20:44:48 +0200 Subject: [PATCH] fix: improve request queue reliability and reduce stuck requests --- strix/llm/config.py | 2 +- strix/llm/request_queue.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/strix/llm/config.py b/strix/llm/config.py index ea7163a..0738f4f 100644 --- a/strix/llm/config.py +++ b/strix/llm/config.py @@ -17,4 +17,4 @@ class LLMConfig: self.enable_prompt_caching = enable_prompt_caching self.prompt_modules = prompt_modules or [] - self.timeout = timeout or int(os.getenv("LLM_TIMEOUT", "600")) + self.timeout = timeout or int(os.getenv("LLM_TIMEOUT", "300")) diff --git a/strix/llm/request_queue.py b/strix/llm/request_queue.py index 27d0611..3c6a00f 100644 --- a/strix/llm/request_queue.py +++ b/strix/llm/request_queue.py @@ -27,7 +27,7 @@ def should_retry_exception(exception: Exception) -> bool: class LLMRequestQueue: - def __init__(self, max_concurrent: int = 6, delay_between_requests: float = 5.0): + def __init__(self, max_concurrent: int = 1, delay_between_requests: float = 4.0): rate_limit_delay = os.getenv("LLM_RATE_LIMIT_DELAY") if rate_limit_delay: delay_between_requests = float(rate_limit_delay) @@ -61,8 +61,8 @@ class LLMRequestQueue: self._semaphore.release() @retry( # type: ignore[misc] - stop=stop_after_attempt(7), - wait=wait_exponential(multiplier=6, min=12, max=150), + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=8, min=8, max=64), retry=retry_if_exception(should_retry_exception), reraise=True, )