From ab40dbc33aeea3a1a037bc36ad875dd86bcf343d Mon Sep 17 00:00:00 2001
From: 0xallam <ahmed39652003@gmail.com>
Date: Sat, 6 Dec 2025 20:44:48 +0200
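Subject: [PATCH] fix: improve request queue reliability and reduce stuck
 requests

Tighten the default limits so a failing request gives up sooner:

- LLMConfig: lower the default timeout from 600 to 300 (an explicit
  timeout argument or the LLM_TIMEOUT environment variable still
  takes precedence).
- LLMRequestQueue: default max_concurrent drops from 6 to 1 and
  delay_between_requests from 5.0 to 4.0 (LLM_RATE_LIMIT_DELAY still
  overrides the delay).
- Retry policy: stop after 3 attempts instead of 7, and change the
  exponential wait from multiplier=6, min=12, max=150 to
  multiplier=8, min=8, max=64.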

---
 strix/llm/config.py        | 2 +-
 strix/llm/request_queue.py | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/strix/llm/config.py b/strix/llm/config.py
index ea7163a..0738f4f 100644
--- a/strix/llm/config.py
+++ b/strix/llm/config.py
@@ -17,4 +17,4 @@ class LLMConfig:
         self.enable_prompt_caching = enable_prompt_caching
         self.prompt_modules = prompt_modules or []
 
-        self.timeout = timeout or int(os.getenv("LLM_TIMEOUT", "600"))
+        self.timeout = timeout or int(os.getenv("LLM_TIMEOUT", "300"))
diff --git a/strix/llm/request_queue.py b/strix/llm/request_queue.py
index 27d0611..3c6a00f 100644
--- a/strix/llm/request_queue.py
+++ b/strix/llm/request_queue.py
@@ -27,7 +27,7 @@ def should_retry_exception(exception: Exception) -> bool:
 
 
 class LLMRequestQueue:
-    def __init__(self, max_concurrent: int = 6, delay_between_requests: float = 5.0):
+    def __init__(self, max_concurrent: int = 1, delay_between_requests: float = 4.0):
         rate_limit_delay = os.getenv("LLM_RATE_LIMIT_DELAY")
         if rate_limit_delay:
             delay_between_requests = float(rate_limit_delay)
@@ -61,8 +61,8 @@ class LLMRequestQueue:
             self._semaphore.release()
 
     @retry(  # type: ignore[misc]
-        stop=stop_after_attempt(7),
-        wait=wait_exponential(multiplier=6, min=12, max=150),
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=8, min=8, max=64),
         retry=retry_if_exception(should_retry_exception),
         reraise=True,
     )