fix: improve request queue reliability and reduce stuck requests

This commit is contained in:
0xallam
2025-12-06 20:44:48 +02:00
parent b6cb1302ce
commit ab40dbc33a
2 changed files with 4 additions and 4 deletions

View File

@@ -17,4 +17,4 @@ class LLMConfig:
         self.enable_prompt_caching = enable_prompt_caching
         self.prompt_modules = prompt_modules or []
-        self.timeout = timeout or int(os.getenv("LLM_TIMEOUT", "600"))
+        self.timeout = timeout or int(os.getenv("LLM_TIMEOUT", "300"))

View File

@@ -27,7 +27,7 @@ def should_retry_exception(exception: Exception) -> bool:
 class LLMRequestQueue:
-    def __init__(self, max_concurrent: int = 6, delay_between_requests: float = 5.0):
+    def __init__(self, max_concurrent: int = 1, delay_between_requests: float = 4.0):
         rate_limit_delay = os.getenv("LLM_RATE_LIMIT_DELAY")
         if rate_limit_delay:
             delay_between_requests = float(rate_limit_delay)
@@ -61,8 +61,8 @@ class LLMRequestQueue:
             self._semaphore.release()
     @retry(  # type: ignore[misc]
-        stop=stop_after_attempt(7),
-        wait=wait_exponential(multiplier=6, min=12, max=150),
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=8, min=8, max=64),
         retry=retry_if_exception(should_retry_exception),
         reraise=True,
     )