diff --git a/strix/interface/main.py b/strix/interface/main.py index 063dc10..fcb5ab1 100644 --- a/strix/interface/main.py +++ b/strix/interface/main.py @@ -208,9 +208,12 @@ async def warm_up_llm() -> None: {"role": "user", "content": "Reply with just 'OK'."}, ] + llm_timeout = int(os.getenv("LLM_TIMEOUT", "600")) + response = litellm.completion( model=model_name, messages=test_messages, + timeout=llm_timeout, ) validate_llm_response(response) diff --git a/strix/llm/config.py b/strix/llm/config.py index 9428e12..8bb6410 100644 --- a/strix/llm/config.py +++ b/strix/llm/config.py @@ -8,6 +8,7 @@ class LLMConfig: temperature: float = 0, enable_prompt_caching: bool = True, prompt_modules: list[str] | None = None, + timeout: int | None = None, ): self.model_name = model_name or os.getenv("STRIX_LLM", "openai/gpt-5") @@ -17,3 +18,5 @@ class LLMConfig: self.temperature = max(0.0, min(1.0, temperature)) self.enable_prompt_caching = enable_prompt_caching self.prompt_modules = prompt_modules or [] + + self.timeout = timeout or int(os.getenv("LLM_TIMEOUT", "600")) diff --git a/strix/llm/llm.py b/strix/llm/llm.py index aed2b4a..a35a3c6 100644 --- a/strix/llm/llm.py +++ b/strix/llm/llm.py @@ -123,7 +123,10 @@ class LLM: self._total_stats = RequestStats() self._last_request_stats = RequestStats() - self.memory_compressor = MemoryCompressor() + self.memory_compressor = MemoryCompressor( + model_name=self.config.model_name, + timeout=self.config.timeout, + ) if agent_name: prompt_dir = Path(__file__).parent.parent / "agents" / agent_name @@ -359,7 +362,7 @@ class LLM: "model": self.config.model_name, "messages": messages, "temperature": self.config.temperature, - "timeout": 180, + "timeout": self.config.timeout, } if self._should_include_stop_param(): diff --git a/strix/llm/memory_compressor.py b/strix/llm/memory_compressor.py index 38dbcf6..b5779d8 100644 --- a/strix/llm/memory_compressor.py +++ b/strix/llm/memory_compressor.py @@ -85,6 +85,7 @@ def _extract_message_text(msg: dict[str, Any]) -> str: def _summarize_messages( messages: list[dict[str, Any]], model: str, + timeout: int = 600, ) -> dict[str, Any]: if not messages: empty_summary = "{text}" @@ -106,7 +107,7 @@ def _summarize_messages( completion_args = { "model": model, "messages": [{"role": "user", "content": prompt}], - "timeout": 180, + "timeout": timeout, } response = litellm.completion(**completion_args) @@ -146,9 +147,11 @@ class MemoryCompressor: self, max_images: int = 3, model_name: str | None = None, + timeout: int = 600, ): self.max_images = max_images self.model_name = model_name or os.getenv("STRIX_LLM", "openai/gpt-5") + self.timeout = timeout if not self.model_name: raise ValueError("STRIX_LLM environment variable must be set and not empty") @@ -202,7 +205,7 @@ class MemoryCompressor: chunk_size = 10 for i in range(0, len(old_msgs), chunk_size): chunk = old_msgs[i : i + chunk_size] - summary = _summarize_messages(chunk, model_name) + summary = _summarize_messages(chunk, model_name, self.timeout) if summary: compressed.append(summary) diff --git a/strix/tools/agents_graph/agents_graph_actions.py b/strix/tools/agents_graph/agents_graph_actions.py index fb9cf27..2e384c0 100644 --- a/strix/tools/agents_graph/agents_graph_actions.py +++ b/strix/tools/agents_graph/agents_graph_actions.py @@ -230,9 +230,18 @@ def create_agent( state = AgentState(task=task, agent_name=name, parent_id=parent_id, max_iterations=300) - llm_config = LLMConfig(prompt_modules=module_list) - parent_agent = _agent_instances.get(parent_id) + + timeout = None + if ( + parent_agent + and hasattr(parent_agent, "llm_config") + and hasattr(parent_agent.llm_config, "timeout") + ): + timeout = parent_agent.llm_config.timeout + + llm_config = LLMConfig(prompt_modules=module_list, timeout=timeout) + agent_config = { "llm_config": llm_config, "state": state,