feat: add configurable timeout for LLM requests

Author: Ahmed Allam
Date: 2025-11-12 18:52:08 +04:00
Committed by: Ahmed Allam
Parent: 795ed02955
Commit: 1abfb360e4
5 changed files with 27 additions and 6 deletions

View File

@@ -208,9 +208,12 @@ async def warm_up_llm() -> None:
         {"role": "user", "content": "Reply with just 'OK'."},
     ]
+    llm_timeout = int(os.getenv("LLM_TIMEOUT", "600"))
     response = litellm.completion(
         model=model_name,
         messages=test_messages,
+        timeout=llm_timeout,
     )
     validate_llm_response(response)
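
For reference, litellm's completion API takes a per-request timeout keyword in seconds, so the warm-up call above can be reproduced in isolation. A minimal sketch assuming only the litellm package; the model name here is a placeholder:

    import os

    import litellm

    # Same resolution as the hunk above: LLM_TIMEOUT env var, defaulting to 600s.
    llm_timeout = int(os.getenv("LLM_TIMEOUT", "600"))
    response = litellm.completion(
        model="openai/gpt-5",  # placeholder; the real call uses the configured model_name
        messages=[{"role": "user", "content": "Reply with just 'OK'."}],
        timeout=llm_timeout,  # seconds before the request is abandoned
    )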

View File

@@ -8,6 +8,7 @@ class LLMConfig:
         temperature: float = 0,
         enable_prompt_caching: bool = True,
         prompt_modules: list[str] | None = None,
+        timeout: int | None = None,
     ):
         self.model_name = model_name or os.getenv("STRIX_LLM", "openai/gpt-5")
@@ -17,3 +18,5 @@ class LLMConfig:
         self.temperature = max(0.0, min(1.0, temperature))
         self.enable_prompt_caching = enable_prompt_caching
         self.prompt_modules = prompt_modules or []
+        self.timeout = timeout or int(os.getenv("LLM_TIMEOUT", "600"))
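
The `timeout or int(os.getenv("LLM_TIMEOUT", "600"))` pattern gives an explicit argument precedence over the environment variable, with 600 seconds as the final default. A standalone sketch of that resolution order (the helper name is illustrative, not part of this commit):

    import os

    def resolve_timeout(explicit: int | None = None) -> int:
        # Explicit value first, then the LLM_TIMEOUT env var, then 600 seconds.
        return explicit or int(os.getenv("LLM_TIMEOUT", "600"))

    os.environ["LLM_TIMEOUT"] = "120"
    assert resolve_timeout() == 120     # falls back to the env var
    assert resolve_timeout(300) == 300  # explicit argument wins

One side effect of `or`: passing `timeout=0` is treated as falsy and also falls back to the environment variable.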

View File

@@ -123,7 +123,10 @@ class LLM:
         self._total_stats = RequestStats()
         self._last_request_stats = RequestStats()
-        self.memory_compressor = MemoryCompressor()
+        self.memory_compressor = MemoryCompressor(
+            model_name=self.config.model_name,
+            timeout=self.config.timeout,
+        )
         if agent_name:
             prompt_dir = Path(__file__).parent.parent / "agents" / agent_name
@@ -359,7 +362,7 @@ class LLM:
             "model": self.config.model_name,
             "messages": messages,
             "temperature": self.config.temperature,
-            "timeout": 180,
+            "timeout": self.config.timeout,
         }
         if self._should_include_stop_param():

View File

@@ -85,6 +85,7 @@ def _extract_message_text(msg: dict[str, Any]) -> str:
 def _summarize_messages(
     messages: list[dict[str, Any]],
     model: str,
+    timeout: int = 600,
 ) -> dict[str, Any]:
     if not messages:
         empty_summary = "<context_summary message_count='0'>{text}</context_summary>"
@@ -106,7 +107,7 @@ def _summarize_messages(
     completion_args = {
         "model": model,
         "messages": [{"role": "user", "content": prompt}],
-        "timeout": 180,
+        "timeout": timeout,
     }
     response = litellm.completion(**completion_args)
@@ -146,9 +147,11 @@ class MemoryCompressor:
         self,
         max_images: int = 3,
         model_name: str | None = None,
+        timeout: int = 600,
     ):
         self.max_images = max_images
         self.model_name = model_name or os.getenv("STRIX_LLM", "openai/gpt-5")
+        self.timeout = timeout
         if not self.model_name:
             raise ValueError("STRIX_LLM environment variable must be set and not empty")
@@ -202,7 +205,7 @@ class MemoryCompressor:
         chunk_size = 10
         for i in range(0, len(old_msgs), chunk_size):
             chunk = old_msgs[i : i + chunk_size]
-            summary = _summarize_messages(chunk, model_name)
+            summary = _summarize_messages(chunk, model_name, self.timeout)
             if summary:
                 compressed.append(summary)

View File

@@ -230,9 +230,18 @@ def create_agent(
     state = AgentState(task=task, agent_name=name, parent_id=parent_id, max_iterations=300)
-    llm_config = LLMConfig(prompt_modules=module_list)
     parent_agent = _agent_instances.get(parent_id)
+    timeout = None
+    if (
+        parent_agent
+        and hasattr(parent_agent, "llm_config")
+        and hasattr(parent_agent.llm_config, "timeout")
+    ):
+        timeout = parent_agent.llm_config.timeout
+    llm_config = LLMConfig(prompt_modules=module_list, timeout=timeout)
     agent_config = {
         "llm_config": llm_config,
         "state": state,