feat: add configurable timeout for LLM requests
This commit is contained in:
@@ -208,9 +208,12 @@ async def warm_up_llm() -> None:
|
||||
{"role": "user", "content": "Reply with just 'OK'."},
|
||||
]
|
||||
|
||||
+ llm_timeout = int(os.getenv("LLM_TIMEOUT", "600"))
|
||||
|
||||
response = litellm.completion(
|
||||
model=model_name,
|
||||
messages=test_messages,
|
||||
+ timeout=llm_timeout,
|
||||
)
|
||||
|
||||
validate_llm_response(response)
|
||||
|
||||
@@ -8,6 +8,7 @@ class LLMConfig:
|
||||
temperature: float = 0,
|
||||
enable_prompt_caching: bool = True,
|
||||
prompt_modules: list[str] | None = None,
|
||||
+ timeout: int | None = None,
|
||||
):
|
||||
self.model_name = model_name or os.getenv("STRIX_LLM", "openai/gpt-5")
|
||||
|
||||
@@ -17,3 +18,5 @@ class LLMConfig:
|
||||
self.temperature = max(0.0, min(1.0, temperature))
|
||||
self.enable_prompt_caching = enable_prompt_caching
|
||||
self.prompt_modules = prompt_modules or []
|
||||
|
||||
+ self.timeout = timeout or int(os.getenv("LLM_TIMEOUT", "600"))
|
||||
|
||||
@@ -123,7 +123,10 @@ class LLM:
|
||||
self._total_stats = RequestStats()
|
||||
self._last_request_stats = RequestStats()
|
||||
|
||||
- self.memory_compressor = MemoryCompressor()
|
||||
+ self.memory_compressor = MemoryCompressor(
|
||||
+ model_name=self.config.model_name,
|
||||
+ timeout=self.config.timeout,
|
||||
+ )
|
||||
|
||||
if agent_name:
|
||||
prompt_dir = Path(__file__).parent.parent / "agents" / agent_name
|
||||
@@ -359,7 +362,7 @@ class LLM:
|
||||
"model": self.config.model_name,
|
||||
"messages": messages,
|
||||
"temperature": self.config.temperature,
|
||||
- "timeout": 180,
|
||||
+ "timeout": self.config.timeout,
|
||||
}
|
||||
|
||||
if self._should_include_stop_param():
|
||||
|
||||
@@ -85,6 +85,7 @@ def _extract_message_text(msg: dict[str, Any]) -> str:
|
||||
def _summarize_messages(
|
||||
messages: list[dict[str, Any]],
|
||||
model: str,
|
||||
+ timeout: int = 600,
|
||||
) -> dict[str, Any]:
|
||||
if not messages:
|
||||
empty_summary = "<context_summary message_count='0'>{text}</context_summary>"
|
||||
@@ -106,7 +107,7 @@ def _summarize_messages(
|
||||
completion_args = {
|
||||
"model": model,
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
- "timeout": 180,
|
||||
+ "timeout": timeout,
|
||||
}
|
||||
|
||||
response = litellm.completion(**completion_args)
|
||||
@@ -146,9 +147,11 @@ class MemoryCompressor:
|
||||
self,
|
||||
max_images: int = 3,
|
||||
model_name: str | None = None,
|
||||
+ timeout: int = 600,
|
||||
):
|
||||
self.max_images = max_images
|
||||
self.model_name = model_name or os.getenv("STRIX_LLM", "openai/gpt-5")
|
||||
+ self.timeout = timeout
|
||||
|
||||
if not self.model_name:
|
||||
raise ValueError("STRIX_LLM environment variable must be set and not empty")
|
||||
@@ -202,7 +205,7 @@ class MemoryCompressor:
|
||||
chunk_size = 10
|
||||
for i in range(0, len(old_msgs), chunk_size):
|
||||
chunk = old_msgs[i : i + chunk_size]
|
||||
- summary = _summarize_messages(chunk, model_name)
|
||||
+ summary = _summarize_messages(chunk, model_name, self.timeout)
|
||||
if summary:
|
||||
compressed.append(summary)
|
||||
|
||||
|
||||
@@ -230,9 +230,18 @@ def create_agent(
|
||||
|
||||
state = AgentState(task=task, agent_name=name, parent_id=parent_id, max_iterations=300)
|
||||
|
||||
- llm_config = LLMConfig(prompt_modules=module_list)
|
||||
|
||||
+ parent_agent = _agent_instances.get(parent_id)
|
||||
|
||||
+ timeout = None
|
||||
+ if (
|
||||
+ parent_agent
|
||||
+ and hasattr(parent_agent, "llm_config")
|
||||
+ and hasattr(parent_agent.llm_config, "timeout")
|
||||
+ ):
|
||||
+ timeout = parent_agent.llm_config.timeout
|
||||
|
||||
|
||||
+ llm_config = LLMConfig(prompt_modules=module_list, timeout=timeout)
|
||||
|
||||
agent_config = {
|
||||
"llm_config": llm_config,
|
||||
"state": state,
|
||||
|
||||
Reference in New Issue
Block a user