feat: add configurable timeout for LLM requests
@@ -208,9 +208,12 @@ async def warm_up_llm() -> None:
         {"role": "user", "content": "Reply with just 'OK'."},
     ]
 
+    llm_timeout = int(os.getenv("LLM_TIMEOUT", "600"))
+
     response = litellm.completion(
         model=model_name,
         messages=test_messages,
+        timeout=llm_timeout,
     )
 
     validate_llm_response(response)
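Note: `int(os.getenv("LLM_TIMEOUT", "600"))` raises ValueError when the variable is set to a non-numeric string. If that matters for the warm-up path, a hedged defensive variant (the helper name `_env_timeout` is hypothetical, not part of this commit):

    import os

    def _env_timeout(default: int = 600) -> int:
        # Parse LLM_TIMEOUT, falling back to `default` when unset or malformed.
        raw = os.getenv("LLM_TIMEOUT", "")
        try:
            return int(raw) if raw else default
        except ValueError:
            return default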
@@ -8,6 +8,7 @@ class LLMConfig:
         temperature: float = 0,
         enable_prompt_caching: bool = True,
         prompt_modules: list[str] | None = None,
+        timeout: int | None = None,
     ):
         self.model_name = model_name or os.getenv("STRIX_LLM", "openai/gpt-5")
 
@@ -17,3 +18,5 @@ class LLMConfig:
         self.temperature = max(0.0, min(1.0, temperature))
         self.enable_prompt_caching = enable_prompt_caching
         self.prompt_modules = prompt_modules or []
+
+        self.timeout = timeout or int(os.getenv("LLM_TIMEOUT", "600"))
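Note: because the fallback uses `or`, a caller passing `timeout=0` falls through to the default exactly like `timeout=None`; 0 does not mean "no timeout" here. A self-contained illustration of the assignment above (`resolve_timeout` is a stand-in, not part of the commit):

    import os

    os.environ.pop("LLM_TIMEOUT", None)

    def resolve_timeout(timeout: int | None) -> int:
        # Mirrors: self.timeout = timeout or int(os.getenv("LLM_TIMEOUT", "600"))
        return timeout or int(os.getenv("LLM_TIMEOUT", "600"))

    assert resolve_timeout(None) == 600
    assert resolve_timeout(0) == 600   # falsy zero also falls back to the default
    assert resolve_timeout(45) == 45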
@@ -123,7 +123,10 @@ class LLM:
         self._total_stats = RequestStats()
         self._last_request_stats = RequestStats()
 
-        self.memory_compressor = MemoryCompressor()
+        self.memory_compressor = MemoryCompressor(
+            model_name=self.config.model_name,
+            timeout=self.config.timeout,
+        )
 
         if agent_name:
             prompt_dir = Path(__file__).parent.parent / "agents" / agent_name
@@ -359,7 +362,7 @@ class LLM:
             "model": self.config.model_name,
             "messages": messages,
             "temperature": self.config.temperature,
-            "timeout": 180,
+            "timeout": self.config.timeout,
         }
 
         if self._should_include_stop_param():
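This replaces the hard-coded 180-second value on the main completion path, so the timeout now always comes from LLMConfig. For reference, a minimal sketch of the resulting call shape (model name and timeout are illustrative; litellm accepts a `timeout` in seconds, and the call itself needs provider credentials, hence commented out):

    import litellm

    completion_args = {
        "model": "openai/gpt-5",   # illustrative; at runtime this is self.config.model_name
        "messages": [{"role": "user", "content": "ping"}],
        "temperature": 0,
        "timeout": 600,            # seconds, forwarded by litellm to the provider request
    }
    # response = litellm.completion(**completion_args)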
@@ -85,6 +85,7 @@ def _extract_message_text(msg: dict[str, Any]) -> str:
 def _summarize_messages(
     messages: list[dict[str, Any]],
     model: str,
+    timeout: int = 600,
 ) -> dict[str, Any]:
     if not messages:
         empty_summary = "<context_summary message_count='0'>{text}</context_summary>"
@@ -106,7 +107,7 @@ def _summarize_messages(
     completion_args = {
         "model": model,
         "messages": [{"role": "user", "content": prompt}],
-        "timeout": 180,
+        "timeout": timeout,
     }
 
     response = litellm.completion(**completion_args)
@@ -146,9 +147,11 @@ class MemoryCompressor:
         self,
         max_images: int = 3,
         model_name: str | None = None,
+        timeout: int = 600,
     ):
         self.max_images = max_images
         self.model_name = model_name or os.getenv("STRIX_LLM", "openai/gpt-5")
+        self.timeout = timeout
 
         if not self.model_name:
             raise ValueError("STRIX_LLM environment variable must be set and not empty")
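Note: unlike LLMConfig, the compressor's `timeout: int = 600` default does not consult LLM_TIMEOUT; it relies on LLM wiring `self.config.timeout` through (see the constructor hunk above). A standalone sketch of the two defaulting styles (both functions are stand-ins, not part of the commit):

    import os

    def config_style(timeout=None):
        # LLMConfig: environment-aware default.
        return timeout or int(os.getenv("LLM_TIMEOUT", "600"))

    def compressor_style(timeout=600):
        # MemoryCompressor: fixed default, environment ignored.
        return timeout

    os.environ["LLM_TIMEOUT"] = "120"
    assert config_style() == 120
    assert compressor_style() == 600   # consistent only because LLM passes config.timeout in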
@@ -202,7 +205,7 @@ class MemoryCompressor:
         chunk_size = 10
         for i in range(0, len(old_msgs), chunk_size):
             chunk = old_msgs[i : i + chunk_size]
-            summary = _summarize_messages(chunk, model_name)
+            summary = _summarize_messages(chunk, model_name, self.timeout)
             if summary:
                 compressed.append(summary)
 
@@ -230,9 +230,18 @@ def create_agent(
 
     state = AgentState(task=task, agent_name=name, parent_id=parent_id, max_iterations=300)
 
-    llm_config = LLMConfig(prompt_modules=module_list)
     parent_agent = _agent_instances.get(parent_id)
 
+    timeout = None
+    if (
+        parent_agent
+        and hasattr(parent_agent, "llm_config")
+        and hasattr(parent_agent.llm_config, "timeout")
+    ):
+        timeout = parent_agent.llm_config.timeout
+
+    llm_config = LLMConfig(prompt_modules=module_list, timeout=timeout)
+
     agent_config = {
         "llm_config": llm_config,
         "state": state,
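The `hasattr` chain means a child agent inherits the parent's timeout only when the parent exposes `llm_config.timeout`; otherwise `timeout` stays None and LLMConfig applies its LLM_TIMEOUT/600 fallback. A self-contained illustration with stand-in objects (`inherit_timeout` and the SimpleNamespace parent are hypothetical):

    from types import SimpleNamespace

    def inherit_timeout(parent_agent) -> int | None:
        # Mirrors the guard in create_agent above.
        if (
            parent_agent
            and hasattr(parent_agent, "llm_config")
            and hasattr(parent_agent.llm_config, "timeout")
        ):
            return parent_agent.llm_config.timeout
        return None

    parent = SimpleNamespace(llm_config=SimpleNamespace(timeout=900))
    assert inherit_timeout(parent) == 900
    assert inherit_timeout(None) is None   # root agent: default resolution applies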