fix: use fixed per-request timeout for tool server health checks

The previous implementation divided the total timeout by the number of
retries, which made the timeout behavior confusing and the actual wait time
unpredictable. Health checks now use a consistent 5-second timeout per
request for clearer semantics.
This commit is contained in:
0xallam
2026-01-08 17:36:23 -08:00
committed by Ahmed Allam
parent 13ba8746dd
commit dc5043452e

View File

@@ -22,7 +22,7 @@ from .runtime import AbstractRuntime, SandboxInfo
STRIX_IMAGE = os.getenv("STRIX_IMAGE", "ghcr.io/usestrix/strix-sandbox:0.1.10")
HOST_GATEWAY_HOSTNAME = "host.docker.internal"
DOCKER_TIMEOUT = 60 # seconds
-TOOL_SERVER_HEALTH_TIMEOUT = 30 # seconds to wait for tool server to be healthy
+TOOL_SERVER_HEALTH_REQUEST_TIMEOUT = 5 # seconds per health check request
TOOL_SERVER_HEALTH_RETRIES = 10 # number of retries for health check
logger = logging.getLogger(__name__)
@@ -312,7 +312,7 @@ class DockerRuntime(AbstractRuntime):
self,
health_url: str,
max_retries: int = TOOL_SERVER_HEALTH_RETRIES,
-timeout: int = TOOL_SERVER_HEALTH_TIMEOUT,
+request_timeout: int = TOOL_SERVER_HEALTH_REQUEST_TIMEOUT,
) -> None:
import httpx
@@ -320,7 +320,7 @@ class DockerRuntime(AbstractRuntime):
for attempt in range(max_retries):
try:
-with httpx.Client(trust_env=False, timeout=timeout / max_retries) as client:
+with httpx.Client(trust_env=False, timeout=request_timeout) as client:
response = client.get(health_url)
response.raise_for_status()
health_data = response.json()