feat: add tool server health check and show error details in CLI
- Add _wait_for_tool_server_health() to verify tool server is responding after init
- Show error details in CLI mode when penetration test fails
- Simplify error message (remove technical URL details)
This commit is contained in:
@@ -167,8 +167,11 @@ async def run_cli(args: Any) -> None: # noqa: PLR0915
|
|||||||
|
|
||||||
if isinstance(result, dict) and not result.get("success", True):
|
if isinstance(result, dict) and not result.get("success", True):
|
||||||
error_msg = result.get("error", "Unknown error")
|
error_msg = result.get("error", "Unknown error")
|
||||||
|
error_details = result.get("details")
|
||||||
console.print()
|
console.print()
|
||||||
console.print(f"[bold red]❌ Penetration test failed:[/] {error_msg}")
|
console.print(f"[bold red]❌ Penetration test failed:[/] {error_msg}")
|
||||||
|
if error_details:
|
||||||
|
console.print(f"[dim]{error_details}[/]")
|
||||||
console.print()
|
console.print()
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
@@ -1,4 +1,3 @@
|
|||||||
import asyncio
|
|
||||||
import contextlib
|
import contextlib
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
@@ -305,22 +304,20 @@ class DockerRuntime(AbstractRuntime):
|
|||||||
|
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
||||||
self._wait_for_tool_server_health(tool_server_port)
|
host = self._resolve_docker_host()
|
||||||
|
health_url = f"http://{host}:{tool_server_port}/health"
|
||||||
|
self._wait_for_tool_server_health(health_url)
|
||||||
|
|
||||||
def _wait_for_tool_server_health(
|
def _wait_for_tool_server_health(
|
||||||
self,
|
self,
|
||||||
port: int,
|
health_url: str,
|
||||||
max_retries: int = TOOL_SERVER_HEALTH_RETRIES,
|
max_retries: int = TOOL_SERVER_HEALTH_RETRIES,
|
||||||
timeout: int = TOOL_SERVER_HEALTH_TIMEOUT,
|
timeout: int = TOOL_SERVER_HEALTH_TIMEOUT,
|
||||||
) -> None:
|
) -> None:
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
host = self._resolve_docker_host()
|
|
||||||
health_url = f"http://{host}:{port}/health"
|
|
||||||
|
|
||||||
logger.info(f"Waiting for tool server health at {health_url}")
|
logger.info(f"Waiting for tool server health at {health_url}")
|
||||||
|
|
||||||
last_error: Exception | None = None
|
|
||||||
for attempt in range(max_retries):
|
for attempt in range(max_retries):
|
||||||
try:
|
try:
|
||||||
with httpx.Client(trust_env=False, timeout=timeout / max_retries) as client:
|
with httpx.Client(trust_env=False, timeout=timeout / max_retries) as client:
|
||||||
@@ -336,32 +333,25 @@ class DockerRuntime(AbstractRuntime):
|
|||||||
|
|
||||||
logger.warning(f"Tool server returned unexpected status: {health_data}")
|
logger.warning(f"Tool server returned unexpected status: {health_data}")
|
||||||
|
|
||||||
except httpx.ConnectError as e:
|
except httpx.ConnectError:
|
||||||
last_error = e
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"Tool server not ready (attempt {attempt + 1}/{max_retries}): "
|
f"Tool server not ready (attempt {attempt + 1}/{max_retries}): "
|
||||||
f"Connection refused"
|
f"Connection refused"
|
||||||
)
|
)
|
||||||
except httpx.TimeoutException as e:
|
except httpx.TimeoutException:
|
||||||
last_error = e
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
f"Tool server not ready (attempt {attempt + 1}/{max_retries}): "
|
f"Tool server not ready (attempt {attempt + 1}/{max_retries}): "
|
||||||
f"Request timed out"
|
f"Request timed out"
|
||||||
)
|
)
|
||||||
except (httpx.RequestError, httpx.HTTPStatusError) as e:
|
except (httpx.RequestError, httpx.HTTPStatusError) as e:
|
||||||
last_error = e
|
|
||||||
logger.debug(f"Tool server not ready (attempt {attempt + 1}/{max_retries}): {e}")
|
logger.debug(f"Tool server not ready (attempt {attempt + 1}/{max_retries}): {e}")
|
||||||
|
|
||||||
sleep_time = min(2**attempt * 0.5, 5)
|
sleep_time = min(2**attempt * 0.5, 5)
|
||||||
time.sleep(sleep_time)
|
time.sleep(sleep_time)
|
||||||
|
|
||||||
error_detail = str(last_error) if last_error else "Unknown error"
|
|
||||||
raise SandboxInitializationError(
|
raise SandboxInitializationError(
|
||||||
"Tool server failed to start",
|
"Tool server failed to start",
|
||||||
f"Could not connect to tool server at {health_url} after {max_retries} attempts. "
|
"Please ensure Docker Desktop is installed and running, and try running strix again.",
|
||||||
f"Last error: {error_detail}. "
|
|
||||||
"Please ensure Docker Desktop is installed and running, "
|
|
||||||
"and try running strix again.",
|
|
||||||
)
|
)
|
||||||
|
|
||||||
def _copy_local_directory_to_container(
|
def _copy_local_directory_to_container(
|
||||||
@@ -439,7 +429,6 @@ class DockerRuntime(AbstractRuntime):
|
|||||||
|
|
||||||
api_url = await self.get_sandbox_url(container_id, self._tool_server_port)
|
api_url = await self.get_sandbox_url(container_id, self._tool_server_port)
|
||||||
|
|
||||||
await self._verify_tool_server_health(api_url)
|
|
||||||
await self._register_agent_with_tool_server(api_url, agent_id, token)
|
await self._register_agent_with_tool_server(api_url, agent_id, token)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -450,60 +439,6 @@ class DockerRuntime(AbstractRuntime):
|
|||||||
"agent_id": agent_id,
|
"agent_id": agent_id,
|
||||||
}
|
}
|
||||||
|
|
||||||
async def _verify_tool_server_health(
|
|
||||||
self,
|
|
||||||
api_url: str,
|
|
||||||
max_retries: int = 3,
|
|
||||||
timeout: int = 10,
|
|
||||||
) -> None:
|
|
||||||
import httpx
|
|
||||||
|
|
||||||
health_url = f"{api_url}/health"
|
|
||||||
last_error: Exception | None = None
|
|
||||||
|
|
||||||
for attempt in range(max_retries):
|
|
||||||
try:
|
|
||||||
async with httpx.AsyncClient(trust_env=False, timeout=timeout) as client:
|
|
||||||
response = await client.get(health_url)
|
|
||||||
response.raise_for_status()
|
|
||||||
health_data = response.json()
|
|
||||||
|
|
||||||
if health_data.get("status") == "healthy":
|
|
||||||
logger.debug(f"Tool server health verified: {health_data}")
|
|
||||||
return
|
|
||||||
|
|
||||||
logger.warning(f"Tool server returned unexpected status: {health_data}")
|
|
||||||
|
|
||||||
except httpx.ConnectError as e:
|
|
||||||
last_error = e
|
|
||||||
logger.debug(
|
|
||||||
f"Tool server health check failed (attempt {attempt + 1}/{max_retries}): "
|
|
||||||
f"Connection refused"
|
|
||||||
)
|
|
||||||
except httpx.TimeoutException as e:
|
|
||||||
last_error = e
|
|
||||||
logger.debug(
|
|
||||||
f"Tool server health check failed (attempt {attempt + 1}/{max_retries}): "
|
|
||||||
f"Request timed out"
|
|
||||||
)
|
|
||||||
except (httpx.RequestError, httpx.HTTPStatusError) as e:
|
|
||||||
last_error = e
|
|
||||||
logger.debug(
|
|
||||||
f"Tool server health check failed (attempt {attempt + 1}/{max_retries}): {e}"
|
|
||||||
)
|
|
||||||
|
|
||||||
if attempt < max_retries - 1:
|
|
||||||
await asyncio.sleep(min(2**attempt, 4))
|
|
||||||
|
|
||||||
error_detail = str(last_error) if last_error else "Unknown error"
|
|
||||||
raise SandboxInitializationError(
|
|
||||||
"Tool server is not responding",
|
|
||||||
f"Could not connect to tool server at {health_url}. "
|
|
||||||
f"Last error: {error_detail}. "
|
|
||||||
"Please ensure Docker Desktop is installed and running, "
|
|
||||||
"and try running strix again.",
|
|
||||||
)
|
|
||||||
|
|
||||||
async def _register_agent_with_tool_server(
|
async def _register_agent_with_tool_server(
|
||||||
self, api_url: str, agent_id: str, token: str
|
self, api_url: str, agent_id: str, token: str
|
||||||
) -> None:
|
) -> None:
|
||||||
|
|||||||
Reference in New Issue
Block a user