From 61dea7010aed3c6bed23feab0d497b50269edaa4 Mon Sep 17 00:00:00 2001 From: 0xallam Date: Fri, 16 Jan 2026 03:40:09 -0800 Subject: [PATCH] refactor: simplify container initialization and fix startup reliability - Move tool server startup from Python to entrypoint script - Hardcode Caido port (48080) in entrypoint, remove from Python - Use /app/venv/bin/python directly instead of poetry run - Fix env var passing through sudo with sudo -E and explicit vars - Add Caido process monitoring and logging during startup - Add retry logic with exponential backoff for token fetch - Add tool server process validation before declaring ready - Simplify docker_runtime.py (489 -> 310 lines) - DRY up container state recovery into _recover_container_state() - Add container creation retry logic (3 attempts) - Fix GraphQL health check URL (/graphql/ with trailing slash) --- containers/docker-entrypoint.sh | 95 +++++-- strix/runtime/docker_runtime.py | 397 ++++++++--------------------- strix/tools/proxy/proxy_manager.py | 11 +- 3 files changed, 187 insertions(+), 316 deletions(-) diff --git a/containers/docker-entrypoint.sh b/containers/docker-entrypoint.sh index c4178cd..1a905dc 100644 --- a/containers/docker-entrypoint.sh +++ b/containers/docker-entrypoint.sh @@ -1,9 +1,12 @@ #!/bin/bash set -e -if [ -z "$CAIDO_PORT" ]; then - echo "Error: CAIDO_PORT must be set." - exit 1 +CAIDO_PORT=48080 +CAIDO_LOG="/tmp/caido_startup.log" + +if [ ! -f /app/certs/ca.p12 ]; then + echo "ERROR: CA certificate file /app/certs/ca.p12 not found." + exit 1 fi caido-cli --listen 127.0.0.1:${CAIDO_PORT} \ @@ -11,28 +14,62 @@ caido-cli --listen 127.0.0.1:${CAIDO_PORT} \ --no-logging \ --no-open \ --import-ca-cert /app/certs/ca.p12 \ - --import-ca-cert-pass "" > /dev/null 2>&1 & + --import-ca-cert-pass "" > "$CAIDO_LOG" 2>&1 & + +CAIDO_PID=$! +echo "Started Caido with PID $CAIDO_PID on port $CAIDO_PORT" echo "Waiting for Caido API to be ready..." +CAIDO_READY=false for i in {1..30}; do - if curl -s -o /dev/null http://localhost:${CAIDO_PORT}/graphql; then - echo "Caido API is ready." + if ! kill -0 $CAIDO_PID 2>/dev/null; then + echo "ERROR: Caido process died while waiting for API (iteration $i)." + echo "=== Caido log ===" + cat "$CAIDO_LOG" 2>/dev/null || echo "(no log available)" + exit 1 + fi + + if curl -s -o /dev/null -w "%{http_code}" http://localhost:${CAIDO_PORT}/graphql/ | grep -qE "^(200|400)$"; then + echo "Caido API is ready (attempt $i)." + CAIDO_READY=true break fi sleep 1 done +if [ "$CAIDO_READY" = false ]; then + echo "ERROR: Caido API did not become ready within 30 seconds." + echo "Caido process status: $(kill -0 $CAIDO_PID 2>&1 && echo 'running' || echo 'dead')" + echo "=== Caido log ===" + cat "$CAIDO_LOG" 2>/dev/null || echo "(no log available)" + exit 1 +fi + sleep 2 echo "Fetching API token..." -TOKEN=$(curl -s -X POST \ - -H "Content-Type: application/json" \ - -d '{"query":"mutation LoginAsGuest { loginAsGuest { token { accessToken } } }"}' \ - http://localhost:${CAIDO_PORT}/graphql | jq -r '.data.loginAsGuest.token.accessToken') +TOKEN="" +for attempt in 1 2 3 4 5; do + RESPONSE=$(curl -sL -X POST \ + -H "Content-Type: application/json" \ + -d '{"query":"mutation LoginAsGuest { loginAsGuest { token { accessToken } } }"}' \ + http://localhost:${CAIDO_PORT}/graphql) + + TOKEN=$(echo "$RESPONSE" | jq -r '.data.loginAsGuest.token.accessToken // empty') + + if [ -n "$TOKEN" ] && [ "$TOKEN" != "null" ]; then + echo "Successfully obtained API token (attempt $attempt)." + break + fi + + echo "Token fetch attempt $attempt failed: $RESPONSE" + sleep $((attempt * 2)) +done if [ -z "$TOKEN" ] || [ "$TOKEN" == "null" ]; then - echo "Failed to get API token from Caido." - curl -s -X POST -H "Content-Type: application/json" -d '{"query":"mutation { loginAsGuest { token { accessToken } } }"}' http://localhost:${CAIDO_PORT}/graphql + echo "ERROR: Failed to get API token from Caido after 5 attempts." + echo "=== Caido log ===" + cat "$CAIDO_LOG" 2>/dev/null || echo "(no log available)" exit 1 fi @@ -40,7 +77,7 @@ export CAIDO_API_TOKEN=$TOKEN echo "Caido API token has been set." echo "Creating a new Caido project..." -CREATE_PROJECT_RESPONSE=$(curl -s -X POST \ +CREATE_PROJECT_RESPONSE=$(curl -sL -X POST \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $TOKEN" \ -d '{"query":"mutation CreateProject { createProject(input: {name: \"sandbox\", temporary: true}) { project { id } } }"}' \ @@ -57,7 +94,7 @@ fi echo "Caido project created with ID: $PROJECT_ID" echo "Selecting Caido project..." -SELECT_RESPONSE=$(curl -s -X POST \ +SELECT_RESPONSE=$(curl -sL -X POST \ -H "Content-Type: application/json" \ -H "Authorization: Bearer $TOKEN" \ -d '{"query":"mutation SelectProject { selectProject(id: \"'$PROJECT_ID'\") { currentProject { project { id } } } }"}' \ @@ -114,9 +151,33 @@ sudo -u pentester certutil -N -d sql:/home/pentester/.pki/nssdb --empty-password sudo -u pentester certutil -A -n "Testing Root CA" -t "C,," -i /app/certs/ca.crt -d sql:/home/pentester/.pki/nssdb echo "✅ CA added to browser trust store" -echo "Container initialization complete - agents will start their own tool servers as needed" -echo "✅ Shared container ready for multi-agent use" +echo "Starting tool server..." +cd /app +TOOL_SERVER_TIMEOUT="${STRIX_SANDBOX_EXECUTION_TIMEOUT:-120}" +TOOL_SERVER_LOG="/tmp/tool_server.log" + +sudo -E -u pentester \ + PYTHONPATH=/app \ + STRIX_SANDBOX_MODE=true \ + TOOL_SERVER_TOKEN="$TOOL_SERVER_TOKEN" \ + TOOL_SERVER_PORT="$TOOL_SERVER_PORT" \ + TOOL_SERVER_TIMEOUT="$TOOL_SERVER_TIMEOUT" \ + /app/venv/bin/python strix/runtime/tool_server.py \ + --token="$TOOL_SERVER_TOKEN" \ + --host=0.0.0.0 \ + --port="$TOOL_SERVER_PORT" \ + --timeout="$TOOL_SERVER_TIMEOUT" > "$TOOL_SERVER_LOG" 2>&1 & + +sleep 3 +if ! pgrep -f "tool_server.py" > /dev/null; then + echo "ERROR: Tool server process failed to start" + echo "=== Tool server log ===" + cat "$TOOL_SERVER_LOG" 2>/dev/null || echo "(no log)" + exit 1 +fi +echo "✅ Tool server started on port $TOOL_SERVER_PORT" + +echo "✅ Container ready" cd /workspace - exec "$@" diff --git a/strix/runtime/docker_runtime.py b/strix/runtime/docker_runtime.py index ecf7fda..1ae2975 100644 --- a/strix/runtime/docker_runtime.py +++ b/strix/runtime/docker_runtime.py @@ -1,15 +1,13 @@ import contextlib -import logging import os import secrets import socket import time -from concurrent.futures import ThreadPoolExecutor -from concurrent.futures import TimeoutError as FuturesTimeoutError from pathlib import Path -from typing import Any, cast +from typing import cast import docker +import httpx from docker.errors import DockerException, ImageNotFound, NotFound from docker.models.containers import Container from requests.exceptions import ConnectionError as RequestsConnectionError @@ -22,10 +20,8 @@ from .runtime import AbstractRuntime, SandboxInfo HOST_GATEWAY_HOSTNAME = "host.docker.internal" -DOCKER_TIMEOUT = 60 # seconds -TOOL_SERVER_HEALTH_REQUEST_TIMEOUT = 5 # seconds per health check request -TOOL_SERVER_HEALTH_RETRIES = 10 # number of retries for health check -logger = logging.getLogger(__name__) +DOCKER_TIMEOUT = 60 +CONTAINER_TOOL_SERVER_PORT = 48081 class DockerRuntime(AbstractRuntime): @@ -33,50 +29,20 @@ class DockerRuntime(AbstractRuntime): try: self.client = docker.from_env(timeout=DOCKER_TIMEOUT) except (DockerException, RequestsConnectionError, RequestsTimeout) as e: - logger.exception("Failed to connect to Docker daemon") - if isinstance(e, RequestsConnectionError | RequestsTimeout): - raise SandboxInitializationError( - "Docker daemon unresponsive", - f"Connection timed out after {DOCKER_TIMEOUT} seconds. " - "Please ensure Docker Desktop is installed and running, " - "and try running strix again.", - ) from e raise SandboxInitializationError( "Docker is not available", - "Docker is not available or not configured correctly. " - "Please ensure Docker Desktop is installed and running, " - "and try running strix again.", + "Please ensure Docker Desktop is installed and running.", ) from e self._scan_container: Container | None = None self._tool_server_port: int | None = None self._tool_server_token: str | None = None - def _generate_sandbox_token(self) -> str: - return secrets.token_urlsafe(32) - def _find_available_port(self) -> int: with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: s.bind(("", 0)) return cast("int", s.getsockname()[1]) - def _exec_run_with_timeout( - self, container: Container, cmd: str, timeout: int = DOCKER_TIMEOUT, **kwargs: Any - ) -> Any: - with ThreadPoolExecutor(max_workers=1) as executor: - future = executor.submit(container.exec_run, cmd, **kwargs) - try: - return future.result(timeout=timeout) - except FuturesTimeoutError: - logger.exception(f"exec_run timed out after {timeout}s: {cmd[:100]}...") - raise SandboxInitializationError( - "Container command timed out", - f"Command timed out after {timeout} seconds. " - "Docker may be overloaded or unresponsive. " - "Please ensure Docker Desktop is installed and running, " - "and try running strix again.", - ) from None - def _get_scan_id(self, agent_id: str) -> str: try: from strix.telemetry.tracer import get_global_tracer @@ -84,129 +50,116 @@ class DockerRuntime(AbstractRuntime): tracer = get_global_tracer() if tracer and tracer.scan_config: return str(tracer.scan_config.get("scan_id", "default-scan")) - except ImportError: - logger.debug("Failed to import tracer, using fallback scan ID") - except AttributeError: - logger.debug("Tracer missing scan_config, using fallback scan ID") - + except (ImportError, AttributeError): + pass return f"scan-{agent_id.split('-')[0]}" def _verify_image_available(self, image_name: str, max_retries: int = 3) -> None: - def _validate_image(image: docker.models.images.Image) -> None: - if not image.id or not image.attrs: - raise ImageNotFound(f"Image {image_name} metadata incomplete") - for attempt in range(max_retries): try: image = self.client.images.get(image_name) - _validate_image(image) - except ImageNotFound: + if not image.id or not image.attrs: + raise ImageNotFound(f"Image {image_name} metadata incomplete") # noqa: TRY301 + except (ImageNotFound, DockerException): if attempt == max_retries - 1: - logger.exception(f"Image {image_name} not found after {max_retries} attempts") raise - logger.warning(f"Image {image_name} not ready, attempt {attempt + 1}/{max_retries}") - time.sleep(2**attempt) - except DockerException: - if attempt == max_retries - 1: - logger.exception(f"Failed to verify image {image_name}") - raise - logger.warning(f"Docker error verifying image, attempt {attempt + 1}/{max_retries}") time.sleep(2**attempt) else: - logger.debug(f"Image {image_name} verified as available") return - def _create_container_with_retry(self, scan_id: str, max_retries: int = 3) -> Container: - last_exception = None + def _recover_container_state(self, container: Container) -> None: + for env_var in container.attrs["Config"]["Env"]: + if env_var.startswith("TOOL_SERVER_TOKEN="): + self._tool_server_token = env_var.split("=", 1)[1] + break + + port_bindings = container.attrs.get("NetworkSettings", {}).get("Ports", {}) + port_key = f"{CONTAINER_TOOL_SERVER_PORT}/tcp" + if port_bindings.get(port_key): + self._tool_server_port = int(port_bindings[port_key][0]["HostPort"]) + + def _wait_for_tool_server(self, max_retries: int = 20, timeout: int = 5) -> None: + host = self._resolve_docker_host() + health_url = f"http://{host}:{self._tool_server_port}/health" + + for attempt in range(max_retries): + try: + with httpx.Client(trust_env=False, timeout=timeout) as client: + response = client.get(health_url) + if response.status_code == 200: + data = response.json() + if data.get("status") == "healthy": + return + except (httpx.ConnectError, httpx.TimeoutException, httpx.RequestError): + pass + + time.sleep(min(2**attempt * 0.5, 5)) + + raise SandboxInitializationError( + "Tool server failed to start", + "Container initialization timed out. Please try again.", + ) + + def _create_container(self, scan_id: str, max_retries: int = 2) -> Container: container_name = f"strix-scan-{scan_id}" image_name = Config.get("strix_image") if not image_name: raise ValueError("STRIX_IMAGE must be configured") - for attempt in range(max_retries): + self._verify_image_available(image_name) + + last_error: Exception | None = None + for attempt in range(max_retries + 1): try: - self._verify_image_available(image_name) - - try: - existing_container = self.client.containers.get(container_name) - logger.warning(f"Container {container_name} already exists, removing it") + with contextlib.suppress(NotFound): + existing = self.client.containers.get(container_name) with contextlib.suppress(Exception): - existing_container.stop(timeout=5) - existing_container.remove(force=True) + existing.stop(timeout=5) + existing.remove(force=True) time.sleep(1) - except NotFound: - pass - except DockerException as e: - logger.warning(f"Error checking/removing existing container: {e}") - caido_port = self._find_available_port() - tool_server_port = self._find_available_port() - tool_server_token = self._generate_sandbox_token() - - self._tool_server_port = tool_server_port - self._tool_server_token = tool_server_token + self._tool_server_port = self._find_available_port() + self._tool_server_token = secrets.token_urlsafe(32) + execution_timeout = Config.get("strix_sandbox_execution_timeout") or "120" container = self.client.containers.run( image_name, command="sleep infinity", detach=True, name=container_name, - hostname=f"strix-scan-{scan_id}", - ports={ - f"{caido_port}/tcp": caido_port, - f"{tool_server_port}/tcp": tool_server_port, - }, + hostname=container_name, + ports={f"{CONTAINER_TOOL_SERVER_PORT}/tcp": self._tool_server_port}, cap_add=["NET_ADMIN", "NET_RAW"], labels={"strix-scan-id": scan_id}, environment={ "PYTHONUNBUFFERED": "1", - "CAIDO_PORT": str(caido_port), - "TOOL_SERVER_PORT": str(tool_server_port), - "TOOL_SERVER_TOKEN": tool_server_token, + "TOOL_SERVER_PORT": str(CONTAINER_TOOL_SERVER_PORT), + "TOOL_SERVER_TOKEN": self._tool_server_token, + "STRIX_SANDBOX_EXECUTION_TIMEOUT": str(execution_timeout), "HOST_GATEWAY": HOST_GATEWAY_HOSTNAME, }, - extra_hosts=self._get_extra_hosts(), + extra_hosts={HOST_GATEWAY_HOSTNAME: "host-gateway"}, tty=True, ) self._scan_container = container - logger.info("Created container %s for scan %s", container.id, scan_id) + self._wait_for_tool_server() - self._initialize_container( - container, caido_port, tool_server_port, tool_server_token - ) except (DockerException, RequestsConnectionError, RequestsTimeout) as e: - last_exception = e - if attempt == max_retries - 1: - logger.exception(f"Failed to create container after {max_retries} attempts") - break - - logger.warning(f"Container creation attempt {attempt + 1}/{max_retries} failed") - - self._tool_server_port = None - self._tool_server_token = None - - sleep_time = (2**attempt) + (0.1 * attempt) - time.sleep(sleep_time) + last_error = e + if attempt < max_retries: + self._tool_server_port = None + self._tool_server_token = None + time.sleep(2**attempt) else: return container - if isinstance(last_exception, RequestsConnectionError | RequestsTimeout): - raise SandboxInitializationError( - "Failed to create sandbox container", - f"Docker daemon unresponsive after {max_retries} attempts " - f"(timed out after {DOCKER_TIMEOUT}s). " - "Please ensure Docker Desktop is installed and running, " - "and try running strix again.", - ) from last_exception raise SandboxInitializationError( - "Failed to create sandbox container", - f"Container creation failed after {max_retries} attempts: {last_exception}. " - "Please ensure Docker Desktop is installed and running, " - "and try running strix again.", - ) from last_exception + "Failed to create container", + f"Container creation failed after {max_retries + 1} attempts: {last_error}", + ) from last_error - def _get_or_create_scan_container(self, scan_id: str) -> Container: # noqa: PLR0912 + def _get_or_create_container(self, scan_id: str) -> Container: container_name = f"strix-scan-{scan_id}" if self._scan_container: @@ -223,33 +176,14 @@ class DockerRuntime(AbstractRuntime): container = self.client.containers.get(container_name) container.reload() - if ( - "strix-scan-id" not in container.labels - or container.labels["strix-scan-id"] != scan_id - ): - logger.warning( - f"Container {container_name} exists but missing/wrong label, updating" - ) - if container.status != "running": - logger.info(f"Starting existing container {container_name}") container.start() time.sleep(2) self._scan_container = container - - for env_var in container.attrs["Config"]["Env"]: - if env_var.startswith("TOOL_SERVER_PORT="): - self._tool_server_port = int(env_var.split("=")[1]) - elif env_var.startswith("TOOL_SERVER_TOKEN="): - self._tool_server_token = env_var.split("=")[1] - - logger.info(f"Reusing existing container {container_name}") - + self._recover_container_state(container) except NotFound: pass - except (DockerException, RequestsConnectionError, RequestsTimeout) as e: - logger.warning(f"Failed to get container by name {container_name}: {e}") else: return container @@ -262,102 +196,14 @@ class DockerRuntime(AbstractRuntime): if container.status != "running": container.start() time.sleep(2) + self._scan_container = container - - for env_var in container.attrs["Config"]["Env"]: - if env_var.startswith("TOOL_SERVER_PORT="): - self._tool_server_port = int(env_var.split("=")[1]) - elif env_var.startswith("TOOL_SERVER_TOKEN="): - self._tool_server_token = env_var.split("=")[1] - - logger.info(f"Found existing container by label for scan {scan_id}") + self._recover_container_state(container) return container - except (DockerException, RequestsConnectionError, RequestsTimeout) as e: - logger.warning("Failed to find existing container by label for scan %s: %s", scan_id, e) + except DockerException: + pass - logger.info("Creating new Docker container for scan %s", scan_id) - return self._create_container_with_retry(scan_id) - - def _initialize_container( - self, container: Container, caido_port: int, tool_server_port: int, tool_server_token: str - ) -> None: - logger.info("Initializing Caido proxy on port %s", caido_port) - self._exec_run_with_timeout( - container, - f"bash -c 'export CAIDO_PORT={caido_port} && /usr/local/bin/docker-entrypoint.sh true'", - detach=False, - ) - - time.sleep(5) - - result = self._exec_run_with_timeout( - container, - "bash -c 'source /etc/profile.d/proxy.sh && echo $CAIDO_API_TOKEN'", - user="pentester", - ) - caido_token = result.output.decode().strip() if result.exit_code == 0 else "" - - execution_timeout = Config.get("strix_sandbox_execution_timeout") or "120" - container.exec_run( - f"bash -c 'source /etc/profile.d/proxy.sh && cd /app && " - f"STRIX_SANDBOX_MODE=true CAIDO_API_TOKEN={caido_token} CAIDO_PORT={caido_port} " - f"poetry run python strix/runtime/tool_server.py --token {tool_server_token} " - f"--host 0.0.0.0 --port {tool_server_port} --timeout {execution_timeout} &'", - detach=True, - user="pentester", - ) - - time.sleep(2) - - host = self._resolve_docker_host() - health_url = f"http://{host}:{tool_server_port}/health" - self._wait_for_tool_server_health(health_url) - - def _wait_for_tool_server_health( - self, - health_url: str, - max_retries: int = TOOL_SERVER_HEALTH_RETRIES, - request_timeout: int = TOOL_SERVER_HEALTH_REQUEST_TIMEOUT, - ) -> None: - import httpx - - logger.info(f"Waiting for tool server health at {health_url}") - - for attempt in range(max_retries): - try: - with httpx.Client(trust_env=False, timeout=request_timeout) as client: - response = client.get(health_url) - response.raise_for_status() - health_data = response.json() - - if health_data.get("status") == "healthy": - logger.info( - f"Tool server is healthy after {attempt + 1} attempt(s): {health_data}" - ) - return - - logger.warning(f"Tool server returned unexpected status: {health_data}") - - except httpx.ConnectError: - logger.debug( - f"Tool server not ready (attempt {attempt + 1}/{max_retries}): " - f"Connection refused" - ) - except httpx.TimeoutException: - logger.debug( - f"Tool server not ready (attempt {attempt + 1}/{max_retries}): " - f"Request timed out" - ) - except (httpx.RequestError, httpx.HTTPStatusError) as e: - logger.debug(f"Tool server not ready (attempt {attempt + 1}/{max_retries}): {e}") - - sleep_time = min(2**attempt * 0.5, 5) - time.sleep(sleep_time) - - raise SandboxInitializationError( - "Tool server failed to start", - "Please ensure Docker Desktop is installed and running, and try running strix again.", - ) + return self._create_container(scan_id) def _copy_local_directory_to_container( self, container: Container, local_path: str, target_name: str | None = None @@ -368,17 +214,8 @@ class DockerRuntime(AbstractRuntime): try: local_path_obj = Path(local_path).resolve() if not local_path_obj.exists() or not local_path_obj.is_dir(): - logger.warning(f"Local path does not exist or is not directory: {local_path_obj}") return - if target_name: - logger.info( - f"Copying local directory {local_path_obj} to container at " - f"/workspace/{target_name}" - ) - else: - logger.info(f"Copying local directory {local_path_obj} to container") - tar_buffer = BytesIO() with tarfile.open(fileobj=tar_buffer, mode="w") as tar: for item in local_path_obj.rglob("*"): @@ -389,16 +226,12 @@ class DockerRuntime(AbstractRuntime): tar_buffer.seek(0) container.put_archive("/workspace", tar_buffer.getvalue()) - container.exec_run( "chown -R pentester:pentester /workspace && chmod -R 755 /workspace", user="root", ) - - logger.info("Successfully copied local directory to /workspace") - except (OSError, DockerException): - logger.exception("Failed to copy local directory to container") + pass async def create_sandbox( self, @@ -407,7 +240,7 @@ class DockerRuntime(AbstractRuntime): local_sources: list[dict[str, str]] | None = None, ) -> SandboxInfo: scan_id = self._get_scan_id(agent_id) - container = self._get_or_create_scan_container(scan_id) + container = self._get_or_create_container(scan_id) source_copied_key = f"_source_copied_{scan_id}" if local_sources and not hasattr(self, source_copied_key): @@ -415,40 +248,33 @@ class DockerRuntime(AbstractRuntime): source_path = source.get("source_path") if not source_path: continue - - target_name = source.get("workspace_subdir") - if not target_name: - target_name = Path(source_path).name or f"target_{index}" - + target_name = ( + source.get("workspace_subdir") or Path(source_path).name or f"target_{index}" + ) self._copy_local_directory_to_container(container, source_path, target_name) setattr(self, source_copied_key, True) - container_id = container.id - if container_id is None: + if container.id is None: raise RuntimeError("Docker container ID is unexpectedly None") - token = existing_token if existing_token is not None else self._tool_server_token - + token = existing_token or self._tool_server_token if self._tool_server_port is None or token is None: - raise RuntimeError("Tool server not initialized or no token available") + raise RuntimeError("Tool server not initialized") - api_url = await self.get_sandbox_url(container_id, self._tool_server_port) + host = self._resolve_docker_host() + api_url = f"http://{host}:{self._tool_server_port}" - await self._register_agent_with_tool_server(api_url, agent_id, token) + await self._register_agent(api_url, agent_id, token) return { - "workspace_id": container_id, + "workspace_id": container.id, "api_url": api_url, "auth_token": token, "tool_server_port": self._tool_server_port, "agent_id": agent_id, } - async def _register_agent_with_tool_server( - self, api_url: str, agent_id: str, token: str - ) -> None: - import httpx - + async def _register_agent(self, api_url: str, agent_id: str, token: str) -> None: try: async with httpx.AsyncClient(trust_env=False) as client: response = await client.post( @@ -458,54 +284,33 @@ class DockerRuntime(AbstractRuntime): timeout=30, ) response.raise_for_status() - logger.info(f"Registered agent {agent_id} with tool server") - except (httpx.RequestError, httpx.HTTPStatusError) as e: - logger.warning(f"Failed to register agent {agent_id}: {e}") + except httpx.RequestError: + pass async def get_sandbox_url(self, container_id: str, port: int) -> str: try: - container = self.client.containers.get(container_id) - container.reload() - - host = self._resolve_docker_host() - + self.client.containers.get(container_id) + return f"http://{self._resolve_docker_host()}:{port}" except NotFound: raise ValueError(f"Container {container_id} not found.") from None - except DockerException as e: - raise RuntimeError(f"Failed to get container URL for {container_id}: {e}") from e - else: - return f"http://{host}:{port}" def _resolve_docker_host(self) -> str: docker_host = os.getenv("DOCKER_HOST", "") - if not docker_host: - return "127.0.0.1" - - from urllib.parse import urlparse - - parsed = urlparse(docker_host) - - if parsed.scheme in ("tcp", "http", "https") and parsed.hostname: - return parsed.hostname + if docker_host: + from urllib.parse import urlparse + parsed = urlparse(docker_host) + if parsed.scheme in ("tcp", "http", "https") and parsed.hostname: + return parsed.hostname return "127.0.0.1" - def _get_extra_hosts(self) -> dict[str, str]: - return {HOST_GATEWAY_HOSTNAME: "host-gateway"} - async def destroy_sandbox(self, container_id: str) -> None: - logger.info("Destroying scan container %s", container_id) try: container = self.client.containers.get(container_id) container.stop() container.remove() - logger.info("Successfully destroyed container %s", container_id) - self._scan_container = None self._tool_server_port = None self._tool_server_token = None - - except NotFound: - logger.warning("Container %s not found for destruction.", container_id) - except DockerException as e: - logger.warning("Failed to destroy container %s: %s", container_id, e) + except (NotFound, DockerException): + pass diff --git a/strix/tools/proxy/proxy_manager.py b/strix/tools/proxy/proxy_manager.py index e02d85b..7679be6 100644 --- a/strix/tools/proxy/proxy_manager.py +++ b/strix/tools/proxy/proxy_manager.py @@ -16,12 +16,17 @@ if TYPE_CHECKING: from collections.abc import Callable +CAIDO_PORT = 48080 # Fixed port inside container + + class ProxyManager: def __init__(self, auth_token: str | None = None): host = "127.0.0.1" - port = os.getenv("CAIDO_PORT", "56789") - self.base_url = f"http://{host}:{port}/graphql" - self.proxies = {"http": f"http://{host}:{port}", "https": f"http://{host}:{port}"} + self.base_url = f"http://{host}:{CAIDO_PORT}/graphql" + self.proxies = { + "http": f"http://{host}:{CAIDO_PORT}", + "https": f"http://{host}:{CAIDO_PORT}", + } self.auth_token = auth_token or os.getenv("CAIDO_API_TOKEN") self.transport = RequestsHTTPTransport( url=self.base_url, headers={"Authorization": f"Bearer {self.auth_token}"}