Running all agents under same container (#12)

This commit is contained in:
Ahmed Allam
2025-08-18 13:58:38 -07:00
committed by GitHub
parent 198a5e4a61
commit cb57426cc6
13 changed files with 546 additions and 292 deletions

View File

@@ -57,10 +57,10 @@ def _run_agent_in_thread(
- Work independently with your own approach
- Use agent_finish when complete to report back to parent
- You are a SPECIALIST for this specific task
- The previous browser, sessions, proxy history, and files in /workspace were for your
parent agent. Do not depend on them.
- You are starting with a fresh context. Fresh proxy, browser, and files.
Only stuff in /shared_workspace is passed to you from context.
- You share the same container as other agents but have your own tool server instance
- All agents share /workspace directory and proxy history for better collaboration
- You can see files created by other agents and proxy traffic from previous work
- Build upon previous work but focus on your specific delegated task
</instructions>
</agent_delegation>"""

View File

@@ -49,7 +49,10 @@ async def _execute_tool_in_sandbox(tool_name: str, agent_state: Any, **kwargs: A
server_url = await runtime.get_sandbox_url(agent_state.sandbox_id, tool_server_port)
request_url = f"{server_url}/execute"
agent_id = getattr(agent_state, "agent_id", "unknown")
request_data = {
"agent_id": agent_id,
"tool_name": tool_name,
"kwargs": kwargs,
}

View File

@@ -3,7 +3,7 @@
<description>Execute a bash command in a persistent terminal session. The terminal maintains state (environment variables, current directory, running processes) between commands.</description>
<parameters>
<parameter name="command" type="string" required="true">
<description>The bash command to execute. Cannot be empty - must provide a valid command or special key sequence.
<description>The bash command to execute. Can be empty to check output of running commands (will wait for timeout period to collect output).
Supported special keys and sequences (based on official tmux key names):
- Control sequences: C-c, C-d, C-z, C-a, C-e, C-k, C-l, C-u, C-w, etc. (also ^c, ^d, etc.)
@@ -16,13 +16,16 @@
- Shift sequences: S-key (e.g., S-F6, S-Tab, S-Left)
- Combined modifiers: C-S-key, C-M-key, S-M-key, etc.
Special keys work automatically - no need to set is_input=true for keys like C-c, C-d, etc.
These are useful for interacting with vim, emacs, REPLs, and other interactive applications.</description>
</parameter>
<parameter name="is_input" type="boolean" required="false">
<description>If true, the command is sent as input to a currently running process. If false (default), the command is executed as a new bash command. Use this to interact with running processes.</description>
<description>If true, the command is sent as input to a currently running process. If false (default), the command is executed as a new bash command.
Note: Special keys (C-c, C-d, etc.) automatically work when a process is running - you don't need to set is_input=true for them.
Use is_input=true for regular text input to running processes.</description>
</parameter>
<parameter name="timeout" type="number" required="false">
<description>Optional timeout in seconds for command execution. If not provided, uses default timeout behavior. Set to higher values for long-running commands like installations or tests.</description>
<description>Optional time in seconds to wait for command output before returning; the command itself is not killed when this time elapses. Defaults to 10 seconds. Set to higher values for long-running commands like installations or tests.</description>
</parameter>
<parameter name="terminal_id" type="string" required="false">
<description>Identifier for the terminal session. Defaults to "default". Use different IDs to manage multiple concurrent terminal sessions.</description>
@@ -44,7 +47,7 @@
- exit_code: Exit code of the command (only for completed commands)
- command: The executed command
- terminal_id: The terminal session ID
- status: Command status ('completed', 'timeout', 'running')
- status: Command status ('completed', 'running', or 'error')
- working_dir: Current working directory after command execution</description>
</returns>
<notes>
@@ -56,22 +59,25 @@
&& or ; operators, or make separate tool calls.
3. LONG-RUNNING COMMANDS:
- For commands that run indefinitely, run them in background: 'python app.py > server.log 2>&1 &'
- For commands that take time, set appropriate timeout parameter
- Use is_input=true to interact with running processes
- Commands never get killed automatically - they keep running in background
- Set timeout to control how long to wait for output before returning
- Use empty command "" to check progress (waits for timeout period to collect output)
- Use C-c, C-d, C-z to interrupt processes (works automatically, no is_input needed)
4. TIMEOUT HANDLING:
- Commands have a default soft timeout (30 seconds of no output changes)
- Set custom timeout for longer operations
- When timeout occurs, you can send empty command to get more output
- Use control sequences (C-c, C-d, C-z) to interrupt processes
- Timeout controls how long to wait before returning current output
- Commands are NEVER killed on timeout - they keep running
- After timeout, you can run new commands or check progress with empty command
- A command that is still running at timeout returns status "running"; a finished command returns status "completed" - you have full control
5. MULTIPLE TERMINALS: Use different terminal_id values to run multiple concurrent sessions.
6. INTERACTIVE PROCESSES: Use is_input=true to send input to running processes like:
- Interactive shells, REPLs, or prompts
- Long-running applications waiting for input
- Background processes that need interaction
6. INTERACTIVE PROCESSES:
- Special keys (C-c, C-d, etc.) work automatically when a process is running
- Use is_input=true for regular text input to running processes like:
* Interactive shells, REPLs, or prompts
* Long-running applications waiting for input
* Background processes that need interaction
- Use no_enter=true for cases such as Vim navigation, password typing, or multi-step commands
7. WORKING DIRECTORY: The terminal tracks and returns the current working directory.
@@ -92,6 +98,12 @@
<parameter=timeout>120</parameter>
</function>
# Check progress of running command (waits for timeout to collect output)
<function=terminal_execute>
<parameter=command></parameter>
<parameter=timeout>5</parameter>
</function>
# Start a background service
<function=terminal_execute>
<parameter=command>python app.py > server.log 2>&1 &</parameter>
@@ -103,7 +115,7 @@
<parameter=is_input>true</parameter>
</function>
# Interrupt a running process
# Interrupt a running process (special keys work automatically)
<function=terminal_execute>
<parameter=command>C-c</parameter>
</function>

View File

@@ -33,7 +33,6 @@ class TerminalSession:
self.work_dir = str(Path(work_dir).resolve())
self._closed = False
self._cwd = self.work_dir
self.NO_CHANGE_TIMEOUT_SECONDS = 30
self.server: libtmux.Server | None = None
self.session: libtmux.Session | None = None
@@ -200,55 +199,126 @@ class TerminalSession:
except (ValueError, IndexError):
return None
def execute(
self, command: str, is_input: bool = False, timeout: float = 30.0, no_enter: bool = False
def _handle_empty_command(
self,
cur_pane_output: str,
ps1_matches: list[re.Match[str]],
is_command_running: bool,
timeout: float,
) -> dict[str, Any]:
if not self._initialized:
raise RuntimeError("Bash session is not initialized")
if command == "" or command.strip() == "":
if not is_command_running:
raw_command_output = self._combine_outputs_between_matches(cur_pane_output, ps1_matches)
command_output = self._get_command_output("", raw_command_output)
return {
"content": (
"Command cannot be empty - must provide a valid command or control sequence"
),
"content": command_output,
"status": "completed",
"exit_code": 0,
"working_dir": self._cwd,
}
start_time = time.time()
last_pane_output = cur_pane_output
while True:
cur_pane_output = self._get_pane_content()
ps1_matches = self._matches_ps1_metadata(cur_pane_output)
if cur_pane_output.rstrip().endswith(self.PS1_END.rstrip()) or len(ps1_matches) > 0:
exit_code = self._extract_exit_code_from_matches(ps1_matches)
raw_command_output = self._combine_outputs_between_matches(
cur_pane_output, ps1_matches
)
command_output = self._get_command_output("", raw_command_output)
self.prev_status = BashCommandStatus.COMPLETED
self.prev_output = ""
self._ready_for_next_command()
return {
"content": command_output,
"status": "completed",
"exit_code": exit_code or 0,
"working_dir": self._cwd,
}
elapsed_time = time.time() - start_time
if elapsed_time >= timeout:
raw_command_output = self._combine_outputs_between_matches(
cur_pane_output, ps1_matches
)
command_output = self._get_command_output("", raw_command_output)
return {
"content": command_output
+ f"\n[Command still running after {timeout}s - showing output so far]",
"status": "running",
"exit_code": None,
"working_dir": self._cwd,
}
if cur_pane_output != last_pane_output:
last_pane_output = cur_pane_output
time.sleep(self.POLL_INTERVAL)
def _handle_input_command(
self, command: str, no_enter: bool, is_command_running: bool
) -> dict[str, Any]:
if not is_command_running:
return {
"content": "No command is currently running. Cannot send input.",
"status": "error",
"exit_code": None,
"working_dir": self._cwd,
}
if (
self.prev_status
in {
BashCommandStatus.HARD_TIMEOUT,
BashCommandStatus.NO_CHANGE_TIMEOUT,
}
and not is_input
and command != ""
):
if not self.pane:
raise RuntimeError("Terminal session not properly initialized")
is_special_key = self._is_special_key(command)
should_add_enter = not is_special_key and not no_enter
self.pane.send_keys(command, enter=should_add_enter)
time.sleep(2)
cur_pane_output = self._get_pane_content()
ps1_matches = self._matches_ps1_metadata(cur_pane_output)
raw_command_output = self._combine_outputs_between_matches(cur_pane_output, ps1_matches)
command_output = self._get_command_output(command, raw_command_output)
is_still_running = not (
cur_pane_output.rstrip().endswith(self.PS1_END.rstrip()) or len(ps1_matches) > 0
)
if is_still_running:
return {
"content": (
f'Previous command still running. Cannot execute "{command}". '
"Use is_input=True to interact with running process."
),
"status": "error",
"content": command_output,
"status": "running",
"exit_code": None,
"working_dir": self._cwd,
}
exit_code = self._extract_exit_code_from_matches(ps1_matches)
self.prev_status = BashCommandStatus.COMPLETED
self.prev_output = ""
self._ready_for_next_command()
return {
"content": command_output,
"status": "completed",
"exit_code": exit_code or 0,
"working_dir": self._cwd,
}
def _execute_new_command(self, command: str, no_enter: bool, timeout: float) -> dict[str, Any]:
if not self.pane:
raise RuntimeError("Terminal session not properly initialized")
initial_pane_output = self._get_pane_content()
initial_ps1_matches = self._matches_ps1_metadata(initial_pane_output)
initial_ps1_count = len(initial_ps1_matches)
start_time = time.time()
last_change_time = start_time
last_pane_output = initial_pane_output
if command != "":
if not self.pane:
raise RuntimeError("Terminal session not properly initialized")
is_special_key = self._is_special_key(command)
should_add_enter = not is_special_key and not no_enter
self.pane.send_keys(command, enter=should_add_enter)
is_special_key = self._is_special_key(command)
should_add_enter = not is_special_key and not no_enter
self.pane.send_keys(command, enter=should_add_enter)
while True:
cur_pane_output = self._get_pane_content()
@@ -257,7 +327,6 @@ class TerminalSession:
if cur_pane_output != last_pane_output:
last_pane_output = cur_pane_output
last_change_time = time.time()
if current_ps1_count > initial_ps1_count or cur_pane_output.rstrip().endswith(
self.PS1_END.rstrip()
@@ -283,26 +352,6 @@ class TerminalSession:
"working_dir": self._cwd,
}
time_since_last_change = time.time() - last_change_time
if time_since_last_change >= self.NO_CHANGE_TIMEOUT_SECONDS:
raw_command_output = self._combine_outputs_between_matches(
cur_pane_output, ps1_matches
)
command_output = self._get_command_output(
command,
raw_command_output,
continue_prefix="[Below is the output of the previous command.]\n",
)
self.prev_status = BashCommandStatus.NO_CHANGE_TIMEOUT
return {
"content": command_output + f"\n[Command timed out - no output change for "
f"{self.NO_CHANGE_TIMEOUT_SECONDS} seconds]",
"status": "timeout",
"exit_code": -1,
"working_dir": self._cwd,
}
elapsed_time = time.time() - start_time
if elapsed_time >= timeout:
raw_command_output = self._combine_outputs_between_matches(
@@ -313,17 +362,59 @@ class TerminalSession:
raw_command_output,
continue_prefix="[Below is the output of the previous command.]\n",
)
self.prev_status = BashCommandStatus.HARD_TIMEOUT
self.prev_status = BashCommandStatus.CONTINUE
timeout_msg = (
f"\n[Command still running after {timeout}s - showing output so far. "
"Use C-c to interrupt if needed.]"
)
return {
"content": command_output + f"\n[Command timed out after {timeout} seconds]",
"status": "timeout",
"exit_code": -1,
"content": command_output + timeout_msg,
"status": "running",
"exit_code": None,
"working_dir": self._cwd,
}
time.sleep(self.POLL_INTERVAL)
# Entry point for running something in this terminal session. Takes one snapshot of the
# pane, decides from it whether a command is currently running, and dispatches to the
# matching handler. Every path returns a dict (the inline error dict below has the keys
# "content", "status", "exit_code" and "working_dir").
# Raises RuntimeError when the session has not been initialized.
def execute(
self, command: str, is_input: bool = False, timeout: float = 10.0, no_enter: bool = False
) -> dict[str, Any]:
if not self._initialized:
raise RuntimeError("Bash session is not initialized")
# Single pane snapshot: used for the running check below and handed on to the
# empty-command handler.
cur_pane_output = self._get_pane_content()
ps1_matches = self._matches_ps1_metadata(cur_pane_output)
# A command counts as running only when the pane text does not end with PS1_END
# (trailing whitespace ignored on both sides) AND no PS1 metadata match was found.
is_command_running = not (
cur_pane_output.rstrip().endswith(self.PS1_END.rstrip()) or len(ps1_matches) > 0
)
# Empty or whitespace-only command: delegate to the empty-command handler together
# with the snapshot, the running flag and the timeout.
if command.strip() == "":
return self._handle_empty_command(
cur_pane_output, ps1_matches, is_command_running, timeout
)
is_special_key = self._is_special_key(command)
# Explicit input request: always goes to the input handler, which receives the
# running flag and decides what to do when nothing is running.
if is_input:
return self._handle_input_command(command, no_enter, is_command_running)
# A special key sent while a command is running is routed to the input handler
# even though is_input was not set.
if is_special_key and is_command_running:
return self._handle_input_command(command, no_enter, is_command_running)
# Any other command while something is still running is rejected with an error
# result instead of being sent to the pane.
if is_command_running:
return {
"content": (
"A command is already running. Use is_input=true to send input to it, "
"or interrupt it first (e.g., with C-c)."
),
"status": "error",
"exit_code": None,
"working_dir": self._cwd,
}
# Nothing is running: start the command as a new one.
return self._execute_new_command(command, no_enter, timeout)
def _ready_for_next_command(self) -> None:
self._clear_screen()