Running all agents under same container (#12)
This commit is contained in:
@@ -57,10 +57,10 @@ def _run_agent_in_thread(
|
||||
- Work independently with your own approach
|
||||
- Use agent_finish when complete to report back to parent
|
||||
- You are a SPECIALIST for this specific task
|
||||
- The previous browser, sessions, proxy history, and files in /workspace were for your
|
||||
parent agent. Do not depend on them.
|
||||
- You are starting with a fresh context. Fresh proxy, browser, and files.
|
||||
Only stuff in /shared_workspace is passed to you from context.
|
||||
- You share the same container as other agents but have your own tool server instance
|
||||
- All agents share /workspace directory and proxy history for better collaboration
|
||||
- You can see files created by other agents and proxy traffic from previous work
|
||||
- Build upon previous work but focus on your specific delegated task
|
||||
</instructions>
|
||||
</agent_delegation>"""
|
||||
|
||||
|
||||
@@ -49,7 +49,10 @@ async def _execute_tool_in_sandbox(tool_name: str, agent_state: Any, **kwargs: A
|
||||
server_url = await runtime.get_sandbox_url(agent_state.sandbox_id, tool_server_port)
|
||||
request_url = f"{server_url}/execute"
|
||||
|
||||
agent_id = getattr(agent_state, "agent_id", "unknown")
|
||||
|
||||
request_data = {
|
||||
"agent_id": agent_id,
|
||||
"tool_name": tool_name,
|
||||
"kwargs": kwargs,
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
<description>Execute a bash command in a persistent terminal session. The terminal maintains state (environment variables, current directory, running processes) between commands.</description>
|
||||
<parameters>
|
||||
<parameter name="command" type="string" required="true">
|
||||
<description>The bash command to execute. Cannot be empty - must provide a valid command or special key sequence.
|
||||
<description>The bash command to execute. Can be empty to check output of running commands (will wait for timeout period to collect output).
|
||||
|
||||
Supported special keys and sequences (based on official tmux key names):
|
||||
- Control sequences: C-c, C-d, C-z, C-a, C-e, C-k, C-l, C-u, C-w, etc. (also ^c, ^d, etc.)
|
||||
@@ -16,13 +16,16 @@
|
||||
- Shift sequences: S-key (e.g., S-F6, S-Tab, S-Left)
|
||||
- Combined modifiers: C-S-key, C-M-key, S-M-key, etc.
|
||||
|
||||
Special keys work automatically - no need to set is_input=true for keys like C-c, C-d, etc.
|
||||
These are useful for interacting with vim, emacs, REPLs, and other interactive applications.</description>
|
||||
</parameter>
|
||||
<parameter name="is_input" type="boolean" required="false">
|
||||
<description>If true, the command is sent as input to a currently running process. If false (default), the command is executed as a new bash command. Use this to interact with running processes.</description>
|
||||
<description>If true, the command is sent as input to a currently running process. If false (default), the command is executed as a new bash command.
|
||||
Note: Special keys (C-c, C-d, etc.) automatically work when a process is running - you don't need to set is_input=true for them.
|
||||
Use is_input=true for regular text input to running processes.</description>
|
||||
</parameter>
|
||||
<parameter name="timeout" type="number" required="false">
|
||||
<description>Optional timeout in seconds for command execution. If not provided, uses default timeout behavior. Set to higher values for long-running commands like installations or tests.</description>
|
||||
<description>Optional timeout in seconds: how long to wait for output before returning. Defaults to 10 seconds. Set a higher value for long-running commands such as installations or tests.</description>
|
||||
</parameter>
|
||||
<parameter name="terminal_id" type="string" required="false">
|
||||
<description>Identifier for the terminal session. Defaults to "default". Use different IDs to manage multiple concurrent terminal sessions.</description>
|
||||
@@ -44,7 +47,7 @@
|
||||
- exit_code: Exit code of the command (only for completed commands)
|
||||
- command: The executed command
|
||||
- terminal_id: The terminal session ID
|
||||
- status: Command status ('completed', 'timeout', 'running')
|
||||
- status: Command status ('completed', 'running', or 'error')
|
||||
- working_dir: Current working directory after command execution</description>
|
||||
</returns>
|
||||
<notes>
|
||||
@@ -56,22 +59,25 @@
|
||||
&& or ; operators, or make separate tool calls.
|
||||
|
||||
3. LONG-RUNNING COMMANDS:
|
||||
- For commands that run indefinitely, run them in background: 'python app.py > server.log 2>&1 &'
|
||||
- For commands that take time, set appropriate timeout parameter
|
||||
- Use is_input=true to interact with running processes
|
||||
- Commands never get killed automatically - they keep running in background
|
||||
- Set timeout to control how long to wait for output before returning
|
||||
- Use empty command "" to check progress (waits for timeout period to collect output)
|
||||
- Use C-c, C-d, C-z to interrupt processes (works automatically, no is_input needed)
|
||||
|
||||
4. TIMEOUT HANDLING:
|
||||
- Commands have a default soft timeout (30 seconds of no output changes)
|
||||
- Set custom timeout for longer operations
|
||||
- When timeout occurs, you can send empty command to get more output
|
||||
- Use control sequences (C-c, C-d, C-z) to interrupt processes
|
||||
- Timeout controls how long to wait before returning current output
|
||||
- Commands are NEVER killed on timeout - they keep running
|
||||
- After timeout, check progress with an empty command, send input with is_input=true, or interrupt the process (e.g., C-c) before running a new command
|
||||
- A command that finishes returns status "completed"; one still running at timeout returns status "running" - you have full control
|
||||
|
||||
5. MULTIPLE TERMINALS: Use different terminal_id values to run multiple concurrent sessions.
|
||||
|
||||
6. INTERACTIVE PROCESSES: Use is_input=true to send input to running processes like:
|
||||
- Interactive shells, REPLs, or prompts
|
||||
- Long-running applications waiting for input
|
||||
- Background processes that need interaction
|
||||
6. INTERACTIVE PROCESSES:
|
||||
- Special keys (C-c, C-d, etc.) work automatically when a process is running
|
||||
- Use is_input=true for regular text input to running processes like:
|
||||
* Interactive shells, REPLs, or prompts
|
||||
* Long-running applications waiting for input
|
||||
* Background processes that need interaction
|
||||
- Use no_enter=true when Enter must not be appended, e.g., Vim navigation, password typing, or multi-step commands
|
||||
|
||||
7. WORKING DIRECTORY: The terminal tracks and returns the current working directory.
|
||||
@@ -92,6 +98,12 @@
|
||||
<parameter=timeout>120</parameter>
|
||||
</function>
|
||||
|
||||
# Check progress of running command (waits for timeout to collect output)
|
||||
<function=terminal_execute>
|
||||
<parameter=command></parameter>
|
||||
<parameter=timeout>5</parameter>
|
||||
</function>
|
||||
|
||||
# Start a background service
|
||||
<function=terminal_execute>
|
||||
<parameter=command>python app.py > server.log 2>&1 &</parameter>
|
||||
@@ -103,7 +115,7 @@
|
||||
<parameter=is_input>true</parameter>
|
||||
</function>
|
||||
|
||||
# Interrupt a running process
|
||||
# Interrupt a running process (special keys work automatically)
|
||||
<function=terminal_execute>
|
||||
<parameter=command>C-c</parameter>
|
||||
</function>
|
||||
|
||||
@@ -33,7 +33,6 @@ class TerminalSession:
|
||||
self.work_dir = str(Path(work_dir).resolve())
|
||||
self._closed = False
|
||||
self._cwd = self.work_dir
|
||||
self.NO_CHANGE_TIMEOUT_SECONDS = 30
|
||||
|
||||
self.server: libtmux.Server | None = None
|
||||
self.session: libtmux.Session | None = None
|
||||
@@ -200,55 +199,126 @@ class TerminalSession:
|
||||
except (ValueError, IndexError):
|
||||
return None
|
||||
|
||||
def execute(
|
||||
self, command: str, is_input: bool = False, timeout: float = 30.0, no_enter: bool = False
|
||||
def _handle_empty_command(
|
||||
self,
|
||||
cur_pane_output: str,
|
||||
ps1_matches: list[re.Match[str]],
|
||||
is_command_running: bool,
|
||||
timeout: float,
|
||||
) -> dict[str, Any]:
|
||||
if not self._initialized:
|
||||
raise RuntimeError("Bash session is not initialized")
|
||||
|
||||
if command == "" or command.strip() == "":
|
||||
if not is_command_running:
|
||||
raw_command_output = self._combine_outputs_between_matches(cur_pane_output, ps1_matches)
|
||||
command_output = self._get_command_output("", raw_command_output)
|
||||
return {
|
||||
"content": (
|
||||
"Command cannot be empty - must provide a valid command or control sequence"
|
||||
),
|
||||
"content": command_output,
|
||||
"status": "completed",
|
||||
"exit_code": 0,
|
||||
"working_dir": self._cwd,
|
||||
}
|
||||
|
||||
start_time = time.time()
|
||||
last_pane_output = cur_pane_output
|
||||
|
||||
while True:
|
||||
cur_pane_output = self._get_pane_content()
|
||||
ps1_matches = self._matches_ps1_metadata(cur_pane_output)
|
||||
|
||||
if cur_pane_output.rstrip().endswith(self.PS1_END.rstrip()) or len(ps1_matches) > 0:
|
||||
exit_code = self._extract_exit_code_from_matches(ps1_matches)
|
||||
raw_command_output = self._combine_outputs_between_matches(
|
||||
cur_pane_output, ps1_matches
|
||||
)
|
||||
command_output = self._get_command_output("", raw_command_output)
|
||||
self.prev_status = BashCommandStatus.COMPLETED
|
||||
self.prev_output = ""
|
||||
self._ready_for_next_command()
|
||||
return {
|
||||
"content": command_output,
|
||||
"status": "completed",
|
||||
"exit_code": exit_code or 0,
|
||||
"working_dir": self._cwd,
|
||||
}
|
||||
|
||||
elapsed_time = time.time() - start_time
|
||||
if elapsed_time >= timeout:
|
||||
raw_command_output = self._combine_outputs_between_matches(
|
||||
cur_pane_output, ps1_matches
|
||||
)
|
||||
command_output = self._get_command_output("", raw_command_output)
|
||||
return {
|
||||
"content": command_output
|
||||
+ f"\n[Command still running after {timeout}s - showing output so far]",
|
||||
"status": "running",
|
||||
"exit_code": None,
|
||||
"working_dir": self._cwd,
|
||||
}
|
||||
|
||||
if cur_pane_output != last_pane_output:
|
||||
last_pane_output = cur_pane_output
|
||||
|
||||
time.sleep(self.POLL_INTERVAL)
|
||||
|
||||
def _handle_input_command(
|
||||
self, command: str, no_enter: bool, is_command_running: bool
|
||||
) -> dict[str, Any]:
|
||||
if not is_command_running:
|
||||
return {
|
||||
"content": "No command is currently running. Cannot send input.",
|
||||
"status": "error",
|
||||
"exit_code": None,
|
||||
"working_dir": self._cwd,
|
||||
}
|
||||
|
||||
if (
|
||||
self.prev_status
|
||||
in {
|
||||
BashCommandStatus.HARD_TIMEOUT,
|
||||
BashCommandStatus.NO_CHANGE_TIMEOUT,
|
||||
}
|
||||
and not is_input
|
||||
and command != ""
|
||||
):
|
||||
if not self.pane:
|
||||
raise RuntimeError("Terminal session not properly initialized")
|
||||
|
||||
is_special_key = self._is_special_key(command)
|
||||
should_add_enter = not is_special_key and not no_enter
|
||||
self.pane.send_keys(command, enter=should_add_enter)
|
||||
|
||||
time.sleep(2)
|
||||
cur_pane_output = self._get_pane_content()
|
||||
ps1_matches = self._matches_ps1_metadata(cur_pane_output)
|
||||
raw_command_output = self._combine_outputs_between_matches(cur_pane_output, ps1_matches)
|
||||
command_output = self._get_command_output(command, raw_command_output)
|
||||
|
||||
is_still_running = not (
|
||||
cur_pane_output.rstrip().endswith(self.PS1_END.rstrip()) or len(ps1_matches) > 0
|
||||
)
|
||||
|
||||
if is_still_running:
|
||||
return {
|
||||
"content": (
|
||||
f'Previous command still running. Cannot execute "{command}". '
|
||||
"Use is_input=True to interact with running process."
|
||||
),
|
||||
"status": "error",
|
||||
"content": command_output,
|
||||
"status": "running",
|
||||
"exit_code": None,
|
||||
"working_dir": self._cwd,
|
||||
}
|
||||
|
||||
exit_code = self._extract_exit_code_from_matches(ps1_matches)
|
||||
self.prev_status = BashCommandStatus.COMPLETED
|
||||
self.prev_output = ""
|
||||
self._ready_for_next_command()
|
||||
return {
|
||||
"content": command_output,
|
||||
"status": "completed",
|
||||
"exit_code": exit_code or 0,
|
||||
"working_dir": self._cwd,
|
||||
}
|
||||
|
||||
def _execute_new_command(self, command: str, no_enter: bool, timeout: float) -> dict[str, Any]:
|
||||
if not self.pane:
|
||||
raise RuntimeError("Terminal session not properly initialized")
|
||||
|
||||
initial_pane_output = self._get_pane_content()
|
||||
initial_ps1_matches = self._matches_ps1_metadata(initial_pane_output)
|
||||
initial_ps1_count = len(initial_ps1_matches)
|
||||
|
||||
start_time = time.time()
|
||||
last_change_time = start_time
|
||||
last_pane_output = initial_pane_output
|
||||
|
||||
if command != "":
|
||||
if not self.pane:
|
||||
raise RuntimeError("Terminal session not properly initialized")
|
||||
is_special_key = self._is_special_key(command)
|
||||
should_add_enter = not is_special_key and not no_enter
|
||||
self.pane.send_keys(command, enter=should_add_enter)
|
||||
is_special_key = self._is_special_key(command)
|
||||
should_add_enter = not is_special_key and not no_enter
|
||||
self.pane.send_keys(command, enter=should_add_enter)
|
||||
|
||||
while True:
|
||||
cur_pane_output = self._get_pane_content()
|
||||
@@ -257,7 +327,6 @@ class TerminalSession:
|
||||
|
||||
if cur_pane_output != last_pane_output:
|
||||
last_pane_output = cur_pane_output
|
||||
last_change_time = time.time()
|
||||
|
||||
if current_ps1_count > initial_ps1_count or cur_pane_output.rstrip().endswith(
|
||||
self.PS1_END.rstrip()
|
||||
@@ -283,26 +352,6 @@ class TerminalSession:
|
||||
"working_dir": self._cwd,
|
||||
}
|
||||
|
||||
time_since_last_change = time.time() - last_change_time
|
||||
if time_since_last_change >= self.NO_CHANGE_TIMEOUT_SECONDS:
|
||||
raw_command_output = self._combine_outputs_between_matches(
|
||||
cur_pane_output, ps1_matches
|
||||
)
|
||||
command_output = self._get_command_output(
|
||||
command,
|
||||
raw_command_output,
|
||||
continue_prefix="[Below is the output of the previous command.]\n",
|
||||
)
|
||||
self.prev_status = BashCommandStatus.NO_CHANGE_TIMEOUT
|
||||
|
||||
return {
|
||||
"content": command_output + f"\n[Command timed out - no output change for "
|
||||
f"{self.NO_CHANGE_TIMEOUT_SECONDS} seconds]",
|
||||
"status": "timeout",
|
||||
"exit_code": -1,
|
||||
"working_dir": self._cwd,
|
||||
}
|
||||
|
||||
elapsed_time = time.time() - start_time
|
||||
if elapsed_time >= timeout:
|
||||
raw_command_output = self._combine_outputs_between_matches(
|
||||
@@ -313,17 +362,59 @@ class TerminalSession:
|
||||
raw_command_output,
|
||||
continue_prefix="[Below is the output of the previous command.]\n",
|
||||
)
|
||||
self.prev_status = BashCommandStatus.HARD_TIMEOUT
|
||||
self.prev_status = BashCommandStatus.CONTINUE
|
||||
|
||||
timeout_msg = (
|
||||
f"\n[Command still running after {timeout}s - showing output so far. "
|
||||
"Use C-c to interrupt if needed.]"
|
||||
)
|
||||
return {
|
||||
"content": command_output + f"\n[Command timed out after {timeout} seconds]",
|
||||
"status": "timeout",
|
||||
"exit_code": -1,
|
||||
"content": command_output + timeout_msg,
|
||||
"status": "running",
|
||||
"exit_code": None,
|
||||
"working_dir": self._cwd,
|
||||
}
|
||||
|
||||
time.sleep(self.POLL_INTERVAL)
|
||||
|
||||
def execute(
    self, command: str, is_input: bool = False, timeout: float = 10.0, no_enter: bool = False
) -> dict[str, Any]:
    """Route a terminal request to the handler that matches the pane state.

    The pane is sampled once up front; a command counts as "running" when
    the pane text neither ends with the PS1 end marker nor contains any PS1
    metadata match.

    Args:
        command: Text or key name to send. A blank/whitespace-only value is
            forwarded to ``_handle_empty_command``.
        is_input: When true, the command is forwarded to
            ``_handle_input_command``.
        timeout: Forwarded to the empty-command and new-command handlers.
        no_enter: Forwarded to the input and new-command handlers.

    Returns:
        The handler's result dict, or an ``"error"`` status dict when a
        regular command is submitted while another one is still running.

    Raises:
        RuntimeError: If the session has not been initialized.
    """
    if not self._initialized:
        raise RuntimeError("Bash session is not initialized")

    pane_text = self._get_pane_content()
    prompt_matches = self._matches_ps1_metadata(pane_text)
    at_prompt = pane_text.rstrip().endswith(self.PS1_END.rstrip()) or len(prompt_matches) > 0
    busy = not at_prompt

    # Blank command: report the output of whatever is (or was) running.
    if not command.strip():
        return self._handle_empty_command(pane_text, prompt_matches, busy, timeout)

    special = self._is_special_key(command)

    # Explicit input, or a special key aimed at a running process, both go
    # through the input handler.
    if is_input or (special and busy):
        return self._handle_input_command(command, no_enter, busy)

    if not busy:
        return self._execute_new_command(command, no_enter, timeout)

    # A regular command cannot be started while another one is running.
    return {
        "content": (
            "A command is already running. Use is_input=true to send input to it, "
            "or interrupt it first (e.g., with C-c)."
        ),
        "status": "error",
        "exit_code": None,
        "working_dir": self._cwd,
    }
|
||||
|
||||
def _ready_for_next_command(self) -> None:
|
||||
self._clear_screen()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user