feat(tools): add bulk operations support to todo tools

- update_todo: add `updates` param for bulk updates in one call
- mark_todo_done: add `todo_ids` param to mark multiple todos done
- mark_todo_pending: add `todo_ids` param to mark multiple todos pending
- delete_todo: add `todo_ids` param to delete multiple todos
- Increase todo renderer display limit from 10 to 25
- Maintain backward compatibility with single-ID usage
- Update prompts to keep todos short-horizon and dynamic
2025-12-14 20:30:04 -08:00
parent c29f13fd69
commit 37c8cffbe3
4 changed files with 327 additions and 106 deletions
--- a/strix/agents/StrixAgent/system_prompt.jinja
+++ b/strix/agents/StrixAgent/system_prompt.jinja
@@ -114,11 +114,13 @@ OPERATIONAL PRINCIPLES:
 TASK TRACKING:
 - USE THE TODO TOOL EXTENSIVELY - this is critical for staying organized and focused
 - Each subagent has their own INDEPENDENT todo list - your todos are private to you
- At the START of any task: Create todos to break down your work into clear steps
+- KEEP THE LIST SHORT-HORIZON: track only the next few concrete steps (3-6 max), not long-term goals.
+- REWRITE TODOS AS YOU LEARN: update, trim, or reprioritize the list whenever plans change or tasks finish.
+- At the START of any task: Create todos to break down your next steps into clear actions
 - BEFORE starting a task: Mark it as "in_progress" - this shows what you're actively doing
 - AFTER completing a task: Mark it as "done" immediately - don't wait
- When you discover new tasks: Add them as todos right away
- ALWAYS follow this workflow: create → in_progress → done
+- When you discover new tasks: Add them as todos right away and reprioritize; avoid dumping the whole project plan upfront
+- ALWAYS follow this workflow: create → in_progress → done, iterating frequently
 - A well-maintained todo list prevents going in circles, forgetting tasks, and losing focus
 - If you're unsure what to do next: Check your todo list first

--- a/strix/interface/tool_components/todo_renderer.py
+++ b/strix/interface/tool_components/todo_renderer.py
@@ -20,7 +20,7 @@ def _truncate(text: str, length: int = 80) -> str:


 def _format_todo_lines(
-    cls: type[BaseToolRenderer], result: dict[str, Any], limit: int = 10
+    cls: type[BaseToolRenderer], result: dict[str, Any], limit: int = 25
 ) -> list[str]:
    todos = result.get("todos")
    if not isinstance(todos, list) or not todos:
@@ -67,7 +67,7 @@ class CreateTodoRenderer(BaseToolRenderer):
        if result and isinstance(result, dict):
            if result.get("success"):
                lines = [header]
-                lines.extend(_format_todo_lines(cls, result, limit=10))
+                lines.extend(_format_todo_lines(cls, result))
                content_text = "\n".join(lines)
            else:
                error = result.get("error", "Failed to create todo")
@@ -92,7 +92,7 @@ class ListTodosRenderer(BaseToolRenderer):
        if result and isinstance(result, dict):
            if result.get("success"):
                lines = [header]
-                lines.extend(_format_todo_lines(cls, result, limit=10))
+                lines.extend(_format_todo_lines(cls, result))
                content_text = "\n".join(lines)
            else:
                error = result.get("error", "Unable to list todos")
@@ -117,7 +117,7 @@ class UpdateTodoRenderer(BaseToolRenderer):
        if result and isinstance(result, dict):
            if result.get("success"):
                lines = [header]
-                lines.extend(_format_todo_lines(cls, result, limit=10))
+                lines.extend(_format_todo_lines(cls, result))
                content_text = "\n".join(lines)
            else:
                error = result.get("error", "Failed to update todo")
@@ -142,7 +142,7 @@ class MarkTodoDoneRenderer(BaseToolRenderer):
        if result and isinstance(result, dict):
            if result.get("success"):
                lines = [header]
-                lines.extend(_format_todo_lines(cls, result, limit=10))
+                lines.extend(_format_todo_lines(cls, result))
                content_text = "\n".join(lines)
            else:
                error = result.get("error", "Failed to mark todo done")
@@ -167,7 +167,7 @@ class MarkTodoPendingRenderer(BaseToolRenderer):
        if result and isinstance(result, dict):
            if result.get("success"):
                lines = [header]
-                lines.extend(_format_todo_lines(cls, result, limit=10))
+                lines.extend(_format_todo_lines(cls, result))
                content_text = "\n".join(lines)
            else:
                error = result.get("error", "Failed to reopen todo")
@@ -192,7 +192,7 @@ class DeleteTodoRenderer(BaseToolRenderer):
        if result and isinstance(result, dict):
            if result.get("success"):
                lines = [header]
-                lines.extend(_format_todo_lines(cls, result, limit=10))
+                lines.extend(_format_todo_lines(cls, result))
                content_text = "\n".join(lines)
            else:
                error = result.get("error", "Failed to remove todo")
--- a/strix/tools/todo/todo_actions.py
+++ b/strix/tools/todo/todo_actions.py
@@ -47,6 +47,70 @@ def _sorted_todos(agent_id: str) -> list[dict[str, Any]]:
    return todos_list


+def _normalize_todo_ids(raw_ids: Any) -> list[str]:
+    if raw_ids is None:
+        return []
+
+    if isinstance(raw_ids, str):
+        stripped = raw_ids.strip()
+        if not stripped:
+            return []
+        try:
+            data = json.loads(stripped)
+        except json.JSONDecodeError:
+            data = stripped.split(",") if "," in stripped else [stripped]
+        if isinstance(data, list):
+            return [str(item).strip() for item in data if str(item).strip()]
+        return [str(data).strip()]
+
+    if isinstance(raw_ids, list):
+        return [str(item).strip() for item in raw_ids if str(item).strip()]
+
+    return [str(raw_ids).strip()]
+
+
+def _normalize_bulk_updates(raw_updates: Any) -> list[dict[str, Any]]:
+    if raw_updates is None:
+        return []
+
+    data = raw_updates
+    if isinstance(raw_updates, str):
+        stripped = raw_updates.strip()
+        if not stripped:
+            return []
+        try:
+            data = json.loads(stripped)
+        except json.JSONDecodeError as e:
+            raise ValueError("Updates must be valid JSON") from e
+
+    if isinstance(data, dict):
+        data = [data]
+
+    if not isinstance(data, list):
+        raise TypeError("Updates must be a list of update objects")
+
+    normalized: list[dict[str, Any]] = []
+    for item in data:
+        if not isinstance(item, dict):
+            raise TypeError("Each update must be an object with todo_id")
+
+        todo_id = item.get("todo_id") or item.get("id")
+        if not todo_id:
+            raise ValueError("Each update must include 'todo_id'")
+
+        normalized.append(
+            {
+                "todo_id": str(todo_id).strip(),
+                "title": item.get("title"),
+                "description": item.get("description"),
+                "priority": item.get("priority"),
+                "status": item.get("status"),
+            }
+        )
+
+    return normalized
+
+
 def _normalize_bulk_todos(raw_todos: Any) -> list[dict[str, Any]]:
    if raw_todos is None:
        return []
@@ -233,146 +297,272 @@ def list_todos(
        }


-@register_tool(sandbox_execution=False)
-def update_todo(
-    agent_state: Any,
+def _apply_single_update(
+    agent_todos: dict[str, dict[str, Any]],
    todo_id: str,
    title: str | None = None,
    description: str | None = None,
    priority: str | None = None,
    status: str | None = None,
+) -> dict[str, Any] | None:
+    if todo_id not in agent_todos:
+        return {"todo_id": todo_id, "error": f"Todo with ID '{todo_id}' not found"}
+
+    todo = agent_todos[todo_id]
+
+    if title is not None:
+        if not title.strip():
+            return {"todo_id": todo_id, "error": "Title cannot be empty"}
+        todo["title"] = title.strip()
+
+    if description is not None:
+        todo["description"] = description.strip() if description else None
+
+    if priority is not None:
+        try:
+            todo["priority"] = _normalize_priority(priority, str(todo.get("priority", "normal")))
+        except ValueError as exc:
+            return {"todo_id": todo_id, "error": str(exc)}
+
+    if status is not None:
+        status_candidate = status.lower()
+        if status_candidate not in VALID_STATUSES:
+            return {
+                "todo_id": todo_id,
+                "error": f"Invalid status. Must be one of: {', '.join(VALID_STATUSES)}",
+            }
+        todo["status"] = status_candidate
+        if status_candidate == "done":
+            todo["completed_at"] = datetime.now(UTC).isoformat()
+        else:
+            todo["completed_at"] = None
+
+    todo["updated_at"] = datetime.now(UTC).isoformat()
+    return None
+
+
+@register_tool(sandbox_execution=False)
+def update_todo(
+    agent_state: Any,
+    todo_id: str | None = None,
+    title: str | None = None,
+    description: str | None = None,
+    priority: str | None = None,
+    status: str | None = None,
+    updates: Any | None = None,
 ) -> dict[str, Any]:
    try:
        agent_id = agent_state.agent_id
        agent_todos = _get_agent_todos(agent_id)

-        if todo_id not in agent_todos:
-            return {"success": False, "error": f"Todo with ID '{todo_id}' not found"}
+        updates_to_apply: list[dict[str, Any]] = []

-        todo = agent_todos[todo_id]
+        if updates is not None:
+            updates_to_apply.extend(_normalize_bulk_updates(updates))

-        if title is not None:
-            if not title.strip():
-                return {"success": False, "error": "Title cannot be empty"}
-            todo["title"] = title.strip()
-
-        if description is not None:
-            todo["description"] = description.strip() if description else None
-
-        if priority is not None:
-            try:
-                todo["priority"] = _normalize_priority(
-                    priority, str(todo.get("priority", "normal"))
-                )
-            except ValueError as exc:
-                return {"success": False, "error": str(exc)}
-
-        if status is not None:
-            status_candidate = status.lower()
-            if status_candidate not in VALID_STATUSES:
-                return {
-                    "success": False,
-                    "error": f"Invalid status. Must be one of: {', '.join(VALID_STATUSES)}",
+        if todo_id is not None:
+            updates_to_apply.append(
+                {
+                    "todo_id": todo_id,
+                    "title": title,
+                    "description": description,
+                    "priority": priority,
+                    "status": status,
                }
-            todo["status"] = status_candidate
-            if status_candidate == "done":
-                todo["completed_at"] = datetime.now(UTC).isoformat()
-            else:
-                todo["completed_at"] = None
+            )

-        todo["updated_at"] = datetime.now(UTC).isoformat()
+        if not updates_to_apply:
+            return {
+                "success": False,
+                "error": "Provide todo_id or 'updates' list to update.",
+            }
+
+        updated: list[str] = []
+        errors: list[dict[str, Any]] = []
+
+        for update in updates_to_apply:
+            error = _apply_single_update(
+                agent_todos,
+                update["todo_id"],
+                update.get("title"),
+                update.get("description"),
+                update.get("priority"),
+                update.get("status"),
+            )
+            if error:
+                errors.append(error)
+            else:
+                updated.append(update["todo_id"])

        todos_list = _sorted_todos(agent_id)

-        return {
-            "success": True,
+        response: dict[str, Any] = {
+            "success": len(errors) == 0,
+            "updated": updated,
+            "updated_count": len(updated),
            "todos": todos_list,
            "total_count": len(todos_list),
        }

+        if errors:
+            response["errors"] = errors
+
    except (ValueError, TypeError) as e:
        return {"success": False, "error": str(e)}
+    else:
+        return response


@register_tool(sandbox_execution=False)
 def mark_todo_done(
    agent_state: Any,
-    todo_id: str,
+    todo_id: str | None = None,
+    todo_ids: Any | None = None,
 ) -> dict[str, Any]:
    try:
        agent_id = agent_state.agent_id
        agent_todos = _get_agent_todos(agent_id)

-        if todo_id not in agent_todos:
-            return {"success": False, "error": f"Todo with ID '{todo_id}' not found"}
+        ids_to_mark: list[str] = []
+        if todo_ids is not None:
+            ids_to_mark.extend(_normalize_todo_ids(todo_ids))
+        if todo_id is not None:
+            ids_to_mark.append(todo_id)

-        todo = agent_todos[todo_id]
-        todo["status"] = "done"
-        todo["completed_at"] = datetime.now(UTC).isoformat()
-        todo["updated_at"] = datetime.now(UTC).isoformat()
+        if not ids_to_mark:
+            return {"success": False, "error": "Provide todo_id or todo_ids to mark as done."}
+
+        marked: list[str] = []
+        errors: list[dict[str, Any]] = []
+        timestamp = datetime.now(UTC).isoformat()
+
+        for tid in ids_to_mark:
+            if tid not in agent_todos:
+                errors.append({"todo_id": tid, "error": f"Todo with ID '{tid}' not found"})
+                continue
+
+            todo = agent_todos[tid]
+            todo["status"] = "done"
+            todo["completed_at"] = timestamp
+            todo["updated_at"] = timestamp
+            marked.append(tid)

        todos_list = _sorted_todos(agent_id)

-        return {
-            "success": True,
+        response: dict[str, Any] = {
+            "success": len(errors) == 0,
+            "marked_done": marked,
+            "marked_count": len(marked),
            "todos": todos_list,
            "total_count": len(todos_list),
        }

+        if errors:
+            response["errors"] = errors
+
    except (ValueError, TypeError) as e:
        return {"success": False, "error": str(e)}
+    else:
+        return response


@register_tool(sandbox_execution=False)
 def mark_todo_pending(
    agent_state: Any,
-    todo_id: str,
+    todo_id: str | None = None,
+    todo_ids: Any | None = None,
 ) -> dict[str, Any]:
    try:
        agent_id = agent_state.agent_id
        agent_todos = _get_agent_todos(agent_id)

-        if todo_id not in agent_todos:
-            return {"success": False, "error": f"Todo with ID '{todo_id}' not found"}
+        ids_to_mark: list[str] = []
+        if todo_ids is not None:
+            ids_to_mark.extend(_normalize_todo_ids(todo_ids))
+        if todo_id is not None:
+            ids_to_mark.append(todo_id)

-        todo = agent_todos[todo_id]
-        todo["status"] = "pending"
-        todo["completed_at"] = None
-        todo["updated_at"] = datetime.now(UTC).isoformat()
+        if not ids_to_mark:
+            return {"success": False, "error": "Provide todo_id or todo_ids to mark as pending."}
+
+        marked: list[str] = []
+        errors: list[dict[str, Any]] = []
+        timestamp = datetime.now(UTC).isoformat()
+
+        for tid in ids_to_mark:
+            if tid not in agent_todos:
+                errors.append({"todo_id": tid, "error": f"Todo with ID '{tid}' not found"})
+                continue
+
+            todo = agent_todos[tid]
+            todo["status"] = "pending"
+            todo["completed_at"] = None
+            todo["updated_at"] = timestamp
+            marked.append(tid)

        todos_list = _sorted_todos(agent_id)

-        return {
-            "success": True,
+        response: dict[str, Any] = {
+            "success": len(errors) == 0,
+            "marked_pending": marked,
+            "marked_count": len(marked),
            "todos": todos_list,
            "total_count": len(todos_list),
        }

+        if errors:
+            response["errors"] = errors
+
    except (ValueError, TypeError) as e:
        return {"success": False, "error": str(e)}
+    else:
+        return response


@register_tool(sandbox_execution=False)
 def delete_todo(
    agent_state: Any,
-    todo_id: str,
+    todo_id: str | None = None,
+    todo_ids: Any | None = None,
 ) -> dict[str, Any]:
    try:
        agent_id = agent_state.agent_id
        agent_todos = _get_agent_todos(agent_id)

-        if todo_id not in agent_todos:
-            return {"success": False, "error": f"Todo with ID '{todo_id}' not found"}
+        ids_to_delete: list[str] = []
+        if todo_ids is not None:
+            ids_to_delete.extend(_normalize_todo_ids(todo_ids))
+        if todo_id is not None:
+            ids_to_delete.append(todo_id)

-        del agent_todos[todo_id]
+        if not ids_to_delete:
+            return {"success": False, "error": "Provide todo_id or todo_ids to delete."}
+
+        deleted: list[str] = []
+        errors: list[dict[str, Any]] = []
+
+        for tid in ids_to_delete:
+            if tid not in agent_todos:
+                errors.append({"todo_id": tid, "error": f"Todo with ID '{tid}' not found"})
+                continue
+
+            del agent_todos[tid]
+            deleted.append(tid)

        todos_list = _sorted_todos(agent_id)

-        return {
-            "success": True,
+        response: dict[str, Any] = {
+            "success": len(errors) == 0,
+            "deleted": deleted,
+            "deleted_count": len(deleted),
            "todos": todos_list,
            "total_count": len(todos_list),
        }

+        if errors:
+            response["errors"] = errors
+
    except (ValueError, TypeError) as e:
        return {"success": False, "error": str(e)}
+    else:
+        return response
--- a/strix/tools/todo/todo_actions_schema.xml
+++ b/strix/tools/todo/todo_actions_schema.xml
@@ -6,10 +6,11 @@
  do not interfere with other agents' todos. Use this to your advantage.

  WORKFLOW - Follow this for EVERY task:
-  1. Create todos at the START to break down your work
-  2. BEFORE starting a task: Mark it as "in_progress" using update_todo
-  3. AFTER completing a task: Mark it as "done" using mark_todo_done
-  4. When you discover new tasks: Add them as todos right away
+  1. Keep the list short-horizon: track only the next few concrete steps (3-6 max), not long-term goals.
+  2. Create/update todos as you learn new info; drop or rewrite items when plans change.
+  3. BEFORE starting a task: Mark it as "in_progress" using update_todo.
+  4. AFTER completing a task: Mark it as "done" using mark_todo_done.
+  5. When you discover new tasks: Add them right away and re-prioritize; avoid giant upfront lists.

  ALWAYS mark the current task as in_progress before working on it. This shows what you're
  actively doing. Then mark it done when finished. Never skip these status updates.
@@ -107,94 +108,122 @@
  </tool>

  <tool name="update_todo">
-    <description>Update an existing todo item's title, description, priority, or status.</description>
+    <description>Update one or multiple todo items. Supports bulk updates in a single call.</description>
    <parameters>
-      <parameter name="todo_id" type="string" required="true">
-        <description>ID of the todo to update</description>
+      <parameter name="todo_id" type="string" required="false">
+        <description>ID of a single todo to update (for simple updates)</description>
+      </parameter>
+      <parameter name="updates" type="string" required="false">
+        <description>Bulk update multiple todos at once. JSON array of objects with todo_id and fields to update: [{"todo_id": "abc", "status": "done"}, {"todo_id": "def", "priority": "high"}]</description>
      </parameter>
      <parameter name="title" type="string" required="false">
-        <description>New title for the todo</description>
+        <description>New title (used with todo_id)</description>
      </parameter>
      <parameter name="description" type="string" required="false">
-        <description>New description for the todo</description>
+        <description>New description (used with todo_id)</description>
      </parameter>
      <parameter name="priority" type="string" required="false">
-        <description>New priority: "low", "normal", "high", "critical"</description>
+        <description>New priority: "low", "normal", "high", "critical" (used with todo_id)</description>
      </parameter>
      <parameter name="status" type="string" required="false">
-        <description>New status: "pending", "in_progress", "done"</description>
+        <description>New status: "pending", "in_progress", "done" (used with todo_id)</description>
      </parameter>
    </parameters>
    <returns type="Dict[str, Any]">
-      <description>Response containing: - success: Whether the update was successful</description>
+      <description>Response containing: - updated: List of updated todo IDs - updated_count: Number updated - todos: Full sorted todo list - errors: Any failed updates</description>
    </returns>
    <examples>
-  # Update priority and add description
-  <function=update_todo>
-  <parameter=todo_id>abc123</parameter>
-  <parameter=priority>critical</parameter>
-  <parameter=description>Found potential RCE vector, needs immediate attention</parameter>
-  </function>
-
-  # Mark as in progress
+  # Single update
  <function=update_todo>
  <parameter=todo_id>abc123</parameter>
  <parameter=status>in_progress</parameter>
+  </function>
+
+  # Bulk update - mark multiple todos with different statuses in ONE call
+  <function=update_todo>
+  <parameter=updates>[{"todo_id": "abc123", "status": "done"}, {"todo_id": "def456", "status": "in_progress"}, {"todo_id": "ghi789", "priority": "critical"}]</parameter>
  </function>
    </examples>
  </tool>

  <tool name="mark_todo_done">
-    <description>Mark a todo item as completed. DO THIS IMMEDIATELY after finishing a task.</description>
-    <details>Mark todos as done right after completing them - don't wait! This keeps your list accurate,
-  helps track progress, and gives you a clear picture of what's been accomplished vs what remains.</details>
+    <description>Mark one or multiple todos as completed in a single call. DO THIS IMMEDIATELY after finishing tasks.</description>
+    <details>Mark todos as done right after completing them - don't wait! Supports marking multiple todos at once
+  to save tool calls. This keeps your list accurate and gives you a clear picture of progress.</details>
    <parameters>
-      <parameter name="todo_id" type="string" required="true">
-        <description>ID of the todo to mark as done</description>
+      <parameter name="todo_id" type="string" required="false">
+        <description>ID of a single todo to mark as done</description>
+      </parameter>
+      <parameter name="todo_ids" type="string" required="false">
+        <description>Mark multiple todos done at once. JSON array of IDs: ["abc123", "def456"] or comma-separated: "abc123, def456"</description>
      </parameter>
    </parameters>
    <returns type="Dict[str, Any]">
-      <description>Response containing: - success: Whether the operation was successful</description>
+      <description>Response containing: - marked_done: List of IDs marked done - marked_count: Number marked - todos: Full sorted list - errors: Any failures</description>
    </returns>
    <examples>
+  # Mark single todo done
  <function=mark_todo_done>
  <parameter=todo_id>abc123</parameter>
+  </function>
+
+  # Mark multiple todos done in ONE call
+  <function=mark_todo_done>
+  <parameter=todo_ids>["abc123", "def456", "ghi789"]</parameter>
  </function>
    </examples>
  </tool>

  <tool name="mark_todo_pending">
-    <description>Mark a todo item as pending (reopen a completed task).</description>
-    <details>Use this to reopen a task that was marked done but needs more work.</details>
+    <description>Mark one or multiple todos as pending (reopen completed tasks).</description>
+    <details>Use this to reopen tasks that were marked done but need more work. Supports bulk operations.</details>
    <parameters>
-      <parameter name="todo_id" type="string" required="true">
-        <description>ID of the todo to mark as pending</description>
+      <parameter name="todo_id" type="string" required="false">
+        <description>ID of a single todo to mark as pending</description>
+      </parameter>
+      <parameter name="todo_ids" type="string" required="false">
+        <description>Mark multiple todos pending at once. JSON array of IDs: ["abc123", "def456"] or comma-separated: "abc123, def456"</description>
      </parameter>
    </parameters>
    <returns type="Dict[str, Any]">
-      <description>Response containing: - success: Whether the operation was successful</description>
+      <description>Response containing: - marked_pending: List of IDs marked pending - marked_count: Number marked - todos: Full sorted list - errors: Any failures</description>
    </returns>
    <examples>
+  # Mark single todo pending
  <function=mark_todo_pending>
  <parameter=todo_id>abc123</parameter>
+  </function>
+
+  # Mark multiple todos pending in ONE call
+  <function=mark_todo_pending>
+  <parameter=todo_ids>["abc123", "def456"]</parameter>
  </function>
    </examples>
  </tool>

  <tool name="delete_todo">
-    <description>Delete a todo item.</description>
-    <details>Use this to remove todos that are no longer relevant or were created by mistake.</details>
+    <description>Delete one or multiple todos in a single call.</description>
+    <details>Use this to remove todos that are no longer relevant. Supports bulk deletion to save tool calls.</details>
    <parameters>
-      <parameter name="todo_id" type="string" required="true">
-        <description>ID of the todo to delete</description>
+      <parameter name="todo_id" type="string" required="false">
+        <description>ID of a single todo to delete</description>
+      </parameter>
+      <parameter name="todo_ids" type="string" required="false">
+        <description>Delete multiple todos at once. JSON array of IDs: ["abc123", "def456"] or comma-separated: "abc123, def456"</description>
      </parameter>
    </parameters>
    <returns type="Dict[str, Any]">
-      <description>Response containing: - success: Whether the deletion was successful</description>
+      <description>Response containing: - deleted: List of deleted IDs - deleted_count: Number deleted - todos: Remaining todos - errors: Any failures</description>
    </returns>
    <examples>
+  # Delete single todo
  <function=delete_todo>
  <parameter=todo_id>abc123</parameter>
+  </function>
+
+  # Delete multiple todos in ONE call
+  <function=delete_todo>
+  <parameter=todo_ids>["abc123", "def456", "ghi789"]</parameter>
  </function>
    </examples>
  </tool>