feat(tools): add dedicated todo tool for agent task tracking (#196)

- Add new todo tool with create, list, update, mark_done, mark_pending, delete actions
- Each subagent has isolated todo storage keyed by agent_id
- Support bulk todo creation via JSON array or bullet list
- Add TUI renderers for all todo actions with status markers
- Update notes tool to remove priority and todo-related functionality
- Add task tracking guidance to StrixAgent system prompt
- Fix instruction file error handling in CLI
2025-12-14 10:16:02 -08:00
parent a075ea1a0a
commit 2b926c733b
11 changed files with 908 additions and 120 deletions
--- a/strix/agents/StrixAgent/system_prompt.jinja
+++ b/strix/agents/StrixAgent/system_prompt.jinja
@@ -111,6 +111,17 @@ OPERATIONAL PRINCIPLES:
 - Try multiple approaches simultaneously - don't wait for one to fail
 - Continuously research payloads, bypasses, and exploitation techniques with the web_search tool; integrate findings into automated sprays and validation
 TASK TRACKING:
 - USE THE TODO TOOL EXTENSIVELY - this is critical for staying organized and focused
 - Each subagent has their own INDEPENDENT todo list - your todos are private to you
 - At the START of any task: Create todos to break down your work into clear steps
 - BEFORE starting a task: Mark it as "in_progress" - this shows what you're actively doing
 - AFTER completing a task: Mark it as "done" immediately - don't wait
 - When you discover new tasks: Add them as todos right away
 - ALWAYS follow this workflow: create → in_progress → done
 - A well-maintained todo list prevents going in circles, forgetting tasks, and losing focus
 - If you're unsure what to do next: Check your todo list first
 EFFICIENCY TACTICS:
 - Automate with Python scripts for complex workflows and repetitive inputs/tasks
 - Batch similar operations together
--- a/strix/interface/main.py
+++ b/strix/interface/main.py
@@ -315,7 +315,9 @@ Examples:
    args = parser.parse_args()
    if args.instruction and args.instruction_file:
-        parser.error("Cannot specify both --instruction and --instruction-file. Use one or the other.")
+        parser.error(
            "Cannot specify both --instruction and --instruction-file. Use one or the other."
        )
    if args.instruction_file:
        instruction_path = Path(args.instruction_file)
@@ -324,7 +326,7 @@ Examples:
                args.instruction = f.read().strip()
                if not args.instruction:
                    parser.error(f"Instruction file '{instruction_path}' is empty")
-        except Exception as e:
+        except Exception as e:  # noqa: BLE001
            parser.error(f"Failed to read instruction file '{instruction_path}': {e}")
    args.targets_info = []
--- a/strix/interface/tool_components/init.py
+++ b/strix/interface/tool_components/init.py
@@ -10,6 +10,7 @@ from . import (
    scan_info_renderer,
    terminal_renderer,
    thinking_renderer,
    todo_renderer,
    user_message_renderer,
    web_search_renderer,
 )
@@ -34,6 +35,7 @@ __all__ = [
    "scan_info_renderer",
    "terminal_renderer",
    "thinking_renderer",
    "todo_renderer",
    "user_message_renderer",
    "web_search_renderer",
 ]
--- a/strix/interface/tool_components/notes_renderer.py
+++ b/strix/interface/tool_components/notes_renderer.py
@@ -6,6 +6,12 @@ from .base_renderer import BaseToolRenderer
 from .registry import register_tool_renderer
 def _truncate(text: str, length: int = 800) -> str:
    if len(text) <= length:
        return text
    return text[: length - 3] + "..."
@register_tool_renderer
 class CreateNoteRenderer(BaseToolRenderer):
    tool_name: ClassVar[str] = "create_note"
@@ -17,23 +23,24 @@ class CreateNoteRenderer(BaseToolRenderer):
        title = args.get("title", "")
        content = args.get("content", "")
        category = args.get("category", "general")
-        header = "📝 [bold #fbbf24]Note[/]"
+        header = f"📝 [bold #fbbf24]Note[/] [dim]({category})[/]"
        lines = [header]
        if title:
-            title_display = title[:100] + "..." if len(title) > 100 else title
+            title_display = _truncate(title.strip(), 300)
-            note_parts = [f"{header}\n  [bold]{cls.escape_markup(title_display)}[/]"]
+            lines.append(f"  {cls.escape_markup(title_display)}")
-            if content:
+        if content:
-                content_display = content[:200] + "..." if len(content) > 200 else content
+            content_display = _truncate(content.strip(), 800)
-                note_parts.append(f"  [dim]{cls.escape_markup(content_display)}[/]")
+            lines.append(f"  [dim]{cls.escape_markup(content_display)}[/]")
-            content_text = "\n".join(note_parts)
+        if len(lines) == 1:
-        else:
+            lines.append("  [dim]Capturing...[/]")
            content_text = f"{header}\n  [dim]Creating note...[/]"
        css_classes = cls.get_css_classes("completed")
-        return Static(content_text, classes=css_classes)
+        return Static("\n".join(lines), classes=css_classes)
@register_tool_renderer
@@ -43,8 +50,8 @@ class DeleteNoteRenderer(BaseToolRenderer):
    @classmethod
    def render(cls, tool_data: dict[str, Any]) -> Static:  # noqa: ARG003
-        header = "🗑️ [bold #fbbf24]Delete Note[/]"
+        header = "📝 [bold #94a3b8]Note Removed[/]"
-        content_text = f"{header}\n  [dim]Deleting...[/]"
+        content_text = header
        css_classes = cls.get_css_classes("completed")
        return Static(content_text, classes=css_classes)
@@ -59,28 +66,24 @@ class UpdateNoteRenderer(BaseToolRenderer):
    def render(cls, tool_data: dict[str, Any]) -> Static:
        args = tool_data.get("args", {})
-        title = args.get("title", "")
+        title = args.get("title")
-        content = args.get("content", "")
+        content = args.get("content")
-        header = "✏️ [bold #fbbf24]Update Note[/]"
+        header = "📝 [bold #fbbf24]Note Updated[/]"
        lines = [header]
-        if title or content:
+        if title:
-            note_parts = [header]
+            lines.append(f"  {cls.escape_markup(_truncate(title, 300))}")
-            if title:
+        if content:
-                title_display = title[:100] + "..." if len(title) > 100 else title
+            content_display = _truncate(content.strip(), 800)
-                note_parts.append(f"  [bold]{cls.escape_markup(title_display)}[/]")
+            lines.append(f"  [dim]{cls.escape_markup(content_display)}[/]")
-            if content:
+        if len(lines) == 1:
-                content_display = content[:200] + "..." if len(content) > 200 else content
+            lines.append("  [dim]Updating...[/]")
                note_parts.append(f"  [dim]{cls.escape_markup(content_display)}[/]")
            content_text = "\n".join(note_parts)
        else:
            content_text = f"{header}\n  [dim]Updating...[/]"
        css_classes = cls.get_css_classes("completed")
-        return Static(content_text, classes=css_classes)
+        return Static("\n".join(lines), classes=css_classes)
@register_tool_renderer
@@ -92,17 +95,34 @@ class ListNotesRenderer(BaseToolRenderer):
    def render(cls, tool_data: dict[str, Any]) -> Static:
        result = tool_data.get("result")
-        header = "📋 [bold #fbbf24]Listing notes[/]"
+        header = "📝 [bold #fbbf24]Notes[/]"
-        if result and isinstance(result, dict) and "notes" in result:
+        if result and isinstance(result, dict) and result.get("success"):
-            notes = result["notes"]
+            count = result.get("total_count", 0)
-            if isinstance(notes, list):
+            notes = result.get("notes", []) or []
-                count = len(notes)
+            lines = [header]
-                content_text = f"{header}\n  [dim]{count} notes found[/]"
+
            if count == 0:
                lines.append("  [dim]No notes[/]")
            else:
-                content_text = f"{header}\n  [dim]No notes found[/]"
+                for note in notes[:5]:
                    title = note.get("title", "").strip() or "(untitled)"
                    category = note.get("category", "general")
                    content = note.get("content", "").strip()
                    lines.append(
                        f"  - {cls.escape_markup(_truncate(title, 300))} [dim]({category})[/]"
                    )
                    if content:
                        content_preview = _truncate(content, 400)
                        lines.append(f"    [dim]{cls.escape_markup(content_preview)}[/]")
                remaining = max(count - 5, 0)
                if remaining:
                    lines.append(f"  [dim]... +{remaining} more[/]")
            content_text = "\n".join(lines)
        else:
-            content_text = f"{header}\n  [dim]Listing notes...[/]"
+            content_text = f"{header}\n  [dim]Loading...[/]"
        css_classes = cls.get_css_classes("completed")
        return Static(content_text, classes=css_classes)
--- a/strix/interface/tool_components/todo_renderer.py
+++ b/strix/interface/tool_components/todo_renderer.py
@@ -0,0 +1,204 @@
 from typing import Any, ClassVar
 from textual.widgets import Static
 from .base_renderer import BaseToolRenderer
 from .registry import register_tool_renderer
 # Checkbox-style prefix rendered in front of each todo title, keyed by the
 # todo's status value.
 STATUS_MARKERS = {
    "pending": "[ ]",
    "in_progress": "[~]",
    "done": "[•]",
 }
 def _truncate(text: str, length: int = 80) -> str:
    if len(text) <= length:
        return text
    return text[: length - 3] + "..."
 def _format_todo_lines(
     cls: type[BaseToolRenderer], result: dict[str, Any], limit: int = 10
 ) -> list[str]:
     """Format the ``"todos"`` list of a tool result as markup lines.

     Args:
         cls: Renderer class whose ``escape_markup`` is applied to each title.
         result: Tool result dict; its ``"todos"`` value is read.
         limit: Maximum number of todos to render. Negative values are
             treated as zero.

     Returns:
         One line per rendered todo (status marker followed by the title),
         plus a trailing ``... +N more`` line when entries were left out.
         A single ``No todos`` line is returned when ``"todos"`` is missing,
         empty, or not a list.
     """
     todos = result.get("todos")
     if not isinstance(todos, list) or not todos:
         return ["  [dim]No todos[/]"]

     # Clamp so a negative limit cannot turn into a "drop from the end" slice
     # or inflate the hidden-entry count.
     shown = max(limit, 0)
     lines: list[str] = []

     for todo in todos[:shown]:
         # Tool results are untrusted display data: fall back to defaults
         # instead of raising inside the UI when an entry is malformed.
         entry = todo if isinstance(todo, dict) else {}

         status = entry.get("status", "pending")
         if not isinstance(status, str):
             status = "pending"
         marker = STATUS_MARKERS.get(status, STATUS_MARKERS["pending"])

         raw_title = entry.get("title")
         title = str(raw_title).strip() if raw_title is not None else ""
         title = cls.escape_markup(_truncate(title or "(untitled)", 90))

         if status == "done":
             title_markup = f"[dim strike]{title}[/]"
         elif status == "in_progress":
             title_markup = f"[italic]{title}[/]"
         else:
             title_markup = title

         lines.append(f"  {marker} {title_markup}")

     remaining = len(todos) - shown
     if remaining > 0:
         lines.append(f"  [dim]... +{remaining} more[/]")

     return lines
@register_tool_renderer
class CreateTodoRenderer(BaseToolRenderer):
    """Renderer registered for the ``create_todo`` tool."""

    tool_name: ClassVar[str] = "create_todo"
    css_classes: ClassVar[list[str]] = ["tool-call", "todo-tool"]

    @classmethod
    def render(cls, tool_data: dict[str, Any]) -> Static:
        """Build the widget for a ``create_todo`` call.

        Shows the todo list on success, the error text on failure, and a
        placeholder while ``tool_data`` carries no usable ``"result"``.
        """
        title_line = "📋 [bold #a78bfa]Todo[/]"
        outcome = tool_data.get("result")

        if not outcome or not isinstance(outcome, dict):
            # Result is missing, empty, or not a dict: show the placeholder.
            body = f"{title_line}\n  [dim]Creating...[/]"
        elif outcome.get("success"):
            body = "\n".join([title_line, *_format_todo_lines(cls, outcome, limit=10)])
        else:
            message = outcome.get("error", "Failed to create todo")
            body = f"{title_line}\n  [#ef4444]{cls.escape_markup(message)}[/]"

        return Static(body, classes=cls.get_css_classes("completed"))
@register_tool_renderer
class ListTodosRenderer(BaseToolRenderer):
    """Renderer registered for the ``list_todos`` tool."""

    tool_name: ClassVar[str] = "list_todos"
    css_classes: ClassVar[list[str]] = ["tool-call", "todo-tool"]

    @classmethod
    def render(cls, tool_data: dict[str, Any]) -> Static:
        """Build the widget for a ``list_todos`` call.

        Shows the todo list on success, the error text on failure, and a
        placeholder while ``tool_data`` carries no usable ``"result"``.
        """
        title_line = "📋 [bold #a78bfa]Todos[/]"
        outcome = tool_data.get("result")

        if not outcome or not isinstance(outcome, dict):
            # Result is missing, empty, or not a dict: show the placeholder.
            body = f"{title_line}\n  [dim]Loading...[/]"
        elif outcome.get("success"):
            body = "\n".join([title_line, *_format_todo_lines(cls, outcome, limit=10)])
        else:
            message = outcome.get("error", "Unable to list todos")
            body = f"{title_line}\n  [#ef4444]{cls.escape_markup(message)}[/]"

        return Static(body, classes=cls.get_css_classes("completed"))
@register_tool_renderer
class UpdateTodoRenderer(BaseToolRenderer):
    """Renderer registered for the ``update_todo`` tool."""

    tool_name: ClassVar[str] = "update_todo"
    css_classes: ClassVar[list[str]] = ["tool-call", "todo-tool"]

    @classmethod
    def render(cls, tool_data: dict[str, Any]) -> Static:
        """Build the widget for an ``update_todo`` call.

        Shows the todo list on success, the error text on failure, and a
        placeholder while ``tool_data`` carries no usable ``"result"``.
        """
        title_line = "📋 [bold #a78bfa]Todo Updated[/]"
        outcome = tool_data.get("result")

        if not outcome or not isinstance(outcome, dict):
            # Result is missing, empty, or not a dict: show the placeholder.
            body = f"{title_line}\n  [dim]Updating...[/]"
        elif outcome.get("success"):
            body = "\n".join([title_line, *_format_todo_lines(cls, outcome, limit=10)])
        else:
            message = outcome.get("error", "Failed to update todo")
            body = f"{title_line}\n  [#ef4444]{cls.escape_markup(message)}[/]"

        return Static(body, classes=cls.get_css_classes("completed"))
@register_tool_renderer
class MarkTodoDoneRenderer(BaseToolRenderer):
    """Renderer registered for the ``mark_todo_done`` tool."""

    tool_name: ClassVar[str] = "mark_todo_done"
    css_classes: ClassVar[list[str]] = ["tool-call", "todo-tool"]

    @classmethod
    def render(cls, tool_data: dict[str, Any]) -> Static:
        """Build the widget for a ``mark_todo_done`` call.

        Shows the todo list on success, the error text on failure, and a
        placeholder while ``tool_data`` carries no usable ``"result"``.
        """
        title_line = "📋 [bold #a78bfa]Todo Completed[/]"
        outcome = tool_data.get("result")

        if not outcome or not isinstance(outcome, dict):
            # Result is missing, empty, or not a dict: show the placeholder.
            body = f"{title_line}\n  [dim]Marking done...[/]"
        elif outcome.get("success"):
            body = "\n".join([title_line, *_format_todo_lines(cls, outcome, limit=10)])
        else:
            message = outcome.get("error", "Failed to mark todo done")
            body = f"{title_line}\n  [#ef4444]{cls.escape_markup(message)}[/]"

        return Static(body, classes=cls.get_css_classes("completed"))
@register_tool_renderer
class MarkTodoPendingRenderer(BaseToolRenderer):
    """Renderer registered for the ``mark_todo_pending`` tool."""

    tool_name: ClassVar[str] = "mark_todo_pending"
    css_classes: ClassVar[list[str]] = ["tool-call", "todo-tool"]

    @classmethod
    def render(cls, tool_data: dict[str, Any]) -> Static:
        """Build the widget for a ``mark_todo_pending`` call.

        Shows the todo list on success, the error text on failure, and a
        placeholder while ``tool_data`` carries no usable ``"result"``.
        """
        title_line = "📋 [bold #f59e0b]Todo Reopened[/]"
        outcome = tool_data.get("result")

        if not outcome or not isinstance(outcome, dict):
            # Result is missing, empty, or not a dict: show the placeholder.
            body = f"{title_line}\n  [dim]Reopening...[/]"
        elif outcome.get("success"):
            body = "\n".join([title_line, *_format_todo_lines(cls, outcome, limit=10)])
        else:
            message = outcome.get("error", "Failed to reopen todo")
            body = f"{title_line}\n  [#ef4444]{cls.escape_markup(message)}[/]"

        return Static(body, classes=cls.get_css_classes("completed"))
@register_tool_renderer
class DeleteTodoRenderer(BaseToolRenderer):
    """Renderer registered for the ``delete_todo`` tool."""

    tool_name: ClassVar[str] = "delete_todo"
    css_classes: ClassVar[list[str]] = ["tool-call", "todo-tool"]

    @classmethod
    def render(cls, tool_data: dict[str, Any]) -> Static:
        """Build the widget for a ``delete_todo`` call.

        Shows the todo list on success, the error text on failure, and a
        placeholder while ``tool_data`` carries no usable ``"result"``.
        """
        title_line = "📋 [bold #94a3b8]Todo Removed[/]"
        outcome = tool_data.get("result")

        if not outcome or not isinstance(outcome, dict):
            # Result is missing, empty, or not a dict: show the placeholder.
            body = f"{title_line}\n  [dim]Removing...[/]"
        elif outcome.get("success"):
            body = "\n".join([title_line, *_format_todo_lines(cls, outcome, limit=10)])
        else:
            message = outcome.get("error", "Failed to remove todo")
            body = f"{title_line}\n  [#ef4444]{cls.escape_markup(message)}[/]"

        return Static(body, classes=cls.get_css_classes("completed"))
--- a/strix/tools/init.py
+++ b/strix/tools/init.py
@@ -35,13 +35,13 @@ if not SANDBOX_MODE:
    from .reporting import *  # noqa: F403
    from .terminal import *  # noqa: F403
    from .thinking import *  # noqa: F403
    from .todo import *  # noqa: F403
    if HAS_PERPLEXITY_API:
        from .web_search import *  # noqa: F403
 else:
    from .browser import *  # noqa: F403
    from .file_edit import *  # noqa: F403
    from .notes import *  # noqa: F403
    from .proxy import *  # noqa: F403
    from .python import *  # noqa: F403
    from .terminal import *  # noqa: F403
--- a/strix/tools/notes/notes_actions.py
+++ b/strix/tools/notes/notes_actions.py
@@ -11,7 +11,6 @@ _notes_storage: dict[str, dict[str, Any]] = {}
 def _filter_notes(
    category: str | None = None,
    tags: list[str] | None = None,
    priority: str | None = None,
    search_query: str | None = None,
 ) -> list[dict[str, Any]]:
    filtered_notes = []
@@ -20,9 +19,6 @@ def _filter_notes(
        if category and note.get("category") != category:
            continue
        if priority and note.get("priority") != priority:
            continue
        if tags:
            note_tags = note.get("tags", [])
            if not any(tag in note_tags for tag in tags):
@@ -43,13 +39,12 @@ def _filter_notes(
    return filtered_notes
-@register_tool
+@register_tool(sandbox_execution=False)
 def create_note(
    title: str,
    content: str,
    category: str = "general",
    tags: list[str] | None = None,
    priority: str = "normal",
 ) -> dict[str, Any]:
    try:
        if not title or not title.strip():
@@ -58,7 +53,7 @@ def create_note(
        if not content or not content.strip():
            return {"success": False, "error": "Content cannot be empty", "note_id": None}
-        valid_categories = ["general", "findings", "methodology", "todo", "questions", "plan"]
+        valid_categories = ["general", "findings", "methodology", "questions", "plan"]
        if category not in valid_categories:
            return {
                "success": False,
@@ -66,14 +61,6 @@ def create_note(
                "note_id": None,
            }
        valid_priorities = ["low", "normal", "high", "urgent"]
        if priority not in valid_priorities:
            return {
                "success": False,
                "error": f"Invalid priority. Must be one of: {', '.join(valid_priorities)}",
                "note_id": None,
            }
        note_id = str(uuid.uuid4())[:5]
        timestamp = datetime.now(UTC).isoformat()
@@ -82,7 +69,6 @@ def create_note(
            "content": content.strip(),
            "category": category,
            "tags": tags or [],
            "priority": priority,
            "created_at": timestamp,
            "updated_at": timestamp,
        }
@@ -99,17 +85,14 @@ def create_note(
        }
-@register_tool
+@register_tool(sandbox_execution=False)
 def list_notes(
    category: str | None = None,
    tags: list[str] | None = None,
    priority: str | None = None,
    search: str | None = None,
 ) -> dict[str, Any]:
    try:
-        filtered_notes = _filter_notes(
+        filtered_notes = _filter_notes(category=category, tags=tags, search_query=search)
            category=category, tags=tags, priority=priority, search_query=search
        )
        return {
            "success": True,
@@ -126,13 +109,12 @@ def list_notes(
        }
-@register_tool
+@register_tool(sandbox_execution=False)
 def update_note(
    note_id: str,
    title: str | None = None,
    content: str | None = None,
    tags: list[str] | None = None,
    priority: str | None = None,
 ) -> dict[str, Any]:
    try:
        if note_id not in _notes_storage:
@@ -153,15 +135,6 @@ def update_note(
        if tags is not None:
            note["tags"] = tags
        if priority is not None:
            valid_priorities = ["low", "normal", "high", "urgent"]
            if priority not in valid_priorities:
                return {
                    "success": False,
                    "error": f"Invalid priority. Must be one of: {', '.join(valid_priorities)}",
                }
            note["priority"] = priority
        note["updated_at"] = datetime.now(UTC).isoformat()
        return {
@@ -173,7 +146,7 @@ def update_note(
        return {"success": False, "error": f"Failed to update note: {e}"}
-@register_tool
+@register_tool(sandbox_execution=False)
 def delete_note(note_id: str) -> dict[str, Any]:
    try:
        if note_id not in _notes_storage:
--- a/strix/tools/notes/notes_actions_schema.xml
+++ b/strix/tools/notes/notes_actions_schema.xml
@@ -1,10 +1,9 @@
 <tools>
  <tool name="create_note">
-    <description>Create a personal note for TODOs, side notes, plans, and organizational purposes during
+    <description>Create a personal note for observations, findings, and research during the scan.</description>
-  the scan.</description>
+    <details>Use this tool for documenting discoveries, observations, methodology notes, and questions.
-    <details>Use this tool for quick reminders, action items, planning thoughts, and organizational notes
+  This is your personal notepad for recording information you want to remember or reference later.
-  rather than formal vulnerability reports or detailed findings. This is your personal notepad
+  For tracking actionable tasks, use the todo tool instead.</details>
  for keeping track of tasks, ideas, and things to remember or follow up on.</details>
    <parameters>
      <parameter name="title" type="string" required="true">
        <description>Title of the note</description>
@@ -13,49 +12,41 @@
        <description>Content of the note</description>
      </parameter>
      <parameter name="category" type="string" required="false">
-        <description>Category to organize the note (default: "general", "findings", "methodology", "todo", "questions", "plan")</description>
+        <description>Category to organize the note (default: "general", "findings", "methodology", "questions", "plan")</description>
      </parameter>
      <parameter name="tags" type="string" required="false">
        <description>Tags for categorization</description>
      </parameter>
      <parameter name="priority" type="string" required="false">
        <description>Priority level of the note ("low", "normal", "high", "urgent")</description>
      </parameter>
    </parameters>
    <returns type="Dict[str, Any]">
      <description>Response containing: - note_id: ID of the created note - success: Whether the note was created successfully</description>
    </returns>
    <examples>
-  # Create a TODO reminder
+  # Document an interesting finding
  <function=create_note>
  <parameter=title>TODO: Check SSL Certificate Details</parameter>
  <parameter=content>Remember to verify SSL certificate validity and check for weak ciphers
               on the HTTPS service discovered on port 443. Also check for certificate
               transparency logs.</parameter>
  <parameter=category>todo</parameter>
  <parameter=tags>["ssl", "certificate", "followup"]</parameter>
  <parameter=priority>normal</parameter>
  </function>
  # Planning note
  <function=create_note>
  <parameter=title>Scan Strategy Planning</parameter>
  <parameter=content>Plan for next phase: 1) Complete subdomain enumeration 2) Test discovered
               web apps for OWASP Top 10 3) Check database services for default creds
               4) Review any custom applications for business logic flaws</parameter>
  <parameter=category>plan</parameter>
  <parameter=tags>["planning", "strategy", "next_steps"]</parameter>
  </function>
  # Side note for later investigation
  <function=create_note>
  <parameter=title>Interesting Directory Found</parameter>
-  <parameter=content>Found /backup/ directory that might contain sensitive files. Low priority
+  <parameter=content>Found /backup/ directory that might contain sensitive files. Directory listing
-               for now but worth checking if time permits. Directory listing seems
+               seems disabled but worth investigating further.</parameter>
               disabled.</parameter>
  <parameter=category>findings</parameter>
-  <parameter=tags>["directory", "backup", "low_priority"]</parameter>
+  <parameter=tags>["directory", "backup"]</parameter>
-  <parameter=priority>low</parameter>
+  </function>
  # Methodology note
  <function=create_note>
  <parameter=title>Authentication Flow Analysis</parameter>
  <parameter=content>The application uses JWT tokens stored in localStorage. Token expiration is
               set to 24 hours. Observed that refresh token rotation is not implemented.</parameter>
  <parameter=category>methodology</parameter>
  <parameter=tags>["auth", "jwt", "session"]</parameter>
  </function>
  # Research question
  <function=create_note>
  <parameter=title>Custom Header Investigation</parameter>
  <parameter=content>The API returns a custom X-Request-ID header. Need to research if this
               could be used for user tracking or has any security implications.</parameter>
  <parameter=category>questions</parameter>
  <parameter=tags>["headers", "research"]</parameter>
  </function>
    </examples>
  </tool>
@@ -84,9 +75,6 @@
      <parameter name="tags" type="string" required="false">
        <description>Filter by tags (returns notes with any of these tags)</description>
      </parameter>
      <parameter name="priority" type="string" required="false">
        <description>Filter by priority level</description>
      </parameter>
      <parameter name="search" type="string" required="false">
        <description>Search query to find in note titles and content</description>
      </parameter>
@@ -100,11 +88,6 @@
  <parameter=category>findings</parameter>
  </function>
  # List high priority items
  <function=list_notes>
  <parameter=priority>high</parameter>
  </function>
  # Search for SQL injection related notes
  <function=list_notes>
  <parameter=search>SQL injection</parameter>
@@ -132,9 +115,6 @@
      <parameter name="tags" type="string" required="false">
        <description>New tags for the note</description>
      </parameter>
      <parameter name="priority" type="string" required="false">
        <description>New priority level</description>
      </parameter>
    </parameters>
    <returns type="Dict[str, Any]">
      <description>Response containing: - success: Whether the note was updated successfully</description>
@@ -143,7 +123,6 @@
  <function=update_note>
  <parameter=note_id>note_123</parameter>
  <parameter=content>Updated content with new findings...</parameter>
  <parameter=priority>urgent</parameter>
  </function>
    </examples>
  </tool>
--- a/strix/tools/todo/init.py
+++ b/strix/tools/todo/init.py
@@ -0,0 +1,18 @@
 from .todo_actions import (
    create_todo,
    delete_todo,
    list_todos,
    mark_todo_done,
    mark_todo_pending,
    update_todo,
 )
 # Names exported by this package; each one is imported from .todo_actions.
 __all__ = [
    "create_todo",
    "delete_todo",
    "list_todos",
    "mark_todo_done",
    "mark_todo_pending",
    "update_todo",
 ]
--- a/strix/tools/todo/todo_actions.py
+++ b/strix/tools/todo/todo_actions.py
@@ -0,0 +1,378 @@
 import json
 import uuid
 from datetime import UTC, datetime
 from typing import Any
 from strix.tools.registry import register_tool
 # Priority values accepted by _normalize_priority (compared after lower-casing).
 VALID_PRIORITIES = ["low", "normal", "high", "critical"]
 # Status values a todo can carry.
 # NOTE(review): not referenced in the visible part of this file; presumably
 # used by update/validation code further down. Confirm.
 VALID_STATUSES = ["pending", "in_progress", "done"]
 # In-memory todo store: agent_id -> todo_id -> todo record.
 _todos_storage: dict[str, dict[str, dict[str, Any]]] = {}
 def _get_agent_todos(agent_id: str) -> dict[str, dict[str, Any]]:
     """Return the todo mapping stored for *agent_id*.

     An empty mapping is registered in ``_todos_storage`` the first time an
     agent id is seen, so the returned dict is always the stored object and
     can be mutated in place by callers.
     """
     return _todos_storage.setdefault(agent_id, {})
 def _normalize_priority(priority: str | None, default: str = "normal") -> str:
     """Return a validated, lower-cased priority.

     Args:
         priority: Requested priority; a falsy value selects *default*.
         default: Fallback priority; a falsy value selects ``"normal"``.

     Returns:
         The chosen priority, stripped of surrounding whitespace and
         lower-cased.

     Raises:
         ValueError: If the chosen value is not a string or is not one of
             ``VALID_PRIORITIES``.
     """
     chosen = priority or default or "normal"
     # Bulk todos arrive as decoded JSON, so the value may be a number or a
     # list. Calling .lower() on those raised AttributeError, which callers
     # catching (ValueError, TypeError) did not handle.
     candidate = chosen.strip().lower() if isinstance(chosen, str) else None
     if candidate not in VALID_PRIORITIES:
         raise ValueError(f"Invalid priority. Must be one of: {', '.join(VALID_PRIORITIES)}")
     return candidate
 def _sorted_todos(agent_id: str) -> list[dict[str, Any]]:
     """Return copies of an agent's todos in display order.

     Each returned dict is a shallow copy of the stored record with its
     ``"todo_id"`` added. Entries are ordered by status (done, in_progress,
     pending), then by priority (critical first), then by ``created_at``.
     Unknown status or priority values sort after the known ones.
     """
     status_rank = {"done": 0, "in_progress": 1, "pending": 2}
     priority_rank = {"critical": 0, "high": 1, "normal": 2, "low": 3}

     def sort_key(item: dict[str, Any]) -> tuple[int, int, str]:
         return (
             status_rank.get(item.get("status", "pending"), 99),
             priority_rank.get(item.get("priority", "normal"), 99),
             item.get("created_at", ""),
         )

     entries = [
         {**record, "todo_id": todo_id}
         for todo_id, record in _get_agent_todos(agent_id).items()
     ]
     return sorted(entries, key=sort_key)
 def _normalize_bulk_todos(raw_todos: Any) -> list[dict[str, Any]]:
    if raw_todos is None:
        return []
    data = raw_todos
    if isinstance(raw_todos, str):
        stripped = raw_todos.strip()
        if not stripped:
            return []
        try:
            data = json.loads(stripped)
        except json.JSONDecodeError:
            entries = [line.strip(" -*\t") for line in stripped.splitlines() if line.strip(" -*\t")]
            return [{"title": entry} for entry in entries]
    if isinstance(data, dict):
        data = [data]
    if not isinstance(data, list):
        raise TypeError("Todos must be provided as a list, dict, or JSON string")
    normalized: list[dict[str, Any]] = []
    for item in data:
        if isinstance(item, str):
            title = item.strip()
            if title:
                normalized.append({"title": title})
            continue
        if not isinstance(item, dict):
            raise TypeError("Each todo entry must be a string or object with a title")
        title = item.get("title", "")
        if not isinstance(title, str) or not title.strip():
            raise ValueError("Each todo entry must include a non-empty 'title'")
        normalized.append(
            {
                "title": title.strip(),
                "description": (item.get("description") or "").strip() or None,
                "priority": item.get("priority"),
            }
        )
    return normalized
@register_tool(sandbox_execution=False)
def create_todo(
    agent_state: Any,
    title: str | None = None,
    description: str | None = None,
    priority: str = "normal",
    todos: Any | None = None,
) -> dict[str, Any]:
    """Create one or more todos in the calling agent's todo list.

    Args:
        agent_state: Object whose ``agent_id`` selects the todo list.
        title: Title of a single todo to create (optional when ``todos`` is
            given).
        description: Optional description for the single todo.
        priority: Priority for the single todo and the default for bulk
            entries that do not specify one.
        todos: Bulk input accepted by ``_normalize_bulk_todos``.

    Returns:
        On success: ``success``, ``created`` (id, title and priority of each
        new todo), ``count``, the agent's full sorted ``todos`` and
        ``total_count``. On failure: ``success`` False, an ``error`` message
        and ``todo_id`` None. Nothing is stored when validation fails.
    """
    try:
        agent_id = agent_state.agent_id
        default_priority = _normalize_priority(priority)

        tasks_to_create: list[dict[str, Any]] = []

        if todos is not None:
            tasks_to_create.extend(_normalize_bulk_todos(todos))

        if title and title.strip():
            tasks_to_create.append(
                {
                    "title": title.strip(),
                    "description": description.strip() if description else None,
                    "priority": default_priority,
                }
            )

        if not tasks_to_create:
            return {
                "success": False,
                "error": "Provide a title or 'todos' list to create.",
                "todo_id": None,
            }

        # Validate every priority before touching storage, so one bad entry
        # cannot leave the batch half-created while the call reports failure.
        validated = [
            (task, _normalize_priority(task.get("priority"), default_priority))
            for task in tasks_to_create
        ]

        agent_todos = _get_agent_todos(agent_id)
        created: list[dict[str, Any]] = []

        for task, task_priority in validated:
            # Ids are short uuid prefixes; regenerate on the rare collision
            # so an existing todo is never overwritten.
            todo_id = str(uuid.uuid4())[:6]
            while todo_id in agent_todos:
                todo_id = str(uuid.uuid4())[:6]

            timestamp = datetime.now(UTC).isoformat()

            agent_todos[todo_id] = {
                "title": task["title"],
                "description": task.get("description"),
                "priority": task_priority,
                "status": "pending",
                "created_at": timestamp,
                "updated_at": timestamp,
                "completed_at": None,
            }

            created.append(
                {
                    "todo_id": todo_id,
                    "title": task["title"],
                    "priority": task_priority,
                }
            )

    except (ValueError, TypeError) as e:
        return {"success": False, "error": f"Failed to create todo: {e}", "todo_id": None}
    else:
        todos_list = _sorted_todos(agent_id)

        response: dict[str, Any] = {
            "success": True,
            "created": created,
            "count": len(created),
            "todos": todos_list,
            "total_count": len(todos_list),
        }

        return response
@register_tool(sandbox_execution=False)
def list_todos(
    agent_state: Any,
    status: str | None = None,
    priority: str | None = None,
) -> dict[str, Any]:
    """Return the calling agent's todos, optionally filtered, in display order.

    Args:
        agent_state: Object exposing ``agent_id``; selects whose todos are listed.
        status: Keep only todos whose status equals this value (lowercased
            before comparison). Non-string or empty values disable the filter.
        priority: Keep only todos whose priority equals this value (lowercased
            before comparison). Non-string or empty values disable the filter.

    Returns:
        On success, a dict with ``todos`` (copies of the stored todos, each
        with its ``todo_id`` added), ``total_count`` (number of returned
        todos) and ``summary`` (per-status counts of the returned todos).
        On ``ValueError``/``TypeError`` the same keys come back empty, with
        ``success`` set to ``False`` and an ``error`` message.
    """
    # Lower rank sorts first: done, then in_progress, then pending.
    status_rank = {"done": 0, "in_progress": 1, "pending": 2}
    priority_rank = {"critical": 0, "high": 1, "normal": 2, "low": 3}

    def _display_key(entry: dict[str, Any]) -> tuple[int, int, Any]:
        # Unknown statuses/priorities sink to the end of their group (rank 99).
        return (
            status_rank.get(entry.get("status", "pending"), 99),
            priority_rank.get(entry.get("priority", "normal"), 99),
            entry.get("created_at", ""),
        )

    try:
        stored = _get_agent_todos(agent_state.agent_id)
        wanted_status = status.lower() if isinstance(status, str) else None
        wanted_priority = priority.lower() if isinstance(priority, str) else None

        # Copy each matching todo so callers never receive the stored dicts.
        matches = [
            {**item, "todo_id": item_id}
            for item_id, item in stored.items()
            if (not wanted_status or item.get("status") == wanted_status)
            and (not wanted_priority or item.get("priority") == wanted_priority)
        ]
        ordered = sorted(matches, key=_display_key)

        # The three standard statuses are always present; others are added as seen.
        per_status = dict.fromkeys(("pending", "in_progress", "done"), 0)
        for entry in ordered:
            label = entry.get("status", "pending")
            per_status[label] = per_status.get(label, 0) + 1

        return {
            "success": True,
            "todos": ordered,
            "total_count": len(ordered),
            "summary": per_status,
        }
    except (ValueError, TypeError) as e:
        return {
            "success": False,
            "error": f"Failed to list todos: {e}",
            "todos": [],
            "total_count": 0,
            "summary": {"pending": 0, "in_progress": 0, "done": 0},
        }
@register_tool(sandbox_execution=False)
def update_todo(
    agent_state: Any,
    todo_id: str,
    title: str | None = None,
    description: str | None = None,
    priority: str | None = None,
    status: str | None = None,
) -> dict[str, Any]:
    """Update one or more fields of one of the calling agent's todos.

    Every supplied field is validated before anything is written, so a
    rejected call (empty title, invalid priority, invalid status) leaves the
    stored todo exactly as it was rather than half-updated.

    Args:
        agent_state: Object exposing ``agent_id``; selects whose todo list is edited.
        todo_id: ID of the todo to modify.
        title: New title; must contain non-whitespace text. ``None`` keeps
            the current title.
        description: New description (stripped); an empty string clears it to
            ``None``. ``None`` keeps the current description.
        priority: New priority, passed through ``_normalize_priority`` with
            the todo's current priority as the fallback argument.
        status: New status, lowercased and checked against ``VALID_STATUSES``.
            Setting ``"done"`` stamps ``completed_at``; any other status
            clears it.

    Returns:
        ``{"success": True, "todos": [...], "total_count": n}`` on success,
        where ``todos`` comes from ``_sorted_todos``; otherwise
        ``{"success": False, "error": message}``.
    """
    try:
        agent_id = agent_state.agent_id
        agent_todos = _get_agent_todos(agent_id)
        if todo_id not in agent_todos:
            return {"success": False, "error": f"Todo with ID '{todo_id}' not found"}
        todo = agent_todos[todo_id]

        # Stage all changes here; the stored todo is only touched after every
        # input has passed validation, which keeps failed updates side-effect free.
        changes: dict[str, Any] = {}

        if title is not None:
            new_title = title.strip()
            if not new_title:
                return {"success": False, "error": "Title cannot be empty"}
            changes["title"] = new_title

        if description is not None:
            changes["description"] = description.strip() if description else None

        if priority is not None:
            try:
                changes["priority"] = _normalize_priority(
                    priority, str(todo.get("priority", "normal"))
                )
            except ValueError as exc:
                return {"success": False, "error": str(exc)}

        if status is not None:
            status_candidate = status.lower()
            if status_candidate not in VALID_STATUSES:
                return {
                    "success": False,
                    "error": f"Invalid status. Must be one of: {', '.join(VALID_STATUSES)}",
                }
            changes["status"] = status_candidate

        # One timestamp for the whole update so completed_at and updated_at agree.
        now = datetime.now(UTC).isoformat()
        if "status" in changes:
            changes["completed_at"] = now if changes["status"] == "done" else None
        changes["updated_at"] = now

        todo.update(changes)

        todos_list = _sorted_todos(agent_id)
        return {
            "success": True,
            "todos": todos_list,
            "total_count": len(todos_list),
        }
    except (ValueError, TypeError) as e:
        return {"success": False, "error": str(e)}
@register_tool(sandbox_execution=False)
def mark_todo_done(agent_state: Any, todo_id: str) -> dict[str, Any]:
    """Flag one of the calling agent's todos as completed.

    Sets ``status`` to ``"done"`` and stamps both ``completed_at`` and
    ``updated_at`` with the current UTC time in ISO format.

    Args:
        agent_state: Object exposing ``agent_id``; selects whose todo list is edited.
        todo_id: ID of the todo to complete.

    Returns:
        ``{"success": True, "todos": [...], "total_count": n}`` on success,
        where ``todos`` comes from ``_sorted_todos``; otherwise
        ``{"success": False, "error": message}``.
    """
    try:
        owner_id = agent_state.agent_id
        store = _get_agent_todos(owner_id)
        if todo_id in store:
            store[todo_id].update(
                {
                    "status": "done",
                    "completed_at": datetime.now(UTC).isoformat(),
                    "updated_at": datetime.now(UTC).isoformat(),
                }
            )
            refreshed = _sorted_todos(owner_id)
            return {
                "success": True,
                "todos": refreshed,
                "total_count": len(refreshed),
            }
        return {"success": False, "error": f"Todo with ID '{todo_id}' not found"}
    except (ValueError, TypeError) as e:
        return {"success": False, "error": str(e)}
@register_tool(sandbox_execution=False)
def mark_todo_pending(agent_state: Any, todo_id: str) -> dict[str, Any]:
    """Put one of the calling agent's todos back into the pending state.

    Sets ``status`` to ``"pending"``, clears ``completed_at`` and refreshes
    ``updated_at`` with the current UTC time in ISO format.

    Args:
        agent_state: Object exposing ``agent_id``; selects whose todo list is edited.
        todo_id: ID of the todo to reopen.

    Returns:
        ``{"success": True, "todos": [...], "total_count": n}`` on success,
        where ``todos`` comes from ``_sorted_todos``; otherwise
        ``{"success": False, "error": message}``.
    """
    try:
        owner_id = agent_state.agent_id
        store = _get_agent_todos(owner_id)
        if todo_id not in store:
            return {"success": False, "error": f"Todo with ID '{todo_id}' not found"}

        store[todo_id].update(
            {
                "status": "pending",
                "completed_at": None,
                "updated_at": datetime.now(UTC).isoformat(),
            }
        )

        refreshed = _sorted_todos(owner_id)
        outcome: dict[str, Any] = {"success": True}
        outcome["todos"] = refreshed
        outcome["total_count"] = len(refreshed)
        return outcome
    except (ValueError, TypeError) as e:
        return {"success": False, "error": str(e)}
@register_tool(sandbox_execution=False)
def delete_todo(agent_state: Any, todo_id: str) -> dict[str, Any]:
    """Remove one todo from the calling agent's list.

    Args:
        agent_state: Object exposing ``agent_id``; selects whose todo list is edited.
        todo_id: ID of the todo to remove.

    Returns:
        ``{"success": True, "todos": [...], "total_count": n}`` describing the
        remaining todos (from ``_sorted_todos``) on success; otherwise
        ``{"success": False, "error": message}``.
    """
    try:
        owner_id = agent_state.agent_id
        store = _get_agent_todos(owner_id)
        if todo_id not in store:
            return {"success": False, "error": f"Todo with ID '{todo_id}' not found"}

        store.pop(todo_id)

        remaining = _sorted_todos(owner_id)
        return {
            "success": True,
            "todos": remaining,
            "total_count": len(remaining),
        }
    except (ValueError, TypeError) as e:
        return {"success": False, "error": str(e)}
--- a/strix/tools/todo/todo_actions_schema.xml
+++ b/strix/tools/todo/todo_actions_schema.xml
@@ -0,0 +1,201 @@
 <tools>
  <important>
  YOU MUST USE THE TODO TOOL EXTENSIVELY. This is critical for staying organized and focused.
  IMPORTANT: Each subagent has their own separate todo list. Your todos are private to you and
  do not interfere with other agents' todos. Use this to your advantage.
  WORKFLOW - Follow this for EVERY task:
  1. Create todos at the START to break down your work
  2. BEFORE starting a task: Mark it as "in_progress" using update_todo
  3. AFTER completing a task: Mark it as "done" using mark_todo_done
  4. When you discover new tasks: Add them as todos right away
  ALWAYS mark the current task as in_progress before working on it. This shows what you're
  actively doing. Then mark it done when finished. Never skip these status updates.
  A well-maintained todo list prevents you from going in circles, forgetting important tasks,
  or losing track of your progress. USE IT CONSTANTLY.
  </important>
  <tool name="create_todo">
    <description>Create a new todo item to track tasks, goals, and progress. USE THIS FREQUENTLY.</description>
    <details>Use this tool liberally to create actionable items. Break down complex tasks into smaller,
  manageable todos. Each subagent maintains their own independent todo list - your todos are yours alone.
  Create todos at the start of work to plan your approach, add new ones as you discover tasks,
  and mark them done as you progress. This keeps you focused, prevents you from forgetting tasks,
  and provides a clear record of what you've accomplished.</details>
    <parameters>
      <parameter name="title" type="string" required="false">
        <description>Short, actionable title for the todo (e.g., "Test login endpoint for SQL injection")</description>
      </parameter>
      <parameter name="todos" type="string" required="false">
        <description>Create multiple todos at once. Provide a JSON array of {"title": "...", "description": "...", "priority": "..."} objects or a newline-separated bullet list.</description>
      </parameter>
      <parameter name="description" type="string" required="false">
        <description>Detailed description or notes about the task</description>
      </parameter>
      <parameter name="priority" type="string" required="false">
        <description>Priority level: "low", "normal", "high", "critical" (default: "normal")</description>
      </parameter>
    </parameters>
    <returns type="Dict[str, Any]">
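      <description>Response containing: - created: List of created todos with their IDs, titles, and priorities - todos: Full sorted todo list - success: Whether the operation succeeded - count: Number of todos created by this call - total_count: Total number of todos in your list - error: Reason for failure (only present when success is false)</description>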
    </returns>
    <examples>
  # Create a high priority todo
  <function=create_todo>
  <parameter=title>Test authentication bypass on /api/admin</parameter>
  <parameter=description>The admin endpoint seems to have weak authentication. Try JWT manipulation, session fixation, and privilege escalation.</parameter>
  <parameter=priority>high</parameter>
  </function>
  # Create a simple todo
  <function=create_todo>
  <parameter=title>Enumerate all API endpoints</parameter>
  </function>
  # Bulk create todos (JSON array)
  <function=create_todo>
  <parameter=todos>[{"title": "Map all admin routes", "priority": "high"}, {"title": "Check forgotten password flow"}]</parameter>
  </function>
  # Bulk create todos (bullet list)
  <function=create_todo>
  <parameter=todos>
  - Capture baseline traffic in proxy
  - Enumerate S3 buckets for leaked assets
  - Compare responses for timing differences
  </parameter>
  </function>
    </examples>
  </tool>
  <tool name="list_todos">
    <description>List all todos with optional filtering by status or priority. CHECK THIS OFTEN.</description>
    <details>Use this frequently to review your task list, check progress, and decide what to work on next.
  Check your todos regularly to stay focused and avoid missing important tasks.
  The list is sorted: done first, then in_progress, then pending. Within each status, sorted by priority (critical > high > normal > low).
  Each subagent has their own independent todo list.</details>
    <parameters>
      <parameter name="status" type="string" required="false">
        <description>Filter by status: "pending", "in_progress", "done"</description>
      </parameter>
      <parameter name="priority" type="string" required="false">
        <description>Filter by priority: "low", "normal", "high", "critical"</description>
      </parameter>
    </parameters>
    <returns type="Dict[str, Any]">
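      <description>Response containing: - todos: List of todo items matching the filters, each including its todo_id - total_count: Number of todos returned (after any filters are applied) - summary: Count by status (pending, in_progress, done) of the returned todos</description>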
    </returns>
    <examples>
  # List all todos
  <function=list_todos>
  </function>
  # List only pending todos
  <function=list_todos>
  <parameter=status>pending</parameter>
  </function>
  # List high priority items
  <function=list_todos>
  <parameter=priority>high</parameter>
  </function>
    </examples>
  </tool>
  <tool name="update_todo">
    <description>Update an existing todo item's title, description, priority, or status.</description>
    <parameters>
      <parameter name="todo_id" type="string" required="true">
        <description>ID of the todo to update</description>
      </parameter>
      <parameter name="title" type="string" required="false">
        <description>New title for the todo</description>
      </parameter>
      <parameter name="description" type="string" required="false">
        <description>New description for the todo</description>
      </parameter>
      <parameter name="priority" type="string" required="false">
        <description>New priority: "low", "normal", "high", "critical"</description>
      </parameter>
      <parameter name="status" type="string" required="false">
        <description>New status: "pending", "in_progress", "done"</description>
      </parameter>
    </parameters>
    <returns type="Dict[str, Any]">
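      <description>Response containing: - success: Whether the update was successful - todos: Full sorted todo list after the update - total_count: Total number of todos - error: Reason for failure (only present when success is false)</description>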
    </returns>
    <examples>
  # Update priority and add description
  <function=update_todo>
  <parameter=todo_id>abc123</parameter>
  <parameter=priority>critical</parameter>
  <parameter=description>Found potential RCE vector, needs immediate attention</parameter>
  </function>
  # Mark as in progress
  <function=update_todo>
  <parameter=todo_id>abc123</parameter>
  <parameter=status>in_progress</parameter>
  </function>
    </examples>
  </tool>
  <tool name="mark_todo_done">
    <description>Mark a todo item as completed. DO THIS IMMEDIATELY after finishing a task.</description>
    <details>Mark todos as done right after completing them - don't wait! This keeps your list accurate,
  helps track progress, and gives you a clear picture of what's been accomplished vs what remains.</details>
    <parameters>
      <parameter name="todo_id" type="string" required="true">
        <description>ID of the todo to mark as done</description>
      </parameter>
    </parameters>
    <returns type="Dict[str, Any]">
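      <description>Response containing: - success: Whether the operation was successful - todos: Full sorted todo list after the change - total_count: Total number of todos - error: Reason for failure (only present when success is false)</description>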
    </returns>
    <examples>
  <function=mark_todo_done>
  <parameter=todo_id>abc123</parameter>
  </function>
    </examples>
  </tool>
  <tool name="mark_todo_pending">
    <description>Mark a todo item as pending (reopen a completed task).</description>
    <details>Use this to reopen a task that was marked done but needs more work.</details>
    <parameters>
      <parameter name="todo_id" type="string" required="true">
        <description>ID of the todo to mark as pending</description>
      </parameter>
    </parameters>
    <returns type="Dict[str, Any]">
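      <description>Response containing: - success: Whether the operation was successful - todos: Full sorted todo list after the change - total_count: Total number of todos - error: Reason for failure (only present when success is false)</description>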
    </returns>
    <examples>
  <function=mark_todo_pending>
  <parameter=todo_id>abc123</parameter>
  </function>
    </examples>
  </tool>
  <tool name="delete_todo">
    <description>Delete a todo item.</description>
    <details>Use this to remove todos that are no longer relevant or were created by mistake.</details>
    <parameters>
      <parameter name="todo_id" type="string" required="true">
        <description>ID of the todo to delete</description>
      </parameter>
    </parameters>
    <returns type="Dict[str, Any]">
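      <description>Response containing: - success: Whether the deletion was successful - todos: Full sorted list of the remaining todos - total_count: Number of remaining todos - error: Reason for failure (only present when success is false)</description>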
    </returns>
    <examples>
  <function=delete_todo>
  <parameter=todo_id>abc123</parameter>
  </function>
    </examples>
  </tool>
 </tools>