feat(tools): add dedicated todo tool for agent task tracking (#196)

- Add new todo tool with create, list, update, mark_done, mark_pending, delete actions
- Each subagent has isolated todo storage keyed by agent_id
- Support bulk todo creation via JSON array or bullet list
- Add TUI renderers for all todo actions with status markers
- Update notes tool to remove priority and todo-related functionality
- Add task tracking guidance to StrixAgent system prompt
- Fix instruction file error handling in CLI
2025-12-14 10:16:02 -08:00
parent a075ea1a0a
commit 2b926c733b
11 changed files with 908 additions and 120 deletions
--- a/strix/agents/StrixAgent/system_prompt.jinja
+++ b/strix/agents/StrixAgent/system_prompt.jinja
@@ -111,6 +111,17 @@ OPERATIONAL PRINCIPLES:
 - Try multiple approaches simultaneously - don't wait for one to fail
 - Continuously research payloads, bypasses, and exploitation techniques with the web_search tool; integrate findings into automated sprays and validation

+TASK TRACKING:
+- USE THE TODO TOOL EXTENSIVELY - this is critical for staying organized and focused
+- Each subagent has their own INDEPENDENT todo list - your todos are private to you
+- At the START of any task: Create todos to break down your work into clear steps
+- BEFORE starting a task: Mark it as "in_progress" - this shows what you're actively doing
+- AFTER completing a task: Mark it as "done" immediately - don't wait
+- When you discover new tasks: Add them as todos right away
+- ALWAYS follow this workflow: create → in_progress → done
+- A well-maintained todo list prevents going in circles, forgetting tasks, and losing focus
+- If you're unsure what to do next: Check your todo list first
+
 EFFICIENCY TACTICS:
 - Automate with Python scripts for complex workflows and repetitive inputs/tasks
 - Batch similar operations together
--- a/strix/interface/main.py
+++ b/strix/interface/main.py
@@ -315,7 +315,9 @@ Examples:
    args = parser.parse_args()

    if args.instruction and args.instruction_file:
-        parser.error("Cannot specify both --instruction and --instruction-file. Use one or the other.")
+        parser.error(
+            "Cannot specify both --instruction and --instruction-file. Use one or the other."
+        )

    if args.instruction_file:
        instruction_path = Path(args.instruction_file)
@@ -324,7 +326,7 @@ Examples:
                args.instruction = f.read().strip()
                if not args.instruction:
                    parser.error(f"Instruction file '{instruction_path}' is empty")
-        except Exception as e:
+        except Exception as e:  # noqa: BLE001
            parser.error(f"Failed to read instruction file '{instruction_path}': {e}")

    args.targets_info = []
--- a/strix/interface/tool_components/__init__.py
+++ b/strix/interface/tool_components/__init__.py
@@ -10,6 +10,7 @@ from . import (
    scan_info_renderer,
    terminal_renderer,
    thinking_renderer,
+    todo_renderer,
    user_message_renderer,
    web_search_renderer,
 )
@@ -34,6 +35,7 @@ __all__ = [
    "scan_info_renderer",
    "terminal_renderer",
    "thinking_renderer",
+    "todo_renderer",
    "user_message_renderer",
    "web_search_renderer",
 ]
--- a/strix/interface/tool_components/notes_renderer.py
+++ b/strix/interface/tool_components/notes_renderer.py
@@ -6,6 +6,12 @@ from .base_renderer import BaseToolRenderer
 from .registry import register_tool_renderer


+def _truncate(text: str, length: int = 800) -> str:
+    if len(text) <= length:
+        return text
+    return text[: length - 3] + "..."
+
+
@register_tool_renderer
 class CreateNoteRenderer(BaseToolRenderer):
    tool_name: ClassVar[str] = "create_note"
@@ -17,23 +23,24 @@ class CreateNoteRenderer(BaseToolRenderer):

        title = args.get("title", "")
        content = args.get("content", "")
+        category = args.get("category", "general")

-        header = "📝 [bold #fbbf24]Note[/]"
+        header = f"📝 [bold #fbbf24]Note[/] [dim]({category})[/]"

+        lines = [header]
        if title:
-            title_display = title[:100] + "..." if len(title) > 100 else title
-            note_parts = [f"{header}\n  [bold]{cls.escape_markup(title_display)}[/]"]
+            title_display = _truncate(title.strip(), 300)
+            lines.append(f"  {cls.escape_markup(title_display)}")

        if content:
-                content_display = content[:200] + "..." if len(content) > 200 else content
-                note_parts.append(f"  [dim]{cls.escape_markup(content_display)}[/]")
+            content_display = _truncate(content.strip(), 800)
+            lines.append(f"  [dim]{cls.escape_markup(content_display)}[/]")

-            content_text = "\n".join(note_parts)
-        else:
-            content_text = f"{header}\n  [dim]Creating note...[/]"
+        if len(lines) == 1:
+            lines.append("  [dim]Capturing...[/]")

        css_classes = cls.get_css_classes("completed")
-        return Static(content_text, classes=css_classes)
+        return Static("\n".join(lines), classes=css_classes)


@register_tool_renderer
@@ -43,8 +50,8 @@ class DeleteNoteRenderer(BaseToolRenderer):

    @classmethod
    def render(cls, tool_data: dict[str, Any]) -> Static:  # noqa: ARG003
-        header = "🗑️ [bold #fbbf24]Delete Note[/]"
-        content_text = f"{header}\n  [dim]Deleting...[/]"
+        header = "📝 [bold #94a3b8]Note Removed[/]"
+        content_text = header

        css_classes = cls.get_css_classes("completed")
        return Static(content_text, classes=css_classes)
@@ -59,28 +66,24 @@ class UpdateNoteRenderer(BaseToolRenderer):
    def render(cls, tool_data: dict[str, Any]) -> Static:
        args = tool_data.get("args", {})

-        title = args.get("title", "")
-        content = args.get("content", "")
+        title = args.get("title")
+        content = args.get("content")

-        header = "✏️ [bold #fbbf24]Update Note[/]"
-
-        if title or content:
-            note_parts = [header]
+        header = "📝 [bold #fbbf24]Note Updated[/]"
+        lines = [header]

        if title:
-                title_display = title[:100] + "..." if len(title) > 100 else title
-                note_parts.append(f"  [bold]{cls.escape_markup(title_display)}[/]")
+            lines.append(f"  {cls.escape_markup(_truncate(title, 300))}")

        if content:
-                content_display = content[:200] + "..." if len(content) > 200 else content
-                note_parts.append(f"  [dim]{cls.escape_markup(content_display)}[/]")
+            content_display = _truncate(content.strip(), 800)
+            lines.append(f"  [dim]{cls.escape_markup(content_display)}[/]")

-            content_text = "\n".join(note_parts)
-        else:
-            content_text = f"{header}\n  [dim]Updating...[/]"
+        if len(lines) == 1:
+            lines.append("  [dim]Updating...[/]")

        css_classes = cls.get_css_classes("completed")
-        return Static(content_text, classes=css_classes)
+        return Static("\n".join(lines), classes=css_classes)


@register_tool_renderer
@@ -92,17 +95,34 @@ class ListNotesRenderer(BaseToolRenderer):
    def render(cls, tool_data: dict[str, Any]) -> Static:
        result = tool_data.get("result")

-        header = "📋 [bold #fbbf24]Listing notes[/]"
+        header = "📝 [bold #fbbf24]Notes[/]"

-        if result and isinstance(result, dict) and "notes" in result:
-            notes = result["notes"]
-            if isinstance(notes, list):
-                count = len(notes)
-                content_text = f"{header}\n  [dim]{count} notes found[/]"
+        if result and isinstance(result, dict) and result.get("success"):
+            count = result.get("total_count", 0)
+            notes = result.get("notes", []) or []
+            lines = [header]
+
+            if count == 0:
+                lines.append("  [dim]No notes[/]")
            else:
-                content_text = f"{header}\n  [dim]No notes found[/]"
+                for note in notes[:5]:
+                    title = note.get("title", "").strip() or "(untitled)"
+                    category = note.get("category", "general")
+                    content = note.get("content", "").strip()
+
+                    lines.append(
+                        f"  - {cls.escape_markup(_truncate(title, 300))} [dim]({category})[/]"
+                    )
+                    if content:
+                        content_preview = _truncate(content, 400)
+                        lines.append(f"    [dim]{cls.escape_markup(content_preview)}[/]")
+
+                remaining = max(count - 5, 0)
+                if remaining:
+                    lines.append(f"  [dim]... +{remaining} more[/]")
+            content_text = "\n".join(lines)
        else:
-            content_text = f"{header}\n  [dim]Listing notes...[/]"
+            content_text = f"{header}\n  [dim]Loading...[/]"

        css_classes = cls.get_css_classes("completed")
        return Static(content_text, classes=css_classes)
--- a/strix/interface/tool_components/todo_renderer.py
+++ b/strix/interface/tool_components/todo_renderer.py
@@ -0,0 +1,204 @@
+from typing import Any, ClassVar
+
+from textual.widgets import Static
+
+from .base_renderer import BaseToolRenderer
+from .registry import register_tool_renderer
+
+
+STATUS_MARKERS = {
+    "pending": "[ ]",
+    "in_progress": "[~]",
+    "done": "[•]",
+}
+
+
+def _truncate(text: str, length: int = 80) -> str:
+    if len(text) <= length:
+        return text
+    return text[: length - 3] + "..."
+
+
+def _format_todo_lines(
+    cls: type[BaseToolRenderer], result: dict[str, Any], limit: int = 10
+) -> list[str]:
+    todos = result.get("todos")
+    if not isinstance(todos, list) or not todos:
+        return ["  [dim]No todos[/]"]
+
+    lines: list[str] = []
+    total = len(todos)
+
+    for index, todo in enumerate(todos):
+        if index >= limit:
+            remaining = total - limit
+            if remaining > 0:
+                lines.append(f"  [dim]... +{remaining} more[/]")
+            break
+
+        status = todo.get("status", "pending")
+        marker = STATUS_MARKERS.get(status, STATUS_MARKERS["pending"])
+
+        title = todo.get("title", "").strip() or "(untitled)"
+        title = cls.escape_markup(_truncate(title, 90))
+
+        if status == "done":
+            title_markup = f"[dim strike]{title}[/]"
+        elif status == "in_progress":
+            title_markup = f"[italic]{title}[/]"
+        else:
+            title_markup = title
+
+        lines.append(f"  {marker} {title_markup}")
+
+    return lines
+
+
+@register_tool_renderer
+class CreateTodoRenderer(BaseToolRenderer):
+    tool_name: ClassVar[str] = "create_todo"
+    css_classes: ClassVar[list[str]] = ["tool-call", "todo-tool"]
+
+    @classmethod
+    def render(cls, tool_data: dict[str, Any]) -> Static:
+        result = tool_data.get("result")
+        header = "📋 [bold #a78bfa]Todo[/]"
+
+        if result and isinstance(result, dict):
+            if result.get("success"):
+                lines = [header]
+                lines.extend(_format_todo_lines(cls, result, limit=10))
+                content_text = "\n".join(lines)
+            else:
+                error = result.get("error", "Failed to create todo")
+                content_text = f"{header}\n  [#ef4444]{cls.escape_markup(error)}[/]"
+        else:
+            content_text = f"{header}\n  [dim]Creating...[/]"
+
+        css_classes = cls.get_css_classes("completed")
+        return Static(content_text, classes=css_classes)
+
+
+@register_tool_renderer
+class ListTodosRenderer(BaseToolRenderer):
+    tool_name: ClassVar[str] = "list_todos"
+    css_classes: ClassVar[list[str]] = ["tool-call", "todo-tool"]
+
+    @classmethod
+    def render(cls, tool_data: dict[str, Any]) -> Static:
+        result = tool_data.get("result")
+        header = "📋 [bold #a78bfa]Todos[/]"
+
+        if result and isinstance(result, dict):
+            if result.get("success"):
+                lines = [header]
+                lines.extend(_format_todo_lines(cls, result, limit=10))
+                content_text = "\n".join(lines)
+            else:
+                error = result.get("error", "Unable to list todos")
+                content_text = f"{header}\n  [#ef4444]{cls.escape_markup(error)}[/]"
+        else:
+            content_text = f"{header}\n  [dim]Loading...[/]"
+
+        css_classes = cls.get_css_classes("completed")
+        return Static(content_text, classes=css_classes)
+
+
+@register_tool_renderer
+class UpdateTodoRenderer(BaseToolRenderer):
+    tool_name: ClassVar[str] = "update_todo"
+    css_classes: ClassVar[list[str]] = ["tool-call", "todo-tool"]
+
+    @classmethod
+    def render(cls, tool_data: dict[str, Any]) -> Static:
+        result = tool_data.get("result")
+        header = "📋 [bold #a78bfa]Todo Updated[/]"
+
+        if result and isinstance(result, dict):
+            if result.get("success"):
+                lines = [header]
+                lines.extend(_format_todo_lines(cls, result, limit=10))
+                content_text = "\n".join(lines)
+            else:
+                error = result.get("error", "Failed to update todo")
+                content_text = f"{header}\n  [#ef4444]{cls.escape_markup(error)}[/]"
+        else:
+            content_text = f"{header}\n  [dim]Updating...[/]"
+
+        css_classes = cls.get_css_classes("completed")
+        return Static(content_text, classes=css_classes)
+
+
+@register_tool_renderer
+class MarkTodoDoneRenderer(BaseToolRenderer):
+    tool_name: ClassVar[str] = "mark_todo_done"
+    css_classes: ClassVar[list[str]] = ["tool-call", "todo-tool"]
+
+    @classmethod
+    def render(cls, tool_data: dict[str, Any]) -> Static:
+        result = tool_data.get("result")
+        header = "📋 [bold #a78bfa]Todo Completed[/]"
+
+        if result and isinstance(result, dict):
+            if result.get("success"):
+                lines = [header]
+                lines.extend(_format_todo_lines(cls, result, limit=10))
+                content_text = "\n".join(lines)
+            else:
+                error = result.get("error", "Failed to mark todo done")
+                content_text = f"{header}\n  [#ef4444]{cls.escape_markup(error)}[/]"
+        else:
+            content_text = f"{header}\n  [dim]Marking done...[/]"
+
+        css_classes = cls.get_css_classes("completed")
+        return Static(content_text, classes=css_classes)
+
+
+@register_tool_renderer
+class MarkTodoPendingRenderer(BaseToolRenderer):
+    tool_name: ClassVar[str] = "mark_todo_pending"
+    css_classes: ClassVar[list[str]] = ["tool-call", "todo-tool"]
+
+    @classmethod
+    def render(cls, tool_data: dict[str, Any]) -> Static:
+        result = tool_data.get("result")
+        header = "📋 [bold #f59e0b]Todo Reopened[/]"
+
+        if result and isinstance(result, dict):
+            if result.get("success"):
+                lines = [header]
+                lines.extend(_format_todo_lines(cls, result, limit=10))
+                content_text = "\n".join(lines)
+            else:
+                error = result.get("error", "Failed to reopen todo")
+                content_text = f"{header}\n  [#ef4444]{cls.escape_markup(error)}[/]"
+        else:
+            content_text = f"{header}\n  [dim]Reopening...[/]"
+
+        css_classes = cls.get_css_classes("completed")
+        return Static(content_text, classes=css_classes)
+
+
+@register_tool_renderer
+class DeleteTodoRenderer(BaseToolRenderer):
+    tool_name: ClassVar[str] = "delete_todo"
+    css_classes: ClassVar[list[str]] = ["tool-call", "todo-tool"]
+
+    @classmethod
+    def render(cls, tool_data: dict[str, Any]) -> Static:
+        result = tool_data.get("result")
+        header = "📋 [bold #94a3b8]Todo Removed[/]"
+
+        if result and isinstance(result, dict):
+            if result.get("success"):
+                lines = [header]
+                lines.extend(_format_todo_lines(cls, result, limit=10))
+                content_text = "\n".join(lines)
+            else:
+                error = result.get("error", "Failed to remove todo")
+                content_text = f"{header}\n  [#ef4444]{cls.escape_markup(error)}[/]"
+        else:
+            content_text = f"{header}\n  [dim]Removing...[/]"
+
+        css_classes = cls.get_css_classes("completed")
+        return Static(content_text, classes=css_classes)
--- a/strix/tools/__init__.py
+++ b/strix/tools/__init__.py
@@ -35,13 +35,13 @@ if not SANDBOX_MODE:
    from .reporting import *  # noqa: F403
    from .terminal import *  # noqa: F403
    from .thinking import *  # noqa: F403
+    from .todo import *  # noqa: F403

    if HAS_PERPLEXITY_API:
        from .web_search import *  # noqa: F403
 else:
    from .browser import *  # noqa: F403
    from .file_edit import *  # noqa: F403
-    from .notes import *  # noqa: F403
    from .proxy import *  # noqa: F403
    from .python import *  # noqa: F403
    from .terminal import *  # noqa: F403
--- a/strix/tools/notes/notes_actions.py
+++ b/strix/tools/notes/notes_actions.py
@@ -11,7 +11,6 @@ _notes_storage: dict[str, dict[str, Any]] = {}
 def _filter_notes(
    category: str | None = None,
    tags: list[str] | None = None,
-    priority: str | None = None,
    search_query: str | None = None,
 ) -> list[dict[str, Any]]:
    filtered_notes = []
@@ -20,9 +19,6 @@ def _filter_notes(
        if category and note.get("category") != category:
            continue

-        if priority and note.get("priority") != priority:
-            continue
-
        if tags:
            note_tags = note.get("tags", [])
            if not any(tag in note_tags for tag in tags):
@@ -43,13 +39,12 @@ def _filter_notes(
    return filtered_notes


-@register_tool
+@register_tool(sandbox_execution=False)
 def create_note(
    title: str,
    content: str,
    category: str = "general",
    tags: list[str] | None = None,
-    priority: str = "normal",
 ) -> dict[str, Any]:
    try:
        if not title or not title.strip():
@@ -58,7 +53,7 @@ def create_note(
        if not content or not content.strip():
            return {"success": False, "error": "Content cannot be empty", "note_id": None}

-        valid_categories = ["general", "findings", "methodology", "todo", "questions", "plan"]
+        valid_categories = ["general", "findings", "methodology", "questions", "plan"]
        if category not in valid_categories:
            return {
                "success": False,
@@ -66,14 +61,6 @@ def create_note(
                "note_id": None,
            }

-        valid_priorities = ["low", "normal", "high", "urgent"]
-        if priority not in valid_priorities:
-            return {
-                "success": False,
-                "error": f"Invalid priority. Must be one of: {', '.join(valid_priorities)}",
-                "note_id": None,
-            }
-
        note_id = str(uuid.uuid4())[:5]
        timestamp = datetime.now(UTC).isoformat()

@@ -82,7 +69,6 @@ def create_note(
            "content": content.strip(),
            "category": category,
            "tags": tags or [],
-            "priority": priority,
            "created_at": timestamp,
            "updated_at": timestamp,
        }
@@ -99,17 +85,14 @@ def create_note(
        }


-@register_tool
+@register_tool(sandbox_execution=False)
 def list_notes(
    category: str | None = None,
    tags: list[str] | None = None,
-    priority: str | None = None,
    search: str | None = None,
 ) -> dict[str, Any]:
    try:
-        filtered_notes = _filter_notes(
-            category=category, tags=tags, priority=priority, search_query=search
-        )
+        filtered_notes = _filter_notes(category=category, tags=tags, search_query=search)

        return {
            "success": True,
@@ -126,13 +109,12 @@ def list_notes(
        }


-@register_tool
+@register_tool(sandbox_execution=False)
 def update_note(
    note_id: str,
    title: str | None = None,
    content: str | None = None,
    tags: list[str] | None = None,
-    priority: str | None = None,
 ) -> dict[str, Any]:
    try:
        if note_id not in _notes_storage:
@@ -153,15 +135,6 @@ def update_note(
        if tags is not None:
            note["tags"] = tags

-        if priority is not None:
-            valid_priorities = ["low", "normal", "high", "urgent"]
-            if priority not in valid_priorities:
-                return {
-                    "success": False,
-                    "error": f"Invalid priority. Must be one of: {', '.join(valid_priorities)}",
-                }
-            note["priority"] = priority
-
        note["updated_at"] = datetime.now(UTC).isoformat()

        return {
@@ -173,7 +146,7 @@ def update_note(
        return {"success": False, "error": f"Failed to update note: {e}"}


-@register_tool
+@register_tool(sandbox_execution=False)
 def delete_note(note_id: str) -> dict[str, Any]:
    try:
        if note_id not in _notes_storage:
--- a/strix/tools/notes/notes_actions_schema.xml
+++ b/strix/tools/notes/notes_actions_schema.xml
@@ -1,10 +1,9 @@
 <tools>
  <tool name="create_note">
-    <description>Create a personal note for TODOs, side notes, plans, and organizational purposes during
-  the scan.</description>
-    <details>Use this tool for quick reminders, action items, planning thoughts, and organizational notes
-  rather than formal vulnerability reports or detailed findings. This is your personal notepad
-  for keeping track of tasks, ideas, and things to remember or follow up on.</details>
+    <description>Create a personal note for observations, findings, and research during the scan.</description>
+    <details>Use this tool for documenting discoveries, observations, methodology notes, and questions.
+  This is your personal notepad for recording information you want to remember or reference later.
+  For tracking actionable tasks, use the todo tool instead.</details>
    <parameters>
      <parameter name="title" type="string" required="true">
        <description>Title of the note</description>
@@ -13,49 +12,41 @@
        <description>Content of the note</description>
      </parameter>
      <parameter name="category" type="string" required="false">
-        <description>Category to organize the note (default: "general", "findings", "methodology", "todo", "questions", "plan")</description>
+        <description>Category to organize the note (default: "general", "findings", "methodology", "questions", "plan")</description>
      </parameter>
      <parameter name="tags" type="string" required="false">
        <description>Tags for categorization</description>
      </parameter>
-      <parameter name="priority" type="string" required="false">
-        <description>Priority level of the note ("low", "normal", "high", "urgent")</description>
-      </parameter>
    </parameters>
    <returns type="Dict[str, Any]">
      <description>Response containing: - note_id: ID of the created note - success: Whether the note was created successfully</description>
    </returns>
    <examples>
-  # Create a TODO reminder
-  <function=create_note>
-  <parameter=title>TODO: Check SSL Certificate Details</parameter>
-  <parameter=content>Remember to verify SSL certificate validity and check for weak ciphers
-               on the HTTPS service discovered on port 443. Also check for certificate
-               transparency logs.</parameter>
-  <parameter=category>todo</parameter>
-  <parameter=tags>["ssl", "certificate", "followup"]</parameter>
-  <parameter=priority>normal</parameter>
-  </function>
-
-  # Planning note
-  <function=create_note>
-  <parameter=title>Scan Strategy Planning</parameter>
-  <parameter=content>Plan for next phase: 1) Complete subdomain enumeration 2) Test discovered
-               web apps for OWASP Top 10 3) Check database services for default creds
-               4) Review any custom applications for business logic flaws</parameter>
-  <parameter=category>plan</parameter>
-  <parameter=tags>["planning", "strategy", "next_steps"]</parameter>
-  </function>
-
-  # Side note for later investigation
+  # Document an interesting finding
  <function=create_note>
  <parameter=title>Interesting Directory Found</parameter>
-  <parameter=content>Found /backup/ directory that might contain sensitive files. Low priority
-               for now but worth checking if time permits. Directory listing seems
-               disabled.</parameter>
+  <parameter=content>Found /backup/ directory that might contain sensitive files. Directory listing
+               seems disabled but worth investigating further.</parameter>
  <parameter=category>findings</parameter>
-  <parameter=tags>["directory", "backup", "low_priority"]</parameter>
-  <parameter=priority>low</parameter>
+  <parameter=tags>["directory", "backup"]</parameter>
+  </function>
+
+  # Methodology note
+  <function=create_note>
+  <parameter=title>Authentication Flow Analysis</parameter>
+  <parameter=content>The application uses JWT tokens stored in localStorage. Token expiration is
+               set to 24 hours. Observed that refresh token rotation is not implemented.</parameter>
+  <parameter=category>methodology</parameter>
+  <parameter=tags>["auth", "jwt", "session"]</parameter>
+  </function>
+
+  # Research question
+  <function=create_note>
+  <parameter=title>Custom Header Investigation</parameter>
+  <parameter=content>The API returns a custom X-Request-ID header. Need to research if this
+               could be used for user tracking or has any security implications.</parameter>
+  <parameter=category>questions</parameter>
+  <parameter=tags>["headers", "research"]</parameter>
  </function>
    </examples>
  </tool>
@@ -84,9 +75,6 @@
      <parameter name="tags" type="string" required="false">
        <description>Filter by tags (returns notes with any of these tags)</description>
      </parameter>
-      <parameter name="priority" type="string" required="false">
-        <description>Filter by priority level</description>
-      </parameter>
      <parameter name="search" type="string" required="false">
        <description>Search query to find in note titles and content</description>
      </parameter>
@@ -100,11 +88,6 @@
  <parameter=category>findings</parameter>
  </function>

-  # List high priority items
-  <function=list_notes>
-  <parameter=priority>high</parameter>
-  </function>
-
  # Search for SQL injection related notes
  <function=list_notes>
  <parameter=search>SQL injection</parameter>
@@ -132,9 +115,6 @@
      <parameter name="tags" type="string" required="false">
        <description>New tags for the note</description>
      </parameter>
-      <parameter name="priority" type="string" required="false">
-        <description>New priority level</description>
-      </parameter>
    </parameters>
    <returns type="Dict[str, Any]">
      <description>Response containing: - success: Whether the note was updated successfully</description>
@@ -143,7 +123,6 @@
  <function=update_note>
  <parameter=note_id>note_123</parameter>
  <parameter=content>Updated content with new findings...</parameter>
-  <parameter=priority>urgent</parameter>
  </function>
    </examples>
  </tool>
--- a/strix/tools/todo/__init__.py
+++ b/strix/tools/todo/__init__.py
@@ -0,0 +1,18 @@
+from .todo_actions import (
+    create_todo,
+    delete_todo,
+    list_todos,
+    mark_todo_done,
+    mark_todo_pending,
+    update_todo,
+)
+
+
+__all__ = [
+    "create_todo",
+    "delete_todo",
+    "list_todos",
+    "mark_todo_done",
+    "mark_todo_pending",
+    "update_todo",
+]
--- a/strix/tools/todo/todo_actions.py
+++ b/strix/tools/todo/todo_actions.py
@@ -0,0 +1,378 @@
+import json
+import uuid
+from datetime import UTC, datetime
+from typing import Any
+
+from strix.tools.registry import register_tool
+
+
+# Accepted priority names; _normalize_priority lower-cases input before checking it.
+VALID_PRIORITIES = ["low", "normal", "high", "critical"]
+# Accepted todo status values.
+VALID_STATUSES = ["pending", "in_progress", "done"]
+
+# Module-level todo store: agent_id -> todo_id -> todo record.
+_todos_storage: dict[str, dict[str, dict[str, Any]]] = {}
+
+
+def _get_agent_todos(agent_id: str) -> dict[str, dict[str, Any]]:
+    """Return the live todo mapping for ``agent_id``, creating an empty one on first use.
+
+    The returned dict (todo_id -> todo record) is the stored object itself,
+    so mutations made by the caller are visible to later calls.
+    """
+    return _todos_storage.setdefault(agent_id, {})
+
+
+def _normalize_priority(priority: str | None, default: str = "normal") -> str:
+    """Return a validated, lower-cased priority name.
+
+    Args:
+        priority: Requested priority; a falsy value (``None``, ``""``) falls
+            back to ``default``. Surrounding whitespace is ignored.
+        default: Fallback priority; a falsy value falls back to ``"normal"``.
+
+    Returns:
+        One of ``VALID_PRIORITIES``.
+
+    Raises:
+        ValueError: If the resolved value is not a string or is not one of
+            ``VALID_PRIORITIES``.
+    """
+    candidate = priority or default or "normal"
+    # Bulk JSON input can carry non-string priorities (e.g. 1 or ["high"]).
+    # Map them to None so they fail the membership test below and surface as
+    # ValueError instead of an AttributeError from ``.lower()``.
+    normalized = candidate.strip().lower() if isinstance(candidate, str) else None
+    if normalized not in VALID_PRIORITIES:
+        raise ValueError(f"Invalid priority. Must be one of: {', '.join(VALID_PRIORITIES)}")
+    return normalized
+
+
+def _sorted_todos(agent_id: str) -> list[dict[str, Any]]:
+    """Return copies of the agent's todos, each tagged with ``todo_id``, in display order.
+
+    Ordering is by status (done, in_progress, pending), then priority
+    (critical, high, normal, low), then ``created_at``. Unknown status or
+    priority values sort after the known ones.
+    """
+    status_rank = {"done": 0, "in_progress": 1, "pending": 2}
+    priority_rank = {"critical": 0, "high": 1, "normal": 2, "low": 3}
+
+    def _rank(entry: dict[str, Any]) -> tuple[int, int, Any]:
+        return (
+            status_rank.get(entry.get("status", "pending"), 99),
+            priority_rank.get(entry.get("priority", "normal"), 99),
+            entry.get("created_at", ""),
+        )
+
+    # Build shallow copies so adding "todo_id" does not touch the stored records.
+    tagged = [
+        {**record, "todo_id": record_id}
+        for record_id, record in _get_agent_todos(agent_id).items()
+    ]
+    return sorted(tagged, key=_rank)
+
+
+def _normalize_bulk_todos(raw_todos: Any) -> list[dict[str, Any]]:
+    """Convert a bulk todo payload into a list of todo spec dicts.
+
+    Accepted inputs:
+      * ``None`` or a blank string -> ``[]``
+      * a JSON string encoding an object or an array
+      * any other string, read as one title per line with leading bullet
+        markers (``-`` / ``*``) removed
+      * a dict (single todo) or a list of title strings and/or dicts
+
+    Returns:
+        Dicts that always contain ``title``. Entries built from dicts also
+        carry ``description`` (``None`` when blank) and the raw, not yet
+        validated ``priority``.
+
+    Raises:
+        TypeError: If the payload or an entry has an unsupported type, or a
+            ``description`` is a non-string value.
+        ValueError: If a dict entry lacks a non-empty string ``title``.
+    """
+    if raw_todos is None:
+        return []
+
+    data = raw_todos
+    if isinstance(raw_todos, str):
+        stripped = raw_todos.strip()
+        if not stripped:
+            return []
+        try:
+            data = json.loads(stripped)
+        except json.JSONDecodeError:
+            # Not JSON: treat as a bullet list. Markers are stripped from the
+            # left only, so titles that END in "-" or "*" (e.g. "Fuzz /api/*")
+            # keep their trailing characters.
+            titles = (line.lstrip(" -*\t").rstrip() for line in stripped.splitlines())
+            return [{"title": text} for text in titles if text]
+
+    if isinstance(data, dict):
+        data = [data]
+
+    if not isinstance(data, list):
+        raise TypeError("Todos must be provided as a list, dict, or JSON string")
+
+    normalized: list[dict[str, Any]] = []
+    for item in data:
+        if isinstance(item, str):
+            title = item.strip()
+            if title:
+                normalized.append({"title": title})
+            continue
+
+        if not isinstance(item, dict):
+            raise TypeError("Each todo entry must be a string or object with a title")
+
+        title = item.get("title", "")
+        if not isinstance(title, str) or not title.strip():
+            raise ValueError("Each todo entry must include a non-empty 'title'")
+
+        description = item.get("description")
+        # A truthy non-string description would otherwise fail in ``.strip()``
+        # with an AttributeError that callers do not handle.
+        if description and not isinstance(description, str):
+            raise TypeError("Todo 'description' must be a string")
+
+        normalized.append(
+            {
+                "title": title.strip(),
+                "description": (description or "").strip() or None,
+                "priority": item.get("priority"),
+            }
+        )
+
+    return normalized
+
+
+@register_tool(sandbox_execution=False)
+def create_todo(
+    agent_state: Any,
+    title: str | None = None,
+    description: str | None = None,
+    priority: str = "normal",
+    todos: Any | None = None,
+) -> dict[str, Any]:
+    """Create one or more todos in the calling agent's own todo list.
+
+    Args:
+        agent_state: Object exposing ``agent_id``; selects which todo list is used.
+        title: Title of a single todo to create (ignored when blank).
+        description: Optional description for the single todo.
+        priority: Priority of the single todo, and the default for bulk
+            entries that do not specify their own.
+        todos: Optional bulk payload (JSON string, bullet list, dict or list).
+            Bulk entries are created before the single ``title`` todo.
+
+    Returns:
+        On success: ``success``, ``created`` (id/title/priority of each new
+        todo), ``count``, ``todos`` (full sorted list) and ``total_count``.
+        On failure: ``success=False``, ``error`` and ``todo_id=None``; nothing
+        is stored in that case.
+    """
+    try:
+        agent_id = agent_state.agent_id
+        default_priority = _normalize_priority(priority)
+
+        tasks_to_create: list[dict[str, Any]] = []
+
+        if todos is not None:
+            tasks_to_create.extend(_normalize_bulk_todos(todos))
+
+        if title and title.strip():
+            tasks_to_create.append(
+                {
+                    "title": title.strip(),
+                    # Blank descriptions become None, matching the bulk path.
+                    "description": (description.strip() or None) if description else None,
+                    "priority": default_priority,
+                }
+            )
+
+        if not tasks_to_create:
+            return {
+                "success": False,
+                "error": "Provide a title or 'todos' list to create.",
+                "todo_id": None,
+            }
+
+        # Validate every priority BEFORE storing anything, so one bad entry
+        # cannot leave a partially created batch behind an error response.
+        priorities = [
+            _normalize_priority(task.get("priority"), default_priority)
+            for task in tasks_to_create
+        ]
+
+        agent_todos = _get_agent_todos(agent_id)
+        created: list[dict[str, Any]] = []
+
+        for task, task_priority in zip(tasks_to_create, priorities, strict=True):
+            # IDs are only 6 hex characters; regenerate on a clash so an
+            # existing todo is never silently overwritten.
+            todo_id = str(uuid.uuid4())[:6]
+            while todo_id in agent_todos:
+                todo_id = str(uuid.uuid4())[:6]
+            timestamp = datetime.now(UTC).isoformat()
+
+            agent_todos[todo_id] = {
+                "title": task["title"],
+                "description": task.get("description"),
+                "priority": task_priority,
+                "status": "pending",
+                "created_at": timestamp,
+                "updated_at": timestamp,
+                "completed_at": None,
+            }
+            created.append(
+                {
+                    "todo_id": todo_id,
+                    "title": task["title"],
+                    "priority": task_priority,
+                }
+            )
+
+    except (ValueError, TypeError) as e:
+        return {"success": False, "error": f"Failed to create todo: {e}", "todo_id": None}
+    else:
+        todos_list = _sorted_todos(agent_id)
+
+        response: dict[str, Any] = {
+            "success": True,
+            "created": created,
+            "count": len(created),
+            "todos": todos_list,
+            "total_count": len(todos_list),
+        }
+        return response
+
+
+@register_tool(sandbox_execution=False)
+def list_todos(
+    agent_state: Any,
+    status: str | None = None,
+    priority: str | None = None,
+) -> dict[str, Any]:
+    """List the calling agent's todos, optionally filtered by status and/or priority.
+
+    Args:
+        agent_state: Object exposing ``agent_id``; selects which todo list is read.
+        status: Optional status filter (case-insensitive); must be one of
+            ``VALID_STATUSES`` when given.
+        priority: Optional priority filter (case-insensitive); must be one of
+            ``VALID_PRIORITIES`` when given.
+
+    Returns:
+        ``success``, ``todos`` (matching todos in the shared sort order),
+        ``total_count`` (number of matching todos) and ``summary`` (count of
+        matching todos per status). On failure ``success`` is False, ``error``
+        is set and the other fields are empty/zero.
+    """
+    try:
+        agent_id = agent_state.agent_id
+
+        status_filter = status.strip().lower() if isinstance(status, str) else None
+        priority_filter = priority.strip().lower() if isinstance(priority, str) else None
+
+        # An unknown filter value can never match; report it instead of
+        # returning a misleading empty list (same validation as update_todo).
+        if status_filter and status_filter not in VALID_STATUSES:
+            raise ValueError(f"Invalid status. Must be one of: {', '.join(VALID_STATUSES)}")
+        if priority_filter and priority_filter not in VALID_PRIORITIES:
+            raise ValueError(f"Invalid priority. Must be one of: {', '.join(VALID_PRIORITIES)}")
+
+        # Reuse the shared ordering helper; filtering an already sorted list
+        # keeps the same relative order as sorting the filtered list.
+        todos_list = [
+            todo
+            for todo in _sorted_todos(agent_id)
+            if (not status_filter or todo.get("status") == status_filter)
+            and (not priority_filter or todo.get("priority") == priority_filter)
+        ]
+
+        summary_counts = {"pending": 0, "in_progress": 0, "done": 0}
+        for todo in todos_list:
+            status_value = todo.get("status", "pending")
+            summary_counts[status_value] = summary_counts.get(status_value, 0) + 1
+
+        return {
+            "success": True,
+            "todos": todos_list,
+            "total_count": len(todos_list),
+            "summary": summary_counts,
+        }
+
+    except (ValueError, TypeError) as e:
+        return {
+            "success": False,
+            "error": f"Failed to list todos: {e}",
+            "todos": [],
+            "total_count": 0,
+            "summary": {"pending": 0, "in_progress": 0, "done": 0},
+        }
+
+
+@register_tool(sandbox_execution=False)
+def update_todo(
+    agent_state: Any,
+    todo_id: str,
+    title: str | None = None,
+    description: str | None = None,
+    priority: str | None = None,
+    status: str | None = None,
+) -> dict[str, Any]:
+    """Update fields of one todo in the calling agent's list.
+
+    Only arguments that are not ``None`` are applied. All supplied values are
+    validated first and then written together, so a rejected call leaves the
+    todo unchanged.
+
+    Args:
+        agent_state: Object exposing ``agent_id``; selects which todo list is used.
+        todo_id: ID of the todo to update.
+        title: New title; must not be blank.
+        description: New description; a blank value clears it (stored as ``None``).
+        priority: New priority; must be one of ``VALID_PRIORITIES``.
+        status: New status; must be one of ``VALID_STATUSES``. Setting "done"
+            stamps ``completed_at``; any other status clears it.
+
+    Returns:
+        On success: ``success``, ``todos`` (full sorted list) and ``total_count``.
+        On failure: ``success=False`` and ``error``.
+    """
+    try:
+        agent_id = agent_state.agent_id
+        agent_todos = _get_agent_todos(agent_id)
+
+        if todo_id not in agent_todos:
+            return {"success": False, "error": f"Todo with ID '{todo_id}' not found"}
+
+        todo = agent_todos[todo_id]
+
+        # Stage changes here and apply them only after everything validated.
+        changes: dict[str, Any] = {}
+
+        if title is not None:
+            if not title.strip():
+                return {"success": False, "error": "Title cannot be empty"}
+            changes["title"] = title.strip()
+
+        if description is not None:
+            changes["description"] = description.strip() or None
+
+        if priority is not None:
+            # A ValueError from here is reported by the handler below.
+            changes["priority"] = _normalize_priority(
+                priority, str(todo.get("priority", "normal"))
+            )
+
+        # One clock read so completed_at and updated_at agree.
+        now = datetime.now(UTC).isoformat()
+
+        if status is not None:
+            status_candidate = status.lower()
+            if status_candidate not in VALID_STATUSES:
+                return {
+                    "success": False,
+                    "error": f"Invalid status. Must be one of: {', '.join(VALID_STATUSES)}",
+                }
+            changes["status"] = status_candidate
+            changes["completed_at"] = now if status_candidate == "done" else None
+
+        changes["updated_at"] = now
+        todo.update(changes)
+
+        todos_list = _sorted_todos(agent_id)
+
+        return {
+            "success": True,
+            "todos": todos_list,
+            "total_count": len(todos_list),
+        }
+
+    except (ValueError, TypeError) as e:
+        return {"success": False, "error": str(e)}
+
+
+@register_tool(sandbox_execution=False)
+def mark_todo_done(
+    agent_state: Any,
+    todo_id: str,
+) -> dict[str, Any]:
+    """Mark one of the calling agent's todos as done.
+
+    Sets ``status`` to "done" and stamps ``completed_at`` and ``updated_at``
+    with the same timestamp.
+
+    Returns:
+        On success: ``success``, ``todos`` (full sorted list) and ``total_count``.
+        On failure (unknown ``todo_id``): ``success=False`` and ``error``.
+    """
+    try:
+        agent_id = agent_state.agent_id
+        agent_todos = _get_agent_todos(agent_id)
+
+        if todo_id not in agent_todos:
+            return {"success": False, "error": f"Todo with ID '{todo_id}' not found"}
+
+        # Read the clock once so both timestamps are identical, mirroring how
+        # create_todo shares one timestamp between created_at and updated_at.
+        now = datetime.now(UTC).isoformat()
+
+        todo = agent_todos[todo_id]
+        todo["status"] = "done"
+        todo["completed_at"] = now
+        todo["updated_at"] = now
+
+        todos_list = _sorted_todos(agent_id)
+
+        return {
+            "success": True,
+            "todos": todos_list,
+            "total_count": len(todos_list),
+        }
+
+    except (ValueError, TypeError) as e:
+        return {"success": False, "error": str(e)}
+
+
+@register_tool(sandbox_execution=False)
+def mark_todo_pending(
+    agent_state: Any,
+    todo_id: str,
+) -> dict[str, Any]:
+    """Reset one of the calling agent's todos to "pending".
+
+    Clears ``completed_at`` and refreshes ``updated_at``.
+
+    Returns:
+        On success: ``success``, ``todos`` (full sorted list) and ``total_count``.
+        On failure (unknown ``todo_id``): ``success=False`` and ``error``.
+    """
+    try:
+        agent_id = agent_state.agent_id
+        record = _get_agent_todos(agent_id).get(todo_id)
+
+        if record is None:
+            return {"success": False, "error": f"Todo with ID '{todo_id}' not found"}
+
+        record.update(
+            status="pending",
+            completed_at=None,
+            updated_at=datetime.now(UTC).isoformat(),
+        )
+
+        remaining = _sorted_todos(agent_id)
+        return {"success": True, "todos": remaining, "total_count": len(remaining)}
+
+    except (ValueError, TypeError) as e:
+        return {"success": False, "error": str(e)}
+
+
+@register_tool(sandbox_execution=False)
+def delete_todo(
+    agent_state: Any,
+    todo_id: str,
+) -> dict[str, Any]:
+    """Remove one todo from the calling agent's list.
+
+    Returns:
+        On success: ``success``, ``todos`` (remaining todos, sorted) and
+        ``total_count``. On failure (unknown ``todo_id``): ``success=False``
+        and ``error``.
+    """
+    try:
+        agent_id = agent_state.agent_id
+        store = _get_agent_todos(agent_id)
+
+        # pop() with a sentinel removes the entry and detects a missing ID in one step.
+        missing = object()
+        if store.pop(todo_id, missing) is missing:
+            return {"success": False, "error": f"Todo with ID '{todo_id}' not found"}
+
+        remaining = _sorted_todos(agent_id)
+        return {"success": True, "todos": remaining, "total_count": len(remaining)}
+
+    except (ValueError, TypeError) as e:
+        return {"success": False, "error": str(e)}
--- a/strix/tools/todo/todo_actions_schema.xml
+++ b/strix/tools/todo/todo_actions_schema.xml
@@ -0,0 +1,201 @@
+<tools>
+  <important>
+  YOU MUST USE THE TODO TOOL EXTENSIVELY. This is critical for staying organized and focused.
+
+  IMPORTANT: Each subagent has their own separate todo list. Your todos are private to you and
+  do not interfere with other agents' todos. Use this to your advantage.
+
+  WORKFLOW - Follow this for EVERY task:
+  1. Create todos at the START to break down your work
+  2. BEFORE starting a task: Mark it as "in_progress" using update_todo
+  3. AFTER completing a task: Mark it as "done" using mark_todo_done
+  4. When you discover new tasks: Add them as todos right away
+
+  ALWAYS mark the current task as in_progress before working on it. This shows what you're
+  actively doing. Then mark it done when finished. Never skip these status updates.
+
+  A well-maintained todo list prevents you from going in circles, forgetting important tasks,
+  or losing track of your progress. USE IT CONSTANTLY.
+  </important>
+
+  <tool name="create_todo">
+    <description>Create a new todo item to track tasks, goals, and progress. USE THIS FREQUENTLY.</description>
+    <details>Use this tool liberally to create actionable items. Break down complex tasks into smaller,
+  manageable todos. Each subagent maintains their own independent todo list - your todos are yours alone.
+
+  Create todos at the start of work to plan your approach, add new ones as you discover tasks,
+  and mark them done as you progress. This keeps you focused, prevents you from forgetting tasks,
+  and provides a clear record of what you've accomplished.</details>
+    <parameters>
+      <parameter name="title" type="string" required="false">
+        <description>Short, actionable title for the todo (e.g., "Test login endpoint for SQL injection")</description>
+      </parameter>
+      <parameter name="todos" type="string" required="false">
+        <description>Create multiple todos at once. Provide a JSON array of {"title": "...", "description": "...", "priority": "..."} objects or a newline-separated bullet list.</description>
+      </parameter>
+      <parameter name="description" type="string" required="false">
+        <description>Detailed description or notes about the task</description>
+      </parameter>
+      <parameter name="priority" type="string" required="false">
+        <description>Priority level: "low", "normal", "high", "critical" (default: "normal")</description>
+      </parameter>
+    </parameters>
+    <returns type="Dict[str, Any]">
+      <description>Response containing: - success: Whether the operation succeeded - created: List of created todos with their IDs - count: Number of todos created - todos: Full sorted todo list - total_count: Total number of todos in your list</description>
+    </returns>
+    <examples>
+  # Create a high priority todo
+  <function=create_todo>
+  <parameter=title>Test authentication bypass on /api/admin</parameter>
+  <parameter=description>The admin endpoint seems to have weak authentication. Try JWT manipulation, session fixation, and privilege escalation.</parameter>
+  <parameter=priority>high</parameter>
+  </function>
+
+  # Create a simple todo
+  <function=create_todo>
+  <parameter=title>Enumerate all API endpoints</parameter>
+  </function>
+
+  # Bulk create todos (JSON array)
+  <function=create_todo>
+  <parameter=todos>[{"title": "Map all admin routes", "priority": "high"}, {"title": "Check forgotten password flow"}]</parameter>
+  </function>
+
+  # Bulk create todos (bullet list)
+  <function=create_todo>
+  <parameter=todos>
+  - Capture baseline traffic in proxy
+  - Enumerate S3 buckets for leaked assets
+  - Compare responses for timing differences
+  </parameter>
+  </function>
+    </examples>
+  </tool>
+
+  <tool name="list_todos">
+    <description>List all todos with optional filtering by status or priority. CHECK THIS OFTEN.</description>
+    <details>Use this frequently to review your task list, check progress, and decide what to work on next.
+  Check your todos regularly to stay focused and avoid missing important tasks.
+  The list is sorted: done first, then in_progress, then pending. Within each status, sorted by priority (critical > high > normal > low).
+  Each subagent has their own independent todo list.</details>
+    <parameters>
+      <parameter name="status" type="string" required="false">
+        <description>Filter by status: "pending", "in_progress", "done"</description>
+      </parameter>
+      <parameter name="priority" type="string" required="false">
+        <description>Filter by priority: "low", "normal", "high", "critical"</description>
+      </parameter>
+    </parameters>
+    <returns type="Dict[str, Any]">
+      <description>Response containing: - success: Whether the operation succeeded - todos: List of todo items matching the filters - total_count: Number of todos returned - summary: Count of the returned todos by status (pending, in_progress, done)</description>
+    </returns>
+    <examples>
+  # List all todos
+  <function=list_todos>
+  </function>
+
+  # List only pending todos
+  <function=list_todos>
+  <parameter=status>pending</parameter>
+  </function>
+
+  # List high priority items
+  <function=list_todos>
+  <parameter=priority>high</parameter>
+  </function>
+    </examples>
+  </tool>
+
+  <tool name="update_todo">
+    <description>Update an existing todo item's title, description, priority, or status.</description>
+    <parameters>
+      <parameter name="todo_id" type="string" required="true">
+        <description>ID of the todo to update</description>
+      </parameter>
+      <parameter name="title" type="string" required="false">
+        <description>New title for the todo</description>
+      </parameter>
+      <parameter name="description" type="string" required="false">
+        <description>New description for the todo</description>
+      </parameter>
+      <parameter name="priority" type="string" required="false">
+        <description>New priority: "low", "normal", "high", "critical"</description>
+      </parameter>
+      <parameter name="status" type="string" required="false">
+        <description>New status: "pending", "in_progress", "done"</description>
+      </parameter>
+    </parameters>
+    <returns type="Dict[str, Any]">
+      <description>Response containing: - success: Whether the update was successful - todos: Full sorted todo list - total_count: Total number of todos in your list</description>
+    </returns>
+    <examples>
+  # Update priority and add description
+  <function=update_todo>
+  <parameter=todo_id>abc123</parameter>
+  <parameter=priority>critical</parameter>
+  <parameter=description>Found potential RCE vector, needs immediate attention</parameter>
+  </function>
+
+  # Mark as in progress
+  <function=update_todo>
+  <parameter=todo_id>abc123</parameter>
+  <parameter=status>in_progress</parameter>
+  </function>
+    </examples>
+  </tool>
+
+  <tool name="mark_todo_done">
+    <description>Mark a todo item as completed. DO THIS IMMEDIATELY after finishing a task.</description>
+    <details>Mark todos as done right after completing them - don't wait! This keeps your list accurate,
+  helps track progress, and gives you a clear picture of what's been accomplished vs what remains.</details>
+    <parameters>
+      <parameter name="todo_id" type="string" required="true">
+        <description>ID of the todo to mark as done</description>
+      </parameter>
+    </parameters>
+    <returns type="Dict[str, Any]">
+      <description>Response containing: - success: Whether the operation was successful - todos: Full sorted todo list - total_count: Total number of todos in your list</description>
+    </returns>
+    <examples>
+  <function=mark_todo_done>
+  <parameter=todo_id>abc123</parameter>
+  </function>
+    </examples>
+  </tool>
+
+  <tool name="mark_todo_pending">
+    <description>Mark a todo item as pending (reopen a completed task).</description>
+    <details>Use this to reopen a task that was marked done but needs more work.</details>
+    <parameters>
+      <parameter name="todo_id" type="string" required="true">
+        <description>ID of the todo to mark as pending</description>
+      </parameter>
+    </parameters>
+    <returns type="Dict[str, Any]">
+      <description>Response containing: - success: Whether the operation was successful - todos: Full sorted todo list - total_count: Total number of todos in your list</description>
+    </returns>
+    <examples>
+  <function=mark_todo_pending>
+  <parameter=todo_id>abc123</parameter>
+  </function>
+    </examples>
+  </tool>
+
+  <tool name="delete_todo">
+    <description>Delete a todo item.</description>
+    <details>Use this to remove todos that are no longer relevant or were created by mistake.</details>
+    <parameters>
+      <parameter name="todo_id" type="string" required="true">
+        <description>ID of the todo to delete</description>
+      </parameter>
+    </parameters>
+    <returns type="Dict[str, Any]">
+      <description>Response containing: - success: Whether the deletion was successful - todos: Full sorted list of remaining todos - total_count: Number of remaining todos</description>
+    </returns>
+    <examples>
+  <function=delete_todo>
+  <parameter=todo_id>abc123</parameter>
+  </function>
+    </examples>
+  </tool>
+</tools>