From ccab853c0fd0e3ca12471eff67c80c4ce44ac910 Mon Sep 17 00:00:00 2001
From: Ahmed Allam <49919286+0xallam@users.noreply.github.com>
Date: Sat, 16 Aug 2025 15:47:36 -0700
Subject: [PATCH] Clone git repositories internally (#10)

---
 strix/agents/StrixAgent/strix_agent.py        | 18 +++-
 strix/agents/StrixAgent/system_prompt.jinja   | 21 +++++
 strix/cli/app.py                              |  2 +
 strix/cli/main.py                             | 89 ++++++++++++++++++-
 .../agents_graph_actions_schema.xml           | 18 +++-
 5 files changed, 143 insertions(+), 5 deletions(-)

diff --git a/strix/agents/StrixAgent/strix_agent.py b/strix/agents/StrixAgent/strix_agent.py
index 5cee175..2616091 100644
--- a/strix/agents/StrixAgent/strix_agent.py
+++ b/strix/agents/StrixAgent/strix_agent.py
@@ -26,9 +26,21 @@ class StrixAgent(BaseAgent):
         task_parts = []
 
         if scan_type == "repository":
-            task_parts.append(
-                f"Perform a security assessment of the Git repository: {target['target_repo']}"
-            )
+            repo_url = target["target_repo"]
+            cloned_path = target.get("cloned_repo_path")
+
+            if cloned_path:
+                shared_workspace_path = "/shared_workspace"
+                task_parts.append(
+                    f"Perform a security assessment of the Git repository: {repo_url}. "
+                    f"The repository has been cloned from '{repo_url}' to '{cloned_path}' "
+                    f"(host path) and then copied to '{shared_workspace_path}' in your "
+                    f"environment. Analyze the codebase at: {shared_workspace_path}"
+                )
+            else:
+                task_parts.append(
+                    f"Perform a security assessment of the Git repository: {repo_url}"
+                )
 
         elif scan_type == "web_application":
             task_parts.append(
diff --git a/strix/agents/StrixAgent/system_prompt.jinja b/strix/agents/StrixAgent/system_prompt.jinja
index 2fd4668..3fa206b 100644
--- a/strix/agents/StrixAgent/system_prompt.jinja
+++ b/strix/agents/StrixAgent/system_prompt.jinja
@@ -206,6 +206,27 @@ CRITICAL RULES:
 - **ONE AGENT = ONE TASK** - Don't let agents do multiple unrelated jobs
 - **SPAWN REACTIVELY** - Create new agents based on what you discover
 - **ONLY REPORTING AGENTS** can use create_vulnerability_report tool
+- **AGENT SPECIALIZATION MANDATORY** - Each agent must be highly specialized with maximum 3 prompt modules
+- **NO GENERIC AGENTS** - Avoid creating broad, multi-purpose agents that dilute focus
+
+AGENT SPECIALIZATION EXAMPLES:
+
+GOOD SPECIALIZATION:
+- "SQLi Validation Agent" with prompt_modules: sql_injection
+- "XSS Discovery Agent" with prompt_modules: xss
+- "Auth Testing Agent" with prompt_modules: authentication_jwt, business_logic
+- "SSRF + XXE + RCE Agent" with prompt_modules: ssrf, xxe, rce (related attack vectors)
+
+BAD SPECIALIZATION:
+- "General Web Testing Agent" with prompt_modules: sql_injection, xss, csrf, ssrf, authentication_jwt (too broad)
+- "Everything Agent" with prompt_modules: all available modules (completely unfocused)
+- Any agent with more than 3 prompt modules (violates constraints)
+
+FOCUS PRINCIPLES:
+- Each agent should have deep expertise in 1-3 related vulnerability types
+- Agents with single modules have the deepest specialization
+- Related vulnerabilities (like SSRF+XXE or Auth+Business Logic) can be combined
+- Never create "kitchen sink" agents that try to do everything
 
 REALISTIC TESTING OUTCOMES:
 - **No Findings**: Agent completes testing but finds no vulnerabilities
diff --git a/strix/cli/app.py b/strix/cli/app.py
index dcac147..95e4be7 100644
--- a/strix/cli/app.py
+++ b/strix/cli/app.py
@@ -248,6 +248,8 @@ class StrixCLIApp(App):  # type: ignore[misc]
 
         if args.target_type == "local_code" and "target_path" in args.target_dict:
             config["local_source_path"] = args.target_dict["target_path"]
+        elif args.target_type == "repository" and "cloned_repo_path" in args.target_dict:
+            config["local_source_path"] = args.target_dict["cloned_repo_path"]
 
         return config
 
diff --git a/strix/cli/main.py b/strix/cli/main.py
index 4ed70c3..654183a 100644
--- a/strix/cli/main.py
+++ b/strix/cli/main.py
@@ -9,7 +9,9 @@ import logging
 import os
 import secrets
 import shutil
+import subprocess
 import sys
+import tempfile
 from pathlib import Path
 from typing import Any
 from urllib.parse import urlparse
@@ -204,6 +206,84 @@ def generate_run_name() -> str:
     return f"{adj}-{noun}-{number}"
 
 
+def clone_repository(repo_url: str, run_name: str) -> str:
+    """Shallow-clone ``repo_url`` under ``<tmp>/strix_repos/<run_name>`` and return its path.
+
+    Prints an error panel and exits with status 1 if Git is missing or the clone fails.
+    """
+    console = Console()
+    git_executable = shutil.which("git")
+
+    temp_dir = Path(tempfile.gettempdir()) / "strix_repos" / run_name
+    temp_dir.mkdir(parents=True, exist_ok=True)
+
+    repo_name = Path(repo_url).stem if repo_url.endswith(".git") else Path(repo_url).name
+    clone_path = temp_dir / repo_name
+    if clone_path.exists():
+        shutil.rmtree(clone_path)
+
+    try:
+        # Raise inside the try so the "GIT NOT FOUND" handler below is reachable.
+        if git_executable is None:
+            raise FileNotFoundError("Git executable not found in PATH")
+        with console.status(f"[bold cyan]Cloning repository {repo_name}...", spinner="dots"):
+            subprocess.run(  # noqa: S603
+                [
+                    git_executable,
+                    "clone",
+                    "--depth=1",
+                    "--no-recurse-submodules",
+                    "--single-branch",
+                    repo_url,
+                    str(clone_path),
+                ],
+                capture_output=True,
+                text=True,
+                check=True,
+            )
+
+        return str(clone_path.absolute())
+
+    except subprocess.CalledProcessError as e:
+        error_text = Text()
+        error_text.append("❌ ", style="bold red")
+        error_text.append("REPOSITORY CLONE FAILED", style="bold red")
+        error_text.append("\n\n", style="white")
+        error_text.append(f"Could not clone repository: {repo_url}\n", style="white")
+        error_text.append(f"Error: {e.stderr or str(e)}", style="dim red")
+
+        panel = Panel(
+            error_text,
+            title="[bold red]🛡️  STRIX CLONE ERROR",
+            title_align="center",
+            border_style="red",
+            padding=(1, 2),
+        )
+        console.print("\n")
+        console.print(panel)
+        console.print()
+        sys.exit(1)
+    except FileNotFoundError:
+        error_text = Text()
+        error_text.append("❌ ", style="bold red")
+        error_text.append("GIT NOT FOUND", style="bold red")
+        error_text.append("\n\n", style="white")
+        error_text.append("Git is not installed or not available in PATH.\n", style="white")
+        error_text.append("Please install Git to clone repositories.\n", style="white")
+
+        panel = Panel(
+            error_text,
+            title="[bold red]🛡️  STRIX CLONE ERROR",
+            title_align="center",
+            border_style="red",
+            padding=(1, 2),
+        )
+        console.print("\n")
+        console.print(panel)
+        console.print()
+        sys.exit(1)
+
+
 def infer_target_type(target: str) -> tuple[str, dict[str, str]]:
     if not target or not isinstance(target, str):
         raise ValueError("Target must be a non-empty string")
@@ -544,16 +624,23 @@ def main() -> None:
     if sys.platform == "win32":
         asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
 
+    args = parse_arguments()
+
     check_docker_installed()
     pull_docker_image()
 
     validate_environment()
     asyncio.run(warm_up_llm())
 
-    args = parse_arguments()
     if not args.run_name:
         args.run_name = generate_run_name()
 
+    if args.target_type == "repository":
+        repo_url = args.target_dict["target_repo"]
+        cloned_path = clone_repository(repo_url, args.run_name)
+
+        args.target_dict["cloned_repo_path"] = cloned_path
+
     asyncio.run(run_strix_cli(args))
 
     results_path = Path("agent_runs") / args.run_name
diff --git a/strix/tools/agents_graph/agents_graph_actions_schema.xml b/strix/tools/agents_graph/agents_graph_actions_schema.xml
index 22e7173..f9d67a4 100644
--- a/strix/tools/agents_graph/agents_graph_actions_schema.xml
+++ b/strix/tools/agents_graph/agents_graph_actions_schema.xml
@@ -80,7 +80,7 @@ Only create a new agent if no existing agent is handling the specific task.</des
         <description>Whether the new agent should inherit parent's conversation history and context</description>
       </parameter>
       <parameter name="prompt_modules" type="string" required="false">
-        <description>Comma-separated list of prompt modules to use for the agent. Most agents should have at least one module in order to be useful. {{DYNAMIC_MODULES_DESCRIPTION}}</description>
+        <description>Comma-separated list of prompt modules to use for the agent (MAXIMUM 3 modules allowed). Most agents should have at least one module in order to be useful. Agents should be highly specialized - use 1-3 related vulnerability modules only. {{DYNAMIC_MODULES_DESCRIPTION}}</description>
       </parameter>
     </parameters>
     <returns type="Dict[str, Any]">
@@ -104,6 +104,22 @@ Only create a new agent if no existing agent is handling the specific task.</des
               for security vulnerabilities and bypass techniques.</parameter>
   <parameter=name>Auth Specialist</parameter>
   <parameter=prompt_modules>authentication_jwt, business_logic</parameter>
+  </function>
+
+  # Example of single-module specialization (most focused)
+  <function=create_agent>
+  <parameter=task>Perform comprehensive XSS testing including reflected, stored, and DOM-based
+              variants across all identified input points.</parameter>
+  <parameter=name>XSS Specialist</parameter>
+  <parameter=prompt_modules>xss</parameter>
+  </function>
+
+  # Example of maximum 3 related modules (borderline acceptable)
+  <function=create_agent>
+  <parameter=task>Test for server-side vulnerabilities including SSRF, XXE, and potential
+              RCE vectors in file upload and XML processing endpoints.</parameter>
+  <parameter=name>Server-Side Attack Specialist</parameter>
+  <parameter=prompt_modules>ssrf, xxe, rce</parameter>
   </function>
     </examples>
   </tool>