diff --git a/strix/interface/cli.py b/strix/interface/cli.py index 5579f82..582f811 100644 --- a/strix/interface/cli.py +++ b/strix/interface/cli.py @@ -66,6 +66,8 @@ async def run_cli(args: Any) -> None: # noqa: PLR0915 console.print(startup_panel) console.print() + scan_mode = getattr(args, "scan_mode", "deep") + scan_config = { "scan_id": args.run_name, "targets": args.targets_info, @@ -73,7 +75,7 @@ async def run_cli(args: Any) -> None: # noqa: PLR0915 "run_name": args.run_name, } - llm_config = LLMConfig() + llm_config = LLMConfig(scan_mode=scan_mode) agent_config = { "llm_config": llm_config, "max_iterations": 300, diff --git a/strix/interface/main.py b/strix/interface/main.py index f632590..f740f1b 100644 --- a/strix/interface/main.py +++ b/strix/interface/main.py @@ -312,6 +312,21 @@ Examples: ), ) + parser.add_argument( + "-m", + "--scan-mode", + type=str, + choices=["quick", "standard", "deep"], + default="deep", + help=( + "Scan mode: " + "'quick' for fast CI/CD checks, " + "'standard' for routine testing, " + "'deep' for thorough security reviews (default). " + "Default: deep." 
+ ), + ) + args = parser.parse_args() if args.instruction and args.instruction_file: diff --git a/strix/interface/tui.py b/strix/interface/tui.py index b2d3bf0..69f0fc9 100644 --- a/strix/interface/tui.py +++ b/strix/interface/tui.py @@ -319,7 +319,8 @@ class StrixTUIApp(App): # type: ignore[misc] } def _build_agent_config(self, args: argparse.Namespace) -> dict[str, Any]: - llm_config = LLMConfig() + scan_mode = getattr(args, "scan_mode", "deep") + llm_config = LLMConfig(scan_mode=scan_mode) config = { "llm_config": llm_config, diff --git a/strix/llm/config.py b/strix/llm/config.py index 0738f4f..23f20e4 100644 --- a/strix/llm/config.py +++ b/strix/llm/config.py @@ -8,6 +8,7 @@ class LLMConfig: enable_prompt_caching: bool = True, prompt_modules: list[str] | None = None, timeout: int | None = None, + scan_mode: str = "deep", ): self.model_name = model_name or os.getenv("STRIX_LLM", "openai/gpt-5") @@ -18,3 +19,5 @@ class LLMConfig: self.prompt_modules = prompt_modules or [] self.timeout = timeout or int(os.getenv("LLM_TIMEOUT", "300")) + + self.scan_mode = scan_mode if scan_mode in ["quick", "standard", "deep"] else "deep" diff --git a/strix/llm/llm.py b/strix/llm/llm.py index 50e0003..e3df248 100644 --- a/strix/llm/llm.py +++ b/strix/llm/llm.py @@ -158,9 +158,10 @@ class LLM: ) try: - prompt_module_content = load_prompt_modules( - self.config.prompt_modules or [], self.jinja_env - ) + modules_to_load = list(self.config.prompt_modules or []) + modules_to_load.append(f"scan_modes/{self.config.scan_mode}") + + prompt_module_content = load_prompt_modules(modules_to_load, self.jinja_env) def get_module(name: str) -> str: return prompt_module_content.get(name, "") diff --git a/strix/prompts/scan_modes/deep.jinja b/strix/prompts/scan_modes/deep.jinja new file mode 100644 index 0000000..5f90a68 --- /dev/null +++ b/strix/prompts/scan_modes/deep.jinja @@ -0,0 +1,145 @@ + +DEEP SCAN MODE - Exhaustive Security Assessment + +This mode is for thorough security reviews where 
finding vulnerabilities is critical. + +PHASE 1: EXHAUSTIVE RECONNAISSANCE AND MAPPING +Spend significant effort understanding the target before exploitation. + +For whitebox (source code available): +- Map EVERY file, module, and code path in the repository +- Trace all entry points from HTTP handlers to database queries +- Identify all authentication mechanisms and their implementations +- Map all authorization checks and understand the access control model +- Identify all external service integrations and API calls +- Analyze all configuration files for secrets and misconfigurations +- Review all database schemas and understand data relationships +- Map all background jobs, cron tasks, and async processing +- Identify all serialization/deserialization points +- Review all file handling operations (upload, download, processing) +- Understand the deployment model and infrastructure assumptions +- Check all dependency versions against known CVE databases + +For blackbox (no source code): +- Exhaustive subdomain enumeration using multiple sources and tools +- Full port scanning to identify all services +- Complete content discovery with multiple wordlists +- Technology fingerprinting on all discovered assets +- API endpoint discovery through documentation, JavaScript analysis, and fuzzing +- Identify all parameters including hidden and rarely-used ones +- Map all user roles by testing with different account types +- Understand rate limiting, WAF rules, and security controls in place +- Document the complete application architecture as understood from outside + +EXECUTION STRATEGY - HIERARCHICAL AGENT SWARM: +After Phase 1 (Recon & Mapping) is complete: +1. Divide the application into major components/parts (e.g., Auth System, Payment Gateway, User Profile, Admin Panel) +2. Spawn a specialized subagent for EACH major component +3. 
Each component agent must then: + - Further subdivide its scope into subparts (e.g., Login Form, Registration API, Password Reset) + - Spawn sub-subagents for each distinct subpart +4. At the lowest level (specific functionality), spawn specialized agents for EACH potential vulnerability type: + - "Auth System" → "Login Form" → "SQLi Agent", "XSS Agent", "Auth Bypass Agent" + - This creates a massive parallel swarm covering every angle + - Do NOT overload a single agent with multiple vulnerability types + - Scale horizontally to maximum capacity + +PHASE 2: DEEP BUSINESS LOGIC ANALYSIS +Understand the application deeply enough to find logic flaws: +- CREATE A FULL STORYBOARD of all user flows and state transitions +- Document every step of the business logic in a structured flow diagram +- Use the application extensively as every type of user to map the full lifecycle of data +- Document all state machines and workflows (e.g. Order Created -> Paid -> Shipped) +- Identify trust boundaries between components +- Map all integrations with third-party services +- Understand what invariants the application tries to maintain +- Identify all points where roles, privileges, or sensitive data changes hands +- Look for implicit assumptions in the business logic +- Consider multi-step attacks that abuse normal functionality + +PHASE 3: COMPREHENSIVE ATTACK SURFACE TESTING +Test EVERY input vector with EVERY applicable technique. 
+ +Input Handling - Test all parameters, headers, cookies with: +- Multiple injection payloads (SQL, NoSQL, LDAP, XPath, Command, Template) +- Various encodings and bypass techniques (double encoding, unicode, null bytes) +- Boundary conditions and type confusion +- Large payloads and buffer-related issues + +Authentication and Session: +- Exhaustive brute force protection testing +- Session fixation, hijacking, and prediction attacks +- JWT/token manipulation if applicable +- OAuth flow abuse scenarios +- Password reset flow vulnerabilities (token leakage, reuse, timing) +- Multi-factor authentication bypass techniques +- Account enumeration through all possible channels + +Access Control: +- Test EVERY endpoint for horizontal and vertical access control +- Parameter tampering on all object references +- Forced browsing to all discovered resources +- HTTP method tampering +- Test access control after session changes (logout, role change) + +File Operations: +- Exhaustive file upload bypass testing (extension, content-type, magic bytes) +- Path traversal on all file parameters +- Server-side request forgery through file inclusion +- XXE through all XML parsing points + +Business Logic: +- Race conditions on all state-changing operations +- Workflow bypass attempts on every multi-step process +- Price/quantity manipulation in all transactions +- Parallel execution attacks +- Time-of-check to time-of-use vulnerabilities + +Advanced Attacks: +- HTTP request smuggling if multiple proxies/servers +- Cache poisoning and cache deception +- Subdomain takeover on all subdomains +- Prototype pollution in JavaScript applications +- CORS misconfiguration exploitation +- WebSocket security testing +- GraphQL specific attacks if applicable + +PHASE 4: VULNERABILITY CHAINING +Don't just find individual bugs - chain them: +- Combine information disclosure with access control bypass +- Chain SSRF to access internal services +- Use low-severity findings to enable high-impact attacks 
+- Look for multi-step attack paths that automated tools miss +- Consider attacks that span multiple application components + +CHAINING PRINCIPLES (MAX IMPACT): +- Treat every finding as a pivot: ask "What does this unlock next?" until you reach maximum privilege / maximum data exposure / maximum control +- Prefer end-to-end exploit paths over isolated bugs: initial foothold → pivot → privilege gain → sensitive action/data +- Cross boundaries deliberately: user → admin, external → internal, unauthenticated → authenticated, read → write, single-tenant → cross-tenant +- Validate chains by executing the full sequence using the available tools (proxy + browser for workflows, python for automation, terminal for supporting commands) +- When a component agent finds a potential pivot, it must message/spawn the next focused agent to continue the chain in the next component/subpart + +PHASE 5: PERSISTENT TESTING +If initial attempts fail, don't give up: +- Research specific technologies for known bypasses +- Try alternative exploitation techniques +- Look for edge cases and unusual functionality +- Test with different client contexts +- Revisit previously tested areas with new information +- Consider timing-based and blind exploitation techniques + +PHASE 6: THOROUGH REPORTING +- Document EVERY confirmed vulnerability with full details +- Include all severity levels - even low findings may enable chains +- Provide complete reproduction steps and PoC +- Document remediation recommendations +- Note areas requiring additional review beyond current scope + +MINDSET: +- Relentless - this is about finding what others miss +- Creative - think of unconventional attack vectors +- Patient - real vulnerabilities often require deep investigation +- Thorough - test every parameter, every endpoint, every edge case +- Persistent - if one approach fails, try ten more +- Holistic - understand how components interact to find systemic issues + diff --git a/strix/prompts/scan_modes/quick.jinja 
b/strix/prompts/scan_modes/quick.jinja new file mode 100644 index 0000000..58f52ac --- /dev/null +++ b/strix/prompts/scan_modes/quick.jinja @@ -0,0 +1,63 @@ + +QUICK SCAN MODE - Rapid Security Assessment + +This mode is optimized for fast feedback. Focus on HIGH-IMPACT vulnerabilities with minimal overhead. + +PHASE 1: RAPID ORIENTATION +- If source code is available: Focus primarily on RECENT CHANGES (git diff, new commits, modified files) +- Identify the most critical entry points: authentication endpoints, payment flows, admin interfaces, API endpoints handling sensitive data +- Quickly understand the tech stack and frameworks in use +- Skip exhaustive reconnaissance - use what's immediately visible + +PHASE 2: TARGETED ATTACK SURFACE +For whitebox (source code available): +- Prioritize files changed in recent commits/PRs - these are most likely to contain fresh bugs +- Look for security-sensitive patterns in diffs: auth checks, input handling, database queries, file operations +- Trace user-controllable input in changed code paths +- Check if security controls were modified or bypassed + +For blackbox (no source code): +- Focus on authentication and session management +- Test the most critical user flows only +- Check for obvious misconfigurations and exposed endpoints +- Skip deep content discovery - test what's immediately accessible + +PHASE 3: HIGH-IMPACT VULNERABILITY FOCUS +Prioritize in this order: +1. Authentication bypass and broken access control +2. Remote code execution vectors +3. SQL injection in critical endpoints +4. Insecure direct object references (IDOR) in sensitive resources +5. Server-side request forgery (SSRF) +6. 
Hardcoded credentials or secrets in code + +Skip lower-priority items: +- Extensive subdomain enumeration +- Full directory bruteforcing +- Information disclosure that doesn't lead to exploitation +- Theoretical vulnerabilities without PoC + +PHASE 4: VALIDATION AND REPORTING +- Validate only critical/high severity findings with minimal PoC +- Report findings as you discover them - don't wait for completion +- Focus on exploitability and business impact + +QUICK CHAINING RULE: +- If you find ANY strong primitive (auth weakness, access control gap, injection point, internal reachability), immediately attempt a single high-impact pivot to demonstrate real impact +- Do not stop at a low-context “maybe”; turn it into a concrete exploit sequence (even if short) that reaches privileged action or sensitive data + +OPERATIONAL GUIDELINES: +- Use the browser tool for quick manual testing of critical flows +- Use terminal for targeted scans with fast presets (e.g., nuclei with critical/high templates only) +- Use proxy to inspect traffic on key endpoints +- Skip extensive fuzzing - use targeted payloads only +- Create subagents only for parallel high-priority tasks +- If whitebox: use the file_edit tool to review specific suspicious code sections +- Use notes tool to track critical findings only + +MINDSET: +- Think like a time-boxed bug bounty hunter going for quick wins +- Prioritize breadth over depth on critical areas +- If something looks exploitable, validate quickly and move on +- Don't get stuck - if an attack vector isn't yielding results quickly, pivot + diff --git a/strix/prompts/scan_modes/standard.jinja b/strix/prompts/scan_modes/standard.jinja new file mode 100644 index 0000000..46b31c2 --- /dev/null +++ b/strix/prompts/scan_modes/standard.jinja @@ -0,0 +1,91 @@ + +STANDARD SCAN MODE - Balanced Security Assessment + +This mode provides thorough coverage with a structured methodology. Balance depth with efficiency.
+ +PHASE 1: RECONNAISSANCE AND MAPPING +Understanding the target is critical before exploitation. Never skip this phase. + +For whitebox (source code available): +- Map the entire codebase structure: directories, modules, entry points +- Identify the application architecture (MVC, microservices, monolith) +- Understand the routing: how URLs map to handlers/controllers +- Identify all user input vectors: forms, APIs, file uploads, headers, cookies +- Map authentication and authorization flows +- Identify database interactions and ORM usage +- Review dependency manifests for known vulnerable packages +- Understand the data model and sensitive data locations + +For blackbox (no source code): +- Crawl the application thoroughly using browser tool - interact with every feature +- Enumerate all endpoints, parameters, and functionality +- Identify the technology stack through fingerprinting +- Map user roles and access levels +- Understand the business logic by using the application as intended +- Document all forms, APIs, and data entry points +- Use proxy tool to capture and analyze all traffic during exploration + +PHASE 2: BUSINESS LOGIC UNDERSTANDING +Before testing for vulnerabilities, understand what the application DOES: +- What are the critical business flows? (payments, user registration, data access) +- What actions should be restricted to specific roles? +- What data should users NOT be able to access? +- What state transitions exist? (order pending → paid → shipped) +- Where does money, sensitive data, or privilege flow? + +PHASE 3: SYSTEMATIC VULNERABILITY ASSESSMENT +Test each attack surface methodically. Create focused subagents for different areas. 
+ +Entry Point Analysis: +- Test all input fields for injection vulnerabilities +- Check all API endpoints for authentication and authorization +- Verify all file upload functionality for bypass +- Test all search and filter functionality +- Check redirect parameters and URL handling + +Authentication and Session: +- Test login for brute force protection +- Check session token entropy and handling +- Test password reset flows for weaknesses +- Verify logout invalidates sessions +- Test for authentication bypass techniques + +Access Control: +- For every privileged action, test as unprivileged user +- Test horizontal access control (user A accessing user B's data) +- Test vertical access control (user escalating to admin) +- Check API endpoints mirror UI access controls +- Test direct object references with different user contexts + +Business Logic: +- Attempt to skip steps in multi-step processes +- Test for race conditions in critical operations +- Try negative values, zero values, boundary conditions +- Attempt to replay transactions +- Test for price manipulation in e-commerce flows + +PHASE 4: EXPLOITATION AND VALIDATION +- Every finding must have a working proof-of-concept +- Demonstrate actual impact, not theoretical risk +- Chain vulnerabilities when possible to show maximum impact +- Document the full attack path from initial access to impact +- Use python tool for complex exploit development + +CHAINING & MAX IMPACT MINDSET: +- Always ask: "If I can do X, what does that enable me to do next?" 
Keep pivoting until you reach maximum privilege or maximum sensitive data access +- Prefer complete end-to-end paths (entry point → pivot → privileged action/data) over isolated bug reports +- Use the application as a real user would: exploit must survive the actual workflow and state transitions +- When you discover a useful pivot (info leak, weak boundary, partial access), immediately pursue the next step rather than stopping at the first win + +PHASE 5: COMPREHENSIVE REPORTING +- Report all confirmed vulnerabilities with clear reproduction steps +- Include severity based on actual exploitability and business impact +- Provide remediation recommendations +- Document any areas that need further investigation + +MINDSET: +- Methodical and systematic - cover the full attack surface +- Document as you go - findings and areas tested +- Validate everything - no assumptions about exploitability +- Think about business impact, not just technical severity + diff --git a/strix/tools/agents_graph/agents_graph_actions.py b/strix/tools/agents_graph/agents_graph_actions.py index 2e384c0..e5b36b3 100644 --- a/strix/tools/agents_graph/agents_graph_actions.py +++ b/strix/tools/agents_graph/agents_graph_actions.py @@ -233,14 +233,14 @@ def create_agent( parent_agent = _agent_instances.get(parent_id) timeout = None - if ( - parent_agent - and hasattr(parent_agent, "llm_config") - and hasattr(parent_agent.llm_config, "timeout") - ): - timeout = parent_agent.llm_config.timeout + scan_mode = "deep" + if parent_agent and hasattr(parent_agent, "llm_config"): + if hasattr(parent_agent.llm_config, "timeout"): + timeout = parent_agent.llm_config.timeout + if hasattr(parent_agent.llm_config, "scan_mode"): + scan_mode = parent_agent.llm_config.scan_mode - llm_config = LLMConfig(prompt_modules=module_list, timeout=timeout) + llm_config = LLMConfig(prompt_modules=module_list, timeout=timeout, scan_mode=scan_mode) agent_config = { "llm_config": llm_config,