feat: add skills for specific tools (#366)

Co-authored-by: 0xallam <ahmed39652003@gmail.com>
This commit is contained in:
alex s
2026-03-19 17:47:29 -06:00
committed by GitHub
parent f0f8f3d4cc
commit 86341597c1
22 changed files with 986 additions and 26 deletions

View File

@@ -81,6 +81,21 @@ Protocol-specific testing techniques.
| --------- | ------------------------------------------------ |
| `graphql` | GraphQL introspection, batching, resolver issues |
### Tooling
Sandbox CLI playbooks for core recon and scanning tools.
| Skill | Coverage |
| ----------- | ------------------------------------------------------- |
| `nmap` | Port/service scan syntax and high-signal scan patterns |
| `nuclei` | Template selection, severity filtering, and rate tuning |
| `httpx` | HTTP probing and fingerprint output patterns |
| `ffuf` | Wordlist fuzzing, matcher/filter strategy, recursion |
| `subfinder` | Passive subdomain enumeration and source control |
| `naabu` | Fast port scanning with explicit rate/verify controls |
| `katana` | Crawl depth/JS/known-files behavior and pitfalls |
| `sqlmap` | SQLi workflow for enumeration and controlled extraction |
## Skill Structure
Each skill is a Markdown file with YAML frontmatter for metadata:

View File

@@ -4,6 +4,7 @@ from . import (
browser_renderer, browser_renderer,
file_edit_renderer, file_edit_renderer,
finish_renderer, finish_renderer,
load_skill_renderer,
notes_renderer, notes_renderer,
proxy_renderer, proxy_renderer,
python_renderer, python_renderer,
@@ -28,6 +29,7 @@ __all__ = [
"file_edit_renderer", "file_edit_renderer",
"finish_renderer", "finish_renderer",
"get_tool_renderer", "get_tool_renderer",
"load_skill_renderer",
"notes_renderer", "notes_renderer",
"proxy_renderer", "proxy_renderer",
"python_renderer", "python_renderer",

View File

@@ -0,0 +1,33 @@
from typing import Any, ClassVar
from rich.text import Text
from textual.widgets import Static
from .base_renderer import BaseToolRenderer
from .registry import register_tool_renderer
@register_tool_renderer
class LoadSkillRenderer(BaseToolRenderer):
    """Renderer for the `load_skill` tool call.

    Produces a compact one-line status widget: a dim "loading skill"
    label followed by the requested skill names in green, or a
    "Loading..." placeholder when no skills were named and no result
    has arrived yet.
    """

    # Tool name this renderer handles (keyed by register_tool_renderer).
    tool_name: ClassVar[str] = "load_skill"
    # CSS classes applied to the rendered widget.
    css_classes: ClassVar[list[str]] = ["tool-call", "load-skill-tool"]

    @classmethod
    def render(cls, tool_data: dict[str, Any]) -> Static:
        """Build the Static widget for one load_skill call.

        Reads from tool_data: "args" (dict; optional "skills" string),
        "status" (defaults to "completed"), and "result".
        """
        args = tool_data.get("args", {})
        status = tool_data.get("status", "completed")
        requested = args.get("skills", "")
        text = Text()
        # NOTE(review): an empty string is appended here — this looks like
        # an icon glyph lost in extraction; confirm against the original file.
        text.append("", style="#10b981")
        text.append("loading skill", style="dim")
        if requested:
            text.append(" ")
            text.append(requested, style="#10b981")
        elif not tool_data.get("result"):
            # No named skills and no result yet: show a placeholder line.
            text.append("\n ")
            text.append("Loading...", style="dim")
        return Static(text, classes=cls.get_css_classes(status))

View File

@@ -63,6 +63,7 @@ class LLM:
self.config = config self.config = config
self.agent_name = agent_name self.agent_name = agent_name
self.agent_id: str | None = None self.agent_id: str | None = None
self._active_skills: list[str] = list(config.skills or [])
self._total_stats = RequestStats() self._total_stats = RequestStats()
self.memory_compressor = MemoryCompressor(model_name=config.litellm_model) self.memory_compressor = MemoryCompressor(model_name=config.litellm_model)
self.system_prompt = self._load_system_prompt(agent_name) self.system_prompt = self._load_system_prompt(agent_name)
@@ -87,10 +88,7 @@ class LLM:
autoescape=select_autoescape(enabled_extensions=(), default_for_string=False), autoescape=select_autoescape(enabled_extensions=(), default_for_string=False),
) )
skills_to_load = [ skills_to_load = self._get_skills_to_load()
*list(self.config.skills or []),
f"scan_modes/{self.config.scan_mode}",
]
skill_content = load_skills(skills_to_load) skill_content = load_skills(skills_to_load)
env.globals["get_skill"] = lambda name: skill_content.get(name, "") env.globals["get_skill"] = lambda name: skill_content.get(name, "")
@@ -104,6 +102,36 @@ class LLM:
except Exception: # noqa: BLE001 except Exception: # noqa: BLE001
return "" return ""
def _get_skills_to_load(self) -> list[str]:
    """Return the ordered, de-duplicated list of skills to render.

    Active skills come first in activation order, followed by the
    scan-mode skill; the first occurrence of any duplicate name wins.
    """
    candidates = list(self._active_skills)
    candidates.append(f"scan_modes/{self.config.scan_mode}")
    # dict preserves insertion order, so fromkeys dedupes while
    # keeping first-seen ordering.
    return list(dict.fromkeys(candidates))
def add_skills(self, skill_names: list[str]) -> list[str]:
    """Activate new skills and refresh the system prompt.

    Blank names and already-active skills are ignored (repeats within
    the input are also added only once). Returns the names actually
    added; when nothing changed, returns [] without touching the prompt.
    """
    newly_added: list[str] = []
    for candidate in skill_names:
        if not candidate or candidate in self._active_skills:
            continue
        self._active_skills.append(candidate)
        newly_added.append(candidate)
    if newly_added:
        # Re-render the system prompt so the new skills take effect;
        # keep the previous prompt if rendering comes back empty.
        refreshed = self._load_system_prompt(self.agent_name)
        if refreshed:
            self.system_prompt = refreshed
    return newly_added
def set_agent_identity(self, agent_name: str | None, agent_id: str | None) -> None: def set_agent_identity(self, agent_name: str | None, agent_id: str | None) -> None:
if agent_name: if agent_name:
self.agent_name = agent_name self.agent_name = agent_name

View File

@@ -33,6 +33,7 @@ The skills are dynamically injected into the agent's system prompt, allowing it
| **`/frameworks`** | Specific testing methods for popular frameworks e.g. Django, Express, FastAPI, and Next.js | | **`/frameworks`** | Specific testing methods for popular frameworks e.g. Django, Express, FastAPI, and Next.js |
| **`/technologies`** | Specialized techniques for third-party services such as Supabase, Firebase, Auth0, and payment gateways | | **`/technologies`** | Specialized techniques for third-party services such as Supabase, Firebase, Auth0, and payment gateways |
| **`/protocols`** | Protocol-specific testing patterns for GraphQL, WebSocket, OAuth, and other communication standards | | **`/protocols`** | Protocol-specific testing patterns for GraphQL, WebSocket, OAuth, and other communication standards |
| **`/tooling`** | Command-line playbooks for core sandbox tools (nmap, nuclei, httpx, ffuf, subfinder, naabu, katana, sqlmap) |
| **`/cloud`** | Cloud provider security testing for AWS, Azure, GCP, and Kubernetes environments | | **`/cloud`** | Cloud provider security testing for AWS, Azure, GCP, and Kubernetes environments |
| **`/reconnaissance`** | Advanced information gathering and enumeration techniques for comprehensive attack surface mapping | | **`/reconnaissance`** | Advanced information gathering and enumeration techniques for comprehensive attack surface mapping |
| **`/custom`** | Community-contributed skills for specialized or industry-specific testing scenarios | | **`/custom`** | Community-contributed skills for specialized or industry-specific testing scenarios |

View File

@@ -54,6 +54,30 @@ def validate_skill_names(skill_names: list[str]) -> dict[str, list[str]]:
return {"valid": valid_skills, "invalid": invalid_skills} return {"valid": valid_skills, "invalid": invalid_skills}
def parse_skill_list(skills: str | None) -> list[str]:
if not skills:
return []
return [s.strip() for s in skills.split(",") if s.strip()]
def validate_requested_skills(skill_list: list[str], max_skills: int = 5) -> str | None:
if len(skill_list) > max_skills:
return "Cannot specify more than 5 skills for an agent (use comma-separated format)"
if not skill_list:
return None
validation = validate_skill_names(skill_list)
if validation["invalid"]:
available_skills = list(get_all_skill_names())
return (
f"Invalid skills: {validation['invalid']}. "
f"Available skills: {', '.join(available_skills)}"
)
return None
def generate_skills_description() -> str: def generate_skills_description() -> str:
available_skills = get_available_skills() available_skills = get_available_skills()

View File

@@ -0,0 +1,66 @@
---
name: ffuf
description: ffuf fuzzing syntax with matcher/filter strategy and non-interactive defaults.
---
# ffuf CLI Playbook
Official docs:
- https://github.com/ffuf/ffuf
Canonical syntax:
`ffuf -w <wordlist> -u <url_with_FUZZ> [flags]`
High-signal flags:
- `-u <url>` target URL containing `FUZZ`
- `-w <wordlist>` wordlist input (supports `KEYWORD` mapping via `-w file:KEYWORD`)
- `-mc <codes>` match status codes
- `-fc <codes>` filter status codes
- `-fs <size>` filter by body size
- `-ac` auto-calibration
- `-t <n>` threads
- `-rate <n>` request rate
- `-timeout <seconds>` HTTP timeout
- `-x <proxy_url>` upstream proxy (HTTP/SOCKS)
- `-ignore-body` skip downloading response body
- `-noninteractive` disable interactive console mode
- `-recursion` and `-recursion-depth <n>` recursive discovery
- `-H <header>` custom headers
- `-X <method>` and `-d <body>` for non-GET fuzzing
- `-o <file> -of <json|ejson|md|html|csv|ecsv>` structured output
Agent-safe baseline for automation:
`ffuf -w wordlist.txt -u https://target.tld/FUZZ -mc 200,204,301,302,307,401,403,405 -ac -t 20 -rate 50 -timeout 10 -noninteractive -of json -o ffuf.json`
Common patterns:
- Basic path fuzzing:
`ffuf -w /path/wordlist.txt -u https://target.tld/FUZZ -mc 200,204,301,302,307,401,403 -ac -t 40 -rate 200 -noninteractive`
- Vhost fuzzing:
`ffuf -w vhosts.txt -u https://target.tld -H 'Host: FUZZ.target.tld' -fs 0 -ac -noninteractive`
- Parameter value fuzzing:
`ffuf -w values.txt -u 'https://target.tld/search?q=FUZZ' -mc all -fs 0 -ac -t 30 -noninteractive`
- POST body fuzzing:
`ffuf -w payloads.txt -u https://target.tld/login -X POST -H 'Content-Type: application/x-www-form-urlencoded' -d 'username=admin&password=FUZZ' -fc 401 -noninteractive`
- Recursive discovery:
`ffuf -w dirs.txt -u https://target.tld/FUZZ -recursion -recursion-depth 2 -ac -t 30 -noninteractive`
- Proxy-instrumented run:
`ffuf -w wordlist.txt -u https://target.tld/FUZZ -x http://127.0.0.1:48080 -mc 200,301,302,403 -ac -noninteractive`
Critical correctness rules:
- `FUZZ` must appear exactly at the mutation point in URL/header/body.
- If using `-w file:KEYWORD`, that same `KEYWORD` must be present in URL/header/body.
- Always include `-noninteractive` in agent/script execution to prevent ffuf console mode from swallowing subsequent shell commands.
- Save structured output with `-of json -o <file>` for deterministic parsing.
Usage rules:
- Prefer explicit matcher/filter strategy (`-mc`/`-fc`/`-fs`) over default-only output.
- Start conservative (`-rate`, `-t`) and scale only if target tolerance is known.
- Do not use `-h`/`--help` during normal execution unless absolutely necessary.
Failure recovery:
- If ffuf drops into interactive mode, send `C-c` and rerun with `-noninteractive`.
- If response noise is too high, tighten `-mc/-fc/-fs` instead of increasing load.
- If runtime is too long, lower `-rate/-t` and tighten scope.
If uncertain, query web_search with:
`site:github.com/ffuf/ffuf <flag> README`

View File

@@ -0,0 +1,77 @@
---
name: httpx
description: ProjectDiscovery httpx probing syntax, exact probe flags, and automation-safe output patterns.
---
# httpx CLI Playbook
Official docs:
- https://docs.projectdiscovery.io/opensource/httpx/usage
- https://docs.projectdiscovery.io/opensource/httpx/running
- https://github.com/projectdiscovery/httpx
Canonical syntax:
`httpx [flags]`
High-signal flags:
- `-u, -target <url>` single target
- `-l, -list <file>` target list
- `-nf, -no-fallback` probe both HTTP and HTTPS
- `-nfs, -no-fallback-scheme` do not auto-switch schemes
- `-sc` status code
- `-title` page title
- `-server, -web-server` server header
- `-td, -tech-detect` technology detection
- `-fr, -follow-redirects` follow redirects
- `-mc <codes>` / `-fc <codes>` match or filter status codes
- `-path <path_or_file>` probe specific paths
- `-p, -ports <ports>` probe custom ports
- `-proxy, -http-proxy <url>` proxy target requests
- `-tlsi, -tls-impersonate` experimental TLS impersonation
- `-j, -json` JSONL output
- `-sr, -store-response` store request/response artifacts
- `-srd, -store-response-dir <dir>` custom directory for stored artifacts
- `-silent` compact output
- `-rl <n>` requests/second cap
- `-t <n>` threads
- `-timeout <seconds>` request timeout
- `-retries <n>` retry attempts
- `-o <file>` output file
Agent-safe baseline for automation:
`httpx -l hosts.txt -sc -title -server -td -fr -timeout 10 -retries 1 -rl 50 -t 25 -silent -j -o httpx.jsonl`
Common patterns:
- Quick live+fingerprint check:
`httpx -l hosts.txt -sc -title -server -td -silent -o httpx.txt`
- Probe known admin paths:
`httpx -l hosts.txt -path /,/login,/admin -sc -title -silent -j -o httpx_paths.jsonl`
- Probe both schemes explicitly:
`httpx -l hosts.txt -nf -sc -title -silent`
- Vhost detection pass:
`httpx -l hosts.txt -vhost -sc -title -silent -j -o httpx_vhost.jsonl`
- Proxy-instrumented probing:
`httpx -l hosts.txt -sc -title -proxy http://127.0.0.1:48080 -silent -j -o httpx_proxy.jsonl`
- Response-storage pass for downstream content parsing:
`httpx -l hosts.txt -fr -sr -srd recon/httpx_store -sc -title -server -cl -ct -location -probe -silent`
Critical correctness rules:
- For machine parsing, prefer `-j -o <file>`.
- Keep `-rl` and `-t` explicit for reproducible throughput.
- Use `-nf` when you need dual-scheme probing from host-only input.
- When using `-path` or `-ports`, keep scope tight to avoid accidental scan inflation.
- Use `-sr -srd <dir>` when later steps need raw response artifacts (JS/route extraction, grepping, replay).
Usage rules:
- Use `-silent` for pipeline-friendly output.
- Use `-mc/-fc` when downstream steps depend on specific response classes.
- Prefer `-proxy` flag over global proxy env vars when only httpx traffic should be proxied.
- Do not use `-h`/`--help` for routine runs unless absolutely necessary.
Failure recovery:
- If too many timeouts occur, reduce `-rl/-t` and/or increase `-timeout`.
- If output is noisy, add `-fc` filters or `-fd` duplicate filtering.
- If HTTPS-only probing misses HTTP services, rerun with `-nf` (and avoid `-nfs`).
If uncertain, query web_search with:
`site:docs.projectdiscovery.io httpx <flag> usage`

View File

@@ -0,0 +1,76 @@
---
name: katana
description: Katana crawler syntax, depth/js/known-files behavior, and stable concurrency controls.
---
# Katana CLI Playbook
Official docs:
- https://docs.projectdiscovery.io/opensource/katana/usage
- https://docs.projectdiscovery.io/opensource/katana/running
- https://github.com/projectdiscovery/katana
Canonical syntax:
`katana [flags]`
High-signal flags:
- `-u, -list <url|file>` target URL(s)
- `-d, -depth <n>` crawl depth
- `-jc, -js-crawl` parse JavaScript-discovered endpoints
- `-jsl, -jsluice` deeper JS parsing (memory intensive)
- `-kf, -known-files <all|robotstxt|sitemapxml>` known-file crawling mode
- `-proxy <http|socks5 proxy>` explicit proxy setting
- `-c, -concurrency <n>` concurrent fetchers
- `-p, -parallelism <n>` concurrent input targets
- `-rl, -rate-limit <n>` request rate limit
- `-timeout <seconds>` request timeout
- `-retry <n>` retry count
- `-ef, -extension-filter <list>` extension exclusions
- `-tlsi, -tls-impersonate` experimental JA3/TLS impersonation
- `-hl, -headless` enable hybrid headless crawling
- `-sc, -system-chrome` use local Chrome for headless mode
- `-ho, -headless-options <csv>` extra Chrome options (for example proxy-server)
- `-nos, -no-sandbox` run Chrome headless with no-sandbox
- `-noi, -no-incognito` disable incognito in headless mode
- `-cdd, -chrome-data-dir <dir>` persist browser profile/session
- `-xhr, -xhr-extraction` include XHR endpoints in JSONL output
- `-silent`, `-j, -jsonl`, `-o <file>` output controls
Agent-safe baseline for automation:
`mkdir -p crawl && katana -u https://target.tld -d 3 -jc -kf robotstxt -c 10 -p 10 -rl 50 -timeout 10 -retry 1 -ef png,jpg,jpeg,gif,svg,css,woff,woff2,ttf,eot,map -silent -j -o crawl/katana.jsonl`
Common patterns:
- Fast crawl baseline:
`katana -u https://target.tld -d 3 -jc -silent`
- Deeper JS-aware crawl:
`katana -u https://target.tld -d 5 -jc -jsl -kf all -c 10 -p 10 -rl 50 -o katana_urls.txt`
- Multi-target run with JSONL output:
`katana -list urls.txt -d 3 -jc -silent -j -o katana.jsonl`
- Headless crawl with local Chrome:
`katana -u https://target.tld -hl -sc -nos -xhr -j -o crawl/katana_headless.jsonl`
- Headless crawl through proxy:
`katana -u https://target.tld -hl -sc -ho proxy-server=http://127.0.0.1:48080 -j -o crawl/katana_proxy.jsonl`
Critical correctness rules:
- `-kf` must be followed by one of `all`, `robotstxt`, or `sitemapxml`.
- Use documented `-hl` for headless mode.
- `-proxy` expects a single proxy URL string (for example `http://127.0.0.1:8080`).
- `-ho` expects comma-separated Chrome options (example: `-ho --disable-gpu,proxy-server=http://127.0.0.1:8080`).
- For `-kf`, keep depth at least `-d 3` so known files are fully covered.
- If writing to a file, ensure parent directory exists before `-o`.
Usage rules:
- Keep `-d`, `-c`, `-p`, and `-rl` explicit for reproducible runs.
- Use `-ef` early to reduce static-file noise before fuzzing.
- Prefer `-proxy` over environment proxy variables when proxying only Katana traffic.
- Use `-hc` only for one-time diagnostics, not routine crawling loops.
- Do not use `-h`/`--help` for routine runs unless absolutely necessary.
Failure recovery:
- If crawl runs too long, lower `-d` and optionally add `-ct`.
- If memory spikes, disable `-jsl` and lower `-c/-p`.
- If headless fails with Chrome errors, drop `-sc` or install system Chrome.
- If output is noisy, tighten scope and add `-ef` filters.
If uncertain, query web_search with:
`site:docs.projectdiscovery.io katana <flag> usage`

View File

@@ -0,0 +1,68 @@
---
name: naabu
description: Naabu port-scanning syntax with host input, scan-type, verification, and rate controls.
---
# Naabu CLI Playbook
Official docs:
- https://docs.projectdiscovery.io/opensource/naabu/usage
- https://docs.projectdiscovery.io/opensource/naabu/running
- https://github.com/projectdiscovery/naabu
Canonical syntax:
`naabu [flags]`
High-signal flags:
- `-host <host>` single host
- `-list, -l <file>` hosts list
- `-p <ports>` explicit ports (supports ranges)
- `-top-ports <n|full>` top ports profile
- `-exclude-ports <ports>` exclusions
- `-scan-type <s|c|syn|connect>` SYN or CONNECT scan
- `-Pn` skip host discovery
- `-rate <n>` packets per second
- `-c <n>` worker count
- `-timeout <ms>` per-probe timeout in milliseconds
- `-retries <n>` retry attempts
- `-proxy <socks5://host:port>` SOCKS5 proxy
- `-verify` verify discovered open ports
- `-j, -json` JSONL output
- `-silent` compact output
- `-o <file>` output file
Agent-safe baseline for automation:
`naabu -list hosts.txt -top-ports 100 -scan-type c -Pn -rate 300 -c 25 -timeout 1000 -retries 1 -verify -silent -j -o naabu.jsonl`
Common patterns:
- Top ports with controlled rate:
`naabu -list hosts.txt -top-ports 100 -scan-type c -rate 300 -c 25 -timeout 1000 -retries 1 -verify -silent -o naabu.txt`
- Focused web-ports sweep:
`naabu -list hosts.txt -p 80,443,8080,8443 -scan-type c -rate 300 -c 25 -timeout 1000 -retries 1 -verify -silent`
- Single-host quick check:
`naabu -host target.tld -p 22,80,443 -scan-type c -rate 300 -c 25 -timeout 1000 -retries 1 -verify`
- Root SYN mode (if available):
`sudo naabu -list hosts.txt -top-ports 100 -scan-type syn -rate 500 -c 25 -timeout 1000 -retries 1 -verify -silent`
Critical correctness rules:
- Use `-scan-type connect` when running without root/privileged raw socket access.
- Always set `-timeout` explicitly; it is specified in milliseconds, not seconds.
- Set `-rate` explicitly to avoid unstable or noisy scans.
- Keep port scope tight: prefer explicit important ports or a small `-top-ports` value unless broader coverage is explicitly required.
- Do not spam traffic; start with the smallest useful port set and conservative rate/worker settings.
- Prefer `-verify` before handing ports to follow-up scanners.
Usage rules:
- Keep host discovery behavior explicit (`-Pn` or default discovery).
- Use `-j -o <file>` for automation pipelines.
- Prefer `-p 22,80,443,8080,8443` or `-top-ports 100` before considering larger sweeps.
- Do not use `-h`/`--help` for normal flow unless absolutely necessary.
Failure recovery:
- If privileged socket errors occur, switch to `-scan-type c`.
- If scans are slow or lossy, lower `-rate`, lower `-c`, and tighten `-p`/`-top-ports`.
- If many hosts appear down, compare runs with and without `-Pn`.
If uncertain, query web_search with:
`site:docs.projectdiscovery.io naabu <flag> usage`

View File

@@ -0,0 +1,66 @@
---
name: nmap
description: Canonical Nmap CLI syntax, two-pass scanning workflow, and sandbox-safe bounded scan patterns.
---
# Nmap CLI Playbook
Official docs:
- https://nmap.org/book/man-briefoptions.html
- https://nmap.org/book/man.html
- https://nmap.org/book/man-performance.html
Canonical syntax:
`nmap [Scan Type(s)] [Options] {target specification}`
High-signal flags:
- `-n` skip DNS resolution
- `-Pn` skip host discovery when ICMP/ping is filtered
- `-sS` SYN scan (root/privileged)
- `-sT` TCP connect scan (no raw-socket privilege)
- `-sV` detect service versions
- `-sC` run default NSE scripts
- `-p <ports>` explicit ports (`-p-` for all TCP ports)
- `--top-ports <n>` quick common-port sweep
- `--open` show only hosts with open ports
- `-T<0-5>` timing template (`-T4` common)
- `--max-retries <n>` cap retransmissions
- `--host-timeout <time>` give up on very slow hosts
- `--script-timeout <time>` bound NSE script runtime
- `-oA <prefix>` output in normal/XML/grepable formats
Agent-safe baseline for automation:
`nmap -n -Pn --open --top-ports 100 -T4 --max-retries 1 --host-timeout 90s -oA nmap_quick <host>`
Common patterns:
- Fast first pass:
`nmap -n -Pn --top-ports 100 --open -T4 --max-retries 1 --host-timeout 90s <host>`
- Very small important-port pass:
`nmap -n -Pn -p 22,80,443,8080,8443 --open -T4 --max-retries 1 --host-timeout 90s <host>`
- Service/script enrichment on discovered ports:
`nmap -n -Pn -sV -sC -p <comma_ports> --script-timeout 30s --host-timeout 3m -oA nmap_services <host>`
- No-root fallback:
`nmap -n -Pn -sT --top-ports 100 --open --host-timeout 90s <host>`
Critical correctness rules:
- Always set target scope explicitly.
- Prefer two-pass scanning: discovery pass, then enrichment pass.
- Always set a timeout boundary with `--host-timeout`; add `--script-timeout` whenever NSE scripts are involved.
- Keep discovery scans tight: use explicit important ports or a small `--top-ports` profile unless broader coverage is explicitly required.
- In sandboxed runs, avoid exhaustive sweeps (`-p-`, very high `--top-ports`, or wide host ranges) unless explicitly required.
- Do not spam traffic; start with the smallest port set that can answer the question.
- Prefer `naabu` for broad port discovery; use `nmap` for scoped verification/enrichment.
Usage rules:
- Add `-n` by default in automation to avoid DNS delays.
- Use `-oA` for reusable artifacts.
- Prefer `-p 22,80,443,8080,8443` or `--top-ports 100` before considering larger sweeps.
- Do not use `-h`/`--help` for routine usage unless absolutely necessary.
Failure recovery:
- If host appears down unexpectedly, rerun with `-Pn`.
- If scan stalls, tighten scope (`-p` or smaller `--top-ports`) and lower retries.
- If scripts run too long, add `--script-timeout`.
If uncertain, query web_search with:
`site:nmap.org/book nmap <flag>`

View File

@@ -0,0 +1,67 @@
---
name: nuclei
description: Exact Nuclei command structure, template selection, and bounded high-throughput execution controls.
---
# Nuclei CLI Playbook
Official docs:
- https://docs.projectdiscovery.io/opensource/nuclei/running
- https://docs.projectdiscovery.io/opensource/nuclei/mass-scanning-cli
- https://github.com/projectdiscovery/nuclei
Canonical syntax:
`nuclei [flags]`
High-signal flags:
- `-u, -target <url>` single target
- `-l, -list <file>` targets file
- `-im, -input-mode <mode>` list/burp/jsonl/yaml/openapi/swagger
- `-t, -templates <path|tag>` explicit template path(s)
- `-tags <tag1,tag2>` run by tag
- `-s, -severity <critical,high,...>` severity filter
- `-as, -automatic-scan` tech-mapped automatic scan
- `-ni, -no-interactsh` disable OAST/interactsh requests
- `-rl, -rate-limit <n>` global request rate cap
- `-c, -concurrency <n>` template concurrency
- `-bs, -bulk-size <n>` hosts in parallel per template
- `-timeout <seconds>` request timeout
- `-retries <n>` retries
- `-stats` periodic scan stats output
- `-silent` findings-only output
- `-j, -jsonl` JSONL output
- `-o <file>` output file
Agent-safe baseline for automation:
`nuclei -l targets.txt -as -s critical,high -rl 50 -c 20 -bs 20 -timeout 10 -retries 1 -silent -j -o nuclei.jsonl`
Common patterns:
- Focused severity scan:
`nuclei -u https://target.tld -s critical,high -silent -o nuclei_high.txt`
- List-driven controlled scan:
`nuclei -l targets.txt -as -rl 50 -c 20 -bs 20 -timeout 10 -retries 1 -j -o nuclei.jsonl`
- Tag-driven run:
`nuclei -l targets.txt -tags cve,misconfig -s critical,high,medium -silent`
- Explicit templates:
`nuclei -l targets.txt -t http/cves/ -t dns/ -rl 30 -c 10 -bs 10 -j -o nuclei_templates.jsonl`
- Deterministic non-OAST run:
`nuclei -l targets.txt -as -s critical,high -ni -stats -rl 30 -c 10 -bs 10 -timeout 10 -retries 1 -j -o nuclei_no_oast.jsonl`
Critical correctness rules:
- Provide a template selection method (`-as`, `-t`, or `-tags`); avoid unscoped broad runs.
- Keep `-rl`, `-c`, and `-bs` explicit for predictable resource use.
- Use `-ni` when outbound interactsh/OAST traffic is not expected or not allowed.
- Use structured output (`-j -o <file>`) for automation.
Usage rules:
- Start with severity/tags/templates filters to keep runs explainable.
- Keep retries conservative (`-retries 1`) unless transport instability is proven.
- Do not use `-h`/`--help` for routine operation unless absolutely necessary.
Failure recovery:
- If performance degrades, lower `-c/-bs` before lowering `-rl`.
- If findings are unexpectedly empty, verify template selection (`-as` vs explicit `-t/-tags`).
- If scan duration grows, reduce target set and enforce stricter template/severity filters.
If uncertain, query web_search with:
`site:docs.projectdiscovery.io nuclei <flag> running`

View File

@@ -0,0 +1,72 @@
---
name: semgrep
description: Exact Semgrep CLI structure, metrics-off scanning, scoped ruleset selection, and automation-safe output patterns.
---
# Semgrep CLI Playbook
Official docs:
- https://semgrep.dev/docs/cli-reference
- https://semgrep.dev/docs/getting-started/cli
- https://semgrep.dev/docs/semgrep-code/semgrep-pro-engine-intro
Canonical syntax:
`semgrep scan [flags]`
High-signal flags:
- `--config <rule_or_ruleset>` ruleset, registry pack, local rule file, or directory
- `--metrics=off` disable telemetry and metrics reporting
- `--json` JSON output
- `--sarif` SARIF output
- `--output <file>` write findings to file
- `--severity <level>` filter by severity
- `--error` return non-zero exit when findings exist
- `--quiet` suppress progress noise
- `--jobs <n>` parallel workers
- `--timeout <seconds>` per-file timeout
- `--exclude <pattern>` exclude path pattern
- `--include <pattern>` include path pattern
- `--exclude-rule <rule_id>` suppress specific rule
- `--baseline-commit <sha>` only report findings introduced after baseline
- `--pro` enable Pro engine if available
- `--oss-only` force OSS engine only
Agent-safe baseline for automation:
`semgrep scan --config p/default --metrics=off --json --output semgrep.json --quiet --jobs 4 --timeout 20 /workspace`
Common patterns:
- Default security scan:
`semgrep scan --config p/default --metrics=off --json --output semgrep.json --quiet /workspace`
- High-severity focused pass:
`semgrep scan --config p/default --severity ERROR --metrics=off --json --output semgrep_high.json --quiet /workspace`
- OWASP-oriented scan:
`semgrep scan --config p/owasp-top-ten --metrics=off --sarif --output semgrep.sarif --quiet /workspace`
- Language- or framework-specific rules:
`semgrep scan --config p/python --config p/secrets --metrics=off --json --output semgrep_python.json --quiet /workspace`
- Scoped directory scan:
`semgrep scan --config p/default --metrics=off --json --output semgrep_api.json --quiet /workspace/services/api`
- Pro engine check or run:
`semgrep scan --config p/default --pro --metrics=off --json --output semgrep_pro.json --quiet /workspace`
Critical correctness rules:
- Always include `--metrics=off`; Semgrep sends telemetry by default.
- Always provide an explicit `--config`; do not rely on vague or implied defaults.
- Prefer `--json --output <file>` or `--sarif --output <file>` for machine-readable downstream processing.
- Keep the target path explicit; use an absolute or clearly scoped workspace path instead of `.` when possible.
- If Pro availability matters, check it explicitly with a bounded command before assuming cross-file analysis exists.
Usage rules:
- Start with `p/default` unless the task clearly calls for a narrower pack.
- Add focused packs such as `p/secrets`, `p/python`, or `p/javascript` only when they match the target stack.
- Use `--quiet` in automation to reduce noisy logs.
- Use `--jobs` and `--timeout` explicitly for reproducible runtime behavior.
- Do not use `-h`/`--help` for routine operation unless absolutely necessary.
Failure recovery:
- If scans are too slow, narrow the target path and reduce the active rulesets before changing engine settings.
- If scans time out, increase `--timeout` modestly or lower `--jobs`.
- If output is too broad, scope `--config`, add `--severity`, or exclude known irrelevant paths.
- If Pro mode fails, rerun with `--oss-only` or without `--pro` and note the loss of cross-file coverage.
If uncertain, query web_search with:
`site:semgrep.dev semgrep <flag> cli`

View File

@@ -0,0 +1,67 @@
---
name: sqlmap
description: sqlmap target syntax, non-interactive execution, and common validation/enumeration workflows.
---
# sqlmap CLI Playbook
Official docs:
- https://github.com/sqlmapproject/sqlmap/wiki/usage
- https://sqlmap.org
Canonical syntax:
`sqlmap -u "<target_url_with_params>" [options]`
High-signal flags:
- `-u, --url <url>` target URL
- `-r <request_file>` raw HTTP request input
- `-p <param>` test specific parameter(s)
- `--batch` non-interactive mode
- `--level <1-5>` test depth
- `--risk <1-3>` payload risk profile
- `--threads <n>` concurrency
- `--technique <letters>` technique selection
- `--forms` parse and test forms from target page
- `--cookie <cookie>` and `--headers <headers>` authenticated context
- `--timeout <seconds>` and `--retries <n>` transport stability
- `--tamper <scripts>` WAF/input-filter evasion
- `--random-agent` randomize user-agent
- `--ignore-proxy` bypass configured proxy
- `--dbs`, `-D <db> --tables`, `-D <db> -T <table> --columns`, `-D <db> -T <table> -C <cols> --dump`
- `--flush-session` clear cached scan state
Agent-safe baseline for automation:
`sqlmap -u "https://target.tld/item?id=1" -p id --batch --level 2 --risk 1 --threads 5 --timeout 10 --retries 1 --random-agent`
Common patterns:
- Baseline injection check:
`sqlmap -u "https://target.tld/item?id=1" -p id --batch --level 2 --risk 1 --threads 5`
- POST parameter testing:
`sqlmap -u "https://target.tld/login" --data "user=admin&pass=test" -p pass --batch --level 2 --risk 1`
- Form-driven testing:
`sqlmap -u "https://target.tld/login" --forms --batch --level 2 --risk 1 --random-agent`
- Enumerate DBs:
`sqlmap -u "https://target.tld/item?id=1" -p id --batch --dbs`
- Enumerate tables in DB:
`sqlmap -u "https://target.tld/item?id=1" -p id --batch -D appdb --tables`
- Dump selected columns:
`sqlmap -u "https://target.tld/item?id=1" -p id --batch -D appdb -T users -C id,email,role --dump`
Critical correctness rules:
- Always include `--batch` in automation to avoid interactive prompts.
- Keep target parameter explicit with `-p` when possible.
- Use `--flush-session` when retesting after request/profile changes.
- Start conservative (`--level 1-2`, `--risk 1`) and escalate only when needed.
Usage rules:
- Keep authenticated context (`--cookie`/`--headers`) aligned with manual validation state.
- Prefer narrow extraction (`-D/-T/-C`) over broad dump-first behavior.
- Do not use `-h`/`--help` during normal execution unless absolutely necessary.
Failure recovery:
- If results conflict with manual testing, rerun with `--flush-session`.
- If blocked by filtering/WAF, reduce `--threads` and test targeted `--tamper` chains.
- If initial detection misses likely injection, increment `--level`/`--risk` gradually.
If uncertain, query web_search with:
`site:github.com/sqlmapproject/sqlmap/wiki/usage sqlmap <flag>`

View File

@@ -0,0 +1,66 @@
---
name: subfinder
description: Subfinder passive subdomain enumeration syntax, source controls, and pipeline-ready output patterns.
---
# Subfinder CLI Playbook
Official docs:
- https://docs.projectdiscovery.io/opensource/subfinder/usage
- https://docs.projectdiscovery.io/opensource/subfinder/running
- https://github.com/projectdiscovery/subfinder
Canonical syntax:
`subfinder [flags]`
High-signal flags:
- `-d <domain>` single domain
- `-dL <file>` domain list
- `-all` include all sources
- `-recursive` use recursive-capable sources
- `-s <sources>` include specific sources
- `-es <sources>` exclude specific sources
- `-rl <n>` global rate limit
- `-rls <source=n/s,...>` per-source rate limits
- `-proxy <http://host:port>` proxy outbound source requests
- `-silent` compact output
- `-o <file>` output file
- `-oJ, -json` JSONL output
- `-cs, -collect-sources` include source metadata (`-oJ` output)
- `-nW, -active` show only active subdomains
- `-timeout <seconds>` request timeout
- `-max-time <minutes>` overall enumeration cap
Agent-safe baseline for automation:
`subfinder -d example.com -all -recursive -rl 20 -timeout 30 -silent -oJ -o subfinder.jsonl`
Common patterns:
- Standard passive enum:
`subfinder -d example.com -silent -o subs.txt`
- Broad-source passive enum:
`subfinder -d example.com -all -recursive -silent -o subs_all.txt`
- Multi-domain run:
`subfinder -dL domains.txt -all -recursive -rl 20 -silent -o subfinder_out.txt`
- Source-attributed JSONL output:
`subfinder -d example.com -all -oJ -cs -o subfinder_sources.jsonl`
- Passive enum via explicit proxy:
`subfinder -d example.com -all -recursive -proxy http://127.0.0.1:48080 -silent -oJ -o subfinder_proxy.jsonl`
Critical correctness rules:
- `-cs` is useful only with JSON output (`-oJ`).
- Many sources require API keys in provider config; low results can be config-related, not target-related.
- `-nW` performs active resolution/filtering and can drop passive-only hits.
- Run passive enumeration first, then validate which hosts are live with `httpx`.
Usage rules:
- Keep output files explicit when chaining to `httpx`/`nuclei`.
- Use `-rl/-rls` when providers throttle aggressively.
- Do not use `-h`/`--help` for routine tasks unless absolutely necessary.
Failure recovery:
- If results are unexpectedly low, rerun with `-all` and verify provider config/API keys.
- If provider errors appear, lower `-rl` and apply `-rls` per source.
- If runs take too long, lower scope or split domain batches.
If uncertain, query web_search with:
`site:docs.projectdiscovery.io subfinder <flag> usage`

View File

@@ -48,6 +48,7 @@ if not SANDBOX_MODE:
from .browser import * # noqa: F403 from .browser import * # noqa: F403
from .file_edit import * # noqa: F403 from .file_edit import * # noqa: F403
from .finish import * # noqa: F403 from .finish import * # noqa: F403
from .load_skill import * # noqa: F403
from .notes import * # noqa: F403 from .notes import * # noqa: F403
from .proxy import * # noqa: F403 from .proxy import * # noqa: F403
from .python import * # noqa: F403 from .python import * # noqa: F403

View File

@@ -195,34 +195,17 @@ def create_agent(
try: try:
parent_id = agent_state.agent_id parent_id = agent_state.agent_id
skill_list = [] from strix.skills import parse_skill_list, validate_requested_skills
if skills:
skill_list = [s.strip() for s in skills.split(",") if s.strip()]
if len(skill_list) > 5: skill_list = parse_skill_list(skills)
validation_error = validate_requested_skills(skill_list)
if validation_error:
return { return {
"success": False, "success": False,
"error": ( "error": validation_error,
"Cannot specify more than 5 skills for an agent (use comma-separated format)"
),
"agent_id": None, "agent_id": None,
} }
if skill_list:
from strix.skills import get_all_skill_names, validate_skill_names
validation = validate_skill_names(skill_list)
if validation["invalid"]:
available_skills = list(get_all_skill_names())
return {
"success": False,
"error": (
f"Invalid skills: {validation['invalid']}. "
f"Available skills: {', '.join(available_skills)}"
),
"agent_id": None,
}
from strix.agents import StrixAgent from strix.agents import StrixAgent
from strix.agents.state import AgentState from strix.agents.state import AgentState
from strix.llm.config import LLMConfig from strix.llm.config import LLMConfig

View File

@@ -0,0 +1,4 @@
from .load_skill_actions import load_skill
__all__ = ["load_skill"]

View File

@@ -0,0 +1,71 @@
from typing import Any
from strix.tools.registry import register_tool
@register_tool(sandbox_execution=False)
def load_skill(agent_state: Any, skills: str) -> dict[str, Any]:
    """Load one or more skills into the calling agent's prompt context at runtime.

    Args:
        agent_state: State of the calling agent; must expose ``agent_id``,
            ``context`` and ``update_context``.
        skills: Comma-separated skill names (same plain format accepted at
            agent creation).

    Returns:
        Dict with ``success`` plus ``requested_skills``, ``loaded_skills``,
        ``newly_loaded_skills`` and ``already_loaded_skills`` on success, or
        an ``error`` message (and empty ``loaded_skills``) on failure.
    """
    try:
        # Local import: reuses the exact parser/validator used by agent creation.
        from strix.skills import parse_skill_list, validate_requested_skills

        requested_skills = parse_skill_list(skills)
        if not requested_skills:
            return {
                "success": False,
                "error": "No skills provided. Pass one or more comma-separated skill names.",
                "requested_skills": [],
            }
        # Validator returns a caller-facing message (e.g. unknown names or
        # over-limit requests) or a falsy value when the request is acceptable.
        validation_error = validate_requested_skills(requested_skills)
        if validation_error:
            return {
                "success": False,
                "error": validation_error,
                "requested_skills": requested_skills,
                "loaded_skills": [],
            }
        from strix.tools.agents_graph.agents_graph_actions import _agent_instances

        # Resolve the live agent object: skill injection mutates its LLM prompt,
        # so a state record alone is not enough.
        current_agent = _agent_instances.get(agent_state.agent_id)
        if current_agent is None or not hasattr(current_agent, "llm"):
            return {
                "success": False,
                "error": (
                    "Could not find running agent instance for runtime skill loading. "
                    "Try again in the current active agent."
                ),
                "requested_skills": requested_skills,
                "loaded_skills": [],
            }
        # add_skills returns only the names that were not already in the prompt.
        newly_loaded = current_agent.llm.add_skills(requested_skills)
        already_loaded = [skill for skill in requested_skills if skill not in newly_loaded]
        # Track the cumulative, deduplicated skill set on the agent state,
        # discarding any malformed (non-list) prior value.
        prior = agent_state.context.get("loaded_skills", [])
        if not isinstance(prior, list):
            prior = []
        merged_skills = sorted(set(prior).union(requested_skills))
        agent_state.update_context("loaded_skills", merged_skills)
    except Exception as e:  # noqa: BLE001
        # requested_skills may be unbound if parsing itself raised; fall back to
        # a best-effort split so the error payload still names what was asked for.
        fallback_requested_skills = (
            requested_skills
            if "requested_skills" in locals()
            else [s.strip() for s in skills.split(",") if s.strip()]
        )
        return {
            "success": False,
            "error": f"Failed to load skill(s): {e!s}",
            "requested_skills": fallback_requested_skills,
            "loaded_skills": [],
        }
    else:
        # Reached only when the try block ran to completion without an early return.
        return {
            "success": True,
            "requested_skills": requested_skills,
            "loaded_skills": requested_skills,
            "newly_loaded_skills": newly_loaded,
            "already_loaded_skills": already_loaded,
            "message": "Skills loaded into this agent prompt context.",
        }

View File

@@ -0,0 +1,33 @@
<tools>
<tool name="load_skill">
<description>Dynamically load one or more skills into the current agent at runtime.
Use this when you need exact guidance right before acting (tool syntax, exploit workflow, or protocol details).
This updates the current agent's prompt context immediately.</description>
<details>Accepts one skill or a comma-separated skill bundle. Works for root agents and subagents.
Examples:
- Single skill: `xss`
- Bundle: `sql_injection,business_logic`</details>
<parameters>
<parameter name="skills" type="string" required="true">
<description>Comma-separated list of skills to load into the current agent (MAXIMUM 5 skills per call, comma-separated). Load only the 1-3 skills directly relevant to your next action; use up to 5 only for genuinely complex contexts. {{DYNAMIC_SKILLS_DESCRIPTION}}</description>
</parameter>
</parameters>
<returns type="Dict[str, Any]">
<description>Response containing: - success: Whether runtime loading succeeded - requested_skills: Skills requested - loaded_skills: Skills validated and applied - newly_loaded_skills: Skills newly injected into prompt - already_loaded_skills: Skills already present in prompt context</description>
</returns>
<examples>
<function=load_skill>
<parameter=skills>xss</parameter>
</function>
<function=load_skill>
<parameter=skills>sql_injection,business_logic</parameter>
</function>
<function=load_skill>
<parameter=skills>nmap,httpx</parameter>
</function>
</examples>
</tool>
</tools>

1
tests/skills/__init__.py Normal file
View File

@@ -0,0 +1 @@
# Tests for skill-related runtime behavior.

View File

@@ -0,0 +1,139 @@
from typing import Any
from strix.tools.agents_graph import agents_graph_actions
from strix.tools.load_skill import load_skill_actions
class _DummyLLM:
def __init__(self, initial_skills: list[str] | None = None) -> None:
self.loaded: set[str] = set(initial_skills or [])
def add_skills(self, skill_names: list[str]) -> list[str]:
newly_loaded = [skill for skill in skill_names if skill not in self.loaded]
self.loaded.update(newly_loaded)
return newly_loaded
class _DummyAgent:
    """Minimal agent stub exposing only the ``llm`` attribute that load_skill uses."""

    def __init__(self, initial_skills: list[str] | None = None) -> None:
        """Create the stub, optionally seeding the fake LLM with pre-loaded skills."""
        self.llm = _DummyLLM(initial_skills)
class _DummyAgentState:
    """Bare-bones agent state: an id plus a writable context mapping."""

    def __init__(self, agent_id: str) -> None:
        self.context: dict[str, Any] = {}
        self.agent_id = agent_id

    def update_context(self, key: str, value: Any) -> None:
        """Store *value* under *key*, overwriting any previous entry."""
        self.context.update({key: value})
def test_load_skill_success_and_context_update() -> None:
    """Loading two valid skills succeeds and records them on the agent state."""
    registry = agents_graph_actions._agent_instances
    snapshot = dict(registry)
    try:
        state = _DummyAgentState("agent_test_load_skill_success")
        registry.clear()
        registry[state.agent_id] = _DummyAgent()

        result = load_skill_actions.load_skill(state, "ffuf,xss")

        assert result["success"] is True
        assert result["loaded_skills"] == ["ffuf", "xss"]
        assert result["newly_loaded_skills"] == ["ffuf", "xss"]
        assert state.context["loaded_skills"] == ["ffuf", "xss"]
    finally:
        # Restore the shared registry so other tests see the original instances.
        registry.clear()
        registry.update(snapshot)
def test_load_skill_uses_same_plain_skill_format_as_create_agent() -> None:
    """A single bare skill name (create_agent's format) loads without decoration."""
    registry = agents_graph_actions._agent_instances
    snapshot = dict(registry)
    try:
        state = _DummyAgentState("agent_test_load_skill_short_name")
        registry.clear()
        registry[state.agent_id] = _DummyAgent()

        result = load_skill_actions.load_skill(state, "nmap")

        assert result["success"] is True
        assert result["loaded_skills"] == ["nmap"]
        assert result["newly_loaded_skills"] == ["nmap"]
        assert state.context["loaded_skills"] == ["nmap"]
    finally:
        # Restore the shared registry so other tests see the original instances.
        registry.clear()
        registry.update(snapshot)
def test_load_skill_invalid_skill_returns_error() -> None:
    """An unknown skill name fails validation and lists the available skills."""
    registry = agents_graph_actions._agent_instances
    snapshot = dict(registry)
    try:
        state = _DummyAgentState("agent_test_load_skill_invalid")
        registry.clear()
        registry[state.agent_id] = _DummyAgent()

        result = load_skill_actions.load_skill(state, "definitely_not_a_real_skill")

        assert result["success"] is False
        assert "Invalid skills" in result["error"]
        assert "Available skills" in result["error"]
    finally:
        # Restore the shared registry so other tests see the original instances.
        registry.clear()
        registry.update(snapshot)
def test_load_skill_rejects_more_than_five_skills() -> None:
    """Requests above the five-skill cap are rejected with the exact limit message."""
    registry = agents_graph_actions._agent_instances
    snapshot = dict(registry)
    try:
        state = _DummyAgentState("agent_test_load_skill_too_many")
        registry.clear()
        registry[state.agent_id] = _DummyAgent()

        result = load_skill_actions.load_skill(state, "a,b,c,d,e,f")

        assert result["success"] is False
        assert result["error"] == (
            "Cannot specify more than 5 skills for an agent (use comma-separated format)"
        )
    finally:
        # Restore the shared registry so other tests see the original instances.
        registry.clear()
        registry.update(snapshot)
def test_load_skill_missing_agent_instance_returns_error() -> None:
    """Without a registered live agent instance, loading fails with a clear error."""
    registry = agents_graph_actions._agent_instances
    snapshot = dict(registry)
    try:
        state = _DummyAgentState("agent_test_load_skill_missing_instance")
        # Deliberately leave the registry empty: no instance for this agent id.
        registry.clear()

        result = load_skill_actions.load_skill(state, "httpx")

        assert result["success"] is False
        assert "running agent instance" in result["error"]
    finally:
        # Restore the shared registry so other tests see the original instances.
        registry.clear()
        registry.update(snapshot)
def test_load_skill_does_not_reload_skill_already_present_from_agent_creation() -> None:
    """A skill already in the prompt is reported as already loaded, not re-injected."""
    registry = agents_graph_actions._agent_instances
    snapshot = dict(registry)
    try:
        state = _DummyAgentState("agent_test_load_skill_existing_config_skill")
        registry.clear()
        # Agent was created with "xss" already loaded into its prompt context.
        registry[state.agent_id] = _DummyAgent(["xss"])

        result = load_skill_actions.load_skill(state, "xss,sql_injection")

        assert result["success"] is True
        assert result["loaded_skills"] == ["xss", "sql_injection"]
        assert result["newly_loaded_skills"] == ["sql_injection"]
        assert result["already_loaded_skills"] == ["xss"]
        assert state.context["loaded_skills"] == ["sql_injection", "xss"]
    finally:
        # Restore the shared registry so other tests see the original instances.
        registry.clear()
        registry.update(snapshot)