From e12b4bd2b526dea5aa74e4631a68e350494a4ac3 Mon Sep 17 00:00:00 2001 From: salvacybersec Date: Mon, 6 Apr 2026 12:14:06 +0300 Subject: [PATCH 1/2] =?UTF-8?q?docs(12):=20create=20phase=20plan=20?= =?UTF-8?q?=E2=80=94=20IoT=20scanners=20+=20cloud=20storage=20sources?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .planning/ROADMAP.md | 8 +- .../12-osint_iot_cloud_storage/12-01-PLAN.md | 193 ++++++++++++++++ .../12-osint_iot_cloud_storage/12-02-PLAN.md | 187 +++++++++++++++ .../12-osint_iot_cloud_storage/12-03-PLAN.md | 183 +++++++++++++++ .../12-osint_iot_cloud_storage/12-04-PLAN.md | 217 ++++++++++++++++++ 5 files changed, 787 insertions(+), 1 deletion(-) create mode 100644 .planning/phases/12-osint_iot_cloud_storage/12-01-PLAN.md create mode 100644 .planning/phases/12-osint_iot_cloud_storage/12-02-PLAN.md create mode 100644 .planning/phases/12-osint_iot_cloud_storage/12-03-PLAN.md create mode 100644 .planning/phases/12-osint_iot_cloud_storage/12-04-PLAN.md diff --git a/.planning/ROADMAP.md b/.planning/ROADMAP.md index f91fffa..1609850 100644 --- a/.planning/ROADMAP.md +++ b/.planning/ROADMAP.md @@ -252,7 +252,13 @@ Plans: 3. `keyhunter recon --sources=s3` enumerates publicly accessible S3 buckets and scans readable objects for API key patterns 4. `keyhunter recon --sources=gcs,azureblob,spaces` scans GCS, Azure Blob, and DigitalOcean Spaces; `--sources=minio` discovers MinIO instances via Shodan integration 5. 
`keyhunter recon --sources=grayhatwarfare` queries the GrayHatWarfare bucket search engine for matching bucket names -**Plans**: TBD +**Plans**: 4 plans + +Plans: +- [ ] 12-01-PLAN.md — ShodanSource + CensysSource + ZoomEyeSource (RECON-IOT-01, RECON-IOT-02, RECON-IOT-03) +- [ ] 12-02-PLAN.md — FOFASource + NetlasSource + BinaryEdgeSource (RECON-IOT-04, RECON-IOT-05, RECON-IOT-06) +- [ ] 12-03-PLAN.md — S3Scanner + GCSScanner + AzureBlobScanner + DOSpacesScanner (RECON-CLOUD-01, RECON-CLOUD-02, RECON-CLOUD-03, RECON-CLOUD-04) +- [ ] 12-04-PLAN.md — RegisterAll wiring + cmd/recon.go credentials + integration test (all Phase 12 reqs) ### Phase 13: OSINT Package Registries & Container/IaC **Goal**: Users can scan npm, PyPI, and 6 other package registries for packages containing leaked keys, and scan Docker Hub image layers, Kubernetes configs, Terraform state files, Helm charts, and Ansible Galaxy for secrets in infrastructure code diff --git a/.planning/phases/12-osint_iot_cloud_storage/12-01-PLAN.md b/.planning/phases/12-osint_iot_cloud_storage/12-01-PLAN.md new file mode 100644 index 0000000..5e7dede --- /dev/null +++ b/.planning/phases/12-osint_iot_cloud_storage/12-01-PLAN.md @@ -0,0 +1,193 @@ +--- +phase: 12-osint_iot_cloud_storage +plan: 01 +type: execute +wave: 1 +depends_on: [] +files_modified: + - pkg/recon/sources/shodan.go + - pkg/recon/sources/shodan_test.go + - pkg/recon/sources/censys.go + - pkg/recon/sources/censys_test.go + - pkg/recon/sources/zoomeye.go + - pkg/recon/sources/zoomeye_test.go +autonomous: true +requirements: [RECON-IOT-01, RECON-IOT-02, RECON-IOT-03] + +must_haves: + truths: + - "ShodanSource searches Shodan /shodan/host/search for exposed LLM endpoints and emits findings" + - "CensysSource searches Censys v2 /hosts/search for exposed services and emits findings" + - "ZoomEyeSource searches ZoomEye /host/search for device/service key exposure and emits findings" + - "Each source is disabled (Enabled==false) when its API key is empty"
+ artifacts: + - path: "pkg/recon/sources/shodan.go" + provides: "ShodanSource implementing recon.ReconSource" + exports: ["ShodanSource"] + - path: "pkg/recon/sources/censys.go" + provides: "CensysSource implementing recon.ReconSource" + exports: ["CensysSource"] + - path: "pkg/recon/sources/zoomeye.go" + provides: "ZoomEyeSource implementing recon.ReconSource" + exports: ["ZoomEyeSource"] + key_links: + - from: "pkg/recon/sources/shodan.go" + to: "pkg/recon/sources/httpclient.go" + via: "sources.Client for retry/backoff HTTP" + pattern: "s\\.client\\.Do" + - from: "pkg/recon/sources/censys.go" + to: "pkg/recon/sources/httpclient.go" + via: "sources.Client for retry/backoff HTTP" + pattern: "s\\.client\\.Do" + - from: "pkg/recon/sources/zoomeye.go" + to: "pkg/recon/sources/httpclient.go" + via: "sources.Client for retry/backoff HTTP" + pattern: "s\\.client\\.Do" +--- + + +Implement three IoT scanner recon sources: Shodan, Censys, and ZoomEye. + +Purpose: Enable discovery of exposed LLM endpoints (vLLM, Ollama, LiteLLM proxies) via internet-wide device scanners. +Output: Three source files + tests following the established Phase 10 pattern. 
+ + + +@$HOME/.claude/get-shit-done/workflows/execute-plan.md +@$HOME/.claude/get-shit-done/templates/summary.md + + + +@.planning/PROJECT.md +@.planning/ROADMAP.md +@.planning/STATE.md +@pkg/recon/source.go +@pkg/recon/sources/httpclient.go +@pkg/recon/sources/github.go +@pkg/recon/sources/bing.go +@pkg/recon/sources/queries.go +@pkg/recon/sources/register.go + + +From pkg/recon/source.go: +```go +type ReconSource interface { + Name() string + RateLimit() rate.Limit + Burst() int + RespectsRobots() bool + Enabled(cfg Config) bool + Sweep(ctx context.Context, query string, out chan<- Finding) error +} +``` + +From pkg/recon/sources/httpclient.go: +```go +type Client struct { HTTP *http.Client; MaxRetries int; UserAgent string } +func NewClient() *Client +func (c *Client) Do(ctx context.Context, req *http.Request) (*http.Response, error) +var ErrUnauthorized = errors.New("sources: unauthorized (check credentials)") +``` + +From pkg/recon/sources/queries.go: +```go +func BuildQueries(reg *providers.Registry, source string) []string +``` + + + + + + + Task 1: Implement ShodanSource, CensysSource, ZoomEyeSource + pkg/recon/sources/shodan.go, pkg/recon/sources/censys.go, pkg/recon/sources/zoomeye.go + +Create three source files following the BingDorkSource pattern exactly: + +**ShodanSource** (shodan.go): +- Struct: `ShodanSource` with fields `APIKey string`, `BaseURL string`, `Registry *providers.Registry`, `Limiters *recon.LimiterRegistry`, `client *Client` +- Compile-time assertion: `var _ recon.ReconSource = (*ShodanSource)(nil)` +- Name(): "shodan" +- RateLimit(): rate.Every(1 * time.Second) — Shodan allows ~1 req/s on most plans +- Burst(): 1 +- RespectsRobots(): false (authenticated REST API) +- Enabled(): returns `s.APIKey != ""` +- BaseURL default: "https://api.shodan.io" +- Sweep(): For each query from BuildQueries(s.Registry, "shodan"), call GET `{base}/shodan/host/search?key={apikey}&query={url.QueryEscape(q)}`. 
Parse JSON response `{"matches":[{"ip_str":"...","port":N,"data":"..."},...]}`. Emit a Finding per match with Source=`fmt.Sprintf("shodan://%s:%d", match.IPStr, match.Port)`, SourceType="recon:shodan", Confidence="low", ProviderName from keyword index. +- Add `shodanKeywordIndex` helper (same pattern as bingKeywordIndex). +- Error handling: ErrUnauthorized aborts, context cancellation aborts, transient errors continue. + +**CensysSource** (censys.go): +- Struct: `CensysSource` with fields `APIId string`, `APISecret string`, `BaseURL string`, `Registry *providers.Registry`, `Limiters *recon.LimiterRegistry`, `client *Client` +- Name(): "censys" +- RateLimit(): rate.Every(2500 * time.Millisecond) — Censys free tier is 0.4 req/s +- Burst(): 1 +- RespectsRobots(): false +- Enabled(): returns `s.APIId != "" && s.APISecret != ""` +- BaseURL default: "https://search.censys.io/api" +- Sweep(): For each query, POST `{base}/v2/hosts/search` with JSON body `{"q":q,"per_page":25}`. Set Basic Auth header using APIId:APISecret. Parse JSON response `{"result":{"hits":[{"ip":"...","services":[{"port":N,"service_name":"..."}]}]}}`. Emit Finding per hit with Source=`fmt.Sprintf("censys://%s", hit.IP)`. +- Add `censysKeywordIndex` helper. + +**ZoomEyeSource** (zoomeye.go): +- Struct: `ZoomEyeSource` with fields `APIKey string`, `BaseURL string`, `Registry *providers.Registry`, `Limiters *recon.LimiterRegistry`, `client *Client` +- Name(): "zoomeye" +- RateLimit(): rate.Every(2 * time.Second) +- Burst(): 1 +- RespectsRobots(): false +- Enabled(): returns `s.APIKey != ""` +- BaseURL default: "https://api.zoomeye.org" (ZoomEye uses v1-style API key in header) +- Sweep(): For each query, GET `{base}/host/search?query={url.QueryEscape(q)}&page=1`. Set header `API-KEY: {apikey}`. Parse JSON response `{"matches":[{"ip":"...","portinfo":{"port":N},"banner":"..."}]}`. Emit Finding per match with Source=`fmt.Sprintf("zoomeye://%s:%d", match.IP, match.PortInfo.Port)`. 
+- Add `zoomeyeKeywordIndex` helper. + +Update `formatQuery` in queries.go to add cases for "shodan", "censys", "zoomeye" — all use bare keyword (same as default). + +All sources must use `sources.NewClient()` for HTTP, `s.Limiters.Wait(ctx, s.Name(), ...)` before each request, and follow the same error handling pattern as BingDorkSource.Sweep. + + + cd /home/salva/Documents/apikey/.claude/worktrees/agent-a6700ee2 && go build ./pkg/recon/sources/ + + Three source files compile, each implements recon.ReconSource interface + + + + Task 2: Unit tests for Shodan, Censys, ZoomEye sources + pkg/recon/sources/shodan_test.go, pkg/recon/sources/censys_test.go, pkg/recon/sources/zoomeye_test.go + + - Shodan: httptest server returns mock JSON with 2 matches; Sweep emits 2 findings with "recon:shodan" source type + - Shodan: empty API key => Enabled()==false, Sweep returns nil with 0 findings + - Censys: httptest server returns mock JSON with 2 hits; Sweep emits 2 findings with "recon:censys" source type + - Censys: empty APIId => Enabled()==false + - ZoomEye: httptest server returns mock JSON with 2 matches; Sweep emits 2 findings with "recon:zoomeye" source type + - ZoomEye: empty API key => Enabled()==false + - All: cancelled context returns context error + + +Create test files following the pattern in github_test.go / bing_test.go: +- Use httptest.NewServer to mock API responses +- Set BaseURL to test server URL +- Create a minimal providers.Registry with 1-2 test providers containing keywords +- Verify Finding count, SourceType, and Source URL format +- Test disabled state (empty credentials) +- Test context cancellation + + + cd /home/salva/Documents/apikey/.claude/worktrees/agent-a6700ee2 && go test ./pkg/recon/sources/ -run "TestShodan|TestCensys|TestZoomEye" -v -count=1 + + All Shodan, Censys, ZoomEye tests pass; each source emits correct findings from mock API responses + + + + + +- `go build ./pkg/recon/sources/` compiles without errors +- `go test 
./pkg/recon/sources/ -run "TestShodan|TestCensys|TestZoomEye" -v` all pass +- Each source file has compile-time assertion `var _ recon.ReconSource = (*XxxSource)(nil)` + + + +Three IoT scanner sources (Shodan, Censys, ZoomEye) implement recon.ReconSource, use shared Client for HTTP, respect rate limiting via LimiterRegistry, and pass unit tests with mock API responses. + + + +After completion, create `.planning/phases/12-osint_iot_cloud_storage/12-01-SUMMARY.md` + diff --git a/.planning/phases/12-osint_iot_cloud_storage/12-02-PLAN.md b/.planning/phases/12-osint_iot_cloud_storage/12-02-PLAN.md new file mode 100644 index 0000000..b7c7560 --- /dev/null +++ b/.planning/phases/12-osint_iot_cloud_storage/12-02-PLAN.md @@ -0,0 +1,187 @@ +--- +phase: 12-osint_iot_cloud_storage +plan: 02 +type: execute +wave: 1 +depends_on: [] +files_modified: + - pkg/recon/sources/fofa.go + - pkg/recon/sources/fofa_test.go + - pkg/recon/sources/netlas.go + - pkg/recon/sources/netlas_test.go + - pkg/recon/sources/binaryedge.go + - pkg/recon/sources/binaryedge_test.go +autonomous: true +requirements: [RECON-IOT-04, RECON-IOT-05, RECON-IOT-06] + +must_haves: + truths: + - "FOFASource searches FOFA API for exposed endpoints and emits findings" + - "NetlasSource searches Netlas API for internet-wide scan results and emits findings" + - "BinaryEdgeSource searches BinaryEdge API for exposed services and emits findings" + - "Each source is disabled when its API key/credentials are empty" + artifacts: + - path: "pkg/recon/sources/fofa.go" + provides: "FOFASource implementing recon.ReconSource" + exports: ["FOFASource"] + - path: "pkg/recon/sources/netlas.go" + provides: "NetlasSource implementing recon.ReconSource" + exports: ["NetlasSource"] + - path: "pkg/recon/sources/binaryedge.go" + provides: "BinaryEdgeSource implementing recon.ReconSource" + exports: ["BinaryEdgeSource"] + key_links: + - from: "pkg/recon/sources/fofa.go" + to: "pkg/recon/sources/httpclient.go" + via: "sources.Client for 
retry/backoff HTTP" + pattern: "s\\.client\\.Do" + - from: "pkg/recon/sources/netlas.go" + to: "pkg/recon/sources/httpclient.go" + via: "sources.Client for retry/backoff HTTP" + pattern: "s\\.client\\.Do" + - from: "pkg/recon/sources/binaryedge.go" + to: "pkg/recon/sources/httpclient.go" + via: "sources.Client for retry/backoff HTTP" + pattern: "s\\.client\\.Do" +--- + + +Implement three IoT scanner recon sources: FOFA, Netlas, and BinaryEdge. + +Purpose: Complete the IoT/device scanner coverage with Chinese (FOFA) and alternative (Netlas, BinaryEdge) internet search engines. +Output: Three source files + tests following the established Phase 10 pattern. + + + +@$HOME/.claude/get-shit-done/workflows/execute-plan.md +@$HOME/.claude/get-shit-done/templates/summary.md + + + +@.planning/PROJECT.md +@.planning/ROADMAP.md +@.planning/STATE.md +@pkg/recon/source.go +@pkg/recon/sources/httpclient.go +@pkg/recon/sources/bing.go +@pkg/recon/sources/queries.go +@pkg/recon/sources/register.go + + +From pkg/recon/source.go: +```go +type ReconSource interface { + Name() string + RateLimit() rate.Limit + Burst() int + RespectsRobots() bool + Enabled(cfg Config) bool + Sweep(ctx context.Context, query string, out chan<- Finding) error +} +``` + +From pkg/recon/sources/httpclient.go: +```go +type Client struct { HTTP *http.Client; MaxRetries int; UserAgent string } +func NewClient() *Client +func (c *Client) Do(ctx context.Context, req *http.Request) (*http.Response, error) +var ErrUnauthorized = errors.New("sources: unauthorized (check credentials)") +``` + + + + + + + Task 1: Implement FOFASource, NetlasSource, BinaryEdgeSource + pkg/recon/sources/fofa.go, pkg/recon/sources/netlas.go, pkg/recon/sources/binaryedge.go + +Create three source files following the BingDorkSource pattern: + +**FOFASource** (fofa.go): +- Struct: `FOFASource` with fields `Email string`, `APIKey string`, `BaseURL string`, `Registry *providers.Registry`, `Limiters *recon.LimiterRegistry`, `client *Client` 
+- Compile-time assertion: `var _ recon.ReconSource = (*FOFASource)(nil)` +- Name(): "fofa" +- RateLimit(): rate.Every(1 * time.Second) — FOFA allows ~1 req/s +- Burst(): 1 +- RespectsRobots(): false +- Enabled(): returns `s.Email != "" && s.APIKey != ""` +- BaseURL default: "https://fofa.info" +- Sweep(): For each query from BuildQueries, base64-encode the query, then GET `{base}/api/v1/search/all?email={email}&key={apikey}&qbase64={base64query}&fields=ip,port,protocol,host&size=100`. Parse JSON response `{"results":[["ip","port","protocol","host"],...],"size":N}`. Emit Finding per result with Source=`fmt.Sprintf("fofa://%s:%s", result[0], result[1])`, SourceType="recon:fofa". +- Note: FOFA results array contains string arrays, not objects. Each inner array is [ip, port, protocol, host], matching the explicit `fields` parameter above (FOFA's default field order is host,ip,port, so the request must pin `fields` for result[0]=ip and result[1]=port to hold). +- Add `fofaKeywordIndex` helper. + +**NetlasSource** (netlas.go): +- Struct: `NetlasSource` with fields `APIKey string`, `BaseURL string`, `Registry *providers.Registry`, `Limiters *recon.LimiterRegistry`, `client *Client` +- Name(): "netlas" +- RateLimit(): rate.Every(1 * time.Second) +- Burst(): 1 +- RespectsRobots(): false +- Enabled(): returns `s.APIKey != ""` +- BaseURL default: "https://app.netlas.io" +- Sweep(): For each query, GET `{base}/api/responses/?q={url.QueryEscape(q)}&start=0&indices=`. Set header `X-API-Key: {apikey}`. Parse JSON response `{"items":[{"data":{"ip":"...","port":N}},...]}`. Emit Finding per item with Source=`fmt.Sprintf("netlas://%s:%d", item.Data.IP, item.Data.Port)`. +- Add `netlasKeywordIndex` helper.
+ +**BinaryEdgeSource** (binaryedge.go): +- Struct: `BinaryEdgeSource` with fields `APIKey string`, `BaseURL string`, `Registry *providers.Registry`, `Limiters *recon.LimiterRegistry`, `client *Client` +- Name(): "binaryedge" +- RateLimit(): rate.Every(2 * time.Second) — BinaryEdge free tier is conservative +- Burst(): 1 +- RespectsRobots(): false +- Enabled(): returns `s.APIKey != ""` +- BaseURL default: "https://api.binaryedge.io" +- Sweep(): For each query, GET `{base}/v2/query/search?query={url.QueryEscape(q)}&page=1`. Set header `X-Key: {apikey}`. Parse JSON response `{"events":[{"target":{"ip":"...","port":N}},...]}`. Emit Finding per event with Source=`fmt.Sprintf("binaryedge://%s:%d", event.Target.IP, event.Target.Port)`. +- Add `binaryedgeKeywordIndex` helper. + +Update `formatQuery` in queries.go to add cases for "fofa", "netlas", "binaryedge" — all use bare keyword (same as default). + +Same patterns as Plan 12-01: use sources.NewClient(), s.Limiters.Wait before requests, standard error handling. 
+ + + cd /home/salva/Documents/apikey/.claude/worktrees/agent-a6700ee2 && go build ./pkg/recon/sources/ + + Three source files compile, each implements recon.ReconSource interface + + + + Task 2: Unit tests for FOFA, Netlas, BinaryEdge sources + pkg/recon/sources/fofa_test.go, pkg/recon/sources/netlas_test.go, pkg/recon/sources/binaryedge_test.go + + - FOFA: httptest server returns mock JSON with 2 results; Sweep emits 2 findings with "recon:fofa" source type + - FOFA: empty Email or APIKey => Enabled()==false + - Netlas: httptest server returns mock JSON with 2 items; Sweep emits 2 findings with "recon:netlas" source type + - Netlas: empty APIKey => Enabled()==false + - BinaryEdge: httptest server returns mock JSON with 2 events; Sweep emits 2 findings with "recon:binaryedge" source type + - BinaryEdge: empty APIKey => Enabled()==false + - All: cancelled context returns context error + + +Create test files following the same httptest pattern used in Plan 12-01: +- Use httptest.NewServer to mock API responses matching each source's expected JSON shape +- Set BaseURL to test server URL +- Create a minimal providers.Registry with 1-2 test providers +- Verify Finding count, SourceType, and Source URL format +- Test disabled state (empty credentials) +- Test context cancellation + + + cd /home/salva/Documents/apikey/.claude/worktrees/agent-a6700ee2 && go test ./pkg/recon/sources/ -run "TestFOFA|TestNetlas|TestBinaryEdge" -v -count=1 + + All FOFA, Netlas, BinaryEdge tests pass; each source emits correct findings from mock API responses + + + + + +- `go build ./pkg/recon/sources/` compiles without errors +- `go test ./pkg/recon/sources/ -run "TestFOFA|TestNetlas|TestBinaryEdge" -v` all pass +- Each source file has compile-time assertion `var _ recon.ReconSource = (*XxxSource)(nil)` + + + +Three IoT scanner sources (FOFA, Netlas, BinaryEdge) implement recon.ReconSource, use shared Client for HTTP, respect rate limiting via LimiterRegistry, and pass unit tests with mock 
API responses. + + + +After completion, create `.planning/phases/12-osint_iot_cloud_storage/12-02-SUMMARY.md` + diff --git a/.planning/phases/12-osint_iot_cloud_storage/12-03-PLAN.md b/.planning/phases/12-osint_iot_cloud_storage/12-03-PLAN.md new file mode 100644 index 0000000..598d737 --- /dev/null +++ b/.planning/phases/12-osint_iot_cloud_storage/12-03-PLAN.md @@ -0,0 +1,183 @@ +--- +phase: 12-osint_iot_cloud_storage +plan: 03 +type: execute +wave: 1 +depends_on: [] +files_modified: + - pkg/recon/sources/s3scanner.go + - pkg/recon/sources/s3scanner_test.go + - pkg/recon/sources/gcsscanner.go + - pkg/recon/sources/gcsscanner_test.go + - pkg/recon/sources/azureblob.go + - pkg/recon/sources/azureblob_test.go + - pkg/recon/sources/dospaces.go + - pkg/recon/sources/dospaces_test.go +autonomous: true +requirements: [RECON-CLOUD-01, RECON-CLOUD-02, RECON-CLOUD-03, RECON-CLOUD-04] + +must_haves: + truths: + - "S3Scanner enumerates publicly accessible S3 buckets by name pattern and scans readable objects for API key exposure" + - "GCSScanner scans publicly accessible Google Cloud Storage buckets" + - "AzureBlobScanner scans publicly accessible Azure Blob containers" + - "DOSpacesScanner scans publicly accessible DigitalOcean Spaces" + - "Each cloud scanner is credentialless (uses anonymous HTTP to probe public buckets) and always Enabled" + artifacts: + - path: "pkg/recon/sources/s3scanner.go" + provides: "S3Scanner implementing recon.ReconSource" + exports: ["S3Scanner"] + - path: "pkg/recon/sources/gcsscanner.go" + provides: "GCSScanner implementing recon.ReconSource" + exports: ["GCSScanner"] + - path: "pkg/recon/sources/azureblob.go" + provides: "AzureBlobScanner implementing recon.ReconSource" + exports: ["AzureBlobScanner"] + - path: "pkg/recon/sources/dospaces.go" + provides: "DOSpacesScanner implementing recon.ReconSource" + exports: ["DOSpacesScanner"] + key_links: + - from: "pkg/recon/sources/s3scanner.go" + to: "pkg/recon/sources/httpclient.go" + via: 
"sources.Client for retry/backoff HTTP" + pattern: "s\\.client\\.Do" +--- + + +Implement four cloud storage scanner recon sources: S3Scanner, GCSScanner, AzureBlobScanner, and DOSpacesScanner. + +Purpose: Enable discovery of API keys leaked in publicly accessible cloud storage buckets across AWS, GCP, Azure, and DigitalOcean. +Output: Four source files + tests following the established Phase 10 pattern. + +Note on RECON-CLOUD-03 (MinIO via Shodan) and RECON-CLOUD-04 (GrayHatWarfare): MinIO discovery (RECON-CLOUD-03) is handled by Plan 12-01's ShodanSource via a dedicated "minio" query (MinIO exposes an S3-compatible API); this plan's S3Scanner covers AWS bucket enumeration only. RECON-CLOUD-04 (GrayHatWarfare, the buckets.grayhatwarfare.com API) is NOT implemented by any task in this plan — before execution, either add a dedicated GrayHatWarfareSource task here or move the requirement to Plan 12-04. + + + +@$HOME/.claude/get-shit-done/workflows/execute-plan.md +@$HOME/.claude/get-shit-done/templates/summary.md + + + +@.planning/PROJECT.md +@.planning/ROADMAP.md +@.planning/STATE.md +@pkg/recon/source.go +@pkg/recon/sources/httpclient.go +@pkg/recon/sources/bing.go +@pkg/recon/sources/queries.go +@pkg/recon/sources/register.go + + +From pkg/recon/source.go: +```go +type ReconSource interface { + Name() string + RateLimit() rate.Limit + Burst() int + RespectsRobots() bool + Enabled(cfg Config) bool + Sweep(ctx context.Context, query string, out chan<- Finding) error +} +``` + +From pkg/recon/sources/httpclient.go: +```go +type Client struct { HTTP *http.Client; MaxRetries int; UserAgent string } +func NewClient() *Client +func (c *Client) Do(ctx context.Context, req *http.Request) (*http.Response, error) +``` + + + + + + + Task 1: Implement S3Scanner and GCSScanner + pkg/recon/sources/s3scanner.go, pkg/recon/sources/gcsscanner.go + +**S3Scanner** (s3scanner.go) — RECON-CLOUD-01 + RECON-CLOUD-03: +- Struct: `S3Scanner` with fields `Registry *providers.Registry`, `Limiters *recon.LimiterRegistry`, `BaseURL string`, `client *Client` +- Compile-time assertion: `var _ recon.ReconSource = (*S3Scanner)(nil)` +- Name(): "s3" +- RateLimit():
rate.Every(500 * time.Millisecond) — S3 public reads are generous +- Burst(): 3 +- RespectsRobots(): false (direct API calls) +- Enabled(): always true (credentialless — probes public buckets) +- Sweep(): Generates candidate bucket names from provider keywords (e.g., "openai-keys", "anthropic-config", "llm-keys", etc.) using a helper `bucketNames(registry)` that combines provider keywords with common suffixes like "-keys", "-config", "-backup", "-data", "-secrets", "-env". For each candidate bucket: + 1. HEAD `https://{bucket}.s3.amazonaws.com/` — if 200/403, bucket exists + 2. If 200 (public listing), GET the ListBucket XML, parse `<Key>` elements + 3. For keys matching common config file patterns (.env, config.*, *.json, *.yaml, *.yml, *.toml, *.conf), emit a Finding with Source=`s3://{bucket}/{key}`, SourceType="recon:s3", Confidence="medium" + 4. Do NOT download object contents (too heavy) — just flag the presence of suspicious files +- Use BaseURL override for tests (default: "https://%s.s3.amazonaws.com") +- Note: MinIO instances (RECON-CLOUD-03) are discovered via Shodan queries in Plan 12-01's ShodanSource using the query "minio" — this source focuses on AWS S3 bucket enumeration. + +**GCSScanner** (gcsscanner.go) — RECON-CLOUD-02: +- Struct: `GCSScanner` with fields `Registry *providers.Registry`, `Limiters *recon.LimiterRegistry`, `BaseURL string`, `client *Client` +- Name(): "gcs" +- RateLimit(): rate.Every(500 * time.Millisecond) +- Burst(): 3 +- RespectsRobots(): false +- Enabled(): always true (credentialless) +- Sweep(): Same bucket enumeration pattern as S3Scanner but using `https://storage.googleapis.com/{bucket}` for HEAD and listing. GCS public bucket listing returns JSON when Accept: application/json is set. Parse `{"items":[{"name":"..."}]}`. Emit findings for config-pattern files with Source=`gs://{bucket}/{name}`, SourceType="recon:gcs".
+ +Both sources share a common `bucketNames` helper function — define it in s3scanner.go and export it for use by both. + + cd /home/salva/Documents/apikey/.claude/worktrees/agent-a6700ee2 && go build ./pkg/recon/sources/ + + S3Scanner and GCSScanner compile and implement recon.ReconSource + + + + Task 2: Implement AzureBlobScanner, DOSpacesScanner, and all cloud scanner tests + pkg/recon/sources/azureblob.go, pkg/recon/sources/dospaces.go, pkg/recon/sources/s3scanner_test.go, pkg/recon/sources/gcsscanner_test.go, pkg/recon/sources/azureblob_test.go, pkg/recon/sources/dospaces_test.go + +**AzureBlobScanner** (azureblob.go) — RECON-CLOUD-02: +- Struct: `AzureBlobScanner` with fields `Registry *providers.Registry`, `Limiters *recon.LimiterRegistry`, `BaseURL string`, `client *Client` +- Name(): "azureblob" +- RateLimit(): rate.Every(500 * time.Millisecond) +- Burst(): 3 +- RespectsRobots(): false +- Enabled(): always true (credentialless) +- Sweep(): Uses bucket enumeration pattern with Azure Blob URL format `https://{account}.blob.core.windows.net/{container}?restype=container&comp=list`. Generate account names from provider keywords with common suffixes. Parse XML `<EnumerationResults><Blobs><Blob><Name>...</Name></Blob></Blobs></EnumerationResults>`. Emit findings for config-pattern files with Source=`azure://{account}/{container}/{name}`, SourceType="recon:azureblob". + +**DOSpacesScanner** (dospaces.go) — RECON-CLOUD-02: +- Struct: `DOSpacesScanner` with fields `Registry *providers.Registry`, `Limiters *recon.LimiterRegistry`, `BaseURL string`, `client *Client` +- Name(): "spaces" +- RateLimit(): rate.Every(500 * time.Millisecond) +- Burst(): 3 +- RespectsRobots(): false +- Enabled(): always true (credentialless) +- Sweep(): Uses bucket enumeration with DO Spaces URL format `https://{bucket}.{region}.digitaloceanspaces.com/`. Iterate regions: nyc3, sfo3, ams3, sgp1, fra1. Same XML ListBucket format as S3 (DO Spaces is S3-compatible). Emit findings with Source=`do://{bucket}/{key}`, SourceType="recon:spaces".
+ +**Tests** (all four test files): +Each test file follows the httptest pattern: +- Mock server returns appropriate XML/JSON for bucket listing +- Verify Sweep emits correct number of findings with correct SourceType and Source URL format +- Verify Enabled() returns true (credentialless sources) +- Test with empty registry (no keywords => no bucket names => no findings) +- Test context cancellation + +Use a minimal providers.Registry with 1 test provider having keyword "testprov" so bucket names like "testprov-keys" are generated. + + + cd /home/salva/Documents/apikey/.claude/worktrees/agent-a6700ee2 && go test ./pkg/recon/sources/ -run "TestS3Scanner|TestGCSScanner|TestAzureBlob|TestDOSpaces" -v -count=1 + + All four cloud scanner sources compile and pass tests; each emits findings with correct source type and URL format + + + + + +- `go build ./pkg/recon/sources/` compiles without errors +- `go test ./pkg/recon/sources/ -run "TestS3Scanner|TestGCSScanner|TestAzureBlob|TestDOSpaces" -v` all pass +- Each source file has compile-time assertion + + + +Four cloud storage scanners (S3, GCS, Azure Blob, DO Spaces) implement recon.ReconSource with credentialless public bucket enumeration, use shared Client for HTTP, and pass unit tests. 
+ + + +After completion, create `.planning/phases/12-osint_iot_cloud_storage/12-03-SUMMARY.md` + diff --git a/.planning/phases/12-osint_iot_cloud_storage/12-04-PLAN.md b/.planning/phases/12-osint_iot_cloud_storage/12-04-PLAN.md new file mode 100644 index 0000000..3e46bac --- /dev/null +++ b/.planning/phases/12-osint_iot_cloud_storage/12-04-PLAN.md @@ -0,0 +1,217 @@ +--- +phase: 12-osint_iot_cloud_storage +plan: 04 +type: execute +wave: 2 +depends_on: [12-01, 12-02, 12-03] +files_modified: + - pkg/recon/sources/register.go + - cmd/recon.go + - pkg/recon/sources/integration_test.go +autonomous: true +requirements: [RECON-IOT-01, RECON-IOT-02, RECON-IOT-03, RECON-IOT-04, RECON-IOT-05, RECON-IOT-06, RECON-CLOUD-01, RECON-CLOUD-02, RECON-CLOUD-03, RECON-CLOUD-04] + +must_haves: + truths: + - "RegisterAll registers all 28 sources (18 Phase 10-11 + 10 Phase 12)" + - "cmd/recon.go populates SourcesConfig with all Phase 12 credential fields from env/viper" + - "Integration test proves all 10 new sources are registered and discoverable by name" + artifacts: + - path: "pkg/recon/sources/register.go" + provides: "RegisterAll with all Phase 12 sources added" + contains: "Phase 12" + - path: "cmd/recon.go" + provides: "buildReconEngine with Phase 12 credential wiring" + contains: "ShodanAPIKey" + - path: "pkg/recon/sources/integration_test.go" + provides: "Integration test covering all 28 registered sources" + contains: "28" + key_links: + - from: "pkg/recon/sources/register.go" + to: "pkg/recon/sources/shodan.go" + via: "engine.Register(&ShodanSource{...})" + pattern: "ShodanSource" + - from: "cmd/recon.go" + to: "pkg/recon/sources/register.go" + via: "sources.RegisterAll(e, cfg)" + pattern: "RegisterAll" +--- + + +Wire all 10 Phase 12 sources into RegisterAll and cmd/recon.go, plus integration test. + +Purpose: Make all IoT and cloud storage sources available via `keyhunter recon list` and `keyhunter recon full`. 
+Output: Updated RegisterAll (28 sources total), updated cmd/recon.go with credential wiring, integration test. + + + +@$HOME/.claude/get-shit-done/workflows/execute-plan.md +@$HOME/.claude/get-shit-done/templates/summary.md + + + +@.planning/PROJECT.md +@.planning/ROADMAP.md +@.planning/STATE.md +@pkg/recon/sources/register.go +@cmd/recon.go +@pkg/recon/sources/integration_test.go + + +From pkg/recon/sources/register.go: +```go +type SourcesConfig struct { + GitHubToken string + // ... existing Phase 10-11 fields ... + Registry *providers.Registry + Limiters *recon.LimiterRegistry +} +func RegisterAll(engine *recon.Engine, cfg SourcesConfig) +``` + +From cmd/recon.go: +```go +func buildReconEngine() *recon.Engine // constructs engine with all sources +func firstNonEmpty(a, b string) string // env -> viper precedence +``` + + + + + + + Task 1: Extend SourcesConfig, RegisterAll, and cmd/recon.go + pkg/recon/sources/register.go, cmd/recon.go + +**SourcesConfig** (register.go) — add these fields after the existing Phase 11 fields: + +```go +// Phase 12: IoT scanner API keys. +ShodanAPIKey string +CensysAPIId string +CensysAPISecret string +ZoomEyeAPIKey string +FOFAEmail string +FOFAAPIKey string +NetlasAPIKey string +BinaryEdgeAPIKey string +``` + +**RegisterAll** (register.go) — add after the Phase 11 paste site registrations: + +```go +// Phase 12: IoT scanner sources. 
+engine.Register(&ShodanSource{ + APIKey: cfg.ShodanAPIKey, + Registry: reg, + Limiters: lim, +}) +engine.Register(&CensysSource{ + APIId: cfg.CensysAPIId, + APISecret: cfg.CensysAPISecret, + Registry: reg, + Limiters: lim, +}) +engine.Register(&ZoomEyeSource{ + APIKey: cfg.ZoomEyeAPIKey, + Registry: reg, + Limiters: lim, +}) +engine.Register(&FOFASource{ + Email: cfg.FOFAEmail, + APIKey: cfg.FOFAAPIKey, + Registry: reg, + Limiters: lim, +}) +engine.Register(&NetlasSource{ + APIKey: cfg.NetlasAPIKey, + Registry: reg, + Limiters: lim, +}) +engine.Register(&BinaryEdgeSource{ + APIKey: cfg.BinaryEdgeAPIKey, + Registry: reg, + Limiters: lim, +}) + +// Phase 12: Cloud storage sources (credentialless). +engine.Register(&S3Scanner{ + Registry: reg, + Limiters: lim, +}) +engine.Register(&GCSScanner{ + Registry: reg, + Limiters: lim, +}) +engine.Register(&AzureBlobScanner{ + Registry: reg, + Limiters: lim, +}) +engine.Register(&DOSpacesScanner{ + Registry: reg, + Limiters: lim, +}) +``` + +Update the RegisterAll doc comment to say "28 sources total" (18 Phase 10-11 + 10 Phase 12). 
+ +**cmd/recon.go** — in buildReconEngine(), add to the SourcesConfig literal: + +```go +ShodanAPIKey: firstNonEmpty(os.Getenv("SHODAN_API_KEY"), viper.GetString("recon.shodan.api_key")), +CensysAPIId: firstNonEmpty(os.Getenv("CENSYS_API_ID"), viper.GetString("recon.censys.api_id")), +CensysAPISecret: firstNonEmpty(os.Getenv("CENSYS_API_SECRET"), viper.GetString("recon.censys.api_secret")), +ZoomEyeAPIKey: firstNonEmpty(os.Getenv("ZOOMEYE_API_KEY"), viper.GetString("recon.zoomeye.api_key")), +FOFAEmail: firstNonEmpty(os.Getenv("FOFA_EMAIL"), viper.GetString("recon.fofa.email")), +FOFAAPIKey: firstNonEmpty(os.Getenv("FOFA_API_KEY"), viper.GetString("recon.fofa.api_key")), +NetlasAPIKey: firstNonEmpty(os.Getenv("NETLAS_API_KEY"), viper.GetString("recon.netlas.api_key")), +BinaryEdgeAPIKey: firstNonEmpty(os.Getenv("BINARYEDGE_API_KEY"), viper.GetString("recon.binaryedge.api_key")), +``` + +Update the reconCmd Long description to mention Phase 12 sources. + + + cd /home/salva/Documents/apikey/.claude/worktrees/agent-a6700ee2 && go build ./cmd/... 
+ + RegisterAll registers 28 sources; cmd/recon.go wires all Phase 12 credentials from env/viper + + + + Task 2: Integration test for all 28 registered sources + pkg/recon/sources/integration_test.go + + - TestRegisterAll_Phase12 registers all sources, asserts 28 total + - All 10 new source names are present: shodan, censys, zoomeye, fofa, netlas, binaryedge, s3, gcs, azureblob, spaces + - IoT sources with empty credentials report Enabled()==false + - Cloud storage sources (credentialless) report Enabled()==true + - SweepAll with short context timeout completes without panic + + +Extend the existing integration_test.go (which currently tests 18 Phase 10-11 sources): +- Update the expected source count from 18 to 28 +- Add all 10 new source names to the expected names list +- Add assertions that IoT sources (shodan, censys, zoomeye, fofa, netlas, binaryedge) are Enabled()==false when credentials are empty +- Add assertions that cloud sources (s3, gcs, azureblob, spaces) are Enabled()==true (credentialless) +- Keep the existing SweepAll test with short context timeout, verify no panics + + + cd /home/salva/Documents/apikey/.claude/worktrees/agent-a6700ee2 && go test ./pkg/recon/sources/ -run "TestRegisterAll" -v -count=1 + + Integration test passes with 28 registered sources; all Phase 12 source names are discoverable + + + + + +- `go build ./cmd/...` compiles without errors +- `go test ./pkg/recon/sources/ -run "TestRegisterAll" -v` passes with 28 sources +- `go test ./pkg/recon/sources/ -v -count=1` all tests pass (existing + new) + + + +All 10 Phase 12 sources are wired into RegisterAll and discoverable via the recon engine. cmd/recon.go reads credentials from env vars and viper config. Integration test confirms 28 total sources registered. 
+ + + +After completion, create `.planning/phases/12-osint_iot_cloud_storage/12-04-SUMMARY.md` + From d8a54f2c164a477c9afbe86f55d00db3d1f16b08 Mon Sep 17 00:00:00 2001 From: salvacybersec Date: Mon, 6 Apr 2026 12:20:42 +0300 Subject: [PATCH 2/2] docs: update README to reflect current implementation state (phases 1-11) Co-Authored-By: Claude Opus 4.6 (1M context) --- README.md | 915 +++++++++++++++++++++--------------------------------- 1 file changed, 357 insertions(+), 558 deletions(-) diff --git a/README.md b/README.md index 20d72c7..7b6c324 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Existing tools like TruffleHog (~3 LLM detectors) and Gitleaks (~5 LLM rules) were built for general secret scanning. AI-related credential leaks grew **81% year-over-year** in 2025, yet no tool covers more than ~15 LLM providers. -**KeyHunter fills that gap** with 108+ provider-specific detectors, active key validation, OSINT/recon capabilities, and real-time notifications. +**KeyHunter fills that gap** with 108+ provider-specific detectors, active key validation, OSINT/recon capabilities, and a growing set of internet sources for leak discovery. 
### How It Compares @@ -20,138 +20,158 @@ Existing tools like TruffleHog (~3 LLM detectors) and Gitleaks (~5 LLM rules) we |---------|-----------|------------|----------|----------------| | LLM Providers | **108+** | ~3 | ~5 | ~1 | | Active Verification | **108+ endpoints** | ~20 types | No | No | -| OSINT/Recon | **Shodan, Censys, GitHub, GitLab, Paste, S3** | No | No | No | +| OSINT/Recon Sources | **18 live** (80+ planned) | No | No | No | | External Tool Import | **TruffleHog + Gitleaks** | - | - | - | -| Web Dashboard | **Built-in** | No | No | No | -| Telegram Bot | **Built-in** | No | No | No | -| Dork Engine | **Built-in YAML dorks** | No | No | No | +| Dork Engine | **150 built-in YAML dorks** | No | No | No | +| Pre-commit Hook | **Built-in** | Yes | Yes | Yes | +| SARIF Output | **Yes** | Yes | Yes | No | | Provider YAML Plugin | **Community-extensible** | Go code only | TOML rules | Python plugins | -| Scheduled Scanning | **Cron-based** | No | No | No | +| Web Dashboard | Coming soon | No | No | No | +| Telegram Bot | Coming soon | No | No | No | +| Scheduled Scanning | Coming soon | No | No | No | --- ## Features -### Core Scanning -- **File/Directory scanning** with recursive traversal and glob exclusions -- **Git-aware scanning** — full history, branches, stash, delta-based diffs -- **stdin/pipe** support — `cat dump.txt | keyhunter scan stdin` -- **URL fetching** — scan any remote URL content -- **Clipboard scanning** — instant clipboard content analysis +### Implemented -### OSINT / Recon Engine (80+ Sources, 18 Categories) +#### Core Scanning Engine +- **3-stage pipeline** -- AC pre-filter, regex match, entropy scoring +- **ants worker pool** for parallel scanning with configurable worker count +- **108 provider YAML definitions** (Tier 1-9), dual-located with `go:embed` -**IoT & Internet Scanners** -- **Shodan** — exposed LLM proxies, dashboards, API endpoints -- **Censys** — HTTP body search for leaked credentials -- **ZoomEye** — Chinese IoT 
scanner, different coverage perspective -- **FOFA** — Asian infrastructure scanning, body content search -- **Netlas** — HTTP response body keyword search -- **BinaryEdge** — internet-wide scan data +#### Input Sources +- **File scanning** -- single file analysis +- **Directory scanning** -- recursive traversal with glob exclusions and mmap +- **Git history scanning** -- full commit history analysis +- **stdin/pipe** support -- `echo "sk-proj-..." | keyhunter scan stdin` +- **URL fetching** -- scan any remote URL content +- **Clipboard scanning** -- instant clipboard content analysis -**Code Hosting & Snippets** -- **GitHub / GitLab / Bitbucket** — code search with automated dorks -- **Codeberg / Gitea instances** — alternative Git platforms (Gitea auto-discovered via Shodan) -- **Replit / CodeSandbox / StackBlitz / Glitch** — interactive dev environments with hardcoded keys -- **CodePen / JSFiddle / Observable** — browser snippet platforms -- **HuggingFace** — Spaces, repos, model configs (high-yield for LLM keys) -- **Kaggle** — notebooks and datasets with API keys -- **Jupyter / nbviewer** — shared notebooks -- **GitHub Gist** — public gist search -- **Gitpod** — workspace snapshots - -**Search Engine Dorking** -- **Google** — Custom Search API / SerpAPI, 100+ built-in dorks -- **Bing** — Azure Cognitive Services search -- **DuckDuckGo / Yandex / Brave** — alternative indexes for broader coverage - -**Paste Sites** -- **Multi-paste aggregator** — Pastebin, dpaste, paste.ee, rentry, hastebin, ix.io, and more - -**Package Registries** -- **npm / PyPI / RubyGems / crates.io / Maven / NuGet / Packagist / Go modules** — download packages, extract source, scan for key patterns - -**Container & Infrastructure** -- **Docker Hub** — image layer scanning, build arg extraction -- **Kubernetes** — exposed dashboards, public Secret/ConfigMap YAML files -- **Terraform** — state files (`.tfstate` with plaintext secrets), registry modules -- **Helm Charts / Ansible Galaxy** — 
default values with credentials - -**Cloud Storage** -- **AWS S3 / GCS / Azure Blob / DigitalOcean Spaces / Backblaze B2** — bucket enumeration and content scanning -- **MinIO** — self-hosted instances discovered via Shodan -- **GrayHatWarfare** — searchable database of public bucket objects - -**CI/CD Log Leaks** -- **Travis CI / CircleCI** — public build logs with leaked env vars -- **GitHub Actions** — workflow run log scanning -- **Jenkins** — exposed instances (Shodan-discovered), console output -- **GitLab CI/CD** — public pipeline job traces - -**Web Archives** -- **Wayback Machine** — historical snapshots of removed `.env` files, config pages -- **CommonCrawl** — massive web crawl data, WARC record scanning - -**Forums & Documentation** -- **Stack Overflow** — API + SEDE queries for code snippets with real keys -- **Reddit** — programming subreddit scanning -- **Hacker News** — Algolia API comment search -- **dev.to / Medium** — tutorial articles with hardcoded keys -- **Telegram groups** — public channels sharing configs and "free API keys" -- **Discord** — indexed public server content - -**Collaboration Tools** -- **Notion / Confluence** — public pages and spaces with credentials -- **Trello** — public boards with API key cards -- **Google Docs/Sheets** — publicly shared documents - -**Frontend & JavaScript Leaks** -- **JS Source Maps** — original source recovery with inlined secrets -- **Webpack / Vite bundles** — `REACT_APP_*`, `NEXT_PUBLIC_*`, `VITE_*` variable extraction -- **Exposed `.env` files** — misconfigured web servers serving dotenv from root -- **Swagger / OpenAPI docs** — real auth examples in API docs -- **Vercel / Netlify previews** — deploy preview JS bundles with production secrets - -**Log Aggregators** -- **Elasticsearch / Kibana** — exposed instances with application logs containing API keys -- **Grafana** — exposed dashboards with datasource configs -- **Sentry** — error tracking capturing request headers with keys - -**Threat 
Intelligence** -- **VirusTotal** — uploaded files/scripts containing embedded keys -- **Intelligence X** — aggregated paste, darknet, and leak search -- **URLhaus** — malicious URLs with API keys in parameters - -**Mobile Apps** -- **APK analysis** — download, decompile, grep for key patterns (via apktool/jadx) - -**DNS / Subdomain Discovery** -- **crt.sh** — Certificate Transparency log for API subdomain discovery -- **Subdomain probing** — config endpoint enumeration (`.env`, `/api/config`, `/actuator/env`) - -**API Marketplaces** -- **Postman** — public collections, workspaces, environments -- **SwaggerHub** — published API definitions with example values - -**`recon full`** — parallel sweep across all 80+ sources with deduplication and unified reporting - -### Active Verification -- Lightweight API calls to verify if detected keys are active +#### Active Verification +- YAML-driven `HTTPVerifier` -- lightweight API calls to verify if detected keys are active - Permission and scope extraction (org, rate limits, model access) +- Consent prompt and `LEGAL.md` for legal safety - Configurable via `--verify` flag (off by default) -- Provider-specific verification endpoints -### External Tool Integration -- **Import TruffleHog** JSON output — enrich with LLM-specific analysis -- **Import Gitleaks** JSON output — cross-reference with 108+ providers -- Generic CSV import for custom tool output +#### Output Formats +- **Table** -- colored terminal output with key masking (default) +- **JSON** -- full key values for programmatic consumption +- **CSV** -- spreadsheet-compatible export +- **SARIF 2.1.0** -- CI/CD integration (GitHub Code Scanning, etc.) 
+- Exit codes: `0` (clean), `1` (findings), `2` (error)
 
-### Notifications & Dashboard
-- **Telegram Bot** — scan triggers, key alerts, recon results
-- **Web Dashboard** — htmx + Tailwind, SQLite-backed, real-time scan viewer
-- **Webhook** — generic HTTP POST notifications
-- **Slack** — workspace notifications
-- **Scheduled scans** — cron-based recurring scans with auto-notify
+#### Key Management
+- `keyhunter keys list` -- list all discovered keys (masked by default)
+- `keyhunter keys show <id>` -- full key details
+- `keyhunter keys export` -- export in JSON/CSV format
+- `keyhunter keys copy <id>` -- copy key to clipboard
+- `keyhunter keys delete <id>` -- remove a key from the database
+- `keyhunter keys verify <id>` -- verify a specific key
+
+#### External Tool Import
+- **TruffleHog v3** JSON import with LLM-specific enrichment
+- **Gitleaks** JSON and CSV import
+- Deduplication across imports via `(provider, masked_key, source)` hashing
+
+#### Git Pre-commit Hook
+- `keyhunter hook install` -- embedded shell script, blocks leaks before commit
+- `keyhunter hook uninstall` -- clean removal
+- Backup of existing hooks with `--force`
+
+#### Dork Engine
+- **150 built-in YAML dorks** across 8 source types (GitHub, GitLab, Google, Shodan, Censys, ZoomEye, FOFA, Bing)
+- GitHub live executor with authenticated API
+- CLI management: `keyhunter dorks list`, `keyhunter dorks list --source=github`, `keyhunter dorks add`, `keyhunter dorks run`, `keyhunter dorks export`
+
+#### OSINT / Recon Engine (18 Sources Live)
+
+The recon framework provides a `ReconSource` interface with per-source rate limiting, stealth mode, robots.txt compliance, parallel sweep, and result deduplication.
+ +**Code Hosting & Snippets** (live) +- **GitHub** -- code search with automated dorks +- **GitLab** -- code search +- **Bitbucket** -- code search +- **GitHub Gist** -- public gist search +- **Codeberg** -- alternative Git platform search +- **HuggingFace** -- Spaces, repos, model configs (high-yield for LLM keys) +- **Replit** -- public repl search +- **CodeSandbox** -- sandbox search +- **StackBlitz Sandboxes** -- sandbox search +- **Kaggle** -- notebooks and datasets with API keys + +**Search Engine Dorking** (live) +- **Google** -- Custom Search API / SerpAPI +- **Bing** -- Azure Cognitive Services search +- **DuckDuckGo** -- HTML scraping fallback +- **Yandex** -- XML API search +- **Brave** -- Brave Search API + +**Paste Sites** (live) +- **Pastebin** -- scraping API +- **GistPaste** -- paste search +- **PasteSites** -- multi-paste aggregator + +**`recon full`** -- parallel sweep across all 18 live sources with deduplication and unified reporting. + +#### CLI Commands +| Command | Status | +|---------|--------| +| `keyhunter scan` | Implemented | +| `keyhunter providers list/info/stats` | Implemented | +| `keyhunter config init/set/get` | Implemented | +| `keyhunter keys list/show/export/copy/delete/verify` | Implemented | +| `keyhunter import` | Implemented | +| `keyhunter hook install/uninstall` | Implemented | +| `keyhunter dorks list/add/run/export` | Implemented | +| `keyhunter recon full/list` | Implemented | +| `keyhunter legal` | Implemented | +| `keyhunter verify` | Stub | +| `keyhunter serve` | Stub | +| `keyhunter schedule` | Stub | + +### Coming Soon + +The following features are on the roadmap but not yet implemented: + +#### Phase 12 -- IoT Scanners & Cloud Storage +- **Shodan** -- exposed LLM proxies, dashboards, API endpoints +- **Censys** -- HTTP body search for leaked credentials +- **ZoomEye** -- IoT scanner +- **FOFA** -- Asian infrastructure scanning +- **Netlas** -- HTTP response body search +- **BinaryEdge** -- internet-wide scan data 
+- **AWS S3 / GCS / Azure Blob / DigitalOcean Spaces** -- bucket enumeration and scanning + +#### Phase 13 -- Package Registries, Containers & IaC +- **npm / PyPI / RubyGems / crates.io / Maven / NuGet** -- package source scanning +- **Docker Hub** -- image layer scanning +- **Terraform / Helm Charts / Ansible** -- IaC scanning + +#### Phase 14 -- CI/CD Logs, Web Archives & Frontend Leaks +- **GitHub Actions / Travis CI / CircleCI / Jenkins / GitLab CI** -- public build log scanning +- **Wayback Machine / CommonCrawl** -- historical web archive scanning +- **JS Source Maps / Webpack bundles / exposed .env** -- frontend leak detection + +#### Phase 15 -- Forums & Collaboration +- **Stack Overflow / Reddit / Hacker News / dev.to / Medium** -- forum scanning +- **Notion / Confluence / Trello** -- collaboration tool scanning +- **Elasticsearch / Grafana / Sentry** -- exposed log aggregators +- **Telegram groups / Discord** -- public channel scanning + +#### Phase 16 -- Threat Intel, Mobile, DNS & API Marketplaces +- **VirusTotal / Intelligence X / URLhaus** -- threat intelligence +- **APK analysis** -- mobile app decompilation +- **crt.sh / subdomain probing** -- DNS/subdomain discovery +- **Postman / SwaggerHub** -- API marketplace scanning + +#### Phase 17 -- Telegram Bot & Scheduler +- **Telegram Bot** -- scan triggers, key alerts, recon results +- **Scheduled scanning** -- cron-based recurring scans with auto-notify + +#### Phase 18 -- Web Dashboard +- **Web Dashboard** -- htmx + Tailwind, SQLite-backed, real-time scan viewer --- @@ -161,161 +181,111 @@ Existing tools like TruffleHog (~3 LLM detectors) and Gitleaks (~5 LLM rules) we ```bash # From source -go install github.com/keyhunter/keyhunter@latest +go install github.com/salvacybersec/keyhunter@latest -# Binary release -curl -sSL https://get.keyhunter.dev | bash - -# Docker -docker pull keyhunter/keyhunter:latest +# Binary release (when available) +curl -sSL 
https://github.com/salvacybersec/keyhunter/releases/latest/download/keyhunter_linux_amd64.tar.gz | tar -xz +sudo mv keyhunter /usr/local/bin/ ``` ### Basic Usage ```bash # Scan a directory -keyhunter scan path ./my-project/ +keyhunter scan ./my-project/ # Scan with active verification -keyhunter scan path ./my-project/ --verify +keyhunter scan ./my-project/ --verify -# Scan git history (last 30 days) -keyhunter scan git . --since="30 days ago" +# Scan git history +keyhunter scan --git . # Scan from pipe cat secrets.txt | keyhunter scan stdin # Scan only specific providers -keyhunter scan path . --providers=openai,anthropic,deepseek +keyhunter scan . --providers=openai,anthropic,deepseek # JSON output -keyhunter scan path . --output=json > results.json +keyhunter scan . --output=json > results.json + +# SARIF output for CI/CD +keyhunter scan . --output=sarif > keyhunter.sarif + +# CSV output +keyhunter scan . --output=csv > results.csv ``` ### OSINT / Recon ```bash -# ── IoT & Internet Scanners ── -keyhunter recon shodan --dork="http.title:\"LiteLLM\" port:4000" -keyhunter recon censys --query='services.http.response.body:"sk-proj-"' -keyhunter recon zoomeye --query='app:"Elasticsearch" +"api_key"' -keyhunter recon fofa --query='body="OPENAI_API_KEY"' -keyhunter recon netlas --query='http.body:"sk-ant-"' +# Full sweep across all 18 live sources +keyhunter recon full -# ── Code Hosting ── -keyhunter recon github --dork=auto # Tum built-in GitHub dork'lari -keyhunter recon gitlab --dork=auto -keyhunter recon bitbucket --query="OPENAI_API_KEY" -keyhunter recon replit --query="sk-proj-" # Public repl'ler -keyhunter recon huggingface --spaces --query="api_key" # HF Spaces -keyhunter recon kaggle --notebooks --query="openai" -keyhunter recon codesandbox --query="sk-ant-" -keyhunter recon glitch --query="ANTHROPIC_API_KEY" -keyhunter recon gitea --instances-from=shodan # Auto-discover Gitea instances +# Sweep specific sources only +keyhunter recon full 
--sources=github,gitlab,gist -# ── Search Engine Dorking ── -keyhunter recon google --dork=auto # 100+ built-in Google dorks -keyhunter recon google --dork='"sk-proj-" -github.com filetype:env' -keyhunter recon bing --dork=auto -keyhunter recon brave --query="OPENAI_API_KEY filetype:yaml" +# List available recon sources +keyhunter recon list -# ── Package Registries ── -keyhunter recon npm --recent --query="openai" # Scan yeni paketler -keyhunter recon pypi --recent --query="llm" -keyhunter recon crates --query="api_key" +# Code hosting sources +keyhunter recon full --sources=github +keyhunter recon full --sources=gitlab +keyhunter recon full --sources=bitbucket +keyhunter recon full --sources=gist +keyhunter recon full --sources=codeberg +keyhunter recon full --sources=huggingface +keyhunter recon full --sources=replit +keyhunter recon full --sources=codesandbox +keyhunter recon full --sources=sandboxes +keyhunter recon full --sources=kaggle -# ── Cloud Storage ── -keyhunter recon s3 --domain=targetcorp # S3 bucket enumeration -keyhunter recon gcs --domain=targetcorp # GCS buckets -keyhunter recon azure --domain=targetcorp # Azure Blob -keyhunter recon minio --shodan # Exposed MinIO instances -keyhunter recon grayhat --query="openai api_key" # GrayHatWarfare search +# Search engine dorking +keyhunter recon full --sources=google +keyhunter recon full --sources=bing +keyhunter recon full --sources=duckduckgo +keyhunter recon full --sources=yandex +keyhunter recon full --sources=brave -# ── CI/CD Logs ── -keyhunter recon ghactions --org=targetcorp # GitHub Actions logs -keyhunter recon travis --org=targetcorp -keyhunter recon jenkins --shodan # Exposed Jenkins instances -keyhunter recon circleci --org=targetcorp +# Paste sites +keyhunter recon full --sources=pastebin +keyhunter recon full --sources=gistpaste +keyhunter recon full --sources=pastesites +``` -# ── Web Archives ── -keyhunter recon wayback --domain=targetcorp.com # Wayback Machine -keyhunter recon 
commoncrawl --domain=targetcorp.com +### Dork Management -# ── Frontend & JS ── -keyhunter recon dotenv --domain-list=targets.txt # Exposed .env files -keyhunter recon sourcemaps --domain=app.target.com # JS source maps -keyhunter recon webpack --url=https://app.target.com/main.js -keyhunter recon swagger --shodan # Exposed Swagger UI's -keyhunter recon deploys --domain=targetcorp # Vercel/Netlify previews - -# ── Forums ── -keyhunter recon stackoverflow --query="sk-proj-" -keyhunter recon reddit --subreddit=openai --query="api key" -keyhunter recon hackernews --query="leaked api key" -keyhunter recon telegram-groups --query="free api key" - -# ── Collaboration ── -keyhunter recon notion --query="API_KEY" # Google dorked -keyhunter recon confluence --shodan # Exposed instances -keyhunter recon trello --query="openai api key" - -# ── Log Aggregators ── -keyhunter recon elasticsearch --shodan # Exposed ES instances -keyhunter recon grafana --shodan -keyhunter recon sentry --shodan - -# ── Threat Intelligence ── -keyhunter recon virustotal --query="sk-proj-" -keyhunter recon intelx --query="sk-ant-api03" # Intelligence X -keyhunter recon urlhaus --query="openai" - -# ── Mobile Apps ── -keyhunter recon apk --query="ai chatbot" # APK download + decompile - -# ── DNS/Subdomain ── -keyhunter recon crtsh --domain=targetcorp.com # Cert transparency -keyhunter recon subdomain --domain=targetcorp.com --probe-configs - -# ── Full Sweep ── -keyhunter recon full --providers=openai,anthropic # ALL 80+ sources parallel -keyhunter recon full --categories=code,cloud # Category-filtered sweep - -# ── Dork Management ── -keyhunter dorks list # All dorks across all sources -keyhunter dorks list --source=github -keyhunter dorks list --source=google +```bash +keyhunter dorks list # All dorks across all sources +keyhunter dorks list --source=github # GitHub dorks only +keyhunter dorks list --source=google # Google dorks only keyhunter dorks add github 'filename:.env "GROQ_API_KEY"' 
-keyhunter dorks run google --category=frontier # Run Google dorks for frontier providers +keyhunter dorks run google --category=frontier keyhunter dorks export ``` -### Viewing Full API Keys +### Key Management -Default olarak key'ler terminalde maskelenir (omuz surfing koruması). Gerçek key'e erişim yolları: +Keys are masked by default in terminal output (shoulder surfing protection). Ways to access full key values: ```bash -# 1. CLI'da --unmask flag'i ile tam key gör -keyhunter scan path . --unmask -# Provider | Key | Confidence | File | Line | Status -# ─────────────┼──────────────────────────────────────────────┼────────────┼───────────────┼──────┼──────── -# OpenAI | sk-proj-abc123def456ghi789jkl012mno345pqr678 | HIGH | src/config.py | 42 | ACTIVE +# Show full keys in scan output +keyhunter scan . --unmask -# 2. JSON export — her zaman tam key içerir -keyhunter scan path . --output=json > results.json +# JSON export always includes full keys +keyhunter scan . --output=json > results.json -# 3. Key management komutu — bulunan tüm key'leri yönet -keyhunter keys list # Maskelenmiş liste -keyhunter keys list --unmask # Tam key'li liste -keyhunter keys show # Tek key tam detay (her zaman unmasked) -keyhunter keys copy # Key'i clipboard'a kopyala -keyhunter keys export --format=json # Tüm key'leri tam değerleriyle export et -keyhunter keys verify # Key'i doğrula + tam detay göster - -# 4. Web Dashboard — /keys/:id sayfasında "Reveal Key" butonu -# 5. 
Telegram Bot — /key komutu ile tam key
+# Key management commands
+keyhunter keys list # Masked list
+keyhunter keys list --unmask # Full key list
+keyhunter keys show <id> # Single key full details (always unmasked)
+keyhunter keys copy <id> # Copy key to clipboard
+keyhunter keys export --format=json # Export all keys with full values
+keyhunter keys verify <id> # Verify key + show full details
+keyhunter keys delete <id> # Remove key from database
 ```
 
-**Örnek `keyhunter keys show` çıktısı:**
+**Example `keyhunter keys show` output:**
 ```
 ID: a3f7b2c1
 Provider: OpenAI
@@ -331,42 +301,20 @@ keyhunter keys verify # Key'i doğrula + tam detay göster
 Revoke URL: https://platform.openai.com/api-keys
 ```
 
-### Verify a Single Key
-
-```bash
-keyhunter verify sk-proj-abc123...
-# Output:
-# Provider: OpenAI
-# Status: ACTIVE
-# Org: my-org
-# Rate Limit: 500 req/min
-# Revoke: https://platform.openai.com/api-keys
-```
-
 ### Import External Tools
 
 ```bash
 # Run TruffleHog, then enrich with KeyHunter
 trufflehog git . --json > trufflehog.json
-keyhunter import trufflehog trufflehog.json --verify
+keyhunter import --format=trufflehog trufflehog.json
 
 # Run Gitleaks, then enrich
-gitleaks detect -r gitleaks.json
-keyhunter import gitleaks gitleaks.json
-```
+gitleaks detect -f json -r gitleaks.json
+keyhunter import --format=gitleaks gitleaks.json
 
-### Web Dashboard & Telegram Bot
-
-```bash
-# Start web dashboard
-keyhunter serve --port=8080
-
-# Start with Telegram bot
-keyhunter serve --port=8080 --telegram
-
-# Configure Telegram
-keyhunter config set telegram.token "YOUR_BOT_TOKEN"
-keyhunter config set telegram.chat_id "YOUR_CHAT_ID"
+# Gitleaks CSV
+gitleaks detect -f csv -r gitleaks.csv
+keyhunter import --format=gitleaks-csv gitleaks.csv
 ```
 
 ### CI/CD Integration
@@ -394,27 +342,6 @@ keyhunter scan . && echo "Clean" || echo "Keys found!"
See [docs/CI-CD.md](docs/CI-CD.md) for the full guide, including a copy-paste GitHub Actions workflow and the pre-commit hook install/uninstall lifecycle. -### Scheduled Scanning - -```bash -# Daily GitHub recon at 09:00 -keyhunter schedule add \ - --name="daily-github" \ - --cron="0 9 * * *" \ - --command="recon github --dork=auto" \ - --notify=telegram - -# Hourly paste site monitoring -keyhunter schedule add \ - --name="hourly-paste" \ - --cron="0 * * * *" \ - --command="recon paste --sources=pastebin" \ - --notify=telegram - -keyhunter schedule list -keyhunter schedule remove daily-github -``` - --- ## Configuration @@ -424,37 +351,21 @@ keyhunter schedule remove daily-github keyhunter config init # Creates ~/.keyhunter.yaml -# Set API keys for recon sources -keyhunter config set shodan.apikey "YOUR_SHODAN_KEY" -keyhunter config set censys.api_id "YOUR_CENSYS_ID" -keyhunter config set censys.api_secret "YOUR_CENSYS_SECRET" -keyhunter config set github.token "YOUR_GITHUB_TOKEN" -keyhunter config set gitlab.token "YOUR_GITLAB_TOKEN" -keyhunter config set zoomeye.apikey "YOUR_ZOOMEYE_KEY" -keyhunter config set fofa.email "YOUR_FOFA_EMAIL" -keyhunter config set fofa.apikey "YOUR_FOFA_KEY" -keyhunter config set netlas.apikey "YOUR_NETLAS_KEY" -keyhunter config set binaryedge.apikey "YOUR_BINARYEDGE_KEY" -keyhunter config set google.cx "YOUR_GOOGLE_CX_ID" -keyhunter config set google.apikey "YOUR_GOOGLE_API_KEY" -keyhunter config set bing.apikey "YOUR_BING_API_KEY" -keyhunter config set brave.apikey "YOUR_BRAVE_API_KEY" -keyhunter config set virustotal.apikey "YOUR_VT_KEY" -keyhunter config set intelx.apikey "YOUR_INTELX_KEY" -keyhunter config set grayhat.apikey "YOUR_GRAYHAT_KEY" -keyhunter config set reddit.client_id "YOUR_REDDIT_ID" -keyhunter config set reddit.client_secret "YOUR_REDDIT_SECRET" -keyhunter config set stackoverflow.apikey "YOUR_SO_KEY" -keyhunter config set kaggle.username "YOUR_KAGGLE_USER" -keyhunter config set kaggle.apikey "YOUR_KAGGLE_KEY" +# 
Set API tokens for recon sources (currently supported) +keyhunter config set recon.github.token "YOUR_GITHUB_TOKEN" +keyhunter config set recon.gitlab.token "YOUR_GITLAB_TOKEN" +keyhunter config set recon.bitbucket.token "YOUR_BITBUCKET_TOKEN" +keyhunter config set recon.huggingface.token "YOUR_HF_TOKEN" +keyhunter config set recon.kaggle.token "YOUR_KAGGLE_TOKEN" +keyhunter config set recon.google.apikey "YOUR_GOOGLE_API_KEY" +keyhunter config set recon.google.cx "YOUR_GOOGLE_CX_ID" +keyhunter config set recon.bing.apikey "YOUR_BING_API_KEY" +keyhunter config set recon.brave.apikey "YOUR_BRAVE_API_KEY" +keyhunter config set recon.yandex.apikey "YOUR_YANDEX_API_KEY" +keyhunter config set recon.yandex.user "YOUR_YANDEX_USER" -# Set notification channels -keyhunter config set telegram.token "YOUR_BOT_TOKEN" -keyhunter config set telegram.chat_id "YOUR_CHAT_ID" -keyhunter config set webhook.url "https://your-webhook.com/alert" - -# Database encryption -keyhunter config set db.password "YOUR_DB_PASSWORD" +# View current config +keyhunter config get recon.github.token ``` ### Config File (`~/.keyhunter.yaml`) @@ -464,53 +375,43 @@ scan: workers: 8 verify_timeout: 10s default_output: table - respect_robots: true recon: stealth: false - rate_limits: - github: 30 # req/min - shodan: 1 # req/sec - censys: 5 # req/sec - zoomeye: 10 # req/sec - fofa: 1 # req/sec - netlas: 1 # req/sec - google: 100 # req/day (Custom Search API) - bing: 3 # req/sec - stackoverflow: 30 # req/sec - hackernews: 100 # req/min - paste: 0.5 # req/sec - npm: 10 # req/sec - pypi: 5 # req/sec - virustotal: 4 # req/min (free tier) - intelx: 10 # req/day (free tier) - grayhat: 5 # req/sec - wayback: 15 # req/min - trello: 10 # req/sec - devto: 1 # req/sec + respect_robots: true + github: + token: "" + gitlab: + token: "" + bitbucket: + token: "" + huggingface: + token: "" + kaggle: + token: "" + google: + apikey: "" + cx: "" + bing: + apikey: "" + brave: + apikey: "" + yandex: + apikey: "" + user: "" +``` 
-telegram: - token: "encrypted:..." - chat_id: "123456789" - auto_notify: true - -web: - port: 8080 - auth: - enabled: false - username: admin - password: "encrypted:..." - -db: - path: ~/.keyhunter/keyhunter.db - encrypted: true +### Stealth & Ethics Flags +```bash +--stealth # User-agent rotation, increased request spacing +--respect-robots # Respect robots.txt (default: on) ``` --- ## Supported Providers (108) -### Tier 1 — Frontier +### Tier 1 -- Frontier | Provider | Key Pattern | Confidence | Verify | |----------|-------------|------------|--------| @@ -527,7 +428,7 @@ db: | Inflection AI | Generic UUID | Low | `GET /api/models` | | AI21 Labs | Generic key | Low | `GET /v1/models` | -### Tier 2 — Inference Platforms +### Tier 2 -- Inference Platforms | Provider | Key Pattern | Confidence | Verify | |----------|-------------|------------|--------| @@ -546,7 +447,7 @@ db: | OctoAI | Generic key | Low | `GET /v1/models` | | Friendli AI | Generic key | Low | `GET /v1/models` | -### Tier 3 — Specialized/Vertical +### Tier 3 -- Specialized/Vertical | Provider | Key Pattern | Confidence | Verify | |----------|-------------|------------|--------| @@ -563,7 +464,7 @@ db: | Midjourney | Generic key | Low | N/A | | HuggingFace | `hf_*` | High | `GET /api/whoami` | -### Tier 4 — Chinese/Regional +### Tier 4 -- Chinese/Regional | Provider | Key Pattern | Confidence | Verify | |----------|-------------|------------|--------| @@ -584,7 +485,7 @@ db: | Tencent Hunyuan | SecretId + SecretKey | Medium | `DescribeModels` | | SiliconFlow | `sf_*` | High | `GET /v1/models` | -### Tier 5 — Infrastructure/Gateway +### Tier 5 -- Infrastructure/Gateway | Provider | Key Pattern | Confidence | Verify | |----------|-------------|------------|--------| @@ -600,7 +501,7 @@ db: | Aether | Generic key | Low | `GET /v1/models` | | Not Diamond | Generic key | Low | `GET /v1/models` | -### Tier 6 — Emerging/Niche +### Tier 6 -- Emerging/Niche | Provider | Key Pattern | Confidence | Verify | 
|----------|-------------|------------|--------| @@ -620,7 +521,7 @@ db: | Neon AI | Generic key | Low | N/A | | Lamini | Generic key | Low | `GET /v1/models` | -### Tier 7 — Code & Dev Tools +### Tier 7 -- Code & Dev Tools | Provider | Key Pattern | Confidence | Verify | |----------|-------------|------------|--------| @@ -635,7 +536,7 @@ db: | IBM watsonx.ai | `ibm_*` | Medium | IAM token endpoint | | Oracle AI | Generic key | Low | N/A | -### Tier 8 — Self-Hosted/Open Infra +### Tier 8 -- Self-Hosted/Open Infra | Provider | Key Pattern | Confidence | Verify | |----------|-------------|------------|--------| @@ -650,7 +551,7 @@ db: | Triton Inference Server | N/A | N/A | `GET /v2/health/ready` | | Jan AI | N/A (local) | N/A | `GET /v1/models` | -### Tier 9 — Enterprise/Legacy +### Tier 9 -- Enterprise/Legacy | Provider | Key Pattern | Confidence | Verify | |----------|-------------|------------|--------| @@ -677,26 +578,15 @@ db: +--------v--+ +------v-----+ +-----v------+ | Input | | Recon | | Import | | Adapters | | Engine | | Adapters | - | - file | | (80+ src) | | - trufflehog| - | - git | | - IoT (6) | | - gitleaks | - | - stdin | | - Code(16) | | - generic | - | - url | | - Search(5)| +-----+------+ - | - clipboard| | - Paste(8+)| | - +--------+---+ | - Pkg (8) | | - | | - Cloud(7) | | - | | - CI/CD(5) | | - | | - Archive2 | | - | | - Forum(7) | | - | | - Collab(4)| | - | | - JS/FE(5) | | - | | - Logs (3) | | - | | - Intel(3) | | - | | - Mobile(1)| | - | | - DNS (2) | | - | | - API (3) | | - | +------+-----+ | - | | | - +-------+-------+--------------+ + | - file | | (18 live) | | - trufflehog| + | - dir | | - Code(10) | | - gitleaks | + | - git | | - Search(5)| +-----+------+ + | - stdin | | - Paste(3) | | + | - url | +------+-----+ | + | - clipboard| | | + +--------+---+ | | + | | | + +-------+------+--------------+ | +-------v--------+ | Scanner Engine | @@ -707,133 +597,30 @@ db: +------------+-------------+ | | | +-----v----+ +----v-----+ 
+----v-------+ - | Output | | Notify | | Web | - | - table | | - telegram| | Dashboard | - | - json | | - webhook| | - htmx | - | - sarif | | - slack | | - REST API | - | - csv | +----------+ | - SQLite | - +----------+ +------------+ + | Output | | Dork | | Key | + | - table | | Engine | | Management | + | - json | | - 150 | | - list | + | - sarif | | dorks | | - show | + | - csv | | - 8 src | | - export | + +----------+ +----------+ +------------+ +------------------------------------------+ | Provider Registry (108+ YAML providers) | - | Dork Registry (50+ YAML dorks) | + | Dork Registry (150 YAML dorks) | +------------------------------------------+ ``` ### Key Design Decisions -- **YAML Providers** — Adding a new provider = adding a YAML file. No recompile needed for pattern-only changes (when using external provider dir). Built-in providers are embedded at compile time. -- **Keyword Pre-filtering** — Before running regex, files are scanned for keywords. This provides ~10x speedup on large codebases. -- **Worker Pool** — Parallel scanning with configurable worker count. Default: CPU count. -- **Delta-based Git Scanning** — Only scans changes between commits, not entire trees. -- **SQLite Storage** — All scan results persisted with AES-256 encryption. +- **YAML Providers** -- Adding a new provider = adding a YAML file. No recompile needed for pattern-only changes (when using external provider dir). Built-in providers are embedded at compile time. +- **Keyword Pre-filtering** -- Before running regex, files are scanned for keywords via Aho-Corasick. This provides ~10x speedup on large codebases. +- **Worker Pool** -- Parallel scanning with configurable worker count via ants. Default: CPU count. +- **Delta-based Git Scanning** -- Only scans changes between commits, not entire trees. +- **SQLite Storage** -- All scan results persisted with AES-256 encryption. 
--- -## Security & Ethics - -### Built-in Protections -- Key values **masked by default** in terminal (first 8 + last 4 chars) — use `--unmask` for full keys -- **Full keys always available** via: `--unmask`, `--output=json`, `keyhunter keys show`, web dashboard, Telegram bot -- Database is **AES-256 encrypted** (full keys stored encrypted) -- API tokens stored **encrypted** in config -- No key values written to logs during `--verify` -- Web dashboard supports **basic auth / token auth** - -### Rate Limiting -| Source | Rate Limit | -|--------|-----------| -| GitHub API (auth) | 30 req/min | -| GitHub API (unauth) | 10 req/min | -| Shodan | Per API plan | -| Censys | 250 queries/day (free) | -| ZoomEye | 10,000 results/month (free) | -| FOFA | 100 results/query (free) | -| Netlas | 50 queries/day (free) | -| Google Custom Search | 100/day free, 10K/day paid | -| Bing Search | 1,000/month (free) | -| Stack Overflow | 300/day (no key), 10K/day (key) | -| HN Algolia | 10,000 req/hour | -| VirusTotal | 4 req/min (free) | -| IntelX | 10 searches/day (free) | -| GrayHatWarfare | Per plan | -| Wayback Machine | ~15 req/min | -| Paste sites | 1 req/2sec | -| npm/PyPI | Generous, be respectful | -| Trello | 100 req/10sec | -| Docker Hub | 100 pulls/6hr (unauth) | - -### Stealth & Ethics Flags -```bash ---stealth # User-agent rotation, increased request spacing ---respect-robots # Respect robots.txt (default: on) -``` - ---- - -## Use Cases - -### Red Team / Pentest -```bash -# Full multi-source recon against a target org -keyhunter recon github --query="targetcorp OPENAI_API_KEY" -keyhunter recon gitlab --query="targetcorp api_key" -keyhunter recon shodan --dork='http.html:"targetcorp" "sk-"' -keyhunter recon censys --query='services.http.response.body:"targetcorp" AND "api_key"' -keyhunter recon zoomeye --query='site:targetcorp.com +"api_key"' -keyhunter recon elasticsearch --shodan # Find exposed ES with leaked keys -keyhunter recon jenkins --shodan # Exposed Jenkins with 
build logs -keyhunter recon dotenv --domain-list=targetcorp-subdomains.txt # .env exposure -keyhunter recon wayback --domain=targetcorp.com # Historical leaks -keyhunter recon sourcemaps --domain=app.targetcorp.com # JS source maps -keyhunter recon crtsh --domain=targetcorp.com # Discover API subdomains -keyhunter recon full --providers=openai,anthropic # Everything at once -``` - -### DevSecOps / CI Pipeline -```bash -# Pre-commit hook -keyhunter hook install - -# GitHub Actions step -- name: KeyHunter Scan - run: | - keyhunter scan path . --output=sarif > keyhunter.sarif - # Upload to GitHub Security tab -``` - -### Bug Bounty -```bash -# Comprehensive target recon -keyhunter recon github --org=targetcorp --dork=auto --verify -keyhunter recon gist --query="targetcorp" -keyhunter recon paste --sources=all --query="targetcorp" -keyhunter recon postman --query="targetcorp" -keyhunter recon trello --query="targetcorp api key" -keyhunter recon notion --query="targetcorp API_KEY" -keyhunter recon confluence --shodan -keyhunter recon npm --query="targetcorp" # Check their published packages -keyhunter recon pypi --query="targetcorp" -keyhunter recon docker --query="targetcorp" --layers # Docker image layer scan -keyhunter recon apk --query="targetcorp" # Mobile app decompile -keyhunter recon swagger --domain=api.targetcorp.com -``` - -### Monitoring / Alerting -```bash -# Continuous monitoring with Telegram alerts -keyhunter schedule add \ - --name="monitor-github" \ - --cron="*/30 * * * *" \ - --command="recon github --dork=auto --providers=openai" \ - --notify=telegram - -keyhunter serve --telegram -``` - ---- - -## Dork Examples (150+ Built-in) +## Dork Examples (150 Built-in) ### GitHub ``` @@ -846,21 +633,11 @@ filename:docker-compose "API_KEY" "api_key" extension:ipynb filename:.toml "api_key" "sk-" filename:terraform.tfvars "api_key" -"kind: Secret" "data:" filename:*.yaml # K8s secrets -filename:.npmrc "_authToken" # npm tokens -filename:requirements.txt 
"openai" path:.env # Python projects -``` - -### GitLab -``` -"OPENAI_API_KEY" filename:.env -"sk-ant-" filename:*.py -"api_key" filename:settings.json ``` ### Google Dorking ``` -"sk-proj-" -github.com -stackoverflow.com # Outside known code sites +"sk-proj-" -github.com -stackoverflow.com "sk-ant-api03-" filetype:env "OPENAI_API_KEY" filetype:yml "ANTHROPIC_API_KEY" filetype:json @@ -868,67 +645,78 @@ inurl:.env "API_KEY" intitle:"index of" .env site:pastebin.com "sk-proj-" site:replit.com "OPENAI_API_KEY" -site:codesandbox.io "sk-ant-" -site:notion.so "API_KEY" -site:trello.com "openai" -site:docs.google.com "sk-proj-" -site:medium.com "ANTHROPIC_API_KEY" -site:dev.to "sk-proj-" -site:huggingface.co "OPENAI_API_KEY" -site:kaggle.com "api_key" "sk-" -intitle:"Swagger UI" "api_key" -inurl:graphql "authorization" "Bearer sk-" -filetype:tfstate "api_key" # Terraform state -filetype:ipynb "sk-proj-" # Jupyter notebooks ``` -### Shodan +### Shodan (for future IoT recon sources) ``` http.html:"openai" "api_key" port:8080 http.title:"LiteLLM" port:4000 http.html:"ollama" port:11434 http.title:"Kubernetes Dashboard" -"X-Jenkins" "200 OK" -http.title:"Kibana" port:5601 -http.title:"Grafana" -http.title:"Swagger UI" -http.title:"Gitea" port:3000 -http.html:"PrivateBin" -http.title:"MinIO Browser" -http.title:"Sentry" -http.title:"Confluence" -port:6443 "kube-apiserver" -http.html:"langchain" port:8000 ``` -### Censys -``` -services.http.response.body:"openai" and services.http.response.body:"sk-" -services.http.response.body:"langchain" and services.port:8000 -services.http.response.body:"OPENAI_API_KEY" -services.http.response.body:"sk-ant-api03" +--- + +## Use Cases + +### Red Team / Pentest +```bash +# Multi-source recon against a target org +keyhunter recon full --sources=github,gitlab,gist,pastebin + +# Scan a cloned repository +keyhunter scan ./target-repo/ --verify + +# Scan git history for rotated keys +keyhunter scan --git ./target-repo/ ``` -### ZoomEye -``` 
-app:"Elasticsearch" +"api_key" -app:"Jenkins" +openai -app:"Grafana" +anthropic -app:"Gitea" +### DevSecOps / CI Pipeline +```bash +# Pre-commit hook +keyhunter hook install + +# GitHub Actions step +- name: KeyHunter Scan + run: keyhunter scan . --output=sarif > keyhunter.sarif ``` -### FOFA -``` -body="sk-proj-" -body="OPENAI_API_KEY" -body="sk-ant-api03" -title="LiteLLM" -title="Swagger UI" && body="api_key" -title="Kibana" && body="authorization" +### Bug Bounty +```bash +# Search code hosting platforms for leaked keys +keyhunter recon full --sources=github,gitlab,bitbucket,gist,codeberg +keyhunter recon full --sources=huggingface,kaggle,replit,codesandbox + +# Search engine dorking +keyhunter recon full --sources=google,bing,duckduckgo,brave + +# Paste site monitoring +keyhunter recon full --sources=pastebin,pastesites,gistpaste ``` --- +## Security & Ethics + +### Built-in Protections +- Key values **masked by default** in terminal (first 8 + last 4 chars) -- use `--unmask` for full keys +- **Full keys always available** via: `--unmask`, `--output=json`, `keyhunter keys show` +- Database is **AES-256 encrypted** (full keys stored encrypted) +- API tokens stored **encrypted** in config +- No key values written to logs during `--verify` + +### Rate Limiting (Recon Sources) +| Source | Rate Limit | +|--------|-----------| +| GitHub API (auth) | 30 req/min | +| GitHub API (unauth) | 10 req/min | +| Google Custom Search | 100/day free, 10K/day paid | +| Bing Search | 1,000/month (free) | +| Brave Search | Per API plan | +| Paste sites | 1 req/2sec | + +--- + ## Contributing ### Adding a New Provider @@ -980,17 +768,28 @@ metadata: ## Roadmap -- [ ] Core scanning engine (file, git, stdin) -- [ ] 108 provider YAML definitions -- [ ] Active verification for all providers -- [ ] CLI with Cobra (scan, verify, import, recon, serve) -- [ ] TruffleHog & Gitleaks import adapters -- [ ] OSINT/Recon engine (Shodan, Censys, GitHub, GitLab, Paste, S3) -- [ ] Built-in dork 
engine with 50+ dorks -- [ ] Web dashboard (htmx + Tailwind + SQLite) +- [x] Core scanning engine (file, dir, git, stdin, url, clipboard) +- [x] 108 provider YAML definitions (Tier 1-9) +- [x] Active verification (YAML-driven HTTPVerifier) +- [x] Output formats: table, JSON, CSV, SARIF 2.1.0 +- [x] CLI with Cobra (scan, providers, config, keys, import, hook, dorks, recon, legal) +- [x] TruffleHog & Gitleaks import adapters +- [x] Key management (list, show, export, copy, delete, verify) +- [x] Git pre-commit hook (install/uninstall) +- [x] Dork engine with 150 built-in dorks across 8 sources +- [x] OSINT recon framework with 18 live sources +- [ ] IoT scanners (Shodan, Censys, ZoomEye, FOFA, Netlas, BinaryEdge) +- [ ] Cloud storage scanning (S3, GCS, Azure, DigitalOcean) +- [ ] Package registries (npm, PyPI, RubyGems, crates.io, Maven, NuGet) +- [ ] Container & IaC scanning (Docker Hub, Terraform, Helm, Ansible) +- [ ] CI/CD log scanning (GitHub Actions, Travis, CircleCI, Jenkins, GitLab CI) +- [ ] Web archives (Wayback Machine, CommonCrawl) +- [ ] Frontend leak detection (source maps, webpack, .env exposure) +- [ ] Forums & collaboration tools (Stack Overflow, Reddit, Notion, Trello) +- [ ] Threat intel (VirusTotal, Intelligence X, URLhaus) - [ ] Telegram bot with auto-notifications - [ ] Scheduled scanning (cron-based) -- [ ] Pre-commit hook & CI/CD integration (SARIF) +- [ ] Web dashboard (htmx + Tailwind + SQLite) - [ ] Docker image - [ ] Homebrew formula