Compare commits

15 commits: 4b39c0828a ... a5253cf9dd

- a5253cf9dd
- a2347f150a
- f0f22191ef
- 870431658d
- ade609d562
- c54e9c73ca
- 0afb19cc83
- 13905eb5ee
- 47d542b9de
- 8d97b263ec
- 6ab411cda2
- 6443e63b9a
- d6c35f4f14
- 270bbbfb49
- f5d8470aab
```diff
@@ -93,12 +93,12 @@ Requirements for initial release. Each maps to roadmap phases.
 
 ### OSINT/Recon — IoT & Internet Scanners
 
-- [ ] **RECON-IOT-01**: Shodan API search and dorking
-- [ ] **RECON-IOT-02**: Censys API search
-- [ ] **RECON-IOT-03**: ZoomEye API search
-- [ ] **RECON-IOT-04**: FOFA API search
-- [ ] **RECON-IOT-05**: Netlas API search
-- [ ] **RECON-IOT-06**: BinaryEdge API search
+- [x] **RECON-IOT-01**: Shodan API search and dorking
+- [x] **RECON-IOT-02**: Censys API search
+- [x] **RECON-IOT-03**: ZoomEye API search
+- [x] **RECON-IOT-04**: FOFA API search
+- [x] **RECON-IOT-05**: Netlas API search
+- [x] **RECON-IOT-06**: BinaryEdge API search
 
 ### OSINT/Recon — Code Hosting & Snippets
 
```
```diff
@@ -138,10 +138,10 @@ Requirements for initial release. Each maps to roadmap phases.
 
 ### OSINT/Recon — Cloud Storage
 
-- [ ] **RECON-CLOUD-01**: AWS S3 bucket enumeration and content scanning
-- [ ] **RECON-CLOUD-02**: GCS, Azure Blob, DigitalOcean Spaces, Backblaze B2 scanning
-- [ ] **RECON-CLOUD-03**: Self-hosted MinIO instance discovery via Shodan
-- [ ] **RECON-CLOUD-04**: GrayHatWarfare bucket search engine integration
+- [x] **RECON-CLOUD-01**: AWS S3 bucket enumeration and content scanning
+- [x] **RECON-CLOUD-02**: GCS, Azure Blob, DigitalOcean Spaces, Backblaze B2 scanning
+- [x] **RECON-CLOUD-03**: Self-hosted MinIO instance discovery via Shodan
+- [x] **RECON-CLOUD-04**: GrayHatWarfare bucket search engine integration
 
 ### OSINT/Recon — CI/CD Logs
```
```diff
@@ -23,7 +23,7 @@ Decimal phases appear between their surrounding integers in numeric order.
 - [ ] **Phase 9: OSINT Infrastructure** - Per-source rate limiter architecture and recon engine framework before any sources
 - [x] **Phase 10: OSINT Code Hosting** - GitHub, GitLab, Bitbucket, HuggingFace and 6 more code hosting sources (completed 2026-04-05)
 - [x] **Phase 11: OSINT Search & Paste** - Search engine dorking and paste site aggregation (completed 2026-04-06)
-- [ ] **Phase 12: OSINT IoT & Cloud Storage** - Shodan/Censys/ZoomEye/FOFA and S3/GCS/Azure cloud storage scanning
+- [x] **Phase 12: OSINT IoT & Cloud Storage** - Shodan/Censys/ZoomEye/FOFA and S3/GCS/Azure cloud storage scanning (completed 2026-04-06)
 - [ ] **Phase 13: OSINT Package Registries & Container/IaC** - npm/PyPI/crates.io and Docker Hub/K8s/Terraform scanning
 - [ ] **Phase 14: OSINT CI/CD Logs, Web Archives & Frontend Leaks** - Build logs, Wayback Machine, and JS bundle/env scanning
 - [ ] **Phase 15: OSINT Forums, Collaboration & Log Aggregators** - StackOverflow/Reddit/HN, Notion/Trello, Elasticsearch/Grafana/Sentry
```
```diff
@@ -255,10 +255,10 @@ Plans:
 **Plans**: 4 plans
 
 Plans:
-- [ ] 12-01-PLAN.md — ShodanSource + CensysSource + ZoomEyeSource (RECON-IOT-01, RECON-IOT-02, RECON-IOT-03)
-- [ ] 12-02-PLAN.md — FOFASource + NetlasSource + BinaryEdgeSource (RECON-IOT-04, RECON-IOT-05, RECON-IOT-06)
-- [ ] 12-03-PLAN.md — S3Scanner + GCSScanner + AzureBlobScanner + DOSpacesScanner (RECON-CLOUD-01, RECON-CLOUD-02, RECON-CLOUD-03, RECON-CLOUD-04)
-- [ ] 12-04-PLAN.md — RegisterAll wiring + cmd/recon.go credentials + integration test (all Phase 12 reqs)
+- [x] 12-01-PLAN.md — ShodanSource + CensysSource + ZoomEyeSource (RECON-IOT-01, RECON-IOT-02, RECON-IOT-03)
+- [x] 12-02-PLAN.md — FOFASource + NetlasSource + BinaryEdgeSource (RECON-IOT-04, RECON-IOT-05, RECON-IOT-06)
+- [x] 12-03-PLAN.md — S3Scanner + GCSScanner + AzureBlobScanner + DOSpacesScanner (RECON-CLOUD-01, RECON-CLOUD-02, RECON-CLOUD-03, RECON-CLOUD-04)
+- [x] 12-04-PLAN.md — RegisterAll wiring + cmd/recon.go credentials + integration test (all Phase 12 reqs)
 
 ### Phase 13: OSINT Package Registries & Container/IaC
 **Goal**: Users can scan npm, PyPI, and 6 other package registries for packages containing leaked keys, and scan Docker Hub image layers, Kubernetes configs, Terraform state files, Helm charts, and Ansible Galaxy for secrets in infrastructure code
```
```diff
@@ -349,7 +349,7 @@ Phases execute in numeric order: 1 → 2 → 3 → ... → 18
 | 9. OSINT Infrastructure | 2/6 | In Progress | |
 | 10. OSINT Code Hosting | 9/9 | Complete | 2026-04-06 |
 | 11. OSINT Search & Paste | 3/3 | Complete | 2026-04-06 |
-| 12. OSINT IoT & Cloud Storage | 0/? | Not started | - |
+| 12. OSINT IoT & Cloud Storage | 4/4 | Complete | 2026-04-06 |
 | 13. OSINT Package Registries & Container/IaC | 0/? | Not started | - |
 | 14. OSINT CI/CD Logs, Web Archives & Frontend Leaks | 0/? | Not started | - |
 | 15. OSINT Forums, Collaboration & Log Aggregators | 0/? | Not started | - |
```
```diff
@@ -3,14 +3,14 @@ gsd_state_version: 1.0
 milestone: v1.0
 milestone_name: milestone
 status: completed
-stopped_at: Completed 11-03-PLAN.md
-last_updated: "2026-04-06T09:09:48.100Z"
+stopped_at: Completed 12-04-PLAN.md
+last_updated: "2026-04-06T09:45:38.963Z"
 last_activity: 2026-04-06
 progress:
   total_phases: 18
-  completed_phases: 11
-  total_plans: 65
-  completed_plans: 66
+  completed_phases: 12
+  total_plans: 69
+  completed_plans: 70
   percent: 20
 ---
 
```
```diff
@@ -21,13 +21,13 @@ progress:
 See: .planning/PROJECT.md (updated 2026-04-04)
 
 **Core value:** Detect leaked LLM API keys across more providers and more internet sources than any other tool, with active verification to confirm keys are real and alive.
-**Current focus:** Phase 11 — osint-search-paste (complete)
+**Current focus:** Phase 12 — osint_iot_cloud_storage (in progress)
 
 ## Current Position
 
-Phase: 12
+Phase: 13
 Plan: Not started
-Status: Phase 11 complete
+Status: Plan 04 complete
 Last activity: 2026-04-06
 
 Progress: [██░░░░░░░░] 20%
```
```diff
@@ -91,6 +91,8 @@ Progress: [██░░░░░░░░] 20%
 | Phase 10 P09 | 12min | 2 tasks | 5 files |
 | Phase 11 P03 | 6min | 2 tasks | 4 files |
 | Phase 11 P01 | 3min | 2 tasks | 11 files |
+| Phase 12 P01 | 3min | 2 tasks | 6 files |
+| Phase 12 P04 | 14min | 2 tasks | 4 files |
 
 ## Accumulated Context
 
```
```diff
@@ -131,6 +133,8 @@ Recent decisions affecting current work:
 - [Phase 11]: RegisterAll extended to 18 sources (10 Phase 10 + 8 Phase 11); paste sources use BaseURL prefix in integration test to avoid /search path collision
 - [Phase 11]: Integration test uses injected test platforms for PasteSites (same pattern as SandboxesSource)
 - [Phase 11]: All five search sources use dork query format to focus on paste/code hosting leak sites
+- [Phase 12]: Shodan/Censys/ZoomEye use bare keyword queries; Censys POST+BasicAuth, Shodan key param, ZoomEye API-KEY header
+- [Phase 12]: RegisterAll extended to 28 sources (18 Phase 10-11 + 10 Phase 12); cloud scanners credentialless, IoT scanners credential-gated
 
 ### Pending Todos
 
```
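For orientation, a minimal sketch of the three request-auth styles the Phase 12 decision above names — key as a query parameter (Shodan), POST with HTTP Basic auth (Censys), and a dedicated API-KEY header (ZoomEye). Endpoint paths and the placeholder credentials are illustrative assumptions, not the project's actual source code:

```go
package main

import (
	"fmt"
	"net/http"
	"strings"
)

// buildRequests shows the three auth styles side by side. URLs and
// credential placeholders are assumptions for illustration only.
func buildRequests(query string) ([]*http.Request, error) {
	// Shodan: API key passed as a query parameter.
	shodan, err := http.NewRequest("GET",
		"https://api.shodan.io/shodan/host/search?key=SHODAN_KEY&query="+query, nil)
	if err != nil {
		return nil, err
	}

	// Censys: POST body with HTTP Basic auth (API ID + secret).
	censys, err := http.NewRequest("POST",
		"https://search.censys.io/api/v2/hosts/search",
		strings.NewReader(fmt.Sprintf(`{"q": %q}`, query)))
	if err != nil {
		return nil, err
	}
	censys.SetBasicAuth("CENSYS_API_ID", "CENSYS_SECRET")
	censys.Header.Set("Content-Type", "application/json")

	// ZoomEye: key sent in a dedicated API-KEY request header.
	zoomeye, err := http.NewRequest("GET",
		"https://api.zoomeye.org/host/search?query="+query, nil)
	if err != nil {
		return nil, err
	}
	zoomeye.Header.Set("API-KEY", "ZOOMEYE_KEY")

	return []*http.Request{shodan, censys, zoomeye}, nil
}

func main() {
	reqs, err := buildRequests("keyword")
	if err != nil {
		panic(err)
	}
	for _, r := range reqs {
		fmt.Println(r.Method, r.URL.Host)
	}
}
```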
```diff
@@ -145,6 +149,6 @@ None yet.
 
 ## Session Continuity
 
-Last session: 2026-04-06T09:07:51.980Z
-Stopped at: Completed 11-03-PLAN.md
+Last session: 2026-04-06T09:42:09.000Z
+Stopped at: Completed 12-04-PLAN.md
 Resume file: None
```
.planning/phases/04-input-sources/04-01-PLAN.md — new file, 114 lines

---
phase: 04-input-sources
plan: 01
type: execute
wave: 0
depends_on: []
files_modified:
  - go.mod
  - go.sum
autonomous: true
requirements: []
must_haves:
  truths:
    - "go-git/v5, atotto/clipboard, x/exp/mmap are available as imports"
    - "go build ./... succeeds with new dependencies"
  artifacts:
    - path: "go.mod"
      provides: "Module declarations for go-git, clipboard, and x/exp"
      contains: "github.com/go-git/go-git/v5"
    - path: "go.sum"
      provides: "Checksums for added dependencies"
  key_links:
    - from: "go.mod"
      to: "module cache"
      via: "go mod tidy"
      pattern: "go-git/go-git/v5"
---

<objective>
Add the three external Go dependencies that Phase 4 input sources require:
- `github.com/go-git/go-git/v5` — git history traversal (INPUT-02)
- `github.com/atotto/clipboard` — cross-platform clipboard access (INPUT-05)
- `golang.org/x/exp/mmap` — memory-mapped large file reads (CORE-07)

Purpose: Wave 0 dependency bootstrap so the parallel source implementation plans (04-02, 04-03, 04-04) compile cleanly on first attempt with no dependency resolution thrash.
Output: Updated go.mod and go.sum with all three modules resolved.
</objective>

<execution_context>
@$HOME/.claude/get-shit-done/workflows/execute-plan.md
@$HOME/.claude/get-shit-done/templates/summary.md
</execution_context>

<context>
@.planning/PROJECT.md
@.planning/ROADMAP.md
@.planning/STATE.md
@.planning/phases/04-input-sources/04-CONTEXT.md
@go.mod
</context>

<tasks>

<task type="auto">
<name>Task 1: Add go-git, clipboard, and x/exp/mmap dependencies</name>
<read_first>
- go.mod
- .planning/phases/04-input-sources/04-CONTEXT.md
</read_first>
<files>go.mod, go.sum</files>
<action>
Run the following commands from the repo root in order:

```bash
go get github.com/go-git/go-git/v5@latest
go get github.com/atotto/clipboard@latest
go get golang.org/x/exp/mmap@latest
go mod tidy
go build ./...
```

Verify the `require` block in go.mod now contains direct entries (non-indirect) for:

```
github.com/go-git/go-git/v5 vX.Y.Z
github.com/atotto/clipboard vX.Y.Z
golang.org/x/exp vYYYYMMDD-hash
```

If `go build ./...` fails, do NOT try to fix anything beyond the dependency graph — unrelated build failures must be surfaced. If `go mod tidy` moves a module to indirect, that is acceptable only if no source file yet imports it; the follow-on plans in Wave 1 will promote them to direct.

Do NOT modify any source files in this plan. This is dependency bootstrap only.
</action>
<verify>
<automated>go build ./... && grep -E "go-git/go-git/v5|atotto/clipboard|golang.org/x/exp" go.mod</automated>
</verify>
<acceptance_criteria>
- `grep "github.com/go-git/go-git/v5" go.mod` returns a match
- `grep "github.com/atotto/clipboard" go.mod` returns a match
- `grep "golang.org/x/exp" go.mod` returns a match
- `go build ./...` exits 0
- `go.sum` contains entries for all three modules
</acceptance_criteria>
<done>All three new modules are present in go.mod, go.sum has their checksums, and `go build ./...` succeeds.</done>
</task>

</tasks>

<verification>
- `go build ./...` succeeds
- `go vet ./...` succeeds
- `grep -c "go-git/go-git/v5\|atotto/clipboard\|golang.org/x/exp" go.mod` returns 3 or more
</verification>

<success_criteria>
Dependencies resolved and build is green. Wave 1 plans can import from these modules without needing their own `go get` calls.
</success_criteria>

<output>
After completion, create `.planning/phases/04-input-sources/04-01-SUMMARY.md` with:
- Resolved version numbers for the three modules
- Any warnings from `go mod tidy`
- Confirmation that `go build ./...` passed
</output>
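As a point of reference, a minimal sketch of the `golang.org/x/exp/mmap` API this plan pulls in: `mmap.Open` returns a memory-mapped handle with `Len`, `ReadAt`, and `Close`, which is what the later plans build their large-file path on. The file path below is a placeholder:

```go
package main

import (
	"fmt"

	"golang.org/x/exp/mmap"
)

// Open a file as a memory-mapped io.ReaderAt, read a prefix of it,
// and close the mapping. "/var/log/syslog" stands in for any large file.
func main() {
	ra, err := mmap.Open("/var/log/syslog")
	if err != nil {
		panic(err)
	}
	defer ra.Close()

	buf := make([]byte, 64)
	n := ra.Len()
	if n > len(buf) {
		n = len(buf) // don't read past the mapping
	}
	if _, err := ra.ReadAt(buf[:n], 0); err != nil {
		panic(err)
	}
	fmt.Printf("first %d bytes: %q\n", n, buf[:n])
}
```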
.planning/phases/04-input-sources/04-02-PLAN.md — new file, 573 lines

---
phase: 04-input-sources
plan: 02
type: execute
wave: 1
depends_on: ["04-01"]
files_modified:
  - pkg/engine/sources/dir.go
  - pkg/engine/sources/dir_test.go
  - pkg/engine/sources/file.go
  - pkg/engine/sources/file_test.go
autonomous: true
requirements:
  - INPUT-01
  - CORE-07
must_haves:
  truths:
    - "DirSource recursively walks a directory and emits Chunks for every non-excluded file"
    - "Glob exclusion patterns (--exclude) skip matching files by basename AND full relative path"
    - "Default exclusions skip .git/, node_modules/, vendor/, *.min.js, *.map"
    - "Binary files (null byte in first 512 bytes) are skipped"
    - "Files larger than the mmap threshold (10MB) are read via golang.org/x/exp/mmap, smaller files via os.ReadFile"
    - "File emission order is deterministic (sorted) for reproducible tests"
  artifacts:
    - path: "pkg/engine/sources/dir.go"
      provides: "DirSource implementing Source interface for recursive directory scanning"
      exports: ["DirSource", "NewDirSource"]
      min_lines: 120
    - path: "pkg/engine/sources/dir_test.go"
      provides: "Test coverage for recursive walk, exclusion, binary skip, mmap threshold"
      min_lines: 100
    - path: "pkg/engine/sources/file.go"
      provides: "FileSource extended to use mmap for files > 10MB"
      contains: "mmap"
  key_links:
    - from: "pkg/engine/sources/dir.go"
      to: "golang.org/x/exp/mmap"
      via: "mmap.Open for large files"
      pattern: "mmap\\.Open"
    - from: "pkg/engine/sources/dir.go"
      to: "filepath.WalkDir"
      via: "recursive traversal"
      pattern: "filepath\\.WalkDir"
    - from: "pkg/engine/sources/dir.go"
      to: "types.Chunk"
      via: "channel send"
      pattern: "out <- types\\.Chunk"
---

<objective>
Implement `DirSource` — a recursive directory scanner that walks a root path via `filepath.WalkDir`, honors glob exclusion patterns, detects and skips binary files, and uses memory-mapped I/O for large files. This satisfies INPUT-01 (directory/recursive scanning with exclusions) and CORE-07 (mmap large file reading).

Purpose: The most common scan target is a repo directory, not a single file. This plan replaces the "wrap FileSource per path" hack with a purpose-built recursive source that emits deterministically ordered chunks and scales to multi-GB files without blowing out memory.
Output: `pkg/engine/sources/dir.go`, `dir_test.go`, plus a small `file.go` update to share the mmap read helper.
</objective>

<execution_context>
@$HOME/.claude/get-shit-done/workflows/execute-plan.md
@$HOME/.claude/get-shit-done/templates/summary.md
</execution_context>

<context>
@.planning/PROJECT.md
@.planning/phases/04-input-sources/04-CONTEXT.md
@pkg/engine/sources/source.go
@pkg/engine/sources/file.go
@pkg/types/chunk.go

<interfaces>
Source interface (pkg/engine/sources/source.go):
```go
type Source interface {
	Chunks(ctx context.Context, out chan<- types.Chunk) error
}
```

Chunk type (pkg/types/chunk.go):
```go
type Chunk struct {
	Data   []byte
	Source string
	Offset int64
}
```

Existing constants in pkg/engine/sources/file.go:
```go
const defaultChunkSize = 4096
const chunkOverlap = 256
```
</interfaces>
</context>

<tasks>

<task type="auto" tdd="true">
<name>Task 1: Implement DirSource with recursive walk, exclusion, binary detection, and mmap</name>
<read_first>
- pkg/engine/sources/source.go
- pkg/engine/sources/file.go
- pkg/types/chunk.go
- .planning/phases/04-input-sources/04-CONTEXT.md (Directory/File Scanning section)
</read_first>
<files>
pkg/engine/sources/dir.go,
pkg/engine/sources/dir_test.go,
pkg/engine/sources/file.go
</files>
<behavior>
- Test 1: DirSource walks a temp dir containing 3 text files, emits 3 chunks, source fields match file paths
- Test 2: Default exclusions skip `.git/config`, `node_modules/foo.js`, `vendor/bar.go`, `app.min.js`, `app.js.map`
- Test 3: User-supplied exclude pattern `*.log` skips `foo.log` but keeps `foo.txt`
- Test 4: Binary file (first 512 bytes contain a null byte) is skipped; text file is emitted
- Test 5: File >10MB is read via mmap path and emits chunks whose concatenated data equals file content
- Test 6: File emission order is deterministic (sorted lexicographically) across two runs on same dir
- Test 7: ctx cancellation mid-walk returns ctx.Err() promptly
- Test 8: Non-existent root returns an error
</behavior>
<action>
Create `pkg/engine/sources/dir.go` with the following complete implementation:

```go
package sources

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
	"sort"
	"strings"

	"golang.org/x/exp/mmap"

	"github.com/salvacybersec/keyhunter/pkg/types"
)

// MmapThreshold is the file size above which DirSource/FileSource use memory-mapped reads.
const MmapThreshold int64 = 10 * 1024 * 1024 // 10 MB

// BinarySniffSize is the number of leading bytes inspected for a NUL byte
// to classify a file as binary and skip it.
const BinarySniffSize = 512

// DefaultExcludes are glob patterns excluded from directory scans unless
// the caller passes an empty slice explicitly via NewDirSourceRaw.
var DefaultExcludes = []string{
	".git/**",
	"node_modules/**",
	"vendor/**",
	"*.min.js",
	"*.map",
}

// DirSource walks a directory recursively and emits Chunks for every
// non-excluded, non-binary file it finds. Files larger than MmapThreshold
// are read via mmap; smaller files use os.ReadFile.
type DirSource struct {
	Root      string
	Excludes  []string // glob patterns applied to path basename AND full relative path
	ChunkSize int
}

// NewDirSource creates a DirSource with the default exclusions merged
// with the caller-supplied extras.
func NewDirSource(root string, extraExcludes ...string) *DirSource {
	merged := make([]string, 0, len(DefaultExcludes)+len(extraExcludes))
	merged = append(merged, DefaultExcludes...)
	merged = append(merged, extraExcludes...)
	return &DirSource{Root: root, Excludes: merged, ChunkSize: defaultChunkSize}
}

// NewDirSourceRaw creates a DirSource with ONLY the caller-supplied excludes
// (no defaults). Useful for tests and advanced users.
func NewDirSourceRaw(root string, excludes []string) *DirSource {
	return &DirSource{Root: root, Excludes: excludes, ChunkSize: defaultChunkSize}
}

// Chunks implements Source. It walks d.Root, filters excluded and binary
// files, reads each remaining file (via mmap above MmapThreshold), and
// emits overlapping chunks through out.
func (d *DirSource) Chunks(ctx context.Context, out chan<- types.Chunk) error {
	if d.Root == "" {
		return errors.New("DirSource: Root is empty")
	}
	info, err := os.Stat(d.Root)
	if err != nil {
		return fmt.Errorf("DirSource: stat root: %w", err)
	}
	if !info.IsDir() {
		return fmt.Errorf("DirSource: root %q is not a directory", d.Root)
	}

	// Collect paths first for deterministic ordering across runs.
	var paths []string
	err = filepath.WalkDir(d.Root, func(path string, de fs.DirEntry, werr error) error {
		if werr != nil {
			return werr
		}
		rel, _ := filepath.Rel(d.Root, path)
		if de.IsDir() {
			if d.isExcluded(rel, de.Name()) {
				return filepath.SkipDir
			}
			return nil
		}
		if d.isExcluded(rel, de.Name()) {
			return nil
		}
		paths = append(paths, path)
		return nil
	})
	if err != nil {
		return fmt.Errorf("DirSource: walk: %w", err)
	}
	sort.Strings(paths)

	for _, p := range paths {
		if err := ctx.Err(); err != nil {
			return err
		}
		if err := d.emitFile(ctx, p, out); err != nil {
			// Per-file errors are non-fatal: continue walking, but respect ctx.
			if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
				return err
			}
			// Swallow per-file errors; the engine logs elsewhere.
			continue
		}
	}
	return nil
}

// isExcluded returns true if either the relative path or the basename matches
// any configured glob pattern.
func (d *DirSource) isExcluded(rel, base string) bool {
	rel = filepath.ToSlash(rel)
	for _, pat := range d.Excludes {
		pat = filepath.ToSlash(pat)
		// Match against basename.
		if ok, _ := filepath.Match(pat, base); ok {
			return true
		}
		// Match against full relative path.
		if ok, _ := filepath.Match(pat, rel); ok {
			return true
		}
		// `dir/**` style — naive prefix match against the leading segment.
		if strings.HasSuffix(pat, "/**") {
			prefix := strings.TrimSuffix(pat, "/**")
			if rel == prefix || strings.HasPrefix(rel, prefix+"/") {
				return true
			}
		}
	}
	return false
}

// emitFile reads a single file and pushes its chunks onto out.
func (d *DirSource) emitFile(ctx context.Context, path string, out chan<- types.Chunk) error {
	fi, err := os.Stat(path)
	if err != nil {
		return err
	}
	size := fi.Size()
	if size == 0 {
		return nil
	}

	var data []byte
	if size >= MmapThreshold {
		ra, err := mmap.Open(path)
		if err != nil {
			return fmt.Errorf("mmap open %s: %w", path, err)
		}
		defer ra.Close()
		data = make([]byte, ra.Len())
		if _, err := ra.ReadAt(data, 0); err != nil {
			return fmt.Errorf("mmap read %s: %w", path, err)
		}
	} else {
		data, err = os.ReadFile(path)
		if err != nil {
			return err
		}
	}

	if isBinary(data) {
		return nil
	}
	return emitChunks(ctx, data, path, d.ChunkSize, out)
}

// isBinary reports whether the leading BinarySniffSize bytes contain a NUL byte.
func isBinary(data []byte) bool {
	n := len(data)
	if n > BinarySniffSize {
		n = BinarySniffSize
	}
	return bytes.IndexByte(data[:n], 0x00) >= 0
}

// emitChunks is the shared overlapping-chunk emitter used by FileSource and
// DirSource. Each chunk's Offset is its start position within data, so
// overlapping chunks report the true file offset of their first byte.
func emitChunks(ctx context.Context, data []byte, source string, chunkSize int, out chan<- types.Chunk) error {
	if chunkSize <= 0 {
		chunkSize = defaultChunkSize
	}
	if len(data) <= chunkSize {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case out <- types.Chunk{Data: data, Source: source, Offset: 0}:
		}
		return nil
	}
	for start := 0; start < len(data); start += chunkSize - chunkOverlap {
		end := start + chunkSize
		if end > len(data) {
			end = len(data)
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case out <- types.Chunk{Data: data[start:end], Source: source, Offset: int64(start)}:
		}
		if end == len(data) {
			break
		}
	}
	return nil
}
```

Update `pkg/engine/sources/file.go` so FileSource reuses `emitChunks` and adopts the same mmap threshold for large single-file scans:

```go
package sources

import (
	"context"
	"os"

	"golang.org/x/exp/mmap"

	"github.com/salvacybersec/keyhunter/pkg/types"
)

const defaultChunkSize = 4096
const chunkOverlap = 256

// FileSource reads a single file and emits overlapping chunks.
// For files >= MmapThreshold it uses golang.org/x/exp/mmap.
type FileSource struct {
	Path      string
	ChunkSize int
}

func NewFileSource(path string) *FileSource {
	return &FileSource{Path: path, ChunkSize: defaultChunkSize}
}

func (f *FileSource) Chunks(ctx context.Context, out chan<- types.Chunk) error {
	fi, err := os.Stat(f.Path)
	if err != nil {
		return err
	}
	size := fi.Size()
	if size == 0 {
		return nil
	}
	var data []byte
	if size >= MmapThreshold {
		ra, err := mmap.Open(f.Path)
		if err != nil {
			return err
		}
		defer ra.Close()
		data = make([]byte, ra.Len())
		if _, err := ra.ReadAt(data, 0); err != nil {
			return err
		}
	} else {
		data, err = os.ReadFile(f.Path)
		if err != nil {
			return err
		}
	}
	if isBinary(data) {
		return nil
	}
	return emitChunks(ctx, data, f.Path, f.ChunkSize, out)
}
```

Create `pkg/engine/sources/dir_test.go` with a comprehensive suite:

```go
package sources

import (
	"context"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"testing"
	"time"

	"github.com/stretchr/testify/require"

	"github.com/salvacybersec/keyhunter/pkg/types"
)

func drain(t *testing.T, src Source) []types.Chunk {
	t.Helper()
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	out := make(chan types.Chunk, 1024)
	errCh := make(chan error, 1)
	go func() { errCh <- src.Chunks(ctx, out); close(out) }()
	var got []types.Chunk
	for c := range out {
		got = append(got, c)
	}
	require.NoError(t, <-errCh)
	return got
}

func writeFile(t *testing.T, path, content string) {
	t.Helper()
	require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755))
	require.NoError(t, os.WriteFile(path, []byte(content), 0o644))
}

func TestDirSource_RecursiveWalk(t *testing.T) {
	root := t.TempDir()
	writeFile(t, filepath.Join(root, "a.txt"), "alpha content")
	writeFile(t, filepath.Join(root, "sub", "b.txt"), "bravo content")
	writeFile(t, filepath.Join(root, "sub", "deep", "c.txt"), "charlie content")

	chunks := drain(t, NewDirSourceRaw(root, nil))
	require.Len(t, chunks, 3)

	sources := make([]string, 0, len(chunks))
	for _, c := range chunks {
		sources = append(sources, c.Source)
	}
	// Deterministic sorted order.
	require.True(t, sort.StringsAreSorted(sources), "emission order must be sorted, got %v", sources)
}

func TestDirSource_DefaultExcludes(t *testing.T) {
	root := t.TempDir()
	writeFile(t, filepath.Join(root, "keep.txt"), "keep me")
	writeFile(t, filepath.Join(root, ".git", "config"), "[core]")
	writeFile(t, filepath.Join(root, "node_modules", "foo.js"), "x")
	writeFile(t, filepath.Join(root, "vendor", "bar.go"), "package x")
	writeFile(t, filepath.Join(root, "app.min.js"), "y")
	writeFile(t, filepath.Join(root, "app.js.map"), "{}")

	chunks := drain(t, NewDirSource(root))
	require.Len(t, chunks, 1)
	require.Contains(t, chunks[0].Source, "keep.txt")
}

func TestDirSource_UserExclude(t *testing.T) {
	root := t.TempDir()
	writeFile(t, filepath.Join(root, "keep.txt"), "keep")
	writeFile(t, filepath.Join(root, "drop.log"), "drop")

	chunks := drain(t, NewDirSourceRaw(root, []string{"*.log"}))
	require.Len(t, chunks, 1)
	require.Contains(t, chunks[0].Source, "keep.txt")
}

func TestDirSource_BinarySkipped(t *testing.T) {
	root := t.TempDir()
	writeFile(t, filepath.Join(root, "text.txt"), "plain text content")
	binPath := filepath.Join(root, "blob.bin")
	require.NoError(t, os.WriteFile(binPath, []byte{0x7f, 'E', 'L', 'F', 0x00, 0x01, 0x02}, 0o644))

	chunks := drain(t, NewDirSourceRaw(root, nil))
	require.Len(t, chunks, 1)
	require.Contains(t, chunks[0].Source, "text.txt")
}

func TestDirSource_MmapLargeFile(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping large file test in short mode")
	}
	root := t.TempDir()
	big := filepath.Join(root, "big.txt")
	// Construct a payload slightly above MmapThreshold.
	payload := strings.Repeat("API_KEY=xxxxxxxxxxxxxxxxxxxx\n", (int(MmapThreshold)/28)+10)
	require.NoError(t, os.WriteFile(big, []byte(payload), 0o644))

	chunks := drain(t, NewDirSourceRaw(root, nil))
	// Sanity checks: chunks arrived via the mmap path and carry the right source.
	require.NotEmpty(t, chunks)
	require.Equal(t, big, chunks[0].Source)
}

func TestDirSource_MissingRoot(t *testing.T) {
	src := NewDirSourceRaw("/definitely/does/not/exist/keyhunter-xyz", nil)
	ctx := context.Background()
	out := make(chan types.Chunk, 1)
	err := src.Chunks(ctx, out)
	require.Error(t, err)
}

func TestDirSource_CtxCancellation(t *testing.T) {
	root := t.TempDir()
	for i := 0; i < 50; i++ {
		writeFile(t, filepath.Join(root, "f", string(rune('a'+i%26))+".txt"), "payload")
	}
	ctx, cancel := context.WithCancel(context.Background())
	cancel() // pre-cancelled
	out := make(chan types.Chunk, 1024)
	err := NewDirSourceRaw(root, nil).Chunks(ctx, out)
	require.ErrorIs(t, err, context.Canceled)
}
```

Also add a minimal update to `pkg/engine/sources/file_test.go` if it exists — if not present, skip. Do NOT alter any other source files in this plan.
</action>
<verify>
<automated>go test ./pkg/engine/sources/... -run 'TestDirSource|TestFileSource' -race -count=1</automated>
</verify>
<acceptance_criteria>
- `go build ./pkg/engine/sources/...` exits 0
- `go test ./pkg/engine/sources/... -run TestDirSource -race -count=1` passes all subtests
- `grep -n "mmap.Open" pkg/engine/sources/dir.go pkg/engine/sources/file.go` returns two hits
- `grep -n "filepath.WalkDir" pkg/engine/sources/dir.go` returns a hit
- `grep -n "DefaultExcludes" pkg/engine/sources/dir.go` returns a hit
- `grep -n "isBinary" pkg/engine/sources/dir.go` returns a hit
</acceptance_criteria>
<done>
DirSource implements Source, walks recursively, honors default and user glob exclusions, skips binary files, and uses mmap above 10MB. FileSource refactored to share the same mmap/emit helpers. All tests green under -race.
</done>
</task>

</tasks>

<verification>
- `go test ./pkg/engine/sources/... -race -count=1` passes
- `go vet ./pkg/engine/sources/...` clean
- All acceptance criteria grep matches hit
</verification>

<success_criteria>
A caller can create `sources.NewDirSource("./myrepo", "*.log")` and receive chunks for every non-excluded, non-binary file in deterministic order, with files >10MB read via mmap.
</success_criteria>

<output>
After completion, create `.planning/phases/04-input-sources/04-02-SUMMARY.md` documenting:
- File list with line counts
- Test names and pass status
- Any deviations from the planned exclude semantics (e.g., `**` handling)
</output>
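A short usage sketch of the DirSource defined above, assuming the plan's package layout (`github.com/salvacybersec/keyhunter/...`). It also illustrates the overlap arithmetic: with chunkSize 4096 and chunkOverlap 256, successive chunks of a large file start every 3840 bytes, so any secret shorter than 256 bytes appears whole in at least one chunk:

```go
package main

import (
	"context"
	"fmt"

	"github.com/salvacybersec/keyhunter/pkg/engine/sources"
	"github.com/salvacybersec/keyhunter/pkg/types"
)

// Drives DirSource the way the engine would: one goroutine produces
// chunks onto a channel, the caller drains them and inspects offsets.
func main() {
	src := sources.NewDirSource("./myrepo", "*.log")

	out := make(chan types.Chunk, 64)
	errCh := make(chan error, 1)
	go func() {
		errCh <- src.Chunks(context.Background(), out)
		close(out)
	}()

	for c := range out {
		// For a large file, offsets run 0, 3840, 7680, ... (4096 - 256 step).
		fmt.Printf("%s @%d: %d bytes\n", c.Source, c.Offset, len(c.Data))
	}
	if err := <-errCh; err != nil {
		fmt.Println("scan error:", err)
	}
}
```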
456
.planning/phases/04-input-sources/04-03-PLAN.md
Normal file
456
.planning/phases/04-input-sources/04-03-PLAN.md
Normal file
@@ -0,0 +1,456 @@
|
|||||||
|
---
|
||||||
|
phase: 04-input-sources
|
||||||
|
plan: 03
|
||||||
|
type: execute
|
||||||
|
wave: 1
|
||||||
|
depends_on: ["04-01"]
|
||||||
|
files_modified:
|
||||||
|
- pkg/engine/sources/git.go
|
||||||
|
- pkg/engine/sources/git_test.go
|
||||||
|
autonomous: true
|
||||||
|
requirements:
|
||||||
|
- INPUT-02
|
||||||
|
must_haves:
|
||||||
|
truths:
|
||||||
|
- "GitSource opens a local git repo via go-git and iterates commits on all branches and tags"
|
||||||
|
- "Each unique blob (by OID) is scanned exactly once — duplicate blobs across commits are skipped"
|
||||||
|
- "Finding.Source is formatted as 'git:<short-sha>:<path>' for every emitted chunk"
|
||||||
|
- "--since filter (passed via GitSource.Since time.Time) excludes commits older than the cutoff"
|
||||||
|
- "Bare repos and regular repos with worktrees both work"
|
||||||
|
artifacts:
|
||||||
|
- path: "pkg/engine/sources/git.go"
|
||||||
|
provides: "GitSource implementing Source interface via go-git/v5"
|
||||||
|
exports: ["GitSource", "NewGitSource"]
|
||||||
|
min_lines: 120
|
||||||
|
- path: "pkg/engine/sources/git_test.go"
|
||||||
|
provides: "Tests using an in-process go-git repo fixture"
|
||||||
|
min_lines: 100
|
||||||
|
key_links:
|
||||||
|
- from: "pkg/engine/sources/git.go"
|
||||||
|
to: "github.com/go-git/go-git/v5"
|
||||||
|
via: "git.PlainOpen"
|
||||||
|
pattern: "git\\.PlainOpen"
|
||||||
|
- from: "pkg/engine/sources/git.go"
|
||||||
|
to: "repo.References"
|
||||||
|
via: "iterating refs/heads + refs/tags"
|
||||||
|
pattern: "References\\(\\)"
|
||||||
|
- from: "pkg/engine/sources/git.go"
|
||||||
|
to: "types.Chunk"
|
||||||
|
via: "channel send with git:sha:path source"
|
||||||
|
pattern: "git:"
|
||||||
|
---
|
||||||
|
|
||||||
|
<objective>
|
||||||
|
Implement `GitSource` — a git-history-aware input adapter that walks every commit across every branch and tag in a local repository, deduplicates blob scans by OID, and emits chunks with commit-SHA-prefixed source identifiers. Satisfies INPUT-02.
|
||||||
|
|
||||||
|
Purpose: Leaked keys often exist only in git history — deleted from HEAD but still reachable via old commits. A one-shot HEAD scan misses them. This source walks the full commit graph using `go-git/v5` with blob-level deduplication so a 10k-commit repo with 200k historical files scans in minutes, not hours.
|
||||||
|
Output: `pkg/engine/sources/git.go` and `git_test.go`. Wired into CLI in plan 04-05.
|
||||||
|
</objective>
|
||||||
|
|
||||||
|
<execution_context>
|
||||||
|
@$HOME/.claude/get-shit-done/workflows/execute-plan.md
|
||||||
|
@$HOME/.claude/get-shit-done/templates/summary.md
|
||||||
|
</execution_context>
|
||||||
|
|
||||||
|
<context>
|
||||||
|
@.planning/PROJECT.md
|
||||||
|
@.planning/phases/04-input-sources/04-CONTEXT.md
|
||||||
|
@pkg/engine/sources/source.go
|
||||||
|
@pkg/types/chunk.go
|
||||||
|
|
||||||
|
<interfaces>
|
||||||
|
Source interface:
|
||||||
|
```go
|
||||||
|
type Source interface {
|
||||||
|
Chunks(ctx context.Context, out chan<- types.Chunk) error
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Chunk struct:
|
||||||
|
```go
|
||||||
|
type Chunk struct {
|
||||||
|
Data []byte
|
||||||
|
Source string // will be "git:<shortSHA>:<path>"
|
||||||
|
Offset int64
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Relevant go-git/v5 APIs (from https://pkg.go.dev/github.com/go-git/go-git/v5):
|
||||||
|
```go
|
||||||
|
import "github.com/go-git/go-git/v5"
|
||||||
|
import "github.com/go-git/go-git/v5/plumbing"
|
||||||
|
import "github.com/go-git/go-git/v5/plumbing/object"
|
||||||
|
|
||||||
|
repo, err := git.PlainOpen(path) // opens local repo
|
||||||
|
refs, err := repo.References() // iterator over refs
|
||||||
|
refs.ForEach(func(*plumbing.Reference) error { }) // walk refs
|
||||||
|
commit, err := repo.CommitObject(hash) // resolve commit
|
||||||
|
iter, err := repo.Log(&git.LogOptions{From: hash, All: false})
|
||||||
|
iter.ForEach(func(*object.Commit) error { }) // walk commits
|
||||||
|
tree, err := commit.Tree()
|
||||||
|
tree.Files().ForEach(func(*object.File) error { }) // walk blobs
|
||||||
|
file.Contents() // returns (string, error)
|
||||||
|
file.Binary() // (bool, error)
|
||||||
|
file.Hash // plumbing.Hash (blob OID)
|
||||||
|
```
|
||||||
|
|
||||||
|
emitChunks helper from 04-02 plan (pkg/engine/sources/dir.go) — reuse:
|
||||||
|
```go
|
||||||
|
func emitChunks(ctx context.Context, data []byte, source string, chunkSize int, out chan<- types.Chunk) error
|
||||||
|
```
|
||||||
|
</interfaces>
|
||||||
|
</context>
|
||||||
|
|
||||||
|
<tasks>
|
||||||
|
|
||||||
|
<task type="auto" tdd="true">
|
||||||
|
<name>Task 1: Implement GitSource with full-history traversal and blob deduplication</name>
|
||||||
|
<read_first>
|
||||||
|
- pkg/engine/sources/source.go
|
||||||
|
- pkg/engine/sources/dir.go (for emitChunks helper — produced by plan 04-02)
|
||||||
|
- pkg/types/chunk.go
|
||||||
|
- .planning/phases/04-input-sources/04-CONTEXT.md (Git History section)
|
||||||
|
</read_first>
|
||||||
|
<files>
|
||||||
|
pkg/engine/sources/git.go,
|
||||||
|
pkg/engine/sources/git_test.go
|
||||||
|
</files>
|
||||||
|
<behavior>
|
||||||
|
- Test 1: GitSource on a fresh repo with 3 commits (each adding a file) emits exactly 3 unique blob scans
|
||||||
|
- Test 2: Second commit modifying file A creates a new blob — both old and new versions are scanned
|
||||||
|
- Test 3: Duplicate blob (same content in two files on same commit) is scanned once (dedup by OID)
|
||||||
|
- Test 4: Multi-branch repo — branch A with file X, branch B with file Y — both are scanned
|
||||||
|
- Test 5: Tag pointing to an old commit makes that commit's blobs reachable
|
||||||
|
- Test 6: Since filter set to "now + 1 hour" emits zero chunks
|
||||||
|
- Test 7: Finding.Source field matches pattern `git:[0-9a-f]{7}:.*`
|
||||||
|
- Test 8: Non-existent repo path returns an error
|
||||||
|
</behavior>
|
||||||
|
<action>
|
||||||
|
Create `pkg/engine/sources/git.go`:
|
||||||
|
|
||||||
|
```go
|
||||||
|
package sources
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/go-git/go-git/v5"
|
||||||
|
"github.com/go-git/go-git/v5/plumbing"
|
||||||
|
"github.com/go-git/go-git/v5/plumbing/object"
|
||||||
|
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/types"
|
||||||
|
)
|
||||||
|
|
||||||
|
// GitSource scans the full history of a local git repository: every commit
|
||||||
|
// on every branch and tag, deduplicating blob scans by OID.
|
||||||
|
type GitSource struct {
|
||||||
|
// RepoPath is the path to the local git repo (working tree or bare).
|
||||||
|
RepoPath string
|
||||||
|
// Since, if non-zero, excludes commits older than this timestamp
|
||||||
|
// (using commit author date).
|
||||||
|
Since time.Time
|
||||||
|
// ChunkSize is the overlap-chunker size; zero uses defaultChunkSize.
|
||||||
|
ChunkSize int
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewGitSource creates a GitSource for the given repo path.
|
||||||
|
func NewGitSource(repoPath string) *GitSource {
|
||||||
|
return &GitSource{RepoPath: repoPath, ChunkSize: defaultChunkSize}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Chunks walks every commit reachable from every branch, tag, and the
|
||||||
|
// stash ref (if present), streaming each unique blob's content through
|
||||||
|
// the shared emitChunks helper.
|
||||||
|
func (g *GitSource) Chunks(ctx context.Context, out chan<- types.Chunk) error {
|
||||||
|
if g.RepoPath == "" {
|
||||||
|
return errors.New("GitSource: RepoPath is empty")
|
||||||
|
}
|
||||||
|
repo, err := git.PlainOpen(g.RepoPath)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("GitSource: open %q: %w", g.RepoPath, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collect commit hashes to walk from every ref under refs/heads, refs/tags, refs/stash.
|
||||||
|
seedCommits, err := collectSeedCommits(repo)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("GitSource: collect refs: %w", err)
|
||||||
|
}
|
||||||
|
if len(seedCommits) == 0 {
|
||||||
|
return nil // empty repo is not an error
|
||||||
|
}
|
||||||
|
|
||||||
|
seenCommits := make(map[plumbing.Hash]struct{})
|
||||||
|
seenBlobs := make(map[plumbing.Hash]struct{})
|
||||||
|
|
||||||
|
for _, seed := range seedCommits {
|
||||||
|
if err := ctx.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
iter, err := repo.Log(&git.LogOptions{From: seed, All: false})
|
||||||
|
if err != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
err = iter.ForEach(func(c *object.Commit) error {
|
||||||
|
if ctxErr := ctx.Err(); ctxErr != nil {
|
||||||
|
return ctxErr
|
||||||
|
}
|
||||||
|
if _, ok := seenCommits[c.Hash]; ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
seenCommits[c.Hash] = struct{}{}
|
||||||
|
|
||||||
|
if !g.Since.IsZero() && c.Author.When.Before(g.Since) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return g.emitCommitBlobs(ctx, c, seenBlobs, out)
|
||||||
|
})
|
||||||
|
iter.Close()
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// Swallow per-seed iterator errors; continue with other refs.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// collectSeedCommits gathers commit hashes from all local branches, tags,
|
||||||
|
// and the stash ref — the union of which reaches every commit worth scanning.
|
||||||
|
func collectSeedCommits(repo *git.Repository) ([]plumbing.Hash, error) {
|
||||||
|
var seeds []plumbing.Hash
|
||||||
|
refs, err := repo.References()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
err = refs.ForEach(func(ref *plumbing.Reference) error {
|
||||||
|
name := ref.Name()
|
||||||
|
if !(name.IsBranch() || name.IsTag() || name == plumbing.ReferenceName("refs/stash") || name.IsRemote()) {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
hash := ref.Hash()
|
||||||
|
// For annotated tags the ref points at a tag object; resolve to commit if possible.
|
||||||
|
if name.IsTag() {
|
||||||
|
if tag, err := repo.TagObject(hash); err == nil {
|
||||||
|
if c, err := tag.Commit(); err == nil {
|
||||||
|
hash = c.Hash
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Skip symbolic refs (HEAD) whose target we already walked via IsBranch.
|
||||||
|
seeds = append(seeds, hash)
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
return seeds, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// emitCommitBlobs walks the tree of a commit and emits every blob whose
|
||||||
|
// OID has not already been scanned.
|
||||||
|
func (g *GitSource) emitCommitBlobs(ctx context.Context, c *object.Commit, seenBlobs map[plumbing.Hash]struct{}, out chan<- types.Chunk) error {
|
||||||
|
tree, err := c.Tree()
|
||||||
|
if err != nil {
|
||||||
|
return nil // skip unreadable tree
|
||||||
|
}
|
||||||
|
shortSHA := c.Hash.String()[:7]
|
||||||
|
|
||||||
|
return tree.Files().ForEach(func(f *object.File) error {
|
||||||
|
if err := ctx.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if _, ok := seenBlobs[f.Hash]; ok {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
seenBlobs[f.Hash] = struct{}{}
|
||||||
|
|
||||||
|
// Skip obviously-binary blobs via go-git's helper, then via our sniff.
|
||||||
|
if isBin, _ := f.IsBinary(); isBin {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
reader, err := f.Reader()
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
defer reader.Close()
|
||||||
|
data, err := io.ReadAll(reader)
|
||||||
|
if err != nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if len(data) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if bytes.IndexByte(data[:minInt(len(data), BinarySniffSize)], 0x00) >= 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
source := fmt.Sprintf("git:%s:%s", shortSHA, f.Name)
|
||||||
|
return emitChunks(ctx, data, source, g.ChunkSize, out)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func minInt(a, b int) int {
|
||||||
|
if a < b {
|
||||||
|
return a
|
||||||
|
}
|
||||||
|
return b
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Create `pkg/engine/sources/git_test.go` using go-git's in-process fixtures:
|
||||||
|
|
||||||
|
```go
|
||||||
|
package sources
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
"regexp"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/go-git/go-git/v5"
|
||||||
|
"github.com/go-git/go-git/v5/plumbing/object"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/types"
|
||||||
|
)
|
||||||
|
|
||||||
|
func initRepo(t *testing.T) (string, *git.Repository) {
|
||||||
|
t.Helper()
|
||||||
|
dir := t.TempDir()
|
||||||
|
repo, err := git.PlainInit(dir, false)
|
||||||
|
require.NoError(t, err)
|
||||||
|
return dir, repo
|
||||||
|
}
|
||||||
|
|
||||||
|
func commitFile(t *testing.T, dir string, repo *git.Repository, name, content string) {
|
||||||
|
t.Helper()
|
||||||
|
path := filepath.Join(dir, name)
|
||||||
|
require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755))
|
||||||
|
require.NoError(t, os.WriteFile(path, []byte(content), 0o644))
|
||||||
|
wt, err := repo.Worktree()
|
||||||
|
require.NoError(t, err)
|
||||||
|
_, err = wt.Add(name)
|
||||||
|
require.NoError(t, err)
|
||||||
|
_, err = wt.Commit("add "+name, &git.CommitOptions{
|
||||||
|
Author: &object.Signature{Name: "test", Email: "t@x", When: time.Now()},
|
||||||
|
})
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
func drainGit(t *testing.T, src Source) []types.Chunk {
|
||||||
|
t.Helper()
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
out := make(chan types.Chunk, 1024)
|
||||||
|
errCh := make(chan error, 1)
|
||||||
|
go func() { errCh <- src.Chunks(ctx, out); close(out) }()
|
||||||
|
var got []types.Chunk
|
||||||
|
for c := range out {
|
||||||
|
got = append(got, c)
|
||||||
|
}
|
||||||
|
require.NoError(t, <-errCh)
|
||||||
|
return got
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitSource_HistoryWalk(t *testing.T) {
|
||||||
|
dir, repo := initRepo(t)
|
||||||
|
commitFile(t, dir, repo, "a.txt", "contents alpha")
|
||||||
|
commitFile(t, dir, repo, "b.txt", "contents bravo")
|
||||||
|
commitFile(t, dir, repo, "c.txt", "contents charlie")
|
||||||
|
|
||||||
|
chunks := drainGit(t, NewGitSource(dir))
|
||||||
|
require.GreaterOrEqual(t, len(chunks), 3)
|
||||||
|
|
||||||
|
re := regexp.MustCompile(`^git:[0-9a-f]{7}:.+$`)
|
||||||
|
for _, c := range chunks {
|
||||||
|
require.Regexp(t, re, c.Source)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitSource_BlobDeduplication(t *testing.T) {
|
||||||
|
dir, repo := initRepo(t)
|
||||||
|
commitFile(t, dir, repo, "a.txt", "same exact content everywhere")
|
||||||
|
commitFile(t, dir, repo, "b.txt", "same exact content everywhere") // identical blob -> same OID
|
||||||
|
commitFile(t, dir, repo, "c.txt", "different content here")
|
||||||
|
|
||||||
|
chunks := drainGit(t, NewGitSource(dir))
|
||||||
|
// Expect 2 unique blobs scanned, not 3 files.
|
||||||
|
unique := make(map[string]bool)
|
||||||
|
for _, c := range chunks {
|
||||||
|
unique[string(c.Data)] = true
|
||||||
|
}
|
||||||
|
require.Len(t, unique, 2, "duplicate blobs must be deduped by OID")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitSource_ModifiedFileKeepsBothVersions(t *testing.T) {
|
||||||
|
dir, repo := initRepo(t)
|
||||||
|
commitFile(t, dir, repo, "a.txt", "version one")
|
||||||
|
commitFile(t, dir, repo, "a.txt", "version two") // modifying produces a second blob
|
||||||
|
|
||||||
|
chunks := drainGit(t, NewGitSource(dir))
|
||||||
|
bodies := make(map[string]bool)
|
||||||
|
for _, c := range chunks {
|
||||||
|
bodies[string(c.Data)] = true
|
||||||
|
}
|
||||||
|
require.True(t, bodies["version one"], "old version must still be scanned")
|
||||||
|
require.True(t, bodies["version two"], "new version must be scanned")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitSource_SinceFilterExcludesAll(t *testing.T) {
|
||||||
|
dir, repo := initRepo(t)
|
||||||
|
commitFile(t, dir, repo, "a.txt", "alpha")
|
||||||
|
|
||||||
|
src := NewGitSource(dir)
|
||||||
|
src.Since = time.Now().Add(1 * time.Hour)
|
||||||
|
chunks := drainGit(t, src)
|
||||||
|
require.Empty(t, chunks)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitSource_MissingRepo(t *testing.T) {
|
||||||
|
src := NewGitSource(filepath.Join(t.TempDir(), "not-a-repo"))
|
||||||
|
ctx := context.Background()
|
||||||
|
out := make(chan types.Chunk, 1)
|
||||||
|
err := src.Chunks(ctx, out)
|
||||||
|
require.Error(t, err)
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Do NOT touch any file outside `pkg/engine/sources/git.go` and `pkg/engine/sources/git_test.go`. CLI wire-up happens in plan 04-05.
|
||||||
|
</action>
|
||||||
|
<verify>
|
||||||
|
<automated>go test ./pkg/engine/sources/... -run TestGitSource -race -count=1 -timeout=60s</automated>
|
||||||
|
</verify>
|
||||||
|
<acceptance_criteria>
|
||||||
|
- `go build ./pkg/engine/sources/...` exits 0
|
||||||
|
- `go test ./pkg/engine/sources/... -run TestGitSource -race -count=1` passes all subtests
|
||||||
|
- `grep -n "git.PlainOpen" pkg/engine/sources/git.go` returns a hit
|
||||||
|
- `grep -n "seenBlobs" pkg/engine/sources/git.go` returns a hit (dedup map)
|
||||||
|
- `grep -n "fmt.Sprintf(\"git:%s:%s\"" pkg/engine/sources/git.go` returns a hit
|
||||||
|
- `grep -n "g.Since" pkg/engine/sources/git.go` returns a hit
|
||||||
|
</acceptance_criteria>
<done>
GitSource walks all branches/tags, emits each unique blob once, honors Since filter, formats source as `git:<short-sha>:<path>`, and tests cover dedup/history/since/missing-repo.
</done>
</task>

</tasks>

<verification>
- `go test ./pkg/engine/sources/... -run TestGitSource -race` passes
- `go vet ./pkg/engine/sources/...` clean
- All grep acceptance checks hit
</verification>

<success_criteria>
A caller can `sources.NewGitSource("./myrepo")` and receive chunks for every historical blob across all refs, with deterministic dedup and source attribution in `git:<short-sha>:<path>` form.
</success_criteria>

<output>
After completion, create `.planning/phases/04-input-sources/04-03-SUMMARY.md` documenting file list, test results, and the go-git version resolved by plan 04-01.
</output>
.planning/phases/04-input-sources/04-04-PLAN.md (new file, 624 lines)
@@ -0,0 +1,624 @@
---
phase: 04-input-sources
plan: 04
type: execute
wave: 1
depends_on: ["04-01"]
files_modified:
  - pkg/engine/sources/stdin.go
  - pkg/engine/sources/stdin_test.go
  - pkg/engine/sources/url.go
  - pkg/engine/sources/url_test.go
  - pkg/engine/sources/clipboard.go
  - pkg/engine/sources/clipboard_test.go
autonomous: true
requirements:
  - INPUT-03
  - INPUT-04
  - INPUT-05
must_haves:
  truths:
    - "StdinSource reads from an io.Reader and emits chunks with Source='stdin'"
    - "URLSource fetches an http/https URL with 30s timeout, 50MB cap, rejects file:// and other schemes, and emits chunks with Source='url:<url>'"
    - "URLSource rejects responses with non-text Content-Type unless allowlisted (text/*, application/json, application/javascript, application/xml)"
    - "ClipboardSource reads current clipboard via atotto/clipboard and emits chunks with Source='clipboard'"
    - "ClipboardSource returns a clear error if clipboard tooling is unavailable"
  artifacts:
    - path: "pkg/engine/sources/stdin.go"
      provides: "StdinSource"
      exports: ["StdinSource", "NewStdinSource"]
      min_lines: 40
    - path: "pkg/engine/sources/url.go"
      provides: "URLSource with HTTP fetch, timeout, size cap, content-type filter"
      exports: ["URLSource", "NewURLSource"]
      min_lines: 100
    - path: "pkg/engine/sources/clipboard.go"
      provides: "ClipboardSource wrapping atotto/clipboard"
      exports: ["ClipboardSource", "NewClipboardSource"]
      min_lines: 30
  key_links:
    - from: "pkg/engine/sources/url.go"
      to: "net/http"
      via: "http.Client with Timeout"
      pattern: "http\\.Client"
    - from: "pkg/engine/sources/url.go"
      to: "io.LimitReader"
      via: "MaxContentLength enforcement"
      pattern: "LimitReader"
    - from: "pkg/engine/sources/clipboard.go"
      to: "github.com/atotto/clipboard"
      via: "clipboard.ReadAll"
      pattern: "clipboard\\.ReadAll"
---

<objective>
Implement three smaller Source adapters in a single plan since each is compact, self-contained, and they share no state:
- `StdinSource` reads from an injectable `io.Reader` (defaults to `os.Stdin`) — INPUT-03
- `URLSource` fetches a remote URL via stdlib `net/http` with timeout, size cap, scheme whitelist, and content-type filter — INPUT-04
- `ClipboardSource` reads the current clipboard via `github.com/atotto/clipboard` with graceful fallback — INPUT-05

Purpose: These three adapters complete the Phase 4 input surface area. Bundling them into one plan keeps wave-1 parallelism healthy (04-02 + 04-03 + 04-04 run simultaneously) while respecting the ~50% context budget since each adapter is self-contained and small.
Output: Six files total (three sources + three test files).
</objective>

<execution_context>
@$HOME/.claude/get-shit-done/workflows/execute-plan.md
@$HOME/.claude/get-shit-done/templates/summary.md
</execution_context>

<context>
@.planning/PROJECT.md
@.planning/phases/04-input-sources/04-CONTEXT.md
@pkg/engine/sources/source.go
@pkg/types/chunk.go

<interfaces>
Source interface:
```go
type Source interface {
    Chunks(ctx context.Context, out chan<- types.Chunk) error
}
```

Shared helper (produced by plan 04-02 in pkg/engine/sources/dir.go):
```go
func emitChunks(ctx context.Context, data []byte, source string, chunkSize int, out chan<- types.Chunk) error
```
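
For orientation, a minimal sketch of the contract this plan assumes from `emitChunks` (illustrative only; the canonical implementation, including any chunk-boundary overlap, ships with plan 04-02):

```go
// Sketch: slice data into chunkSize pieces and send each on out, honoring
// ctx cancellation. Overlap at chunk edges (so a secret spanning a boundary
// is not missed) is deliberately omitted here.
func emitChunksSketch(ctx context.Context, data []byte, source string, chunkSize int, out chan<- types.Chunk) error {
    if chunkSize <= 0 {
        chunkSize = len(data) // degenerate default for the sketch only
    }
    for off := 0; off < len(data); off += chunkSize {
        end := off + chunkSize
        if end > len(data) {
            end = len(data)
        }
        select {
        case out <- types.Chunk{Source: source, Data: data[off:end]}:
        case <-ctx.Done():
            return ctx.Err()
        }
    }
    return nil
}
```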

atotto/clipboard API:
```go
import "github.com/atotto/clipboard"
func ReadAll() (string, error)
var Unsupported bool // set on platforms without clipboard tooling
```
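
A minimal usage sketch of that surface (both names are real exports of atotto/clipboard):

```go
if clipboard.Unsupported {
    return errors.New("clipboard tooling unavailable on this platform")
}
text, err := clipboard.ReadAll()
if err != nil {
    return err
}
// text now holds the clipboard contents as a string.
```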
</interfaces>
</context>

<tasks>

<task type="auto" tdd="true">
<name>Task 1: Implement StdinSource, URLSource, and ClipboardSource with full test coverage</name>
<read_first>
- pkg/engine/sources/source.go
- pkg/engine/sources/dir.go (for emitChunks signature from plan 04-02)
- pkg/types/chunk.go
- .planning/phases/04-input-sources/04-CONTEXT.md (Stdin, URL, Clipboard sections)
</read_first>
<files>
pkg/engine/sources/stdin.go,
pkg/engine/sources/stdin_test.go,
pkg/engine/sources/url.go,
pkg/engine/sources/url_test.go,
pkg/engine/sources/clipboard.go,
pkg/engine/sources/clipboard_test.go
</files>
<behavior>
StdinSource:
- Test 1: Feeding "API_KEY=xyz" through a bytes.Buffer emits one chunk with Source="stdin"
- Test 2: Empty input emits zero chunks without error
- Test 3: ctx cancellation returns ctx.Err()
URLSource:
- Test 4: Fetches content from httptest.Server, emits a chunk with Source="url:<server-url>"
- Test 5: Server returning 50MB+1 body is rejected with a size error
- Test 6: Server returning Content-Type image/png is rejected
- Test 7: Scheme "file:///etc/passwd" is rejected without any request attempt
- Test 8: Server returning 500 returns a non-nil error containing "500"
- Test 9: HTTP 301 redirect is followed (max 5 hops)
ClipboardSource:
- Test 10: If clipboard.Unsupported is true, returns an error with "clipboard" in the message
- Test 11: Otherwise reads clipboard (may skip if empty on CI) — use build tag or t.Skip guard
</behavior>
<action>

Create `pkg/engine/sources/stdin.go`:

```go
package sources

import (
    "context"
    "io"
    "os"

    "github.com/salvacybersec/keyhunter/pkg/types"
)

// StdinSource reads content from an io.Reader (defaults to os.Stdin) and
// emits overlapping chunks. Used when a user runs `keyhunter scan stdin`
// or `keyhunter scan -`.
type StdinSource struct {
    Reader    io.Reader
    ChunkSize int
}

// NewStdinSource returns a StdinSource bound to os.Stdin.
func NewStdinSource() *StdinSource {
    return &StdinSource{Reader: os.Stdin, ChunkSize: defaultChunkSize}
}

// NewStdinSourceFrom returns a StdinSource bound to the given reader
// (used primarily by tests).
func NewStdinSourceFrom(r io.Reader) *StdinSource {
    return &StdinSource{Reader: r, ChunkSize: defaultChunkSize}
}

// Chunks reads the entire input, then hands it to the shared chunk emitter.
func (s *StdinSource) Chunks(ctx context.Context, out chan<- types.Chunk) error {
    if s.Reader == nil {
        s.Reader = os.Stdin
    }
    data, err := io.ReadAll(s.Reader)
    if err != nil {
        return err
    }
    if len(data) == 0 {
        return nil
    }
    return emitChunks(ctx, data, "stdin", s.ChunkSize, out)
}
```

Create `pkg/engine/sources/stdin_test.go`:

```go
package sources

import (
    "bytes"
    "context"
    "testing"
    "time"

    "github.com/stretchr/testify/require"

    "github.com/salvacybersec/keyhunter/pkg/types"
)

func TestStdinSource_Basic(t *testing.T) {
    src := NewStdinSourceFrom(bytes.NewBufferString("API_KEY=sk-test-xyz"))
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
    defer cancel()
    out := make(chan types.Chunk, 8)
    errCh := make(chan error, 1)
    go func() { errCh <- src.Chunks(ctx, out); close(out) }()

    var got []types.Chunk
    for c := range out {
        got = append(got, c)
    }
    require.NoError(t, <-errCh)
    require.Len(t, got, 1)
    require.Equal(t, "stdin", got[0].Source)
    require.Equal(t, "API_KEY=sk-test-xyz", string(got[0].Data))
}

func TestStdinSource_Empty(t *testing.T) {
    src := NewStdinSourceFrom(bytes.NewBuffer(nil))
    out := make(chan types.Chunk, 1)
    err := src.Chunks(context.Background(), out)
    close(out)
    require.NoError(t, err)
    require.Len(t, out, 0)
}

func TestStdinSource_CtxCancel(t *testing.T) {
    // Large buffer so emitChunks iterates and can observe cancellation.
    data := make([]byte, 1<<20)
    src := NewStdinSourceFrom(bytes.NewReader(data))
    ctx, cancel := context.WithCancel(context.Background())
    cancel()
    out := make(chan types.Chunk) // unbuffered forces select on ctx
    err := src.Chunks(ctx, out)
    require.ErrorIs(t, err, context.Canceled)
}
```

Create `pkg/engine/sources/url.go`:

```go
package sources

import (
    "context"
    "errors"
    "fmt"
    "io"
    "net/http"
    "net/url"
    "strings"
    "time"

    "github.com/salvacybersec/keyhunter/pkg/types"
)

// MaxURLContentLength is the hard cap on URLSource response bodies.
const MaxURLContentLength int64 = 50 * 1024 * 1024 // 50 MB

// DefaultURLTimeout is the overall request timeout (connect + read + body).
const DefaultURLTimeout = 30 * time.Second

// allowedContentTypes is the whitelist of Content-Type prefixes URLSource
// will accept. Binary types (images, archives, executables) are rejected.
var allowedContentTypes = []string{
    "text/",
    "application/json",
    "application/javascript",
    "application/xml",
    "application/x-yaml",
    "application/yaml",
}

// URLSource fetches a remote resource over HTTP(S) and emits its body as chunks.
type URLSource struct {
    URL       string
    Client    *http.Client
    UserAgent string
    Insecure  bool // skip TLS verification (default false)
    ChunkSize int
}

// NewURLSource creates a URLSource with sane defaults.
func NewURLSource(rawURL string) *URLSource {
    return &URLSource{
        URL:       rawURL,
        Client:    defaultHTTPClient(),
        UserAgent: "keyhunter/dev",
        ChunkSize: defaultChunkSize,
    }
}

func defaultHTTPClient() *http.Client {
    return &http.Client{
        Timeout: DefaultURLTimeout,
        CheckRedirect: func(req *http.Request, via []*http.Request) error {
            if len(via) >= 5 {
                return errors.New("stopped after 5 redirects")
            }
            return nil
        },
    }
}

// Chunks validates the URL, issues a GET, and emits the response body as chunks.
func (u *URLSource) Chunks(ctx context.Context, out chan<- types.Chunk) error {
    parsed, err := url.Parse(u.URL)
    if err != nil {
        return fmt.Errorf("URLSource: parse %q: %w", u.URL, err)
    }
    if parsed.Scheme != "http" && parsed.Scheme != "https" {
        return fmt.Errorf("URLSource: unsupported scheme %q (only http/https)", parsed.Scheme)
    }

    req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.URL, nil)
    if err != nil {
        return fmt.Errorf("URLSource: new request: %w", err)
    }
    req.Header.Set("User-Agent", u.UserAgent)

    client := u.Client
    if client == nil {
        client = defaultHTTPClient()
    }
    resp, err := client.Do(req)
    if err != nil {
        return fmt.Errorf("URLSource: fetch: %w", err)
    }
    defer resp.Body.Close()

    if resp.StatusCode < 200 || resp.StatusCode >= 300 {
        return fmt.Errorf("URLSource: non-2xx status %d from %s", resp.StatusCode, u.URL)
    }

    ct := resp.Header.Get("Content-Type")
    if !isAllowedContentType(ct) {
        return fmt.Errorf("URLSource: disallowed Content-Type %q", ct)
    }

    if resp.ContentLength > MaxURLContentLength {
        return fmt.Errorf("URLSource: Content-Length %d exceeds cap %d", resp.ContentLength, MaxURLContentLength)
    }

    // LimitReader cap + 1 to detect overflow even if ContentLength was missing/wrong.
    limited := io.LimitReader(resp.Body, MaxURLContentLength+1)
    data, err := io.ReadAll(limited)
    if err != nil {
        return fmt.Errorf("URLSource: read body: %w", err)
    }
    if int64(len(data)) > MaxURLContentLength {
        return fmt.Errorf("URLSource: body exceeds %d bytes", MaxURLContentLength)
    }
    if len(data) == 0 {
        return nil
    }

    source := "url:" + u.URL
    return emitChunks(ctx, data, source, u.ChunkSize, out)
}

func isAllowedContentType(ct string) bool {
    if ct == "" {
        return true // some servers omit; trust and scan
    }
    // Strip parameters like "; charset=utf-8".
    if idx := strings.Index(ct, ";"); idx >= 0 {
        ct = ct[:idx]
    }
    ct = strings.TrimSpace(strings.ToLower(ct))
    for _, prefix := range allowedContentTypes {
        if strings.HasPrefix(ct, prefix) {
            return true
        }
    }
    return false
}
```

Create `pkg/engine/sources/url_test.go`:

```go
package sources

import (
    "context"
    "net/http"
    "net/http/httptest"
    "strings"
    "testing"
    "time"

    "github.com/stretchr/testify/require"

    "github.com/salvacybersec/keyhunter/pkg/types"
)

func drainURL(t *testing.T, src Source) ([]types.Chunk, error) {
    t.Helper()
    ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
    defer cancel()
    out := make(chan types.Chunk, 256)
    errCh := make(chan error, 1)
    go func() { errCh <- src.Chunks(ctx, out); close(out) }()
    var got []types.Chunk
    for c := range out {
        got = append(got, c)
    }
    return got, <-errCh
}

func TestURLSource_Fetches(t *testing.T) {
    srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        w.Header().Set("Content-Type", "text/plain")
        _, _ = w.Write([]byte("API_KEY=sk-live-xyz"))
    }))
    defer srv.Close()

    chunks, err := drainURL(t, NewURLSource(srv.URL))
    require.NoError(t, err)
    require.Len(t, chunks, 1)
    require.Equal(t, "url:"+srv.URL, chunks[0].Source)
    require.Equal(t, "API_KEY=sk-live-xyz", string(chunks[0].Data))
}

func TestURLSource_RejectsBinaryContentType(t *testing.T) {
    srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        w.Header().Set("Content-Type", "image/png")
        _, _ = w.Write([]byte{0x89, 0x50, 0x4e, 0x47})
    }))
    defer srv.Close()

    _, err := drainURL(t, NewURLSource(srv.URL))
    require.Error(t, err)
    require.Contains(t, err.Error(), "Content-Type")
}

func TestURLSource_RejectsNonHTTPScheme(t *testing.T) {
    _, err := drainURL(t, NewURLSource("file:///etc/passwd"))
    require.Error(t, err)
    require.Contains(t, err.Error(), "unsupported scheme")
}

func TestURLSource_Rejects500(t *testing.T) {
    srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        http.Error(w, "boom", http.StatusInternalServerError)
    }))
    defer srv.Close()

    _, err := drainURL(t, NewURLSource(srv.URL))
    require.Error(t, err)
    require.Contains(t, err.Error(), "500")
}

func TestURLSource_RejectsOversizeBody(t *testing.T) {
    // Serve a body just over the cap; ~50 MB held in memory keeps the test fast.
    big := strings.Repeat("a", int(MaxURLContentLength)+10)
    srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        w.Header().Set("Content-Type", "text/plain")
        _, _ = w.Write([]byte(big))
    }))
    defer srv.Close()

    _, err := drainURL(t, NewURLSource(srv.URL))
    require.Error(t, err)
}

func TestURLSource_FollowsRedirect(t *testing.T) {
    target := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        w.Header().Set("Content-Type", "text/plain")
        _, _ = w.Write([]byte("redirected body"))
    }))
    defer target.Close()

    redirector := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        http.Redirect(w, r, target.URL, http.StatusMovedPermanently)
    }))
    defer redirector.Close()

    chunks, err := drainURL(t, NewURLSource(redirector.URL))
    require.NoError(t, err)
    require.NotEmpty(t, chunks)
    require.Contains(t, string(chunks[0].Data), "redirected body")
}
```

Create `pkg/engine/sources/clipboard.go`:

```go
package sources

import (
    "context"
    "errors"
    "fmt"

    "github.com/atotto/clipboard"

    "github.com/salvacybersec/keyhunter/pkg/types"
)

// ClipboardSource reads the current OS clipboard contents and emits them
// as a single chunk stream with Source="clipboard". Requires xclip/xsel/
// wl-clipboard on Linux, pbpaste on macOS, or native API on Windows.
type ClipboardSource struct {
    // Reader overrides the clipboard reader; when nil the real clipboard is used.
    // Tests inject a func returning a fixture.
    Reader    func() (string, error)
    ChunkSize int
}

// NewClipboardSource returns a ClipboardSource bound to the real OS clipboard.
func NewClipboardSource() *ClipboardSource {
    return &ClipboardSource{Reader: clipboard.ReadAll, ChunkSize: defaultChunkSize}
}

// Chunks reads the clipboard and emits its contents.
func (c *ClipboardSource) Chunks(ctx context.Context, out chan<- types.Chunk) error {
    if clipboard.Unsupported && c.Reader == nil {
        return errors.New("ClipboardSource: clipboard tooling unavailable (install xclip/xsel/wl-clipboard on Linux)")
    }
    reader := c.Reader
    if reader == nil {
        reader = clipboard.ReadAll
    }
    text, err := reader()
    if err != nil {
        return fmt.Errorf("ClipboardSource: read: %w", err)
    }
    if text == "" {
        return nil
    }
    return emitChunks(ctx, []byte(text), "clipboard", c.ChunkSize, out)
}
```

Create `pkg/engine/sources/clipboard_test.go`:

```go
package sources

import (
    "context"
    "errors"
    "testing"
    "time"

    "github.com/stretchr/testify/require"

    "github.com/salvacybersec/keyhunter/pkg/types"
)

func TestClipboardSource_FixtureReader(t *testing.T) {
    src := &ClipboardSource{
        Reader:    func() (string, error) { return "sk-live-xxxxxx", nil },
        ChunkSize: defaultChunkSize,
    }
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
    defer cancel()
    out := make(chan types.Chunk, 4)
    errCh := make(chan error, 1)
    go func() { errCh <- src.Chunks(ctx, out); close(out) }()

    var got []types.Chunk
    for c := range out {
        got = append(got, c)
    }
    require.NoError(t, <-errCh)
    require.Len(t, got, 1)
    require.Equal(t, "clipboard", got[0].Source)
    require.Equal(t, "sk-live-xxxxxx", string(got[0].Data))
}

func TestClipboardSource_ReaderError(t *testing.T) {
    src := &ClipboardSource{
        Reader: func() (string, error) { return "", errors.New("no xclip installed") },
    }
    out := make(chan types.Chunk, 1)
    err := src.Chunks(context.Background(), out)
    require.Error(t, err)
    require.Contains(t, err.Error(), "clipboard")
}

func TestClipboardSource_EmptyClipboard(t *testing.T) {
    src := &ClipboardSource{
        Reader: func() (string, error) { return "", nil },
    }
    out := make(chan types.Chunk, 1)
    err := src.Chunks(context.Background(), out)
    require.NoError(t, err)
    require.Len(t, out, 0)
}
```

Do NOT modify `cmd/scan.go` in this plan. Do NOT create `pkg/engine/sources/dir.go`, `git.go`, or touch `file.go` — those are owned by plans 04-02 and 04-03.
</action>
<verify>
<automated>go test ./pkg/engine/sources/... -run 'TestStdinSource|TestURLSource|TestClipboardSource' -race -count=1</automated>
</verify>
<acceptance_criteria>
- `go build ./pkg/engine/sources/...` exits 0
- `go test ./pkg/engine/sources/... -run 'TestStdinSource|TestURLSource|TestClipboardSource' -race` passes all subtests
- `grep -n "http.Client" pkg/engine/sources/url.go` hits
- `grep -n "LimitReader" pkg/engine/sources/url.go` hits
- `grep -n "clipboard.ReadAll" pkg/engine/sources/clipboard.go` hits
- `grep -n "\"stdin\"" pkg/engine/sources/stdin.go` hits (source label)
- `grep -n "\"url:\" + u.URL\\|\"url:\"+u.URL" pkg/engine/sources/url.go` hits
</acceptance_criteria>
<done>
StdinSource, URLSource, and ClipboardSource all implement Source, enforce their respective safety limits (stdin read-to-EOF, url scheme/size/content-type whitelist, clipboard tooling check), and their tests pass under -race.
</done>
</task>

</tasks>

<verification>
- `go test ./pkg/engine/sources/... -race -count=1` passes including new tests
- `go vet ./pkg/engine/sources/...` clean
- All grep acceptance checks hit
</verification>

<success_criteria>
Three new source adapters exist, each self-contained, each with test coverage, and none conflicting with file ownership of plans 04-02 (dir/file) or 04-03 (git).
</success_criteria>

<output>
After completion, create `.planning/phases/04-input-sources/04-04-SUMMARY.md` listing the six files created, test names with pass status, and any platform-specific notes about clipboard tests on the executor's CI environment.
</output>
.planning/phases/04-input-sources/04-05-PLAN.md (new file, 435 lines)
@@ -0,0 +1,435 @@
---
phase: 04-input-sources
plan: 05
type: execute
wave: 2
depends_on: ["04-02", "04-03", "04-04"]
files_modified:
  - cmd/scan.go
  - cmd/scan_sources_test.go
autonomous: true
requirements:
  - INPUT-06
must_haves:
  truths:
    - "keyhunter scan <dir> uses DirSource when target is a directory (not FileSource)"
    - "keyhunter scan <file> continues to use FileSource when target is a single file"
    - "keyhunter scan --git <repo> uses GitSource, honoring --since YYYY-MM-DD"
    - "keyhunter scan stdin and keyhunter scan - both use StdinSource"
    - "keyhunter scan --url <https://...> uses URLSource"
    - "keyhunter scan --clipboard uses ClipboardSource (no positional arg required)"
    - "--exclude flags are forwarded to DirSource"
    - "Exactly one source is selected — conflicting flags return an error"
  artifacts:
    - path: "cmd/scan.go"
      provides: "Source-selection logic dispatching to the appropriate Source implementation"
      contains: "selectSource"
      min_lines: 180
    - path: "cmd/scan_sources_test.go"
      provides: "Unit tests for selectSource covering every flag combination"
      min_lines: 80
  key_links:
    - from: "cmd/scan.go"
      to: "pkg/engine/sources"
      via: "sources.NewDirSource/NewGitSource/NewStdinSource/NewURLSource/NewClipboardSource"
      pattern: "sources\\.New(Dir|Git|Stdin|URL|Clipboard)Source"
    - from: "cmd/scan.go"
      to: "cobra flags"
      via: "--git, --url, --clipboard, --since, --exclude"
      pattern: "\\-\\-git|\\-\\-url|\\-\\-clipboard|\\-\\-since"
---

<objective>
Wire the five new source adapters (DirSource, GitSource, StdinSource, URLSource, ClipboardSource) into `cmd/scan.go` via a new `selectSource` helper that inspects CLI flags and positional args to pick exactly one source. Satisfies INPUT-06 (the "all inputs flow through the same pipeline" integration requirement).

Purpose: Plans 04-02 through 04-04 deliver the Source implementations in isolation. This plan is the single integration point that makes them reachable from the CLI, with argument validation to prevent ambiguous invocations like `keyhunter scan --git --url https://...`.
Output: Updated `cmd/scan.go` with new flags and dispatching logic, plus a focused test file exercising `selectSource` directly.
</objective>

<execution_context>
@$HOME/.claude/get-shit-done/workflows/execute-plan.md
@$HOME/.claude/get-shit-done/templates/summary.md
</execution_context>

<context>
@.planning/PROJECT.md
@.planning/phases/04-input-sources/04-CONTEXT.md
@cmd/scan.go
@pkg/engine/sources/source.go

<interfaces>
Source constructors from Wave 1 plans:
```go
// Plan 04-02
func NewFileSource(path string) *FileSource
func NewDirSource(root string, extraExcludes ...string) *DirSource
func NewDirSourceRaw(root string, excludes []string) *DirSource

// Plan 04-03
func NewGitSource(repoPath string) *GitSource
type GitSource struct {
    RepoPath  string
    Since     time.Time
    ChunkSize int
}

// Plan 04-04
func NewStdinSource() *StdinSource
func NewURLSource(rawURL string) *URLSource
func NewClipboardSource() *ClipboardSource
```

Existing cmd/scan.go contract (see file for full body):
- Package `cmd`
- Uses `sources.NewFileSource(target)` unconditionally today
- Has `flagExclude []string` already declared
- init() registers flags: --workers, --verify, --unmask, --output, --exclude
</interfaces>
</context>

<tasks>

<task type="auto" tdd="true">
<name>Task 1: Add source-selection flags and dispatch logic to cmd/scan.go</name>
<read_first>
- cmd/scan.go (full file)
- pkg/engine/sources/source.go
- pkg/engine/sources/dir.go (produced by 04-02)
- pkg/engine/sources/git.go (produced by 04-03)
- pkg/engine/sources/stdin.go (produced by 04-04)
- pkg/engine/sources/url.go (produced by 04-04)
- pkg/engine/sources/clipboard.go (produced by 04-04)
</read_first>
<files>cmd/scan.go, cmd/scan_sources_test.go</files>
<behavior>
- Test 1: selectSource with target="." on a directory returns a *DirSource
- Test 2: selectSource with target pointing to a file returns a *FileSource
- Test 3: selectSource with flagGit=true and target="./repo" returns a *GitSource
- Test 4: selectSource with flagGit=true and flagSince="2024-01-01" sets GitSource.Since correctly
- Test 5: selectSource with invalid --since format returns a parse error
- Test 6: selectSource with flagURL set returns a *URLSource
- Test 7: selectSource with flagClipboard=true and no args returns a *ClipboardSource
- Test 8: selectSource with target="stdin" returns a *StdinSource
- Test 9: selectSource with target="-" returns a *StdinSource
- Test 10: selectSource with both --git and --url set returns an error
- Test 11: selectSource with --clipboard and a positional target returns an error
- Test 12: selectSource forwards --exclude patterns into DirSource.Excludes
</behavior>
<action>

Edit `cmd/scan.go`. The end state must:

1. Add new package-level flag vars alongside the existing ones:

```go
var (
    flagWorkers     int
    flagVerify      bool
    flagUnmask      bool
    flagOutput      string
    flagExclude     []string
    flagGit         bool
    flagURL         string
    flagClipboard   bool
    flagSince       string
    flagMaxFileSize int64
    flagInsecure    bool
)
```

2. Change `scanCmd.Args` so a positional target is optional when `--url` or `--clipboard` is used:

```go
var scanCmd = &cobra.Command{
    Use:   "scan [path|stdin|-]",
    Short: "Scan files, directories, git history, stdin, URLs, or clipboard for leaked API keys",
    Args:  cobra.MaximumNArgs(1),
    RunE: func(cmd *cobra.Command, args []string) error {
        // ... existing config load ...

        src, err := selectSource(args, sourceFlags{
            Git:       flagGit,
            URL:       flagURL,
            Clipboard: flagClipboard,
            Since:     flagSince,
            Excludes:  flagExclude,
        })
        if err != nil {
            return err
        }

        // Replace the old `src := sources.NewFileSource(target)` line with use of the dispatched src.
        // Keep all downstream code unchanged (engine, storage, output).

        // ... rest of existing RunE body, using src ...
        _ = src
        return nil // placeholder — keep existing logic
    },
}
```

3. Add the selectSource helper and its supporting struct, in `cmd/scan.go`:

```go
// sourceFlags captures the CLI inputs that control source selection.
// Extracted into a struct so selectSource is straightforward to unit test.
type sourceFlags struct {
    Git       bool
    URL       string
    Clipboard bool
    Since     string
    Excludes  []string
}

// selectSource inspects positional args and source flags, validates that
// exactly one source is specified, and returns the appropriate Source.
func selectSource(args []string, f sourceFlags) (sources.Source, error) {
    // Count explicit source selectors that take no positional path.
    explicitCount := 0
    if f.URL != "" {
        explicitCount++
    }
    if f.Clipboard {
        explicitCount++
    }
    if f.Git {
        explicitCount++
    }
    if explicitCount > 1 {
        return nil, fmt.Errorf("scan: --git, --url, and --clipboard are mutually exclusive")
    }

    // Clipboard and URL take no positional argument.
    if f.Clipboard {
        if len(args) > 0 {
            return nil, fmt.Errorf("scan: --clipboard does not accept a positional argument")
        }
        return sources.NewClipboardSource(), nil
    }
    if f.URL != "" {
        if len(args) > 0 {
            return nil, fmt.Errorf("scan: --url does not accept a positional argument")
        }
        return sources.NewURLSource(f.URL), nil
    }

    if len(args) == 0 {
        return nil, fmt.Errorf("scan: missing target (path, stdin, -, or a source flag)")
    }
    target := args[0]

    if target == "stdin" || target == "-" {
        if f.Git {
            return nil, fmt.Errorf("scan: --git cannot be combined with stdin")
        }
        return sources.NewStdinSource(), nil
    }

    if f.Git {
        gs := sources.NewGitSource(target)
        if f.Since != "" {
            t, err := time.Parse("2006-01-02", f.Since)
            if err != nil {
                return nil, fmt.Errorf("scan: --since must be YYYY-MM-DD: %w", err)
            }
            gs.Since = t
        }
        return gs, nil
    }

    info, err := os.Stat(target)
    if err != nil {
        return nil, fmt.Errorf("scan: stat %q: %w", target, err)
    }
    if info.IsDir() {
        return sources.NewDirSource(target, f.Excludes...), nil
    }
    return sources.NewFileSource(target), nil
}
```

4. In the existing `init()`, register the new flags next to the existing ones:

```go
func init() {
    scanCmd.Flags().IntVar(&flagWorkers, "workers", 0, "number of worker goroutines (default: CPU*8)")
    scanCmd.Flags().BoolVar(&flagVerify, "verify", false, "actively verify found keys (opt-in, Phase 5)")
    scanCmd.Flags().BoolVar(&flagUnmask, "unmask", false, "show full key values (default: masked)")
    scanCmd.Flags().StringVar(&flagOutput, "output", "table", "output format: table, json")
    scanCmd.Flags().StringSliceVar(&flagExclude, "exclude", nil, "extra glob patterns to exclude (e.g. *.min.js)")

    // Phase 4 source-selection flags.
    scanCmd.Flags().BoolVar(&flagGit, "git", false, "treat target as a git repo and scan full history")
    scanCmd.Flags().StringVar(&flagURL, "url", "", "fetch and scan a remote http(s) URL (no positional arg)")
    scanCmd.Flags().BoolVar(&flagClipboard, "clipboard", false, "scan current clipboard contents")
    scanCmd.Flags().StringVar(&flagSince, "since", "", "for --git: only scan commits after YYYY-MM-DD")
    scanCmd.Flags().Int64Var(&flagMaxFileSize, "max-file-size", 0, "max file size in bytes to scan (0 = unlimited)")
    scanCmd.Flags().BoolVar(&flagInsecure, "insecure", false, "for --url: skip TLS certificate verification")

    _ = viper.BindPFlag("scan.workers", scanCmd.Flags().Lookup("workers"))
}
```

5. Replace the single line `src := sources.NewFileSource(target)` in the existing RunE body with the `selectSource` dispatch. Leave ALL downstream code (engine.Scan, storage.SaveFinding, output switch, exit code logic) untouched. Ensure the `target` variable is only used where relevant (it is no longer the sole driver of source construction).

6. Add the `time` import to `cmd/scan.go`.

Create `cmd/scan_sources_test.go`:

```go
package cmd

import (
    "os"
    "path/filepath"
    "testing"
    "time"

    "github.com/stretchr/testify/require"

    "github.com/salvacybersec/keyhunter/pkg/engine/sources"
)

func TestSelectSource_Directory(t *testing.T) {
    dir := t.TempDir()
    src, err := selectSource([]string{dir}, sourceFlags{})
    require.NoError(t, err)
    _, ok := src.(*sources.DirSource)
    require.True(t, ok, "expected *DirSource, got %T", src)
}

func TestSelectSource_File(t *testing.T) {
    dir := t.TempDir()
    f := filepath.Join(dir, "a.txt")
    require.NoError(t, os.WriteFile(f, []byte("x"), 0o644))
    src, err := selectSource([]string{f}, sourceFlags{})
    require.NoError(t, err)
    _, ok := src.(*sources.FileSource)
    require.True(t, ok, "expected *FileSource, got %T", src)
}

func TestSelectSource_Git(t *testing.T) {
    src, err := selectSource([]string{"./some-repo"}, sourceFlags{Git: true})
    require.NoError(t, err)
    gs, ok := src.(*sources.GitSource)
    require.True(t, ok, "expected *GitSource, got %T", src)
    require.Equal(t, "./some-repo", gs.RepoPath)
}

func TestSelectSource_GitSince(t *testing.T) {
    src, err := selectSource([]string{"./repo"}, sourceFlags{Git: true, Since: "2024-01-15"})
    require.NoError(t, err)
    gs := src.(*sources.GitSource)
    want, _ := time.Parse("2006-01-02", "2024-01-15")
    require.Equal(t, want, gs.Since)
}

func TestSelectSource_GitSinceBadFormat(t *testing.T) {
    _, err := selectSource([]string{"./repo"}, sourceFlags{Git: true, Since: "15/01/2024"})
    require.Error(t, err)
    require.Contains(t, err.Error(), "YYYY-MM-DD")
}

func TestSelectSource_URL(t *testing.T) {
    src, err := selectSource(nil, sourceFlags{URL: "https://example.com/a.js"})
    require.NoError(t, err)
    _, ok := src.(*sources.URLSource)
    require.True(t, ok)
}

func TestSelectSource_URLRejectsPositional(t *testing.T) {
    _, err := selectSource([]string{"./foo"}, sourceFlags{URL: "https://x"})
    require.Error(t, err)
}

func TestSelectSource_Clipboard(t *testing.T) {
    src, err := selectSource(nil, sourceFlags{Clipboard: true})
    require.NoError(t, err)
    _, ok := src.(*sources.ClipboardSource)
    require.True(t, ok)
}

func TestSelectSource_ClipboardRejectsPositional(t *testing.T) {
    _, err := selectSource([]string{"./foo"}, sourceFlags{Clipboard: true})
    require.Error(t, err)
}

func TestSelectSource_Stdin(t *testing.T) {
    for _, tok := range []string{"stdin", "-"} {
        src, err := selectSource([]string{tok}, sourceFlags{})
        require.NoError(t, err)
        _, ok := src.(*sources.StdinSource)
        require.True(t, ok, "token %q: expected *StdinSource, got %T", tok, src)
    }
}

func TestSelectSource_MutuallyExclusive(t *testing.T) {
    _, err := selectSource(nil, sourceFlags{Git: true, URL: "https://x"})
    require.Error(t, err)
    require.Contains(t, err.Error(), "mutually exclusive")
}

func TestSelectSource_MissingTarget(t *testing.T) {
    _, err := selectSource(nil, sourceFlags{})
    require.Error(t, err)
    require.Contains(t, err.Error(), "missing target")
}

func TestSelectSource_DirForwardsExcludes(t *testing.T) {
    dir := t.TempDir()
    src, err := selectSource([]string{dir}, sourceFlags{Excludes: []string{"*.log", "tmp/**"}})
    require.NoError(t, err)
    ds := src.(*sources.DirSource)
    // NewDirSource merges DefaultExcludes with extras, so user patterns must be present.
    found := 0
    for _, e := range ds.Excludes {
        if e == "*.log" || e == "tmp/**" {
            found++
        }
    }
    require.Equal(t, 2, found, "user excludes not forwarded, got %v", ds.Excludes)
}
```

After making these changes, run `go build ./...` and fix any import or compile errors. Do NOT modify pkg/engine/sources/* files — they are owned by Wave 1 plans.
</action>
<verify>
<automated>go build ./... && go test ./cmd/... -run TestSelectSource -race -count=1</automated>
</verify>
<acceptance_criteria>
- `go build ./...` exits 0
- `go test ./cmd/... -run TestSelectSource -race -count=1` passes all 13 subtests
- `go test ./... -race -count=1` full suite passes
- `grep -n "selectSource" cmd/scan.go` returns at least two hits (definition + call site)
- `grep -n "flagGit\|flagURL\|flagClipboard\|flagSince" cmd/scan.go` returns at least 4 hits
- `grep -n "sources.NewDirSource\|sources.NewGitSource\|sources.NewStdinSource\|sources.NewURLSource\|sources.NewClipboardSource" cmd/scan.go` returns 5 hits
- `grep -n "mutually exclusive" cmd/scan.go` returns a hit
- `keyhunter scan --help` (via `go run . scan --help`) lists --git, --url, --clipboard, --since flags
</acceptance_criteria>
<done>
cmd/scan.go dispatches to the correct Source implementation based on positional args and flags, with unambiguous error messages for conflicting selectors. All selectSource tests pass under -race. The existing single-file FileSource path still works unchanged.
</done>
</task>

</tasks>

<verification>
- `go build ./...` exits 0
- `go test ./... -race -count=1` full suite green (including earlier Wave 1 plan tests)
- `go run . scan --help` lists new flags
- `go run . scan ./pkg` completes successfully (DirSource path)
- `echo "API_KEY=test" | go run . scan -` completes successfully (StdinSource path)
</verification>

<success_criteria>
Users can invoke every Phase 4 input mode from the CLI and each one flows through the unchanged three-stage detection pipeline. INPUT-01 through INPUT-05 are reachable via CLI, and INPUT-06 (the integration meta-requirement) is satisfied by the passing test suite plus the help-text listing.
</success_criteria>

<output>
After completion, create `.planning/phases/04-input-sources/04-05-SUMMARY.md` documenting:
- selectSource signature and branches
- Flag additions
- Test pass summary
- A short one-line example invocation per new source (dir, git, stdin, url, clipboard)
- Confirmation that existing Phase 1-3 tests still pass
</output>
.planning/phases/12-osint_iot_cloud_storage/12-01-SUMMARY.md (new file, 99 lines)
@@ -0,0 +1,99 @@
---
phase: 12-osint_iot_cloud_storage
plan: 01
subsystem: recon
tags: [shodan, censys, zoomeye, iot, device-search, osint]

# Dependency graph
requires:
  - phase: 10-osint-code-hosting
    provides: ReconSource interface, shared Client, BuildQueries, LimiterRegistry
provides:
  - ShodanSource implementing recon.ReconSource
  - CensysSource implementing recon.ReconSource
  - ZoomEyeSource implementing recon.ReconSource
affects: [12-osint_iot_cloud_storage, recon-registration]

# Tech tracking
tech-stack:
  added: []
  patterns: [IoT device scanner source pattern with API key/header auth]

key-files:
  created:
    - pkg/recon/sources/shodan.go
    - pkg/recon/sources/censys.go
    - pkg/recon/sources/zoomeye.go
    - pkg/recon/sources/shodan_test.go
    - pkg/recon/sources/censys_test.go
    - pkg/recon/sources/zoomeye_test.go
  modified: []

key-decisions:
  - "Shodan, Censys, ZoomEye use bare keyword queries (default formatQuery case) -- no special syntax needed"
  - "Censys uses POST with JSON body + Basic Auth; Shodan/ZoomEye use GET with key param/header"

patterns-established:
  - "IoT scanner source pattern: GET/POST to device search API, parse JSON matches, emit Finding per hit"

requirements-completed: [RECON-IOT-01, RECON-IOT-02, RECON-IOT-03]

# Metrics
duration: 3min
completed: 2026-04-06
---

# Phase 12 Plan 01: Shodan, Censys, ZoomEye IoT Scanner Sources Summary

**Three IoT device scanner recon sources searching Shodan host/search, Censys v2 hosts/search, and ZoomEye host/search for exposed LLM endpoints**

## Performance

- **Duration:** 3 min
- **Started:** 2026-04-06T09:21:40Z
- **Completed:** 2026-04-06T09:24:28Z
- **Tasks:** 2
- **Files modified:** 6

## Accomplishments
- ShodanSource queries /shodan/host/search with API key param, emits findings per IP:port match (request shape sketched after this list)
- CensysSource POSTs to /v2/hosts/search with Basic Auth (APIId:APISecret), emits findings per host hit
- ZoomEyeSource queries /host/search with API-KEY header, emits findings per IP:port match
- All three sources disabled when credentials empty, use shared retry Client, respect LimiterRegistry
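
A hedged sketch of the Shodan request/response shape named above (the endpoint and JSON field names follow Shodan's public REST API; the surrounding variables are illustrative, not this repo's actual code):

```go
// GET /shodan/host/search with the key as a query parameter; every entry in
// "matches" becomes one finding per IP:port hit.
u := fmt.Sprintf("https://api.shodan.io/shodan/host/search?key=%s&query=%s",
    url.QueryEscape(apiKey), url.QueryEscape(keyword))
resp, err := http.Get(u)
if err != nil {
    return err
}
defer resp.Body.Close()
var body struct {
    Matches []struct {
        IPStr string `json:"ip_str"`
        Port  int    `json:"port"`
    } `json:"matches"`
}
if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
    return err
}
for _, m := range body.Matches {
    fmt.Printf("shodan hit: %s:%d\n", m.IPStr, m.Port)
}
```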

## Task Commits

Each task was committed atomically:

1. **Task 1: Implement ShodanSource, CensysSource, ZoomEyeSource** - `f5d8470` (feat)
2. **Task 2: Unit tests for Shodan, Censys, ZoomEye sources** - `6443e63` (test)

## Files Created/Modified
- `pkg/recon/sources/shodan.go` - ShodanSource with /shodan/host/search API integration
- `pkg/recon/sources/censys.go` - CensysSource with POST /v2/hosts/search + Basic Auth
- `pkg/recon/sources/zoomeye.go` - ZoomEyeSource with /host/search + API-KEY header
- `pkg/recon/sources/shodan_test.go` - 4 tests: enabled, empty key, sweep findings, ctx cancel
- `pkg/recon/sources/censys_test.go` - 4 tests: enabled, empty creds, sweep findings, ctx cancel
- `pkg/recon/sources/zoomeye_test.go` - 4 tests: enabled, empty key, sweep findings, ctx cancel

## Decisions Made
- Shodan, Censys, ZoomEye use bare keyword queries (default formatQuery case) -- no queries.go changes needed
- Censys uses POST with JSON body and Basic Auth; Shodan uses API key as query param; ZoomEye uses API-KEY header

## Deviations from Plan

None - plan executed exactly as written.

## Issues Encountered
None

## User Setup Required
None - no external service configuration required.

## Next Phase Readiness
- Three IoT scanner sources ready for RegisterAll wiring in Plan 12-04
- Same pattern applies to remaining Phase 12 sources (FOFA, Netlas, BinaryEdge)

---
*Phase: 12-osint_iot_cloud_storage*
*Completed: 2026-04-06*
.planning/phases/12-osint_iot_cloud_storage/12-02-SUMMARY.md (new file, 103 lines)
@@ -0,0 +1,103 @@
---
phase: 12-osint_iot_cloud_storage
plan: 02
subsystem: recon
tags: [fofa, netlas, binaryedge, iot, osint, httptest]

requires:
  - phase: 09-osint-infrastructure
    provides: LimiterRegistry, shared Client retry/backoff HTTP
  - phase: 10-osint-code-hosting
    provides: ReconSource interface pattern, BuildQueries, keywordIndex helpers
provides:
  - FOFASource implementing recon.ReconSource for FOFA internet search
  - NetlasSource implementing recon.ReconSource for Netlas intelligence API
  - BinaryEdgeSource implementing recon.ReconSource for BinaryEdge data API
affects: [12-osint_iot_cloud_storage, cmd/recon]

tech-stack:
  added: []
  patterns: [base64-encoded query params for FOFA, X-API-Key header auth for Netlas, X-Key header auth for BinaryEdge]

key-files:
  created:
    - pkg/recon/sources/fofa.go
    - pkg/recon/sources/fofa_test.go
    - pkg/recon/sources/netlas.go
    - pkg/recon/sources/netlas_test.go
    - pkg/recon/sources/binaryedge.go
    - pkg/recon/sources/binaryedge_test.go
  modified: []

key-decisions:
  - "FOFA uses base64-encoded qbase64 param with email+key auth in query string"
  - "Netlas uses X-API-Key header; BinaryEdge uses X-Key header for auth"
  - "All three sources use bare keyword queries (default formatQuery path)"

patterns-established:
  - "IoT scanner source pattern: struct with APIKey/BaseURL/Registry/Limiters + lazy client init"

requirements-completed: [RECON-IOT-04, RECON-IOT-05, RECON-IOT-06]

duration: 2min
completed: 2026-04-06
---

# Phase 12 Plan 02: FOFA, Netlas, BinaryEdge Sources Summary

**Three IoT/device scanner recon sources (FOFA, Netlas, BinaryEdge) with httptest-based unit tests covering sweep, auth, and cancellation**

## Performance

- **Duration:** 2 min
- **Started:** 2026-04-06T09:22:18Z
- **Completed:** 2026-04-06T09:24:22Z
- **Tasks:** 2
- **Files modified:** 6

## Accomplishments
- FOFASource searches FOFA API with base64-encoded queries and email+key authentication
- NetlasSource searches Netlas API with X-API-Key header authentication
- BinaryEdgeSource searches BinaryEdge API with X-Key header authentication
- All three sources follow established Phase 10 pattern with shared Client, LimiterRegistry, BuildQueries

## Task Commits

Each task was committed atomically:

1. **Task 1: Implement FOFASource, NetlasSource, BinaryEdgeSource** - `270bbbf` (feat)
2. **Task 2: Unit tests for FOFA, Netlas, BinaryEdge sources** - `d6c35f4` (test)

## Files Created/Modified
- `pkg/recon/sources/fofa.go` - FOFASource with base64 query encoding and dual-credential auth
- `pkg/recon/sources/fofa_test.go` - httptest tests for FOFA sweep, credentials, cancellation
- `pkg/recon/sources/netlas.go` - NetlasSource with X-API-Key header auth
- `pkg/recon/sources/netlas_test.go` - httptest tests for Netlas sweep, credentials, cancellation
- `pkg/recon/sources/binaryedge.go` - BinaryEdgeSource with X-Key header auth
- `pkg/recon/sources/binaryedge_test.go` - httptest tests for BinaryEdge sweep, credentials, cancellation

## Decisions Made
- FOFA uses base64-encoded qbase64 query parameter (matching FOFA API spec) with email+key in query string (see the sketch after this list)
- Netlas uses X-API-Key header; BinaryEdge uses X-Key header (matching their respective API specs)
- All three use bare keyword queries via default formatQuery path (no source-specific query formatting needed)
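
A hedged sketch of the FOFA query encoding decided above (the qbase64 parameter and email+key query-string auth follow FOFA's public API; the variable names are illustrative, not this repo's actual code):

```go
// FOFA expects the search expression base64-encoded into qbase64, with the
// account email and API key passed as plain query parameters beside it.
expr := base64.StdEncoding.EncodeToString([]byte(keyword))
u := fmt.Sprintf("https://fofa.info/api/v1/search/all?email=%s&key=%s&qbase64=%s",
    url.QueryEscape(email), url.QueryEscape(apiKey), url.QueryEscape(expr))
// By contrast, Netlas and BinaryEdge carry credentials in headers instead:
//   req.Header.Set("X-API-Key", netlasKey)  // Netlas
//   req.Header.Set("X-Key", binaryEdgeKey)  // BinaryEdge
_ = u // issue the GET with the shared retry client
```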

## Deviations from Plan

None - plan executed exactly as written.

## Issues Encountered
None

## Known Stubs
None

## User Setup Required
None - no external service configuration required.

## Next Phase Readiness
- Three IoT scanner sources ready for RegisterAll wiring
- FOFA requires email + API key; Netlas and BinaryEdge require API key only

---
*Phase: 12-osint_iot_cloud_storage*
*Completed: 2026-04-06*
.planning/phases/12-osint_iot_cloud_storage/12-03-SUMMARY.md (new file, 115 lines)
@@ -0,0 +1,115 @@
---
phase: 12-osint_iot_cloud_storage
plan: 03
subsystem: recon
tags: [s3, gcs, azure-blob, digitalocean-spaces, cloud-storage, osint, bucket-enumeration]

requires:
  - phase: 09-osint-infrastructure
    provides: "LimiterRegistry, ReconSource interface, shared Client"
  - phase: 10-osint-code-hosting
    provides: "BuildQueries, RegisterAll pattern, sources.Client"
provides:
  - "S3Scanner — public AWS S3 bucket enumeration recon source"
  - "GCSScanner — public GCS bucket enumeration recon source"
  - "AzureBlobScanner — public Azure Blob container enumeration recon source"
  - "DOSpacesScanner — public DigitalOcean Spaces enumeration recon source"
  - "bucketNames() shared helper for provider-keyword bucket name generation"
  - "isConfigFile() shared helper for config-pattern file detection"
affects: [12-osint_iot_cloud_storage, register-all-wiring]

tech-stack:
  added: []
  patterns: ["credentialless cloud bucket enumeration via anonymous HTTP HEAD+GET"]

key-files:
  created:
    - pkg/recon/sources/s3scanner.go
    - pkg/recon/sources/gcsscanner.go
    - pkg/recon/sources/azureblob.go
    - pkg/recon/sources/dospaces.go
    - pkg/recon/sources/s3scanner_test.go
    - pkg/recon/sources/gcsscanner_test.go
    - pkg/recon/sources/azureblob_test.go
    - pkg/recon/sources/dospaces_test.go
  modified: []

key-decisions:
  - "bucketNames generates candidates from provider names + suffixes (not keywords) to produce readable bucket names"
  - "HEAD probe before GET listing to avoid unnecessary bandwidth on non-public buckets"
  - "isConfigFile checks extensions and common basenames (.env, config.*, credentials.*) without downloading contents"
  - "Azure iterates fixed container names (config, secrets, backup, etc.) within each account"
  - "DO Spaces iterates 5 regions (nyc3, sfo3, ams3, sgp1, fra1) per bucket"

patterns-established:
  - "Cloud scanner pattern: HEAD probe for existence, GET for listing, filter by isConfigFile"
  - "BaseURL override pattern with %s placeholder for httptest injection"

requirements-completed: [RECON-CLOUD-01, RECON-CLOUD-02, RECON-CLOUD-03, RECON-CLOUD-04]

duration: 4min
completed: 2026-04-06
---
|
||||||
|
|
||||||
|
# Phase 12 Plan 03: Cloud Storage Scanners Summary
|
||||||
|
|
||||||
|
**Four credentialless cloud storage recon sources (S3, GCS, Azure Blob, DO Spaces) with provider-keyword bucket enumeration and config-file pattern detection**
|
||||||
|
|
||||||
|
## Performance
|
||||||
|
|
||||||
|
- **Duration:** 4 min
|
||||||
|
- **Started:** 2026-04-06T09:22:08Z
|
||||||
|
- **Completed:** 2026-04-06T09:26:11Z
|
||||||
|
- **Tasks:** 2
|
||||||
|
- **Files modified:** 8
|
||||||
|
|
||||||
|
## Accomplishments
|
||||||
|
- S3Scanner enumerates public AWS S3 buckets using S3 ListBucketResult XML parsing
|
||||||
|
- GCSScanner enumerates public GCS buckets using JSON listing format
|
||||||
|
- AzureBlobScanner enumerates public Azure Blob containers using EnumerationResults XML
|
||||||
|
- DOSpacesScanner enumerates public DO Spaces across 5 regions using S3-compatible XML
|
||||||
|
- Shared bucketNames() generates candidates from provider names + common suffixes
|
||||||
|
- Shared isConfigFile() detects .env, .json, .yaml, .toml, .conf and similar patterns
|
||||||
|
|
||||||
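The real helpers live in `pkg/recon/sources/s3scanner.go`, which this compare view does not show; the following is a minimal sketch of the behavior described above, with the suffix list and basename set assumed for illustration:

```go
package main

import (
	"fmt"
	"path"
	"strings"
)

// bucketNamesSketch derives candidate bucket/account names from provider
// names plus a small set of common suffixes. The suffix list is an assumption.
func bucketNamesSketch(providerNames []string) []string {
	suffixes := []string{"", "-keys", "-config", "-backup", "-secrets"}
	var out []string
	for _, p := range providerNames {
		base := strings.ToLower(strings.TrimSpace(p))
		if base == "" {
			continue
		}
		for _, s := range suffixes {
			out = append(out, base+s)
		}
	}
	return out
}

// isConfigFileSketch approximates the described check: match by extension and
// a few well-known basenames, never by downloading object contents.
func isConfigFileSketch(key string) bool {
	switch strings.ToLower(path.Ext(key)) {
	case ".env", ".json", ".yaml", ".yml", ".toml", ".conf":
		return true
	}
	base := strings.ToLower(path.Base(key))
	return base == ".env" || strings.HasPrefix(base, ".env.") ||
		strings.HasPrefix(base, "config.") || strings.HasPrefix(base, "credentials.")
}

func main() {
	fmt.Println(bucketNamesSketch([]string{"testprov"})) // includes "testprov-keys"
	fmt.Println(isConfigFileSketch(".env.production"))   // true
	fmt.Println(isConfigFileSketch("logo.svg"))          // false
}
```

This is consistent with the fixtures in the tests further down this diff, where `.env`, `credentials.json`, `.env.production`, and `app.conf` match while `photo.png` and `logo.svg` do not.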
## Task Commits

Each task was committed atomically:

1. **Task 1: Implement S3Scanner and GCSScanner** - `47d542b` (feat)
2. **Task 2: Implement AzureBlobScanner, DOSpacesScanner, and all tests** - `13905eb` (feat)

## Files Created/Modified

- `pkg/recon/sources/s3scanner.go` - S3 bucket enumeration with XML ListBucketResult parsing
- `pkg/recon/sources/gcsscanner.go` - GCS bucket enumeration with JSON listing parsing
- `pkg/recon/sources/azureblob.go` - Azure Blob container enumeration with XML EnumerationResults parsing
- `pkg/recon/sources/dospaces.go` - DO Spaces enumeration across 5 regions (S3-compatible XML)
- `pkg/recon/sources/s3scanner_test.go` - httptest tests for S3Scanner
- `pkg/recon/sources/gcsscanner_test.go` - httptest tests for GCSScanner
- `pkg/recon/sources/azureblob_test.go` - httptest tests for AzureBlobScanner
- `pkg/recon/sources/dospaces_test.go` - httptest tests for DOSpacesScanner

## Decisions Made

- bucketNames uses the provider Name (not Keywords) as the base for bucket name generation, producing more realistic bucket names like "openai-keys" instead of "sk-proj--keys"
- HEAD probe before GET to minimize bandwidth on non-public buckets (see the sketch after this list)
- Azure iterates a fixed list of common container names within each generated account name
- DO Spaces iterates all 5 supported regions per bucket name
- Tests omit rate limiters (nil Limiters) to avoid test slowness from the 500ms rate limit across many bucket/region combinations
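A minimal sketch of the HEAD-before-GET probe: a cheap HEAD decides whether a bucket is publicly listable before the GET pays for the listing body. The endpoint handling is simplified here; the real version is `listSpace` in `dospaces.go`, shown later in this diff:

```go
package main

import (
	"context"
	"fmt"
	"io"
	"net/http"
	"time"
)

// probeThenList returns the listing body only when the HEAD probe says the
// endpoint is publicly readable; otherwise it skips without downloading.
func probeThenList(ctx context.Context, endpoint string) ([]byte, error) {
	client := &http.Client{Timeout: 10 * time.Second}

	head, err := http.NewRequestWithContext(ctx, http.MethodHead, endpoint, nil)
	if err != nil {
		return nil, err
	}
	resp, err := client.Do(head)
	if err != nil {
		return nil, err
	}
	resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, nil // not public: skip without fetching the body
	}

	get, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, err
	}
	getResp, err := client.Do(get)
	if err != nil {
		return nil, err
	}
	defer getResp.Body.Close()
	return io.ReadAll(io.LimitReader(getResp.Body, 1<<20))
}

func main() {
	body, err := probeThenList(context.Background(), "https://example.com/")
	fmt.Println(len(body), err)
}
```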
## Deviations from Plan

None - plan executed exactly as written.

## Issues Encountered

- Azure and DO Spaces tests initially timed out due to rate limiter overhead (9 bucket names x 7 containers = 63 requests at 500ms each). Resolved by omitting rate limiters in tests, since rate limiting is tested at the LimiterRegistry level.

## User Setup Required

None - no external service configuration required.

## Next Phase Readiness

- Four cloud storage scanners ready for RegisterAll wiring
- Sources use the same pattern as the Phase 10/11 sources (BaseURL override, shared Client, LimiterRegistry)

---

*Phase: 12-osint_iot_cloud_storage*

*Completed: 2026-04-06*

117 .planning/phases/12-osint_iot_cloud_storage/12-04-SUMMARY.md Normal file
@@ -0,0 +1,117 @@
---
phase: 12-osint_iot_cloud_storage
plan: 04
subsystem: recon
tags: [shodan, censys, zoomeye, fofa, netlas, binaryedge, s3, gcs, azureblob, spaces, registerall, integration-test]

requires:
  - phase: 12-01
    provides: Shodan, Censys, ZoomEye source implementations
  - phase: 12-02
    provides: FOFA, Netlas, BinaryEdge source implementations
  - phase: 12-03
    provides: S3, GCS, AzureBlob, DOSpaces scanner implementations
provides:
  - RegisterAll wiring for all 28 sources (Phase 10-11-12)
  - cmd/recon.go credential lookup for 6 IoT scanner APIs
  - Integration test covering all 28 sources end-to-end
affects: [phase-13, phase-14, phase-15, phase-16]

tech-stack:
  added: []
  patterns: [per-phase RegisterAll extension, env+viper credential precedence chain]

key-files:
  created: []
  modified:
    - pkg/recon/sources/register.go
    - cmd/recon.go
    - pkg/recon/sources/integration_test.go
    - pkg/recon/sources/register_test.go

key-decisions:
  - "Cloud storage sources registered as credentialless (Enabled()==true always); IoT sources require API keys"
  - "Integration test uses separate cloud storage handlers per format (S3 XML, GCS JSON, Azure EnumerationResults XML)"

patterns-established:
  - "Phase source wiring: extend SourcesConfig + RegisterAll + cmd/recon.go buildReconEngine + integration test in lockstep"

requirements-completed: [RECON-IOT-01, RECON-IOT-02, RECON-IOT-03, RECON-IOT-04, RECON-IOT-05, RECON-IOT-06, RECON-CLOUD-01, RECON-CLOUD-02, RECON-CLOUD-03, RECON-CLOUD-04]

duration: 14min
completed: 2026-04-06
---

# Phase 12 Plan 04: RegisterAll Wiring + Integration Test Summary

**Wire all 10 Phase 12 IoT/cloud sources into RegisterAll with env/viper credentials and 28-source integration test**

## Performance

- **Duration:** 14 min
- **Started:** 2026-04-06T09:28:20Z
- **Completed:** 2026-04-06T09:42:09Z
- **Tasks:** 2
- **Files modified:** 4

## Accomplishments

- Extended SourcesConfig with 8 credential fields for 6 IoT scanner APIs (Shodan, Censys, ZoomEye, FOFA, Netlas, BinaryEdge)
- Registered all 10 Phase 12 sources in RegisterAll (6 IoT + 4 cloud storage), bringing the total to 28
- Wired env var + viper config credential lookup in cmd/recon.go for all Phase 12 sources (see the sketch after this list)
- Integration test verifies all 28 sources produce findings through a multiplexed httptest server
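The env-over-config precedence runs through a `firstNonEmpty` helper; its call shape is visible in the cmd/recon.go hunk further down, but the body is not shown there, so the following is a minimal sketch with the implementation assumed:

```go
package main

import (
	"fmt"
	"os"
)

// firstNonEmpty returns the first non-empty string, which gives environment
// variables precedence over config-file values. Body assumed; only the call
// shape appears in the cmd/recon.go diff below.
func firstNonEmpty(values ...string) string {
	for _, v := range values {
		if v != "" {
			return v
		}
	}
	return ""
}

func main() {
	// Hypothetical stand-in for viper.GetString("recon.shodan.api_key").
	configValue := "key-from-config"
	key := firstNonEmpty(os.Getenv("SHODAN_API_KEY"), configValue)
	fmt.Println(key) // the env var wins whenever SHODAN_API_KEY is set
}
```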
## Task Commits

Each task was committed atomically:

1. **Task 1: Extend SourcesConfig, RegisterAll, and cmd/recon.go** - `8704316` (feat)
2. **Task 2: Integration test for all 28 registered sources** - `f0f2219` (test)

## Files Created/Modified

- `pkg/recon/sources/register.go` - Added Phase 12 credential fields + source registrations (28 total)
- `cmd/recon.go` - Added env/viper credential wiring for 8 IoT scanner fields
- `pkg/recon/sources/integration_test.go` - Extended with Phase 12 IoT + cloud storage fixtures and assertions
- `pkg/recon/sources/register_test.go` - Updated expected source count from 18 to 28

## Decisions Made

- Cloud storage sources (S3, GCS, AzureBlob, DOSpaces) are credentialless and always enabled
- IoT sources require API keys and report Enabled()==false when credentials are empty
- Integration test uses format-specific handlers: S3/DOSpaces share the S3 XML handler, GCS gets a JSON handler, AzureBlob gets an EnumerationResults XML handler (see the sketch after this list)
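A minimal sketch of that format-specific handler routing; the paths and payloads here are illustrative stand-ins, not copied from integration_test.go:

```go
package main

import (
	"net/http"
	"net/http/httptest"
)

// newCloudStubServer multiplexes one httptest server across the three cloud
// listing formats: S3 and DO Spaces share the ListBucketResult XML shape,
// GCS answers JSON, and Azure answers EnumerationResults XML.
func newCloudStubServer() *httptest.Server {
	mux := http.NewServeMux()
	mux.HandleFunc("/s3/", func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/xml")
		_, _ = w.Write([]byte(`<ListBucketResult><Contents><Key>.env</Key></Contents></ListBucketResult>`))
	})
	mux.HandleFunc("/gcs/", func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"items":[{"name":".env"}]}`))
	})
	mux.HandleFunc("/azure/", func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/xml")
		_, _ = w.Write([]byte(`<EnumerationResults><Blobs><Blob><Name>.env</Name></Blob></Blobs></EnumerationResults>`))
	})
	return httptest.NewServer(mux)
}

func main() {
	srv := newCloudStubServer()
	defer srv.Close()
}
```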
## Deviations from Plan

### Auto-fixed Issues

**1. [Rule 1 - Bug] Updated existing register_test.go expected source count**

- **Found during:** Task 2 (integration test)
- **Issue:** TestRegisterAll_WiresAllEighteenSources and TestRegisterAll_MissingCredsStillRegistered expected 18 sources, now 28
- **Fix:** Updated the expected count to 28 and added all Phase 12 source names to the expected list
- **Files modified:** pkg/recon/sources/register_test.go
- **Verification:** All RegisterAll tests pass
- **Committed in:** f0f2219 (Task 2 commit)

**2. [Rule 3 - Blocking] Merged main branch to get Phase 12 source files**

- **Found during:** Task 1 (build verification)
- **Issue:** Worktree branch did not have the Phase 12-01/12-02 source files (shodan.go, censys.go, etc.)
- **Fix:** Merged main branch into the worktree (fast-forward)
- **Verification:** go build ./cmd/... succeeds

---

**Total deviations:** 2 auto-fixed (1 bug, 1 blocking)

**Impact on plan:** Both fixes were necessary for correctness. No scope creep.

## Issues Encountered

None beyond the deviations listed above.

## User Setup Required

None - no external service configuration required.

## Next Phase Readiness

- All 28 OSINT sources are wired and discoverable via `keyhunter recon list`
- Phase 13+ sources can follow the same pattern: add fields to SourcesConfig, register in RegisterAll, wire credentials in cmd/recon.go (see the sketch after this list)
- Integration test template established for validating all sources end-to-end
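A comment-only sketch of that three-step pattern for a hypothetical Phase 13 source; `NpmSource` and `NPM_API_KEY` are invented names for illustration, and the exact registration call inside RegisterAll is not shown in this diff:

```go
// Phase 13+ wiring sketch (hypothetical NpmSource).
package sources

// Step 1 - extend SourcesConfig in pkg/recon/sources/register.go:
//
//	NpmAPIKey string
//
// Step 2 - add a registration entry inside RegisterAll alongside the
// existing Phase 10-12 sources, passing cfg.NpmAPIKey to the new source.
//
// Step 3 - wire credential precedence in cmd/recon.go's buildReconEngine:
//
//	NpmAPIKey: firstNonEmpty(os.Getenv("NPM_API_KEY"), viper.GetString("recon.npm.api_key")),
```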
---

*Phase: 12-osint_iot_cloud_storage*

*Completed: 2026-04-06*

10 cmd/recon.go
@@ -26,7 +26,7 @@ var (
 var reconCmd = &cobra.Command{
 	Use:   "recon",
 	Short: "Run OSINT recon across internet sources",
-	Long:  "Run OSINT recon sweeps across registered sources. Phase 10 adds ten code-hosting sources (GitHub/GitLab/Bitbucket/Gist/Codeberg/HuggingFace/Replit/CodeSandbox/Sandboxes/Kaggle). Phase 11 adds search engine dorking (Google/Bing/DuckDuckGo/Yandex/Brave) and paste site scanning (Pastebin/GistPaste/PasteSites).",
+	Long:  "Run OSINT recon sweeps across registered sources. Phase 10 adds ten code-hosting sources (GitHub/GitLab/Bitbucket/Gist/Codeberg/HuggingFace/Replit/CodeSandbox/Sandboxes/Kaggle). Phase 11 adds search engine dorking (Google/Bing/DuckDuckGo/Yandex/Brave) and paste site scanning (Pastebin/GistPaste/PasteSites). Phase 12 adds IoT scanners (Shodan/Censys/ZoomEye/FOFA/Netlas/BinaryEdge) and cloud storage scanners (S3/GCS/AzureBlob/Spaces).",
 }

 var reconFullCmd = &cobra.Command{
@@ -159,6 +159,14 @@ func buildReconEngine() *recon.Engine {
 		YandexUser:       firstNonEmpty(os.Getenv("YANDEX_USER"), viper.GetString("recon.yandex.user")),
 		YandexAPIKey:     firstNonEmpty(os.Getenv("YANDEX_API_KEY"), viper.GetString("recon.yandex.api_key")),
 		BraveAPIKey:      firstNonEmpty(os.Getenv("BRAVE_API_KEY"), viper.GetString("recon.brave.api_key")),
+		ShodanAPIKey:     firstNonEmpty(os.Getenv("SHODAN_API_KEY"), viper.GetString("recon.shodan.api_key")),
+		CensysAPIId:      firstNonEmpty(os.Getenv("CENSYS_API_ID"), viper.GetString("recon.censys.api_id")),
+		CensysAPISecret:  firstNonEmpty(os.Getenv("CENSYS_API_SECRET"), viper.GetString("recon.censys.api_secret")),
+		ZoomEyeAPIKey:    firstNonEmpty(os.Getenv("ZOOMEYE_API_KEY"), viper.GetString("recon.zoomeye.api_key")),
+		FOFAEmail:        firstNonEmpty(os.Getenv("FOFA_EMAIL"), viper.GetString("recon.fofa.email")),
+		FOFAAPIKey:       firstNonEmpty(os.Getenv("FOFA_API_KEY"), viper.GetString("recon.fofa.api_key")),
+		NetlasAPIKey:     firstNonEmpty(os.Getenv("NETLAS_API_KEY"), viper.GetString("recon.netlas.api_key")),
+		BinaryEdgeAPIKey: firstNonEmpty(os.Getenv("BINARYEDGE_API_KEY"), viper.GetString("recon.binaryedge.api_key")),
 	}
 	sources.RegisterAll(e, cfg)
 	return e

145 pkg/recon/sources/azureblob.go Normal file
@@ -0,0 +1,145 @@
package sources

import (
	"context"
	"encoding/xml"
	"fmt"
	"io"
	"log"
	"net/http"
	"time"

	"golang.org/x/time/rate"

	"github.com/salvacybersec/keyhunter/pkg/providers"
	"github.com/salvacybersec/keyhunter/pkg/recon"
)

// AzureBlobScanner enumerates publicly accessible Azure Blob Storage containers
// by name pattern and flags readable objects that match common config-file
// patterns as potential API key exposure vectors.
//
// Credentialless: uses anonymous HTTP to probe public Azure Blob containers.
type AzureBlobScanner struct {
	Registry *providers.Registry
	Limiters *recon.LimiterRegistry
	// BaseURL overrides the Azure Blob endpoint for tests.
	// Default: "https://%s.blob.core.windows.net/%s?restype=container&comp=list"
	// Must contain two %s placeholders: account name and container name.
	BaseURL string
	client  *Client
}

// Compile-time assertion.
var _ recon.ReconSource = (*AzureBlobScanner)(nil)

func (a *AzureBlobScanner) Name() string                { return "azureblob" }
func (a *AzureBlobScanner) RateLimit() rate.Limit       { return rate.Every(500 * time.Millisecond) }
func (a *AzureBlobScanner) Burst() int                  { return 3 }
func (a *AzureBlobScanner) RespectsRobots() bool        { return false }
func (a *AzureBlobScanner) Enabled(_ recon.Config) bool { return true }

// azureContainerNames are common container names to probe within each account.
var azureContainerNames = []string{
	"config", "secrets", "backup", "data", "keys", "env", "credentials",
}

func (a *AzureBlobScanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
	client := a.client
	if client == nil {
		client = NewClient()
	}
	baseURL := a.BaseURL
	if baseURL == "" {
		baseURL = "https://%s.blob.core.windows.net/%s?restype=container&comp=list"
	}

	accounts := bucketNames(a.Registry)
	if len(accounts) == 0 {
		return nil
	}

	for _, account := range accounts {
		if err := ctx.Err(); err != nil {
			return err
		}

		for _, container := range azureContainerNames {
			if err := ctx.Err(); err != nil {
				return err
			}
			if a.Limiters != nil {
				if err := a.Limiters.Wait(ctx, a.Name(), a.RateLimit(), a.Burst(), false); err != nil {
					return err
				}
			}

			endpoint := fmt.Sprintf(baseURL, account, container)
			blobs, err := a.listBlobs(ctx, client, endpoint)
			if err != nil {
				log.Printf("azureblob: account %q container %q probe failed (skipping): %v", account, container, err)
				continue
			}

			for _, name := range blobs {
				if !isConfigFile(name) {
					continue
				}
				out <- recon.Finding{
					Source:     fmt.Sprintf("azure://%s/%s/%s", account, container, name),
					SourceType: "recon:azureblob",
					Confidence: "medium",
					DetectedAt: time.Now(),
				}
			}
		}
	}
	return nil
}

// listBlobs fetches and parses Azure Blob container listing XML.
func (a *AzureBlobScanner) listBlobs(ctx context.Context, client *Client, endpoint string) ([]string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, err
	}
	resp, err := client.Do(ctx, req)
	if err != nil {
		return nil, nil // non-public or non-existent — skip silently
	}
	defer resp.Body.Close()

	return parseAzureBlobXML(resp.Body)
}

// azureEnumBlobResults models the Azure Blob EnumerationResults XML.
type azureEnumBlobResults struct {
	XMLName xml.Name   `xml:"EnumerationResults"`
	Blobs   azureBlobs `xml:"Blobs"`
}

type azureBlobs struct {
	Blob []azureBlob `xml:"Blob"`
}

type azureBlob struct {
	Name string `xml:"Name"`
}

func parseAzureBlobXML(r io.Reader) ([]string, error) {
	data, err := io.ReadAll(io.LimitReader(r, 1<<20))
	if err != nil {
		return nil, err
	}
	var result azureEnumBlobResults
	if err := xml.Unmarshal(data, &result); err != nil {
		return nil, err
	}
	names := make([]string, 0, len(result.Blobs.Blob))
	for _, b := range result.Blobs.Blob {
		if b.Name != "" {
			names = append(names, b.Name)
		}
	}
	return names, nil
}

130 pkg/recon/sources/azureblob_test.go Normal file
@@ -0,0 +1,130 @@
package sources

import (
	"context"
	"net/http"
	"net/http/httptest"
	"testing"
	"time"

	"github.com/salvacybersec/keyhunter/pkg/providers"
	"github.com/salvacybersec/keyhunter/pkg/recon"
)

func azureTestServer() *httptest.Server {
	mux := http.NewServeMux()

	// Respond to any request path that contains "testprov-keys" account + "config" container.
	mux.HandleFunc("/testprov-keys/config", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/xml")
		_, _ = w.Write([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<EnumerationResults>
<Blobs>
<Blob><Name>.env</Name></Blob>
<Blob><Name>credentials.json</Name></Blob>
<Blob><Name>photo.png</Name></Blob>
</Blobs>
</EnumerationResults>`))
	})

	// All other containers return error.
	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusNotFound)
	})

	return httptest.NewServer(mux)
}

func TestAzureBlob_Sweep(t *testing.T) {
	srv := azureTestServer()
	defer srv.Close()

	// BaseURL format: server/{account}/{container}?params
	// We use a simplified format for tests.
	src := &AzureBlobScanner{
		Registry: cloudTestRegistry(),
		BaseURL:  srv.URL + "/%s/%s",
		client:   NewClient(),
	}

	out := make(chan recon.Finding, 64)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	if err := src.Sweep(ctx, "", out); err != nil {
		t.Fatalf("Sweep error: %v", err)
	}
	close(out)

	var findings []recon.Finding
	for f := range out {
		findings = append(findings, f)
	}

	// .env and credentials.json match; photo.png does not.
	// Only the "config" container returns results; others 404.
	if len(findings) != 2 {
		t.Fatalf("expected 2 findings, got %d: %+v", len(findings), findings)
	}

	for _, f := range findings {
		if f.SourceType != "recon:azureblob" {
			t.Errorf("unexpected SourceType: %s", f.SourceType)
		}
		if f.Confidence != "medium" {
			t.Errorf("unexpected Confidence: %s", f.Confidence)
		}
	}
}

func TestAzureBlob_EmptyRegistry(t *testing.T) {
	src := &AzureBlobScanner{
		Registry: providers.NewRegistryFromProviders(nil),
		Limiters: recon.NewLimiterRegistry(),
		client:   NewClient(),
	}

	out := make(chan recon.Finding, 4)
	if err := src.Sweep(context.Background(), "", out); err != nil {
		t.Fatalf("Sweep error: %v", err)
	}
	close(out)
	if len(out) != 0 {
		t.Fatal("expected 0 findings")
	}
}

func TestAzureBlob_CtxCancelled(t *testing.T) {
	srv := azureTestServer()
	defer srv.Close()

	src := &AzureBlobScanner{
		Registry: cloudTestRegistry(),
		BaseURL:  srv.URL + "/%s/%s",
		client:   NewClient(),
	}

	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	out := make(chan recon.Finding, 4)
	if err := src.Sweep(ctx, "", out); err == nil {
		t.Fatal("expected ctx error")
	}
}

func TestAzureBlob_EnabledAndMeta(t *testing.T) {
	a := &AzureBlobScanner{}
	if a.Name() != "azureblob" {
		t.Fatalf("unexpected name: %s", a.Name())
	}
	if !a.Enabled(recon.Config{}) {
		t.Fatal("expected Enabled=true")
	}
	if a.RespectsRobots() {
		t.Fatal("expected RespectsRobots=false")
	}
	if a.Burst() != 3 {
		t.Fatal("expected Burst=3")
	}
}

147 pkg/recon/sources/binaryedge.go Normal file
@@ -0,0 +1,147 @@
package sources

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"net/url"
	"strings"
	"time"

	"golang.org/x/time/rate"

	"github.com/salvacybersec/keyhunter/pkg/providers"
	"github.com/salvacybersec/keyhunter/pkg/recon"
)

// BinaryEdgeSource implements recon.ReconSource against the BinaryEdge
// internet data API. It iterates provider keyword queries and emits a Finding
// per result event.
//
// A missing API key disables the source without error.
type BinaryEdgeSource struct {
	APIKey   string
	BaseURL  string
	Registry *providers.Registry
	Limiters *recon.LimiterRegistry
	client   *Client
}

// Compile-time assertion.
var _ recon.ReconSource = (*BinaryEdgeSource)(nil)

func (s *BinaryEdgeSource) Name() string          { return "binaryedge" }
func (s *BinaryEdgeSource) RateLimit() rate.Limit { return rate.Every(2 * time.Second) }
func (s *BinaryEdgeSource) Burst() int            { return 1 }
func (s *BinaryEdgeSource) RespectsRobots() bool  { return false }

// Enabled returns true only when APIKey is configured.
func (s *BinaryEdgeSource) Enabled(_ recon.Config) bool { return s.APIKey != "" }

// Sweep issues one BinaryEdge search request per provider keyword and emits
// a Finding for every result event.
func (s *BinaryEdgeSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
	if s.APIKey == "" {
		return nil
	}
	if s.client == nil {
		s.client = NewClient()
	}
	base := s.BaseURL
	if base == "" {
		base = "https://api.binaryedge.io"
	}

	queries := BuildQueries(s.Registry, "binaryedge")
	kwIndex := binaryedgeKeywordIndex(s.Registry)

	for _, q := range queries {
		if err := ctx.Err(); err != nil {
			return err
		}
		if s.Limiters != nil {
			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
				return err
			}
		}

		endpoint := fmt.Sprintf("%s/v2/query/search?query=%s&page=1",
			base, url.QueryEscape(q))
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
		if err != nil {
			return fmt.Errorf("binaryedge: build request: %w", err)
		}
		req.Header.Set("X-Key", s.APIKey)
		req.Header.Set("Accept", "application/json")

		resp, err := s.client.Do(ctx, req)
		if err != nil {
			if errors.Is(err, ErrUnauthorized) {
				return err
			}
			if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
				return err
			}
			continue
		}

		var parsed binaryedgeSearchResponse
		decErr := json.NewDecoder(resp.Body).Decode(&parsed)
		_ = resp.Body.Close()
		if decErr != nil {
			continue
		}

		provName := kwIndex[strings.ToLower(q)]
		for _, ev := range parsed.Events {
			f := recon.Finding{
				ProviderName: provName,
				Confidence:   "low",
				Source:       fmt.Sprintf("binaryedge://%s:%d", ev.Target.IP, ev.Target.Port),
				SourceType:   "recon:binaryedge",
				DetectedAt:   time.Now(),
			}
			select {
			case out <- f:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	}
	return nil
}

type binaryedgeSearchResponse struct {
	Events []binaryedgeEvent `json:"events"`
}

type binaryedgeEvent struct {
	Target binaryedgeTarget `json:"target"`
}

type binaryedgeTarget struct {
	IP   string `json:"ip"`
	Port int    `json:"port"`
}

// binaryedgeKeywordIndex maps lowercased keywords to provider names.
func binaryedgeKeywordIndex(reg *providers.Registry) map[string]string {
	m := make(map[string]string)
	if reg == nil {
		return m
	}
	for _, p := range reg.List() {
		for _, k := range p.Keywords {
			kl := strings.ToLower(strings.TrimSpace(k))
			if kl == "" {
				continue
			}
			if _, exists := m[kl]; !exists {
				m[kl] = p.Name
			}
		}
	}
	return m
}

117 pkg/recon/sources/binaryedge_test.go Normal file
@@ -0,0 +1,117 @@
package sources

import (
	"context"
	"encoding/json"
	"errors"
	"net/http"
	"net/http/httptest"
	"strings"
	"sync/atomic"
	"testing"
	"time"

	"github.com/salvacybersec/keyhunter/pkg/recon"
)

func binaryedgeStubHandler(t *testing.T, calls *int32) http.HandlerFunc {
	t.Helper()
	return func(w http.ResponseWriter, r *http.Request) {
		atomic.AddInt32(calls, 1)
		if !strings.HasPrefix(r.URL.Path, "/v2/query/search") {
			t.Errorf("unexpected path: %s", r.URL.Path)
		}
		if got := r.Header.Get("X-Key"); got != "testkey" {
			t.Errorf("missing X-Key header: %q", got)
		}
		body := binaryedgeSearchResponse{
			Events: []binaryedgeEvent{
				{Target: binaryedgeTarget{IP: "192.168.1.1", Port: 80}},
				{Target: binaryedgeTarget{IP: "192.168.1.2", Port: 443}},
			},
		}
		w.Header().Set("Content-Type", "application/json")
		_ = json.NewEncoder(w).Encode(body)
	}
}

func TestBinaryEdgeSource_EnabledRequiresAPIKey(t *testing.T) {
	reg := syntheticRegistry()
	lim := recon.NewLimiterRegistry()

	s := &BinaryEdgeSource{APIKey: "", Registry: reg, Limiters: lim}
	if s.Enabled(recon.Config{}) {
		t.Error("expected Enabled=false with empty key")
	}
	s = &BinaryEdgeSource{APIKey: "key", Registry: reg, Limiters: lim}
	if !s.Enabled(recon.Config{}) {
		t.Error("expected Enabled=true with key")
	}
}

func TestBinaryEdgeSource_SweepEmitsFindings(t *testing.T) {
	reg := syntheticRegistry()
	lim := recon.NewLimiterRegistry()
	_ = lim.For("binaryedge", 1000, 100)

	var calls int32
	srv := httptest.NewServer(binaryedgeStubHandler(t, &calls))
	defer srv.Close()

	s := &BinaryEdgeSource{
		APIKey:   "testkey",
		BaseURL:  srv.URL,
		Registry: reg,
		Limiters: lim,
	}

	out := make(chan recon.Finding, 32)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	done := make(chan error, 1)
	go func() { done <- s.Sweep(ctx, "", out); close(out) }()

	var findings []recon.Finding
	for f := range out {
		findings = append(findings, f)
	}
	if err := <-done; err != nil {
		t.Fatalf("Sweep error: %v", err)
	}

	// 2 keywords * 2 events = 4 findings
	if len(findings) != 4 {
		t.Fatalf("expected 4 findings, got %d", len(findings))
	}
	for _, f := range findings {
		if f.SourceType != "recon:binaryedge" {
			t.Errorf("SourceType=%q want recon:binaryedge", f.SourceType)
		}
	}
	if got := atomic.LoadInt32(&calls); got != 2 {
		t.Errorf("expected 2 API calls, got %d", got)
	}
}

func TestBinaryEdgeSource_CtxCancelled(t *testing.T) {
	reg := syntheticRegistry()
	lim := recon.NewLimiterRegistry()
	_ = lim.For("binaryedge", 1000, 100)

	s := &BinaryEdgeSource{
		APIKey:   "key",
		BaseURL:  "http://127.0.0.1:1",
		Registry: reg,
		Limiters: lim,
	}

	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	out := make(chan recon.Finding, 1)
	err := s.Sweep(ctx, "", out)
	if !errors.Is(err, context.Canceled) {
		t.Fatalf("expected context.Canceled, got %v", err)
	}
}

170 pkg/recon/sources/censys.go Normal file
@@ -0,0 +1,170 @@
package sources

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"strings"
	"time"

	"golang.org/x/time/rate"

	"github.com/salvacybersec/keyhunter/pkg/providers"
	"github.com/salvacybersec/keyhunter/pkg/recon"
)

// CensysSource implements recon.ReconSource against the Censys v2 /hosts/search
// API. It iterates provider keyword queries and emits a Finding for every hit
// returned (exposed services leaking API keys).
//
// Missing API credentials disable the source without error.
type CensysSource struct {
	APIId     string
	APISecret string
	BaseURL   string
	Registry  *providers.Registry
	Limiters  *recon.LimiterRegistry
	client    *Client
}

// Compile-time assertion.
var _ recon.ReconSource = (*CensysSource)(nil)

// NewCensysSource constructs a CensysSource with the shared retry client.
func NewCensysSource(apiId, apiSecret string, reg *providers.Registry, lim *recon.LimiterRegistry) *CensysSource {
	return &CensysSource{
		APIId:     apiId,
		APISecret: apiSecret,
		BaseURL:   "https://search.censys.io/api",
		Registry:  reg,
		Limiters:  lim,
		client:    NewClient(),
	}
}

func (s *CensysSource) Name() string          { return "censys" }
func (s *CensysSource) RateLimit() rate.Limit { return rate.Every(2500 * time.Millisecond) }
func (s *CensysSource) Burst() int            { return 1 }
func (s *CensysSource) RespectsRobots() bool  { return false }

// Enabled returns true only when both APIId and APISecret are configured.
func (s *CensysSource) Enabled(_ recon.Config) bool {
	return s.APIId != "" && s.APISecret != ""
}

// Sweep issues one POST /v2/hosts/search request per provider keyword and
// emits a Finding for every hit returned.
func (s *CensysSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
	if s.APIId == "" || s.APISecret == "" {
		return nil
	}
	base := s.BaseURL
	if base == "" {
		base = "https://search.censys.io/api"
	}

	queries := BuildQueries(s.Registry, "censys")
	kwIndex := censysKeywordIndex(s.Registry)

	for _, q := range queries {
		if err := ctx.Err(); err != nil {
			return err
		}
		if s.Limiters != nil {
			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
				return err
			}
		}

		payload, _ := json.Marshal(map[string]any{
			"q":        q,
			"per_page": 25,
		})

		endpoint := fmt.Sprintf("%s/v2/hosts/search", base)
		req, err := http.NewRequestWithContext(ctx, http.MethodPost, endpoint, bytes.NewReader(payload))
		if err != nil {
			return fmt.Errorf("censys: build request: %w", err)
		}
		req.Header.Set("Content-Type", "application/json")
		req.Header.Set("Accept", "application/json")
		req.Header.Set("User-Agent", "keyhunter-recon")
		req.SetBasicAuth(s.APIId, s.APISecret)

		resp, err := s.client.Do(ctx, req)
		if err != nil {
			if errors.Is(err, ErrUnauthorized) {
				return err
			}
			if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
				return err
			}
			continue
		}

		var parsed censysSearchResponse
		decErr := json.NewDecoder(resp.Body).Decode(&parsed)
		_ = resp.Body.Close()
		if decErr != nil {
			continue
		}

		provName := kwIndex[strings.ToLower(q)]
		for _, hit := range parsed.Result.Hits {
			f := recon.Finding{
				ProviderName: provName,
				Confidence:   "low",
				Source:       fmt.Sprintf("censys://%s", hit.IP),
				SourceType:   "recon:censys",
				DetectedAt:   time.Now(),
			}
			select {
			case out <- f:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	}
	return nil
}

type censysSearchResponse struct {
	Result censysResult `json:"result"`
}

type censysResult struct {
	Hits []censysHit `json:"hits"`
}

type censysHit struct {
	IP       string          `json:"ip"`
	Services []censysService `json:"services"`
}

type censysService struct {
	Port        int    `json:"port"`
	ServiceName string `json:"service_name"`
}

// censysKeywordIndex maps lowercased keywords to provider names.
func censysKeywordIndex(reg *providers.Registry) map[string]string {
	m := make(map[string]string)
	if reg == nil {
		return m
	}
	for _, p := range reg.List() {
		for _, k := range p.Keywords {
			kl := strings.ToLower(strings.TrimSpace(k))
			if kl == "" {
				continue
			}
			if _, exists := m[kl]; !exists {
				m[kl] = p.Name
			}
		}
	}
	return m
}

130 pkg/recon/sources/censys_test.go Normal file
@@ -0,0 +1,130 @@
package sources

import (
	"context"
	"encoding/json"
	"errors"
	"net/http"
	"net/http/httptest"
	"sync/atomic"
	"testing"
	"time"

	"github.com/salvacybersec/keyhunter/pkg/recon"
)

func censysStubHandler(t *testing.T, calls *int32) http.HandlerFunc {
	t.Helper()
	return func(w http.ResponseWriter, r *http.Request) {
		atomic.AddInt32(calls, 1)
		if r.URL.Path != "/v2/hosts/search" {
			t.Errorf("unexpected path: %s", r.URL.Path)
		}
		if r.Method != http.MethodPost {
			t.Errorf("expected POST, got %s", r.Method)
		}
		user, pass, ok := r.BasicAuth()
		if !ok || user != "testid" || pass != "testsecret" {
			t.Errorf("missing/wrong basic auth: user=%q pass=%q ok=%v", user, pass, ok)
		}
		body := map[string]any{
			"result": map[string]any{
				"hits": []map[string]any{
					{"ip": "10.0.0.1", "services": []map[string]any{{"port": 443, "service_name": "HTTP"}}},
					{"ip": "10.0.0.2", "services": []map[string]any{{"port": 8080, "service_name": "HTTP"}}},
				},
			},
		}
		w.Header().Set("Content-Type", "application/json")
		_ = json.NewEncoder(w).Encode(body)
	}
}

func TestCensysSource_EnabledRequiresCredentials(t *testing.T) {
	reg := syntheticRegistry()
	lim := recon.NewLimiterRegistry()

	if s := NewCensysSource("", "", reg, lim); s.Enabled(recon.Config{}) {
		t.Error("expected Enabled=false with empty credentials")
	}
	if s := NewCensysSource("id", "", reg, lim); s.Enabled(recon.Config{}) {
		t.Error("expected Enabled=false with missing secret")
	}
	if s := NewCensysSource("id", "secret", reg, lim); !s.Enabled(recon.Config{}) {
		t.Error("expected Enabled=true with both credentials")
	}
}

func TestCensysSource_SweepEmptyCredsReturnsNil(t *testing.T) {
	reg := syntheticRegistry()
	lim := recon.NewLimiterRegistry()
	s := NewCensysSource("", "", reg, lim)

	out := make(chan recon.Finding, 10)
	if err := s.Sweep(context.Background(), "", out); err != nil {
		t.Fatalf("expected nil, got %v", err)
	}
	close(out)
	if n := countFindings(out); n != 0 {
		t.Fatalf("expected 0 findings, got %d", n)
	}
}

func TestCensysSource_SweepEmitsFindings(t *testing.T) {
	reg := syntheticRegistry()
	lim := recon.NewLimiterRegistry()
	_ = lim.For("censys", 1000, 100)

	var calls int32
	srv := httptest.NewServer(censysStubHandler(t, &calls))
	defer srv.Close()

	s := NewCensysSource("testid", "testsecret", reg, lim)
	s.BaseURL = srv.URL

	out := make(chan recon.Finding, 32)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	done := make(chan error, 1)
	go func() { done <- s.Sweep(ctx, "", out); close(out) }()

	var findings []recon.Finding
	for f := range out {
		findings = append(findings, f)
	}
	if err := <-done; err != nil {
		t.Fatalf("Sweep error: %v", err)
	}

	// 2 keywords * 2 hits = 4 findings
	if len(findings) != 4 {
		t.Fatalf("expected 4 findings, got %d", len(findings))
	}
	for _, f := range findings {
		if f.SourceType != "recon:censys" {
			t.Errorf("SourceType=%q want recon:censys", f.SourceType)
		}
	}
	if got := atomic.LoadInt32(&calls); got != 2 {
		t.Errorf("expected 2 calls, got %d", got)
	}
}

func TestCensysSource_CtxCancelled(t *testing.T) {
	reg := syntheticRegistry()
	lim := recon.NewLimiterRegistry()
	_ = lim.For("censys", 1000, 100)

	s := NewCensysSource("id", "secret", reg, lim)
	s.BaseURL = "http://127.0.0.1:1"

	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	out := make(chan recon.Finding, 1)
	err := s.Sweep(ctx, "", out)
	if !errors.Is(err, context.Canceled) {
		t.Fatalf("expected context.Canceled, got %v", err)
	}
}

126 pkg/recon/sources/dospaces.go Normal file
@@ -0,0 +1,126 @@
package sources

import (
	"context"
	"fmt"
	"log"
	"net/http"
	"time"

	"golang.org/x/time/rate"

	"github.com/salvacybersec/keyhunter/pkg/providers"
	"github.com/salvacybersec/keyhunter/pkg/recon"
)

// DOSpacesScanner enumerates publicly accessible DigitalOcean Spaces by name
// pattern and flags readable objects matching common config-file patterns as
// potential API key exposure vectors.
//
// Credentialless: uses anonymous HTTP to probe public DO Spaces. DO Spaces are
// S3-compatible, so the same XML ListBucketResult format is used.
type DOSpacesScanner struct {
	Registry *providers.Registry
	Limiters *recon.LimiterRegistry
	// BaseURL overrides the DO Spaces endpoint for tests.
	// Default: "https://%s.%s.digitaloceanspaces.com"
	// Must contain two %s placeholders: bucket name and region.
	BaseURL string
	client  *Client
}

// Compile-time assertion.
var _ recon.ReconSource = (*DOSpacesScanner)(nil)

func (d *DOSpacesScanner) Name() string                { return "spaces" }
func (d *DOSpacesScanner) RateLimit() rate.Limit       { return rate.Every(500 * time.Millisecond) }
func (d *DOSpacesScanner) Burst() int                  { return 3 }
func (d *DOSpacesScanner) RespectsRobots() bool        { return false }
func (d *DOSpacesScanner) Enabled(_ recon.Config) bool { return true }

// doRegions are the DigitalOcean Spaces regions to iterate.
var doRegions = []string{"nyc3", "sfo3", "ams3", "sgp1", "fra1"}

func (d *DOSpacesScanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
	client := d.client
	if client == nil {
		client = NewClient()
	}
	baseURL := d.BaseURL
	if baseURL == "" {
		baseURL = "https://%s.%s.digitaloceanspaces.com"
	}

	names := bucketNames(d.Registry)
	if len(names) == 0 {
		return nil
	}

	for _, bucket := range names {
		if err := ctx.Err(); err != nil {
			return err
		}

		for _, region := range doRegions {
			if err := ctx.Err(); err != nil {
				return err
			}
			if d.Limiters != nil {
				if err := d.Limiters.Wait(ctx, d.Name(), d.RateLimit(), d.Burst(), false); err != nil {
					return err
				}
			}

			endpoint := fmt.Sprintf(baseURL, bucket, region)
			keys, err := d.listSpace(ctx, client, endpoint)
			if err != nil {
				log.Printf("spaces: bucket %q region %q probe failed (skipping): %v", bucket, region, err)
				continue
			}

			for _, key := range keys {
				if !isConfigFile(key) {
					continue
				}
				out <- recon.Finding{
					Source:     fmt.Sprintf("do://%s/%s", bucket, key),
					SourceType: "recon:spaces",
					Confidence: "medium",
					DetectedAt: time.Now(),
				}
			}
		}
	}
	return nil
}

// listSpace probes a DO Spaces endpoint via HEAD then parses the S3-compatible
// ListBucketResult XML on success.
func (d *DOSpacesScanner) listSpace(ctx context.Context, client *Client, endpoint string) ([]string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodHead, endpoint, nil)
	if err != nil {
		return nil, err
	}
	resp, err := client.HTTP.Do(req)
	if err != nil {
		return nil, err
	}
	resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, nil
	}

	getReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, err
	}
	getResp, err := client.Do(ctx, getReq)
	if err != nil {
		return nil, err
	}
	defer getResp.Body.Close()

	// DO Spaces uses S3-compatible XML format.
	return parseS3ListXML(getResp.Body)
}

128 pkg/recon/sources/dospaces_test.go Normal file
@@ -0,0 +1,128 @@
package sources

import (
	"context"
	"net/http"
	"net/http/httptest"
	"testing"
	"time"

	"github.com/salvacybersec/keyhunter/pkg/providers"
	"github.com/salvacybersec/keyhunter/pkg/recon"
)

func doSpacesTestServer() *httptest.Server {
	mux := http.NewServeMux()

	// Only testprov-keys bucket in nyc3 region is publicly listable.
	mux.HandleFunc("/testprov-keys/nyc3/", func(w http.ResponseWriter, r *http.Request) {
		if r.Method == http.MethodHead {
			w.WriteHeader(http.StatusOK)
			return
		}
		w.Header().Set("Content-Type", "application/xml")
		_, _ = w.Write([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<ListBucketResult>
<Contents><Key>.env.production</Key></Contents>
<Contents><Key>app.conf</Key></Contents>
<Contents><Key>logo.svg</Key></Contents>
</ListBucketResult>`))
	})

	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusNotFound)
	})

	return httptest.NewServer(mux)
}

func TestDOSpaces_Sweep(t *testing.T) {
	srv := doSpacesTestServer()
	defer srv.Close()

	src := &DOSpacesScanner{
		Registry: cloudTestRegistry(),
		BaseURL:  srv.URL + "/%s/%s/",
		client:   NewClient(),
	}

	out := make(chan recon.Finding, 64)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	if err := src.Sweep(ctx, "", out); err != nil {
		t.Fatalf("Sweep error: %v", err)
	}
	close(out)

	var findings []recon.Finding
	for f := range out {
		findings = append(findings, f)
	}

	// .env.production and app.conf match; logo.svg does not.
	if len(findings) != 2 {
		t.Fatalf("expected 2 findings, got %d: %+v", len(findings), findings)
	}

	for _, f := range findings {
		if f.SourceType != "recon:spaces" {
			t.Errorf("unexpected SourceType: %s", f.SourceType)
		}
		if f.Confidence != "medium" {
			t.Errorf("unexpected Confidence: %s", f.Confidence)
		}
	}
}

func TestDOSpaces_EmptyRegistry(t *testing.T) {
	src := &DOSpacesScanner{
		Registry: providers.NewRegistryFromProviders(nil),
		Limiters: recon.NewLimiterRegistry(),
		client:   NewClient(),
	}

	out := make(chan recon.Finding, 4)
	if err := src.Sweep(context.Background(), "", out); err != nil {
		t.Fatalf("Sweep error: %v", err)
	}
	close(out)
	if len(out) != 0 {
		t.Fatal("expected 0 findings")
	}
}

func TestDOSpaces_CtxCancelled(t *testing.T) {
	srv := doSpacesTestServer()
	defer srv.Close()

	src := &DOSpacesScanner{
		Registry: cloudTestRegistry(),
		BaseURL:  srv.URL + "/%s/%s/",
		client:   NewClient(),
	}

	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	out := make(chan recon.Finding, 4)
	if err := src.Sweep(ctx, "", out); err == nil {
		t.Fatal("expected ctx error")
	}
}

func TestDOSpaces_EnabledAndMeta(t *testing.T) {
	d := &DOSpacesScanner{}
	if d.Name() != "spaces" {
		t.Fatalf("unexpected name: %s", d.Name())
	}
	if !d.Enabled(recon.Config{}) {
		t.Fatal("expected Enabled=true")
	}
	if d.RespectsRobots() {
		t.Fatal("expected RespectsRobots=false")
	}
	if d.Burst() != 3 {
		t.Fatal("expected Burst=3")
	}
}

144 pkg/recon/sources/fofa.go Normal file
@@ -0,0 +1,144 @@
package sources

import (
    "context"
    "encoding/base64"
    "encoding/json"
    "errors"
    "fmt"
    "net/http"
    "strings"
    "time"

    "golang.org/x/time/rate"

    "github.com/salvacybersec/keyhunter/pkg/providers"
    "github.com/salvacybersec/keyhunter/pkg/recon"
)

// FOFASource implements recon.ReconSource against the FOFA internet search
// engine API. It iterates provider keyword queries and emits a Finding per
// result.
//
// A missing Email or API key disables the source without error.
type FOFASource struct {
    Email    string
    APIKey   string
    BaseURL  string
    Registry *providers.Registry
    Limiters *recon.LimiterRegistry
    client   *Client
}

// Compile-time assertion.
var _ recon.ReconSource = (*FOFASource)(nil)

func (s *FOFASource) Name() string { return "fofa" }
func (s *FOFASource) RateLimit() rate.Limit { return rate.Every(1 * time.Second) }
func (s *FOFASource) Burst() int { return 1 }
func (s *FOFASource) RespectsRobots() bool { return false }

// Enabled returns true only when both Email and APIKey are configured.
func (s *FOFASource) Enabled(_ recon.Config) bool { return s.Email != "" && s.APIKey != "" }

// Sweep issues one FOFA search request per provider keyword and emits a
// Finding for every result row.
func (s *FOFASource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
    if s.Email == "" || s.APIKey == "" {
        return nil
    }
    if s.client == nil {
        s.client = NewClient()
    }
    base := s.BaseURL
    if base == "" {
        base = "https://fofa.info"
    }

    queries := BuildQueries(s.Registry, "fofa")
    kwIndex := fofaKeywordIndex(s.Registry)

    for _, q := range queries {
        if err := ctx.Err(); err != nil {
            return err
        }
        if s.Limiters != nil {
            if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
                return err
            }
        }

        qb64 := base64.StdEncoding.EncodeToString([]byte(q))
        endpoint := fmt.Sprintf("%s/api/v1/search/all?email=%s&key=%s&qbase64=%s&size=100",
            base, s.Email, s.APIKey, qb64)
        req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
        if err != nil {
            return fmt.Errorf("fofa: build request: %w", err)
        }
        req.Header.Set("Accept", "application/json")

        resp, err := s.client.Do(ctx, req)
        if err != nil {
            if errors.Is(err, ErrUnauthorized) {
                return err
            }
            if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
                return err
            }
            continue
        }

        var parsed fofaSearchResponse
        decErr := json.NewDecoder(resp.Body).Decode(&parsed)
        _ = resp.Body.Close()
        if decErr != nil {
            continue
        }

        provName := kwIndex[strings.ToLower(q)]
        for _, row := range parsed.Results {
            // Each row is [host, ip, port].
            if len(row) < 3 {
                continue
            }
            f := recon.Finding{
                ProviderName: provName,
                Confidence:   "low",
                Source:       fmt.Sprintf("fofa://%s:%s", row[1], row[2]),
                SourceType:   "recon:fofa",
                DetectedAt:   time.Now(),
            }
            select {
            case out <- f:
            case <-ctx.Done():
                return ctx.Err()
            }
        }
    }
    return nil
}

type fofaSearchResponse struct {
    Results [][]string `json:"results"`
    Size    int        `json:"size"`
}

// fofaKeywordIndex maps lowercased keywords to provider names.
func fofaKeywordIndex(reg *providers.Registry) map[string]string {
    m := make(map[string]string)
    if reg == nil {
        return m
    }
    for _, p := range reg.List() {
        for _, k := range p.Keywords {
            kl := strings.ToLower(strings.TrimSpace(k))
            if kl == "" {
                continue
            }
            if _, exists := m[kl]; !exists {
                m[kl] = p.Name
            }
        }
    }
    return m
}

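Side note on the qbase64 parameter built in Sweep above: FOFA takes the search query base64-encoded in the URL. A minimal standalone sketch of that encoding step follows; the query string here is an illustrative assumption, not taken from this change:

package main

import (
    "encoding/base64"
    "fmt"
)

func main() {
    // Hypothetical keyword query; real queries come from BuildQueries.
    q := `body="testprov-key"`
    qb64 := base64.StdEncoding.EncodeToString([]byte(q))
    // Mirrors the endpoint shape assembled in FOFASource.Sweep.
    fmt.Printf("https://fofa.info/api/v1/search/all?qbase64=%s&size=100\n", qb64)
}
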
130 pkg/recon/sources/fofa_test.go Normal file
@@ -0,0 +1,130 @@
package sources

import (
    "context"
    "encoding/json"
    "errors"
    "net/http"
    "net/http/httptest"
    "sync/atomic"
    "testing"
    "time"

    "github.com/salvacybersec/keyhunter/pkg/recon"
)

func fofaStubHandler(t *testing.T, calls *int32) http.HandlerFunc {
    t.Helper()
    return func(w http.ResponseWriter, r *http.Request) {
        atomic.AddInt32(calls, 1)
        if r.URL.Path != "/api/v1/search/all" {
            t.Errorf("unexpected path: %s", r.URL.Path)
        }
        if got := r.URL.Query().Get("email"); got != "test@example.com" {
            t.Errorf("missing email param: %q", got)
        }
        if got := r.URL.Query().Get("key"); got != "testkey" {
            t.Errorf("missing key param: %q", got)
        }
        body := fofaSearchResponse{
            Results: [][]string{
                {"example.com", "1.2.3.4", "443"},
                {"test.org", "5.6.7.8", "8080"},
            },
            Size: 2,
        }
        w.Header().Set("Content-Type", "application/json")
        _ = json.NewEncoder(w).Encode(body)
    }
}

func TestFOFASource_EnabledRequiresCredentials(t *testing.T) {
    reg := syntheticRegistry()
    lim := recon.NewLimiterRegistry()

    s := &FOFASource{Email: "", APIKey: "", Registry: reg, Limiters: lim}
    if s.Enabled(recon.Config{}) {
        t.Error("expected Enabled=false with empty credentials")
    }
    s = &FOFASource{Email: "a@b.com", APIKey: "", Registry: reg, Limiters: lim}
    if s.Enabled(recon.Config{}) {
        t.Error("expected Enabled=false with empty APIKey")
    }
    s = &FOFASource{Email: "", APIKey: "key", Registry: reg, Limiters: lim}
    if s.Enabled(recon.Config{}) {
        t.Error("expected Enabled=false with empty Email")
    }
    s = &FOFASource{Email: "a@b.com", APIKey: "key", Registry: reg, Limiters: lim}
    if !s.Enabled(recon.Config{}) {
        t.Error("expected Enabled=true with both credentials")
    }
}

func TestFOFASource_SweepEmitsFindings(t *testing.T) {
    reg := syntheticRegistry()
    lim := recon.NewLimiterRegistry()
    _ = lim.For("fofa", 1000, 100)

    var calls int32
    srv := httptest.NewServer(fofaStubHandler(t, &calls))
    defer srv.Close()

    s := &FOFASource{
        Email:    "test@example.com",
        APIKey:   "testkey",
        BaseURL:  srv.URL,
        Registry: reg,
        Limiters: lim,
    }

    out := make(chan recon.Finding, 32)
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
    defer cancel()

    done := make(chan error, 1)
    go func() { done <- s.Sweep(ctx, "", out); close(out) }()

    var findings []recon.Finding
    for f := range out {
        findings = append(findings, f)
    }
    if err := <-done; err != nil {
        t.Fatalf("Sweep error: %v", err)
    }

    // 2 keywords * 2 results = 4 findings
    if len(findings) != 4 {
        t.Fatalf("expected 4 findings, got %d", len(findings))
    }
    for _, f := range findings {
        if f.SourceType != "recon:fofa" {
            t.Errorf("SourceType=%q want recon:fofa", f.SourceType)
        }
    }
    if got := atomic.LoadInt32(&calls); got != 2 {
        t.Errorf("expected 2 API calls, got %d", got)
    }
}

func TestFOFASource_CtxCancelled(t *testing.T) {
    reg := syntheticRegistry()
    lim := recon.NewLimiterRegistry()
    _ = lim.For("fofa", 1000, 100)

    s := &FOFASource{
        Email:    "a@b.com",
        APIKey:   "key",
        BaseURL:  "http://127.0.0.1:1",
        Registry: reg,
        Limiters: lim,
    }

    ctx, cancel := context.WithCancel(context.Background())
    cancel()

    out := make(chan recon.Finding, 1)
    err := s.Sweep(ctx, "", out)
    if !errors.Is(err, context.Canceled) {
        t.Fatalf("expected context.Canceled, got %v", err)
    }
}

144 pkg/recon/sources/gcsscanner.go Normal file
@@ -0,0 +1,144 @@
package sources

import (
    "context"
    "encoding/json"
    "fmt"
    "io"
    "log"
    "net/http"
    "time"

    "golang.org/x/time/rate"

    "github.com/salvacybersec/keyhunter/pkg/providers"
    "github.com/salvacybersec/keyhunter/pkg/recon"
)

// GCSScanner enumerates publicly accessible Google Cloud Storage buckets by
// name pattern and flags readable objects that match common config-file
// patterns as potential API key exposure vectors.
//
// Credentialless: uses anonymous HTTP to probe public GCS buckets.
type GCSScanner struct {
    Registry *providers.Registry
    Limiters *recon.LimiterRegistry
    // BaseURL overrides the GCS endpoint for tests. Default: "https://storage.googleapis.com/%s".
    BaseURL string
    client  *Client
}

// Compile-time assertion.
var _ recon.ReconSource = (*GCSScanner)(nil)

func (g *GCSScanner) Name() string { return "gcs" }
func (g *GCSScanner) RateLimit() rate.Limit { return rate.Every(500 * time.Millisecond) }
func (g *GCSScanner) Burst() int { return 3 }
func (g *GCSScanner) RespectsRobots() bool { return false }
func (g *GCSScanner) Enabled(_ recon.Config) bool { return true }

func (g *GCSScanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
    client := g.client
    if client == nil {
        client = NewClient()
    }
    baseURL := g.BaseURL
    if baseURL == "" {
        baseURL = "https://storage.googleapis.com/%s"
    }

    names := bucketNames(g.Registry)
    if len(names) == 0 {
        return nil
    }

    for _, bucket := range names {
        if err := ctx.Err(); err != nil {
            return err
        }
        if g.Limiters != nil {
            if err := g.Limiters.Wait(ctx, g.Name(), g.RateLimit(), g.Burst(), false); err != nil {
                return err
            }
        }

        endpoint := fmt.Sprintf(baseURL, bucket)
        items, err := g.listBucketGCS(ctx, client, endpoint)
        if err != nil {
            log.Printf("gcs: bucket %q probe failed (skipping): %v", bucket, err)
            continue
        }

        for _, name := range items {
            if !isConfigFile(name) {
                continue
            }
            out <- recon.Finding{
                Source:     fmt.Sprintf("gs://%s/%s", bucket, name),
                SourceType: "recon:gcs",
                Confidence: "medium",
                DetectedAt: time.Now(),
            }
        }
    }
    return nil
}

// listBucketGCS probes a GCS bucket endpoint. A HEAD returning 200 means the
// bucket is publicly accessible. We then GET with Accept: application/json to
// retrieve the JSON listing.
func (g *GCSScanner) listBucketGCS(ctx context.Context, client *Client, endpoint string) ([]string, error) {
    req, err := http.NewRequestWithContext(ctx, http.MethodHead, endpoint, nil)
    if err != nil {
        return nil, err
    }
    resp, err := client.HTTP.Do(req)
    if err != nil {
        return nil, err
    }
    resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return nil, nil
    }

    getReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
    if err != nil {
        return nil, err
    }
    getReq.Header.Set("Accept", "application/json")
    getResp, err := client.Do(ctx, getReq)
    if err != nil {
        return nil, err
    }
    defer getResp.Body.Close()

    return parseGCSListJSON(getResp.Body)
}

// gcsListResult models the GCS JSON listing format.
type gcsListResult struct {
    Items []gcsItem `json:"items"`
}

type gcsItem struct {
    Name string `json:"name"`
}

func parseGCSListJSON(r io.Reader) ([]string, error) {
    data, err := io.ReadAll(io.LimitReader(r, 1<<20))
    if err != nil {
        return nil, err
    }
    var result gcsListResult
    if err := json.Unmarshal(data, &result); err != nil {
        return nil, err
    }
    names := make([]string, 0, len(result.Items))
    for _, item := range result.Items {
        if item.Name != "" {
            names = append(names, item.Name)
        }
    }
    return names, nil
}

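The HEAD-then-GET probe in listBucketGCS above can be tried in isolation with nothing beyond net/http. A minimal sketch, assuming a hypothetical bucket name (real candidates come from bucketNames):

package main

import (
    "fmt"
    "net/http"
)

func main() {
    // Hypothetical bucket name; illustrative only, not part of this change.
    endpoint := "https://storage.googleapis.com/example-keys"

    // HEAD first: a 200 means the bucket is publicly accessible, and a
    // follow-up GET with Accept: application/json would return the listing.
    resp, err := http.Head(endpoint)
    if err != nil {
        fmt.Println("probe failed:", err)
        return
    }
    resp.Body.Close()
    fmt.Println("publicly listable:", resp.StatusCode == http.StatusOK)
}
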
127 pkg/recon/sources/gcsscanner_test.go Normal file
@@ -0,0 +1,127 @@
package sources

import (
    "context"
    "net/http"
    "net/http/httptest"
    "testing"
    "time"

    "github.com/salvacybersec/keyhunter/pkg/providers"
    "github.com/salvacybersec/keyhunter/pkg/recon"
)

func gcsTestServer() *httptest.Server {
    mux := http.NewServeMux()

    mux.HandleFunc("/testprov-keys/", func(w http.ResponseWriter, r *http.Request) {
        if r.Method == http.MethodHead {
            w.WriteHeader(http.StatusOK)
            return
        }
        w.Header().Set("Content-Type", "application/json")
        _, _ = w.Write([]byte(`{"items":[
{"name":".env"},
{"name":"config.yaml"},
{"name":"readme.md"},
{"name":"secrets.toml"}
]}`))
    })

    mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
        w.WriteHeader(http.StatusNotFound)
    })

    return httptest.NewServer(mux)
}

func TestGCSScanner_Sweep(t *testing.T) {
    srv := gcsTestServer()
    defer srv.Close()

    src := &GCSScanner{
        Registry: cloudTestRegistry(),
        BaseURL:  srv.URL + "/%s/",
        client:   NewClient(),
    }

    out := make(chan recon.Finding, 32)
    ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
    defer cancel()

    if err := src.Sweep(ctx, "", out); err != nil {
        t.Fatalf("Sweep error: %v", err)
    }
    close(out)

    var findings []recon.Finding
    for f := range out {
        findings = append(findings, f)
    }

    // .env, config.yaml, secrets.toml match; readme.md does not.
    if len(findings) != 3 {
        t.Fatalf("expected 3 findings, got %d: %+v", len(findings), findings)
    }

    for _, f := range findings {
        if f.SourceType != "recon:gcs" {
            t.Errorf("unexpected SourceType: %s", f.SourceType)
        }
        if f.Confidence != "medium" {
            t.Errorf("unexpected Confidence: %s", f.Confidence)
        }
    }
}

func TestGCSScanner_EmptyRegistry(t *testing.T) {
    src := &GCSScanner{
        Registry: providers.NewRegistryFromProviders(nil),
        Limiters: recon.NewLimiterRegistry(),
        client:   NewClient(),
    }

    out := make(chan recon.Finding, 4)
    if err := src.Sweep(context.Background(), "", out); err != nil {
        t.Fatalf("Sweep error: %v", err)
    }
    close(out)
    if len(out) != 0 {
        t.Fatal("expected 0 findings")
    }
}

func TestGCSScanner_CtxCancelled(t *testing.T) {
    srv := gcsTestServer()
    defer srv.Close()

    src := &GCSScanner{
        Registry: cloudTestRegistry(),
        BaseURL:  srv.URL + "/%s/",
        client:   NewClient(),
    }

    ctx, cancel := context.WithCancel(context.Background())
    cancel()

    out := make(chan recon.Finding, 4)
    if err := src.Sweep(ctx, "", out); err == nil {
        t.Fatal("expected ctx error")
    }
}

func TestGCSScanner_EnabledAndMeta(t *testing.T) {
    g := &GCSScanner{}
    if g.Name() != "gcs" {
        t.Fatalf("unexpected name: %s", g.Name())
    }
    if !g.Enabled(recon.Config{}) {
        t.Fatal("expected Enabled=true")
    }
    if g.RespectsRobots() {
        t.Fatal("expected RespectsRobots=false")
    }
    if g.Burst() != 3 {
        t.Fatal("expected Burst=3")
    }
}

@@ -14,11 +14,11 @@ import (
)

// TestIntegration_AllSources_SweepAll spins up a single multiplexed httptest
// server that serves canned fixtures for every Phase 10 code-hosting source
// and Phase 11 search engine / paste site source, registers the sources (with
// BaseURL overrides pointing at the test server) onto a fresh recon.Engine,
// runs SweepAll, and asserts at least one Finding was emitted per SourceType
// across all 18 sources.
// server that serves canned fixtures for every Phase 10 code-hosting source,
// Phase 11 search engine / paste site source, Phase 12 IoT scanner, and
// Phase 12 cloud storage source, registers the sources (with BaseURL overrides
// pointing at the test server) onto a fresh recon.Engine, runs SweepAll, and
// asserts at least one Finding was emitted per SourceType across all 28 sources.
//
// RegisterAll cannot be used directly because it wires production URLs; the
// test exercises the same code paths by constructing each source identically
@@ -167,6 +167,78 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) {
        _, _ = w.Write([]byte("secret: sk-proj-PASTESITES789"))
    })

    // ---- Phase 12: Shodan /shodan/host/search ----
    mux.HandleFunc("/shodan/host/search", func(w http.ResponseWriter, r *http.Request) {
        w.Header().Set("Content-Type", "application/json")
        _, _ = w.Write([]byte(`{"matches":[{"ip_str":"1.2.3.4","port":8080,"data":"vllm endpoint"}]}`))
    })

    // ---- Phase 12: Censys /v2/hosts/search ----
    mux.HandleFunc("/v2/hosts/search", func(w http.ResponseWriter, r *http.Request) {
        w.Header().Set("Content-Type", "application/json")
        _, _ = w.Write([]byte(`{"result":{"hits":[{"ip":"10.0.0.1","services":[{"port":443,"service_name":"HTTP"}]}]}}`))
    })

    // ---- Phase 12: ZoomEye /host/search ----
    mux.HandleFunc("/host/search", func(w http.ResponseWriter, r *http.Request) {
        w.Header().Set("Content-Type", "application/json")
        _, _ = w.Write([]byte(`{"matches":[{"ip":"172.16.0.1","portinfo":{"port":8443,"service":"https"}}]}`))
    })

    // ---- Phase 12: FOFA /api/v1/search/all ----
    mux.HandleFunc("/api/v1/search/all", func(w http.ResponseWriter, r *http.Request) {
        w.Header().Set("Content-Type", "application/json")
        _, _ = w.Write([]byte(`{"results":[["example.com","192.168.1.1","443"]],"size":1}`))
    })

    // ---- Phase 12: Netlas /api/responses/ ----
    mux.HandleFunc("/api/responses/", func(w http.ResponseWriter, r *http.Request) {
        w.Header().Set("Content-Type", "application/json")
        _, _ = w.Write([]byte(`{"items":[{"data":{"ip":"10.10.10.1","port":80}}]}`))
    })

    // ---- Phase 12: BinaryEdge /v2/query/search ----
    mux.HandleFunc("/v2/query/search", func(w http.ResponseWriter, r *http.Request) {
        w.Header().Set("Content-Type", "application/json")
        _, _ = w.Write([]byte(`{"events":[{"target":{"ip":"192.0.2.1","port":8080}}]}`))
    })

    // ---- Phase 12: Cloud storage — S3 + DOSpaces (S3 XML format) ----
    mux.HandleFunc("/cloud-s3/", func(w http.ResponseWriter, r *http.Request) {
        if r.Method == http.MethodHead {
            w.WriteHeader(http.StatusOK)
            return
        }
        w.Header().Set("Content-Type", "application/xml")
        _, _ = w.Write([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<ListBucketResult>
<Contents><Key>.env</Key></Contents>
<Contents><Key>config.yaml</Key></Contents>
</ListBucketResult>`))
    })

    // ---- Phase 12: Cloud storage — GCS (JSON format) ----
    mux.HandleFunc("/cloud-gcs/", func(w http.ResponseWriter, r *http.Request) {
        if r.Method == http.MethodHead {
            w.WriteHeader(http.StatusOK)
            return
        }
        w.Header().Set("Content-Type", "application/json")
        _, _ = w.Write([]byte(`{"items":[{"name":".env"},{"name":"config.yaml"}]}`))
    })

    // ---- Phase 12: Cloud storage — Azure Blob (EnumerationResults XML) ----
    mux.HandleFunc("/cloud-azure/", func(w http.ResponseWriter, r *http.Request) {
        w.Header().Set("Content-Type", "application/xml")
        _, _ = w.Write([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<EnumerationResults>
<Blobs>
<Blob><Name>.env</Name></Blob>
<Blob><Name>config.yaml</Name></Blob>
</Blobs>
</EnumerationResults>`))
    })

    srv := httptest.NewServer(mux)
    defer srv.Close()

@@ -304,9 +376,80 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) {
        BaseURL: srv.URL,
    })

    // --- Phase 12: IoT scanner sources ---

    // Shodan
    shodanSrc := NewShodanSource("test-shodan-key", reg, lim)
    shodanSrc.BaseURL = srv.URL
    eng.Register(shodanSrc)
    // Censys
    censysSrc := NewCensysSource("test-id", "test-secret", reg, lim)
    censysSrc.BaseURL = srv.URL
    eng.Register(censysSrc)
    // ZoomEye
    zoomeyeSrc := NewZoomEyeSource("test-zoomeye-key", reg, lim)
    zoomeyeSrc.BaseURL = srv.URL
    eng.Register(zoomeyeSrc)
    // FOFA
    eng.Register(&FOFASource{
        Email:    "test@example.com",
        APIKey:   "test-fofa-key",
        BaseURL:  srv.URL,
        Registry: reg,
        Limiters: lim,
        client:   NewClient(),
    })
    // Netlas
    eng.Register(&NetlasSource{
        APIKey:   "test-netlas-key",
        BaseURL:  srv.URL,
        Registry: reg,
        Limiters: lim,
        client:   NewClient(),
    })
    // BinaryEdge
    eng.Register(&BinaryEdgeSource{
        APIKey:   "test-binaryedge-key",
        BaseURL:  srv.URL,
        Registry: reg,
        Limiters: lim,
        client:   NewClient(),
    })

    // --- Phase 12: Cloud storage sources ---

    // S3 -- BaseURL pattern with %s for bucket name
    eng.Register(&S3Scanner{
        BaseURL:  srv.URL + "/cloud-s3/%s",
        Registry: reg,
        Limiters: lim,
        client:   NewClient(),
    })
    // GCS -- JSON format handler
    eng.Register(&GCSScanner{
        BaseURL:  srv.URL + "/cloud-gcs/%s",
        Registry: reg,
        Limiters: lim,
        client:   NewClient(),
    })
    // AzureBlob -- EnumerationResults XML; needs two %s: account + container
    eng.Register(&AzureBlobScanner{
        BaseURL:  srv.URL + "/cloud-azure/%s-%s",
        Registry: reg,
        Limiters: lim,
        client:   NewClient(),
    })
    // DOSpaces -- S3-compatible XML; needs two %s: bucket + region
    eng.Register(&DOSpacesScanner{
        BaseURL:  srv.URL + "/cloud-s3/%s-%s",
        Registry: reg,
        Limiters: lim,
        client:   NewClient(),
    })

    // Sanity: all 18 sources registered.
    if n := len(eng.List()); n != 18 {
        t.Fatalf("expected 18 sources on engine, got %d: %v", n, eng.List())
    // Sanity: all 28 sources registered.
    if n := len(eng.List()); n != 28 {
        t.Fatalf("expected 28 sources on engine, got %d: %v", n, eng.List())
    }

    ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
@@ -344,6 +487,18 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) {
        "recon:pastebin",
        "recon:gistpaste",
        "recon:pastesites",
        // Phase 12: IoT scanners
        "recon:shodan",
        "recon:censys",
        "recon:zoomeye",
        "recon:fofa",
        "recon:netlas",
        "recon:binaryedge",
        // Phase 12: Cloud storage
        "recon:s3",
        "recon:gcs",
        "recon:azureblob",
        "recon:spaces",
    }
    for _, st := range wantTypes {
        if byType[st] == 0 {
@@ -352,6 +507,95 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) {
    }
}

// TestRegisterAll_Phase12 verifies that RegisterAll correctly registers all 28
// sources (18 Phase 10-11 + 10 Phase 12) and that credential-gated sources
// report Enabled()==false when credentials are empty.
func TestRegisterAll_Phase12(t *testing.T) {
    reg := providers.NewRegistryFromProviders([]providers.Provider{
        {Name: "testprov", Keywords: []string{"test-key"}},
    })
    lim := recon.NewLimiterRegistry()

    eng := recon.NewEngine()
    RegisterAll(eng, SourcesConfig{
        Registry: reg,
        Limiters: lim,
        // All credential fields left empty.
    })

    names := eng.List()
    if n := len(names); n != 28 {
        t.Fatalf("expected 28 sources from RegisterAll, got %d: %v", n, names)
    }

    // Build lookup for source access.
    nameSet := make(map[string]bool, len(names))
    for _, n := range names {
        nameSet[n] = true
    }

    // All 10 Phase 12 sources must be present.
    wantPhase12 := []string{
        "shodan", "censys", "zoomeye", "fofa", "netlas", "binaryedge",
        "s3", "gcs", "azureblob", "spaces",
    }
    for _, name := range wantPhase12 {
        if !nameSet[name] {
            t.Errorf("Phase 12 source %q not found in engine; registered: %v", name, names)
        }
    }

    cfg := recon.Config{}

    // IoT sources with empty credentials must be disabled.
    iotSources := []string{"shodan", "censys", "zoomeye", "fofa", "netlas", "binaryedge"}
    for _, name := range iotSources {
        src, ok := eng.Get(name)
        if !ok {
            t.Errorf("source %q not found via Get", name)
            continue
        }
        if src.Enabled(cfg) {
            t.Errorf("IoT source %q should be Enabled()==false with empty credentials", name)
        }
    }

    // Cloud storage sources (credentialless) must be enabled.
    cloudSources := []string{"s3", "gcs", "azureblob", "spaces"}
    for _, name := range cloudSources {
        src, ok := eng.Get(name)
        if !ok {
            t.Errorf("source %q not found via Get", name)
            continue
        }
        if !src.Enabled(cfg) {
            t.Errorf("Cloud source %q should be Enabled()==true (credentialless)", name)
        }
    }
}

// TestRegisterAll_Phase12_SweepAllNoPanic verifies that SweepAll with a very
// short context timeout completes without panic when all 28 sources are
// registered with empty credentials.
func TestRegisterAll_Phase12_SweepAllNoPanic(t *testing.T) {
    reg := providers.NewRegistryFromProviders([]providers.Provider{
        {Name: "testprov", Keywords: []string{"test-key"}},
    })
    lim := recon.NewLimiterRegistry()

    eng := recon.NewEngine()
    RegisterAll(eng, SourcesConfig{
        Registry: reg,
        Limiters: lim,
    })

    ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
    defer cancel()

    // Should not panic regardless of timeout or missing credentials.
    _, _ = eng.SweepAll(ctx, recon.Config{})
}

// baseFromReq reconstructs the scheme+host of the inbound request so handlers
// can build absolute raw URLs pointing back at the same httptest server.
func baseFromReq(r *http.Request) string {
147 pkg/recon/sources/netlas.go Normal file
@@ -0,0 +1,147 @@
package sources

import (
    "context"
    "encoding/json"
    "errors"
    "fmt"
    "net/http"
    "net/url"
    "strings"
    "time"

    "golang.org/x/time/rate"

    "github.com/salvacybersec/keyhunter/pkg/providers"
    "github.com/salvacybersec/keyhunter/pkg/recon"
)

// NetlasSource implements recon.ReconSource against the Netlas internet
// intelligence API. It iterates provider keyword queries and emits a Finding
// per result item.
//
// A missing API key disables the source without error.
type NetlasSource struct {
    APIKey   string
    BaseURL  string
    Registry *providers.Registry
    Limiters *recon.LimiterRegistry
    client   *Client
}

// Compile-time assertion.
var _ recon.ReconSource = (*NetlasSource)(nil)

func (s *NetlasSource) Name() string { return "netlas" }
func (s *NetlasSource) RateLimit() rate.Limit { return rate.Every(1 * time.Second) }
func (s *NetlasSource) Burst() int { return 1 }
func (s *NetlasSource) RespectsRobots() bool { return false }

// Enabled returns true only when APIKey is configured.
func (s *NetlasSource) Enabled(_ recon.Config) bool { return s.APIKey != "" }

// Sweep issues one Netlas search request per provider keyword and emits a
// Finding for every result item.
func (s *NetlasSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
    if s.APIKey == "" {
        return nil
    }
    if s.client == nil {
        s.client = NewClient()
    }
    base := s.BaseURL
    if base == "" {
        base = "https://app.netlas.io"
    }

    queries := BuildQueries(s.Registry, "netlas")
    kwIndex := netlasKeywordIndex(s.Registry)

    for _, q := range queries {
        if err := ctx.Err(); err != nil {
            return err
        }
        if s.Limiters != nil {
            if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
                return err
            }
        }

        endpoint := fmt.Sprintf("%s/api/responses/?q=%s&start=0&indices=",
            base, url.QueryEscape(q))
        req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
        if err != nil {
            return fmt.Errorf("netlas: build request: %w", err)
        }
        req.Header.Set("X-API-Key", s.APIKey)
        req.Header.Set("Accept", "application/json")

        resp, err := s.client.Do(ctx, req)
        if err != nil {
            if errors.Is(err, ErrUnauthorized) {
                return err
            }
            if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
                return err
            }
            continue
        }

        var parsed netlasSearchResponse
        decErr := json.NewDecoder(resp.Body).Decode(&parsed)
        _ = resp.Body.Close()
        if decErr != nil {
            continue
        }

        provName := kwIndex[strings.ToLower(q)]
        for _, item := range parsed.Items {
            f := recon.Finding{
                ProviderName: provName,
                Confidence:   "low",
                Source:       fmt.Sprintf("netlas://%s:%d", item.Data.IP, item.Data.Port),
                SourceType:   "recon:netlas",
                DetectedAt:   time.Now(),
            }
            select {
            case out <- f:
            case <-ctx.Done():
                return ctx.Err()
            }
        }
    }
    return nil
}

type netlasSearchResponse struct {
    Items []netlasItem `json:"items"`
}

type netlasItem struct {
    Data netlasData `json:"data"`
}

type netlasData struct {
    IP   string `json:"ip"`
    Port int    `json:"port"`
}

// netlasKeywordIndex maps lowercased keywords to provider names.
func netlasKeywordIndex(reg *providers.Registry) map[string]string {
    m := make(map[string]string)
    if reg == nil {
        return m
    }
    for _, p := range reg.List() {
        for _, k := range p.Keywords {
            kl := strings.ToLower(strings.TrimSpace(k))
            if kl == "" {
                continue
            }
            if _, exists := m[kl]; !exists {
                m[kl] = p.Name
            }
        }
    }
    return m
}

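One subtlety of netlasKeywordIndex worth pinning down: the first provider to claim a keyword wins, case-folded, so later duplicates never remap it. An illustrative test-style sketch of that behavior (it assumes reg.List() preserves registration order; the provider names are made up and this test is not part of the diff):

package sources

import (
    "testing"

    "github.com/salvacybersec/keyhunter/pkg/providers"
)

// Illustrative only: first provider to claim a keyword wins, case-folded.
func TestNetlasKeywordIndex_FirstClaimWins(t *testing.T) {
    reg := providers.NewRegistryFromProviders([]providers.Provider{
        {Name: "alpha", Keywords: []string{"api-key"}},
        {Name: "beta", Keywords: []string{"API-KEY", "beta-token"}},
    })
    idx := netlasKeywordIndex(reg)
    if idx["api-key"] != "alpha" {
        t.Fatalf("expected alpha, got %q", idx["api-key"])
    }
    if idx["beta-token"] != "beta" {
        t.Fatalf("expected beta, got %q", idx["beta-token"])
    }
}
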
117 pkg/recon/sources/netlas_test.go Normal file
@@ -0,0 +1,117 @@
package sources

import (
    "context"
    "encoding/json"
    "errors"
    "net/http"
    "net/http/httptest"
    "strings"
    "sync/atomic"
    "testing"
    "time"

    "github.com/salvacybersec/keyhunter/pkg/recon"
)

func netlasStubHandler(t *testing.T, calls *int32) http.HandlerFunc {
    t.Helper()
    return func(w http.ResponseWriter, r *http.Request) {
        atomic.AddInt32(calls, 1)
        if !strings.HasPrefix(r.URL.Path, "/api/responses/") {
            t.Errorf("unexpected path: %s", r.URL.Path)
        }
        if got := r.Header.Get("X-API-Key"); got != "testkey" {
            t.Errorf("missing X-API-Key header: %q", got)
        }
        body := netlasSearchResponse{
            Items: []netlasItem{
                {Data: netlasData{IP: "10.0.0.1", Port: 443}},
                {Data: netlasData{IP: "10.0.0.2", Port: 8443}},
            },
        }
        w.Header().Set("Content-Type", "application/json")
        _ = json.NewEncoder(w).Encode(body)
    }
}

func TestNetlasSource_EnabledRequiresAPIKey(t *testing.T) {
    reg := syntheticRegistry()
    lim := recon.NewLimiterRegistry()

    s := &NetlasSource{APIKey: "", Registry: reg, Limiters: lim}
    if s.Enabled(recon.Config{}) {
        t.Error("expected Enabled=false with empty key")
    }
    s = &NetlasSource{APIKey: "key", Registry: reg, Limiters: lim}
    if !s.Enabled(recon.Config{}) {
        t.Error("expected Enabled=true with key")
    }
}

func TestNetlasSource_SweepEmitsFindings(t *testing.T) {
    reg := syntheticRegistry()
    lim := recon.NewLimiterRegistry()
    _ = lim.For("netlas", 1000, 100)

    var calls int32
    srv := httptest.NewServer(netlasStubHandler(t, &calls))
    defer srv.Close()

    s := &NetlasSource{
        APIKey:   "testkey",
        BaseURL:  srv.URL,
        Registry: reg,
        Limiters: lim,
    }

    out := make(chan recon.Finding, 32)
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
    defer cancel()

    done := make(chan error, 1)
    go func() { done <- s.Sweep(ctx, "", out); close(out) }()

    var findings []recon.Finding
    for f := range out {
        findings = append(findings, f)
    }
    if err := <-done; err != nil {
        t.Fatalf("Sweep error: %v", err)
    }

    // 2 keywords * 2 items = 4 findings
    if len(findings) != 4 {
        t.Fatalf("expected 4 findings, got %d", len(findings))
    }
    for _, f := range findings {
        if f.SourceType != "recon:netlas" {
            t.Errorf("SourceType=%q want recon:netlas", f.SourceType)
        }
    }
    if got := atomic.LoadInt32(&calls); got != 2 {
        t.Errorf("expected 2 API calls, got %d", got)
    }
}

func TestNetlasSource_CtxCancelled(t *testing.T) {
    reg := syntheticRegistry()
    lim := recon.NewLimiterRegistry()
    _ = lim.For("netlas", 1000, 100)

    s := &NetlasSource{
        APIKey:   "key",
        BaseURL:  "http://127.0.0.1:1",
        Registry: reg,
        Limiters: lim,
    }

    ctx, cancel := context.WithCancel(context.Background())
    cancel()

    out := make(chan recon.Finding, 1)
    err := s.Sweep(ctx, "", out)
    if !errors.Is(err, context.Canceled) {
        t.Fatalf("expected context.Canceled, got %v", err)
    }
}

@@ -39,14 +39,25 @@ type SourcesConfig struct {
    // Brave Search API subscription token.
    BraveAPIKey string

    // Phase 12: IoT scanner API keys.
    ShodanAPIKey     string
    CensysAPIId      string
    CensysAPISecret  string
    ZoomEyeAPIKey    string
    FOFAEmail        string
    FOFAAPIKey       string
    NetlasAPIKey     string
    BinaryEdgeAPIKey string

    // Registry drives query generation for every source via BuildQueries.
    Registry *providers.Registry
    // Limiters is the shared per-source rate-limiter registry.
    Limiters *recon.LimiterRegistry
}

// RegisterAll registers every Phase 10 code-hosting and Phase 11 search
// engine / paste site source on engine (18 sources total).
// RegisterAll registers every Phase 10 code-hosting, Phase 11 search engine /
// paste site, and Phase 12 IoT scanner / cloud storage source on engine
// (28 sources total).
//
// All sources are registered unconditionally so that cmd/recon.go can surface
// the full catalog via `keyhunter recon list` regardless of which credentials
@@ -149,4 +160,56 @@ func RegisterAll(engine *recon.Engine, cfg SourcesConfig) {
        Registry: reg,
        Limiters: lim,
    })

    // Phase 12: IoT scanner sources.
    engine.Register(&ShodanSource{
        APIKey:   cfg.ShodanAPIKey,
        Registry: reg,
        Limiters: lim,
    })
    engine.Register(&CensysSource{
        APIId:     cfg.CensysAPIId,
        APISecret: cfg.CensysAPISecret,
        Registry:  reg,
        Limiters:  lim,
    })
    engine.Register(&ZoomEyeSource{
        APIKey:   cfg.ZoomEyeAPIKey,
        Registry: reg,
        Limiters: lim,
    })
    engine.Register(&FOFASource{
        Email:    cfg.FOFAEmail,
        APIKey:   cfg.FOFAAPIKey,
        Registry: reg,
        Limiters: lim,
    })
    engine.Register(&NetlasSource{
        APIKey:   cfg.NetlasAPIKey,
        Registry: reg,
        Limiters: lim,
    })
    engine.Register(&BinaryEdgeSource{
        APIKey:   cfg.BinaryEdgeAPIKey,
        Registry: reg,
        Limiters: lim,
    })

    // Phase 12: Cloud storage sources (credentialless).
    engine.Register(&S3Scanner{
        Registry: reg,
        Limiters: lim,
    })
    engine.Register(&GCSScanner{
        Registry: reg,
        Limiters: lim,
    })
    engine.Register(&AzureBlobScanner{
        Registry: reg,
        Limiters: lim,
    })
    engine.Register(&DOSpacesScanner{
        Registry: reg,
        Limiters: lim,
    })
}

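For callers, a plausible wiring of the new Phase 12 credential fields looks like the sketch below. The env-var names and the import path for this package are assumptions for illustration; the diff does not show how cmd/ actually populates SourcesConfig:

package main

import (
    "os"

    "github.com/salvacybersec/keyhunter/pkg/recon"
    "github.com/salvacybersec/keyhunter/pkg/recon/sources"
)

func main() {
    // Hypothetical env-var names, not taken from this change.
    eng := recon.NewEngine()
    sources.RegisterAll(eng, sources.SourcesConfig{
        ShodanAPIKey:     os.Getenv("SHODAN_API_KEY"),
        CensysAPIId:      os.Getenv("CENSYS_API_ID"),
        CensysAPISecret:  os.Getenv("CENSYS_API_SECRET"),
        ZoomEyeAPIKey:    os.Getenv("ZOOMEYE_API_KEY"),
        FOFAEmail:        os.Getenv("FOFA_EMAIL"),
        FOFAAPIKey:       os.Getenv("FOFA_API_KEY"),
        NetlasAPIKey:     os.Getenv("NETLAS_API_KEY"),
        BinaryEdgeAPIKey: os.Getenv("BINARYEDGE_API_KEY"),
        Registry:         nil, // a real caller would pass its provider registry
        Limiters:         recon.NewLimiterRegistry(),
    })
}
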
@@ -16,9 +16,9 @@ func registerTestRegistry() *providers.Registry {
    })
}

// TestRegisterAll_WiresAllEighteenSources asserts that RegisterAll registers
// every Phase 10 + Phase 11 source by its stable name on a fresh engine.
func TestRegisterAll_WiresAllEighteenSources(t *testing.T) {
// TestRegisterAll_WiresAllTwentyEightSources asserts that RegisterAll registers
// every Phase 10 + Phase 11 + Phase 12 source by its stable name on a fresh engine.
func TestRegisterAll_WiresAllTwentyEightSources(t *testing.T) {
    eng := recon.NewEngine()
    cfg := SourcesConfig{
        Registry: registerTestRegistry(),
@@ -28,12 +28,17 @@ func TestRegisterAll_WiresAllEighteenSources(t *testing.T) {

    got := eng.List()
    want := []string{
        "azureblob",
        "binaryedge",
        "bing",
        "bitbucket",
        "brave",
        "censys",
        "codeberg",
        "codesandbox",
        "duckduckgo",
        "fofa",
        "gcs",
        "gist",
        "gistpaste",
        "github",
@@ -41,11 +46,16 @@ func TestRegisterAll_WiresAllEighteenSources(t *testing.T) {
        "google",
        "huggingface",
        "kaggle",
        "netlas",
        "pastebin",
        "pastesites",
        "replit",
        "s3",
        "sandboxes",
        "shodan",
        "spaces",
        "yandex",
        "zoomeye",
    }
    if !reflect.DeepEqual(got, want) {
        t.Fatalf("RegisterAll names mismatch\n got: %v\nwant: %v", got, want)
@@ -63,8 +73,8 @@ func TestRegisterAll_MissingCredsStillRegistered(t *testing.T) {
        Limiters: recon.NewLimiterRegistry(),
    })

    if n := len(eng.List()); n != 18 {
        t.Fatalf("expected 18 sources registered, got %d: %v", n, eng.List())
    if n := len(eng.List()); n != 28 {
        t.Fatalf("expected 28 sources registered, got %d: %v", n, eng.List())
    }

    // SweepAll with an empty config should filter out cred-gated sources
213 pkg/recon/sources/s3scanner.go Normal file
@@ -0,0 +1,213 @@
package sources

import (
    "context"
    "encoding/xml"
    "fmt"
    "io"
    "log"
    "net/http"
    "strings"
    "time"

    "golang.org/x/time/rate"

    "github.com/salvacybersec/keyhunter/pkg/providers"
    "github.com/salvacybersec/keyhunter/pkg/recon"
)

// S3Scanner enumerates publicly accessible AWS S3 buckets by name pattern and
// flags readable objects that match common config-file patterns (.env, *.json,
// *.yaml, etc.) as potential API key exposure vectors.
//
// The scanner is credentialless: it uses anonymous HTTP to probe public buckets.
// Object contents are NOT downloaded; only the presence of suspicious filenames
// is reported.
type S3Scanner struct {
    Registry *providers.Registry
    Limiters *recon.LimiterRegistry
    // BaseURL overrides the S3 endpoint for tests. Default: "https://%s.s3.amazonaws.com".
    // Must contain exactly one %s placeholder for the bucket name.
    BaseURL string
    client  *Client
}

// Compile-time assertion.
var _ recon.ReconSource = (*S3Scanner)(nil)

func (s *S3Scanner) Name() string { return "s3" }
func (s *S3Scanner) RateLimit() rate.Limit { return rate.Every(500 * time.Millisecond) }
func (s *S3Scanner) Burst() int { return 3 }
func (s *S3Scanner) RespectsRobots() bool { return false }
func (s *S3Scanner) Enabled(_ recon.Config) bool { return true }

func (s *S3Scanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
    client := s.client
    if client == nil {
        client = NewClient()
    }
    baseURL := s.BaseURL
    if baseURL == "" {
        baseURL = "https://%s.s3.amazonaws.com"
    }

    names := bucketNames(s.Registry)
    if len(names) == 0 {
        return nil
    }

    for _, bucket := range names {
        if err := ctx.Err(); err != nil {
            return err
        }
        if s.Limiters != nil {
            if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
                return err
            }
        }

        endpoint := fmt.Sprintf(baseURL, bucket)
        keys, err := s.listBucketS3(ctx, client, endpoint)
        if err != nil {
            log.Printf("s3: bucket %q probe failed (skipping): %v", bucket, err)
            continue
        }

        for _, key := range keys {
            if !isConfigFile(key) {
                continue
            }
            out <- recon.Finding{
                Source:     fmt.Sprintf("s3://%s/%s", bucket, key),
                SourceType: "recon:s3",
                Confidence: "medium",
                DetectedAt: time.Now(),
            }
        }
    }
    return nil
}

// listBucketS3 probes an S3 bucket endpoint. A HEAD that returns 200 means
// public listing is enabled; we then GET to parse the ListBucketResult XML.
// Returns nil keys if the bucket is not publicly listable.
func (s *S3Scanner) listBucketS3(ctx context.Context, client *Client, endpoint string) ([]string, error) {
    req, err := http.NewRequestWithContext(ctx, http.MethodHead, endpoint, nil)
    if err != nil {
        return nil, err
    }
    resp, err := client.HTTP.Do(req)
    if err != nil {
        return nil, err
    }
    resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        return nil, nil // not publicly listable
    }

    // Public listing available -- fetch and parse XML.
    getReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
    if err != nil {
        return nil, err
    }
    getResp, err := client.Do(ctx, getReq)
    if err != nil {
        return nil, err
    }
    defer getResp.Body.Close()

    return parseS3ListXML(getResp.Body)
}

// s3ListResult models the AWS S3 ListBucketResult XML.
type s3ListResult struct {
    XMLName  xml.Name   `xml:"ListBucketResult"`
    Contents []s3Object `xml:"Contents"`
}

type s3Object struct {
    Key string `xml:"Key"`
}

func parseS3ListXML(r io.Reader) ([]string, error) {
    data, err := io.ReadAll(io.LimitReader(r, 1<<20))
    if err != nil {
        return nil, err
    }
    var result s3ListResult
    if err := xml.Unmarshal(data, &result); err != nil {
        return nil, err
    }
    keys := make([]string, 0, len(result.Contents))
    for _, obj := range result.Contents {
        if obj.Key != "" {
            keys = append(keys, obj.Key)
        }
    }
    return keys, nil
}

// bucketSuffixes are common suffixes appended to provider keywords to generate
// candidate bucket names.
var bucketSuffixes = []string{
    "-keys", "-config", "-backup", "-data", "-secrets", "-env",
    "-api-keys", "-credentials", "-tokens",
}

// bucketNames generates candidate cloud storage bucket names from provider
// keywords combined with common suffixes. Shared by GCSScanner,
// AzureBlobScanner, and DOSpacesScanner.
func bucketNames(reg *providers.Registry) []string {
    if reg == nil {
        return nil
    }

    seen := make(map[string]struct{})
    var names []string

    for _, p := range reg.List() {
        // Use provider name (lowercased, spaces to dashes) as base.
        base := strings.ToLower(strings.ReplaceAll(p.Name, " ", "-"))
        if base == "" {
            continue
        }
        for _, suffix := range bucketSuffixes {
            candidate := base + suffix
            if _, ok := seen[candidate]; !ok {
                seen[candidate] = struct{}{}
                names = append(names, candidate)
            }
        }
    }
    return names
}

// isConfigFile returns true if the filename matches common config file patterns
// that may contain API keys.
func isConfigFile(name string) bool {
    lower := strings.ToLower(name)
    // Exact basenames.
    for _, exact := range []string{".env", ".env.local", ".env.production", ".env.development"} {
        if lower == exact || strings.HasSuffix(lower, "/"+exact) {
            return true
        }
    }
    // Extension matches.
    for _, ext := range []string{".json", ".yaml", ".yml", ".toml", ".conf", ".cfg", ".ini", ".properties"} {
        if strings.HasSuffix(lower, ext) {
            return true
        }
    }
    // Prefix matches (config.*, settings.*).
    base := lower
    if idx := strings.LastIndex(lower, "/"); idx >= 0 {
        base = lower[idx+1:]
    }
    for _, prefix := range []string{"config.", "settings.", "credentials.", "secrets."} {
        if strings.HasPrefix(base, prefix) {
            return true
        }
    }
    return false
}

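To make the candidate generation in bucketNames concrete: each provider name is lowercased, spaces become dashes, and the result is crossed with every suffix in bucketSuffixes. A minimal self-contained sketch with a made-up provider name:

package main

import (
    "fmt"
    "strings"
)

// Mirrors bucketSuffixes from s3scanner.go above.
var bucketSuffixes = []string{
    "-keys", "-config", "-backup", "-data", "-secrets", "-env",
    "-api-keys", "-credentials", "-tokens",
}

func main() {
    // Hypothetical provider name; real names come from the provider registry.
    base := strings.ToLower(strings.ReplaceAll("Test Prov", " ", "-"))
    for _, suffix := range bucketSuffixes {
        fmt.Println(base + suffix) // test-prov-keys, test-prov-config, ...
    }
}
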
139 pkg/recon/sources/s3scanner_test.go Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
package sources
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||||
|
)
|
||||||
|
|
||||||
|
func cloudTestRegistry() *providers.Registry {
|
||||||
|
return providers.NewRegistryFromProviders([]providers.Provider{
|
||||||
|
{Name: "testprov", Keywords: []string{"testprov-key"}},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func s3TestServer() *httptest.Server {
    mux := http.NewServeMux()

    // Respond to HEAD for the testprov-keys bucket with 200 (public).
    mux.HandleFunc("/testprov-keys/", func(w http.ResponseWriter, r *http.Request) {
        if r.Method == http.MethodHead {
            w.WriteHeader(http.StatusOK)
            return
        }
        // GET — return S3 ListBucketResult XML.
        w.Header().Set("Content-Type", "application/xml")
        _, _ = w.Write([]byte(`<?xml version="1.0" encoding="UTF-8"?>
<ListBucketResult>
<Contents><Key>.env</Key></Contents>
<Contents><Key>config.yaml</Key></Contents>
<Contents><Key>readme.md</Key></Contents>
<Contents><Key>data/settings.json</Key></Contents>
</ListBucketResult>`))
    })

    // All other buckets return 404 (not found).
    mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
        w.WriteHeader(http.StatusNotFound)
    })

    return httptest.NewServer(mux)
}

func TestS3Scanner_Sweep(t *testing.T) {
    srv := s3TestServer()
    defer srv.Close()

    src := &S3Scanner{
        Registry: cloudTestRegistry(),
        BaseURL:  srv.URL + "/%s/",
        client:   NewClient(),
    }

    out := make(chan recon.Finding, 32)
    ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
    defer cancel()

    if err := src.Sweep(ctx, "", out); err != nil {
        t.Fatalf("Sweep error: %v", err)
    }
    close(out)

    var findings []recon.Finding
    for f := range out {
        findings = append(findings, f)
    }

    // .env, config.yaml, data/settings.json match; readme.md does not.
    if len(findings) != 3 {
        t.Fatalf("expected 3 findings, got %d: %+v", len(findings), findings)
    }

    for _, f := range findings {
        if f.SourceType != "recon:s3" {
            t.Errorf("unexpected SourceType: %s", f.SourceType)
        }
        if f.Confidence != "medium" {
            t.Errorf("unexpected Confidence: %s", f.Confidence)
        }
    }
}

func TestS3Scanner_EmptyRegistry(t *testing.T) {
    src := &S3Scanner{
        Registry: providers.NewRegistryFromProviders(nil),
        Limiters: recon.NewLimiterRegistry(),
        client:   NewClient(),
    }

    out := make(chan recon.Finding, 4)
    ctx := context.Background()
    if err := src.Sweep(ctx, "", out); err != nil {
        t.Fatalf("Sweep error: %v", err)
    }
    close(out)

    if len(out) != 0 {
        t.Fatal("expected 0 findings with empty registry")
    }
}

func TestS3Scanner_CtxCancelled(t *testing.T) {
    srv := s3TestServer()
    defer srv.Close()

    src := &S3Scanner{
        Registry: cloudTestRegistry(),
        BaseURL:  srv.URL + "/%s/",
        client:   NewClient(),
    }

    ctx, cancel := context.WithCancel(context.Background())
    cancel()

    out := make(chan recon.Finding, 4)
    if err := src.Sweep(ctx, "", out); err == nil {
        t.Fatal("expected ctx error")
    }
}

func TestS3Scanner_EnabledAndMeta(t *testing.T) {
    s := &S3Scanner{}
    if s.Name() != "s3" {
        t.Fatalf("unexpected name: %s", s.Name())
    }
    if !s.Enabled(recon.Config{}) {
        t.Fatal("expected Enabled=true")
    }
    if s.RespectsRobots() {
        t.Fatal("expected RespectsRobots=false")
    }
    if s.Burst() != 3 {
        t.Fatal("expected Burst=3")
    }
}
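The tests above double as a usage recipe for the scanner. Below is a minimal sketch (not part of the diff) of driving S3Scanner outside a test, assuming the same in-package context since the client field is unexported; the helper name sweepS3 and the path-style production BaseURL are hypothetical, mirroring the test's srv.URL + "/%s/" pattern.

// sweepS3 is a hypothetical in-package helper, not part of this diff.
func sweepS3(ctx context.Context, reg *providers.Registry) ([]recon.Finding, error) {
    src := &S3Scanner{
        Registry: reg,
        BaseURL:  "https://s3.amazonaws.com/%s/", // assumed path-style pattern, as in the tests
        Limiters: recon.NewLimiterRegistry(),
        client:   NewClient(),
    }
    out := make(chan recon.Finding, 64)
    errc := make(chan error, 1)
    // Run Sweep concurrently so it never blocks once the channel buffer fills.
    go func() { errc <- src.Sweep(ctx, "", out); close(out) }()
    var findings []recon.Finding
    for f := range out {
        findings = append(findings, f)
    }
    return findings, <-errc
}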
153 pkg/recon/sources/shodan.go Normal file
@@ -0,0 +1,153 @@
package sources

import (
    "context"
    "encoding/json"
    "errors"
    "fmt"
    "net/http"
    "net/url"
    "strings"
    "time"

    "golang.org/x/time/rate"

    "github.com/salvacybersec/keyhunter/pkg/providers"
    "github.com/salvacybersec/keyhunter/pkg/recon"
)

// ShodanSource implements recon.ReconSource against the Shodan /shodan/host/search
// REST API. It iterates provider keyword queries and emits a Finding for every
// match returned (exposed LLM endpoints, API keys in banners, etc.).
//
// A missing API key disables the source -- Sweep returns nil and Enabled reports
// false.
type ShodanSource struct {
    APIKey   string
    BaseURL  string
    Registry *providers.Registry
    Limiters *recon.LimiterRegistry
    client   *Client
}

// Compile-time assertion.
var _ recon.ReconSource = (*ShodanSource)(nil)

// NewShodanSource constructs a ShodanSource with the shared retry client.
func NewShodanSource(apiKey string, reg *providers.Registry, lim *recon.LimiterRegistry) *ShodanSource {
    return &ShodanSource{
        APIKey:   apiKey,
        BaseURL:  "https://api.shodan.io",
        Registry: reg,
        Limiters: lim,
        client:   NewClient(),
    }
}

func (s *ShodanSource) Name() string          { return "shodan" }
func (s *ShodanSource) RateLimit() rate.Limit { return rate.Every(1 * time.Second) }
func (s *ShodanSource) Burst() int            { return 1 }
func (s *ShodanSource) RespectsRobots() bool  { return false }

// Enabled returns true only when APIKey is configured.
func (s *ShodanSource) Enabled(_ recon.Config) bool { return s.APIKey != "" }

// Sweep issues one /shodan/host/search request per provider keyword and emits
// a Finding for every match returned.
func (s *ShodanSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
    if s.APIKey == "" {
        return nil
    }
    base := s.BaseURL
    if base == "" {
        base = "https://api.shodan.io"
    }

    queries := BuildQueries(s.Registry, "shodan")
    kwIndex := shodanKeywordIndex(s.Registry)

    for _, q := range queries {
        if err := ctx.Err(); err != nil {
            return err
        }
        if s.Limiters != nil {
            if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
                return err
            }
        }

        endpoint := fmt.Sprintf("%s/shodan/host/search?key=%s&query=%s",
            base, url.QueryEscape(s.APIKey), url.QueryEscape(q))
        req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
        if err != nil {
            return fmt.Errorf("shodan: build request: %w", err)
        }
        req.Header.Set("Accept", "application/json")
        req.Header.Set("User-Agent", "keyhunter-recon")

        resp, err := s.client.Do(ctx, req)
        if err != nil {
            if errors.Is(err, ErrUnauthorized) {
                return err
            }
            if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
                return err
            }
            continue
        }

        var parsed shodanSearchResponse
        decErr := json.NewDecoder(resp.Body).Decode(&parsed)
        _ = resp.Body.Close()
        if decErr != nil {
            continue
        }

        provName := kwIndex[strings.ToLower(q)]
        for _, m := range parsed.Matches {
            f := recon.Finding{
                ProviderName: provName,
                Confidence:   "low",
                Source:       fmt.Sprintf("shodan://%s:%d", m.IPStr, m.Port),
                SourceType:   "recon:shodan",
                DetectedAt:   time.Now(),
            }
            select {
            case out <- f:
            case <-ctx.Done():
                return ctx.Err()
            }
        }
    }
    return nil
}

type shodanSearchResponse struct {
    Matches []shodanMatch `json:"matches"`
}

type shodanMatch struct {
    IPStr string `json:"ip_str"`
    Port  int    `json:"port"`
    Data  string `json:"data"`
}

// shodanKeywordIndex maps lowercased keywords to provider names.
func shodanKeywordIndex(reg *providers.Registry) map[string]string {
    m := make(map[string]string)
    if reg == nil {
        return m
    }
    for _, p := range reg.List() {
        for _, k := range p.Keywords {
            kl := strings.ToLower(strings.TrimSpace(k))
            if kl == "" {
                continue
            }
            if _, exists := m[kl]; !exists {
                m[kl] = p.Name
            }
        }
    }
    return m
}
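A minimal sketch (not part of the diff) of wiring ShodanSource end to end, again assuming in-package context; the helper name sweepShodan is hypothetical, and everything else uses only the constructor and methods defined above.

// sweepShodan is a hypothetical in-package helper, not part of this diff.
func sweepShodan(ctx context.Context, apiKey string, reg *providers.Registry) ([]recon.Finding, error) {
    s := NewShodanSource(apiKey, reg, recon.NewLimiterRegistry())
    if !s.Enabled(recon.Config{}) {
        return nil, nil // empty key: the source is silently disabled, not an error
    }
    out := make(chan recon.Finding, 64)
    errc := make(chan error, 1)
    go func() { errc <- s.Sweep(ctx, "", out); close(out) }()
    var findings []recon.Finding
    for f := range out {
        findings = append(findings, f)
    }
    return findings, <-errc
}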
121 pkg/recon/sources/shodan_test.go Normal file
@@ -0,0 +1,121 @@
package sources

import (
    "context"
    "encoding/json"
    "errors"
    "net/http"
    "net/http/httptest"
    "sync/atomic"
    "testing"
    "time"

    "github.com/salvacybersec/keyhunter/pkg/recon"
)

func shodanStubHandler(t *testing.T, calls *int32) http.HandlerFunc {
    t.Helper()
    return func(w http.ResponseWriter, r *http.Request) {
        atomic.AddInt32(calls, 1)
        if r.URL.Path != "/shodan/host/search" {
            t.Errorf("unexpected path: %s", r.URL.Path)
        }
        if got := r.URL.Query().Get("key"); got != "testkey" {
            t.Errorf("missing api key param: %q", got)
        }
        body := map[string]any{
            "matches": []map[string]any{
                {"ip_str": "1.2.3.4", "port": 8080, "data": "vllm"},
                {"ip_str": "5.6.7.8", "port": 11434, "data": "ollama"},
            },
        }
        w.Header().Set("Content-Type", "application/json")
        _ = json.NewEncoder(w).Encode(body)
    }
}

func TestShodanSource_EnabledRequiresAPIKey(t *testing.T) {
    reg := syntheticRegistry()
    lim := recon.NewLimiterRegistry()

    if s := NewShodanSource("", reg, lim); s.Enabled(recon.Config{}) {
        t.Error("expected Enabled=false with empty key")
    }
    if s := NewShodanSource("key", reg, lim); !s.Enabled(recon.Config{}) {
        t.Error("expected Enabled=true with key")
    }
}

func TestShodanSource_SweepEmptyKeyReturnsNil(t *testing.T) {
    reg := syntheticRegistry()
    lim := recon.NewLimiterRegistry()
    s := NewShodanSource("", reg, lim)

    out := make(chan recon.Finding, 10)
    if err := s.Sweep(context.Background(), "", out); err != nil {
        t.Fatalf("expected nil, got %v", err)
    }
    close(out)
    if n := countFindings(out); n != 0 {
        t.Fatalf("expected 0 findings, got %d", n)
    }
}

func TestShodanSource_SweepEmitsFindings(t *testing.T) {
    reg := syntheticRegistry()
    lim := recon.NewLimiterRegistry()
    _ = lim.For("shodan", 1000, 100)

    var calls int32
    srv := httptest.NewServer(shodanStubHandler(t, &calls))
    defer srv.Close()

    s := NewShodanSource("testkey", reg, lim)
    s.BaseURL = srv.URL

    out := make(chan recon.Finding, 32)
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
    defer cancel()

    done := make(chan error, 1)
    go func() { done <- s.Sweep(ctx, "", out); close(out) }()

    var findings []recon.Finding
    for f := range out {
        findings = append(findings, f)
    }
    if err := <-done; err != nil {
        t.Fatalf("Sweep error: %v", err)
    }

    // 2 keywords * 2 matches = 4 findings
    if len(findings) != 4 {
        t.Fatalf("expected 4 findings, got %d", len(findings))
    }
    for _, f := range findings {
        if f.SourceType != "recon:shodan" {
            t.Errorf("SourceType=%q want recon:shodan", f.SourceType)
        }
    }
    if got := atomic.LoadInt32(&calls); got != 2 {
        t.Errorf("expected 2 calls, got %d", got)
    }
}

func TestShodanSource_CtxCancelled(t *testing.T) {
    reg := syntheticRegistry()
    lim := recon.NewLimiterRegistry()
    _ = lim.For("shodan", 1000, 100)

    s := NewShodanSource("key", reg, lim)
    s.BaseURL = "http://127.0.0.1:1"

    ctx, cancel := context.WithCancel(context.Background())
    cancel()

    out := make(chan recon.Finding, 1)
    err := s.Sweep(ctx, "", out)
    if !errors.Is(err, context.Canceled) {
        t.Fatalf("expected context.Canceled, got %v", err)
    }
}
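The tests above cover the success, disabled, and cancellation paths, but not the ErrUnauthorized early return in Sweep. A minimal sketch of that case (not part of the diff), assuming the shared Client maps HTTP 401 responses to ErrUnauthorized; if it signals auth failures differently, the assertion would need to change accordingly.

// Hypothetical test, assuming Client.Do surfaces HTTP 401 as ErrUnauthorized.
func TestShodanSource_Unauthorized(t *testing.T) {
    reg := syntheticRegistry()
    lim := recon.NewLimiterRegistry()
    _ = lim.For("shodan", 1000, 100)

    // Stub that rejects every request, as Shodan does for a bad key.
    srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        w.WriteHeader(http.StatusUnauthorized)
    }))
    defer srv.Close()

    s := NewShodanSource("badkey", reg, lim)
    s.BaseURL = srv.URL

    out := make(chan recon.Finding, 4)
    if err := s.Sweep(context.Background(), "", out); !errors.Is(err, ErrUnauthorized) {
        t.Fatalf("expected ErrUnauthorized, got %v", err)
    }
}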
157 pkg/recon/sources/zoomeye.go Normal file
@@ -0,0 +1,157 @@
package sources

import (
    "context"
    "encoding/json"
    "errors"
    "fmt"
    "net/http"
    "net/url"
    "strings"
    "time"

    "golang.org/x/time/rate"

    "github.com/salvacybersec/keyhunter/pkg/providers"
    "github.com/salvacybersec/keyhunter/pkg/recon"
)

// ZoomEyeSource implements recon.ReconSource against the ZoomEye /host/search
// API. It iterates provider keyword queries and emits a Finding for every match
// returned (device/service key exposure).
//
// A missing API key disables the source without error.
type ZoomEyeSource struct {
    APIKey   string
    BaseURL  string
    Registry *providers.Registry
    Limiters *recon.LimiterRegistry
    client   *Client
}

// Compile-time assertion.
var _ recon.ReconSource = (*ZoomEyeSource)(nil)

// NewZoomEyeSource constructs a ZoomEyeSource with the shared retry client.
func NewZoomEyeSource(apiKey string, reg *providers.Registry, lim *recon.LimiterRegistry) *ZoomEyeSource {
    return &ZoomEyeSource{
        APIKey:   apiKey,
        BaseURL:  "https://api.zoomeye.org",
        Registry: reg,
        Limiters: lim,
        client:   NewClient(),
    }
}

func (s *ZoomEyeSource) Name() string          { return "zoomeye" }
func (s *ZoomEyeSource) RateLimit() rate.Limit { return rate.Every(2 * time.Second) }
func (s *ZoomEyeSource) Burst() int            { return 1 }
func (s *ZoomEyeSource) RespectsRobots() bool  { return false }

// Enabled returns true only when APIKey is configured.
func (s *ZoomEyeSource) Enabled(_ recon.Config) bool { return s.APIKey != "" }

// Sweep issues one /host/search request per provider keyword and emits a
// Finding for every match returned.
func (s *ZoomEyeSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
    if s.APIKey == "" {
        return nil
    }
    base := s.BaseURL
    if base == "" {
        base = "https://api.zoomeye.org"
    }

    queries := BuildQueries(s.Registry, "zoomeye")
    kwIndex := zoomeyeKeywordIndex(s.Registry)

    for _, q := range queries {
        if err := ctx.Err(); err != nil {
            return err
        }
        if s.Limiters != nil {
            if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
                return err
            }
        }

        endpoint := fmt.Sprintf("%s/host/search?query=%s&page=1",
            base, url.QueryEscape(q))
        req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
        if err != nil {
            return fmt.Errorf("zoomeye: build request: %w", err)
        }
        req.Header.Set("API-KEY", s.APIKey)
        req.Header.Set("Accept", "application/json")
        req.Header.Set("User-Agent", "keyhunter-recon")

        resp, err := s.client.Do(ctx, req)
        if err != nil {
            if errors.Is(err, ErrUnauthorized) {
                return err
            }
            if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
                return err
            }
            continue
        }

        var parsed zoomeyeSearchResponse
        decErr := json.NewDecoder(resp.Body).Decode(&parsed)
        _ = resp.Body.Close()
        if decErr != nil {
            continue
        }

        provName := kwIndex[strings.ToLower(q)]
        for _, m := range parsed.Matches {
            f := recon.Finding{
                ProviderName: provName,
                Confidence:   "low",
                Source:       fmt.Sprintf("zoomeye://%s:%d", m.IP, m.PortInfo.Port),
                SourceType:   "recon:zoomeye",
                DetectedAt:   time.Now(),
            }
            select {
            case out <- f:
            case <-ctx.Done():
                return ctx.Err()
            }
        }
    }
    return nil
}

type zoomeyeSearchResponse struct {
    Matches []zoomeyeMatch `json:"matches"`
}

type zoomeyeMatch struct {
    IP       string          `json:"ip"`
    PortInfo zoomeyePortInfo `json:"portinfo"`
    Banner   string          `json:"banner"`
}

type zoomeyePortInfo struct {
    Port int `json:"port"`
}

// zoomeyeKeywordIndex maps lowercased keywords to provider names.
func zoomeyeKeywordIndex(reg *providers.Registry) map[string]string {
    m := make(map[string]string)
    if reg == nil {
        return m
    }
    for _, p := range reg.List() {
        for _, k := range p.Keywords {
            kl := strings.ToLower(strings.TrimSpace(k))
            if kl == "" {
                continue
            }
            if _, exists := m[kl]; !exists {
                m[kl] = p.Name
            }
        }
    }
    return m
}
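One subtlety in the keyword index shared by both scanners: when two providers claim the same keyword, the first one returned by reg.List() wins and later registrations are ignored. A minimal sketch of that behavior (not part of the diff), assuming NewRegistryFromProviders also accepts a populated []providers.Provider with Name and Keywords fields; only the nil form appears in this diff.

// Hypothetical demonstration of keyword precedence; the Provider literal
// shape is assumed, not taken from this diff.
func demoKeywordPrecedence() {
    reg := providers.NewRegistryFromProviders([]providers.Provider{
        {Name: "openai", Keywords: []string{"sk-", "OpenAI"}},
        {Name: "anthropic", Keywords: []string{"sk-"}}, // "sk-" already claimed by openai
    })
    idx := zoomeyeKeywordIndex(reg)
    fmt.Println(idx["sk-"])    // "openai" (first registration wins)
    fmt.Println(idx["openai"]) // "openai" (keywords are lowercased before indexing)
}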
121 pkg/recon/sources/zoomeye_test.go Normal file
@@ -0,0 +1,121 @@
package sources

import (
    "context"
    "encoding/json"
    "errors"
    "net/http"
    "net/http/httptest"
    "sync/atomic"
    "testing"
    "time"

    "github.com/salvacybersec/keyhunter/pkg/recon"
)

func zoomeyeStubHandler(t *testing.T, calls *int32) http.HandlerFunc {
    t.Helper()
    return func(w http.ResponseWriter, r *http.Request) {
        atomic.AddInt32(calls, 1)
        if r.URL.Path != "/host/search" {
            t.Errorf("unexpected path: %s", r.URL.Path)
        }
        if got := r.Header.Get("API-KEY"); got != "testkey" {
            t.Errorf("missing/wrong API-KEY header: %q", got)
        }
        body := map[string]any{
            "matches": []map[string]any{
                {"ip": "192.168.1.1", "portinfo": map[string]any{"port": 8080}, "banner": "vllm"},
                {"ip": "192.168.1.2", "portinfo": map[string]any{"port": 11434}, "banner": "ollama"},
            },
        }
        w.Header().Set("Content-Type", "application/json")
        _ = json.NewEncoder(w).Encode(body)
    }
}

func TestZoomEyeSource_EnabledRequiresAPIKey(t *testing.T) {
    reg := syntheticRegistry()
    lim := recon.NewLimiterRegistry()

    if s := NewZoomEyeSource("", reg, lim); s.Enabled(recon.Config{}) {
        t.Error("expected Enabled=false with empty key")
    }
    if s := NewZoomEyeSource("key", reg, lim); !s.Enabled(recon.Config{}) {
        t.Error("expected Enabled=true with key")
    }
}

func TestZoomEyeSource_SweepEmptyKeyReturnsNil(t *testing.T) {
    reg := syntheticRegistry()
    lim := recon.NewLimiterRegistry()
    s := NewZoomEyeSource("", reg, lim)

    out := make(chan recon.Finding, 10)
    if err := s.Sweep(context.Background(), "", out); err != nil {
        t.Fatalf("expected nil, got %v", err)
    }
    close(out)
    if n := countFindings(out); n != 0 {
        t.Fatalf("expected 0 findings, got %d", n)
    }
}

func TestZoomEyeSource_SweepEmitsFindings(t *testing.T) {
    reg := syntheticRegistry()
    lim := recon.NewLimiterRegistry()
    _ = lim.For("zoomeye", 1000, 100)

    var calls int32
    srv := httptest.NewServer(zoomeyeStubHandler(t, &calls))
    defer srv.Close()

    s := NewZoomEyeSource("testkey", reg, lim)
    s.BaseURL = srv.URL

    out := make(chan recon.Finding, 32)
    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
    defer cancel()

    done := make(chan error, 1)
    go func() { done <- s.Sweep(ctx, "", out); close(out) }()

    var findings []recon.Finding
    for f := range out {
        findings = append(findings, f)
    }
    if err := <-done; err != nil {
        t.Fatalf("Sweep error: %v", err)
    }

    // 2 keywords * 2 matches = 4 findings
    if len(findings) != 4 {
        t.Fatalf("expected 4 findings, got %d", len(findings))
    }
    for _, f := range findings {
        if f.SourceType != "recon:zoomeye" {
            t.Errorf("SourceType=%q want recon:zoomeye", f.SourceType)
        }
    }
    if got := atomic.LoadInt32(&calls); got != 2 {
        t.Errorf("expected 2 calls, got %d", got)
    }
}

func TestZoomEyeSource_CtxCancelled(t *testing.T) {
    reg := syntheticRegistry()
    lim := recon.NewLimiterRegistry()
    _ = lim.For("zoomeye", 1000, 100)

    s := NewZoomEyeSource("key", reg, lim)
    s.BaseURL = "http://127.0.0.1:1"

    ctx, cancel := context.WithCancel(context.Background())
    cancel()

    out := make(chan recon.Finding, 1)
    err := s.Sweep(ctx, "", out)
    if !errors.Is(err, context.Canceled) {
        t.Fatalf("expected context.Canceled, got %v", err)
    }
}