feat(14-02): add WaybackMachine + CommonCrawl recon sources
- WaybackMachineSource queries CDX API for historical snapshots
- CommonCrawlSource queries CC Index API for matching pages
- Both credentialless, rate-limited at 1 req/5s, RespectsRobots=true
- RegisterAll extended to 42 sources (40 Phase 10-13 + 2 Phase 14)
- Full httptest-based test coverage for both sources
This commit is contained in:
@@ -56,8 +56,9 @@ type SourcesConfig struct {
|
||||
}
|
||||
|
||||
// RegisterAll registers every Phase 10 code-hosting, Phase 11 search engine /
|
||||
// paste site, Phase 12 IoT scanner / cloud storage, and Phase 13 package
|
||||
// registry / container / IaC source on engine (40 sources total).
|
||||
// paste site, Phase 12 IoT scanner / cloud storage, Phase 13 package
|
||||
// registry / container / IaC, and Phase 14 web archive source on engine
|
||||
// (42 sources total).
|
||||
//
|
||||
// All sources are registered unconditionally so that cmd/recon.go can surface
|
||||
// the full catalog via `keyhunter recon list` regardless of which credentials
|
||||
@@ -228,4 +229,8 @@ func RegisterAll(engine *recon.Engine, cfg SourcesConfig) {
|
||||
engine.Register(&KubernetesSource{Registry: reg, Limiters: lim})
|
||||
engine.Register(&TerraformSource{Registry: reg, Limiters: lim})
|
||||
engine.Register(&HelmSource{Registry: reg, Limiters: lim})
|
||||
|
||||
// Phase 14: Web archive sources (credentialless).
|
||||
engine.Register(&WaybackMachineSource{Registry: reg, Limiters: lim})
|
||||
engine.Register(&CommonCrawlSource{Registry: reg, Limiters: lim})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user