feat(14-04): wire all 12 Phase 14 sources in RegisterAll (45 -> 52 total)

- Add CircleCIToken to SourcesConfig with env/viper lookup in cmd/recon.go
- Register the 7 Phase 14 sources not yet wired (the 5 frontend leak sources were already registered): travisci, ghactions, circleci, jenkins, wayback, commoncrawl, jsbundle
- Update register_test.go expectations from 45 to 52 sources
- Add integration test handlers + registrations for all 12 Phase 14 sources
- Integration test now validates 52 sources end-to-end
This commit is contained in:
salvacybersec
2026-04-06 13:34:18 +03:00
parent 169b80b3bc
commit 7ef6c2ac34
4 changed files with 186 additions and 15 deletions

View File

@@ -167,6 +167,7 @@ func buildReconEngine() *recon.Engine {
FOFAAPIKey: firstNonEmpty(os.Getenv("FOFA_API_KEY"), viper.GetString("recon.fofa.api_key")),
NetlasAPIKey: firstNonEmpty(os.Getenv("NETLAS_API_KEY"), viper.GetString("recon.netlas.api_key")),
BinaryEdgeAPIKey: firstNonEmpty(os.Getenv("BINARYEDGE_API_KEY"), viper.GetString("recon.binaryedge.api_key")),
CircleCIToken: firstNonEmpty(os.Getenv("CIRCLECI_TOKEN"), viper.GetString("recon.circleci.token")),
}
sources.RegisterAll(e, cfg)
return e

View File

@@ -16,10 +16,11 @@ import (
// TestIntegration_AllSources_SweepAll spins up a single multiplexed httptest
// server that serves canned fixtures for every Phase 10 code-hosting source,
// Phase 11 search engine / paste site source, Phase 12 IoT scanner / cloud
// storage source, and Phase 13 package registry / container / IaC source,
// registers the sources (with BaseURL overrides pointing at the test server)
// onto a fresh recon.Engine, runs SweepAll, and asserts at least one Finding
// was emitted per SourceType across all 40 sources.
// storage source, Phase 13 package registry / container / IaC source, and
// Phase 14 CI/CD log / web archive / frontend leak source, registers the
// sources (with BaseURL overrides pointing at the test server) onto a fresh
// recon.Engine, runs SweepAll, and asserts at least one Finding was emitted
// per SourceType across all 52 sources.
//
// RegisterAll cannot be used directly because it wires production URLs; the
// test exercises the same code paths by constructing each source identically
@@ -312,6 +313,92 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) {
_, _ = w.Write([]byte(`{"packages":[{"package_id":"chart-1","name":"leaked-chart","normalized_name":"leaked-chart","repository":{"name":"bitnami","kind":0}}]}`))
})
// ---- Phase 14: SourceMapSource (probes /static/js/main.js.map) ----
mux.HandleFunc("/sourcemaps/static/js/main.js.map", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{"sources":["app.js"],"sourcesContent":["const apiKey = \"sk-proj-SOURCEMAPLEAK123\";"]}`))
})
// ---- Phase 14: WebpackSource (probes /static/js/main.js) ----
mux.HandleFunc("/webpack/static/js/main.js", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/javascript")
_, _ = w.Write([]byte(`!function(){var e={NEXT_PUBLIC_API_KEY:"sk-proj-WEBPACKLEAK123456"}}();`))
})
// ---- Phase 14: EnvLeakSource (probes /.env) ----
mux.HandleFunc("/dotenv/.env", func(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte("OPENAI_API_KEY=sk-proj-ENVLEAK12345678\nDB_HOST=localhost\n"))
})
// ---- Phase 14: SwaggerSource (probes /swagger.json) ----
mux.HandleFunc("/swagger/swagger.json", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{"openapi":"3.0.0","paths":{"/api":{"get":{"parameters":[{"name":"api_key","example":"sk-proj-SWAGGERLEAK12345"}]}}}}`))
})
// ---- Phase 14: DeployPreviewSource (probes /) ----
mux.HandleFunc("/deploypreview/", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/html")
_, _ = w.Write([]byte(`<html><script>window.NEXT_PUBLIC_API_KEY="sk-proj-DEPLOYLEAK12345678"</script></html>`))
})
// ---- Phase 14: TravisCISource /builds + /builds/{id}/log ----
mux.HandleFunc("/travisci/builds", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{"builds":[{"id":999,"state":"passed"}]}`))
})
mux.HandleFunc("/travisci/builds/999/log", func(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte(`export API_KEY="sk-proj-TRAVISLEAK1234567890"`))
})
// ---- Phase 14: GitHubActionsSource /search/code + /actions/runs/{id}/logs ----
mux.HandleFunc("/ghactions/search/code", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{"workflow_runs":[{"id":55,"status":"completed","conclusion":"success"}]}`))
})
mux.HandleFunc("/ghactions/actions/runs/55/logs", func(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte(`SECRET_KEY="sk-proj-GHACTIONSLEAK1234567"`))
})
// ---- Phase 14: CircleCISource /project/gh/{slug}/pipeline + /pipeline/{id}/workflow ----
mux.HandleFunc("/circleci/project/gh/", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{"items":[{"id":"pipe-test-1","number":1}]}`))
})
mux.HandleFunc("/circleci/pipeline/pipe-test-1/workflow", func(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte(`AUTH_TOKEN="sk-proj-CIRCLELEAK1234567890"`))
})
// ---- Phase 14: JenkinsSource /api/json + /job/{name}/lastBuild/consoleText ----
mux.HandleFunc("/jenkins/api/json", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{"jobs":[{"name":"build-app","url":"http://jenkins/job/build-app/","color":"blue"}]}`))
})
mux.HandleFunc("/jenkins/job/build-app/lastBuild/consoleText", func(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte(`Setting TOKEN="sk-proj-JENKINSLEAK12345678"`))
})
// ---- Phase 14: WaybackMachineSource /cdx/search/cdx + /web/{ts}id_/{url} ----
mux.HandleFunc("/wayback/cdx/search/cdx", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`[["url","timestamp","statuscode"],["https://example.com/.env","20240101000000","200"]]`))
})
mux.HandleFunc("/wayback/web/", func(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte(`API_KEY="sk-proj-WAYBACKLEAK12345678"`))
})
// ---- Phase 14: CommonCrawlSource (NDJSON CDX index) ----
mux.HandleFunc("/commoncrawl", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte("{\"url\":\"https://example.com/.env\",\"timestamp\":\"20240101\",\"status\":\"200\",\"filename\":\"warc.gz\",\"length\":\"100\",\"offset\":\"0\"}\n"))
})
// ---- Phase 14: JSBundleSource (probes /static/js/main.js) ----
mux.HandleFunc("/jsbundle/static/js/main.js", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/javascript")
_, _ = w.Write([]byte(`!function(){var c={apiKey:"sk-proj-JSBUNDLELEAK123456789"}}();`))
})
srv := httptest.NewServer(mux)
defer srv.Close()
@@ -550,9 +637,45 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) {
// helm
eng.Register(&HelmSource{BaseURL: srv.URL + "/helm", Registry: reg, Limiters: lim, Client: NewClient()})
// Sanity: all 40 sources registered.
if n := len(eng.List()); n != 40 {
t.Fatalf("expected 40 sources on engine, got %d: %v", n, eng.List())
// --- Phase 14: Frontend leak sources ---
// sourcemaps
eng.Register(&SourceMapSource{BaseURL: srv.URL + "/sourcemaps", Registry: reg, Limiters: nil, Client: NewClient()})
// webpack
eng.Register(&WebpackSource{BaseURL: srv.URL + "/webpack", Registry: reg, Limiters: nil, Client: NewClient()})
// dotenv
eng.Register(&EnvLeakSource{BaseURL: srv.URL + "/dotenv", Registry: reg, Limiters: nil, Client: NewClient()})
// swagger
eng.Register(&SwaggerSource{BaseURL: srv.URL + "/swagger", Registry: reg, Limiters: nil, Client: NewClient()})
// deploypreview
eng.Register(&DeployPreviewSource{BaseURL: srv.URL + "/deploypreview", Registry: reg, Limiters: nil, Client: NewClient()})
// --- Phase 14: CI/CD log sources ---
// travisci
eng.Register(&TravisCISource{BaseURL: srv.URL + "/travisci", Registry: reg, Limiters: nil, Client: NewClient()})
// ghactions
eng.Register(&GitHubActionsSource{Token: "ghp-test", BaseURL: srv.URL + "/ghactions", Registry: reg, Limiters: nil, Client: NewClient()})
// circleci
eng.Register(&CircleCISource{Token: "cci-test", BaseURL: srv.URL + "/circleci", Registry: reg, Limiters: nil, Client: NewClient()})
// jenkins
eng.Register(&JenkinsSource{BaseURL: srv.URL + "/jenkins", Registry: reg, Limiters: nil, Client: NewClient()})
// --- Phase 14: Web archive sources ---
// wayback
eng.Register(&WaybackMachineSource{BaseURL: srv.URL + "/wayback", Registry: reg, Limiters: nil, Client: NewClient()})
// commoncrawl
eng.Register(&CommonCrawlSource{BaseURL: srv.URL + "/commoncrawl", Registry: reg, Limiters: nil, Client: NewClient()})
// --- Phase 14: JS bundle analysis ---
// jsbundle
eng.Register(&JSBundleSource{BaseURL: srv.URL + "/jsbundle", Registry: reg, Limiters: nil, Client: NewClient()})
// Sanity: all 52 sources registered.
if n := len(eng.List()); n != 52 {
t.Fatalf("expected 52 sources on engine, got %d: %v", n, eng.List())
}
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
@@ -616,6 +739,22 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) {
"recon:k8s",
"recon:terraform",
"recon:helm",
// Phase 14: Frontend leaks
"recon:sourcemaps",
"recon:webpack",
"recon:dotenv",
"recon:swagger",
"recon:deploypreview",
// Phase 14: CI/CD logs
"recon:travisci",
"recon:ghactions",
"recon:circleci",
"recon:jenkins",
// Phase 14: Web archives
"recon:wayback",
"recon:commoncrawl",
// Phase 14: JS bundles
"recon:jsbundle",
}
for _, st := range wantTypes {
if byType[st] == 0 {
@@ -641,8 +780,8 @@ func TestRegisterAll_Phase12(t *testing.T) {
})
names := eng.List()
if n := len(names); n != 45 {
t.Fatalf("expected 45 sources from RegisterAll, got %d: %v", n, names)
if n := len(names); n != 52 {
t.Fatalf("expected 52 sources from RegisterAll, got %d: %v", n, names)
}
// Build lookup for source access.

View File

@@ -49,6 +49,9 @@ type SourcesConfig struct {
NetlasAPIKey string
BinaryEdgeAPIKey string
// Phase 14: CI/CD source tokens.
CircleCIToken string
// Registry drives query generation for every source via BuildQueries.
Registry *providers.Registry
// Limiters is the shared per-source rate-limiter registry.
@@ -57,8 +60,8 @@ type SourcesConfig struct {
// RegisterAll registers every Phase 10 code-hosting, Phase 11 search engine /
// paste site, Phase 12 IoT scanner / cloud storage, Phase 13 package
// registry / container / IaC, and Phase 14 frontend leak source on engine
// (45 sources total).
// registry / container / IaC, and Phase 14 CI/CD log / web archive /
// frontend leak source on engine (52 sources total).
//
// All sources are registered unconditionally so that cmd/recon.go can surface
// the full catalog via `keyhunter recon list` regardless of which credentials
@@ -236,4 +239,25 @@ func RegisterAll(engine *recon.Engine, cfg SourcesConfig) {
engine.Register(&EnvLeakSource{Registry: reg, Limiters: lim})
engine.Register(&SwaggerSource{Registry: reg, Limiters: lim})
engine.Register(&DeployPreviewSource{Registry: reg, Limiters: lim})
// Phase 14: CI/CD log sources.
engine.Register(&TravisCISource{Registry: reg, Limiters: lim})
engine.Register(&GitHubActionsSource{
Token: cfg.GitHubToken,
Registry: reg,
Limiters: lim,
})
engine.Register(&CircleCISource{
Token: cfg.CircleCIToken,
Registry: reg,
Limiters: lim,
})
engine.Register(&JenkinsSource{Registry: reg, Limiters: lim})
// Phase 14: Web archive sources (credentialless).
engine.Register(&WaybackMachineSource{Registry: reg, Limiters: lim})
engine.Register(&CommonCrawlSource{Registry: reg, Limiters: lim})
// Phase 14: JS bundle analysis (credentialless).
engine.Register(&JSBundleSource{Registry: reg, Limiters: lim})
}

View File

@@ -16,9 +16,9 @@ func registerTestRegistry() *providers.Registry {
})
}
// TestRegisterAll_WiresAllFortyFiveSources asserts that RegisterAll registers
// TestRegisterAll_WiresAllFiftyTwoSources asserts that RegisterAll registers
// every Phase 10-14 source by its stable name on a fresh engine.
func TestRegisterAll_WiresAllFortyFiveSources(t *testing.T) {
func TestRegisterAll_WiresAllFiftyTwoSources(t *testing.T) {
eng := recon.NewEngine()
cfg := SourcesConfig{
Registry: registerTestRegistry(),
@@ -34,8 +34,10 @@ func TestRegisterAll_WiresAllFortyFiveSources(t *testing.T) {
"bitbucket",
"brave",
"censys",
"circleci",
"codeberg",
"codesandbox",
"commoncrawl",
"crates",
"deploypreview",
"dockerhub",
@@ -43,6 +45,7 @@ func TestRegisterAll_WiresAllFortyFiveSources(t *testing.T) {
"duckduckgo",
"fofa",
"gcs",
"ghactions",
"gist",
"gistpaste",
"github",
@@ -51,6 +54,8 @@ func TestRegisterAll_WiresAllFortyFiveSources(t *testing.T) {
"goproxy",
"helm",
"huggingface",
"jenkins",
"jsbundle",
"k8s",
"kaggle",
"maven",
@@ -70,6 +75,8 @@ func TestRegisterAll_WiresAllFortyFiveSources(t *testing.T) {
"spaces",
"swagger",
"terraform",
"travisci",
"wayback",
"webpack",
"yandex",
"zoomeye",
@@ -90,8 +97,8 @@ func TestRegisterAll_MissingCredsStillRegistered(t *testing.T) {
Limiters: recon.NewLimiterRegistry(),
})
if n := len(eng.List()); n != 45 {
t.Fatalf("expected 45 sources registered, got %d: %v", n, eng.List())
if n := len(eng.List()); n != 52 {
t.Fatalf("expected 52 sources registered, got %d: %v", n, eng.List())
}
// SweepAll with an empty config should filter out cred-gated sources