From 7ef6c2ac34b529402ba56d81c778ae0d371a686c Mon Sep 17 00:00:00 2001 From: salvacybersec Date: Mon, 6 Apr 2026 13:34:18 +0300 Subject: [PATCH] feat(14-04): wire all 12 Phase 14 sources in RegisterAll (45 -> 52 total) - Add CircleCIToken to SourcesConfig with env/viper lookup in cmd/recon.go - Register 7 new sources: travisci, ghactions, circleci, jenkins, wayback, commoncrawl, jsbundle - Update register_test.go expectations from 45 to 52 sources - Add integration test handlers + registrations for all 12 Phase 14 sources - Integration test now validates 52 sources end-to-end --- cmd/recon.go | 1 + pkg/recon/sources/integration_test.go | 157 ++++++++++++++++++++++++-- pkg/recon/sources/register.go | 28 ++++- pkg/recon/sources/register_test.go | 15 ++- 4 files changed, 186 insertions(+), 15 deletions(-) diff --git a/cmd/recon.go b/cmd/recon.go index 44e131e..49a285b 100644 --- a/cmd/recon.go +++ b/cmd/recon.go @@ -167,6 +167,7 @@ func buildReconEngine() *recon.Engine { FOFAAPIKey: firstNonEmpty(os.Getenv("FOFA_API_KEY"), viper.GetString("recon.fofa.api_key")), NetlasAPIKey: firstNonEmpty(os.Getenv("NETLAS_API_KEY"), viper.GetString("recon.netlas.api_key")), BinaryEdgeAPIKey: firstNonEmpty(os.Getenv("BINARYEDGE_API_KEY"), viper.GetString("recon.binaryedge.api_key")), + CircleCIToken: firstNonEmpty(os.Getenv("CIRCLECI_TOKEN"), viper.GetString("recon.circleci.token")), } sources.RegisterAll(e, cfg) return e diff --git a/pkg/recon/sources/integration_test.go b/pkg/recon/sources/integration_test.go index 91674a9..90e8961 100644 --- a/pkg/recon/sources/integration_test.go +++ b/pkg/recon/sources/integration_test.go @@ -16,10 +16,11 @@ import ( // TestIntegration_AllSources_SweepAll spins up a single multiplexed httptest // server that serves canned fixtures for every Phase 10 code-hosting source, // Phase 11 search engine / paste site source, Phase 12 IoT scanner / cloud -// storage source, and Phase 13 package registry / container / IaC source, -// registers the sources (with BaseURL overrides pointing at the test server) -// onto a fresh recon.Engine, runs SweepAll, and asserts at least one Finding -// was emitted per SourceType across all 40 sources. +// storage source, Phase 13 package registry / container / IaC source, and +// Phase 14 CI/CD log / web archive / frontend leak source, registers the +// sources (with BaseURL overrides pointing at the test server) onto a fresh +// recon.Engine, runs SweepAll, and asserts at least one Finding was emitted +// per SourceType across all 52 sources. // // RegisterAll cannot be used directly because it wires production URLs; the // test exercises the same code paths by constructing each source identically @@ -312,6 +313,92 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) { _, _ = w.Write([]byte(`{"packages":[{"package_id":"chart-1","name":"leaked-chart","normalized_name":"leaked-chart","repository":{"name":"bitnami","kind":0}}]}`)) }) + // ---- Phase 14: SourceMapSource (probes /static/js/main.js.map) ---- + mux.HandleFunc("/sourcemaps/static/js/main.js.map", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"sources":["app.js"],"sourcesContent":["const apiKey = \"sk-proj-SOURCEMAPLEAK123\";"]}`)) + }) + + // ---- Phase 14: WebpackSource (probes /static/js/main.js) ---- + mux.HandleFunc("/webpack/static/js/main.js", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/javascript") + _, _ = w.Write([]byte(`!function(){var e={NEXT_PUBLIC_API_KEY:"sk-proj-WEBPACKLEAK123456"}}();`)) + }) + + // ---- Phase 14: EnvLeakSource (probes /.env) ---- + mux.HandleFunc("/dotenv/.env", func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte("OPENAI_API_KEY=sk-proj-ENVLEAK12345678\nDB_HOST=localhost\n")) + }) + + // ---- Phase 14: SwaggerSource (probes /swagger.json) ---- + mux.HandleFunc("/swagger/swagger.json", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"openapi":"3.0.0","paths":{"/api":{"get":{"parameters":[{"name":"api_key","example":"sk-proj-SWAGGERLEAK12345"}]}}}}`)) + }) + + // ---- Phase 14: DeployPreviewSource (probes /) ---- + mux.HandleFunc("/deploypreview/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + _, _ = w.Write([]byte(``)) + }) + + // ---- Phase 14: TravisCISource /builds + /builds/{id}/log ---- + mux.HandleFunc("/travisci/builds", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"builds":[{"id":999,"state":"passed"}]}`)) + }) + mux.HandleFunc("/travisci/builds/999/log", func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(`export API_KEY="sk-proj-TRAVISLEAK1234567890"`)) + }) + + // ---- Phase 14: GitHubActionsSource /search/code + /actions/runs/{id}/logs ---- + mux.HandleFunc("/ghactions/search/code", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"workflow_runs":[{"id":55,"status":"completed","conclusion":"success"}]}`)) + }) + mux.HandleFunc("/ghactions/actions/runs/55/logs", func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(`SECRET_KEY="sk-proj-GHACTIONSLEAK1234567"`)) + }) + + // ---- Phase 14: CircleCISource /project/gh/{slug}/pipeline + /pipeline/{id}/workflow ---- + mux.HandleFunc("/circleci/project/gh/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"items":[{"id":"pipe-test-1","number":1}]}`)) + }) + mux.HandleFunc("/circleci/pipeline/pipe-test-1/workflow", func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(`AUTH_TOKEN="sk-proj-CIRCLELEAK1234567890"`)) + }) + + // ---- Phase 14: JenkinsSource /api/json + /job/{name}/lastBuild/consoleText ---- + mux.HandleFunc("/jenkins/api/json", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"jobs":[{"name":"build-app","url":"http://jenkins/job/build-app/","color":"blue"}]}`)) + }) + mux.HandleFunc("/jenkins/job/build-app/lastBuild/consoleText", func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(`Setting TOKEN="sk-proj-JENKINSLEAK12345678"`)) + }) + + // ---- Phase 14: WaybackMachineSource /cdx/search/cdx + /web/{ts}id_/{url} ---- + mux.HandleFunc("/wayback/cdx/search/cdx", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`[["url","timestamp","statuscode"],["https://example.com/.env","20240101000000","200"]]`)) + }) + mux.HandleFunc("/wayback/web/", func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte(`API_KEY="sk-proj-WAYBACKLEAK12345678"`)) + }) + + // ---- Phase 14: CommonCrawlSource (NDJSON CDX index) ---- + mux.HandleFunc("/commoncrawl", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte("{\"url\":\"https://example.com/.env\",\"timestamp\":\"20240101\",\"status\":\"200\",\"filename\":\"warc.gz\",\"length\":\"100\",\"offset\":\"0\"}\n")) + }) + + // ---- Phase 14: JSBundleSource (probes /static/js/main.js) ---- + mux.HandleFunc("/jsbundle/static/js/main.js", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/javascript") + _, _ = w.Write([]byte(`!function(){var c={apiKey:"sk-proj-JSBUNDLELEAK123456789"}}();`)) + }) + srv := httptest.NewServer(mux) defer srv.Close() @@ -550,9 +637,45 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) { // helm eng.Register(&HelmSource{BaseURL: srv.URL + "/helm", Registry: reg, Limiters: lim, Client: NewClient()}) - // Sanity: all 40 sources registered. - if n := len(eng.List()); n != 40 { - t.Fatalf("expected 40 sources on engine, got %d: %v", n, eng.List()) + // --- Phase 14: Frontend leak sources --- + + // sourcemaps + eng.Register(&SourceMapSource{BaseURL: srv.URL + "/sourcemaps", Registry: reg, Limiters: nil, Client: NewClient()}) + // webpack + eng.Register(&WebpackSource{BaseURL: srv.URL + "/webpack", Registry: reg, Limiters: nil, Client: NewClient()}) + // dotenv + eng.Register(&EnvLeakSource{BaseURL: srv.URL + "/dotenv", Registry: reg, Limiters: nil, Client: NewClient()}) + // swagger + eng.Register(&SwaggerSource{BaseURL: srv.URL + "/swagger", Registry: reg, Limiters: nil, Client: NewClient()}) + // deploypreview + eng.Register(&DeployPreviewSource{BaseURL: srv.URL + "/deploypreview", Registry: reg, Limiters: nil, Client: NewClient()}) + + // --- Phase 14: CI/CD log sources --- + + // travisci + eng.Register(&TravisCISource{BaseURL: srv.URL + "/travisci", Registry: reg, Limiters: nil, Client: NewClient()}) + // ghactions + eng.Register(&GitHubActionsSource{Token: "ghp-test", BaseURL: srv.URL + "/ghactions", Registry: reg, Limiters: nil, Client: NewClient()}) + // circleci + eng.Register(&CircleCISource{Token: "cci-test", BaseURL: srv.URL + "/circleci", Registry: reg, Limiters: nil, Client: NewClient()}) + // jenkins + eng.Register(&JenkinsSource{BaseURL: srv.URL + "/jenkins", Registry: reg, Limiters: nil, Client: NewClient()}) + + // --- Phase 14: Web archive sources --- + + // wayback + eng.Register(&WaybackMachineSource{BaseURL: srv.URL + "/wayback", Registry: reg, Limiters: nil, Client: NewClient()}) + // commoncrawl + eng.Register(&CommonCrawlSource{BaseURL: srv.URL + "/commoncrawl", Registry: reg, Limiters: nil, Client: NewClient()}) + + // --- Phase 14: JS bundle analysis --- + + // jsbundle + eng.Register(&JSBundleSource{BaseURL: srv.URL + "/jsbundle", Registry: reg, Limiters: nil, Client: NewClient()}) + + // Sanity: all 52 sources registered. + if n := len(eng.List()); n != 52 { + t.Fatalf("expected 52 sources on engine, got %d: %v", n, eng.List()) } ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) @@ -616,6 +739,22 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) { "recon:k8s", "recon:terraform", "recon:helm", + // Phase 14: Frontend leaks + "recon:sourcemaps", + "recon:webpack", + "recon:dotenv", + "recon:swagger", + "recon:deploypreview", + // Phase 14: CI/CD logs + "recon:travisci", + "recon:ghactions", + "recon:circleci", + "recon:jenkins", + // Phase 14: Web archives + "recon:wayback", + "recon:commoncrawl", + // Phase 14: JS bundles + "recon:jsbundle", } for _, st := range wantTypes { if byType[st] == 0 { @@ -641,8 +780,8 @@ func TestRegisterAll_Phase12(t *testing.T) { }) names := eng.List() - if n := len(names); n != 45 { - t.Fatalf("expected 45 sources from RegisterAll, got %d: %v", n, names) + if n := len(names); n != 52 { + t.Fatalf("expected 52 sources from RegisterAll, got %d: %v", n, names) } // Build lookup for source access. diff --git a/pkg/recon/sources/register.go b/pkg/recon/sources/register.go index b2d5a01..d38d3ad 100644 --- a/pkg/recon/sources/register.go +++ b/pkg/recon/sources/register.go @@ -49,6 +49,9 @@ type SourcesConfig struct { NetlasAPIKey string BinaryEdgeAPIKey string + // Phase 14: CI/CD source tokens. + CircleCIToken string + // Registry drives query generation for every source via BuildQueries. Registry *providers.Registry // Limiters is the shared per-source rate-limiter registry. @@ -57,8 +60,8 @@ type SourcesConfig struct { // RegisterAll registers every Phase 10 code-hosting, Phase 11 search engine / // paste site, Phase 12 IoT scanner / cloud storage, Phase 13 package -// registry / container / IaC, and Phase 14 frontend leak source on engine -// (45 sources total). +// registry / container / IaC, and Phase 14 CI/CD log / web archive / +// frontend leak source on engine (52 sources total). // // All sources are registered unconditionally so that cmd/recon.go can surface // the full catalog via `keyhunter recon list` regardless of which credentials @@ -236,4 +239,25 @@ func RegisterAll(engine *recon.Engine, cfg SourcesConfig) { engine.Register(&EnvLeakSource{Registry: reg, Limiters: lim}) engine.Register(&SwaggerSource{Registry: reg, Limiters: lim}) engine.Register(&DeployPreviewSource{Registry: reg, Limiters: lim}) + + // Phase 14: CI/CD log sources. + engine.Register(&TravisCISource{Registry: reg, Limiters: lim}) + engine.Register(&GitHubActionsSource{ + Token: cfg.GitHubToken, + Registry: reg, + Limiters: lim, + }) + engine.Register(&CircleCISource{ + Token: cfg.CircleCIToken, + Registry: reg, + Limiters: lim, + }) + engine.Register(&JenkinsSource{Registry: reg, Limiters: lim}) + + // Phase 14: Web archive sources (credentialless). + engine.Register(&WaybackMachineSource{Registry: reg, Limiters: lim}) + engine.Register(&CommonCrawlSource{Registry: reg, Limiters: lim}) + + // Phase 14: JS bundle analysis (credentialless). + engine.Register(&JSBundleSource{Registry: reg, Limiters: lim}) } diff --git a/pkg/recon/sources/register_test.go b/pkg/recon/sources/register_test.go index b718ad6..bb0c11a 100644 --- a/pkg/recon/sources/register_test.go +++ b/pkg/recon/sources/register_test.go @@ -16,9 +16,9 @@ func registerTestRegistry() *providers.Registry { }) } -// TestRegisterAll_WiresAllFortyFiveSources asserts that RegisterAll registers +// TestRegisterAll_WiresAllFiftyTwoSources asserts that RegisterAll registers // every Phase 10-14 source by its stable name on a fresh engine. -func TestRegisterAll_WiresAllFortyFiveSources(t *testing.T) { +func TestRegisterAll_WiresAllFiftyTwoSources(t *testing.T) { eng := recon.NewEngine() cfg := SourcesConfig{ Registry: registerTestRegistry(), @@ -34,8 +34,10 @@ func TestRegisterAll_WiresAllFortyFiveSources(t *testing.T) { "bitbucket", "brave", "censys", + "circleci", "codeberg", "codesandbox", + "commoncrawl", "crates", "deploypreview", "dockerhub", @@ -43,6 +45,7 @@ func TestRegisterAll_WiresAllFortyFiveSources(t *testing.T) { "duckduckgo", "fofa", "gcs", + "ghactions", "gist", "gistpaste", "github", @@ -51,6 +54,8 @@ func TestRegisterAll_WiresAllFortyFiveSources(t *testing.T) { "goproxy", "helm", "huggingface", + "jenkins", + "jsbundle", "k8s", "kaggle", "maven", @@ -70,6 +75,8 @@ func TestRegisterAll_WiresAllFortyFiveSources(t *testing.T) { "spaces", "swagger", "terraform", + "travisci", + "wayback", "webpack", "yandex", "zoomeye", @@ -90,8 +97,8 @@ func TestRegisterAll_MissingCredsStillRegistered(t *testing.T) { Limiters: recon.NewLimiterRegistry(), }) - if n := len(eng.List()); n != 45 { - t.Fatalf("expected 45 sources registered, got %d: %v", n, eng.List()) + if n := len(eng.List()); n != 52 { + t.Fatalf("expected 52 sources registered, got %d: %v", n, eng.List()) } // SweepAll with an empty config should filter out cred-gated sources