package sources

import (
	"context"
	"encoding/json"
	"fmt"
	"net/http"
	"net/http/httptest"
	"testing"
	"time"

	"github.com/salvacybersec/keyhunter/pkg/providers"
	"github.com/salvacybersec/keyhunter/pkg/recon"
)

// TestIntegration_AllSources_SweepAll spins up a single multiplexed httptest
// server that serves canned fixtures for every Phase 10 code-hosting source,
// Phase 11 search engine / paste site source, Phase 12 IoT scanner / cloud
// storage source, Phase 13 package registry / container / IaC source, and
// Phase 14 CI/CD log / web archive / frontend leak source. It registers the
// sources (with BaseURL overrides pointing at the test server) onto a fresh
// recon.Engine, runs SweepAll, and asserts that at least one Finding was
// emitted per SourceType across all 52 sources.
//
// RegisterAll cannot be used directly because it wires production URLs; the
// test exercises the same code paths by constructing each source identically
// to RegisterAll but with BaseURL/Platforms overrides.
func TestIntegration_AllSources_SweepAll(t *testing.T) {
	const (
		ctJSON = "application/json"
		ctHTML = "text/html"
		ctXML  = "application/xml"
		ctJS   = "application/javascript"
		ctNone = "" // no explicit header; net/http sniffs the type on Write
	)

	mux := http.NewServeMux()

	// serve registers a handler that answers every request on pattern with a
	// fixed body, setting Content-Type first when contentType is non-empty.
	serve := func(pattern, contentType, body string) {
		payload := []byte(body)
		mux.HandleFunc(pattern, func(w http.ResponseWriter, _ *http.Request) {
			if contentType != "" {
				w.Header().Set("Content-Type", contentType)
			}
			// A failed write only means the client went away; the missing
			// finding is reported by the assertions at the end of the test.
			_, _ = w.Write(payload)
		})
	}

	// serveBucket is serve for bucket-listing endpoints: HEAD requests get a
	// bare 200 with no Content-Type, every other method gets the listing body.
	serveBucket := func(pattern, contentType, body string) {
		payload := []byte(body)
		mux.HandleFunc(pattern, func(w http.ResponseWriter, r *http.Request) {
			if r.Method == http.MethodHead {
				w.WriteHeader(http.StatusOK)
				return
			}
			w.Header().Set("Content-Type", contentType)
			_, _ = w.Write(payload) // best-effort, see serve
		})
	}

	// NOTE(review): several HTML/XML fixture bodies below contain only text and
	// no markup (the deploy-preview body is empty). Confirm they still carry
	// the links/keys that the corresponding sources parse.

	// ---- GitHub /search/code ----
	mux.HandleFunc("/search/code", func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", ctJSON)
		// Encoding a fixed in-memory value; a failure would surface as a
		// missing recon:github finding.
		_ = json.NewEncoder(w).Encode(ghSearchResponse{
			Items: []ghCodeItem{
				{HTMLURL: "https://github.com/alice/leak/blob/main/.env"},
			},
		})
	})

	// ---- GitLab /api/v4/search ----
	serve("/api/v4/search", ctJSON, `[{"basename":"keys","data":"sk-proj-abc","path":"keys.env","project_id":42,"ref":"main","startline":1}]`)

	// ---- Bitbucket /2.0/workspaces/{workspace}/search/code ----
	serve("/2.0/workspaces/kh-test/search/code", ctJSON, `{"values":[{"content_match_count":1,"page_url":"https://bitbucket.org/kh-test/repo/src/main/keys.env","file":{"path":"keys.env","commit":{"hash":"deadbeef"}}}]}`)

	// ---- Gist /gists/public + raw content ----
	// The raw_url must be absolute, so it is built from the inbound request.
	mux.HandleFunc("/gists/public", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", ctJSON)
		body := fmt.Sprintf(`[{"html_url":"https://gist.github.com/alice/gistleak","files":{"f.py":{"filename":"f.py","raw_url":"%s/raw/gist1"}}}]`, baseFromReq(r))
		_, _ = w.Write([]byte(body)) // best-effort, see serve
	})
	serve("/raw/gist1", ctNone, "api_key = sk-proj-ABCDEF")

	// ---- Codeberg /api/v1/repos/search ----
	serve("/api/v1/repos/search", ctJSON, `{"ok":true,"data":[{"full_name":"bob/keys","html_url":"https://codeberg.org/bob/keys"}]}`)

	// ---- HuggingFace /api/spaces + /api/models ----
	const hfBody = `[{"id":"alice/leaky-space"}]`
	serve("/api/spaces", ctJSON, hfBody)
	serve("/api/models", ctJSON, hfBody)

	// ---- Replit /search?q=...&type=repls (HTML) ----
	// ---- CodeSandbox /search?query=...&type=sandboxes (HTML) ----
	// Both hit the same /search path; distinguish on query params.
	mux.HandleFunc("/search", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", ctHTML)
		switch r.URL.Query().Get("type") {
		case "repls":
			_, _ = w.Write([]byte(` hit skip `))
		case "sandboxes":
			_, _ = w.Write([]byte(` hit skip `))
		default:
			w.WriteHeader(http.StatusNotFound)
		}
	})

	// ---- SandboxesSource sub-platforms ----
	serve("/codepen-search", ctHTML, `hit`)
	serve("/jsfiddle-search", ctJSON, `{"results":[{"url":"https://jsfiddle.net/u/leaky/"}]}`)

	// ---- Kaggle /api/v1/kernels/list ----
	serve("/api/v1/kernels/list", ctJSON, `[{"ref":"alice/leaky-notebook"}]`)

	// ---- Phase 11: Google Custom Search /customsearch/v1 ----
	serve("/customsearch/v1", ctJSON, `{"items":[{"link":"https://pastebin.com/abc123","title":"leak","snippet":"sk-proj-xxx"}]}`)

	// ---- Phase 11: Bing /v7.0/search ----
	serve("/v7.0/search", ctJSON, `{"webPages":{"value":[{"url":"https://example.com/bing-leak","name":"leak"}]}}`)

	// ---- Phase 11: DuckDuckGo /html/ ----
	serve("/html/", ctHTML, `result`)

	// ---- Phase 11: Yandex /search/xml ----
	serve("/search/xml", ctXML, ` https://example.com/yandex-leak`)

	// ---- Phase 11: Brave /res/v1/web/search ----
	serve("/res/v1/web/search", ctJSON, `{"web":{"results":[{"url":"https://example.com/brave-leak","title":"leak"}]}}`)

	// ---- Phase 11: Pastebin (routed under /pb/ prefix) ----
	serve("/pb/search", ctHTML, `paste1`)
	serve("/pb/raw/AbCdEf12", ctNone, "leaked key: sk-proj-PASTEBIN123")

	// ---- Phase 11: GistPaste (routed under /gp/ prefix) ----
	serve("/gp/search", ctHTML, `gist1`)
	serve("/gp/alice/deadbeef01/raw", ctNone, "leaked: sk-proj-GISTPASTE456")

	// ---- Phase 11: PasteSites sub-platforms ----
	serve("/paste-search", ctHTML, `paste`)
	serve("/paste-raw/aB3xZ9", ctNone, "secret: sk-proj-PASTESITES789")

	// ---- Phase 12: Shodan /shodan/host/search ----
	serve("/shodan/host/search", ctJSON, `{"matches":[{"ip_str":"1.2.3.4","port":8080,"data":"vllm endpoint"}]}`)

	// ---- Phase 12: Censys /v2/hosts/search ----
	serve("/v2/hosts/search", ctJSON, `{"result":{"hits":[{"ip":"10.0.0.1","services":[{"port":443,"service_name":"HTTP"}]}]}}`)

	// ---- Phase 12: ZoomEye /host/search ----
	serve("/host/search", ctJSON, `{"matches":[{"ip":"172.16.0.1","portinfo":{"port":8443,"service":"https"}}]}`)

	// ---- Phase 12: FOFA /api/v1/search/all ----
	serve("/api/v1/search/all", ctJSON, `{"results":[["example.com","192.168.1.1","443"]],"size":1}`)

	// ---- Phase 12: Netlas /api/responses/ ----
	serve("/api/responses/", ctJSON, `{"items":[{"data":{"ip":"10.10.10.1","port":80}}]}`)

	// ---- Phase 12: BinaryEdge /v2/query/search ----
	serve("/v2/query/search", ctJSON, `{"events":[{"target":{"ip":"192.0.2.1","port":8080}}]}`)

	// ---- Phase 12: Cloud storage — S3 + DOSpaces (S3 XML format) ----
	serveBucket("/cloud-s3/", ctXML, ` .env config.yaml `)

	// ---- Phase 12: Cloud storage — GCS (JSON format) ----
	serveBucket("/cloud-gcs/", ctJSON, `{"items":[{"name":".env"},{"name":"config.yaml"}]}`)

	// ---- Phase 12: Cloud storage — Azure Blob (EnumerationResults XML) ----
	serve("/cloud-azure/", ctXML, ` .env config.yaml `)

	// ---- Phase 13: npm /-/v1/search (prefix /npm) ----
	serve("/npm/-/v1/search", ctJSON, `{"objects":[{"package":{"name":"leak-pkg","links":{"npm":"https://npmjs.com/package/leak-pkg"}}}]}`)

	// ---- Phase 13: pypi /search/ (prefix /pypi) ----
	serve("/pypi/search/", ctHTML, `leaked-pkg`)

	// ---- Phase 13: crates /api/v1/crates (prefix /crates) ----
	serve("/crates/api/v1/crates", ctJSON, `{"crates":[{"id":"leaked-crate","name":"leaked-crate","repository":"https://github.com/example/leaked-crate"}]}`)

	// ---- Phase 13: rubygems /api/v1/search.json (prefix /rubygems) ----
	serve("/rubygems/api/v1/search.json", ctJSON, `[{"name":"leaked-gem","project_uri":"https://rubygems.org/gems/leaked-gem"}]`)

	// ---- Phase 13: maven /solrsearch/select (prefix /maven) ----
	serve("/maven/solrsearch/select", ctJSON, `{"response":{"numFound":1,"docs":[{"g":"com.leak","a":"sdk","latestVersion":"1.0"}]}}`)

	// ---- Phase 13: nuget /query (prefix /nuget) ----
	serve("/nuget/query", ctJSON, `{"data":[{"id":"LeakedPkg","version":"1.0","projectUrl":"https://nuget.org/packages/LeakedPkg"}]}`)

	// ---- Phase 13: goproxy /search (prefix /goproxy) ----
	serve("/goproxy/search", ctHTML, `module`)

	// ---- Phase 13: packagist /search.json (prefix /packagist) ----
	serve("/packagist/search.json", ctJSON, `{"results":[{"name":"vendor/leaked","url":"https://packagist.org/packages/vendor/leaked"}]}`)

	// ---- Phase 13: dockerhub /v2/search/repositories/ (prefix /dockerhub) ----
	serve("/dockerhub/v2/search/repositories/", ctJSON, `{"results":[{"repo_name":"user/leaked-image","description":"leaked"}]}`)

	// ---- Phase 13: k8s /api/v1/packages/search (prefix /k8s) ----
	serve("/k8s/api/v1/packages/search", ctJSON, `{"packages":[{"package_id":"pkg-1","name":"leaked-operator","normalized_name":"leaked-operator","repository":{"name":"community","kind":6}}]}`)

	// ---- Phase 13: terraform /v1/modules (prefix /terraform) ----
	serve("/terraform/v1/modules", ctJSON, `{"modules":[{"id":"hashicorp/leaked/aws","namespace":"hashicorp","name":"leaked","provider":"aws"}]}`)

	// ---- Phase 13: helm /api/v1/packages/search (prefix /helm) ----
	serve("/helm/api/v1/packages/search", ctJSON, `{"packages":[{"package_id":"chart-1","name":"leaked-chart","normalized_name":"leaked-chart","repository":{"name":"bitnami","kind":0}}]}`)

	// ---- Phase 14: SourceMapSource (probes /static/js/main.js.map) ----
	serve("/sourcemaps/static/js/main.js.map", ctJSON, `{"sources":["app.js"],"sourcesContent":["const apiKey = \"sk-proj-SOURCEMAPLEAK123\";"]}`)

	// ---- Phase 14: WebpackSource (probes /static/js/main.js) ----
	serve("/webpack/static/js/main.js", ctJS, `!function(){var e={NEXT_PUBLIC_API_KEY:"sk-proj-WEBPACKLEAK123456"}}();`)

	// ---- Phase 14: EnvLeakSource (probes /.env) ----
	serve("/dotenv/.env", ctNone, "OPENAI_API_KEY=sk-proj-ENVLEAK12345678\nDB_HOST=localhost\n")

	// ---- Phase 14: SwaggerSource (probes /swagger.json) ----
	serve("/swagger/swagger.json", ctJSON, `{"openapi":"3.0.0","paths":{"/api":{"get":{"parameters":[{"name":"api_key","example":"sk-proj-SWAGGERLEAK12345"}]}}}}`)

	// ---- Phase 14: DeployPreviewSource (probes /) ----
	serve("/deploypreview/", ctHTML, ``)

	// ---- Phase 14: TravisCISource /builds + /builds/{id}/log ----
	serve("/travisci/builds", ctJSON, `{"builds":[{"id":999,"state":"passed"}]}`)
	serve("/travisci/builds/999/log", ctNone, `export API_KEY="sk-proj-TRAVISLEAK1234567890"`)

	// ---- Phase 14: GitHubActionsSource /search/code + /actions/runs/{id}/logs ----
	serve("/ghactions/search/code", ctJSON, `{"workflow_runs":[{"id":55,"status":"completed","conclusion":"success"}]}`)
	serve("/ghactions/actions/runs/55/logs", ctNone, `SECRET_KEY="sk-proj-GHACTIONSLEAK1234567"`)

	// ---- Phase 14: CircleCISource /project/gh/{slug}/pipeline + /pipeline/{id}/workflow ----
	serve("/circleci/project/gh/", ctJSON, `{"items":[{"id":"pipe-test-1","number":1}]}`)
	serve("/circleci/pipeline/pipe-test-1/workflow", ctNone, `AUTH_TOKEN="sk-proj-CIRCLELEAK1234567890"`)

	// ---- Phase 14: JenkinsSource /api/json + /job/{name}/lastBuild/consoleText ----
	serve("/jenkins/api/json", ctJSON, `{"jobs":[{"name":"build-app","url":"http://jenkins/job/build-app/","color":"blue"}]}`)
	serve("/jenkins/job/build-app/lastBuild/consoleText", ctNone, `Setting TOKEN="sk-proj-JENKINSLEAK12345678"`)

	// ---- Phase 14: WaybackMachineSource /cdx/search/cdx + /web/{ts}id_/{url} ----
	serve("/wayback/cdx/search/cdx", ctJSON, `[["url","timestamp","statuscode"],["https://example.com/.env","20240101000000","200"]]`)
	serve("/wayback/web/", ctNone, `API_KEY="sk-proj-WAYBACKLEAK12345678"`)

	// ---- Phase 14: CommonCrawlSource (NDJSON CDX index) ----
	serve("/commoncrawl", ctJSON, "{\"url\":\"https://example.com/.env\",\"timestamp\":\"20240101\",\"status\":\"200\",\"filename\":\"warc.gz\",\"length\":\"100\",\"offset\":\"0\"}\n")

	// ---- Phase 14: JSBundleSource (probes /static/js/main.js) ----
	serve("/jsbundle/static/js/main.js", ctJS, `!function(){var c={apiKey:"sk-proj-JSBUNDLELEAK123456789"}}();`)

	srv := httptest.NewServer(mux)
	defer srv.Close()

	reg := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
	lim := recon.NewLimiterRegistry()
	eng := recon.NewEngine()

	// --- Phase 10 sources ---

	// GitHub -- token + BaseURL override. Use the real constructor so `client`
	// is initialized, then retarget BaseURL at the test server.
	ghs := NewGitHubSource("ghp-test", reg, lim)
	ghs.BaseURL = srv.URL
	eng.Register(ghs)

	// GitLab
	eng.Register(&GitLabSource{
		Token:    "glpat-test",
		BaseURL:  srv.URL,
		Registry: reg,
		Limiters: lim,
	})

	// Bitbucket
	eng.Register(&BitbucketSource{
		Token:     "bb-test",
		Workspace: "kh-test",
		BaseURL:   srv.URL,
		Registry:  reg,
		Limiters:  lim,
	})

	// Gist -- uses same BaseURL for /gists/public; raw URLs are absolute in fixture.
	eng.Register(&GistSource{
		Token:    "ghp-test",
		BaseURL:  srv.URL,
		Registry: reg,
		Limiters: lim,
	})

	// Codeberg
	eng.Register(&CodebergSource{
		BaseURL:  srv.URL,
		Registry: reg,
		Limiters: lim,
	})

	// HuggingFace
	eng.Register(NewHuggingFaceSource(HuggingFaceConfig{
		BaseURL:  srv.URL,
		Registry: reg,
		Limiters: lim,
	}))

	// Replit
	eng.Register(&ReplitSource{
		BaseURL:  srv.URL,
		Registry: reg,
		Limiters: lim,
	})

	// CodeSandbox
	eng.Register(&CodeSandboxSource{
		BaseURL:  srv.URL,
		Registry: reg,
		Limiters: lim,
	})

	// Sandboxes -- inject test sub-platforms that hit srv.URL.
	eng.Register(&SandboxesSource{
		Platforms: []subPlatform{
			{Name: "codepen", SearchPath: "/codepen-search?q=%s", ResultLinkRegex: `^/[^/]+/pen/[a-zA-Z0-9]+$`, IsJSON: false},
			{Name: "jsfiddle", SearchPath: "/jsfiddle-search?q=%s", IsJSON: true, JSONItemsKey: "results", JSONURLKey: "url"},
		},
		Registry: reg,
		Limiters: lim,
		Client:   NewClient(),
		BaseURL:  srv.URL,
	})

	// Kaggle
	eng.Register(&KaggleSource{
		User:       "kh-user",
		Key:        "kh-key",
		BaseURL:    srv.URL,
		WebBaseURL: "https://www.kaggle.com",
		Registry:   reg,
		Limiters:   lim,
		client:     NewClient(),
	})

	// --- Phase 11 sources ---

	// Google Custom Search
	gs := NewGoogleDorkSource("test-api-key", "test-cx", reg, lim)
	gs.BaseURL = srv.URL
	eng.Register(gs)

	// Bing
	bs := NewBingDorkSource("test-bing-key", reg, lim)
	bs.BaseURL = srv.URL
	eng.Register(bs)

	// DuckDuckGo
	ddg := NewDuckDuckGoSource(reg, lim)
	ddg.BaseURL = srv.URL
	eng.Register(ddg)

	// Yandex
	ys := NewYandexSource("test-user", "test-key", reg, lim)
	ys.BaseURL = srv.URL
	eng.Register(ys)

	// Brave
	brs := NewBraveSource("test-brave-key", reg, lim)
	brs.BaseURL = srv.URL
	eng.Register(brs)

	// Pastebin -- uses /pb/ prefix to avoid /search collision
	eng.Register(&PastebinSource{
		BaseURL:  srv.URL + "/pb",
		Registry: reg,
		Limiters: lim,
		Client:   NewClient(),
	})

	// GistPaste -- uses /gp/ prefix
	eng.Register(&GistPasteSource{
		BaseURL:  srv.URL + "/gp",
		Registry: reg,
		Limiters: lim,
		Client:   NewClient(),
	})

	// PasteSites -- inject test sub-platform
	eng.Register(&PasteSitesSource{
		Platforms: []pastePlatform{
			{
				Name:            "testpaste",
				SearchPath:      "/paste-search?q=%s",
				ResultLinkRegex: `^/[a-zA-Z0-9]+$`,
				RawPathTemplate: "/paste-raw%s",
			},
		},
		Registry: reg,
		Limiters: lim,
		Client:   NewClient(),
		BaseURL:  srv.URL,
	})

	// --- Phase 12: IoT scanner sources ---

	// Shodan
	shodanSrc := NewShodanSource("test-shodan-key", reg, lim)
	shodanSrc.BaseURL = srv.URL
	eng.Register(shodanSrc)

	// Censys
	censysSrc := NewCensysSource("test-id", "test-secret", reg, lim)
	censysSrc.BaseURL = srv.URL
	eng.Register(censysSrc)

	// ZoomEye
	zoomeyeSrc := NewZoomEyeSource("test-zoomeye-key", reg, lim)
	zoomeyeSrc.BaseURL = srv.URL
	eng.Register(zoomeyeSrc)

	// FOFA
	eng.Register(&FOFASource{
		Email:    "test@example.com",
		APIKey:   "test-fofa-key",
		BaseURL:  srv.URL,
		Registry: reg,
		Limiters: lim,
		client:   NewClient(),
	})

	// Netlas
	eng.Register(&NetlasSource{
		APIKey:   "test-netlas-key",
		BaseURL:  srv.URL,
		Registry: reg,
		Limiters: lim,
		client:   NewClient(),
	})

	// BinaryEdge
	eng.Register(&BinaryEdgeSource{
		APIKey:   "test-binaryedge-key",
		BaseURL:  srv.URL,
		Registry: reg,
		Limiters: lim,
		client:   NewClient(),
	})

	// --- Phase 12: Cloud storage sources ---

	// S3 -- BaseURL pattern with %s for bucket name
	eng.Register(&S3Scanner{
		BaseURL:  srv.URL + "/cloud-s3/%s",
		Registry: reg,
		Limiters: lim,
		client:   NewClient(),
	})

	// GCS -- JSON format handler
	eng.Register(&GCSScanner{
		BaseURL:  srv.URL + "/cloud-gcs/%s",
		Registry: reg,
		Limiters: lim,
		client:   NewClient(),
	})

	// AzureBlob -- EnumerationResults XML; needs two %s: account + container
	eng.Register(&AzureBlobScanner{
		BaseURL:  srv.URL + "/cloud-azure/%s-%s",
		Registry: reg,
		Limiters: lim,
		client:   NewClient(),
	})

	// DOSpaces -- S3-compatible XML; needs two %s: bucket + region
	eng.Register(&DOSpacesScanner{
		BaseURL:  srv.URL + "/cloud-s3/%s-%s",
		Registry: reg,
		Limiters: lim,
		client:   NewClient(),
	})

	// --- Phase 13: Package registry sources ---
	eng.Register(&NpmSource{BaseURL: srv.URL + "/npm", Registry: reg, Limiters: lim, Client: NewClient()})
	eng.Register(&PyPISource{BaseURL: srv.URL + "/pypi", Registry: reg, Limiters: lim, Client: NewClient()})
	eng.Register(&CratesIOSource{BaseURL: srv.URL + "/crates", Registry: reg, Limiters: lim, Client: NewClient()})
	eng.Register(&RubyGemsSource{BaseURL: srv.URL + "/rubygems", Registry: reg, Limiters: lim, Client: NewClient()})
	eng.Register(&MavenSource{BaseURL: srv.URL + "/maven", Registry: reg, Limiters: lim, Client: NewClient()})
	eng.Register(&NuGetSource{BaseURL: srv.URL + "/nuget", Registry: reg, Limiters: lim, Client: NewClient()})
	eng.Register(&GoProxySource{BaseURL: srv.URL + "/goproxy", Registry: reg, Limiters: lim, Client: NewClient()})
	eng.Register(&PackagistSource{BaseURL: srv.URL + "/packagist", Registry: reg, Limiters: lim, Client: NewClient()})

	// --- Phase 13: Container & IaC sources ---
	eng.Register(&DockerHubSource{BaseURL: srv.URL + "/dockerhub", Registry: reg, Limiters: lim, Client: NewClient()})
	eng.Register(&KubernetesSource{BaseURL: srv.URL + "/k8s", Registry: reg, Limiters: lim, Client: NewClient()})
	eng.Register(&TerraformSource{BaseURL: srv.URL + "/terraform", Registry: reg, Limiters: lim, Client: NewClient()})
	eng.Register(&HelmSource{BaseURL: srv.URL + "/helm", Registry: reg, Limiters: lim, Client: NewClient()})

	// All Phase 14 sources below are constructed with a nil Limiters registry.

	// --- Phase 14: Frontend leak sources ---
	eng.Register(&SourceMapSource{BaseURL: srv.URL + "/sourcemaps", Registry: reg, Limiters: nil, Client: NewClient()})
	eng.Register(&WebpackSource{BaseURL: srv.URL + "/webpack", Registry: reg, Limiters: nil, Client: NewClient()})
	eng.Register(&EnvLeakSource{BaseURL: srv.URL + "/dotenv", Registry: reg, Limiters: nil, Client: NewClient()})
	eng.Register(&SwaggerSource{BaseURL: srv.URL + "/swagger", Registry: reg, Limiters: nil, Client: NewClient()})
	eng.Register(&DeployPreviewSource{BaseURL: srv.URL + "/deploypreview", Registry: reg, Limiters: nil, Client: NewClient()})

	// --- Phase 14: CI/CD log sources ---
	eng.Register(&TravisCISource{BaseURL: srv.URL + "/travisci", Registry: reg, Limiters: nil, Client: NewClient()})
	eng.Register(&GitHubActionsSource{Token: "ghp-test", BaseURL: srv.URL + "/ghactions", Registry: reg, Limiters: nil, Client: NewClient()})
	eng.Register(&CircleCISource{Token: "cci-test", BaseURL: srv.URL + "/circleci", Registry: reg, Limiters: nil, Client: NewClient()})
	eng.Register(&JenkinsSource{BaseURL: srv.URL + "/jenkins", Registry: reg, Limiters: nil, Client: NewClient()})

	// --- Phase 14: Web archive sources ---
	eng.Register(&WaybackMachineSource{BaseURL: srv.URL + "/wayback", Registry: reg, Limiters: nil, Client: NewClient()})
	eng.Register(&CommonCrawlSource{BaseURL: srv.URL + "/commoncrawl", Registry: reg, Limiters: nil, Client: NewClient()})

	// --- Phase 14: JS bundle analysis ---
	eng.Register(&JSBundleSource{BaseURL: srv.URL + "/jsbundle", Registry: reg, Limiters: nil, Client: NewClient()})

	// Sanity: all 52 sources registered.
	if n := len(eng.List()); n != 52 {
		t.Fatalf("expected 52 sources on engine, got %d: %v", n, eng.List())
	}

	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	findings, err := eng.SweepAll(ctx, recon.Config{Query: "ignored"})
	if err != nil {
		t.Fatalf("SweepAll returned error: %v", err)
	}

	// Group findings by SourceType and assert every expected bucket is present.
	byType := make(map[string]int)
	for _, f := range findings {
		byType[f.SourceType]++
	}

	wantTypes := []string{
		// Phase 10
		"recon:github",
		"recon:gitlab",
		"recon:bitbucket",
		"recon:gist",
		"recon:codeberg",
		"recon:huggingface",
		"recon:replit",
		"recon:codesandbox",
		"recon:sandboxes",
		"recon:kaggle",
		// Phase 11
		"recon:google",
		"recon:bing",
		"recon:duckduckgo",
		"recon:yandex",
		"recon:brave",
		"recon:pastebin",
		"recon:gistpaste",
		"recon:pastesites",
		// Phase 12: IoT scanners
		"recon:shodan",
		"recon:censys",
		"recon:zoomeye",
		"recon:fofa",
		"recon:netlas",
		"recon:binaryedge",
		// Phase 12: Cloud storage
		"recon:s3",
		"recon:gcs",
		"recon:azureblob",
		"recon:spaces",
		// Phase 13: Package registries
		"recon:npm",
		"recon:pypi",
		"recon:crates",
		"recon:rubygems",
		"recon:maven",
		"recon:nuget",
		"recon:goproxy",
		"recon:packagist",
		// Phase 13: Container & IaC
		"recon:dockerhub",
		"recon:k8s",
		"recon:terraform",
		"recon:helm",
		// Phase 14: Frontend leaks
		"recon:sourcemaps",
		"recon:webpack",
		"recon:dotenv",
		"recon:swagger",
		"recon:deploypreview",
		// Phase 14: CI/CD logs
		"recon:travisci",
		"recon:ghactions",
		"recon:circleci",
		"recon:jenkins",
		// Phase 14: Web archives
		"recon:wayback",
		"recon:commoncrawl",
		// Phase 14: JS bundles
		"recon:jsbundle",
	}

	missing := 0
	for _, st := range wantTypes {
		if byType[st] == 0 {
			missing++
			t.Errorf("expected at least one finding with SourceType=%q, got none", st)
		}
	}
	// Dump the findings once rather than once per missing type, so a failure
	// involving many sources stays readable.
	if missing > 0 {
		t.Logf("all findings: %+v", findings)
	}
}

// TestRegisterAll_Phase12 verifies that RegisterAll registers all 52 sources,
// that the 10 Phase 12 sources are among them, and that with empty credentials
// the credential-gated IoT sources report Enabled()==false while the
// credentialless cloud storage sources report Enabled()==true.
func TestRegisterAll_Phase12(t *testing.T) {
	reg := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "testprov", Keywords: []string{"test-key"}},
	})
	lim := recon.NewLimiterRegistry()
	eng := recon.NewEngine()

	RegisterAll(eng, SourcesConfig{
		Registry: reg,
		Limiters: lim,
		// All credential fields left empty.
	})

	names := eng.List()
	if n := len(names); n != 52 {
		t.Fatalf("expected 52 sources from RegisterAll, got %d: %v", n, names)
	}

	// Build lookup for source access.
	nameSet := make(map[string]bool, len(names))
	for _, n := range names {
		nameSet[n] = true
	}

	// All 10 Phase 12 sources must be present.
	wantPhase12 := []string{
		"shodan", "censys", "zoomeye", "fofa", "netlas", "binaryedge",
		"s3", "gcs", "azureblob", "spaces",
	}
	for _, name := range wantPhase12 {
		if !nameSet[name] {
			t.Errorf("Phase 12 source %q not found in engine; registered: %v", name, names)
		}
	}

	cfg := recon.Config{}

	// IoT sources with empty credentials must be disabled.
	iotSources := []string{"shodan", "censys", "zoomeye", "fofa", "netlas", "binaryedge"}
	for _, name := range iotSources {
		src, ok := eng.Get(name)
		if !ok {
			t.Errorf("source %q not found via Get", name)
			continue
		}
		if src.Enabled(cfg) {
			t.Errorf("IoT source %q should be Enabled()==false with empty credentials", name)
		}
	}

	// Cloud storage sources (credentialless) must be enabled.
	cloudSources := []string{"s3", "gcs", "azureblob", "spaces"}
	for _, name := range cloudSources {
		src, ok := eng.Get(name)
		if !ok {
			t.Errorf("source %q not found via Get", name)
			continue
		}
		if !src.Enabled(cfg) {
			t.Errorf("Cloud source %q should be Enabled()==true (credentialless)", name)
		}
	}
}

// TestRegisterAll_Phase12_SweepAllNoPanic verifies that SweepAll with a very
// short context timeout completes without panic when every source wired by
// RegisterAll is registered with empty credentials.
func TestRegisterAll_Phase12_SweepAllNoPanic(t *testing.T) {
	reg := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "testprov", Keywords: []string{"test-key"}},
	})
	lim := recon.NewLimiterRegistry()
	eng := recon.NewEngine()

	RegisterAll(eng, SourcesConfig{
		Registry: reg,
		Limiters: lim,
	})

	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
	defer cancel()

	// Should not panic regardless of timeout or missing credentials. The
	// findings and error are deliberately discarded: only "no panic" is
	// under test here.
	_, _ = eng.SweepAll(ctx, recon.Config{})
}

// baseFromReq reconstructs the scheme+host of the inbound request so handlers
// can build absolute raw URLs pointing back at the same httptest server. The
// scheme is "https" when the request carries TLS state and "http" otherwise.
func baseFromReq(r *http.Request) string {
	scheme := "http"
	if r.TLS != nil {
		scheme = "https"
	}
	return scheme + "://" + r.Host
}