- Add CircleCIToken to SourcesConfig with env/viper lookup in cmd/recon.go - Register 7 new sources: travisci, ghactions, circleci, jenkins, wayback, commoncrawl, jsbundle - Update register_test.go expectations from 45 to 52 sources - Add integration test handlers + registrations for all 12 Phase 14 sources - Integration test now validates 52 sources end-to-end
864 lines
32 KiB
Go
864 lines
32 KiB
Go
package sources
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"net/http"
|
|
"net/http/httptest"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
|
"github.com/salvacybersec/keyhunter/pkg/recon"
|
|
)
|
|
|
|
// TestIntegration_AllSources_SweepAll spins up a single multiplexed httptest
|
|
// server that serves canned fixtures for every Phase 10 code-hosting source,
|
|
// Phase 11 search engine / paste site source, Phase 12 IoT scanner / cloud
|
|
// storage source, Phase 13 package registry / container / IaC source, and
|
|
// Phase 14 CI/CD log / web archive / frontend leak source, registers the
|
|
// sources (with BaseURL overrides pointing at the test server) onto a fresh
|
|
// recon.Engine, runs SweepAll, and asserts at least one Finding was emitted
|
|
// per SourceType across all 52 sources.
|
|
//
|
|
// RegisterAll cannot be used directly because it wires production URLs; the
|
|
// test exercises the same code paths by constructing each source identically
|
|
// to RegisterAll but with BaseURL/Platforms overrides.
|
|
func TestIntegration_AllSources_SweepAll(t *testing.T) {
|
|
mux := http.NewServeMux()
|
|
|
|
// ---- GitHub /search/code ----
|
|
mux.HandleFunc("/search/code", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_ = json.NewEncoder(w).Encode(ghSearchResponse{
|
|
Items: []ghCodeItem{
|
|
{HTMLURL: "https://github.com/alice/leak/blob/main/.env"},
|
|
},
|
|
})
|
|
})
|
|
|
|
// ---- GitLab /api/v4/search ----
|
|
mux.HandleFunc("/api/v4/search", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`[{"basename":"keys","data":"sk-proj-abc","path":"keys.env","project_id":42,"ref":"main","startline":1}]`))
|
|
})
|
|
|
|
// ---- Bitbucket /2.0/workspaces/<ws>/search/code ----
|
|
mux.HandleFunc("/2.0/workspaces/kh-test/search/code", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"values":[{"content_match_count":1,"page_url":"https://bitbucket.org/kh-test/repo/src/main/keys.env","file":{"path":"keys.env","commit":{"hash":"deadbeef"}}}]}`))
|
|
})
|
|
|
|
// ---- Gist /gists/public + raw content ----
|
|
mux.HandleFunc("/gists/public", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
body := fmt.Sprintf(`[{"html_url":"https://gist.github.com/alice/gistleak","files":{"f.py":{"filename":"f.py","raw_url":"%s/raw/gist1"}}}]`, baseFromReq(r))
|
|
_, _ = w.Write([]byte(body))
|
|
})
|
|
mux.HandleFunc("/raw/gist1", func(w http.ResponseWriter, r *http.Request) {
|
|
_, _ = w.Write([]byte("api_key = sk-proj-ABCDEF"))
|
|
})
|
|
|
|
// ---- Codeberg /api/v1/repos/search ----
|
|
mux.HandleFunc("/api/v1/repos/search", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"ok":true,"data":[{"full_name":"bob/keys","html_url":"https://codeberg.org/bob/keys"}]}`))
|
|
})
|
|
|
|
// ---- HuggingFace /api/spaces + /api/models ----
|
|
hfHandler := func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`[{"id":"alice/leaky-space"}]`))
|
|
}
|
|
mux.HandleFunc("/api/spaces", hfHandler)
|
|
mux.HandleFunc("/api/models", hfHandler)
|
|
|
|
// ---- Replit /search?q=...&type=repls (HTML) ----
|
|
// ---- CodeSandbox /search?query=...&type=sandboxes (HTML) ----
|
|
// Both hit the same /search path; distinguish on query params.
|
|
mux.HandleFunc("/search", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
switch r.URL.Query().Get("type") {
|
|
case "repls":
|
|
_, _ = w.Write([]byte(`<html><body>
|
|
<a href="/@alice/leaky-repl">hit</a>
|
|
<a href="/other/path">skip</a>
|
|
</body></html>`))
|
|
case "sandboxes":
|
|
_, _ = w.Write([]byte(`<html><body>
|
|
<a href="/s/leaky-sandbox">hit</a>
|
|
<a href="/other">skip</a>
|
|
</body></html>`))
|
|
default:
|
|
w.WriteHeader(http.StatusNotFound)
|
|
}
|
|
})
|
|
|
|
// ---- SandboxesSource sub-platforms ----
|
|
mux.HandleFunc("/codepen-search", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(`<html><body><a href="/alice/pen/AbCd1234">hit</a></body></html>`))
|
|
})
|
|
mux.HandleFunc("/jsfiddle-search", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"results":[{"url":"https://jsfiddle.net/u/leaky/"}]}`))
|
|
})
|
|
|
|
// ---- Kaggle /api/v1/kernels/list ----
|
|
mux.HandleFunc("/api/v1/kernels/list", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`[{"ref":"alice/leaky-notebook"}]`))
|
|
})
|
|
|
|
// ---- Phase 11: Google Custom Search /customsearch/v1 ----
|
|
mux.HandleFunc("/customsearch/v1", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"items":[{"link":"https://pastebin.com/abc123","title":"leak","snippet":"sk-proj-xxx"}]}`))
|
|
})
|
|
|
|
// ---- Phase 11: Bing /v7.0/search ----
|
|
mux.HandleFunc("/v7.0/search", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"webPages":{"value":[{"url":"https://example.com/bing-leak","name":"leak"}]}}`))
|
|
})
|
|
|
|
// ---- Phase 11: DuckDuckGo /html/ ----
|
|
mux.HandleFunc("/html/", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(`<html><body><a class="result__a" href="https://example.com/ddg-leak">result</a></body></html>`))
|
|
})
|
|
|
|
// ---- Phase 11: Yandex /search/xml ----
|
|
mux.HandleFunc("/search/xml", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/xml")
|
|
_, _ = w.Write([]byte(`<?xml version="1.0" encoding="utf-8"?>
|
|
<yandexsearch><response><results><grouping><group><doc><url>https://example.com/yandex-leak</url></doc></group></grouping></results></response></yandexsearch>`))
|
|
})
|
|
|
|
// ---- Phase 11: Brave /res/v1/web/search ----
|
|
mux.HandleFunc("/res/v1/web/search", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"web":{"results":[{"url":"https://example.com/brave-leak","title":"leak"}]}}`))
|
|
})
|
|
|
|
// ---- Phase 11: Pastebin (routed under /pb/ prefix) ----
|
|
mux.HandleFunc("/pb/search", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(`<html><body><a href="/AbCdEf12">paste1</a></body></html>`))
|
|
})
|
|
mux.HandleFunc("/pb/raw/AbCdEf12", func(w http.ResponseWriter, r *http.Request) {
|
|
_, _ = w.Write([]byte("leaked key: sk-proj-PASTEBIN123"))
|
|
})
|
|
|
|
// ---- Phase 11: GistPaste (routed under /gp/ prefix) ----
|
|
mux.HandleFunc("/gp/search", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(`<html><body><a href="/alice/deadbeef01">gist1</a></body></html>`))
|
|
})
|
|
mux.HandleFunc("/gp/alice/deadbeef01/raw", func(w http.ResponseWriter, r *http.Request) {
|
|
_, _ = w.Write([]byte("leaked: sk-proj-GISTPASTE456"))
|
|
})
|
|
|
|
// ---- Phase 11: PasteSites sub-platforms ----
|
|
mux.HandleFunc("/paste-search", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(`<html><body><a href="/aB3xZ9">paste</a></body></html>`))
|
|
})
|
|
mux.HandleFunc("/paste-raw/aB3xZ9", func(w http.ResponseWriter, r *http.Request) {
|
|
_, _ = w.Write([]byte("secret: sk-proj-PASTESITES789"))
|
|
})
|
|
|
|
// ---- Phase 12: Shodan /shodan/host/search ----
|
|
mux.HandleFunc("/shodan/host/search", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"matches":[{"ip_str":"1.2.3.4","port":8080,"data":"vllm endpoint"}]}`))
|
|
})
|
|
|
|
// ---- Phase 12: Censys /v2/hosts/search ----
|
|
mux.HandleFunc("/v2/hosts/search", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"result":{"hits":[{"ip":"10.0.0.1","services":[{"port":443,"service_name":"HTTP"}]}]}}`))
|
|
})
|
|
|
|
// ---- Phase 12: ZoomEye /host/search ----
|
|
mux.HandleFunc("/host/search", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"matches":[{"ip":"172.16.0.1","portinfo":{"port":8443,"service":"https"}}]}`))
|
|
})
|
|
|
|
// ---- Phase 12: FOFA /api/v1/search/all ----
|
|
mux.HandleFunc("/api/v1/search/all", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"results":[["example.com","192.168.1.1","443"]],"size":1}`))
|
|
})
|
|
|
|
// ---- Phase 12: Netlas /api/responses/ ----
|
|
mux.HandleFunc("/api/responses/", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"items":[{"data":{"ip":"10.10.10.1","port":80}}]}`))
|
|
})
|
|
|
|
// ---- Phase 12: BinaryEdge /v2/query/search ----
|
|
mux.HandleFunc("/v2/query/search", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"events":[{"target":{"ip":"192.0.2.1","port":8080}}]}`))
|
|
})
|
|
|
|
// ---- Phase 12: Cloud storage — S3 + DOSpaces (S3 XML format) ----
|
|
mux.HandleFunc("/cloud-s3/", func(w http.ResponseWriter, r *http.Request) {
|
|
if r.Method == http.MethodHead {
|
|
w.WriteHeader(http.StatusOK)
|
|
return
|
|
}
|
|
w.Header().Set("Content-Type", "application/xml")
|
|
_, _ = w.Write([]byte(`<?xml version="1.0" encoding="UTF-8"?>
|
|
<ListBucketResult>
|
|
<Contents><Key>.env</Key></Contents>
|
|
<Contents><Key>config.yaml</Key></Contents>
|
|
</ListBucketResult>`))
|
|
})
|
|
|
|
// ---- Phase 12: Cloud storage — GCS (JSON format) ----
|
|
mux.HandleFunc("/cloud-gcs/", func(w http.ResponseWriter, r *http.Request) {
|
|
if r.Method == http.MethodHead {
|
|
w.WriteHeader(http.StatusOK)
|
|
return
|
|
}
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"items":[{"name":".env"},{"name":"config.yaml"}]}`))
|
|
})
|
|
|
|
// ---- Phase 12: Cloud storage — Azure Blob (EnumerationResults XML) ----
|
|
mux.HandleFunc("/cloud-azure/", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/xml")
|
|
_, _ = w.Write([]byte(`<?xml version="1.0" encoding="UTF-8"?>
|
|
<EnumerationResults>
|
|
<Blobs>
|
|
<Blob><Name>.env</Name></Blob>
|
|
<Blob><Name>config.yaml</Name></Blob>
|
|
</Blobs>
|
|
</EnumerationResults>`))
|
|
})
|
|
|
|
// ---- Phase 13: npm /-/v1/search (prefix /npm) ----
|
|
mux.HandleFunc("/npm/-/v1/search", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"objects":[{"package":{"name":"leak-pkg","links":{"npm":"https://npmjs.com/package/leak-pkg"}}}]}`))
|
|
})
|
|
|
|
// ---- Phase 13: pypi /search/ (prefix /pypi) ----
|
|
mux.HandleFunc("/pypi/search/", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(`<html><body><a href="/project/leaked-pkg/">leaked-pkg</a></body></html>`))
|
|
})
|
|
|
|
// ---- Phase 13: crates /api/v1/crates (prefix /crates) ----
|
|
mux.HandleFunc("/crates/api/v1/crates", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"crates":[{"id":"leaked-crate","name":"leaked-crate","repository":"https://github.com/example/leaked-crate"}]}`))
|
|
})
|
|
|
|
// ---- Phase 13: rubygems /api/v1/search.json (prefix /rubygems) ----
|
|
mux.HandleFunc("/rubygems/api/v1/search.json", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`[{"name":"leaked-gem","project_uri":"https://rubygems.org/gems/leaked-gem"}]`))
|
|
})
|
|
|
|
// ---- Phase 13: maven /solrsearch/select (prefix /maven) ----
|
|
mux.HandleFunc("/maven/solrsearch/select", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"response":{"numFound":1,"docs":[{"g":"com.leak","a":"sdk","latestVersion":"1.0"}]}}`))
|
|
})
|
|
|
|
// ---- Phase 13: nuget /query (prefix /nuget) ----
|
|
mux.HandleFunc("/nuget/query", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"data":[{"id":"LeakedPkg","version":"1.0","projectUrl":"https://nuget.org/packages/LeakedPkg"}]}`))
|
|
})
|
|
|
|
// ---- Phase 13: goproxy /search (prefix /goproxy) ----
|
|
mux.HandleFunc("/goproxy/search", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(`<html><body><a href="/github.com/leak/module">module</a></body></html>`))
|
|
})
|
|
|
|
// ---- Phase 13: packagist /search.json (prefix /packagist) ----
|
|
mux.HandleFunc("/packagist/search.json", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"results":[{"name":"vendor/leaked","url":"https://packagist.org/packages/vendor/leaked"}]}`))
|
|
})
|
|
|
|
// ---- Phase 13: dockerhub /v2/search/repositories/ (prefix /dockerhub) ----
|
|
mux.HandleFunc("/dockerhub/v2/search/repositories/", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"results":[{"repo_name":"user/leaked-image","description":"leaked"}]}`))
|
|
})
|
|
|
|
// ---- Phase 13: k8s /api/v1/packages/search (prefix /k8s) ----
|
|
mux.HandleFunc("/k8s/api/v1/packages/search", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"packages":[{"package_id":"pkg-1","name":"leaked-operator","normalized_name":"leaked-operator","repository":{"name":"community","kind":6}}]}`))
|
|
})
|
|
|
|
// ---- Phase 13: terraform /v1/modules (prefix /terraform) ----
|
|
mux.HandleFunc("/terraform/v1/modules", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"modules":[{"id":"hashicorp/leaked/aws","namespace":"hashicorp","name":"leaked","provider":"aws"}]}`))
|
|
})
|
|
|
|
// ---- Phase 13: helm /api/v1/packages/search (prefix /helm) ----
|
|
mux.HandleFunc("/helm/api/v1/packages/search", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"packages":[{"package_id":"chart-1","name":"leaked-chart","normalized_name":"leaked-chart","repository":{"name":"bitnami","kind":0}}]}`))
|
|
})
|
|
|
|
// ---- Phase 14: SourceMapSource (probes /static/js/main.js.map) ----
|
|
mux.HandleFunc("/sourcemaps/static/js/main.js.map", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"sources":["app.js"],"sourcesContent":["const apiKey = \"sk-proj-SOURCEMAPLEAK123\";"]}`))
|
|
})
|
|
|
|
// ---- Phase 14: WebpackSource (probes /static/js/main.js) ----
|
|
mux.HandleFunc("/webpack/static/js/main.js", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/javascript")
|
|
_, _ = w.Write([]byte(`!function(){var e={NEXT_PUBLIC_API_KEY:"sk-proj-WEBPACKLEAK123456"}}();`))
|
|
})
|
|
|
|
// ---- Phase 14: EnvLeakSource (probes /.env) ----
|
|
mux.HandleFunc("/dotenv/.env", func(w http.ResponseWriter, r *http.Request) {
|
|
_, _ = w.Write([]byte("OPENAI_API_KEY=sk-proj-ENVLEAK12345678\nDB_HOST=localhost\n"))
|
|
})
|
|
|
|
// ---- Phase 14: SwaggerSource (probes /swagger.json) ----
|
|
mux.HandleFunc("/swagger/swagger.json", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"openapi":"3.0.0","paths":{"/api":{"get":{"parameters":[{"name":"api_key","example":"sk-proj-SWAGGERLEAK12345"}]}}}}`))
|
|
})
|
|
|
|
// ---- Phase 14: DeployPreviewSource (probes /) ----
|
|
mux.HandleFunc("/deploypreview/", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "text/html")
|
|
_, _ = w.Write([]byte(`<html><script>window.NEXT_PUBLIC_API_KEY="sk-proj-DEPLOYLEAK12345678"</script></html>`))
|
|
})
|
|
|
|
// ---- Phase 14: TravisCISource /builds + /builds/{id}/log ----
|
|
mux.HandleFunc("/travisci/builds", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"builds":[{"id":999,"state":"passed"}]}`))
|
|
})
|
|
mux.HandleFunc("/travisci/builds/999/log", func(w http.ResponseWriter, r *http.Request) {
|
|
_, _ = w.Write([]byte(`export API_KEY="sk-proj-TRAVISLEAK1234567890"`))
|
|
})
|
|
|
|
// ---- Phase 14: GitHubActionsSource /search/code + /actions/runs/{id}/logs ----
|
|
mux.HandleFunc("/ghactions/search/code", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"workflow_runs":[{"id":55,"status":"completed","conclusion":"success"}]}`))
|
|
})
|
|
mux.HandleFunc("/ghactions/actions/runs/55/logs", func(w http.ResponseWriter, r *http.Request) {
|
|
_, _ = w.Write([]byte(`SECRET_KEY="sk-proj-GHACTIONSLEAK1234567"`))
|
|
})
|
|
|
|
// ---- Phase 14: CircleCISource /project/gh/{slug}/pipeline + /pipeline/{id}/workflow ----
|
|
mux.HandleFunc("/circleci/project/gh/", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"items":[{"id":"pipe-test-1","number":1}]}`))
|
|
})
|
|
mux.HandleFunc("/circleci/pipeline/pipe-test-1/workflow", func(w http.ResponseWriter, r *http.Request) {
|
|
_, _ = w.Write([]byte(`AUTH_TOKEN="sk-proj-CIRCLELEAK1234567890"`))
|
|
})
|
|
|
|
// ---- Phase 14: JenkinsSource /api/json + /job/{name}/lastBuild/consoleText ----
|
|
mux.HandleFunc("/jenkins/api/json", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`{"jobs":[{"name":"build-app","url":"http://jenkins/job/build-app/","color":"blue"}]}`))
|
|
})
|
|
mux.HandleFunc("/jenkins/job/build-app/lastBuild/consoleText", func(w http.ResponseWriter, r *http.Request) {
|
|
_, _ = w.Write([]byte(`Setting TOKEN="sk-proj-JENKINSLEAK12345678"`))
|
|
})
|
|
|
|
// ---- Phase 14: WaybackMachineSource /cdx/search/cdx + /web/{ts}id_/{url} ----
|
|
mux.HandleFunc("/wayback/cdx/search/cdx", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte(`[["url","timestamp","statuscode"],["https://example.com/.env","20240101000000","200"]]`))
|
|
})
|
|
mux.HandleFunc("/wayback/web/", func(w http.ResponseWriter, r *http.Request) {
|
|
_, _ = w.Write([]byte(`API_KEY="sk-proj-WAYBACKLEAK12345678"`))
|
|
})
|
|
|
|
// ---- Phase 14: CommonCrawlSource (NDJSON CDX index) ----
|
|
mux.HandleFunc("/commoncrawl", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
_, _ = w.Write([]byte("{\"url\":\"https://example.com/.env\",\"timestamp\":\"20240101\",\"status\":\"200\",\"filename\":\"warc.gz\",\"length\":\"100\",\"offset\":\"0\"}\n"))
|
|
})
|
|
|
|
// ---- Phase 14: JSBundleSource (probes /static/js/main.js) ----
|
|
mux.HandleFunc("/jsbundle/static/js/main.js", func(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/javascript")
|
|
_, _ = w.Write([]byte(`!function(){var c={apiKey:"sk-proj-JSBUNDLELEAK123456789"}}();`))
|
|
})
|
|
|
|
srv := httptest.NewServer(mux)
|
|
defer srv.Close()
|
|
|
|
reg := providers.NewRegistryFromProviders([]providers.Provider{
|
|
{Name: "openai", Keywords: []string{"sk-proj-"}},
|
|
})
|
|
lim := recon.NewLimiterRegistry()
|
|
|
|
eng := recon.NewEngine()
|
|
|
|
// --- Phase 10 sources ---
|
|
|
|
// GitHub -- token + BaseURL override. Use the real constructor so `client`
|
|
// is initialized, then retarget BaseURL at the test server.
|
|
ghs := NewGitHubSource("ghp-test", reg, lim)
|
|
ghs.BaseURL = srv.URL
|
|
eng.Register(ghs)
|
|
// GitLab
|
|
eng.Register(&GitLabSource{
|
|
Token: "glpat-test",
|
|
BaseURL: srv.URL,
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
})
|
|
// Bitbucket
|
|
eng.Register(&BitbucketSource{
|
|
Token: "bb-test",
|
|
Workspace: "kh-test",
|
|
BaseURL: srv.URL,
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
})
|
|
// Gist -- uses same BaseURL for /gists/public; raw URLs are absolute in fixture.
|
|
eng.Register(&GistSource{
|
|
Token: "ghp-test",
|
|
BaseURL: srv.URL,
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
})
|
|
// Codeberg
|
|
eng.Register(&CodebergSource{
|
|
BaseURL: srv.URL,
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
})
|
|
// HuggingFace
|
|
eng.Register(NewHuggingFaceSource(HuggingFaceConfig{
|
|
BaseURL: srv.URL,
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
}))
|
|
// Replit
|
|
eng.Register(&ReplitSource{
|
|
BaseURL: srv.URL,
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
})
|
|
// CodeSandbox
|
|
eng.Register(&CodeSandboxSource{
|
|
BaseURL: srv.URL,
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
})
|
|
// Sandboxes -- inject test sub-platforms that hit srv.URL.
|
|
eng.Register(&SandboxesSource{
|
|
Platforms: []subPlatform{
|
|
{Name: "codepen", SearchPath: "/codepen-search?q=%s", ResultLinkRegex: `^/[^/]+/pen/[a-zA-Z0-9]+$`, IsJSON: false},
|
|
{Name: "jsfiddle", SearchPath: "/jsfiddle-search?q=%s", IsJSON: true, JSONItemsKey: "results", JSONURLKey: "url"},
|
|
},
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
Client: NewClient(),
|
|
BaseURL: srv.URL,
|
|
})
|
|
// Kaggle
|
|
eng.Register(&KaggleSource{
|
|
User: "kh-user",
|
|
Key: "kh-key",
|
|
BaseURL: srv.URL,
|
|
WebBaseURL: "https://www.kaggle.com",
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
client: NewClient(),
|
|
})
|
|
|
|
// --- Phase 11 sources ---
|
|
|
|
// Google Custom Search
|
|
gs := NewGoogleDorkSource("test-api-key", "test-cx", reg, lim)
|
|
gs.BaseURL = srv.URL
|
|
eng.Register(gs)
|
|
// Bing
|
|
bs := NewBingDorkSource("test-bing-key", reg, lim)
|
|
bs.BaseURL = srv.URL
|
|
eng.Register(bs)
|
|
// DuckDuckGo
|
|
ddg := NewDuckDuckGoSource(reg, lim)
|
|
ddg.BaseURL = srv.URL
|
|
eng.Register(ddg)
|
|
// Yandex
|
|
ys := NewYandexSource("test-user", "test-key", reg, lim)
|
|
ys.BaseURL = srv.URL
|
|
eng.Register(ys)
|
|
// Brave
|
|
brs := NewBraveSource("test-brave-key", reg, lim)
|
|
brs.BaseURL = srv.URL
|
|
eng.Register(brs)
|
|
// Pastebin -- uses /pb/ prefix to avoid /search collision
|
|
eng.Register(&PastebinSource{
|
|
BaseURL: srv.URL + "/pb",
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
Client: NewClient(),
|
|
})
|
|
// GistPaste -- uses /gp/ prefix
|
|
eng.Register(&GistPasteSource{
|
|
BaseURL: srv.URL + "/gp",
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
Client: NewClient(),
|
|
})
|
|
// PasteSites -- inject test sub-platform
|
|
eng.Register(&PasteSitesSource{
|
|
Platforms: []pastePlatform{
|
|
{
|
|
Name: "testpaste",
|
|
SearchPath: "/paste-search?q=%s",
|
|
ResultLinkRegex: `^/[a-zA-Z0-9]+$`,
|
|
RawPathTemplate: "/paste-raw%s",
|
|
},
|
|
},
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
Client: NewClient(),
|
|
BaseURL: srv.URL,
|
|
})
|
|
|
|
// --- Phase 12: IoT scanner sources ---
|
|
|
|
// Shodan
|
|
shodanSrc := NewShodanSource("test-shodan-key", reg, lim)
|
|
shodanSrc.BaseURL = srv.URL
|
|
eng.Register(shodanSrc)
|
|
// Censys
|
|
censysSrc := NewCensysSource("test-id", "test-secret", reg, lim)
|
|
censysSrc.BaseURL = srv.URL
|
|
eng.Register(censysSrc)
|
|
// ZoomEye
|
|
zoomeyeSrc := NewZoomEyeSource("test-zoomeye-key", reg, lim)
|
|
zoomeyeSrc.BaseURL = srv.URL
|
|
eng.Register(zoomeyeSrc)
|
|
// FOFA
|
|
eng.Register(&FOFASource{
|
|
Email: "test@example.com",
|
|
APIKey: "test-fofa-key",
|
|
BaseURL: srv.URL,
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
client: NewClient(),
|
|
})
|
|
// Netlas
|
|
eng.Register(&NetlasSource{
|
|
APIKey: "test-netlas-key",
|
|
BaseURL: srv.URL,
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
client: NewClient(),
|
|
})
|
|
// BinaryEdge
|
|
eng.Register(&BinaryEdgeSource{
|
|
APIKey: "test-binaryedge-key",
|
|
BaseURL: srv.URL,
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
client: NewClient(),
|
|
})
|
|
|
|
// --- Phase 12: Cloud storage sources ---
|
|
|
|
// S3 -- BaseURL pattern with %s for bucket name
|
|
eng.Register(&S3Scanner{
|
|
BaseURL: srv.URL + "/cloud-s3/%s",
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
client: NewClient(),
|
|
})
|
|
// GCS -- JSON format handler
|
|
eng.Register(&GCSScanner{
|
|
BaseURL: srv.URL + "/cloud-gcs/%s",
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
client: NewClient(),
|
|
})
|
|
// AzureBlob -- EnumerationResults XML; needs two %s: account + container
|
|
eng.Register(&AzureBlobScanner{
|
|
BaseURL: srv.URL + "/cloud-azure/%s-%s",
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
client: NewClient(),
|
|
})
|
|
// DOSpaces -- S3-compatible XML; needs two %s: bucket + region
|
|
eng.Register(&DOSpacesScanner{
|
|
BaseURL: srv.URL + "/cloud-s3/%s-%s",
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
client: NewClient(),
|
|
})
|
|
|
|
// --- Phase 13: Package registry sources ---
|
|
|
|
// npm
|
|
eng.Register(&NpmSource{BaseURL: srv.URL + "/npm", Registry: reg, Limiters: lim, Client: NewClient()})
|
|
// pypi
|
|
eng.Register(&PyPISource{BaseURL: srv.URL + "/pypi", Registry: reg, Limiters: lim, Client: NewClient()})
|
|
// crates
|
|
eng.Register(&CratesIOSource{BaseURL: srv.URL + "/crates", Registry: reg, Limiters: lim, Client: NewClient()})
|
|
// rubygems
|
|
eng.Register(&RubyGemsSource{BaseURL: srv.URL + "/rubygems", Registry: reg, Limiters: lim, Client: NewClient()})
|
|
// maven
|
|
eng.Register(&MavenSource{BaseURL: srv.URL + "/maven", Registry: reg, Limiters: lim, Client: NewClient()})
|
|
// nuget
|
|
eng.Register(&NuGetSource{BaseURL: srv.URL + "/nuget", Registry: reg, Limiters: lim, Client: NewClient()})
|
|
// goproxy
|
|
eng.Register(&GoProxySource{BaseURL: srv.URL + "/goproxy", Registry: reg, Limiters: lim, Client: NewClient()})
|
|
// packagist
|
|
eng.Register(&PackagistSource{BaseURL: srv.URL + "/packagist", Registry: reg, Limiters: lim, Client: NewClient()})
|
|
|
|
// --- Phase 13: Container & IaC sources ---
|
|
|
|
// dockerhub
|
|
eng.Register(&DockerHubSource{BaseURL: srv.URL + "/dockerhub", Registry: reg, Limiters: lim, Client: NewClient()})
|
|
// k8s
|
|
eng.Register(&KubernetesSource{BaseURL: srv.URL + "/k8s", Registry: reg, Limiters: lim, Client: NewClient()})
|
|
// terraform
|
|
eng.Register(&TerraformSource{BaseURL: srv.URL + "/terraform", Registry: reg, Limiters: lim, Client: NewClient()})
|
|
// helm
|
|
eng.Register(&HelmSource{BaseURL: srv.URL + "/helm", Registry: reg, Limiters: lim, Client: NewClient()})
|
|
|
|
// --- Phase 14: Frontend leak sources ---
|
|
|
|
// sourcemaps
|
|
eng.Register(&SourceMapSource{BaseURL: srv.URL + "/sourcemaps", Registry: reg, Limiters: nil, Client: NewClient()})
|
|
// webpack
|
|
eng.Register(&WebpackSource{BaseURL: srv.URL + "/webpack", Registry: reg, Limiters: nil, Client: NewClient()})
|
|
// dotenv
|
|
eng.Register(&EnvLeakSource{BaseURL: srv.URL + "/dotenv", Registry: reg, Limiters: nil, Client: NewClient()})
|
|
// swagger
|
|
eng.Register(&SwaggerSource{BaseURL: srv.URL + "/swagger", Registry: reg, Limiters: nil, Client: NewClient()})
|
|
// deploypreview
|
|
eng.Register(&DeployPreviewSource{BaseURL: srv.URL + "/deploypreview", Registry: reg, Limiters: nil, Client: NewClient()})
|
|
|
|
// --- Phase 14: CI/CD log sources ---
|
|
|
|
// travisci
|
|
eng.Register(&TravisCISource{BaseURL: srv.URL + "/travisci", Registry: reg, Limiters: nil, Client: NewClient()})
|
|
// ghactions
|
|
eng.Register(&GitHubActionsSource{Token: "ghp-test", BaseURL: srv.URL + "/ghactions", Registry: reg, Limiters: nil, Client: NewClient()})
|
|
// circleci
|
|
eng.Register(&CircleCISource{Token: "cci-test", BaseURL: srv.URL + "/circleci", Registry: reg, Limiters: nil, Client: NewClient()})
|
|
// jenkins
|
|
eng.Register(&JenkinsSource{BaseURL: srv.URL + "/jenkins", Registry: reg, Limiters: nil, Client: NewClient()})
|
|
|
|
// --- Phase 14: Web archive sources ---
|
|
|
|
// wayback
|
|
eng.Register(&WaybackMachineSource{BaseURL: srv.URL + "/wayback", Registry: reg, Limiters: nil, Client: NewClient()})
|
|
// commoncrawl
|
|
eng.Register(&CommonCrawlSource{BaseURL: srv.URL + "/commoncrawl", Registry: reg, Limiters: nil, Client: NewClient()})
|
|
|
|
// --- Phase 14: JS bundle analysis ---
|
|
|
|
// jsbundle
|
|
eng.Register(&JSBundleSource{BaseURL: srv.URL + "/jsbundle", Registry: reg, Limiters: nil, Client: NewClient()})
|
|
|
|
// Sanity: all 52 sources registered.
|
|
if n := len(eng.List()); n != 52 {
|
|
t.Fatalf("expected 52 sources on engine, got %d: %v", n, eng.List())
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
|
|
defer cancel()
|
|
|
|
findings, err := eng.SweepAll(ctx, recon.Config{Query: "ignored"})
|
|
if err != nil {
|
|
t.Fatalf("SweepAll returned error: %v", err)
|
|
}
|
|
|
|
// Group findings by SourceType and assert every expected bucket is present.
|
|
byType := make(map[string]int)
|
|
for _, f := range findings {
|
|
byType[f.SourceType]++
|
|
}
|
|
|
|
wantTypes := []string{
|
|
// Phase 10
|
|
"recon:github",
|
|
"recon:gitlab",
|
|
"recon:bitbucket",
|
|
"recon:gist",
|
|
"recon:codeberg",
|
|
"recon:huggingface",
|
|
"recon:replit",
|
|
"recon:codesandbox",
|
|
"recon:sandboxes",
|
|
"recon:kaggle",
|
|
// Phase 11
|
|
"recon:google",
|
|
"recon:bing",
|
|
"recon:duckduckgo",
|
|
"recon:yandex",
|
|
"recon:brave",
|
|
"recon:pastebin",
|
|
"recon:gistpaste",
|
|
"recon:pastesites",
|
|
// Phase 12: IoT scanners
|
|
"recon:shodan",
|
|
"recon:censys",
|
|
"recon:zoomeye",
|
|
"recon:fofa",
|
|
"recon:netlas",
|
|
"recon:binaryedge",
|
|
// Phase 12: Cloud storage
|
|
"recon:s3",
|
|
"recon:gcs",
|
|
"recon:azureblob",
|
|
"recon:spaces",
|
|
// Phase 13: Package registries
|
|
"recon:npm",
|
|
"recon:pypi",
|
|
"recon:crates",
|
|
"recon:rubygems",
|
|
"recon:maven",
|
|
"recon:nuget",
|
|
"recon:goproxy",
|
|
"recon:packagist",
|
|
// Phase 13: Container & IaC
|
|
"recon:dockerhub",
|
|
"recon:k8s",
|
|
"recon:terraform",
|
|
"recon:helm",
|
|
// Phase 14: Frontend leaks
|
|
"recon:sourcemaps",
|
|
"recon:webpack",
|
|
"recon:dotenv",
|
|
"recon:swagger",
|
|
"recon:deploypreview",
|
|
// Phase 14: CI/CD logs
|
|
"recon:travisci",
|
|
"recon:ghactions",
|
|
"recon:circleci",
|
|
"recon:jenkins",
|
|
// Phase 14: Web archives
|
|
"recon:wayback",
|
|
"recon:commoncrawl",
|
|
// Phase 14: JS bundles
|
|
"recon:jsbundle",
|
|
}
|
|
for _, st := range wantTypes {
|
|
if byType[st] == 0 {
|
|
t.Errorf("expected at least one finding with SourceType=%q, got none\nall findings: %+v", st, findings)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestRegisterAll_Phase12 verifies that RegisterAll correctly registers all 28
|
|
// sources (18 Phase 10-11 + 10 Phase 12) and that credential-gated sources
|
|
// report Enabled()==false when credentials are empty.
|
|
func TestRegisterAll_Phase12(t *testing.T) {
|
|
reg := providers.NewRegistryFromProviders([]providers.Provider{
|
|
{Name: "testprov", Keywords: []string{"test-key"}},
|
|
})
|
|
lim := recon.NewLimiterRegistry()
|
|
|
|
eng := recon.NewEngine()
|
|
RegisterAll(eng, SourcesConfig{
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
// All credential fields left empty.
|
|
})
|
|
|
|
names := eng.List()
|
|
if n := len(names); n != 52 {
|
|
t.Fatalf("expected 52 sources from RegisterAll, got %d: %v", n, names)
|
|
}
|
|
|
|
// Build lookup for source access.
|
|
nameSet := make(map[string]bool, len(names))
|
|
for _, n := range names {
|
|
nameSet[n] = true
|
|
}
|
|
|
|
// All 10 Phase 12 sources must be present.
|
|
wantPhase12 := []string{
|
|
"shodan", "censys", "zoomeye", "fofa", "netlas", "binaryedge",
|
|
"s3", "gcs", "azureblob", "spaces",
|
|
}
|
|
for _, name := range wantPhase12 {
|
|
if !nameSet[name] {
|
|
t.Errorf("Phase 12 source %q not found in engine; registered: %v", name, names)
|
|
}
|
|
}
|
|
|
|
cfg := recon.Config{}
|
|
|
|
// IoT sources with empty credentials must be disabled.
|
|
iotSources := []string{"shodan", "censys", "zoomeye", "fofa", "netlas", "binaryedge"}
|
|
for _, name := range iotSources {
|
|
src, ok := eng.Get(name)
|
|
if !ok {
|
|
t.Errorf("source %q not found via Get", name)
|
|
continue
|
|
}
|
|
if src.Enabled(cfg) {
|
|
t.Errorf("IoT source %q should be Enabled()==false with empty credentials", name)
|
|
}
|
|
}
|
|
|
|
// Cloud storage sources (credentialless) must be enabled.
|
|
cloudSources := []string{"s3", "gcs", "azureblob", "spaces"}
|
|
for _, name := range cloudSources {
|
|
src, ok := eng.Get(name)
|
|
if !ok {
|
|
t.Errorf("source %q not found via Get", name)
|
|
continue
|
|
}
|
|
if !src.Enabled(cfg) {
|
|
t.Errorf("Cloud source %q should be Enabled()==true (credentialless)", name)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestRegisterAll_Phase12_SweepAllNoPanic verifies that SweepAll with a very
|
|
// short context timeout completes without panic when all 28 sources are
|
|
// registered with empty credentials.
|
|
func TestRegisterAll_Phase12_SweepAllNoPanic(t *testing.T) {
|
|
reg := providers.NewRegistryFromProviders([]providers.Provider{
|
|
{Name: "testprov", Keywords: []string{"test-key"}},
|
|
})
|
|
lim := recon.NewLimiterRegistry()
|
|
|
|
eng := recon.NewEngine()
|
|
RegisterAll(eng, SourcesConfig{
|
|
Registry: reg,
|
|
Limiters: lim,
|
|
})
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
|
|
defer cancel()
|
|
|
|
// Should not panic regardless of timeout or missing credentials.
|
|
_, _ = eng.SweepAll(ctx, recon.Config{})
|
|
}
|
|
|
|
// baseFromReq returns the "scheme://host" origin of the inbound request so a
// handler can emit absolute URLs that point back at the same httptest server.
// The scheme is "https" when the request carries TLS state and "http"
// otherwise; the host is taken verbatim from r.Host.
func baseFromReq(r *http.Request) string {
	if r.TLS == nil {
		return "http://" + r.Host
	}
	return "https://" + r.Host
}
|