From bebc3e7a0bd1c710168c4b2570d9cbf506fe9def Mon Sep 17 00:00:00 2001 From: salvacybersec Date: Mon, 6 Apr 2026 12:06:27 +0300 Subject: [PATCH] test(11-03): add end-to-end SweepAll integration test across all 18 sources - Extend httptest mux with fixtures for Google, Bing, DuckDuckGo, Yandex, Brave - Add Pastebin (routed /pb/), GistPaste (/gp/), PasteSites (injected platform) - Assert all 18 SourceTypes emit at least one finding via SweepAll --- pkg/recon/sources/integration_test.go | 145 ++++++++++++++++++++++++-- 1 file changed, 134 insertions(+), 11 deletions(-) diff --git a/pkg/recon/sources/integration_test.go b/pkg/recon/sources/integration_test.go index 7cd0285..73a5f3b 100644 --- a/pkg/recon/sources/integration_test.go +++ b/pkg/recon/sources/integration_test.go @@ -14,10 +14,11 @@ import ( ) // TestIntegration_AllSources_SweepAll spins up a single multiplexed httptest -// server that serves canned fixtures for every Phase 10 code-hosting source, -// registers the sources (with BaseURL overrides pointing at the test server) -// onto a fresh recon.Engine, runs SweepAll, and asserts at least one Finding -// was emitted per SourceType across all ten sources. +// server that serves canned fixtures for every Phase 10 code-hosting source +// and Phase 11 search engine / paste site source, registers the sources (with +// BaseURL overrides pointing at the test server) onto a fresh recon.Engine, +// runs SweepAll, and asserts at least one Finding was emitted per SourceType +// across all 18 sources. // // RegisterAll cannot be used directly because it wires production URLs; the // test exercises the same code paths by constructing each source identically @@ -108,6 +109,64 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) { _, _ = w.Write([]byte(`[{"ref":"alice/leaky-notebook"}]`)) }) + // ---- Phase 11: Google Custom Search /customsearch/v1 ---- + mux.HandleFunc("/customsearch/v1", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"items":[{"link":"https://pastebin.com/abc123","title":"leak","snippet":"sk-proj-xxx"}]}`)) + }) + + // ---- Phase 11: Bing /v7.0/search ---- + mux.HandleFunc("/v7.0/search", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"webPages":{"value":[{"url":"https://example.com/bing-leak","name":"leak"}]}}`)) + }) + + // ---- Phase 11: DuckDuckGo /html/ ---- + mux.HandleFunc("/html/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + _, _ = w.Write([]byte(`result`)) + }) + + // ---- Phase 11: Yandex /search/xml ---- + mux.HandleFunc("/search/xml", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/xml") + _, _ = w.Write([]byte(` +https://example.com/yandex-leak`)) + }) + + // ---- Phase 11: Brave /res/v1/web/search ---- + mux.HandleFunc("/res/v1/web/search", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"web":{"results":[{"url":"https://example.com/brave-leak","title":"leak"}]}}`)) + }) + + // ---- Phase 11: Pastebin (routed under /pb/ prefix) ---- + mux.HandleFunc("/pb/search", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + _, _ = w.Write([]byte(`paste1`)) + }) + mux.HandleFunc("/pb/raw/AbCdEf12", func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte("leaked key: sk-proj-PASTEBIN123")) + }) + + // ---- Phase 11: GistPaste (routed under /gp/ prefix) ---- + mux.HandleFunc("/gp/search", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + _, _ = w.Write([]byte(`gist1`)) + }) + mux.HandleFunc("/gp/alice/deadbeef01/raw", func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte("leaked: sk-proj-GISTPASTE456")) + }) + + // ---- Phase 11: PasteSites sub-platforms ---- + mux.HandleFunc("/paste-search", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + _, _ = w.Write([]byte(`paste`)) + }) + mux.HandleFunc("/paste-raw/aB3xZ9", func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write([]byte("secret: sk-proj-PASTESITES789")) + }) + srv := httptest.NewServer(mux) defer srv.Close() @@ -118,7 +177,9 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) { eng := recon.NewEngine() - // GitHub — token + BaseURL override. Use the real constructor so `client` + // --- Phase 10 sources --- + + // GitHub -- token + BaseURL override. Use the real constructor so `client` // is initialized, then retarget BaseURL at the test server. ghs := NewGitHubSource("ghp-test", reg, lim) ghs.BaseURL = srv.URL @@ -138,7 +199,7 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) { Registry: reg, Limiters: lim, }) - // Gist — uses same BaseURL for /gists/public; raw URLs are absolute in fixture. + // Gist -- uses same BaseURL for /gists/public; raw URLs are absolute in fixture. eng.Register(&GistSource{ Token: "ghp-test", BaseURL: srv.URL, @@ -169,7 +230,7 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) { Registry: reg, Limiters: lim, }) - // Sandboxes — inject test sub-platforms that hit srv.URL. + // Sandboxes -- inject test sub-platforms that hit srv.URL. eng.Register(&SandboxesSource{ Platforms: []subPlatform{ {Name: "codepen", SearchPath: "/codepen-search?q=%s", ResultLinkRegex: `^/[^/]+/pen/[a-zA-Z0-9]+$`, IsJSON: false}, @@ -191,12 +252,64 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) { client: NewClient(), }) - // Sanity: all 10 sources registered. - if n := len(eng.List()); n != 10 { - t.Fatalf("expected 10 sources on engine, got %d: %v", n, eng.List()) + // --- Phase 11 sources --- + + // Google Custom Search + gs := NewGoogleDorkSource("test-api-key", "test-cx", reg, lim) + gs.BaseURL = srv.URL + eng.Register(gs) + // Bing + bs := NewBingDorkSource("test-bing-key", reg, lim) + bs.BaseURL = srv.URL + eng.Register(bs) + // DuckDuckGo + ddg := NewDuckDuckGoSource(reg, lim) + ddg.BaseURL = srv.URL + eng.Register(ddg) + // Yandex + ys := NewYandexSource("test-user", "test-key", reg, lim) + ys.BaseURL = srv.URL + eng.Register(ys) + // Brave + brs := NewBraveSource("test-brave-key", reg, lim) + brs.BaseURL = srv.URL + eng.Register(brs) + // Pastebin -- uses /pb/ prefix to avoid /search collision + eng.Register(&PastebinSource{ + BaseURL: srv.URL + "/pb", + Registry: reg, + Limiters: lim, + Client: NewClient(), + }) + // GistPaste -- uses /gp/ prefix + eng.Register(&GistPasteSource{ + BaseURL: srv.URL + "/gp", + Registry: reg, + Limiters: lim, + Client: NewClient(), + }) + // PasteSites -- inject test sub-platform + eng.Register(&PasteSitesSource{ + Platforms: []pastePlatform{ + { + Name: "testpaste", + SearchPath: "/paste-search?q=%s", + ResultLinkRegex: `^/[a-zA-Z0-9]+$`, + RawPathTemplate: "/paste-raw%s", + }, + }, + Registry: reg, + Limiters: lim, + Client: NewClient(), + BaseURL: srv.URL, + }) + + // Sanity: all 18 sources registered. + if n := len(eng.List()); n != 18 { + t.Fatalf("expected 18 sources on engine, got %d: %v", n, eng.List()) } - ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) defer cancel() findings, err := eng.SweepAll(ctx, recon.Config{Query: "ignored"}) @@ -211,6 +324,7 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) { } wantTypes := []string{ + // Phase 10 "recon:github", "recon:gitlab", "recon:bitbucket", @@ -221,6 +335,15 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) { "recon:codesandbox", "recon:sandboxes", "recon:kaggle", + // Phase 11 + "recon:google", + "recon:bing", + "recon:duckduckgo", + "recon:yandex", + "recon:brave", + "recon:pastebin", + "recon:gistpaste", + "recon:pastesites", } for _, st := range wantTypes { if byType[st] == 0 {