test(11-03): add end-to-end SweepAll integration test across all 18 sources

- Extend httptest mux with fixtures for Google, Bing, DuckDuckGo, Yandex, Brave
- Add fixtures and registrations for Pastebin (routed under /pb/), GistPaste (routed under /gp/), and PasteSites (injected test sub-platform)
- Assert all 18 SourceTypes emit at least one finding via SweepAll
This commit is contained in:
salvacybersec
2026-04-06 12:06:27 +03:00
parent 3250408f23
commit bebc3e7a0b

View File

@@ -14,10 +14,11 @@ import (
)
// TestIntegration_AllSources_SweepAll spins up a single multiplexed httptest
// server that serves canned fixtures for every Phase 10 code-hosting source,
// registers the sources (with BaseURL overrides pointing at the test server)
// onto a fresh recon.Engine, runs SweepAll, and asserts at least one Finding
// was emitted per SourceType across all ten sources.
// server that serves canned fixtures for every Phase 10 code-hosting source
// and every Phase 11 search-engine and paste-site source, registers the
// sources (with BaseURL overrides pointing at the test server) onto a fresh
// recon.Engine, runs SweepAll, and asserts at least one Finding was emitted
// for each of the 18 SourceTypes.
//
// RegisterAll cannot be used directly because it wires production URLs; the
// test exercises the same code paths by constructing each source identically
@@ -108,6 +109,64 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) {
_, _ = w.Write([]byte(`[{"ref":"alice/leaky-notebook"}]`))
})
// ---- Phase 11: Google Custom Search /customsearch/v1 ----
mux.HandleFunc("/customsearch/v1", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{"items":[{"link":"https://pastebin.com/abc123","title":"leak","snippet":"sk-proj-xxx"}]}`))
})
// ---- Phase 11: Bing /v7.0/search ----
mux.HandleFunc("/v7.0/search", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{"webPages":{"value":[{"url":"https://example.com/bing-leak","name":"leak"}]}}`))
})
// ---- Phase 11: DuckDuckGo /html/ ----
mux.HandleFunc("/html/", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/html")
_, _ = w.Write([]byte(`<html><body><a class="result__a" href="https://example.com/ddg-leak">result</a></body></html>`))
})
// ---- Phase 11: Yandex /search/xml ----
mux.HandleFunc("/search/xml", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/xml")
_, _ = w.Write([]byte(`<?xml version="1.0" encoding="utf-8"?>
<yandexsearch><response><results><grouping><group><doc><url>https://example.com/yandex-leak</url></doc></group></grouping></results></response></yandexsearch>`))
})
// ---- Phase 11: Brave /res/v1/web/search ----
mux.HandleFunc("/res/v1/web/search", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{"web":{"results":[{"url":"https://example.com/brave-leak","title":"leak"}]}}`))
})
// ---- Phase 11: Pastebin (routed under /pb/ prefix) ----
mux.HandleFunc("/pb/search", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/html")
_, _ = w.Write([]byte(`<html><body><a href="/AbCdEf12">paste1</a></body></html>`))
})
mux.HandleFunc("/pb/raw/AbCdEf12", func(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte("leaked key: sk-proj-PASTEBIN123"))
})
// ---- Phase 11: GistPaste (routed under /gp/ prefix) ----
mux.HandleFunc("/gp/search", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/html")
_, _ = w.Write([]byte(`<html><body><a href="/alice/deadbeef01">gist1</a></body></html>`))
})
mux.HandleFunc("/gp/alice/deadbeef01/raw", func(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte("leaked: sk-proj-GISTPASTE456"))
})
// ---- Phase 11: PasteSites sub-platforms ----
mux.HandleFunc("/paste-search", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/html")
_, _ = w.Write([]byte(`<html><body><a href="/aB3xZ9">paste</a></body></html>`))
})
mux.HandleFunc("/paste-raw/aB3xZ9", func(w http.ResponseWriter, r *http.Request) {
_, _ = w.Write([]byte("secret: sk-proj-PASTESITES789"))
})
srv := httptest.NewServer(mux)
defer srv.Close()
@@ -118,7 +177,9 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) {
eng := recon.NewEngine()
// GitHub — token + BaseURL override. Use the real constructor so `client`
// --- Phase 10 sources ---
// GitHub -- token + BaseURL override. Use the real constructor so `client`
// is initialized, then retarget BaseURL at the test server.
ghs := NewGitHubSource("ghp-test", reg, lim)
ghs.BaseURL = srv.URL
@@ -138,7 +199,7 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) {
Registry: reg,
Limiters: lim,
})
// Gist uses same BaseURL for /gists/public; raw URLs are absolute in fixture.
// Gist -- uses same BaseURL for /gists/public; raw URLs are absolute in fixture.
eng.Register(&GistSource{
Token: "ghp-test",
BaseURL: srv.URL,
@@ -169,7 +230,7 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) {
Registry: reg,
Limiters: lim,
})
// Sandboxes inject test sub-platforms that hit srv.URL.
// Sandboxes -- inject test sub-platforms that hit srv.URL.
eng.Register(&SandboxesSource{
Platforms: []subPlatform{
{Name: "codepen", SearchPath: "/codepen-search?q=%s", ResultLinkRegex: `^/[^/]+/pen/[a-zA-Z0-9]+$`, IsJSON: false},
@@ -191,12 +252,64 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) {
client: NewClient(),
})
// Sanity: all 10 sources registered.
if n := len(eng.List()); n != 10 {
t.Fatalf("expected 10 sources on engine, got %d: %v", n, eng.List())
// --- Phase 11 sources ---
// Google Custom Search
gs := NewGoogleDorkSource("test-api-key", "test-cx", reg, lim)
gs.BaseURL = srv.URL
eng.Register(gs)
// Bing
bs := NewBingDorkSource("test-bing-key", reg, lim)
bs.BaseURL = srv.URL
eng.Register(bs)
// DuckDuckGo
ddg := NewDuckDuckGoSource(reg, lim)
ddg.BaseURL = srv.URL
eng.Register(ddg)
// Yandex
ys := NewYandexSource("test-user", "test-key", reg, lim)
ys.BaseURL = srv.URL
eng.Register(ys)
// Brave
brs := NewBraveSource("test-brave-key", reg, lim)
brs.BaseURL = srv.URL
eng.Register(brs)
// Pastebin -- uses /pb/ prefix to avoid /search collision
eng.Register(&PastebinSource{
BaseURL: srv.URL + "/pb",
Registry: reg,
Limiters: lim,
Client: NewClient(),
})
// GistPaste -- uses /gp/ prefix
eng.Register(&GistPasteSource{
BaseURL: srv.URL + "/gp",
Registry: reg,
Limiters: lim,
Client: NewClient(),
})
// PasteSites -- inject test sub-platform
eng.Register(&PasteSitesSource{
Platforms: []pastePlatform{
{
Name: "testpaste",
SearchPath: "/paste-search?q=%s",
ResultLinkRegex: `^/[a-zA-Z0-9]+$`,
RawPathTemplate: "/paste-raw%s",
},
},
Registry: reg,
Limiters: lim,
Client: NewClient(),
BaseURL: srv.URL,
})
// Sanity: all 18 sources registered.
if n := len(eng.List()); n != 18 {
t.Fatalf("expected 18 sources on engine, got %d: %v", n, eng.List())
}
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
defer cancel()
findings, err := eng.SweepAll(ctx, recon.Config{Query: "ignored"})
@@ -211,6 +324,7 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) {
}
wantTypes := []string{
// Phase 10
"recon:github",
"recon:gitlab",
"recon:bitbucket",
@@ -221,6 +335,15 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) {
"recon:codesandbox",
"recon:sandboxes",
"recon:kaggle",
// Phase 11
"recon:google",
"recon:bing",
"recon:duckduckgo",
"recon:yandex",
"recon:brave",
"recon:pastebin",
"recon:gistpaste",
"recon:pastesites",
}
for _, st := range wantTypes {
if byType[st] == 0 {