package sources

import (
	"context"
	"net/http"
	"net/http/httptest"
	"strings"
	"testing"
	"time"

	"github.com/salvacybersec/keyhunter/pkg/providers"
	"github.com/salvacybersec/keyhunter/pkg/recon"
)

// pasteSitesTestRegistry returns a registry holding a single "openai" provider
// whose only keyword is "sk-proj-". The raw fixtures below either contain or
// omit that keyword.
func pasteSitesTestRegistry() *providers.Registry {
	return providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
}

// Fixture HTML for each sub-platform search result page.
//
// Every page carries exactly one link. The link path satisfies the platform's
// ResultLinkRegex and, once expanded through its RawPathTemplate, equals the
// raw route registered in TestPasteSites_Sweep_ExtractsFindings:
//
//	dpaste    /AbcDef12  -> /AbcDef12/raw
//	paste.ee  /p/Xyz789  -> /r/p/Xyz789
//	rentry    /my-paste  -> /my-paste/raw
//	hastebin  /abcdef    -> /raw/abcdef
//
// NOTE(review): this assumes Sweep collects candidate links from anchor href
// attributes; confirm against the PasteSitesSource implementation.
const (
	dpasteSearchHTML   = `<html><body><a href="/AbcDef12">dpaste hit</a></body></html>`
	pasteEeSearchHTML  = `<html><body><a href="/p/Xyz789">paste.ee hit</a></body></html>`
	rentrySearchHTML   = `<html><body><a href="/my-paste">rentry hit</a></body></html>`
	hastebinSearchHTML = `<html><body><a href="/abcdef">hastebin hit</a></body></html>`
)

// Raw content fixtures -- some match, some don't. Only hastebinRaw lacks the
// "sk-proj-" keyword registered by pasteSitesTestRegistry.
const (
	dpasteRaw   = `leaked: sk-proj-AAAA1234 oops`
	pasteEeRaw  = `config sk-proj-BBBBB5678 here`
	rentryRaw   = `has sk-proj-CCCC9012 inside`
	hastebinRaw = `nothing interesting`
)

// TestPasteSites_Sweep_ExtractsFindings runs Sweep against a local HTTP server
// that emulates four paste platforms and checks that findings are reported for
// the three platforms whose raw content contains the provider keyword.
func TestPasteSites_Sweep_ExtractsFindings(t *testing.T) {
	mux := http.NewServeMux()

	// serve registers a handler that answers pattern with a fixed body. The
	// Content-Type header is only set when contentType is non-empty, so raw
	// routes keep the server's default content sniffing.
	serve := func(pattern, contentType, body string) {
		mux.HandleFunc(pattern, func(w http.ResponseWriter, _ *http.Request) {
			if contentType != "" {
				w.Header().Set("Content-Type", contentType)
			}
			// A failed write only means the client went away; nothing to do in a test handler.
			_, _ = w.Write([]byte(body))
		})
	}

	// dpaste routes
	serve("/dpaste-search/", "text/html", dpasteSearchHTML)
	serve("/AbcDef12/raw", "", dpasteRaw)

	// paste.ee routes
	serve("/pasteee-search", "text/html", pasteEeSearchHTML)
	serve("/r/p/Xyz789", "", pasteEeRaw)

	// rentry routes
	serve("/rentry-search", "text/html", rentrySearchHTML)
	serve("/my-paste/raw", "", rentryRaw)

	// hastebin routes
	serve("/hastebin-search", "text/html", hastebinSearchHTML)
	serve("/raw/abcdef", "", hastebinRaw)

	srv := httptest.NewServer(mux)
	defer srv.Close()

	// Override platforms to use test server with relative paths.
	testPlats := []pastePlatform{
		{
			Name:            "dpaste",
			SearchPath:      srv.URL + "/dpaste-search/?q=%s",
			ResultLinkRegex: `^/[A-Za-z0-9]+$`,
			RawPathTemplate: "%s/raw",
		},
		{
			Name:            "paste.ee",
			SearchPath:      srv.URL + "/pasteee-search?q=%s",
			ResultLinkRegex: `^/p/[A-Za-z0-9]+$`,
			RawPathTemplate: "/r%s",
		},
		{
			Name:            "rentry",
			SearchPath:      srv.URL + "/rentry-search?q=%s",
			ResultLinkRegex: `^/[a-z0-9-]+$`,
			RawPathTemplate: "%s/raw",
		},
		{
			Name:            "hastebin",
			SearchPath:      srv.URL + "/hastebin-search?q=%s",
			ResultLinkRegex: `^/[a-z]+$`,
			RawPathTemplate: "/raw%s",
		},
	}

	src := &PasteSitesSource{
		Platforms: testPlats,
		Registry:  pasteSitesTestRegistry(),
		Limiters:  recon.NewLimiterRegistry(),
		Client:    NewClient(),
	}

	out := make(chan recon.Finding, 32)
	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	if err := src.Sweep(ctx, "", out); err != nil {
		t.Fatalf("Sweep err: %v", err)
	}
	// Sweep has returned, so the channel can be closed here and drained below.
	close(out)

	var findings []recon.Finding
	for f := range out {
		findings = append(findings, f)
	}

	// dpaste, paste.ee, rentry have matching content; hastebin does not.
	if len(findings) < 3 {
		t.Fatalf("expected at least 3 findings (dpaste+paste.ee+rentry), got %d", len(findings))
	}

	const platformPrefix = "platform="
	platforms := make(map[string]bool)
	for _, f := range findings {
		if f.SourceType != "recon:pastesites" {
			t.Errorf("SourceType=%s, want recon:pastesites", f.SourceType)
		}
		// Extract platform from KeyMasked. The prefix is verified first so a
		// value in any other format is never mistaken for a platform name.
		if !strings.HasPrefix(f.KeyMasked, platformPrefix) {
			continue
		}
		if name := strings.TrimPrefix(f.KeyMasked, platformPrefix); name != "" {
			platforms[name] = true
		}
	}

	for _, want := range []string{"dpaste", "paste.ee", "rentry"} {
		if !platforms[want] {
			t.Errorf("missing platform %q in findings; got platforms=%v", want, platforms)
		}
	}
}

// TestPasteSites_NameAndRate checks the static metadata reported by a
// zero-value PasteSitesSource: name, burst size, robots policy and enablement
// under an empty recon.Config.
func TestPasteSites_NameAndRate(t *testing.T) {
	s := &PasteSitesSource{}

	if got := s.Name(); got != "pastesites" {
		t.Errorf("Name=%s", got)
	}
	if got := s.Burst(); got != 1 {
		t.Errorf("Burst=%d", got)
	}
	if !s.RespectsRobots() {
		t.Error("expected RespectsRobots=true")
	}
	if !s.Enabled(recon.Config{}) {
		t.Error("expected Enabled=true")
	}
}

// TestPasteSites_Sweep_CtxCancelled verifies that Sweep returns an error when
// it is handed a context that was cancelled before the call.
func TestPasteSites_Sweep_CtxCancelled(t *testing.T) {
	// The handler stalls before replying with an empty body.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		time.Sleep(500 * time.Millisecond)
		_, _ = w.Write([]byte(``))
	}))
	defer srv.Close()

	testPlats := []pastePlatform{
		{
			Name:            "test",
			SearchPath:      srv.URL + "/search?q=%s",
			ResultLinkRegex: `^/[a-z]+$`,
			RawPathTemplate: "/raw%s",
		},
	}

	src := &PasteSitesSource{
		Platforms: testPlats,
		Registry:  pasteSitesTestRegistry(),
		Limiters:  recon.NewLimiterRegistry(),
		Client:    NewClient(),
	}

	// Cancel up front so Sweep starts with an already-finished context.
	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	out := make(chan recon.Finding, 4)
	if err := src.Sweep(ctx, "", out); err == nil {
		t.Fatal("expected ctx error")
	}
}