package sources import ( "context" "errors" "fmt" "net/http" "net/http/httptest" "sync/atomic" "testing" "time" "github.com/salvacybersec/keyhunter/pkg/recon" ) func waybackStubHandler(t *testing.T, calls *int32) http.HandlerFunc { t.Helper() return func(w http.ResponseWriter, r *http.Request) { atomic.AddInt32(calls, 1) if r.URL.Path != "/cdx/search/cdx" { t.Errorf("unexpected path: %s", r.URL.Path) } if r.URL.Query().Get("query") == "" { t.Errorf("missing query param") } w.Header().Set("Content-Type", "text/plain") // Two CDX records per query: "timestamp original-url" fmt.Fprintln(w, "20230101120000 https://example.com/config.js") fmt.Fprintln(w, "20230615080000 https://example.com/env.json") } } func TestWayback_SweepEmitsFindings(t *testing.T) { reg := syntheticRegistry() lim := recon.NewLimiterRegistry() _ = lim.For("wayback", 1000, 100) var calls int32 srv := httptest.NewServer(waybackStubHandler(t, &calls)) defer srv.Close() src := &WaybackMachineSource{ BaseURL: srv.URL, Registry: reg, Limiters: lim, Client: NewClient(), } out := make(chan recon.Finding, 32) ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() done := make(chan error, 1) go func() { done <- src.Sweep(ctx, "", out); close(out) }() var findings []recon.Finding for f := range out { findings = append(findings, f) } if err := <-done; err != nil { t.Fatalf("Sweep error: %v", err) } // 2 keywords * 2 results = 4 findings if len(findings) != 4 { t.Fatalf("expected 4 findings, got %d", len(findings)) } for _, f := range findings { if f.SourceType != "recon:wayback" { t.Errorf("SourceType=%q want recon:wayback", f.SourceType) } } if got := atomic.LoadInt32(&calls); got != 2 { t.Errorf("expected 2 server calls, got %d", got) } } func TestWayback_SnapshotURL(t *testing.T) { reg := syntheticRegistry() lim := recon.NewLimiterRegistry() _ = lim.For("wayback", 1000, 100) srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "text/plain") fmt.Fprintln(w, "20240101000000 https://target.com/page") })) defer srv.Close() src := &WaybackMachineSource{ BaseURL: srv.URL, Registry: reg, Limiters: lim, Client: NewClient(), } out := make(chan recon.Finding, 32) ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() done := make(chan error, 1) go func() { done <- src.Sweep(ctx, "", out); close(out) }() var findings []recon.Finding for f := range out { findings = append(findings, f) } if err := <-done; err != nil { t.Fatalf("Sweep error: %v", err) } // Each finding should have a proper Wayback snapshot URL for _, f := range findings { want := srv.URL + "/web/20240101000000/https://target.com/page" if f.Source != want { t.Errorf("Source=%q want %q", f.Source, want) } } } func TestWayback_EnabledAlwaysTrue(t *testing.T) { s := &WaybackMachineSource{} if !s.Enabled(recon.Config{}) { t.Fatal("expected Enabled=true") } } func TestWayback_NameAndRate(t *testing.T) { s := &WaybackMachineSource{} if s.Name() != "wayback" { t.Errorf("unexpected name: %s", s.Name()) } if s.Burst() != 1 { t.Errorf("burst: %d", s.Burst()) } if !s.RespectsRobots() { t.Error("expected RespectsRobots=true") } } func TestWayback_CtxCancelled(t *testing.T) { reg := syntheticRegistry() lim := recon.NewLimiterRegistry() _ = lim.For("wayback", 1000, 100) src := &WaybackMachineSource{ BaseURL: "http://127.0.0.1:1", Registry: reg, Limiters: lim, Client: NewClient(), } ctx, cancel := context.WithCancel(context.Background()) cancel() out := make(chan recon.Finding, 1) err := src.Sweep(ctx, "", out) if !errors.Is(err, context.Canceled) { t.Fatalf("expected context.Canceled, got %v", err) } } func TestWayback_NilRegistryNoError(t *testing.T) { src := &WaybackMachineSource{Client: NewClient()} out := make(chan recon.Finding, 1) if err := src.Sweep(context.Background(), "", out); err != nil { t.Fatalf("expected nil, got %v", err) } }