diff --git a/pkg/recon/sources/integration_test.go b/pkg/recon/sources/integration_test.go index 73a5f3b..6068ce0 100644 --- a/pkg/recon/sources/integration_test.go +++ b/pkg/recon/sources/integration_test.go @@ -14,11 +14,11 @@ import ( ) // TestIntegration_AllSources_SweepAll spins up a single multiplexed httptest -// server that serves canned fixtures for every Phase 10 code-hosting source -// and Phase 11 search engine / paste site source, registers the sources (with -// BaseURL overrides pointing at the test server) onto a fresh recon.Engine, -// runs SweepAll, and asserts at least one Finding was emitted per SourceType -// across all 18 sources. +// server that serves canned fixtures for every Phase 10 code-hosting source, +// Phase 11 search engine / paste site source, Phase 12 IoT scanner, and +// Phase 12 cloud storage source, registers the sources (with BaseURL overrides +// pointing at the test server) onto a fresh recon.Engine, runs SweepAll, and +// asserts at least one Finding was emitted per SourceType across all 28 sources. // // RegisterAll cannot be used directly because it wires production URLs; the // test exercises the same code paths by constructing each source identically @@ -167,6 +167,78 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) { _, _ = w.Write([]byte("secret: sk-proj-PASTESITES789")) }) + // ---- Phase 12: Shodan /shodan/host/search ---- + mux.HandleFunc("/shodan/host/search", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"matches":[{"ip_str":"1.2.3.4","port":8080,"data":"vllm endpoint"}]}`)) + }) + + // ---- Phase 12: Censys /v2/hosts/search ---- + mux.HandleFunc("/v2/hosts/search", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"result":{"hits":[{"ip":"10.0.0.1","services":[{"port":443,"service_name":"HTTP"}]}]}}`)) + }) + + // ---- Phase 12: ZoomEye /host/search ---- + mux.HandleFunc("/host/search", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"matches":[{"ip":"172.16.0.1","portinfo":{"port":8443,"service":"https"}}]}`)) + }) + + // ---- Phase 12: FOFA /api/v1/search/all ---- + mux.HandleFunc("/api/v1/search/all", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"results":[["example.com","192.168.1.1","443"]],"size":1}`)) + }) + + // ---- Phase 12: Netlas /api/responses/ ---- + mux.HandleFunc("/api/responses/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"items":[{"data":{"ip":"10.10.10.1","port":80}}]}`)) + }) + + // ---- Phase 12: BinaryEdge /v2/query/search ---- + mux.HandleFunc("/v2/query/search", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"events":[{"target":{"ip":"192.0.2.1","port":8080}}]}`)) + }) + + // ---- Phase 12: Cloud storage — S3 + DOSpaces (S3 XML format) ---- + mux.HandleFunc("/cloud-s3/", func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodHead { + w.WriteHeader(http.StatusOK) + return + } + w.Header().Set("Content-Type", "application/xml") + _, _ = w.Write([]byte(` + + .env + config.yaml +`)) + }) + + // ---- Phase 12: Cloud storage — GCS (JSON format) ---- + mux.HandleFunc("/cloud-gcs/", func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodHead { + w.WriteHeader(http.StatusOK) + return + } + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"items":[{"name":".env"},{"name":"config.yaml"}]}`)) + }) + + // ---- Phase 12: Cloud storage — Azure Blob (EnumerationResults XML) ---- + mux.HandleFunc("/cloud-azure/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/xml") + _, _ = w.Write([]byte(` + + + .env + config.yaml + +`)) + }) + srv := httptest.NewServer(mux) defer srv.Close() @@ -304,9 +376,80 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) { BaseURL: srv.URL, }) - // Sanity: all 18 sources registered. - if n := len(eng.List()); n != 18 { - t.Fatalf("expected 18 sources on engine, got %d: %v", n, eng.List()) + // --- Phase 12: IoT scanner sources --- + + // Shodan + shodanSrc := NewShodanSource("test-shodan-key", reg, lim) + shodanSrc.BaseURL = srv.URL + eng.Register(shodanSrc) + // Censys + censysSrc := NewCensysSource("test-id", "test-secret", reg, lim) + censysSrc.BaseURL = srv.URL + eng.Register(censysSrc) + // ZoomEye + zoomeyeSrc := NewZoomEyeSource("test-zoomeye-key", reg, lim) + zoomeyeSrc.BaseURL = srv.URL + eng.Register(zoomeyeSrc) + // FOFA + eng.Register(&FOFASource{ + Email: "test@example.com", + APIKey: "test-fofa-key", + BaseURL: srv.URL, + Registry: reg, + Limiters: lim, + client: NewClient(), + }) + // Netlas + eng.Register(&NetlasSource{ + APIKey: "test-netlas-key", + BaseURL: srv.URL, + Registry: reg, + Limiters: lim, + client: NewClient(), + }) + // BinaryEdge + eng.Register(&BinaryEdgeSource{ + APIKey: "test-binaryedge-key", + BaseURL: srv.URL, + Registry: reg, + Limiters: lim, + client: NewClient(), + }) + + // --- Phase 12: Cloud storage sources --- + + // S3 -- BaseURL pattern with %s for bucket name + eng.Register(&S3Scanner{ + BaseURL: srv.URL + "/cloud-s3/%s", + Registry: reg, + Limiters: lim, + client: NewClient(), + }) + // GCS -- JSON format handler + eng.Register(&GCSScanner{ + BaseURL: srv.URL + "/cloud-gcs/%s", + Registry: reg, + Limiters: lim, + client: NewClient(), + }) + // AzureBlob -- EnumerationResults XML; needs two %s: account + container + eng.Register(&AzureBlobScanner{ + BaseURL: srv.URL + "/cloud-azure/%s-%s", + Registry: reg, + Limiters: lim, + client: NewClient(), + }) + // DOSpaces -- S3-compatible XML; needs two %s: bucket + region + eng.Register(&DOSpacesScanner{ + BaseURL: srv.URL + "/cloud-s3/%s-%s", + Registry: reg, + Limiters: lim, + client: NewClient(), + }) + + // Sanity: all 28 sources registered. + if n := len(eng.List()); n != 28 { + t.Fatalf("expected 28 sources on engine, got %d: %v", n, eng.List()) } ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) @@ -344,6 +487,18 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) { "recon:pastebin", "recon:gistpaste", "recon:pastesites", + // Phase 12: IoT scanners + "recon:shodan", + "recon:censys", + "recon:zoomeye", + "recon:fofa", + "recon:netlas", + "recon:binaryedge", + // Phase 12: Cloud storage + "recon:s3", + "recon:gcs", + "recon:azureblob", + "recon:spaces", } for _, st := range wantTypes { if byType[st] == 0 { @@ -352,6 +507,95 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) { } } +// TestRegisterAll_Phase12 verifies that RegisterAll correctly registers all 28 +// sources (18 Phase 10-11 + 10 Phase 12) and that credential-gated sources +// report Enabled()==false when credentials are empty. +func TestRegisterAll_Phase12(t *testing.T) { + reg := providers.NewRegistryFromProviders([]providers.Provider{ + {Name: "testprov", Keywords: []string{"test-key"}}, + }) + lim := recon.NewLimiterRegistry() + + eng := recon.NewEngine() + RegisterAll(eng, SourcesConfig{ + Registry: reg, + Limiters: lim, + // All credential fields left empty. + }) + + names := eng.List() + if n := len(names); n != 28 { + t.Fatalf("expected 28 sources from RegisterAll, got %d: %v", n, names) + } + + // Build lookup for source access. + nameSet := make(map[string]bool, len(names)) + for _, n := range names { + nameSet[n] = true + } + + // All 10 Phase 12 sources must be present. + wantPhase12 := []string{ + "shodan", "censys", "zoomeye", "fofa", "netlas", "binaryedge", + "s3", "gcs", "azureblob", "spaces", + } + for _, name := range wantPhase12 { + if !nameSet[name] { + t.Errorf("Phase 12 source %q not found in engine; registered: %v", name, names) + } + } + + cfg := recon.Config{} + + // IoT sources with empty credentials must be disabled. + iotSources := []string{"shodan", "censys", "zoomeye", "fofa", "netlas", "binaryedge"} + for _, name := range iotSources { + src, ok := eng.Get(name) + if !ok { + t.Errorf("source %q not found via Get", name) + continue + } + if src.Enabled(cfg) { + t.Errorf("IoT source %q should be Enabled()==false with empty credentials", name) + } + } + + // Cloud storage sources (credentialless) must be enabled. + cloudSources := []string{"s3", "gcs", "azureblob", "spaces"} + for _, name := range cloudSources { + src, ok := eng.Get(name) + if !ok { + t.Errorf("source %q not found via Get", name) + continue + } + if !src.Enabled(cfg) { + t.Errorf("Cloud source %q should be Enabled()==true (credentialless)", name) + } + } +} + +// TestRegisterAll_Phase12_SweepAllNoPanic verifies that SweepAll with a very +// short context timeout completes without panic when all 28 sources are +// registered with empty credentials. +func TestRegisterAll_Phase12_SweepAllNoPanic(t *testing.T) { + reg := providers.NewRegistryFromProviders([]providers.Provider{ + {Name: "testprov", Keywords: []string{"test-key"}}, + }) + lim := recon.NewLimiterRegistry() + + eng := recon.NewEngine() + RegisterAll(eng, SourcesConfig{ + Registry: reg, + Limiters: lim, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond) + defer cancel() + + // Should not panic regardless of timeout or missing credentials. + _, _ = eng.SweepAll(ctx, recon.Config{}) +} + // baseFromReq reconstructs the scheme+host of the inbound request so handlers // can build absolute raw URLs pointing back at the same httptest server. func baseFromReq(r *http.Request) string { diff --git a/pkg/recon/sources/register_test.go b/pkg/recon/sources/register_test.go index b1bbd44..d5cf1db 100644 --- a/pkg/recon/sources/register_test.go +++ b/pkg/recon/sources/register_test.go @@ -16,9 +16,9 @@ func registerTestRegistry() *providers.Registry { }) } -// TestRegisterAll_WiresAllEighteenSources asserts that RegisterAll registers -// every Phase 10 + Phase 11 source by its stable name on a fresh engine. -func TestRegisterAll_WiresAllEighteenSources(t *testing.T) { +// TestRegisterAll_WiresAllTwentyEightSources asserts that RegisterAll registers +// every Phase 10 + Phase 11 + Phase 12 source by its stable name on a fresh engine. +func TestRegisterAll_WiresAllTwentyEightSources(t *testing.T) { eng := recon.NewEngine() cfg := SourcesConfig{ Registry: registerTestRegistry(), @@ -28,12 +28,17 @@ func TestRegisterAll_WiresAllEighteenSources(t *testing.T) { got := eng.List() want := []string{ + "azureblob", + "binaryedge", "bing", "bitbucket", "brave", + "censys", "codeberg", "codesandbox", "duckduckgo", + "fofa", + "gcs", "gist", "gistpaste", "github", @@ -41,11 +46,16 @@ func TestRegisterAll_WiresAllEighteenSources(t *testing.T) { "google", "huggingface", "kaggle", + "netlas", "pastebin", "pastesites", "replit", + "s3", "sandboxes", + "shodan", + "spaces", "yandex", + "zoomeye", } if !reflect.DeepEqual(got, want) { t.Fatalf("RegisterAll names mismatch\n got: %v\nwant: %v", got, want) @@ -63,8 +73,8 @@ func TestRegisterAll_MissingCredsStillRegistered(t *testing.T) { Limiters: recon.NewLimiterRegistry(), }) - if n := len(eng.List()); n != 18 { - t.Fatalf("expected 18 sources registered, got %d: %v", n, eng.List()) + if n := len(eng.List()); n != 28 { + t.Fatalf("expected 28 sources registered, got %d: %v", n, eng.List()) } // SweepAll with an empty config should filter out cred-gated sources