diff --git a/pkg/recon/sources/integration_test.go b/pkg/recon/sources/integration_test.go
index 73a5f3b..6068ce0 100644
--- a/pkg/recon/sources/integration_test.go
+++ b/pkg/recon/sources/integration_test.go
@@ -14,11 +14,11 @@ import (
)
// TestIntegration_AllSources_SweepAll spins up a single multiplexed httptest
-// server that serves canned fixtures for every Phase 10 code-hosting source
-// and Phase 11 search engine / paste site source, registers the sources (with
-// BaseURL overrides pointing at the test server) onto a fresh recon.Engine,
-// runs SweepAll, and asserts at least one Finding was emitted per SourceType
-// across all 18 sources.
+// server that serves canned fixtures for every Phase 10 code-hosting source,
+// Phase 11 search engine / paste site source, Phase 12 IoT scanner, and
+// Phase 12 cloud storage source, registers the sources (with BaseURL overrides
+// pointing at the test server) onto a fresh recon.Engine, runs SweepAll, and
+// asserts at least one Finding was emitted per SourceType across all 28 sources.
//
// RegisterAll cannot be used directly because it wires production URLs; the
// test exercises the same code paths by constructing each source identically
@@ -167,6 +167,78 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) {
_, _ = w.Write([]byte("secret: sk-proj-PASTESITES789"))
})
+ // ---- Phase 12: Shodan /shodan/host/search ----
+ mux.HandleFunc("/shodan/host/search", func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "application/json")
+ _, _ = w.Write([]byte(`{"matches":[{"ip_str":"1.2.3.4","port":8080,"data":"vllm endpoint"}]}`))
+ })
+
+ // ---- Phase 12: Censys /v2/hosts/search ----
+ mux.HandleFunc("/v2/hosts/search", func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "application/json")
+ _, _ = w.Write([]byte(`{"result":{"hits":[{"ip":"10.0.0.1","services":[{"port":443,"service_name":"HTTP"}]}]}}`))
+ })
+
+ // ---- Phase 12: ZoomEye /host/search ----
+ mux.HandleFunc("/host/search", func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "application/json")
+ _, _ = w.Write([]byte(`{"matches":[{"ip":"172.16.0.1","portinfo":{"port":8443,"service":"https"}}]}`))
+ })
+
+ // ---- Phase 12: FOFA /api/v1/search/all ----
+ mux.HandleFunc("/api/v1/search/all", func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "application/json")
+ _, _ = w.Write([]byte(`{"results":[["example.com","192.168.1.1","443"]],"size":1}`))
+ })
+
+ // ---- Phase 12: Netlas /api/responses/ ----
+ mux.HandleFunc("/api/responses/", func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "application/json")
+ _, _ = w.Write([]byte(`{"items":[{"data":{"ip":"10.10.10.1","port":80}}]}`))
+ })
+
+ // ---- Phase 12: BinaryEdge /v2/query/search ----
+ mux.HandleFunc("/v2/query/search", func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "application/json")
+ _, _ = w.Write([]byte(`{"events":[{"target":{"ip":"192.0.2.1","port":8080}}]}`))
+ })
+
+ // ---- Phase 12: Cloud storage — S3 + DOSpaces (S3 XML format) ----
+ mux.HandleFunc("/cloud-s3/", func(w http.ResponseWriter, r *http.Request) {
+ if r.Method == http.MethodHead {
+ w.WriteHeader(http.StatusOK)
+ return
+ }
+ w.Header().Set("Content-Type", "application/xml")
+ _, _ = w.Write([]byte(`
+
+ .env
+ config.yaml
+`))
+ })
+
+ // ---- Phase 12: Cloud storage — GCS (JSON format) ----
+ mux.HandleFunc("/cloud-gcs/", func(w http.ResponseWriter, r *http.Request) {
+ if r.Method == http.MethodHead {
+ w.WriteHeader(http.StatusOK)
+ return
+ }
+ w.Header().Set("Content-Type", "application/json")
+ _, _ = w.Write([]byte(`{"items":[{"name":".env"},{"name":"config.yaml"}]}`))
+ })
+
+ // ---- Phase 12: Cloud storage — Azure Blob (EnumerationResults XML) ----
+ mux.HandleFunc("/cloud-azure/", func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "application/xml")
+ _, _ = w.Write([]byte(`
+
+
+ .env
+ config.yaml
+
+`))
+ })
+
srv := httptest.NewServer(mux)
defer srv.Close()
@@ -304,9 +376,80 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) {
BaseURL: srv.URL,
})
- // Sanity: all 18 sources registered.
- if n := len(eng.List()); n != 18 {
- t.Fatalf("expected 18 sources on engine, got %d: %v", n, eng.List())
+ // --- Phase 12: IoT scanner sources ---
+
+ // Shodan
+ shodanSrc := NewShodanSource("test-shodan-key", reg, lim)
+ shodanSrc.BaseURL = srv.URL
+ eng.Register(shodanSrc)
+ // Censys
+ censysSrc := NewCensysSource("test-id", "test-secret", reg, lim)
+ censysSrc.BaseURL = srv.URL
+ eng.Register(censysSrc)
+ // ZoomEye
+ zoomeyeSrc := NewZoomEyeSource("test-zoomeye-key", reg, lim)
+ zoomeyeSrc.BaseURL = srv.URL
+ eng.Register(zoomeyeSrc)
+ // FOFA
+ eng.Register(&FOFASource{
+ Email: "test@example.com",
+ APIKey: "test-fofa-key",
+ BaseURL: srv.URL,
+ Registry: reg,
+ Limiters: lim,
+ client: NewClient(),
+ })
+ // Netlas
+ eng.Register(&NetlasSource{
+ APIKey: "test-netlas-key",
+ BaseURL: srv.URL,
+ Registry: reg,
+ Limiters: lim,
+ client: NewClient(),
+ })
+ // BinaryEdge
+ eng.Register(&BinaryEdgeSource{
+ APIKey: "test-binaryedge-key",
+ BaseURL: srv.URL,
+ Registry: reg,
+ Limiters: lim,
+ client: NewClient(),
+ })
+
+ // --- Phase 12: Cloud storage sources ---
+
+ // S3 -- BaseURL pattern with %s for bucket name
+ eng.Register(&S3Scanner{
+ BaseURL: srv.URL + "/cloud-s3/%s",
+ Registry: reg,
+ Limiters: lim,
+ client: NewClient(),
+ })
+ // GCS -- JSON format handler
+ eng.Register(&GCSScanner{
+ BaseURL: srv.URL + "/cloud-gcs/%s",
+ Registry: reg,
+ Limiters: lim,
+ client: NewClient(),
+ })
+ // AzureBlob -- EnumerationResults XML; needs two %s: account + container
+ eng.Register(&AzureBlobScanner{
+ BaseURL: srv.URL + "/cloud-azure/%s-%s",
+ Registry: reg,
+ Limiters: lim,
+ client: NewClient(),
+ })
+ // DOSpaces -- S3-compatible XML; needs two %s: bucket + region
+ eng.Register(&DOSpacesScanner{
+ BaseURL: srv.URL + "/cloud-s3/%s-%s",
+ Registry: reg,
+ Limiters: lim,
+ client: NewClient(),
+ })
+
+ // Sanity: all 28 sources registered.
+ if n := len(eng.List()); n != 28 {
+ t.Fatalf("expected 28 sources on engine, got %d: %v", n, eng.List())
}
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
@@ -344,6 +487,18 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) {
"recon:pastebin",
"recon:gistpaste",
"recon:pastesites",
+ // Phase 12: IoT scanners
+ "recon:shodan",
+ "recon:censys",
+ "recon:zoomeye",
+ "recon:fofa",
+ "recon:netlas",
+ "recon:binaryedge",
+ // Phase 12: Cloud storage
+ "recon:s3",
+ "recon:gcs",
+ "recon:azureblob",
+ "recon:spaces",
}
for _, st := range wantTypes {
if byType[st] == 0 {
@@ -352,6 +507,95 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) {
}
}
+// TestRegisterAll_Phase12 checks three things about RegisterAll run without credentials: 28 sources are
+// listed and all 10 Phase 12 names are among them, the six IoT scanner sources report Enabled()==false,
+// and the four cloud storage sources report Enabled()==true.
+func TestRegisterAll_Phase12(t *testing.T) {
+ reg := providers.NewRegistryFromProviders([]providers.Provider{
+ {Name: "testprov", Keywords: []string{"test-key"}},
+ })
+ lim := recon.NewLimiterRegistry()
+
+ eng := recon.NewEngine()
+ RegisterAll(eng, SourcesConfig{
+ Registry: reg,
+ Limiters: lim,
+ // No credential fields are set: only Registry and Limiters are populated.
+ })
+
+ names := eng.List() // registered source names; the total is asserted before any per-name check
+ if n := len(names); n != 28 {
+ t.Fatalf("expected 28 sources from RegisterAll, got %d: %v", n, names)
+ }
+
+ // Index the listed names as a set so each expected name is a single map lookup.
+ nameSet := make(map[string]bool, len(names))
+ for _, n := range names {
+ nameSet[n] = true
+ }
+
+ // Every one of the 10 Phase 12 source names must appear in the engine's list.
+ wantPhase12 := []string{
+ "shodan", "censys", "zoomeye", "fofa", "netlas", "binaryedge",
+ "s3", "gcs", "azureblob", "spaces",
+ }
+ for _, name := range wantPhase12 {
+ if !nameSet[name] {
+ t.Errorf("Phase 12 source %q not found in engine; registered: %v", name, names)
+ }
+ }
+
+ cfg := recon.Config{} // zero-value config handed to every Enabled call below
+
+ // IoT sources were registered without credentials, so Enabled must return false for each.
+ iotSources := []string{"shodan", "censys", "zoomeye", "fofa", "netlas", "binaryedge"}
+ for _, name := range iotSources {
+ src, ok := eng.Get(name)
+ if !ok {
+ t.Errorf("source %q not found via Get", name)
+ continue // report the miss, then keep checking the remaining names
+ }
+ if src.Enabled(cfg) {
+ t.Errorf("IoT source %q should be Enabled()==false with empty credentials", name)
+ }
+ }
+
+ // Cloud storage sources are expected to need no credentials, so Enabled must return true for each.
+ cloudSources := []string{"s3", "gcs", "azureblob", "spaces"}
+ for _, name := range cloudSources {
+ src, ok := eng.Get(name)
+ if !ok {
+ t.Errorf("source %q not found via Get", name)
+ continue // report the miss, then keep checking the remaining names
+ }
+ if !src.Enabled(cfg) {
+ t.Errorf("Cloud source %q should be Enabled()==true (credentialless)", name)
+ }
+ }
+}
+
+// TestRegisterAll_Phase12_SweepAllNoPanic is a smoke test: it registers sources through RegisterAll with
+// no credential fields set, runs SweepAll under a 50ms context timeout, and passes as long as the call
+// returns without panicking.
+func TestRegisterAll_Phase12_SweepAllNoPanic(t *testing.T) {
+ reg := providers.NewRegistryFromProviders([]providers.Provider{
+ {Name: "testprov", Keywords: []string{"test-key"}},
+ })
+ lim := recon.NewLimiterRegistry()
+
+ eng := recon.NewEngine()
+ RegisterAll(eng, SourcesConfig{
+ Registry: reg,
+ Limiters: lim,
+ })
+
+ ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
+ defer cancel()
+
+ // Both return values are discarded on purpose; the only requirement is that SweepAll returns without panicking.
+ _, _ = eng.SweepAll(ctx, recon.Config{})
+}
+
// baseFromReq reconstructs the scheme+host of the inbound request so handlers
// can build absolute raw URLs pointing back at the same httptest server.
func baseFromReq(r *http.Request) string {
diff --git a/pkg/recon/sources/register_test.go b/pkg/recon/sources/register_test.go
index b1bbd44..d5cf1db 100644
--- a/pkg/recon/sources/register_test.go
+++ b/pkg/recon/sources/register_test.go
@@ -16,9 +16,9 @@ func registerTestRegistry() *providers.Registry {
})
}
-// TestRegisterAll_WiresAllEighteenSources asserts that RegisterAll registers
-// every Phase 10 + Phase 11 source by its stable name on a fresh engine.
-func TestRegisterAll_WiresAllEighteenSources(t *testing.T) {
+// TestRegisterAll_WiresAllTwentyEightSources asserts that RegisterAll registers
+// every Phase 10 + Phase 11 + Phase 12 source by its stable name on a fresh engine.
+func TestRegisterAll_WiresAllTwentyEightSources(t *testing.T) {
eng := recon.NewEngine()
cfg := SourcesConfig{
Registry: registerTestRegistry(),
@@ -28,12 +28,17 @@ func TestRegisterAll_WiresAllEighteenSources(t *testing.T) {
got := eng.List()
want := []string{
+ "azureblob",
+ "binaryedge",
"bing",
"bitbucket",
"brave",
+ "censys",
"codeberg",
"codesandbox",
"duckduckgo",
+ "fofa",
+ "gcs",
"gist",
"gistpaste",
"github",
@@ -41,11 +46,16 @@ func TestRegisterAll_WiresAllEighteenSources(t *testing.T) {
"google",
"huggingface",
"kaggle",
+ "netlas",
"pastebin",
"pastesites",
"replit",
+ "s3",
"sandboxes",
+ "shodan",
+ "spaces",
"yandex",
+ "zoomeye",
}
if !reflect.DeepEqual(got, want) {
t.Fatalf("RegisterAll names mismatch\n got: %v\nwant: %v", got, want)
@@ -63,8 +73,8 @@ func TestRegisterAll_MissingCredsStillRegistered(t *testing.T) {
Limiters: recon.NewLimiterRegistry(),
})
- if n := len(eng.List()); n != 18 {
- t.Fatalf("expected 18 sources registered, got %d: %v", n, eng.List())
+ if n := len(eng.List()); n != 28 {
+ t.Fatalf("expected 28 sources registered, got %d: %v", n, eng.List())
}
// SweepAll with an empty config should filter out cred-gated sources