From 13905eb5ee550c966236dd2c987699217d7ab8eb Mon Sep 17 00:00:00 2001 From: salvacybersec Date: Mon, 6 Apr 2026 12:26:01 +0300 Subject: [PATCH] feat(12-03): implement AzureBlobScanner, DOSpacesScanner, and all cloud scanner tests - AzureBlobScanner enumerates public Azure Blob containers with XML listing - DOSpacesScanner enumerates public DO Spaces across 5 regions (S3-compatible XML) - httptest-based tests for all four scanners: sweep, empty registry, ctx cancel, metadata - All sources credentialless, compile-time interface assertions --- pkg/recon/sources/azureblob.go | 145 +++++++++++++++++++++++++++ pkg/recon/sources/azureblob_test.go | 130 ++++++++++++++++++++++++ pkg/recon/sources/dospaces.go | 126 +++++++++++++++++++++++ pkg/recon/sources/dospaces_test.go | 128 +++++++++++++++++++++++ pkg/recon/sources/gcsscanner_test.go | 127 +++++++++++++++++++++++ pkg/recon/sources/s3scanner_test.go | 139 +++++++++++++++++++++++++ 6 files changed, 795 insertions(+) create mode 100644 pkg/recon/sources/azureblob.go create mode 100644 pkg/recon/sources/azureblob_test.go create mode 100644 pkg/recon/sources/dospaces.go create mode 100644 pkg/recon/sources/dospaces_test.go create mode 100644 pkg/recon/sources/gcsscanner_test.go create mode 100644 pkg/recon/sources/s3scanner_test.go diff --git a/pkg/recon/sources/azureblob.go b/pkg/recon/sources/azureblob.go new file mode 100644 index 0000000..4cebedb --- /dev/null +++ b/pkg/recon/sources/azureblob.go @@ -0,0 +1,145 @@ +package sources + +import ( + "context" + "encoding/xml" + "fmt" + "io" + "log" + "net/http" + "time" + + "golang.org/x/time/rate" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +// AzureBlobScanner enumerates publicly accessible Azure Blob Storage containers +// by name pattern and flags readable objects that match common config-file +// patterns as potential API key exposure vectors. 
+// +// Credentialless: uses anonymous HTTP to probe public Azure Blob containers. +type AzureBlobScanner struct { + Registry *providers.Registry + Limiters *recon.LimiterRegistry + // BaseURL overrides the Azure Blob endpoint for tests. + // Default: "https://%s.blob.core.windows.net/%s?restype=container&comp=list" + // Must contain two %s placeholders: account name and container name. + BaseURL string + client *Client +} + +// Compile-time assertion. +var _ recon.ReconSource = (*AzureBlobScanner)(nil) + +func (a *AzureBlobScanner) Name() string { return "azureblob" } +func (a *AzureBlobScanner) RateLimit() rate.Limit { return rate.Every(500 * time.Millisecond) } +func (a *AzureBlobScanner) Burst() int { return 3 } +func (a *AzureBlobScanner) RespectsRobots() bool { return false } +func (a *AzureBlobScanner) Enabled(_ recon.Config) bool { return true } + +// azureContainerNames are common container names to probe within each account. +var azureContainerNames = []string{ + "config", "secrets", "backup", "data", "keys", "env", "credentials", +} + +func (a *AzureBlobScanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error { + client := a.client + if client == nil { + client = NewClient() + } + baseURL := a.BaseURL + if baseURL == "" { + baseURL = "https://%s.blob.core.windows.net/%s?restype=container&comp=list" + } + + accounts := bucketNames(a.Registry) + if len(accounts) == 0 { + return nil + } + + for _, account := range accounts { + if err := ctx.Err(); err != nil { + return err + } + + for _, container := range azureContainerNames { + if err := ctx.Err(); err != nil { + return err + } + if a.Limiters != nil { + if err := a.Limiters.Wait(ctx, a.Name(), a.RateLimit(), a.Burst(), false); err != nil { + return err + } + } + + endpoint := fmt.Sprintf(baseURL, account, container) + blobs, err := a.listBlobs(ctx, client, endpoint) + if err != nil { + log.Printf("azureblob: account %q container %q probe failed (skipping): %v", account, container, 
err) + continue + } + + for _, name := range blobs { + if !isConfigFile(name) { + continue + } + out <- recon.Finding{ + Source: fmt.Sprintf("azure://%s/%s/%s", account, container, name), + SourceType: "recon:azureblob", + Confidence: "medium", + DetectedAt: time.Now(), + } + } + } + } + return nil +} + +// listBlobs fetches and parses Azure Blob container listing XML. +func (a *AzureBlobScanner) listBlobs(ctx context.Context, client *Client, endpoint string) ([]string, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) + if err != nil { + return nil, err + } + resp, err := client.Do(ctx, req) + if err != nil { + return nil, nil // non-public or non-existent — skip silently + } + defer resp.Body.Close() + + return parseAzureBlobXML(resp.Body) +} + +// azureEnumBlobResults models the Azure Blob EnumerationResults XML. +type azureEnumBlobResults struct { + XMLName xml.Name `xml:"EnumerationResults"` + Blobs azureBlobs `xml:"Blobs"` +} + +type azureBlobs struct { + Blob []azureBlob `xml:"Blob"` +} + +type azureBlob struct { + Name string `xml:"Name"` +} + +func parseAzureBlobXML(r io.Reader) ([]string, error) { + data, err := io.ReadAll(io.LimitReader(r, 1<<20)) + if err != nil { + return nil, err + } + var result azureEnumBlobResults + if err := xml.Unmarshal(data, &result); err != nil { + return nil, err + } + names := make([]string, 0, len(result.Blobs.Blob)) + for _, b := range result.Blobs.Blob { + if b.Name != "" { + names = append(names, b.Name) + } + } + return names, nil +} diff --git a/pkg/recon/sources/azureblob_test.go b/pkg/recon/sources/azureblob_test.go new file mode 100644 index 0000000..20a02e0 --- /dev/null +++ b/pkg/recon/sources/azureblob_test.go @@ -0,0 +1,130 @@ +package sources + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +func azureTestServer() *httptest.Server { + mux 
:= http.NewServeMux()
+
+	// Respond to any request path that contains "testprov-keys" account + "config" container.
+	mux.HandleFunc("/testprov-keys/config", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/xml")
+		_, _ = w.Write([]byte(`<?xml version="1.0" encoding="utf-8"?>
+<EnumerationResults>
+	<Blobs>
+		<Blob><Name>.env</Name></Blob>
+		<Blob><Name>credentials.json</Name></Blob>
+		<Blob><Name>photo.png</Name></Blob>
+	</Blobs>
+</EnumerationResults>`))
+	})
+
+	// All other containers return error.
+	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusNotFound)
+	})
+
+	return httptest.NewServer(mux)
+}
+
+func TestAzureBlob_Sweep(t *testing.T) {
+	srv := azureTestServer()
+	defer srv.Close()
+
+	// BaseURL format: server/{account}/{container}?params
+	// We use a simplified format for tests.
+	src := &AzureBlobScanner{
+		Registry: cloudTestRegistry(),
+		BaseURL:  srv.URL + "/%s/%s",
+		client:   NewClient(),
+	}
+
+	out := make(chan recon.Finding, 64)
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	if err := src.Sweep(ctx, "", out); err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+	close(out)
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+
+	// .env and credentials.json match; photo.png does not.
+	// Only the "config" container returns results; others 404.
+ if len(findings) != 2 { + t.Fatalf("expected 2 findings, got %d: %+v", len(findings), findings) + } + + for _, f := range findings { + if f.SourceType != "recon:azureblob" { + t.Errorf("unexpected SourceType: %s", f.SourceType) + } + if f.Confidence != "medium" { + t.Errorf("unexpected Confidence: %s", f.Confidence) + } + } +} + +func TestAzureBlob_EmptyRegistry(t *testing.T) { + src := &AzureBlobScanner{ + Registry: providers.NewRegistryFromProviders(nil), + Limiters: recon.NewLimiterRegistry(), + client: NewClient(), + } + + out := make(chan recon.Finding, 4) + if err := src.Sweep(context.Background(), "", out); err != nil { + t.Fatalf("Sweep error: %v", err) + } + close(out) + if len(out) != 0 { + t.Fatal("expected 0 findings") + } +} + +func TestAzureBlob_CtxCancelled(t *testing.T) { + srv := azureTestServer() + defer srv.Close() + + src := &AzureBlobScanner{ + Registry: cloudTestRegistry(), + BaseURL: srv.URL + "/%s/%s", + client: NewClient(), + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + out := make(chan recon.Finding, 4) + if err := src.Sweep(ctx, "", out); err == nil { + t.Fatal("expected ctx error") + } +} + +func TestAzureBlob_EnabledAndMeta(t *testing.T) { + a := &AzureBlobScanner{} + if a.Name() != "azureblob" { + t.Fatalf("unexpected name: %s", a.Name()) + } + if !a.Enabled(recon.Config{}) { + t.Fatal("expected Enabled=true") + } + if a.RespectsRobots() { + t.Fatal("expected RespectsRobots=false") + } + if a.Burst() != 3 { + t.Fatal("expected Burst=3") + } +} diff --git a/pkg/recon/sources/dospaces.go b/pkg/recon/sources/dospaces.go new file mode 100644 index 0000000..a85ed84 --- /dev/null +++ b/pkg/recon/sources/dospaces.go @@ -0,0 +1,126 @@ +package sources + +import ( + "context" + "fmt" + "log" + "net/http" + "time" + + "golang.org/x/time/rate" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +// DOSpacesScanner enumerates publicly accessible DigitalOcean 
Spaces by name +// pattern and flags readable objects matching common config-file patterns as +// potential API key exposure vectors. +// +// Credentialless: uses anonymous HTTP to probe public DO Spaces. DO Spaces are +// S3-compatible, so the same XML ListBucketResult format is used. +type DOSpacesScanner struct { + Registry *providers.Registry + Limiters *recon.LimiterRegistry + // BaseURL overrides the DO Spaces endpoint for tests. + // Default: "https://%s.%s.digitaloceanspaces.com" + // Must contain two %s placeholders: bucket name and region. + BaseURL string + client *Client +} + +// Compile-time assertion. +var _ recon.ReconSource = (*DOSpacesScanner)(nil) + +func (d *DOSpacesScanner) Name() string { return "spaces" } +func (d *DOSpacesScanner) RateLimit() rate.Limit { return rate.Every(500 * time.Millisecond) } +func (d *DOSpacesScanner) Burst() int { return 3 } +func (d *DOSpacesScanner) RespectsRobots() bool { return false } +func (d *DOSpacesScanner) Enabled(_ recon.Config) bool { return true } + +// doRegions are the DigitalOcean Spaces regions to iterate. 
+var doRegions = []string{"nyc3", "sfo3", "ams3", "sgp1", "fra1"} + +func (d *DOSpacesScanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error { + client := d.client + if client == nil { + client = NewClient() + } + baseURL := d.BaseURL + if baseURL == "" { + baseURL = "https://%s.%s.digitaloceanspaces.com" + } + + names := bucketNames(d.Registry) + if len(names) == 0 { + return nil + } + + for _, bucket := range names { + if err := ctx.Err(); err != nil { + return err + } + + for _, region := range doRegions { + if err := ctx.Err(); err != nil { + return err + } + if d.Limiters != nil { + if err := d.Limiters.Wait(ctx, d.Name(), d.RateLimit(), d.Burst(), false); err != nil { + return err + } + } + + endpoint := fmt.Sprintf(baseURL, bucket, region) + keys, err := d.listSpace(ctx, client, endpoint) + if err != nil { + log.Printf("spaces: bucket %q region %q probe failed (skipping): %v", bucket, region, err) + continue + } + + for _, key := range keys { + if !isConfigFile(key) { + continue + } + out <- recon.Finding{ + Source: fmt.Sprintf("do://%s/%s", bucket, key), + SourceType: "recon:spaces", + Confidence: "medium", + DetectedAt: time.Now(), + } + } + } + } + return nil +} + +// listSpace probes a DO Spaces endpoint via HEAD then parses the S3-compatible +// ListBucketResult XML on success. 
+func (d *DOSpacesScanner) listSpace(ctx context.Context, client *Client, endpoint string) ([]string, error) { + req, err := http.NewRequestWithContext(ctx, http.MethodHead, endpoint, nil) + if err != nil { + return nil, err + } + resp, err := client.HTTP.Do(req) + if err != nil { + return nil, err + } + resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, nil + } + + getReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) + if err != nil { + return nil, err + } + getResp, err := client.Do(ctx, getReq) + if err != nil { + return nil, err + } + defer getResp.Body.Close() + + // DO Spaces uses S3-compatible XML format. + return parseS3ListXML(getResp.Body) +} diff --git a/pkg/recon/sources/dospaces_test.go b/pkg/recon/sources/dospaces_test.go new file mode 100644 index 0000000..cab0f41 --- /dev/null +++ b/pkg/recon/sources/dospaces_test.go @@ -0,0 +1,128 @@ +package sources + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +func doSpacesTestServer() *httptest.Server { + mux := http.NewServeMux() + + // Only testprov-keys bucket in nyc3 region is publicly listable. 
+	mux.HandleFunc("/testprov-keys/nyc3/", func(w http.ResponseWriter, r *http.Request) {
+		if r.Method == http.MethodHead {
+			w.WriteHeader(http.StatusOK)
+			return
+		}
+		w.Header().Set("Content-Type", "application/xml")
+		_, _ = w.Write([]byte(`<?xml version="1.0" encoding="UTF-8"?>
+<ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
+	<Contents><Key>.env.production</Key></Contents>
+	<Contents><Key>app.conf</Key></Contents>
+	<Contents><Key>logo.svg</Key></Contents>
+</ListBucketResult>`))
+	})
+
+	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusNotFound)
+	})
+
+	return httptest.NewServer(mux)
+}
+
+func TestDOSpaces_Sweep(t *testing.T) {
+	srv := doSpacesTestServer()
+	defer srv.Close()
+
+	src := &DOSpacesScanner{
+		Registry: cloudTestRegistry(),
+		BaseURL:  srv.URL + "/%s/%s/",
+		client:   NewClient(),
+	}
+
+	out := make(chan recon.Finding, 64)
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	if err := src.Sweep(ctx, "", out); err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+	close(out)
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+
+	// .env.production and app.conf match; logo.svg does not.
+ if len(findings) != 2 { + t.Fatalf("expected 2 findings, got %d: %+v", len(findings), findings) + } + + for _, f := range findings { + if f.SourceType != "recon:spaces" { + t.Errorf("unexpected SourceType: %s", f.SourceType) + } + if f.Confidence != "medium" { + t.Errorf("unexpected Confidence: %s", f.Confidence) + } + } +} + +func TestDOSpaces_EmptyRegistry(t *testing.T) { + src := &DOSpacesScanner{ + Registry: providers.NewRegistryFromProviders(nil), + Limiters: recon.NewLimiterRegistry(), + client: NewClient(), + } + + out := make(chan recon.Finding, 4) + if err := src.Sweep(context.Background(), "", out); err != nil { + t.Fatalf("Sweep error: %v", err) + } + close(out) + if len(out) != 0 { + t.Fatal("expected 0 findings") + } +} + +func TestDOSpaces_CtxCancelled(t *testing.T) { + srv := doSpacesTestServer() + defer srv.Close() + + src := &DOSpacesScanner{ + Registry: cloudTestRegistry(), + BaseURL: srv.URL + "/%s/%s/", + client: NewClient(), + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + out := make(chan recon.Finding, 4) + if err := src.Sweep(ctx, "", out); err == nil { + t.Fatal("expected ctx error") + } +} + +func TestDOSpaces_EnabledAndMeta(t *testing.T) { + d := &DOSpacesScanner{} + if d.Name() != "spaces" { + t.Fatalf("unexpected name: %s", d.Name()) + } + if !d.Enabled(recon.Config{}) { + t.Fatal("expected Enabled=true") + } + if d.RespectsRobots() { + t.Fatal("expected RespectsRobots=false") + } + if d.Burst() != 3 { + t.Fatal("expected Burst=3") + } +} diff --git a/pkg/recon/sources/gcsscanner_test.go b/pkg/recon/sources/gcsscanner_test.go new file mode 100644 index 0000000..c57c199 --- /dev/null +++ b/pkg/recon/sources/gcsscanner_test.go @@ -0,0 +1,127 @@ +package sources + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +func gcsTestServer() *httptest.Server { + mux := 
http.NewServeMux() + + mux.HandleFunc("/testprov-keys/", func(w http.ResponseWriter, r *http.Request) { + if r.Method == http.MethodHead { + w.WriteHeader(http.StatusOK) + return + } + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"items":[ + {"name":".env"}, + {"name":"config.yaml"}, + {"name":"readme.md"}, + {"name":"secrets.toml"} + ]}`)) + }) + + mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + }) + + return httptest.NewServer(mux) +} + +func TestGCSScanner_Sweep(t *testing.T) { + srv := gcsTestServer() + defer srv.Close() + + src := &GCSScanner{ + Registry: cloudTestRegistry(), + BaseURL: srv.URL + "/%s/", + client: NewClient(), + } + + out := make(chan recon.Finding, 32) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + if err := src.Sweep(ctx, "", out); err != nil { + t.Fatalf("Sweep error: %v", err) + } + close(out) + + var findings []recon.Finding + for f := range out { + findings = append(findings, f) + } + + // .env, config.yaml, secrets.toml match; readme.md does not. 
+ if len(findings) != 3 { + t.Fatalf("expected 3 findings, got %d: %+v", len(findings), findings) + } + + for _, f := range findings { + if f.SourceType != "recon:gcs" { + t.Errorf("unexpected SourceType: %s", f.SourceType) + } + if f.Confidence != "medium" { + t.Errorf("unexpected Confidence: %s", f.Confidence) + } + } +} + +func TestGCSScanner_EmptyRegistry(t *testing.T) { + src := &GCSScanner{ + Registry: providers.NewRegistryFromProviders(nil), + Limiters: recon.NewLimiterRegistry(), + client: NewClient(), + } + + out := make(chan recon.Finding, 4) + if err := src.Sweep(context.Background(), "", out); err != nil { + t.Fatalf("Sweep error: %v", err) + } + close(out) + if len(out) != 0 { + t.Fatal("expected 0 findings") + } +} + +func TestGCSScanner_CtxCancelled(t *testing.T) { + srv := gcsTestServer() + defer srv.Close() + + src := &GCSScanner{ + Registry: cloudTestRegistry(), + BaseURL: srv.URL + "/%s/", + client: NewClient(), + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + out := make(chan recon.Finding, 4) + if err := src.Sweep(ctx, "", out); err == nil { + t.Fatal("expected ctx error") + } +} + +func TestGCSScanner_EnabledAndMeta(t *testing.T) { + g := &GCSScanner{} + if g.Name() != "gcs" { + t.Fatalf("unexpected name: %s", g.Name()) + } + if !g.Enabled(recon.Config{}) { + t.Fatal("expected Enabled=true") + } + if g.RespectsRobots() { + t.Fatal("expected RespectsRobots=false") + } + if g.Burst() != 3 { + t.Fatal("expected Burst=3") + } +} diff --git a/pkg/recon/sources/s3scanner_test.go b/pkg/recon/sources/s3scanner_test.go new file mode 100644 index 0000000..a903dab --- /dev/null +++ b/pkg/recon/sources/s3scanner_test.go @@ -0,0 +1,139 @@ +package sources + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +func cloudTestRegistry() *providers.Registry { + return 
providers.NewRegistryFromProviders([]providers.Provider{
+		{Name: "testprov", Keywords: []string{"testprov-key"}},
+	})
+}
+
+func s3TestServer() *httptest.Server {
+	mux := http.NewServeMux()
+
+	// Respond to HEAD for the testprov-keys bucket with 200 (public).
+	mux.HandleFunc("/testprov-keys/", func(w http.ResponseWriter, r *http.Request) {
+		if r.Method == http.MethodHead {
+			w.WriteHeader(http.StatusOK)
+			return
+		}
+		// GET — return S3 ListBucketResult XML.
+		w.Header().Set("Content-Type", "application/xml")
+		_, _ = w.Write([]byte(`<?xml version="1.0" encoding="UTF-8"?>
+<ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
+	<Contents><Key>.env</Key></Contents>
+	<Contents><Key>config.yaml</Key></Contents>
+	<Contents><Key>readme.md</Key></Contents>
+	<Contents><Key>data/settings.json</Key></Contents>
+</ListBucketResult>`))
+	})
+
+	// All other buckets return 404 (not found).
+	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusNotFound)
+	})
+
+	return httptest.NewServer(mux)
+}
+
+func TestS3Scanner_Sweep(t *testing.T) {
+	srv := s3TestServer()
+	defer srv.Close()
+
+	src := &S3Scanner{
+		Registry: cloudTestRegistry(),
+		BaseURL:  srv.URL + "/%s/",
+		client:   NewClient(),
+	}
+
+	out := make(chan recon.Finding, 32)
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	if err := src.Sweep(ctx, "", out); err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+	close(out)
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+
+	// .env, config.yaml, data/settings.json match; readme.md does not.
+ if len(findings) != 3 { + t.Fatalf("expected 3 findings, got %d: %+v", len(findings), findings) + } + + for _, f := range findings { + if f.SourceType != "recon:s3" { + t.Errorf("unexpected SourceType: %s", f.SourceType) + } + if f.Confidence != "medium" { + t.Errorf("unexpected Confidence: %s", f.Confidence) + } + } +} + +func TestS3Scanner_EmptyRegistry(t *testing.T) { + src := &S3Scanner{ + Registry: providers.NewRegistryFromProviders(nil), + Limiters: recon.NewLimiterRegistry(), + client: NewClient(), + } + + out := make(chan recon.Finding, 4) + ctx := context.Background() + if err := src.Sweep(ctx, "", out); err != nil { + t.Fatalf("Sweep error: %v", err) + } + close(out) + + if len(out) != 0 { + t.Fatal("expected 0 findings with empty registry") + } +} + +func TestS3Scanner_CtxCancelled(t *testing.T) { + srv := s3TestServer() + defer srv.Close() + + src := &S3Scanner{ + Registry: cloudTestRegistry(), + BaseURL: srv.URL + "/%s/", + client: NewClient(), + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + out := make(chan recon.Finding, 4) + if err := src.Sweep(ctx, "", out); err == nil { + t.Fatal("expected ctx error") + } +} + +func TestS3Scanner_EnabledAndMeta(t *testing.T) { + s := &S3Scanner{} + if s.Name() != "s3" { + t.Fatalf("unexpected name: %s", s.Name()) + } + if !s.Enabled(recon.Config{}) { + t.Fatal("expected Enabled=true") + } + if s.RespectsRobots() { + t.Fatal("expected RespectsRobots=false") + } + if s.Burst() != 3 { + t.Fatal("expected Burst=3") + } +}