feat(12-03): implement AzureBlobScanner, DOSpacesScanner, and all cloud scanner tests

- AzureBlobScanner enumerates public Azure Blob containers with XML listing
- DOSpacesScanner enumerates public DO Spaces across 5 regions (S3-compatible XML)
- httptest-based tests for all four scanners: sweep, empty registry, ctx cancel, metadata
- All sources credentialless, compile-time interface assertions
This commit is contained in:
salvacybersec
2026-04-06 12:26:01 +03:00
parent 47d542b9de
commit 13905eb5ee
6 changed files with 795 additions and 0 deletions

View File

@@ -0,0 +1,145 @@
package sources
import (
"context"
"encoding/xml"
"fmt"
"io"
"log"
"net/http"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// AzureBlobScanner enumerates publicly accessible Azure Blob Storage containers
// by name pattern and flags readable objects that match common config-file
// patterns as potential API key exposure vectors.
//
// Credentialless: uses anonymous HTTP to probe public Azure Blob containers.
type AzureBlobScanner struct {
// Registry is handed to bucketNames to derive the candidate storage
// account names that Sweep probes.
Registry *providers.Registry
// Limiters, when non-nil, is waited on before every probe using this
// source's Name, RateLimit and Burst. A nil value disables rate limiting.
Limiters *recon.LimiterRegistry
// BaseURL overrides the Azure Blob endpoint for tests.
// Default: "https://%s.blob.core.windows.net/%s?restype=container&comp=list"
// Must contain two %s placeholders: account name and container name.
BaseURL string
// client performs the HTTP probes; when nil, Sweep falls back to NewClient().
client *Client
}
// Compile-time assertion that *AzureBlobScanner satisfies recon.ReconSource.
var _ recon.ReconSource = (*AzureBlobScanner)(nil)
// Name returns the source identifier, "azureblob".
func (a *AzureBlobScanner) Name() string {
	return "azureblob"
}

// RateLimit allows one probe every 500ms.
func (a *AzureBlobScanner) RateLimit() rate.Limit {
	const probeInterval = 500 * time.Millisecond
	return rate.Every(probeInterval)
}

// Burst returns the limiter burst size, 3.
func (a *AzureBlobScanner) Burst() int {
	return 3
}

// RespectsRobots always reports false for this source.
func (a *AzureBlobScanner) RespectsRobots() bool {
	return false
}

// Enabled always reports true; the configuration is not consulted.
func (a *AzureBlobScanner) Enabled(_ recon.Config) bool {
	return true
}
// azureContainerNames are common container names to probe within each account.
var azureContainerNames = []string{
	"config", "secrets", "backup", "data", "keys", "env", "credentials",
}

// Sweep probes every (account, container) pair, where accounts come from
// bucketNames(a.Registry) and containers from azureContainerNames, and sends
// one Finding on out for each listed blob whose name satisfies isConfigFile.
//
// The query argument is ignored. Probe failures are logged and skipped so a
// single bad container does not abort the sweep. Sweep returns nil when every
// pair has been probed, or the context/limiter error when the sweep is
// interrupted. It never closes out.
func (a *AzureBlobScanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
	client := a.client
	if client == nil {
		client = NewClient()
	}
	baseURL := a.BaseURL
	if baseURL == "" {
		baseURL = "https://%s.blob.core.windows.net/%s?restype=container&comp=list"
	}

	accounts := bucketNames(a.Registry)
	if len(accounts) == 0 {
		return nil
	}

	for _, account := range accounts {
		if err := ctx.Err(); err != nil {
			return err
		}
		for _, container := range azureContainerNames {
			if err := ctx.Err(); err != nil {
				return err
			}
			if a.Limiters != nil {
				if err := a.Limiters.Wait(ctx, a.Name(), a.RateLimit(), a.Burst(), false); err != nil {
					return err
				}
			}

			endpoint := fmt.Sprintf(baseURL, account, container)
			blobs, err := a.listBlobs(ctx, client, endpoint)
			// A probe interrupted by cancellation must end the sweep with the
			// context error rather than being treated as an empty or failed
			// container; otherwise a cancel during the last probe would be
			// reported as a successful sweep.
			if ctxErr := ctx.Err(); ctxErr != nil {
				return ctxErr
			}
			if err != nil {
				log.Printf("azureblob: account %q container %q probe failed (skipping): %v", account, container, err)
				continue
			}

			for _, name := range blobs {
				if !isConfigFile(name) {
					continue
				}
				finding := recon.Finding{
					Source:     fmt.Sprintf("azure://%s/%s/%s", account, container, name),
					SourceType: "recon:azureblob",
					Confidence: "medium",
					DetectedAt: time.Now(),
				}
				// Never block forever on a consumer that has gone away: give
				// up as soon as the context is done.
				select {
				case out <- finding:
				case <-ctx.Done():
					return ctx.Err()
				}
			}
		}
	}
	return nil
}
// listBlobs issues a GET for endpoint and returns the blob names found in the
// container listing XML.
//
// It returns (nil, nil) when the container cannot be listed anonymously: the
// request fails for a reason other than context cancellation, or the response
// status is not 200 OK. A cancelled or expired ctx is returned as an error so
// the caller can stop instead of mistaking it for a private container. Errors
// from building the request or parsing the listing are returned as-is.
func (a *AzureBlobScanner) listBlobs(ctx context.Context, client *Client, endpoint string) ([]string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, err
	}
	resp, err := client.Do(ctx, req)
	if err != nil {
		if ctxErr := ctx.Err(); ctxErr != nil {
			return nil, ctxErr
		}
		return nil, nil // non-public or non-existent — skip silently
	}
	defer resp.Body.Close()

	// Only a successful listing carries EnumerationResults XML; anything else
	// (error documents, empty bodies) is treated like a non-public container.
	// NOTE(review): client.Do may already reject non-2xx responses — confirm.
	if resp.StatusCode != http.StatusOK {
		return nil, nil
	}
	return parseAzureBlobXML(resp.Body)
}
// azureEnumBlobResults models the Azure Blob EnumerationResults XML envelope:
// a root <EnumerationResults> element holding a <Blobs> child.
type azureEnumBlobResults struct {
	XMLName xml.Name   `xml:"EnumerationResults"`
	Blobs   azureBlobs `xml:"Blobs"`
}

// azureBlobs models the <Blobs> element, a sequence of <Blob> entries.
type azureBlobs struct {
	Blob []azureBlob `xml:"Blob"`
}

// azureBlob models a single <Blob> entry; only its <Name> is decoded.
type azureBlob struct {
	Name string `xml:"Name"`
}

// parseAzureBlobXML reads an EnumerationResults document from r and returns
// the non-empty <Name> of every EnumerationResults/Blobs/Blob element, in
// document order.
//
// At most 1 MiB of r is consumed. The document is decoded as a token stream so
// that a listing larger than the cap still yields the names that were read
// completely before the cut-off, instead of failing as a whole. An error is
// returned when the input is empty, the root element is not
// <EnumerationResults>, or the XML is malformed for any reason other than
// hitting the size cap after at least one name was read.
func parseAzureBlobXML(r io.Reader) ([]string, error) {
	const maxListingBytes = 1 << 20

	limited := &io.LimitedReader{R: r, N: maxListingBytes}
	dec := xml.NewDecoder(limited)
	names := []string{}

	// fail decides what a decode error means: once the size cap is exhausted
	// the error is just the truncation, so the names gathered so far are kept.
	fail := func(err error) ([]string, error) {
		if limited.N <= 0 && len(names) > 0 {
			return names, nil
		}
		return nil, err
	}

	depth := 0       // number of currently open elements
	inBlobs := false // true while inside a <Blobs> child of the root
	for {
		tok, err := dec.Token()
		if err != nil {
			return fail(err)
		}
		switch el := tok.(type) {
		case xml.StartElement:
			switch {
			case depth == 0 && el.Name.Local != "EnumerationResults":
				return nil, fmt.Errorf("expected element type <EnumerationResults> but have <%s>", el.Name.Local)
			case depth == 1 && el.Name.Local == "Blobs":
				inBlobs = true
				depth++
			case depth == 2 && inBlobs && el.Name.Local == "Blob":
				// DecodeElement consumes the whole <Blob> subtree including
				// its end tag, so depth is left unchanged.
				var blob azureBlob
				if err := dec.DecodeElement(&blob, &el); err != nil {
					return fail(err)
				}
				if blob.Name != "" {
					names = append(names, blob.Name)
				}
			default:
				depth++
			}
		case xml.EndElement:
			depth--
			switch depth {
			case 0:
				// Root closed: anything after it is ignored.
				return names, nil
			case 1:
				inBlobs = false
			}
		}
	}
}

View File

@@ -0,0 +1,130 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// azureTestServer starts a fake Azure Blob endpoint. The exact path
// /testprov-keys/config (account "testprov-keys", container "config") returns
// a three-blob listing; every other path answers 404 with an empty body.
// The caller is responsible for closing the returned server.
func azureTestServer() *httptest.Server {
	const listing = `<?xml version="1.0" encoding="UTF-8"?>
<EnumerationResults>
<Blobs>
<Blob><Name>.env</Name></Blob>
<Blob><Name>credentials.json</Name></Blob>
<Blob><Name>photo.png</Name></Blob>
</Blobs>
</EnumerationResults>`

	serveListing := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/xml")
		_, _ = w.Write([]byte(listing))
	}
	serveNotFound := func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusNotFound)
	}

	router := http.NewServeMux()
	router.HandleFunc("/", serveNotFound)
	router.HandleFunc("/testprov-keys/config", serveListing)
	return httptest.NewServer(router)
}
// TestAzureBlob_Sweep runs a full sweep against the fake Azure server and
// checks that only the config-like blobs of the one listable container are
// reported, with the expected SourceType and Confidence.
func TestAzureBlob_Sweep(t *testing.T) {
	server := azureTestServer()
	defer server.Close()

	// For tests the endpoint is simplified to server/{account}/{container},
	// without the production query string.
	scanner := &AzureBlobScanner{
		Registry: cloudTestRegistry(),
		BaseURL:  server.URL + "/%s/%s",
		client:   NewClient(),
	}

	results := make(chan recon.Finding, 64)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	err := scanner.Sweep(ctx, "", results)
	if err != nil {
		t.Fatalf("Sweep error: %v", err)
	}
	close(results)

	got := make([]recon.Finding, 0, len(results))
	for finding := range results {
		got = append(got, finding)
	}

	// .env and credentials.json match; photo.png does not.
	// Only the "config" container returns results; others 404.
	if len(got) != 2 {
		t.Fatalf("expected 2 findings, got %d: %+v", len(got), got)
	}
	for i := range got {
		if got[i].SourceType != "recon:azureblob" {
			t.Errorf("unexpected SourceType: %s", got[i].SourceType)
		}
		if got[i].Confidence != "medium" {
			t.Errorf("unexpected Confidence: %s", got[i].Confidence)
		}
	}
}
func TestAzureBlob_EmptyRegistry(t *testing.T) {
src := &AzureBlobScanner{
Registry: providers.NewRegistryFromProviders(nil),
Limiters: recon.NewLimiterRegistry(),
client: NewClient(),
}
out := make(chan recon.Finding, 4)
if err := src.Sweep(context.Background(), "", out); err != nil {
t.Fatalf("Sweep error: %v", err)
}
close(out)
if len(out) != 0 {
t.Fatal("expected 0 findings")
}
}
// TestAzureBlob_CtxCancelled checks that Sweep reports an error when it is
// started with an already-cancelled context.
func TestAzureBlob_CtxCancelled(t *testing.T) {
	server := azureTestServer()
	defer server.Close()

	scanner := &AzureBlobScanner{
		Registry: cloudTestRegistry(),
		BaseURL:  server.URL + "/%s/%s",
		client:   NewClient(),
	}

	// Cancel before the sweep starts.
	cancelled, cancel := context.WithCancel(context.Background())
	cancel()

	results := make(chan recon.Finding, 4)
	err := scanner.Sweep(cancelled, "", results)
	if err == nil {
		t.Fatal("expected ctx error")
	}
}
// TestAzureBlob_EnabledAndMeta pins the static metadata of a zero-value
// AzureBlobScanner: name, enabled flag, robots policy and burst size.
func TestAzureBlob_EnabledAndMeta(t *testing.T) {
	scanner := &AzureBlobScanner{}

	if name := scanner.Name(); name != "azureblob" {
		t.Fatalf("unexpected name: %s", name)
	}
	if enabled := scanner.Enabled(recon.Config{}); !enabled {
		t.Fatal("expected Enabled=true")
	}
	if robots := scanner.RespectsRobots(); robots {
		t.Fatal("expected RespectsRobots=false")
	}
	if burst := scanner.Burst(); burst != 3 {
		t.Fatal("expected Burst=3")
	}
}

View File

@@ -0,0 +1,126 @@
package sources
import (
"context"
"fmt"
"log"
"net/http"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// DOSpacesScanner enumerates publicly accessible DigitalOcean Spaces by name
// pattern and flags readable objects matching common config-file patterns as
// potential API key exposure vectors.
//
// Credentialless: uses anonymous HTTP to probe public DO Spaces. DO Spaces are
// S3-compatible, so the same XML ListBucketResult format is used.
type DOSpacesScanner struct {
// Registry is handed to bucketNames to derive the candidate Space (bucket)
// names that Sweep probes in every region.
Registry *providers.Registry
// Limiters, when non-nil, is waited on before every probe using this
// source's Name, RateLimit and Burst. A nil value disables rate limiting.
Limiters *recon.LimiterRegistry
// BaseURL overrides the DO Spaces endpoint for tests.
// Default: "https://%s.%s.digitaloceanspaces.com"
// Must contain two %s placeholders: bucket name and region.
BaseURL string
// client performs the HTTP probes; when nil, Sweep falls back to NewClient().
client *Client
}
// Compile-time assertion that *DOSpacesScanner satisfies recon.ReconSource.
var _ recon.ReconSource = (*DOSpacesScanner)(nil)
// Name returns the source identifier, "spaces".
func (d *DOSpacesScanner) Name() string {
	return "spaces"
}

// RateLimit allows one probe every 500ms.
func (d *DOSpacesScanner) RateLimit() rate.Limit {
	const probeInterval = 500 * time.Millisecond
	return rate.Every(probeInterval)
}

// Burst returns the limiter burst size, 3.
func (d *DOSpacesScanner) Burst() int {
	return 3
}

// RespectsRobots always reports false for this source.
func (d *DOSpacesScanner) RespectsRobots() bool {
	return false
}

// Enabled always reports true; the configuration is not consulted.
func (d *DOSpacesScanner) Enabled(_ recon.Config) bool {
	return true
}
// doRegions are the DigitalOcean Spaces regions to iterate.
var doRegions = []string{"nyc3", "sfo3", "ams3", "sgp1", "fra1"}

// Sweep probes every (bucket, region) pair, where buckets come from
// bucketNames(d.Registry) and regions from doRegions, and sends one Finding on
// out for each listed key that satisfies isConfigFile.
//
// The query argument is ignored. Probe failures are logged and skipped so a
// single bad Space does not abort the sweep. Sweep returns nil when every pair
// has been probed, or the context/limiter error when the sweep is interrupted.
// It never closes out.
func (d *DOSpacesScanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
	client := d.client
	if client == nil {
		client = NewClient()
	}
	baseURL := d.BaseURL
	if baseURL == "" {
		baseURL = "https://%s.%s.digitaloceanspaces.com"
	}

	names := bucketNames(d.Registry)
	if len(names) == 0 {
		return nil
	}

	for _, bucket := range names {
		if err := ctx.Err(); err != nil {
			return err
		}
		for _, region := range doRegions {
			if err := ctx.Err(); err != nil {
				return err
			}
			if d.Limiters != nil {
				if err := d.Limiters.Wait(ctx, d.Name(), d.RateLimit(), d.Burst(), false); err != nil {
					return err
				}
			}

			endpoint := fmt.Sprintf(baseURL, bucket, region)
			keys, err := d.listSpace(ctx, client, endpoint)
			// A probe interrupted by cancellation must end the sweep with the
			// context error instead of being logged as one more failed Space;
			// otherwise a cancel during the last probe would be reported as a
			// successful sweep.
			if ctxErr := ctx.Err(); ctxErr != nil {
				return ctxErr
			}
			if err != nil {
				log.Printf("spaces: bucket %q region %q probe failed (skipping): %v", bucket, region, err)
				continue
			}

			for _, key := range keys {
				if !isConfigFile(key) {
					continue
				}
				// NOTE(review): Source does not include the region, so the
				// same bucket name in two regions yields identical Sources.
				finding := recon.Finding{
					Source:     fmt.Sprintf("do://%s/%s", bucket, key),
					SourceType: "recon:spaces",
					Confidence: "medium",
					DetectedAt: time.Now(),
				}
				// Never block forever on a consumer that has gone away: give
				// up as soon as the context is done.
				select {
				case out <- finding:
				case <-ctx.Done():
					return ctx.Err()
				}
			}
		}
	}
	return nil
}
// listSpace checks a DO Spaces endpoint in two steps. A HEAD request is sent
// through the client's underlying HTTP client; unless it answers 200 OK the
// Space is treated as not publicly listable and (nil, nil) is returned.
// Otherwise a GET is issued through client.Do and the body is parsed as an
// S3-compatible ListBucketResult, returning the object keys. Request
// construction, transport and parse errors are returned to the caller.
func (d *DOSpacesScanner) listSpace(ctx context.Context, client *Client, endpoint string) ([]string, error) {
	// Step 1: existence / visibility probe.
	probe, err := http.NewRequestWithContext(ctx, http.MethodHead, endpoint, nil)
	if err != nil {
		return nil, err
	}
	probeResp, err := client.HTTP.Do(probe)
	if err != nil {
		return nil, err
	}
	// A HEAD response carries no body worth reading; release it right away.
	probeResp.Body.Close()
	if listable := probeResp.StatusCode == http.StatusOK; !listable {
		return nil, nil
	}

	// Step 2: fetch the listing itself.
	fetch, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, err
	}
	listing, err := client.Do(ctx, fetch)
	if err != nil {
		return nil, err
	}
	defer listing.Body.Close()

	// DO Spaces uses S3-compatible XML format.
	return parseS3ListXML(listing.Body)
}

View File

@@ -0,0 +1,128 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// doSpacesTestServer starts a fake DO Spaces endpoint. Paths under
// /testprov-keys/nyc3/ answer HEAD with a bare 200 and any other method with
// a three-key ListBucketResult; every other path answers 404. The caller is
// responsible for closing the returned server.
func doSpacesTestServer() *httptest.Server {
	const listing = `<?xml version="1.0" encoding="UTF-8"?>
<ListBucketResult>
<Contents><Key>.env.production</Key></Contents>
<Contents><Key>app.conf</Key></Contents>
<Contents><Key>logo.svg</Key></Contents>
</ListBucketResult>`

	// Only testprov-keys bucket in nyc3 region is publicly listable.
	servePublicSpace := func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodHead {
			w.Header().Set("Content-Type", "application/xml")
			_, _ = w.Write([]byte(listing))
			return
		}
		w.WriteHeader(http.StatusOK)
	}
	serveNotFound := func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusNotFound)
	}

	router := http.NewServeMux()
	router.HandleFunc("/", serveNotFound)
	router.HandleFunc("/testprov-keys/nyc3/", servePublicSpace)
	return httptest.NewServer(router)
}
// TestDOSpaces_Sweep runs a full sweep against the fake Spaces server and
// checks that only the config-like keys of the one public Space are reported,
// with the expected SourceType and Confidence.
func TestDOSpaces_Sweep(t *testing.T) {
	server := doSpacesTestServer()
	defer server.Close()

	scanner := &DOSpacesScanner{
		Registry: cloudTestRegistry(),
		BaseURL:  server.URL + "/%s/%s/",
		client:   NewClient(),
	}

	results := make(chan recon.Finding, 64)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	err := scanner.Sweep(ctx, "", results)
	if err != nil {
		t.Fatalf("Sweep error: %v", err)
	}
	close(results)

	got := make([]recon.Finding, 0, len(results))
	for finding := range results {
		got = append(got, finding)
	}

	// .env.production and app.conf match; logo.svg does not.
	if len(got) != 2 {
		t.Fatalf("expected 2 findings, got %d: %+v", len(got), got)
	}
	for i := range got {
		if got[i].SourceType != "recon:spaces" {
			t.Errorf("unexpected SourceType: %s", got[i].SourceType)
		}
		if got[i].Confidence != "medium" {
			t.Errorf("unexpected Confidence: %s", got[i].Confidence)
		}
	}
}
func TestDOSpaces_EmptyRegistry(t *testing.T) {
src := &DOSpacesScanner{
Registry: providers.NewRegistryFromProviders(nil),
Limiters: recon.NewLimiterRegistry(),
client: NewClient(),
}
out := make(chan recon.Finding, 4)
if err := src.Sweep(context.Background(), "", out); err != nil {
t.Fatalf("Sweep error: %v", err)
}
close(out)
if len(out) != 0 {
t.Fatal("expected 0 findings")
}
}
// TestDOSpaces_CtxCancelled checks that Sweep reports an error when it is
// started with an already-cancelled context.
func TestDOSpaces_CtxCancelled(t *testing.T) {
	server := doSpacesTestServer()
	defer server.Close()

	scanner := &DOSpacesScanner{
		Registry: cloudTestRegistry(),
		BaseURL:  server.URL + "/%s/%s/",
		client:   NewClient(),
	}

	// Cancel before the sweep starts.
	cancelled, cancel := context.WithCancel(context.Background())
	cancel()

	results := make(chan recon.Finding, 4)
	err := scanner.Sweep(cancelled, "", results)
	if err == nil {
		t.Fatal("expected ctx error")
	}
}
// TestDOSpaces_EnabledAndMeta pins the static metadata of a zero-value
// DOSpacesScanner: name, enabled flag, robots policy and burst size.
func TestDOSpaces_EnabledAndMeta(t *testing.T) {
	scanner := &DOSpacesScanner{}

	if name := scanner.Name(); name != "spaces" {
		t.Fatalf("unexpected name: %s", name)
	}
	if enabled := scanner.Enabled(recon.Config{}); !enabled {
		t.Fatal("expected Enabled=true")
	}
	if robots := scanner.RespectsRobots(); robots {
		t.Fatal("expected RespectsRobots=false")
	}
	if burst := scanner.Burst(); burst != 3 {
		t.Fatal("expected Burst=3")
	}
}

View File

@@ -0,0 +1,127 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// gcsTestServer starts a fake GCS endpoint. Paths under /testprov-keys/
// answer HEAD with a bare 200 and any other method with a four-item JSON
// object listing; every other path answers 404. The caller is responsible
// for closing the returned server.
func gcsTestServer() *httptest.Server {
	const listing = `{"items":[
{"name":".env"},
{"name":"config.yaml"},
{"name":"readme.md"},
{"name":"secrets.toml"}
]}`

	servePublicBucket := func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodHead {
			w.Header().Set("Content-Type", "application/json")
			_, _ = w.Write([]byte(listing))
			return
		}
		w.WriteHeader(http.StatusOK)
	}
	serveNotFound := func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusNotFound)
	}

	router := http.NewServeMux()
	router.HandleFunc("/", serveNotFound)
	router.HandleFunc("/testprov-keys/", servePublicBucket)
	return httptest.NewServer(router)
}
// TestGCSScanner_Sweep runs a full sweep against the fake GCS server and
// checks that only the config-like objects of the public bucket are reported,
// with the expected SourceType and Confidence.
func TestGCSScanner_Sweep(t *testing.T) {
	server := gcsTestServer()
	defer server.Close()

	scanner := &GCSScanner{
		Registry: cloudTestRegistry(),
		BaseURL:  server.URL + "/%s/",
		client:   NewClient(),
	}

	results := make(chan recon.Finding, 32)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	err := scanner.Sweep(ctx, "", results)
	if err != nil {
		t.Fatalf("Sweep error: %v", err)
	}
	close(results)

	got := make([]recon.Finding, 0, len(results))
	for finding := range results {
		got = append(got, finding)
	}

	// .env, config.yaml, secrets.toml match; readme.md does not.
	if len(got) != 3 {
		t.Fatalf("expected 3 findings, got %d: %+v", len(got), got)
	}
	for i := range got {
		if got[i].SourceType != "recon:gcs" {
			t.Errorf("unexpected SourceType: %s", got[i].SourceType)
		}
		if got[i].Confidence != "medium" {
			t.Errorf("unexpected Confidence: %s", got[i].Confidence)
		}
	}
}
func TestGCSScanner_EmptyRegistry(t *testing.T) {
src := &GCSScanner{
Registry: providers.NewRegistryFromProviders(nil),
Limiters: recon.NewLimiterRegistry(),
client: NewClient(),
}
out := make(chan recon.Finding, 4)
if err := src.Sweep(context.Background(), "", out); err != nil {
t.Fatalf("Sweep error: %v", err)
}
close(out)
if len(out) != 0 {
t.Fatal("expected 0 findings")
}
}
// TestGCSScanner_CtxCancelled checks that Sweep reports an error when it is
// started with an already-cancelled context.
func TestGCSScanner_CtxCancelled(t *testing.T) {
	server := gcsTestServer()
	defer server.Close()

	scanner := &GCSScanner{
		Registry: cloudTestRegistry(),
		BaseURL:  server.URL + "/%s/",
		client:   NewClient(),
	}

	// Cancel before the sweep starts.
	cancelled, cancel := context.WithCancel(context.Background())
	cancel()

	results := make(chan recon.Finding, 4)
	err := scanner.Sweep(cancelled, "", results)
	if err == nil {
		t.Fatal("expected ctx error")
	}
}
// TestGCSScanner_EnabledAndMeta pins the static metadata of a zero-value
// GCSScanner: name, enabled flag, robots policy and burst size.
func TestGCSScanner_EnabledAndMeta(t *testing.T) {
	scanner := &GCSScanner{}

	if name := scanner.Name(); name != "gcs" {
		t.Fatalf("unexpected name: %s", name)
	}
	if enabled := scanner.Enabled(recon.Config{}); !enabled {
		t.Fatal("expected Enabled=true")
	}
	if robots := scanner.RespectsRobots(); robots {
		t.Fatal("expected RespectsRobots=false")
	}
	if burst := scanner.Burst(); burst != 3 {
		t.Fatal("expected Burst=3")
	}
}

View File

@@ -0,0 +1,139 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// cloudTestRegistry builds the registry shared by the cloud scanner tests: a
// single provider named "testprov" with the keyword "testprov-key".
func cloudTestRegistry() *providers.Registry {
	testProvider := providers.Provider{
		Name:     "testprov",
		Keywords: []string{"testprov-key"},
	}
	return providers.NewRegistryFromProviders([]providers.Provider{testProvider})
}
// s3TestServer starts a fake S3 endpoint. Paths under /testprov-keys/ answer
// HEAD with a bare 200 (public bucket) and any other method with a four-key
// ListBucketResult; every other path answers 404 (bucket not found). The
// caller is responsible for closing the returned server.
func s3TestServer() *httptest.Server {
	const listing = `<?xml version="1.0" encoding="UTF-8"?>
<ListBucketResult>
<Contents><Key>.env</Key></Contents>
<Contents><Key>config.yaml</Key></Contents>
<Contents><Key>readme.md</Key></Contents>
<Contents><Key>data/settings.json</Key></Contents>
</ListBucketResult>`

	servePublicBucket := func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodHead {
			// GET — return S3 ListBucketResult XML.
			w.Header().Set("Content-Type", "application/xml")
			_, _ = w.Write([]byte(listing))
			return
		}
		w.WriteHeader(http.StatusOK)
	}
	serveNotFound := func(w http.ResponseWriter, _ *http.Request) {
		w.WriteHeader(http.StatusNotFound)
	}

	router := http.NewServeMux()
	router.HandleFunc("/", serveNotFound)
	router.HandleFunc("/testprov-keys/", servePublicBucket)
	return httptest.NewServer(router)
}
// TestS3Scanner_Sweep runs a full sweep against the fake S3 server and checks
// that only the config-like keys of the public bucket are reported, with the
// expected SourceType and Confidence.
func TestS3Scanner_Sweep(t *testing.T) {
	server := s3TestServer()
	defer server.Close()

	scanner := &S3Scanner{
		Registry: cloudTestRegistry(),
		BaseURL:  server.URL + "/%s/",
		client:   NewClient(),
	}

	results := make(chan recon.Finding, 32)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	err := scanner.Sweep(ctx, "", results)
	if err != nil {
		t.Fatalf("Sweep error: %v", err)
	}
	close(results)

	got := make([]recon.Finding, 0, len(results))
	for finding := range results {
		got = append(got, finding)
	}

	// .env, config.yaml, data/settings.json match; readme.md does not.
	if len(got) != 3 {
		t.Fatalf("expected 3 findings, got %d: %+v", len(got), got)
	}
	for i := range got {
		if got[i].SourceType != "recon:s3" {
			t.Errorf("unexpected SourceType: %s", got[i].SourceType)
		}
		if got[i].Confidence != "medium" {
			t.Errorf("unexpected Confidence: %s", got[i].Confidence)
		}
	}
}
func TestS3Scanner_EmptyRegistry(t *testing.T) {
src := &S3Scanner{
Registry: providers.NewRegistryFromProviders(nil),
Limiters: recon.NewLimiterRegistry(),
client: NewClient(),
}
out := make(chan recon.Finding, 4)
ctx := context.Background()
if err := src.Sweep(ctx, "", out); err != nil {
t.Fatalf("Sweep error: %v", err)
}
close(out)
if len(out) != 0 {
t.Fatal("expected 0 findings with empty registry")
}
}
// TestS3Scanner_CtxCancelled checks that Sweep reports an error when it is
// started with an already-cancelled context.
func TestS3Scanner_CtxCancelled(t *testing.T) {
	server := s3TestServer()
	defer server.Close()

	scanner := &S3Scanner{
		Registry: cloudTestRegistry(),
		BaseURL:  server.URL + "/%s/",
		client:   NewClient(),
	}

	// Cancel before the sweep starts.
	cancelled, cancel := context.WithCancel(context.Background())
	cancel()

	results := make(chan recon.Finding, 4)
	err := scanner.Sweep(cancelled, "", results)
	if err == nil {
		t.Fatal("expected ctx error")
	}
}
// TestS3Scanner_EnabledAndMeta pins the static metadata of a zero-value
// S3Scanner: name, enabled flag, robots policy and burst size.
func TestS3Scanner_EnabledAndMeta(t *testing.T) {
	scanner := &S3Scanner{}

	if name := scanner.Name(); name != "s3" {
		t.Fatalf("unexpected name: %s", name)
	}
	if enabled := scanner.Enabled(recon.Config{}); !enabled {
		t.Fatal("expected Enabled=true")
	}
	if robots := scanner.RespectsRobots(); robots {
		t.Fatal("expected RespectsRobots=false")
	}
	if burst := scanner.Burst(); burst != 3 {
		t.Fatal("expected Burst=3")
	}
}