package sources import ( "context" "encoding/xml" "fmt" "io" "log" "net/http" "time" "golang.org/x/time/rate" "github.com/salvacybersec/keyhunter/pkg/providers" "github.com/salvacybersec/keyhunter/pkg/recon" ) // AzureBlobScanner enumerates publicly accessible Azure Blob Storage containers // by name pattern and flags readable objects that match common config-file // patterns as potential API key exposure vectors. // // Credentialless: uses anonymous HTTP to probe public Azure Blob containers. type AzureBlobScanner struct { Registry *providers.Registry Limiters *recon.LimiterRegistry // BaseURL overrides the Azure Blob endpoint for tests. // Default: "https://%s.blob.core.windows.net/%s?restype=container&comp=list" // Must contain two %s placeholders: account name and container name. BaseURL string client *Client } // Compile-time assertion. var _ recon.ReconSource = (*AzureBlobScanner)(nil) func (a *AzureBlobScanner) Name() string { return "azureblob" } func (a *AzureBlobScanner) RateLimit() rate.Limit { return rate.Every(500 * time.Millisecond) } func (a *AzureBlobScanner) Burst() int { return 3 } func (a *AzureBlobScanner) RespectsRobots() bool { return false } func (a *AzureBlobScanner) Enabled(_ recon.Config) bool { return true } // azureContainerNames are common container names to probe within each account. 
var azureContainerNames = []string{ "config", "secrets", "backup", "data", "keys", "env", "credentials", } func (a *AzureBlobScanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error { client := a.client if client == nil { client = NewClient() } baseURL := a.BaseURL if baseURL == "" { baseURL = "https://%s.blob.core.windows.net/%s?restype=container&comp=list" } accounts := bucketNames(a.Registry) if len(accounts) == 0 { return nil } for _, account := range accounts { if err := ctx.Err(); err != nil { return err } for _, container := range azureContainerNames { if err := ctx.Err(); err != nil { return err } if a.Limiters != nil { if err := a.Limiters.Wait(ctx, a.Name(), a.RateLimit(), a.Burst(), false); err != nil { return err } } endpoint := fmt.Sprintf(baseURL, account, container) blobs, err := a.listBlobs(ctx, client, endpoint) if err != nil { log.Printf("azureblob: account %q container %q probe failed (skipping): %v", account, container, err) continue } for _, name := range blobs { if !isConfigFile(name) { continue } out <- recon.Finding{ Source: fmt.Sprintf("azure://%s/%s/%s", account, container, name), SourceType: "recon:azureblob", Confidence: "medium", DetectedAt: time.Now(), } } } } return nil } // listBlobs fetches and parses Azure Blob container listing XML. func (a *AzureBlobScanner) listBlobs(ctx context.Context, client *Client, endpoint string) ([]string, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) if err != nil { return nil, err } resp, err := client.Do(ctx, req) if err != nil { return nil, nil // non-public or non-existent — skip silently } defer resp.Body.Close() return parseAzureBlobXML(resp.Body) } // azureEnumBlobResults models the Azure Blob EnumerationResults XML. 
type azureEnumBlobResults struct {
	// XMLName pins the expected root element to <EnumerationResults>.
	XMLName xml.Name `xml:"EnumerationResults"`
	// Blobs maps the <Blobs> wrapper element.
	Blobs azureBlobs `xml:"Blobs"`
}

// azureBlobs maps the <Blobs> element, which holds repeated <Blob> children.
type azureBlobs struct {
	Blob []azureBlob `xml:"Blob"`
}

// azureBlob maps a single <Blob> entry; only its <Name> is decoded.
type azureBlob struct {
	Name string `xml:"Name"`
}

// parseAzureBlobXML decodes a container listing from r and returns the
// non-empty blob names in document order.
//
// At most 1 MiB is read from r. Read and XML decoding errors are returned
// unchanged. On success the returned slice is never nil, even when the
// listing contains no named blobs.
func parseAzureBlobXML(r io.Reader) ([]string, error) {
	const maxListingBytes = 1 << 20

	raw, readErr := io.ReadAll(io.LimitReader(r, maxListingBytes))
	if readErr != nil {
		return nil, readErr
	}

	var listing azureEnumBlobResults
	if parseErr := xml.Unmarshal(raw, &listing); parseErr != nil {
		return nil, parseErr
	}

	entries := listing.Blobs.Blob
	names := make([]string, 0, len(entries))
	for i := range entries {
		// Entries without a name carry nothing to report.
		if name := entries[i].Name; name != "" {
			names = append(names, name)
		}
	}
	return names, nil
}