Files
keyhunter/pkg/recon/sources/s3scanner.go
salvacybersec 47d542b9de feat(12-03): implement S3Scanner and GCSScanner cloud storage recon sources
- S3Scanner enumerates public AWS S3 buckets by provider keyword + suffix pattern
- GCSScanner enumerates public GCS buckets with JSON listing format
- Shared bucketNames() helper and isConfigFile() filter for config-pattern files
- Both credentialless (anonymous HTTP), always Enabled, BaseURL override for tests
2026-04-06 12:25:55 +03:00

214 lines
5.6 KiB
Go

package sources
import (
"context"
"encoding/xml"
"fmt"
"io"
"log"
"net/http"
"strings"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// S3Scanner enumerates publicly accessible AWS S3 buckets by name pattern and
// flags readable objects that match common config-file patterns (.env, *.json,
// *.yaml, etc.) as potential API key exposure vectors.
//
// The scanner is credentialless: it uses anonymous HTTP to probe public buckets.
// Object contents are NOT downloaded; only the presence of suspicious filenames
// is reported.
type S3Scanner struct {
	// Registry supplies the provider list whose names seed candidate bucket names.
	Registry *providers.Registry
	// Limiters throttles outbound probes; a nil value disables rate limiting.
	Limiters *recon.LimiterRegistry
	// BaseURL overrides the S3 endpoint for tests. Default: "https://%s.s3.amazonaws.com".
	// Must contain exactly one %s placeholder for the bucket name.
	BaseURL string
	// client is the HTTP client used for probes; when nil, Sweep creates a
	// fresh one via NewClient().
	client *Client
}
// Compile-time assertion that *S3Scanner satisfies recon.ReconSource.
var _ recon.ReconSource = (*S3Scanner)(nil)

// Name returns the unique identifier for this source ("s3").
func (s *S3Scanner) Name() string { return "s3" }

// RateLimit allows one bucket probe every 500ms.
func (s *S3Scanner) RateLimit() rate.Limit { return rate.Every(500 * time.Millisecond) }

// Burst permits up to 3 probes in quick succession.
func (s *S3Scanner) Burst() int { return 3 }

// RespectsRobots reports false: S3 endpoints are storage APIs, not crawlable sites.
func (s *S3Scanner) RespectsRobots() bool { return false }

// Enabled always reports true; the scanner needs no credentials or configuration.
func (s *S3Scanner) Enabled(_ recon.Config) bool { return true }
// Sweep derives candidate bucket names from the provider registry, probes each
// for public listability, and emits a Finding for every listed object whose
// key matches a config-file pattern. The domain argument is unused: bucket
// candidates come from provider keywords, not the target domain.
//
// Failures probing an individual bucket are logged and skipped; Sweep only
// returns an error on context cancellation or a rate-limiter failure.
func (s *S3Scanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
	client := s.client
	if client == nil {
		client = NewClient()
	}
	baseURL := s.BaseURL
	if baseURL == "" {
		baseURL = "https://%s.s3.amazonaws.com"
	}
	names := bucketNames(s.Registry)
	if len(names) == 0 {
		return nil
	}
	for _, bucket := range names {
		if err := ctx.Err(); err != nil {
			return err
		}
		if s.Limiters != nil {
			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
				return err
			}
		}
		endpoint := fmt.Sprintf(baseURL, bucket)
		keys, err := s.listBucketS3(ctx, client, endpoint)
		if err != nil {
			// Best effort: one unreachable bucket must not abort the sweep.
			log.Printf("s3: bucket %q probe failed (skipping): %v", bucket, err)
			continue
		}
		for _, key := range keys {
			if !isConfigFile(key) {
				continue
			}
			finding := recon.Finding{
				Source:     fmt.Sprintf("s3://%s/%s", bucket, key),
				SourceType: "recon:s3",
				Confidence: "medium",
				DetectedAt: time.Now(),
			}
			// Guard the send with ctx so Sweep cannot block forever if the
			// consumer stops draining the channel.
			select {
			case out <- finding:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	}
	return nil
}
// listBucketS3 probes an S3 bucket endpoint. A HEAD that returns 200 means
// public listing is enabled; we then GET to parse the ListBucketResult XML.
// Returns nil keys (and nil error) if the bucket is not publicly listable.
func (s *S3Scanner) listBucketS3(ctx context.Context, client *Client, endpoint string) ([]string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodHead, endpoint, nil)
	if err != nil {
		return nil, err
	}
	resp, err := client.HTTP.Do(req)
	if err != nil {
		return nil, err
	}
	// Drain before closing so the transport can reuse the connection.
	// HEAD bodies are normally empty, so this is near-free; errors here
	// are irrelevant to the probe result.
	_, _ = io.Copy(io.Discard, resp.Body)
	resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, nil // not publicly listable
	}
	// Public listing available -- fetch and parse XML.
	getReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, err
	}
	getResp, err := client.Do(ctx, getReq)
	if err != nil {
		return nil, err
	}
	defer getResp.Body.Close()
	// The bucket can become unlistable between HEAD and GET (or the GET may
	// be denied); don't try to parse an HTML/XML error page as a listing.
	if getResp.StatusCode != http.StatusOK {
		return nil, nil
	}
	return parseS3ListXML(getResp.Body)
}
// s3ListResult models the AWS S3 ListBucketResult XML.
type s3ListResult struct {
XMLName xml.Name `xml:"ListBucketResult"`
Contents []s3Object `xml:"Contents"`
}
type s3Object struct {
Key string `xml:"Key"`
}
func parseS3ListXML(r io.Reader) ([]string, error) {
data, err := io.ReadAll(io.LimitReader(r, 1<<20))
if err != nil {
return nil, err
}
var result s3ListResult
if err := xml.Unmarshal(data, &result); err != nil {
return nil, err
}
keys := make([]string, 0, len(result.Contents))
for _, obj := range result.Contents {
if obj.Key != "" {
keys = append(keys, obj.Key)
}
}
return keys, nil
}
// bucketSuffixes are common suffixes appended to provider keywords to generate
// candidate bucket names (e.g. a provider named "stripe" yields "stripe-keys",
// "stripe-config", ...).
var bucketSuffixes = []string{
	"-keys", "-config", "-backup", "-data", "-secrets", "-env",
	"-api-keys", "-credentials", "-tokens",
}
// bucketNames generates candidate cloud storage bucket names by combining each
// provider's normalized name with every entry in bucketSuffixes. Shared within
// this package by GCSScanner, AzureBlobScanner, and DOSpacesScanner. Returns
// nil for a nil registry; duplicates are suppressed while preserving first-seen
// order.
func bucketNames(reg *providers.Registry) []string {
	if reg == nil {
		return nil
	}
	entries := reg.List()
	seen := make(map[string]struct{}, len(entries)*len(bucketSuffixes))
	var candidates []string
	for _, p := range entries {
		// Normalize the provider name: lowercase, spaces become dashes.
		stem := strings.ReplaceAll(strings.ToLower(p.Name), " ", "-")
		if stem == "" {
			continue
		}
		for _, sfx := range bucketSuffixes {
			candidate := stem + sfx
			if _, dup := seen[candidate]; dup {
				continue
			}
			seen[candidate] = struct{}{}
			candidates = append(candidates, candidate)
		}
	}
	return candidates
}
// isConfigFile reports whether an object key looks like a configuration file
// that commonly carries API keys: a well-known dotenv basename, a config-style
// extension, or a basename beginning with config./settings./credentials./secrets.
func isConfigFile(name string) bool {
	key := strings.ToLower(name)

	// Basename: portion after the final slash, or the whole key when unnested.
	base := key
	if slash := strings.LastIndex(key, "/"); slash >= 0 {
		base = key[slash+1:]
	}

	// Well-known dotenv basenames, matched exactly.
	switch base {
	case ".env", ".env.local", ".env.production", ".env.development":
		return true
	}

	// Config-style extensions anywhere in the tree.
	for _, ext := range []string{".json", ".yaml", ".yml", ".toml", ".conf", ".cfg", ".ini", ".properties"} {
		if strings.HasSuffix(key, ext) {
			return true
		}
	}

	// Suggestive basename prefixes (config.*, settings.*, ...).
	for _, pre := range []string{"config.", "settings.", "credentials.", "secrets."} {
		if strings.HasPrefix(base, pre) {
			return true
		}
	}
	return false
}