package sources import ( "context" "encoding/xml" "fmt" "io" "log" "net/http" "strings" "time" "golang.org/x/time/rate" "github.com/salvacybersec/keyhunter/pkg/providers" "github.com/salvacybersec/keyhunter/pkg/recon" ) // S3Scanner enumerates publicly accessible AWS S3 buckets by name pattern and // flags readable objects that match common config-file patterns (.env, *.json, // *.yaml, etc.) as potential API key exposure vectors. // // The scanner is credentialless: it uses anonymous HTTP to probe public buckets. // Object contents are NOT downloaded; only the presence of suspicious filenames // is reported. type S3Scanner struct { Registry *providers.Registry Limiters *recon.LimiterRegistry // BaseURL overrides the S3 endpoint for tests. Default: "https://%s.s3.amazonaws.com". // Must contain exactly one %s placeholder for the bucket name. BaseURL string client *Client } // Compile-time assertion. var _ recon.ReconSource = (*S3Scanner)(nil) func (s *S3Scanner) Name() string { return "s3" } func (s *S3Scanner) RateLimit() rate.Limit { return rate.Every(500 * time.Millisecond) } func (s *S3Scanner) Burst() int { return 3 } func (s *S3Scanner) RespectsRobots() bool { return false } func (s *S3Scanner) Enabled(_ recon.Config) bool { return true } func (s *S3Scanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error { client := s.client if client == nil { client = NewClient() } baseURL := s.BaseURL if baseURL == "" { baseURL = "https://%s.s3.amazonaws.com" } names := bucketNames(s.Registry) if len(names) == 0 { return nil } for _, bucket := range names { if err := ctx.Err(); err != nil { return err } if s.Limiters != nil { if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil { return err } } endpoint := fmt.Sprintf(baseURL, bucket) keys, err := s.listBucketS3(ctx, client, endpoint) if err != nil { log.Printf("s3: bucket %q probe failed (skipping): %v", bucket, err) continue } for _, key := range keys { if !isConfigFile(key) { continue } out <- recon.Finding{ Source: fmt.Sprintf("s3://%s/%s", bucket, key), SourceType: "recon:s3", Confidence: "medium", DetectedAt: time.Now(), } } } return nil } // listBucketS3 probes an S3 bucket endpoint. A HEAD that returns 200 means // public listing is enabled; we then GET to parse the ListBucketResult XML. // Returns nil keys if the bucket is not publicly listable. func (s *S3Scanner) listBucketS3(ctx context.Context, client *Client, endpoint string) ([]string, error) { req, err := http.NewRequestWithContext(ctx, http.MethodHead, endpoint, nil) if err != nil { return nil, err } resp, err := client.HTTP.Do(req) if err != nil { return nil, err } resp.Body.Close() if resp.StatusCode != http.StatusOK { return nil, nil // not publicly listable } // Public listing available -- fetch and parse XML. getReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) if err != nil { return nil, err } getResp, err := client.Do(ctx, getReq) if err != nil { return nil, err } defer getResp.Body.Close() return parseS3ListXML(getResp.Body) } // s3ListResult models the AWS S3 ListBucketResult XML. type s3ListResult struct { XMLName xml.Name `xml:"ListBucketResult"` Contents []s3Object `xml:"Contents"` } type s3Object struct { Key string `xml:"Key"` } func parseS3ListXML(r io.Reader) ([]string, error) { data, err := io.ReadAll(io.LimitReader(r, 1<<20)) if err != nil { return nil, err } var result s3ListResult if err := xml.Unmarshal(data, &result); err != nil { return nil, err } keys := make([]string, 0, len(result.Contents)) for _, obj := range result.Contents { if obj.Key != "" { keys = append(keys, obj.Key) } } return keys, nil } // bucketSuffixes are common suffixes appended to provider keywords to generate // candidate bucket names. var bucketSuffixes = []string{ "-keys", "-config", "-backup", "-data", "-secrets", "-env", "-api-keys", "-credentials", "-tokens", } // bucketNames generates candidate cloud storage bucket names from provider // keywords combined with common suffixes. Exported for use by GCSScanner, // AzureBlobScanner, and DOSpacesScanner. func bucketNames(reg *providers.Registry) []string { if reg == nil { return nil } seen := make(map[string]struct{}) var names []string for _, p := range reg.List() { // Use provider name (lowercased, spaces to dashes) as base. base := strings.ToLower(strings.ReplaceAll(p.Name, " ", "-")) if base == "" { continue } for _, suffix := range bucketSuffixes { candidate := base + suffix if _, ok := seen[candidate]; !ok { seen[candidate] = struct{}{} names = append(names, candidate) } } } return names } // isConfigFile returns true if the filename matches common config file patterns // that may contain API keys. func isConfigFile(name string) bool { lower := strings.ToLower(name) // Exact basenames. for _, exact := range []string{".env", ".env.local", ".env.production", ".env.development"} { if lower == exact || strings.HasSuffix(lower, "/"+exact) { return true } } // Extension matches. for _, ext := range []string{".json", ".yaml", ".yml", ".toml", ".conf", ".cfg", ".ini", ".properties"} { if strings.HasSuffix(lower, ext) { return true } } // Prefix matches (config.*, settings.*). base := lower if idx := strings.LastIndex(lower, "/"); idx >= 0 { base = lower[idx+1:] } for _, prefix := range []string{"config.", "settings.", "credentials.", "secrets."} { if strings.HasPrefix(base, prefix) { return true } } return false }