diff --git a/pkg/recon/sources/gcsscanner.go b/pkg/recon/sources/gcsscanner.go
new file mode 100644
index 0000000..93a9172
--- /dev/null
+++ b/pkg/recon/sources/gcsscanner.go
@@ -0,0 +1,144 @@
+package sources
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"time"
+
+	"golang.org/x/time/rate"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// GCSScanner enumerates publicly accessible Google Cloud Storage buckets by
+// name pattern and flags readable objects that match common config-file
+// patterns as potential API key exposure vectors.
+//
+// Credentialless: uses anonymous HTTP to probe public GCS buckets.
+type GCSScanner struct {
+	Registry *providers.Registry
+	Limiters *recon.LimiterRegistry
+	// BaseURL overrides the GCS endpoint for tests. Default: "https://storage.googleapis.com/%s".
+	BaseURL string
+	client  *Client
+}
+
+// Compile-time assertion.
+var _ recon.ReconSource = (*GCSScanner)(nil)
+
+func (g *GCSScanner) Name() string { return "gcs" }
+func (g *GCSScanner) RateLimit() rate.Limit { return rate.Every(500 * time.Millisecond) }
+func (g *GCSScanner) Burst() int { return 3 }
+func (g *GCSScanner) RespectsRobots() bool { return false }
+func (g *GCSScanner) Enabled(_ recon.Config) bool { return true }
+
+func (g *GCSScanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
+	client := g.client
+	if client == nil {
+		client = NewClient()
+	}
+	baseURL := g.BaseURL
+	if baseURL == "" {
+		baseURL = "https://storage.googleapis.com/%s"
+	}
+
+	names := bucketNames(g.Registry)
+	if len(names) == 0 {
+		return nil
+	}
+
+	for _, bucket := range names {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+		if g.Limiters != nil {
+			if err := g.Limiters.Wait(ctx, g.Name(), g.RateLimit(), g.Burst(), false); err != nil {
+				return err
+			}
+		}
+
+		endpoint := fmt.Sprintf(baseURL, bucket)
+		items, err := g.listBucketGCS(ctx, client, endpoint)
+		if err != nil {
+			log.Printf("gcs: bucket %q probe failed (skipping): %v", bucket, err)
+			continue
+		}
+
+		for _, name := range items {
+			if !isConfigFile(name) {
+				continue
+			}
+			out <- recon.Finding{
+				Source:     fmt.Sprintf("gs://%s/%s", bucket, name),
+				SourceType: "recon:gcs",
+				Confidence: "medium",
+				DetectedAt: time.Now(),
+			}
+		}
+	}
+	return nil
+}
+
+// listBucketGCS probes a GCS bucket endpoint. A HEAD returning 200 means the
+// bucket is publicly accessible. We then GET with Accept: application/json to
+// retrieve the JSON listing.
+func (g *GCSScanner) listBucketGCS(ctx context.Context, client *Client, endpoint string) ([]string, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodHead, endpoint, nil)
+	if err != nil {
+		return nil, err
+	}
+	resp, err := client.HTTP.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, nil
+	}
+
+	getReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
+	if err != nil {
+		return nil, err
+	}
+	getReq.Header.Set("Accept", "application/json")
+	getResp, err := client.Do(ctx, getReq)
+	if err != nil {
+		return nil, err
+	}
+	defer getResp.Body.Close()
+
+	return parseGCSListJSON(getResp.Body)
+}
+
+// gcsListResult models the GCS JSON listing format.
+type gcsListResult struct {
+	Items []gcsItem `json:"items"`
+}
+
+type gcsItem struct {
+	Name string `json:"name"`
+}
+
+func parseGCSListJSON(r io.Reader) ([]string, error) {
+	data, err := io.ReadAll(io.LimitReader(r, 1<<20))
+	if err != nil {
+		return nil, err
+	}
+	var result gcsListResult
+	if err := json.Unmarshal(data, &result); err != nil {
+		return nil, err
+	}
+	names := make([]string, 0, len(result.Items))
+	for _, item := range result.Items {
+		if item.Name != "" {
+			names = append(names, item.Name)
+		}
+	}
+	return names, nil
+}
diff --git a/pkg/recon/sources/s3scanner.go b/pkg/recon/sources/s3scanner.go
new file mode 100644
index 0000000..7e09cc2
--- /dev/null
+++ b/pkg/recon/sources/s3scanner.go
@@ -0,0 +1,213 @@
+package sources
+
+import (
+	"context"
+	"encoding/xml"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"strings"
+	"time"
+
+	"golang.org/x/time/rate"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// S3Scanner enumerates publicly accessible AWS S3 buckets by name pattern and
+// flags readable objects that match common config-file patterns (.env, *.json,
+// *.yaml, etc.) as potential API key exposure vectors.
+//
+// The scanner is credentialless: it uses anonymous HTTP to probe public buckets.
+// Object contents are NOT downloaded; only the presence of suspicious filenames
+// is reported.
+type S3Scanner struct {
+	Registry *providers.Registry
+	Limiters *recon.LimiterRegistry
+	// BaseURL overrides the S3 endpoint for tests. Default: "https://%s.s3.amazonaws.com".
+	// Must contain exactly one %s placeholder for the bucket name.
+	BaseURL string
+	client  *Client
+}
+
+// Compile-time assertion.
+var _ recon.ReconSource = (*S3Scanner)(nil)
+
+func (s *S3Scanner) Name() string { return "s3" }
+func (s *S3Scanner) RateLimit() rate.Limit { return rate.Every(500 * time.Millisecond) }
+func (s *S3Scanner) Burst() int { return 3 }
+func (s *S3Scanner) RespectsRobots() bool { return false }
+func (s *S3Scanner) Enabled(_ recon.Config) bool { return true }
+
+func (s *S3Scanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
+	client := s.client
+	if client == nil {
+		client = NewClient()
+	}
+	baseURL := s.BaseURL
+	if baseURL == "" {
+		baseURL = "https://%s.s3.amazonaws.com"
+	}
+
+	names := bucketNames(s.Registry)
+	if len(names) == 0 {
+		return nil
+	}
+
+	for _, bucket := range names {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+		if s.Limiters != nil {
+			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+				return err
+			}
+		}
+
+		endpoint := fmt.Sprintf(baseURL, bucket)
+		keys, err := s.listBucketS3(ctx, client, endpoint)
+		if err != nil {
+			log.Printf("s3: bucket %q probe failed (skipping): %v", bucket, err)
+			continue
+		}
+
+		for _, key := range keys {
+			if !isConfigFile(key) {
+				continue
+			}
+			out <- recon.Finding{
+				Source:     fmt.Sprintf("s3://%s/%s", bucket, key),
+				SourceType: "recon:s3",
+				Confidence: "medium",
+				DetectedAt: time.Now(),
+			}
+		}
+	}
+	return nil
+}
+
+// listBucketS3 probes an S3 bucket endpoint. A HEAD that returns 200 means
+// public listing is enabled; we then GET to parse the ListBucketResult XML.
+// Returns nil keys if the bucket is not publicly listable.
+func (s *S3Scanner) listBucketS3(ctx context.Context, client *Client, endpoint string) ([]string, error) {
+	req, err := http.NewRequestWithContext(ctx, http.MethodHead, endpoint, nil)
+	if err != nil {
+		return nil, err
+	}
+	resp, err := client.HTTP.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return nil, nil // not publicly listable
+	}
+
+	// Public listing available -- fetch and parse XML.
+	getReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
+	if err != nil {
+		return nil, err
+	}
+	getResp, err := client.Do(ctx, getReq)
+	if err != nil {
+		return nil, err
+	}
+	defer getResp.Body.Close()
+
+	return parseS3ListXML(getResp.Body)
+}
+
+// s3ListResult models the AWS S3 ListBucketResult XML.
+type s3ListResult struct {
+	XMLName  xml.Name   `xml:"ListBucketResult"`
+	Contents []s3Object `xml:"Contents"`
+}
+
+type s3Object struct {
+	Key string `xml:"Key"`
+}
+
+func parseS3ListXML(r io.Reader) ([]string, error) {
+	data, err := io.ReadAll(io.LimitReader(r, 1<<20))
+	if err != nil {
+		return nil, err
+	}
+	var result s3ListResult
+	if err := xml.Unmarshal(data, &result); err != nil {
+		return nil, err
+	}
+	keys := make([]string, 0, len(result.Contents))
+	for _, obj := range result.Contents {
+		if obj.Key != "" {
+			keys = append(keys, obj.Key)
+		}
+	}
+	return keys, nil
+}
+
+// bucketSuffixes are common suffixes appended to provider keywords to generate
+// candidate bucket names.
+var bucketSuffixes = []string{
+	"-keys", "-config", "-backup", "-data", "-secrets", "-env",
+	"-api-keys", "-credentials", "-tokens",
+}
+
+// bucketNames generates candidate cloud storage bucket names from provider
+// keywords combined with common suffixes. Shared with GCSScanner,
+// AzureBlobScanner, and DOSpacesScanner in this package.
+func bucketNames(reg *providers.Registry) []string {
+	if reg == nil {
+		return nil
+	}
+
+	seen := make(map[string]struct{})
+	var names []string
+
+	for _, p := range reg.List() {
+		// Use provider name (lowercased, spaces to dashes) as base.
+		base := strings.ToLower(strings.ReplaceAll(p.Name, " ", "-"))
+		if base == "" {
+			continue
+		}
+		for _, suffix := range bucketSuffixes {
+			candidate := base + suffix
+			if _, ok := seen[candidate]; !ok {
+				seen[candidate] = struct{}{}
+				names = append(names, candidate)
+			}
+		}
+	}
+	return names
+}
+
+// isConfigFile reports whether the filename matches common config-file patterns
+// that may contain API keys.
+func isConfigFile(name string) bool {
+	lower := strings.ToLower(name)
+	// Exact basenames.
+	for _, exact := range []string{".env", ".env.local", ".env.production", ".env.development"} {
+		if lower == exact || strings.HasSuffix(lower, "/"+exact) {
+			return true
+		}
+	}
+	// Extension matches.
+	for _, ext := range []string{".json", ".yaml", ".yml", ".toml", ".conf", ".cfg", ".ini", ".properties"} {
+		if strings.HasSuffix(lower, ext) {
+			return true
+		}
+	}
+	// Basename prefix matches (config.*, settings.*, credentials.*, secrets.*).
+	base := lower
+	if idx := strings.LastIndex(lower, "/"); idx >= 0 {
+		base = lower[idx+1:]
+	}
+	for _, prefix := range []string{"config.", "settings.", "credentials.", "secrets."} {
+		if strings.HasPrefix(base, prefix) {
+			return true
+		}
+	}
+	return false
+}
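
The listing parsers and the filename heuristic are pure functions with no HTTP or registry dependencies, so they can be covered without stubbing bucket endpoints. A minimal test sketch under that assumption follows; the s3scanner_test.go placement and the fixture values are illustrative and not part of this change:

// s3scanner_test.go (illustrative) -- exercises only the pure helpers above.
package sources

import (
	"strings"
	"testing"
)

func TestParseS3ListXML(t *testing.T) {
	// A trimmed ListBucketResult body, as returned for a publicly listable bucket.
	const listing = `<?xml version="1.0" encoding="UTF-8"?>
<ListBucketResult xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
  <Contents><Key>prod/.env</Key></Contents>
  <Contents><Key>readme.txt</Key></Contents>
</ListBucketResult>`

	keys, err := parseS3ListXML(strings.NewReader(listing))
	if err != nil {
		t.Fatalf("parseS3ListXML: %v", err)
	}
	if len(keys) != 2 || keys[0] != "prod/.env" {
		t.Fatalf("unexpected keys: %v", keys)
	}
}

func TestIsConfigFile(t *testing.T) {
	cases := map[string]bool{
		"prod/.env":       true,  // exact basename match on a nested key
		"settings.yaml":   true,  // extension match
		"credentials.txt": true,  // basename prefix match
		"readme.txt":      false, // no config pattern
	}
	for name, want := range cases {
		if got := isConfigFile(name); got != want {
			t.Errorf("isConfigFile(%q) = %v, want %v", name, got, want)
		}
	}
}

Exercising Sweep end to end would additionally need an httptest server wired in through BaseURL and a populated providers.Registry; that is what the BaseURL override and the unexported client field are there for.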