feat(12-03): implement S3Scanner and GCSScanner cloud storage recon sources
- S3Scanner enumerates public AWS S3 buckets by provider keyword + suffix pattern
- GCSScanner enumerates public GCS buckets with JSON listing format
- Shared bucketNames() helper and isConfigFile() filter for config-pattern files
- Both credentialless (anonymous HTTP), always Enabled, BaseURL override for tests
This commit is contained in:
144
pkg/recon/sources/gcsscanner.go
Normal file
144
pkg/recon/sources/gcsscanner.go
Normal file
@@ -0,0 +1,144 @@
|
||||
package sources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"golang.org/x/time/rate"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||
)
|
||||
|
||||
// GCSScanner enumerates publicly accessible Google Cloud Storage buckets by
// name pattern and flags readable objects that match common config-file
// patterns as potential API key exposure vectors.
//
// Credentialless: uses anonymous HTTP to probe public GCS buckets.
type GCSScanner struct {
	// Registry supplies provider keywords used to generate candidate bucket names.
	Registry *providers.Registry
	// Limiters throttles outbound probes per source name; may be nil (no limiting).
	Limiters *recon.LimiterRegistry
	// BaseURL overrides the GCS endpoint for tests. Default: "https://storage.googleapis.com/%s".
	// Must contain exactly one %s placeholder for the bucket name.
	BaseURL string
	// client performs the HTTP probes; when nil, Sweep falls back to NewClient().
	client *Client
}

// Compile-time assertion that *GCSScanner satisfies recon.ReconSource.
var _ recon.ReconSource = (*GCSScanner)(nil)
|
||||
|
||||
func (g *GCSScanner) Name() string { return "gcs" }
|
||||
func (g *GCSScanner) RateLimit() rate.Limit { return rate.Every(500 * time.Millisecond) }
|
||||
func (g *GCSScanner) Burst() int { return 3 }
|
||||
func (g *GCSScanner) RespectsRobots() bool { return false }
|
||||
func (g *GCSScanner) Enabled(_ recon.Config) bool { return true }
|
||||
|
||||
func (g *GCSScanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
|
||||
client := g.client
|
||||
if client == nil {
|
||||
client = NewClient()
|
||||
}
|
||||
baseURL := g.BaseURL
|
||||
if baseURL == "" {
|
||||
baseURL = "https://storage.googleapis.com/%s"
|
||||
}
|
||||
|
||||
names := bucketNames(g.Registry)
|
||||
if len(names) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, bucket := range names {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
if g.Limiters != nil {
|
||||
if err := g.Limiters.Wait(ctx, g.Name(), g.RateLimit(), g.Burst(), false); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
endpoint := fmt.Sprintf(baseURL, bucket)
|
||||
items, err := g.listBucketGCS(ctx, client, endpoint)
|
||||
if err != nil {
|
||||
log.Printf("gcs: bucket %q probe failed (skipping): %v", bucket, err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, name := range items {
|
||||
if !isConfigFile(name) {
|
||||
continue
|
||||
}
|
||||
out <- recon.Finding{
|
||||
Source: fmt.Sprintf("gs://%s/%s", bucket, name),
|
||||
SourceType: "recon:gcs",
|
||||
Confidence: "medium",
|
||||
DetectedAt: time.Now(),
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// listBucketGCS probes a GCS bucket endpoint. A HEAD returning 200 means the
|
||||
// bucket is publicly accessible. We then GET with Accept: application/json to
|
||||
// retrieve the JSON listing.
|
||||
func (g *GCSScanner) listBucketGCS(ctx context.Context, client *Client, endpoint string) ([]string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodHead, endpoint, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp, err := client.HTTP.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
getReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
getReq.Header.Set("Accept", "application/json")
|
||||
getResp, err := client.Do(ctx, getReq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer getResp.Body.Close()
|
||||
|
||||
return parseGCSListJSON(getResp.Body)
|
||||
}
|
||||
|
||||
// gcsListResult models the GCS JSON listing format.
|
||||
type gcsListResult struct {
|
||||
Items []gcsItem `json:"items"`
|
||||
}
|
||||
|
||||
type gcsItem struct {
|
||||
Name string `json:"name"`
|
||||
}
|
||||
|
||||
func parseGCSListJSON(r io.Reader) ([]string, error) {
|
||||
data, err := io.ReadAll(io.LimitReader(r, 1<<20))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var result gcsListResult
|
||||
if err := json.Unmarshal(data, &result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
names := make([]string, 0, len(result.Items))
|
||||
for _, item := range result.Items {
|
||||
if item.Name != "" {
|
||||
names = append(names, item.Name)
|
||||
}
|
||||
}
|
||||
return names, nil
|
||||
}
|
||||
213
pkg/recon/sources/s3scanner.go
Normal file
213
pkg/recon/sources/s3scanner.go
Normal file
@@ -0,0 +1,213 @@
|
||||
package sources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"golang.org/x/time/rate"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||
)
|
||||
|
||||
// S3Scanner enumerates publicly accessible AWS S3 buckets by name pattern and
// flags readable objects that match common config-file patterns (.env, *.json,
// *.yaml, etc.) as potential API key exposure vectors.
//
// The scanner is credentialless: it uses anonymous HTTP to probe public buckets.
// Object contents are NOT downloaded; only the presence of suspicious filenames
// is reported.
type S3Scanner struct {
	// Registry supplies provider keywords used to generate candidate bucket names.
	Registry *providers.Registry
	// Limiters throttles outbound probes per source name; may be nil (no limiting).
	Limiters *recon.LimiterRegistry
	// BaseURL overrides the S3 endpoint for tests. Default: "https://%s.s3.amazonaws.com".
	// Must contain exactly one %s placeholder for the bucket name.
	BaseURL string
	// client performs the HTTP probes; when nil, Sweep falls back to NewClient().
	client *Client
}

// Compile-time assertion that *S3Scanner satisfies recon.ReconSource.
var _ recon.ReconSource = (*S3Scanner)(nil)
|
||||
|
||||
func (s *S3Scanner) Name() string { return "s3" }
|
||||
func (s *S3Scanner) RateLimit() rate.Limit { return rate.Every(500 * time.Millisecond) }
|
||||
func (s *S3Scanner) Burst() int { return 3 }
|
||||
func (s *S3Scanner) RespectsRobots() bool { return false }
|
||||
func (s *S3Scanner) Enabled(_ recon.Config) bool { return true }
|
||||
|
||||
func (s *S3Scanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
|
||||
client := s.client
|
||||
if client == nil {
|
||||
client = NewClient()
|
||||
}
|
||||
baseURL := s.BaseURL
|
||||
if baseURL == "" {
|
||||
baseURL = "https://%s.s3.amazonaws.com"
|
||||
}
|
||||
|
||||
names := bucketNames(s.Registry)
|
||||
if len(names) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, bucket := range names {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
if s.Limiters != nil {
|
||||
if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
endpoint := fmt.Sprintf(baseURL, bucket)
|
||||
keys, err := s.listBucketS3(ctx, client, endpoint)
|
||||
if err != nil {
|
||||
log.Printf("s3: bucket %q probe failed (skipping): %v", bucket, err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, key := range keys {
|
||||
if !isConfigFile(key) {
|
||||
continue
|
||||
}
|
||||
out <- recon.Finding{
|
||||
Source: fmt.Sprintf("s3://%s/%s", bucket, key),
|
||||
SourceType: "recon:s3",
|
||||
Confidence: "medium",
|
||||
DetectedAt: time.Now(),
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// listBucketS3 probes an S3 bucket endpoint. A HEAD that returns 200 means
|
||||
// public listing is enabled; we then GET to parse the ListBucketResult XML.
|
||||
// Returns nil keys if the bucket is not publicly listable.
|
||||
func (s *S3Scanner) listBucketS3(ctx context.Context, client *Client, endpoint string) ([]string, error) {
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodHead, endpoint, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp, err := client.HTTP.Do(req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
return nil, nil // not publicly listable
|
||||
}
|
||||
|
||||
// Public listing available -- fetch and parse XML.
|
||||
getReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
getResp, err := client.Do(ctx, getReq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer getResp.Body.Close()
|
||||
|
||||
return parseS3ListXML(getResp.Body)
|
||||
}
|
||||
|
||||
// s3ListResult models the AWS S3 ListBucketResult XML.
|
||||
type s3ListResult struct {
|
||||
XMLName xml.Name `xml:"ListBucketResult"`
|
||||
Contents []s3Object `xml:"Contents"`
|
||||
}
|
||||
|
||||
type s3Object struct {
|
||||
Key string `xml:"Key"`
|
||||
}
|
||||
|
||||
func parseS3ListXML(r io.Reader) ([]string, error) {
|
||||
data, err := io.ReadAll(io.LimitReader(r, 1<<20))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var result s3ListResult
|
||||
if err := xml.Unmarshal(data, &result); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
keys := make([]string, 0, len(result.Contents))
|
||||
for _, obj := range result.Contents {
|
||||
if obj.Key != "" {
|
||||
keys = append(keys, obj.Key)
|
||||
}
|
||||
}
|
||||
return keys, nil
|
||||
}
|
||||
|
||||
// bucketSuffixes are common suffixes appended to provider keywords to generate
|
||||
// candidate bucket names.
|
||||
var bucketSuffixes = []string{
|
||||
"-keys", "-config", "-backup", "-data", "-secrets", "-env",
|
||||
"-api-keys", "-credentials", "-tokens",
|
||||
}
|
||||
|
||||
// bucketNames generates candidate cloud storage bucket names from provider
|
||||
// keywords combined with common suffixes. Exported for use by GCSScanner,
|
||||
// AzureBlobScanner, and DOSpacesScanner.
|
||||
func bucketNames(reg *providers.Registry) []string {
|
||||
if reg == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
seen := make(map[string]struct{})
|
||||
var names []string
|
||||
|
||||
for _, p := range reg.List() {
|
||||
// Use provider name (lowercased, spaces to dashes) as base.
|
||||
base := strings.ToLower(strings.ReplaceAll(p.Name, " ", "-"))
|
||||
if base == "" {
|
||||
continue
|
||||
}
|
||||
for _, suffix := range bucketSuffixes {
|
||||
candidate := base + suffix
|
||||
if _, ok := seen[candidate]; !ok {
|
||||
seen[candidate] = struct{}{}
|
||||
names = append(names, candidate)
|
||||
}
|
||||
}
|
||||
}
|
||||
return names
|
||||
}
|
||||
|
||||
// isConfigFile reports whether name matches common config-file patterns that
// may contain API keys: known dotenv basenames, config-style extensions, or
// config.* / settings.* style basename prefixes. Matching is case-insensitive.
func isConfigFile(name string) bool {
	lower := strings.ToLower(name)

	// Known dotenv basenames, at the root or under any directory.
	dotenvNames := []string{".env", ".env.local", ".env.production", ".env.development"}
	for _, basename := range dotenvNames {
		if lower == basename || strings.HasSuffix(lower, "/"+basename) {
			return true
		}
	}

	// Config-style extensions, anywhere in the tree.
	extensions := []string{".json", ".yaml", ".yml", ".toml", ".conf", ".cfg", ".ini", ".properties"}
	for _, ext := range extensions {
		if strings.HasSuffix(lower, ext) {
			return true
		}
	}

	// Config-style basename prefixes (config.*, settings.*, ...).
	base := lower
	if slash := strings.LastIndexByte(lower, '/'); slash >= 0 {
		base = lower[slash+1:]
	}
	for _, prefix := range []string{"config.", "settings.", "credentials.", "secrets."} {
		if strings.HasPrefix(base, prefix) {
			return true
		}
	}
	return false
}
|
||||
Reference in New Issue
Block a user