Files
keyhunter/pkg/recon/sources/gcsscanner.go
salvacybersec 47d542b9de feat(12-03): implement S3Scanner and GCSScanner cloud storage recon sources
- S3Scanner enumerates public AWS S3 buckets by provider keyword + suffix pattern
- GCSScanner enumerates public GCS buckets with JSON listing format
- Shared bucketNames() helper and isConfigFile() filter for config-pattern files
- Both credentialless (anonymous HTTP), always Enabled, BaseURL override for tests
2026-04-06 12:25:55 +03:00

145 lines
3.6 KiB
Go

package sources
import (
"context"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// GCSScanner enumerates publicly accessible Google Cloud Storage buckets by
// name pattern and flags readable objects that match common config-file
// patterns as potential API key exposure vectors.
//
// Credentialless: uses anonymous HTTP to probe public GCS buckets.
type GCSScanner struct {
	// Registry supplies the provider data from which candidate bucket
	// names are derived (via the shared bucketNames helper).
	Registry *providers.Registry
	// Limiters, when non-nil, throttles probe requests per source name
	// using this scanner's RateLimit/Burst settings.
	Limiters *recon.LimiterRegistry
	// BaseURL overrides the GCS endpoint for tests. Default: "https://storage.googleapis.com/%s".
	BaseURL string
	// client performs the HTTP probes; when nil, Sweep creates a default
	// via NewClient().
	client *Client
}
// Compile-time assertion.
var _ recon.ReconSource = (*GCSScanner)(nil)
func (g *GCSScanner) Name() string { return "gcs" }
func (g *GCSScanner) RateLimit() rate.Limit { return rate.Every(500 * time.Millisecond) }
func (g *GCSScanner) Burst() int { return 3 }
func (g *GCSScanner) RespectsRobots() bool { return false }
func (g *GCSScanner) Enabled(_ recon.Config) bool { return true }
// Sweep derives candidate bucket names from the provider registry, probes
// each one anonymously, and emits a Finding for every readable object whose
// name matches a known config-file pattern.
//
// The query argument is unused: candidates come solely from the registry.
// Per-bucket probe failures are logged and skipped so a single unreachable
// bucket cannot abort the whole sweep. Returns the context error if the
// sweep is cancelled.
func (g *GCSScanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
	client := g.client
	if client == nil {
		client = NewClient()
	}
	baseURL := g.BaseURL
	if baseURL == "" {
		baseURL = "https://storage.googleapis.com/%s"
	}
	names := bucketNames(g.Registry)
	if len(names) == 0 {
		return nil
	}
	for _, bucket := range names {
		if err := ctx.Err(); err != nil {
			return err
		}
		if g.Limiters != nil {
			if err := g.Limiters.Wait(ctx, g.Name(), g.RateLimit(), g.Burst(), false); err != nil {
				return err
			}
		}
		endpoint := fmt.Sprintf(baseURL, bucket)
		items, err := g.listBucketGCS(ctx, client, endpoint)
		if err != nil {
			// Best-effort: log and continue with the next bucket.
			log.Printf("gcs: bucket %q probe failed (skipping): %v", bucket, err)
			continue
		}
		for _, name := range items {
			if !isConfigFile(name) {
				continue
			}
			finding := recon.Finding{
				Source:     fmt.Sprintf("gs://%s/%s", bucket, name),
				SourceType: "recon:gcs",
				Confidence: "medium",
				DetectedAt: time.Now(),
			}
			// Fix: a plain send could block forever if the consumer stops
			// draining out; selecting on ctx.Done lets cancellation unblock
			// the sweep instead of leaking this goroutine.
			select {
			case out <- finding:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	}
	return nil
}
// listBucketGCS probes a GCS bucket endpoint for public readability. A HEAD
// returning 200 means the bucket is publicly accessible; we then GET with
// Accept: application/json to retrieve the JSON object listing. A non-200
// status on either request is treated as "not public/listable" and yields
// (nil, nil) rather than an error, so the caller skips the bucket quietly.
func (g *GCSScanner) listBucketGCS(ctx context.Context, client *Client, endpoint string) ([]string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodHead, endpoint, nil)
	if err != nil {
		return nil, err
	}
	// Consistency fix: route the HEAD through client.Do like the GET below,
	// so both probes share the client's request handling instead of the
	// HEAD bypassing it via client.HTTP directly.
	resp, err := client.Do(ctx, req)
	if err != nil {
		return nil, err
	}
	// HEAD responses carry no body; close immediately to free the connection.
	resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, nil
	}
	getReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
	if err != nil {
		return nil, err
	}
	getReq.Header.Set("Accept", "application/json")
	getResp, err := client.Do(ctx, getReq)
	if err != nil {
		return nil, err
	}
	defer getResp.Body.Close()
	// Bug fix: the GET status was previously unchecked, so a 403/404 error
	// payload (often XML) would be fed to the JSON parser and surface as a
	// spurious parse error. Treat non-200 as "not listable" instead.
	if getResp.StatusCode != http.StatusOK {
		return nil, nil
	}
	return parseGCSListJSON(getResp.Body)
}
// gcsListResult models the GCS JSON listing format.
type gcsListResult struct {
Items []gcsItem `json:"items"`
}
type gcsItem struct {
Name string `json:"name"`
}
func parseGCSListJSON(r io.Reader) ([]string, error) {
data, err := io.ReadAll(io.LimitReader(r, 1<<20))
if err != nil {
return nil, err
}
var result gcsListResult
if err := json.Unmarshal(data, &result); err != nil {
return nil, err
}
names := make([]string, 0, len(result.Items))
for _, item := range result.Items {
if item.Name != "" {
names = append(names, item.Name)
}
}
return names, nil
}