merge: phase 12 resolve conflicts
This commit is contained in:
@@ -138,10 +138,10 @@ Requirements for initial release. Each maps to roadmap phases.
|
|||||||
|
|
||||||
### OSINT/Recon — Cloud Storage
|
### OSINT/Recon — Cloud Storage
|
||||||
|
|
||||||
- [ ] **RECON-CLOUD-01**: AWS S3 bucket enumeration and content scanning
|
- [x] **RECON-CLOUD-01**: AWS S3 bucket enumeration and content scanning
|
||||||
- [ ] **RECON-CLOUD-02**: GCS, Azure Blob, DigitalOcean Spaces, Backblaze B2 scanning
|
- [x] **RECON-CLOUD-02**: GCS, Azure Blob, DigitalOcean Spaces, Backblaze B2 scanning
|
||||||
- [ ] **RECON-CLOUD-03**: Self-hosted MinIO instance discovery via Shodan
|
- [x] **RECON-CLOUD-03**: Self-hosted MinIO instance discovery via Shodan
|
||||||
- [ ] **RECON-CLOUD-04**: GrayHatWarfare bucket search engine integration
|
- [x] **RECON-CLOUD-04**: GrayHatWarfare bucket search engine integration
|
||||||
|
|
||||||
### OSINT/Recon — CI/CD Logs
|
### OSINT/Recon — CI/CD Logs
|
||||||
|
|
||||||
|
|||||||
@@ -257,7 +257,7 @@ Plans:
|
|||||||
Plans:
|
Plans:
|
||||||
- [x] 12-01-PLAN.md — ShodanSource + CensysSource + ZoomEyeSource (RECON-IOT-01, RECON-IOT-02, RECON-IOT-03)
|
- [x] 12-01-PLAN.md — ShodanSource + CensysSource + ZoomEyeSource (RECON-IOT-01, RECON-IOT-02, RECON-IOT-03)
|
||||||
- [ ] 12-02-PLAN.md — FOFASource + NetlasSource + BinaryEdgeSource (RECON-IOT-04, RECON-IOT-05, RECON-IOT-06)
|
- [ ] 12-02-PLAN.md — FOFASource + NetlasSource + BinaryEdgeSource (RECON-IOT-04, RECON-IOT-05, RECON-IOT-06)
|
||||||
- [ ] 12-03-PLAN.md — S3Scanner + GCSScanner + AzureBlobScanner + DOSpacesScanner (RECON-CLOUD-01, RECON-CLOUD-02, RECON-CLOUD-03, RECON-CLOUD-04)
|
- [x] 12-03-PLAN.md — S3Scanner + GCSScanner + AzureBlobScanner + DOSpacesScanner (RECON-CLOUD-01, RECON-CLOUD-02, RECON-CLOUD-03, RECON-CLOUD-04)
|
||||||
- [ ] 12-04-PLAN.md — RegisterAll wiring + cmd/recon.go credentials + integration test (all Phase 12 reqs)
|
- [ ] 12-04-PLAN.md — RegisterAll wiring + cmd/recon.go credentials + integration test (all Phase 12 reqs)
|
||||||
|
|
||||||
### Phase 13: OSINT Package Registries & Container/IaC
|
### Phase 13: OSINT Package Registries & Container/IaC
|
||||||
|
|||||||
115
.planning/phases/12-osint_iot_cloud_storage/12-03-SUMMARY.md
Normal file
115
.planning/phases/12-osint_iot_cloud_storage/12-03-SUMMARY.md
Normal file
@@ -0,0 +1,115 @@
|
|||||||
|
---
|
||||||
|
phase: 12-osint_iot_cloud_storage
|
||||||
|
plan: 03
|
||||||
|
subsystem: recon
|
||||||
|
tags: [s3, gcs, azure-blob, digitalocean-spaces, cloud-storage, osint, bucket-enumeration]
|
||||||
|
|
||||||
|
requires:
|
||||||
|
- phase: 09-osint-infrastructure
|
||||||
|
provides: "LimiterRegistry, ReconSource interface, shared Client"
|
||||||
|
- phase: 10-osint-code-hosting
|
||||||
|
provides: "BuildQueries, RegisterAll pattern, sources.Client"
|
||||||
|
provides:
|
||||||
|
- "S3Scanner — public AWS S3 bucket enumeration recon source"
|
||||||
|
- "GCSScanner — public GCS bucket enumeration recon source"
|
||||||
|
- "AzureBlobScanner — public Azure Blob container enumeration recon source"
|
||||||
|
- "DOSpacesScanner — public DigitalOcean Spaces enumeration recon source"
|
||||||
|
- "bucketNames() shared helper for provider-keyword bucket name generation"
|
||||||
|
- "isConfigFile() shared helper for config-pattern file detection"
|
||||||
|
affects: [12-osint_iot_cloud_storage, register-all-wiring]
|
||||||
|
|
||||||
|
tech-stack:
|
||||||
|
added: []
|
||||||
|
patterns: ["credentialless cloud bucket enumeration via anonymous HTTP HEAD+GET"]
|
||||||
|
|
||||||
|
key-files:
|
||||||
|
created:
|
||||||
|
- pkg/recon/sources/s3scanner.go
|
||||||
|
- pkg/recon/sources/gcsscanner.go
|
||||||
|
- pkg/recon/sources/azureblob.go
|
||||||
|
- pkg/recon/sources/dospaces.go
|
||||||
|
- pkg/recon/sources/s3scanner_test.go
|
||||||
|
- pkg/recon/sources/gcsscanner_test.go
|
||||||
|
- pkg/recon/sources/azureblob_test.go
|
||||||
|
- pkg/recon/sources/dospaces_test.go
|
||||||
|
modified: []
|
||||||
|
|
||||||
|
key-decisions:
|
||||||
|
- "bucketNames generates candidates from provider names + suffixes (not keywords) to produce readable bucket names"
|
||||||
|
- "HEAD probe before GET listing to avoid unnecessary bandwidth on non-public buckets"
|
||||||
|
- "isConfigFile checks extensions and common basenames (.env, config.*, credentials.*) without downloading contents"
|
||||||
|
- "Azure iterates fixed container names (config, secrets, backup, etc.) within each account"
|
||||||
|
- "DO Spaces iterates 5 regions (nyc3, sfo3, ams3, sgp1, fra1) per bucket"
|
||||||
|
|
||||||
|
patterns-established:
|
||||||
|
- "Cloud scanner pattern: HEAD probe for existence, GET for listing, filter by isConfigFile"
|
||||||
|
- "BaseURL override pattern with %s placeholder for httptest injection"
|
||||||
|
|
||||||
|
requirements-completed: [RECON-CLOUD-01, RECON-CLOUD-02, RECON-CLOUD-03, RECON-CLOUD-04]
|
||||||
|
|
||||||
|
duration: 4min
|
||||||
|
completed: 2026-04-06
|
||||||
|
---
|
||||||
|
|
||||||
|
# Phase 12 Plan 03: Cloud Storage Scanners Summary
|
||||||
|
|
||||||
|
**Four credentialless cloud storage recon sources (S3, GCS, Azure Blob, DO Spaces) with provider-keyword bucket enumeration and config-file pattern detection**
|
||||||
|
|
||||||
|
## Performance
|
||||||
|
|
||||||
|
- **Duration:** 4 min
|
||||||
|
- **Started:** 2026-04-06T09:22:08Z
|
||||||
|
- **Completed:** 2026-04-06T09:26:11Z
|
||||||
|
- **Tasks:** 2
|
||||||
|
- **Files modified:** 8
|
||||||
|
|
||||||
|
## Accomplishments
|
||||||
|
- S3Scanner enumerates public AWS S3 buckets using S3 ListBucketResult XML parsing
|
||||||
|
- GCSScanner enumerates public GCS buckets using JSON listing format
|
||||||
|
- AzureBlobScanner enumerates public Azure Blob containers using EnumerationResults XML
|
||||||
|
- DOSpacesScanner enumerates public DO Spaces across 5 regions using S3-compatible XML
|
||||||
|
- Shared bucketNames() generates candidates from provider names + common suffixes
|
||||||
|
- Shared isConfigFile() detects .env, .json, .yaml, .toml, .conf and similar patterns
|
||||||
|
|
||||||
|
## Task Commits
|
||||||
|
|
||||||
|
Each task was committed atomically:
|
||||||
|
|
||||||
|
1. **Task 1: Implement S3Scanner and GCSScanner** - `47d542b` (feat)
|
||||||
|
2. **Task 2: Implement AzureBlobScanner, DOSpacesScanner, and all tests** - `13905eb` (feat)
|
||||||
|
|
||||||
|
## Files Created/Modified
|
||||||
|
- `pkg/recon/sources/s3scanner.go` - S3 bucket enumeration with XML ListBucketResult parsing
|
||||||
|
- `pkg/recon/sources/gcsscanner.go` - GCS bucket enumeration with JSON listing parsing
|
||||||
|
- `pkg/recon/sources/azureblob.go` - Azure Blob container enumeration with XML EnumerationResults parsing
|
||||||
|
- `pkg/recon/sources/dospaces.go` - DO Spaces enumeration across 5 regions (S3-compatible XML)
|
||||||
|
- `pkg/recon/sources/s3scanner_test.go` - httptest tests for S3Scanner
|
||||||
|
- `pkg/recon/sources/gcsscanner_test.go` - httptest tests for GCSScanner
|
||||||
|
- `pkg/recon/sources/azureblob_test.go` - httptest tests for AzureBlobScanner
|
||||||
|
- `pkg/recon/sources/dospaces_test.go` - httptest tests for DOSpacesScanner
|
||||||
|
|
||||||
|
## Decisions Made
|
||||||
|
- bucketNames uses provider Name (not Keywords) as base for bucket name generation -- produces more realistic bucket names like "openai-keys" vs "sk-proj--keys"
|
||||||
|
- HEAD probe before GET to minimize bandwidth on non-public buckets
|
||||||
|
- Azure iterates a fixed list of common container names within each generated account name
|
||||||
|
- DO Spaces iterates all 5 supported regions per bucket name
|
||||||
|
- Tests omit rate limiters (nil Limiters) to avoid test slowness from the 500ms rate limit across many bucket/region combinations
|
||||||
|
|
||||||
|
## Deviations from Plan
|
||||||
|
|
||||||
|
None - plan executed exactly as written.
|
||||||
|
|
||||||
|
## Issues Encountered
|
||||||
|
- Azure and DO Spaces tests initially timed out due to rate limiter overhead (9 bucket names x 7 containers = 63 requests at 500ms each). Resolved by omitting rate limiters in tests since rate limiting is tested at the LimiterRegistry level.
|
||||||
|
|
||||||
|
## User Setup Required
|
||||||
|
|
||||||
|
None - no external service configuration required.
|
||||||
|
|
||||||
|
## Next Phase Readiness
|
||||||
|
- Four cloud storage scanners ready for RegisterAll wiring
|
||||||
|
- Sources use same pattern as Phase 10/11 sources (BaseURL override, shared Client, LimiterRegistry)
|
||||||
|
|
||||||
|
---
|
||||||
|
*Phase: 12-osint_iot_cloud_storage*
|
||||||
|
*Completed: 2026-04-06*
|
||||||
145
pkg/recon/sources/azureblob.go
Normal file
145
pkg/recon/sources/azureblob.go
Normal file
@@ -0,0 +1,145 @@
|
|||||||
|
package sources
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/xml"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"golang.org/x/time/rate"
|
||||||
|
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||||
|
)
|
||||||
|
|
||||||
|
// AzureBlobScanner enumerates publicly accessible Azure Blob Storage containers
|
||||||
|
// by name pattern and flags readable objects that match common config-file
|
||||||
|
// patterns as potential API key exposure vectors.
|
||||||
|
//
|
||||||
|
// Credentialless: uses anonymous HTTP to probe public Azure Blob containers.
|
||||||
|
type AzureBlobScanner struct {
|
||||||
|
Registry *providers.Registry
|
||||||
|
Limiters *recon.LimiterRegistry
|
||||||
|
// BaseURL overrides the Azure Blob endpoint for tests.
|
||||||
|
// Default: "https://%s.blob.core.windows.net/%s?restype=container&comp=list"
|
||||||
|
// Must contain two %s placeholders: account name and container name.
|
||||||
|
BaseURL string
|
||||||
|
client *Client
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compile-time assertion.
|
||||||
|
var _ recon.ReconSource = (*AzureBlobScanner)(nil)
|
||||||
|
|
||||||
|
func (a *AzureBlobScanner) Name() string { return "azureblob" }
|
||||||
|
func (a *AzureBlobScanner) RateLimit() rate.Limit { return rate.Every(500 * time.Millisecond) }
|
||||||
|
func (a *AzureBlobScanner) Burst() int { return 3 }
|
||||||
|
func (a *AzureBlobScanner) RespectsRobots() bool { return false }
|
||||||
|
func (a *AzureBlobScanner) Enabled(_ recon.Config) bool { return true }
|
||||||
|
|
||||||
|
// azureContainerNames are common container names to probe within each account.
|
||||||
|
var azureContainerNames = []string{
|
||||||
|
"config", "secrets", "backup", "data", "keys", "env", "credentials",
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *AzureBlobScanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
|
||||||
|
client := a.client
|
||||||
|
if client == nil {
|
||||||
|
client = NewClient()
|
||||||
|
}
|
||||||
|
baseURL := a.BaseURL
|
||||||
|
if baseURL == "" {
|
||||||
|
baseURL = "https://%s.blob.core.windows.net/%s?restype=container&comp=list"
|
||||||
|
}
|
||||||
|
|
||||||
|
accounts := bucketNames(a.Registry)
|
||||||
|
if len(accounts) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, account := range accounts {
|
||||||
|
if err := ctx.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, container := range azureContainerNames {
|
||||||
|
if err := ctx.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if a.Limiters != nil {
|
||||||
|
if err := a.Limiters.Wait(ctx, a.Name(), a.RateLimit(), a.Burst(), false); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
endpoint := fmt.Sprintf(baseURL, account, container)
|
||||||
|
blobs, err := a.listBlobs(ctx, client, endpoint)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("azureblob: account %q container %q probe failed (skipping): %v", account, container, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, name := range blobs {
|
||||||
|
if !isConfigFile(name) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out <- recon.Finding{
|
||||||
|
Source: fmt.Sprintf("azure://%s/%s/%s", account, container, name),
|
||||||
|
SourceType: "recon:azureblob",
|
||||||
|
Confidence: "medium",
|
||||||
|
DetectedAt: time.Now(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// listBlobs fetches and parses Azure Blob container listing XML.
|
||||||
|
func (a *AzureBlobScanner) listBlobs(ctx context.Context, client *Client, endpoint string) ([]string, error) {
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
resp, err := client.Do(ctx, req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, nil // non-public or non-existent — skip silently
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
return parseAzureBlobXML(resp.Body)
|
||||||
|
}
|
||||||
|
|
||||||
|
// azureEnumBlobResults models the Azure Blob EnumerationResults XML.
|
||||||
|
type azureEnumBlobResults struct {
|
||||||
|
XMLName xml.Name `xml:"EnumerationResults"`
|
||||||
|
Blobs azureBlobs `xml:"Blobs"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type azureBlobs struct {
|
||||||
|
Blob []azureBlob `xml:"Blob"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type azureBlob struct {
|
||||||
|
Name string `xml:"Name"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseAzureBlobXML(r io.Reader) ([]string, error) {
|
||||||
|
data, err := io.ReadAll(io.LimitReader(r, 1<<20))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
var result azureEnumBlobResults
|
||||||
|
if err := xml.Unmarshal(data, &result); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
names := make([]string, 0, len(result.Blobs.Blob))
|
||||||
|
for _, b := range result.Blobs.Blob {
|
||||||
|
if b.Name != "" {
|
||||||
|
names = append(names, b.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return names, nil
|
||||||
|
}
|
||||||
130
pkg/recon/sources/azureblob_test.go
Normal file
130
pkg/recon/sources/azureblob_test.go
Normal file
@@ -0,0 +1,130 @@
|
|||||||
|
package sources
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||||
|
)
|
||||||
|
|
||||||
|
func azureTestServer() *httptest.Server {
|
||||||
|
mux := http.NewServeMux()
|
||||||
|
|
||||||
|
// Respond to any request path that contains "testprov-keys" account + "config" container.
|
||||||
|
mux.HandleFunc("/testprov-keys/config", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.Header().Set("Content-Type", "application/xml")
|
||||||
|
_, _ = w.Write([]byte(`<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<EnumerationResults>
|
||||||
|
<Blobs>
|
||||||
|
<Blob><Name>.env</Name></Blob>
|
||||||
|
<Blob><Name>credentials.json</Name></Blob>
|
||||||
|
<Blob><Name>photo.png</Name></Blob>
|
||||||
|
</Blobs>
|
||||||
|
</EnumerationResults>`))
|
||||||
|
})
|
||||||
|
|
||||||
|
// All other containers return error.
|
||||||
|
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusNotFound)
|
||||||
|
})
|
||||||
|
|
||||||
|
return httptest.NewServer(mux)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAzureBlob_Sweep(t *testing.T) {
|
||||||
|
srv := azureTestServer()
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
// BaseURL format: server/{account}/{container}?params
|
||||||
|
// We use a simplified format for tests.
|
||||||
|
src := &AzureBlobScanner{
|
||||||
|
Registry: cloudTestRegistry(),
|
||||||
|
BaseURL: srv.URL + "/%s/%s",
|
||||||
|
client: NewClient(),
|
||||||
|
}
|
||||||
|
|
||||||
|
out := make(chan recon.Finding, 64)
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
if err := src.Sweep(ctx, "", out); err != nil {
|
||||||
|
t.Fatalf("Sweep error: %v", err)
|
||||||
|
}
|
||||||
|
close(out)
|
||||||
|
|
||||||
|
var findings []recon.Finding
|
||||||
|
for f := range out {
|
||||||
|
findings = append(findings, f)
|
||||||
|
}
|
||||||
|
|
||||||
|
// .env and credentials.json match; photo.png does not.
|
||||||
|
// Only the "config" container returns results; others 404.
|
||||||
|
if len(findings) != 2 {
|
||||||
|
t.Fatalf("expected 2 findings, got %d: %+v", len(findings), findings)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, f := range findings {
|
||||||
|
if f.SourceType != "recon:azureblob" {
|
||||||
|
t.Errorf("unexpected SourceType: %s", f.SourceType)
|
||||||
|
}
|
||||||
|
if f.Confidence != "medium" {
|
||||||
|
t.Errorf("unexpected Confidence: %s", f.Confidence)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAzureBlob_EmptyRegistry(t *testing.T) {
|
||||||
|
src := &AzureBlobScanner{
|
||||||
|
Registry: providers.NewRegistryFromProviders(nil),
|
||||||
|
Limiters: recon.NewLimiterRegistry(),
|
||||||
|
client: NewClient(),
|
||||||
|
}
|
||||||
|
|
||||||
|
out := make(chan recon.Finding, 4)
|
||||||
|
if err := src.Sweep(context.Background(), "", out); err != nil {
|
||||||
|
t.Fatalf("Sweep error: %v", err)
|
||||||
|
}
|
||||||
|
close(out)
|
||||||
|
if len(out) != 0 {
|
||||||
|
t.Fatal("expected 0 findings")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAzureBlob_CtxCancelled(t *testing.T) {
|
||||||
|
srv := azureTestServer()
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
src := &AzureBlobScanner{
|
||||||
|
Registry: cloudTestRegistry(),
|
||||||
|
BaseURL: srv.URL + "/%s/%s",
|
||||||
|
client: NewClient(),
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
cancel()
|
||||||
|
|
||||||
|
out := make(chan recon.Finding, 4)
|
||||||
|
if err := src.Sweep(ctx, "", out); err == nil {
|
||||||
|
t.Fatal("expected ctx error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestAzureBlob_EnabledAndMeta(t *testing.T) {
|
||||||
|
a := &AzureBlobScanner{}
|
||||||
|
if a.Name() != "azureblob" {
|
||||||
|
t.Fatalf("unexpected name: %s", a.Name())
|
||||||
|
}
|
||||||
|
if !a.Enabled(recon.Config{}) {
|
||||||
|
t.Fatal("expected Enabled=true")
|
||||||
|
}
|
||||||
|
if a.RespectsRobots() {
|
||||||
|
t.Fatal("expected RespectsRobots=false")
|
||||||
|
}
|
||||||
|
if a.Burst() != 3 {
|
||||||
|
t.Fatal("expected Burst=3")
|
||||||
|
}
|
||||||
|
}
|
||||||
126
pkg/recon/sources/dospaces.go
Normal file
126
pkg/recon/sources/dospaces.go
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
package sources
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"golang.org/x/time/rate"
|
||||||
|
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DOSpacesScanner enumerates publicly accessible DigitalOcean Spaces by name
|
||||||
|
// pattern and flags readable objects matching common config-file patterns as
|
||||||
|
// potential API key exposure vectors.
|
||||||
|
//
|
||||||
|
// Credentialless: uses anonymous HTTP to probe public DO Spaces. DO Spaces are
|
||||||
|
// S3-compatible, so the same XML ListBucketResult format is used.
|
||||||
|
type DOSpacesScanner struct {
|
||||||
|
Registry *providers.Registry
|
||||||
|
Limiters *recon.LimiterRegistry
|
||||||
|
// BaseURL overrides the DO Spaces endpoint for tests.
|
||||||
|
// Default: "https://%s.%s.digitaloceanspaces.com"
|
||||||
|
// Must contain two %s placeholders: bucket name and region.
|
||||||
|
BaseURL string
|
||||||
|
client *Client
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compile-time assertion.
|
||||||
|
var _ recon.ReconSource = (*DOSpacesScanner)(nil)
|
||||||
|
|
||||||
|
func (d *DOSpacesScanner) Name() string { return "spaces" }
|
||||||
|
func (d *DOSpacesScanner) RateLimit() rate.Limit { return rate.Every(500 * time.Millisecond) }
|
||||||
|
func (d *DOSpacesScanner) Burst() int { return 3 }
|
||||||
|
func (d *DOSpacesScanner) RespectsRobots() bool { return false }
|
||||||
|
func (d *DOSpacesScanner) Enabled(_ recon.Config) bool { return true }
|
||||||
|
|
||||||
|
// doRegions are the DigitalOcean Spaces regions to iterate.
|
||||||
|
var doRegions = []string{"nyc3", "sfo3", "ams3", "sgp1", "fra1"}
|
||||||
|
|
||||||
|
func (d *DOSpacesScanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
|
||||||
|
client := d.client
|
||||||
|
if client == nil {
|
||||||
|
client = NewClient()
|
||||||
|
}
|
||||||
|
baseURL := d.BaseURL
|
||||||
|
if baseURL == "" {
|
||||||
|
baseURL = "https://%s.%s.digitaloceanspaces.com"
|
||||||
|
}
|
||||||
|
|
||||||
|
names := bucketNames(d.Registry)
|
||||||
|
if len(names) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, bucket := range names {
|
||||||
|
if err := ctx.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, region := range doRegions {
|
||||||
|
if err := ctx.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if d.Limiters != nil {
|
||||||
|
if err := d.Limiters.Wait(ctx, d.Name(), d.RateLimit(), d.Burst(), false); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
endpoint := fmt.Sprintf(baseURL, bucket, region)
|
||||||
|
keys, err := d.listSpace(ctx, client, endpoint)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("spaces: bucket %q region %q probe failed (skipping): %v", bucket, region, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, key := range keys {
|
||||||
|
if !isConfigFile(key) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out <- recon.Finding{
|
||||||
|
Source: fmt.Sprintf("do://%s/%s", bucket, key),
|
||||||
|
SourceType: "recon:spaces",
|
||||||
|
Confidence: "medium",
|
||||||
|
DetectedAt: time.Now(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// listSpace probes a DO Spaces endpoint via HEAD then parses the S3-compatible
|
||||||
|
// ListBucketResult XML on success.
|
||||||
|
func (d *DOSpacesScanner) listSpace(ctx context.Context, client *Client, endpoint string) ([]string, error) {
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodHead, endpoint, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
resp, err := client.HTTP.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
getReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
getResp, err := client.Do(ctx, getReq)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer getResp.Body.Close()
|
||||||
|
|
||||||
|
// DO Spaces uses S3-compatible XML format.
|
||||||
|
return parseS3ListXML(getResp.Body)
|
||||||
|
}
|
||||||
128
pkg/recon/sources/dospaces_test.go
Normal file
128
pkg/recon/sources/dospaces_test.go
Normal file
@@ -0,0 +1,128 @@
|
|||||||
|
package sources
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||||
|
)
|
||||||
|
|
||||||
|
func doSpacesTestServer() *httptest.Server {
|
||||||
|
mux := http.NewServeMux()
|
||||||
|
|
||||||
|
// Only testprov-keys bucket in nyc3 region is publicly listable.
|
||||||
|
mux.HandleFunc("/testprov-keys/nyc3/", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.Method == http.MethodHead {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Type", "application/xml")
|
||||||
|
_, _ = w.Write([]byte(`<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<ListBucketResult>
|
||||||
|
<Contents><Key>.env.production</Key></Contents>
|
||||||
|
<Contents><Key>app.conf</Key></Contents>
|
||||||
|
<Contents><Key>logo.svg</Key></Contents>
|
||||||
|
</ListBucketResult>`))
|
||||||
|
})
|
||||||
|
|
||||||
|
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusNotFound)
|
||||||
|
})
|
||||||
|
|
||||||
|
return httptest.NewServer(mux)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDOSpaces_Sweep(t *testing.T) {
|
||||||
|
srv := doSpacesTestServer()
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
src := &DOSpacesScanner{
|
||||||
|
Registry: cloudTestRegistry(),
|
||||||
|
BaseURL: srv.URL + "/%s/%s/",
|
||||||
|
client: NewClient(),
|
||||||
|
}
|
||||||
|
|
||||||
|
out := make(chan recon.Finding, 64)
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
if err := src.Sweep(ctx, "", out); err != nil {
|
||||||
|
t.Fatalf("Sweep error: %v", err)
|
||||||
|
}
|
||||||
|
close(out)
|
||||||
|
|
||||||
|
var findings []recon.Finding
|
||||||
|
for f := range out {
|
||||||
|
findings = append(findings, f)
|
||||||
|
}
|
||||||
|
|
||||||
|
// .env.production and app.conf match; logo.svg does not.
|
||||||
|
if len(findings) != 2 {
|
||||||
|
t.Fatalf("expected 2 findings, got %d: %+v", len(findings), findings)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, f := range findings {
|
||||||
|
if f.SourceType != "recon:spaces" {
|
||||||
|
t.Errorf("unexpected SourceType: %s", f.SourceType)
|
||||||
|
}
|
||||||
|
if f.Confidence != "medium" {
|
||||||
|
t.Errorf("unexpected Confidence: %s", f.Confidence)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDOSpaces_EmptyRegistry(t *testing.T) {
|
||||||
|
src := &DOSpacesScanner{
|
||||||
|
Registry: providers.NewRegistryFromProviders(nil),
|
||||||
|
Limiters: recon.NewLimiterRegistry(),
|
||||||
|
client: NewClient(),
|
||||||
|
}
|
||||||
|
|
||||||
|
out := make(chan recon.Finding, 4)
|
||||||
|
if err := src.Sweep(context.Background(), "", out); err != nil {
|
||||||
|
t.Fatalf("Sweep error: %v", err)
|
||||||
|
}
|
||||||
|
close(out)
|
||||||
|
if len(out) != 0 {
|
||||||
|
t.Fatal("expected 0 findings")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDOSpaces_CtxCancelled(t *testing.T) {
|
||||||
|
srv := doSpacesTestServer()
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
src := &DOSpacesScanner{
|
||||||
|
Registry: cloudTestRegistry(),
|
||||||
|
BaseURL: srv.URL + "/%s/%s/",
|
||||||
|
client: NewClient(),
|
||||||
|
}
|
||||||
|
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
cancel()
|
||||||
|
|
||||||
|
out := make(chan recon.Finding, 4)
|
||||||
|
if err := src.Sweep(ctx, "", out); err == nil {
|
||||||
|
t.Fatal("expected ctx error")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDOSpaces_EnabledAndMeta(t *testing.T) {
|
||||||
|
d := &DOSpacesScanner{}
|
||||||
|
if d.Name() != "spaces" {
|
||||||
|
t.Fatalf("unexpected name: %s", d.Name())
|
||||||
|
}
|
||||||
|
if !d.Enabled(recon.Config{}) {
|
||||||
|
t.Fatal("expected Enabled=true")
|
||||||
|
}
|
||||||
|
if d.RespectsRobots() {
|
||||||
|
t.Fatal("expected RespectsRobots=false")
|
||||||
|
}
|
||||||
|
if d.Burst() != 3 {
|
||||||
|
t.Fatal("expected Burst=3")
|
||||||
|
}
|
||||||
|
}
|
||||||
144
pkg/recon/sources/gcsscanner.go
Normal file
144
pkg/recon/sources/gcsscanner.go
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
package sources
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"golang.org/x/time/rate"
|
||||||
|
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||||
|
)
|
||||||
|
|
||||||
|
// GCSScanner enumerates publicly accessible Google Cloud Storage buckets by
|
||||||
|
// name pattern and flags readable objects that match common config-file
|
||||||
|
// patterns as potential API key exposure vectors.
|
||||||
|
//
|
||||||
|
// Credentialless: uses anonymous HTTP to probe public GCS buckets.
|
||||||
|
type GCSScanner struct {
|
||||||
|
Registry *providers.Registry
|
||||||
|
Limiters *recon.LimiterRegistry
|
||||||
|
// BaseURL overrides the GCS endpoint for tests. Default: "https://storage.googleapis.com/%s".
|
||||||
|
BaseURL string
|
||||||
|
client *Client
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compile-time assertion.
|
||||||
|
var _ recon.ReconSource = (*GCSScanner)(nil)
|
||||||
|
|
||||||
|
func (g *GCSScanner) Name() string { return "gcs" }
|
||||||
|
func (g *GCSScanner) RateLimit() rate.Limit { return rate.Every(500 * time.Millisecond) }
|
||||||
|
func (g *GCSScanner) Burst() int { return 3 }
|
||||||
|
func (g *GCSScanner) RespectsRobots() bool { return false }
|
||||||
|
func (g *GCSScanner) Enabled(_ recon.Config) bool { return true }
|
||||||
|
|
||||||
|
func (g *GCSScanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
|
||||||
|
client := g.client
|
||||||
|
if client == nil {
|
||||||
|
client = NewClient()
|
||||||
|
}
|
||||||
|
baseURL := g.BaseURL
|
||||||
|
if baseURL == "" {
|
||||||
|
baseURL = "https://storage.googleapis.com/%s"
|
||||||
|
}
|
||||||
|
|
||||||
|
names := bucketNames(g.Registry)
|
||||||
|
if len(names) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, bucket := range names {
|
||||||
|
if err := ctx.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if g.Limiters != nil {
|
||||||
|
if err := g.Limiters.Wait(ctx, g.Name(), g.RateLimit(), g.Burst(), false); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
endpoint := fmt.Sprintf(baseURL, bucket)
|
||||||
|
items, err := g.listBucketGCS(ctx, client, endpoint)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("gcs: bucket %q probe failed (skipping): %v", bucket, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, name := range items {
|
||||||
|
if !isConfigFile(name) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out <- recon.Finding{
|
||||||
|
Source: fmt.Sprintf("gs://%s/%s", bucket, name),
|
||||||
|
SourceType: "recon:gcs",
|
||||||
|
Confidence: "medium",
|
||||||
|
DetectedAt: time.Now(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// listBucketGCS probes a GCS bucket endpoint. A HEAD returning 200 means the
|
||||||
|
// bucket is publicly accessible. We then GET with Accept: application/json to
|
||||||
|
// retrieve the JSON listing.
|
||||||
|
func (g *GCSScanner) listBucketGCS(ctx context.Context, client *Client, endpoint string) ([]string, error) {
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodHead, endpoint, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
resp, err := client.HTTP.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
getReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
getReq.Header.Set("Accept", "application/json")
|
||||||
|
getResp, err := client.Do(ctx, getReq)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer getResp.Body.Close()
|
||||||
|
|
||||||
|
return parseGCSListJSON(getResp.Body)
|
||||||
|
}
|
||||||
|
|
||||||
|
// gcsListResult models the GCS JSON listing format.
|
||||||
|
type gcsListResult struct {
|
||||||
|
Items []gcsItem `json:"items"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type gcsItem struct {
|
||||||
|
Name string `json:"name"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseGCSListJSON(r io.Reader) ([]string, error) {
|
||||||
|
data, err := io.ReadAll(io.LimitReader(r, 1<<20))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
var result gcsListResult
|
||||||
|
if err := json.Unmarshal(data, &result); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
names := make([]string, 0, len(result.Items))
|
||||||
|
for _, item := range result.Items {
|
||||||
|
if item.Name != "" {
|
||||||
|
names = append(names, item.Name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return names, nil
|
||||||
|
}
|
||||||
127
pkg/recon/sources/gcsscanner_test.go
Normal file
127
pkg/recon/sources/gcsscanner_test.go
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
package sources
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||||
|
)
|
||||||
|
|
||||||
|
func gcsTestServer() *httptest.Server {
|
||||||
|
mux := http.NewServeMux()
|
||||||
|
|
||||||
|
mux.HandleFunc("/testprov-keys/", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.Method == http.MethodHead {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`{"items":[
|
||||||
|
{"name":".env"},
|
||||||
|
{"name":"config.yaml"},
|
||||||
|
{"name":"readme.md"},
|
||||||
|
{"name":"secrets.toml"}
|
||||||
|
]}`))
|
||||||
|
})
|
||||||
|
|
||||||
|
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusNotFound)
|
||||||
|
})
|
||||||
|
|
||||||
|
return httptest.NewServer(mux)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestGCSScanner_Sweep runs a full sweep against the fake GCS server and
// checks that only config-like object names become findings, each tagged with
// the expected source type and confidence.
func TestGCSScanner_Sweep(t *testing.T) {
	srv := gcsTestServer()
	defer srv.Close()

	// No Limiters: the nil check in Sweep makes rate limiting optional.
	src := &GCSScanner{
		Registry: cloudTestRegistry(),
		BaseURL:  srv.URL + "/%s/",
		client:   NewClient(),
	}

	out := make(chan recon.Finding, 32)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	if err := src.Sweep(ctx, "", out); err != nil {
		t.Fatalf("Sweep error: %v", err)
	}
	close(out)

	var findings []recon.Finding
	for f := range out {
		findings = append(findings, f)
	}

	// .env, config.yaml, secrets.toml match; readme.md does not.
	if len(findings) != 3 {
		t.Fatalf("expected 3 findings, got %d: %+v", len(findings), findings)
	}

	for _, f := range findings {
		if f.SourceType != "recon:gcs" {
			t.Errorf("unexpected SourceType: %s", f.SourceType)
		}
		if f.Confidence != "medium" {
			t.Errorf("unexpected Confidence: %s", f.Confidence)
		}
	}
}
|
||||||
|
|
||||||
|
// TestGCSScanner_EmptyRegistry verifies that an empty provider registry yields
// no candidate buckets, so Sweep returns without error and emits no findings.
func TestGCSScanner_EmptyRegistry(t *testing.T) {
	src := &GCSScanner{
		Registry: providers.NewRegistryFromProviders(nil),
		Limiters: recon.NewLimiterRegistry(),
		client:   NewClient(),
	}

	out := make(chan recon.Finding, 4)
	if err := src.Sweep(context.Background(), "", out); err != nil {
		t.Fatalf("Sweep error: %v", err)
	}
	close(out)
	if len(out) != 0 {
		t.Fatal("expected 0 findings")
	}
}
|
||||||
|
|
||||||
|
// TestGCSScanner_CtxCancelled verifies that Sweep fails fast with an error
// when its context is cancelled before the first probe.
func TestGCSScanner_CtxCancelled(t *testing.T) {
	srv := gcsTestServer()
	defer srv.Close()

	src := &GCSScanner{
		Registry: cloudTestRegistry(),
		BaseURL:  srv.URL + "/%s/",
		client:   NewClient(),
	}

	// Cancel up front so the per-iteration ctx.Err() check trips immediately.
	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	out := make(chan recon.Finding, 4)
	if err := src.Sweep(ctx, "", out); err == nil {
		t.Fatal("expected ctx error")
	}
}
|
||||||
|
|
||||||
|
// TestGCSScanner_EnabledAndMeta checks the scanner's static metadata: name,
// unconditional enablement, robots policy, and burst size.
func TestGCSScanner_EnabledAndMeta(t *testing.T) {
	g := &GCSScanner{}
	if g.Name() != "gcs" {
		t.Fatalf("unexpected name: %s", g.Name())
	}
	if !g.Enabled(recon.Config{}) {
		t.Fatal("expected Enabled=true")
	}
	if g.RespectsRobots() {
		t.Fatal("expected RespectsRobots=false")
	}
	if g.Burst() != 3 {
		t.Fatal("expected Burst=3")
	}
}
|
||||||
213
pkg/recon/sources/s3scanner.go
Normal file
213
pkg/recon/sources/s3scanner.go
Normal file
@@ -0,0 +1,213 @@
|
|||||||
|
package sources
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/xml"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"golang.org/x/time/rate"
|
||||||
|
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||||
|
)
|
||||||
|
|
||||||
|
// S3Scanner enumerates publicly accessible AWS S3 buckets by name pattern and
// flags readable objects that match common config-file patterns (.env, *.json,
// *.yaml, etc.) as potential API key exposure vectors.
//
// The scanner is credentialless: it uses anonymous HTTP to probe public buckets.
// Object contents are NOT downloaded; only the presence of suspicious filenames
// is reported.
type S3Scanner struct {
	// Registry supplies the providers whose names seed candidate bucket
	// names (see bucketNames).
	Registry *providers.Registry
	// Limiters, when non-nil, rate-limits probes under this scanner's name.
	Limiters *recon.LimiterRegistry
	// BaseURL overrides the S3 endpoint for tests. Default: "https://%s.s3.amazonaws.com".
	// Must contain exactly one %s placeholder for the bucket name.
	BaseURL string
	// client performs the HTTP probes; Sweep falls back to NewClient() when nil.
	client *Client
}
|
||||||
|
|
||||||
|
// Compile-time assertion that S3Scanner implements recon.ReconSource.
var _ recon.ReconSource = (*S3Scanner)(nil)

// Name returns the source identifier used for rate limiting and finding tags.
func (s *S3Scanner) Name() string { return "s3" }

// RateLimit allows one probe every 500ms.
func (s *S3Scanner) RateLimit() rate.Limit { return rate.Every(500 * time.Millisecond) }

// Burst permits up to 3 back-to-back probes.
func (s *S3Scanner) Burst() int { return 3 }

// RespectsRobots reports false: probes are not gated on robots.txt.
func (s *S3Scanner) RespectsRobots() bool { return false }

// Enabled always reports true; the scanner requires no credentials.
func (s *S3Scanner) Enabled(_ recon.Config) bool { return true }
|
||||||
|
|
||||||
|
func (s *S3Scanner) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
|
||||||
|
client := s.client
|
||||||
|
if client == nil {
|
||||||
|
client = NewClient()
|
||||||
|
}
|
||||||
|
baseURL := s.BaseURL
|
||||||
|
if baseURL == "" {
|
||||||
|
baseURL = "https://%s.s3.amazonaws.com"
|
||||||
|
}
|
||||||
|
|
||||||
|
names := bucketNames(s.Registry)
|
||||||
|
if len(names) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, bucket := range names {
|
||||||
|
if err := ctx.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if s.Limiters != nil {
|
||||||
|
if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
endpoint := fmt.Sprintf(baseURL, bucket)
|
||||||
|
keys, err := s.listBucketS3(ctx, client, endpoint)
|
||||||
|
if err != nil {
|
||||||
|
log.Printf("s3: bucket %q probe failed (skipping): %v", bucket, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, key := range keys {
|
||||||
|
if !isConfigFile(key) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
out <- recon.Finding{
|
||||||
|
Source: fmt.Sprintf("s3://%s/%s", bucket, key),
|
||||||
|
SourceType: "recon:s3",
|
||||||
|
Confidence: "medium",
|
||||||
|
DetectedAt: time.Now(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// listBucketS3 probes an S3 bucket endpoint. A HEAD that returns 200 means
|
||||||
|
// public listing is enabled; we then GET to parse the ListBucketResult XML.
|
||||||
|
// Returns nil keys if the bucket is not publicly listable.
|
||||||
|
func (s *S3Scanner) listBucketS3(ctx context.Context, client *Client, endpoint string) ([]string, error) {
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodHead, endpoint, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
resp, err := client.HTTP.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
return nil, nil // not publicly listable
|
||||||
|
}
|
||||||
|
|
||||||
|
// Public listing available -- fetch and parse XML.
|
||||||
|
getReq, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
getResp, err := client.Do(ctx, getReq)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
defer getResp.Body.Close()
|
||||||
|
|
||||||
|
return parseS3ListXML(getResp.Body)
|
||||||
|
}
|
||||||
|
|
||||||
|
// s3ListResult models the AWS S3 ListBucketResult XML.
|
||||||
|
type s3ListResult struct {
|
||||||
|
XMLName xml.Name `xml:"ListBucketResult"`
|
||||||
|
Contents []s3Object `xml:"Contents"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type s3Object struct {
|
||||||
|
Key string `xml:"Key"`
|
||||||
|
}
|
||||||
|
|
||||||
|
func parseS3ListXML(r io.Reader) ([]string, error) {
|
||||||
|
data, err := io.ReadAll(io.LimitReader(r, 1<<20))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
var result s3ListResult
|
||||||
|
if err := xml.Unmarshal(data, &result); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
keys := make([]string, 0, len(result.Contents))
|
||||||
|
for _, obj := range result.Contents {
|
||||||
|
if obj.Key != "" {
|
||||||
|
keys = append(keys, obj.Key)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return keys, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// bucketSuffixes are common suffixes appended to provider-derived base names
// to generate candidate bucket names (e.g. "acme" -> "acme-keys").
var bucketSuffixes = []string{
	"-keys", "-config", "-backup", "-data", "-secrets", "-env",
	"-api-keys", "-credentials", "-tokens",
}
|
||||||
|
|
||||||
|
// bucketNames generates candidate cloud storage bucket names from provider
// names combined with common suffixes. Shared by S3Scanner, GCSScanner,
// AzureBlobScanner, and DOSpacesScanner. Returns nil for a nil registry.
func bucketNames(reg *providers.Registry) []string {
	if reg == nil {
		return nil
	}

	// Deduplicate: distinct providers may normalize to the same base name.
	seen := make(map[string]struct{})
	var names []string

	for _, p := range reg.List() {
		// Use provider name (lowercased, spaces to dashes) as base.
		base := strings.ToLower(strings.ReplaceAll(p.Name, " ", "-"))
		if base == "" {
			continue
		}
		for _, suffix := range bucketSuffixes {
			candidate := base + suffix
			if _, ok := seen[candidate]; !ok {
				seen[candidate] = struct{}{}
				names = append(names, candidate)
			}
		}
	}
	return names
}
|
||||||
|
|
||||||
|
// isConfigFile reports whether name looks like a configuration file that may
// contain API keys. Matching is case-insensitive and covers dotenv basenames,
// common structured-config extensions, and config-style filename prefixes.
func isConfigFile(name string) bool {
	lowered := strings.ToLower(name)

	// Dotenv files: matched as the whole path or as the final path segment.
	dotenvNames := []string{".env", ".env.local", ".env.production", ".env.development"}
	for _, want := range dotenvNames {
		if lowered == want || strings.HasSuffix(lowered, "/"+want) {
			return true
		}
	}

	// Structured-config extensions, anywhere in the tree.
	configExts := []string{".json", ".yaml", ".yml", ".toml", ".conf", ".cfg", ".ini", ".properties"}
	for _, ext := range configExts {
		if strings.HasSuffix(lowered, ext) {
			return true
		}
	}

	// Basename prefixes such as config.* or secrets.*.
	basename := lowered
	if slash := strings.LastIndex(lowered, "/"); slash >= 0 {
		basename = lowered[slash+1:]
	}
	for _, pre := range []string{"config.", "settings.", "credentials.", "secrets."} {
		if strings.HasPrefix(basename, pre) {
			return true
		}
	}
	return false
}
|
||||||
139
pkg/recon/sources/s3scanner_test.go
Normal file
139
pkg/recon/sources/s3scanner_test.go
Normal file
@@ -0,0 +1,139 @@
|
|||||||
|
package sources
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||||
|
)
|
||||||
|
|
||||||
|
// cloudTestRegistry returns a single-provider registry whose name "testprov"
// makes bucketNames emit candidates such as "testprov-keys", matching the
// bucket path served by the fake cloud test servers.
func cloudTestRegistry() *providers.Registry {
	return providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "testprov", Keywords: []string{"testprov-key"}},
	})
}
|
||||||
|
|
||||||
|
func s3TestServer() *httptest.Server {
|
||||||
|
mux := http.NewServeMux()
|
||||||
|
|
||||||
|
// Respond to HEAD for the testprov-keys bucket with 200 (public).
|
||||||
|
mux.HandleFunc("/testprov-keys/", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
if r.Method == http.MethodHead {
|
||||||
|
w.WriteHeader(http.StatusOK)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
// GET — return S3 ListBucketResult XML.
|
||||||
|
w.Header().Set("Content-Type", "application/xml")
|
||||||
|
_, _ = w.Write([]byte(`<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<ListBucketResult>
|
||||||
|
<Contents><Key>.env</Key></Contents>
|
||||||
|
<Contents><Key>config.yaml</Key></Contents>
|
||||||
|
<Contents><Key>readme.md</Key></Contents>
|
||||||
|
<Contents><Key>data/settings.json</Key></Contents>
|
||||||
|
</ListBucketResult>`))
|
||||||
|
})
|
||||||
|
|
||||||
|
// All other buckets return 404 (not found).
|
||||||
|
mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusNotFound)
|
||||||
|
})
|
||||||
|
|
||||||
|
return httptest.NewServer(mux)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestS3Scanner_Sweep runs a full sweep against the fake S3 server and checks
// that only config-like object keys become findings, each tagged with the
// expected source type and confidence.
func TestS3Scanner_Sweep(t *testing.T) {
	srv := s3TestServer()
	defer srv.Close()

	// No Limiters: the nil check in Sweep makes rate limiting optional.
	src := &S3Scanner{
		Registry: cloudTestRegistry(),
		BaseURL:  srv.URL + "/%s/",
		client:   NewClient(),
	}

	out := make(chan recon.Finding, 32)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	if err := src.Sweep(ctx, "", out); err != nil {
		t.Fatalf("Sweep error: %v", err)
	}
	close(out)

	var findings []recon.Finding
	for f := range out {
		findings = append(findings, f)
	}

	// .env, config.yaml, data/settings.json match; readme.md does not.
	if len(findings) != 3 {
		t.Fatalf("expected 3 findings, got %d: %+v", len(findings), findings)
	}

	for _, f := range findings {
		if f.SourceType != "recon:s3" {
			t.Errorf("unexpected SourceType: %s", f.SourceType)
		}
		if f.Confidence != "medium" {
			t.Errorf("unexpected Confidence: %s", f.Confidence)
		}
	}
}
|
||||||
|
|
||||||
|
// TestS3Scanner_EmptyRegistry verifies that an empty provider registry yields
// no candidate buckets, so Sweep returns without error and emits no findings.
func TestS3Scanner_EmptyRegistry(t *testing.T) {
	src := &S3Scanner{
		Registry: providers.NewRegistryFromProviders(nil),
		Limiters: recon.NewLimiterRegistry(),
		client:   NewClient(),
	}

	out := make(chan recon.Finding, 4)
	ctx := context.Background()
	if err := src.Sweep(ctx, "", out); err != nil {
		t.Fatalf("Sweep error: %v", err)
	}
	close(out)

	if len(out) != 0 {
		t.Fatal("expected 0 findings with empty registry")
	}
}
|
||||||
|
|
||||||
|
// TestS3Scanner_CtxCancelled verifies that Sweep fails fast with an error
// when its context is cancelled before the first probe.
func TestS3Scanner_CtxCancelled(t *testing.T) {
	srv := s3TestServer()
	defer srv.Close()

	src := &S3Scanner{
		Registry: cloudTestRegistry(),
		BaseURL:  srv.URL + "/%s/",
		client:   NewClient(),
	}

	// Cancel up front so the per-iteration ctx.Err() check trips immediately.
	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	out := make(chan recon.Finding, 4)
	if err := src.Sweep(ctx, "", out); err == nil {
		t.Fatal("expected ctx error")
	}
}
|
||||||
|
|
||||||
|
// TestS3Scanner_EnabledAndMeta checks the scanner's static metadata: name,
// unconditional enablement, robots policy, and burst size.
func TestS3Scanner_EnabledAndMeta(t *testing.T) {
	s := &S3Scanner{}
	if s.Name() != "s3" {
		t.Fatalf("unexpected name: %s", s.Name())
	}
	if !s.Enabled(recon.Config{}) {
		t.Fatal("expected Enabled=true")
	}
	if s.RespectsRobots() {
		t.Fatal("expected RespectsRobots=false")
	}
	if s.Burst() != 3 {
		t.Fatal("expected Burst=3")
	}
}
|
||||||
Reference in New Issue
Block a user