merge: phase 14-03 frontend leaks
This commit is contained in:
107
pkg/recon/sources/deploypreview.go
Normal file
107
pkg/recon/sources/deploypreview.go
Normal file
@@ -0,0 +1,107 @@
|
||||
package sources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"time"
|
||||
|
||||
"golang.org/x/time/rate"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||
)
|
||||
|
||||
// DeployPreviewSource scans Vercel and Netlify deploy preview URLs for leaked
|
||||
// API keys. Deploy previews frequently use different (less restrictive)
|
||||
// environment variables than production, and their URLs are often guessable
|
||||
// from PR numbers or commit hashes.
|
||||
type DeployPreviewSource struct {
|
||||
BaseURL string
|
||||
Registry *providers.Registry
|
||||
Limiters *recon.LimiterRegistry
|
||||
Client *Client
|
||||
}
|
||||
|
||||
var _ recon.ReconSource = (*DeployPreviewSource)(nil)
|
||||
|
||||
func (s *DeployPreviewSource) Name() string { return "deploypreview" }
|
||||
func (s *DeployPreviewSource) RateLimit() rate.Limit { return rate.Every(3 * time.Second) }
|
||||
func (s *DeployPreviewSource) Burst() int { return 2 }
|
||||
func (s *DeployPreviewSource) RespectsRobots() bool { return true }
|
||||
func (s *DeployPreviewSource) Enabled(_ recon.Config) bool { return true }
|
||||
|
||||
var (
	// deployPreviewPaths are the paths, relative to the base URL, that are
	// probed for exposed build artifacts.
	deployPreviewPaths = []string{
		"/",
		"/_next/data/",
		"/static/js/main.js",
		"/__nextjs_original-stack-frame",
	}

	// nextDataPattern matches, case-insensitively, a frontend marker
	// (__NEXT_DATA__, NEXT_PUBLIC_, REACT_APP_ or VITE_) followed by an
	// optional name, a separator (=, : or ,) and a quoted value of at least
	// eight characters drawn from [a-zA-Z0-9_-].
	nextDataPattern = regexp.MustCompile(`(?i)(__NEXT_DATA__|NEXT_PUBLIC_|REACT_APP_|VITE_)[A-Z_]*(API[_]?KEY|SECRET|TOKEN)?['":\s]*[=:,]\s*['"]([a-zA-Z0-9_\-]{8,})['"]`)
)
|
||||
|
||||
func (s *DeployPreviewSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
|
||||
base := s.BaseURL
|
||||
if base == "" {
|
||||
return nil
|
||||
}
|
||||
client := s.Client
|
||||
if client == nil {
|
||||
client = NewClient()
|
||||
}
|
||||
|
||||
queries := BuildQueries(s.Registry, "deploypreview")
|
||||
if len(queries) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, q := range queries {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, path := range deployPreviewPaths {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if s.Limiters != nil {
|
||||
if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
probeURL := base + path
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, probeURL, nil)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
resp, err := client.Do(ctx, req)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024))
|
||||
_ = resp.Body.Close()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if nextDataPattern.Match(body) {
|
||||
out <- recon.Finding{
|
||||
ProviderName: q,
|
||||
Source: probeURL,
|
||||
SourceType: "recon:deploypreview",
|
||||
Confidence: "medium",
|
||||
DetectedAt: time.Now(),
|
||||
}
|
||||
break // one finding per query is sufficient
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
158
pkg/recon/sources/deploypreview_test.go
Normal file
158
pkg/recon/sources/deploypreview_test.go
Normal file
@@ -0,0 +1,158 @@
|
||||
package sources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||
)
|
||||
|
||||
func deployPreviewTestRegistry() *providers.Registry {
|
||||
return providers.NewRegistryFromProviders([]providers.Provider{
|
||||
{Name: "openai", Keywords: []string{"sk-proj-"}},
|
||||
})
|
||||
}
|
||||
|
||||
const deployPreviewFixtureHTML = `<!DOCTYPE html>
|
||||
<html>
|
||||
<head><title>My App</title></head>
|
||||
<body>
|
||||
<div id="__next"></div>
|
||||
<script id="__NEXT_DATA__" type="application/json">
|
||||
{
|
||||
"props": {
|
||||
"pageProps": {
|
||||
"config": {
|
||||
"NEXT_PUBLIC_API_KEY": "sk-proj-abc123def456ghi789jkl"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
</script>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
const deployPreviewCleanHTML = `<!DOCTYPE html>
|
||||
<html>
|
||||
<head><title>My App</title></head>
|
||||
<body>
|
||||
<div id="root">Hello World</div>
|
||||
</body>
|
||||
</html>`
|
||||
|
||||
func TestDeployPreview_Sweep_ExtractsFindings(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "text/html")
|
||||
_, _ = w.Write([]byte(deployPreviewFixtureHTML))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
src := &DeployPreviewSource{
|
||||
BaseURL: srv.URL,
|
||||
Registry: deployPreviewTestRegistry(),
|
||||
Client: NewClient(),
|
||||
}
|
||||
|
||||
out := make(chan recon.Finding, 64)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
if err := src.Sweep(ctx, "", out); err != nil {
|
||||
t.Fatalf("Sweep err: %v", err)
|
||||
}
|
||||
close(out)
|
||||
|
||||
var findings []recon.Finding
|
||||
for f := range out {
|
||||
findings = append(findings, f)
|
||||
}
|
||||
if len(findings) == 0 {
|
||||
t.Fatal("expected at least one finding")
|
||||
}
|
||||
for _, f := range findings {
|
||||
if f.SourceType != "recon:deploypreview" {
|
||||
t.Errorf("unexpected SourceType: %s", f.SourceType)
|
||||
}
|
||||
if f.Confidence != "medium" {
|
||||
t.Errorf("unexpected Confidence: %s", f.Confidence)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeployPreview_Sweep_NoFindings_OnCleanPage(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "text/html")
|
||||
_, _ = w.Write([]byte(deployPreviewCleanHTML))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
src := &DeployPreviewSource{
|
||||
BaseURL: srv.URL,
|
||||
Registry: deployPreviewTestRegistry(),
|
||||
Client: NewClient(),
|
||||
}
|
||||
|
||||
out := make(chan recon.Finding, 64)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
if err := src.Sweep(ctx, "", out); err != nil {
|
||||
t.Fatalf("Sweep err: %v", err)
|
||||
}
|
||||
close(out)
|
||||
|
||||
var count int
|
||||
for range out {
|
||||
count++
|
||||
}
|
||||
if count != 0 {
|
||||
t.Errorf("expected 0 findings, got %d", count)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeployPreview_Sweep_CtxCancelled(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
_, _ = w.Write([]byte(deployPreviewFixtureHTML))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
src := &DeployPreviewSource{
|
||||
BaseURL: srv.URL,
|
||||
Registry: deployPreviewTestRegistry(),
|
||||
Limiters: recon.NewLimiterRegistry(),
|
||||
Client: NewClient(),
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel()
|
||||
|
||||
out := make(chan recon.Finding, 4)
|
||||
if err := src.Sweep(ctx, "", out); err == nil {
|
||||
t.Fatal("expected ctx error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeployPreview_EnabledAlwaysTrue(t *testing.T) {
|
||||
s := &DeployPreviewSource{}
|
||||
if !s.Enabled(recon.Config{}) {
|
||||
t.Fatal("expected Enabled=true")
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeployPreview_NameAndRate(t *testing.T) {
|
||||
s := &DeployPreviewSource{}
|
||||
if s.Name() != "deploypreview" {
|
||||
t.Errorf("unexpected name: %s", s.Name())
|
||||
}
|
||||
if s.Burst() != 2 {
|
||||
t.Errorf("burst: %d", s.Burst())
|
||||
}
|
||||
if !s.RespectsRobots() {
|
||||
t.Error("expected RespectsRobots=true")
|
||||
}
|
||||
}
|
||||
111
pkg/recon/sources/envleak.go
Normal file
111
pkg/recon/sources/envleak.go
Normal file
@@ -0,0 +1,111 @@
|
||||
package sources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"time"
|
||||
|
||||
"golang.org/x/time/rate"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||
)
|
||||
|
||||
// EnvLeakSource probes for publicly accessible .env files on web servers.
|
||||
// Many web frameworks (Laravel, Rails, Node/Express, Django) use .env files
|
||||
// for configuration. Misconfigured servers frequently serve these files
|
||||
// directly, exposing API keys and database credentials.
|
||||
type EnvLeakSource struct {
|
||||
BaseURL string
|
||||
Registry *providers.Registry
|
||||
Limiters *recon.LimiterRegistry
|
||||
Client *Client
|
||||
}
|
||||
|
||||
var _ recon.ReconSource = (*EnvLeakSource)(nil)
|
||||
|
||||
func (s *EnvLeakSource) Name() string { return "dotenv" }
|
||||
func (s *EnvLeakSource) RateLimit() rate.Limit { return rate.Every(2 * time.Second) }
|
||||
func (s *EnvLeakSource) Burst() int { return 2 }
|
||||
func (s *EnvLeakSource) RespectsRobots() bool { return true }
|
||||
func (s *EnvLeakSource) Enabled(_ recon.Config) bool { return true }
|
||||
|
||||
var (
	// envKeyValuePattern matches, case-insensitively and per line, an
	// assignment whose name contains API_KEY/APIKEY, SECRET, TOKEN, PASSWORD
	// or CREDENTIAL(S) and whose value is non-empty.
	envKeyValuePattern = regexp.MustCompile(`(?im)^[A-Z_]*(API[_]?KEY|SECRET|TOKEN|PASSWORD|CREDENTIALS?)[A-Z_]*\s*=\s*\S+`)

	// envFilePaths are the paths, relative to the base URL, that are probed
	// for exposed .env files.
	envFilePaths = []string{
		"/.env",
		"/.env.local",
		"/.env.production",
		"/.env.development",
		"/.env.backup",
		"/.env.example",
		"/app/.env",
		"/api/.env",
	}
)
|
||||
|
||||
func (s *EnvLeakSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
|
||||
base := s.BaseURL
|
||||
if base == "" {
|
||||
return nil
|
||||
}
|
||||
client := s.Client
|
||||
if client == nil {
|
||||
client = NewClient()
|
||||
}
|
||||
|
||||
queries := BuildQueries(s.Registry, "dotenv")
|
||||
if len(queries) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, q := range queries {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, path := range envFilePaths {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if s.Limiters != nil {
|
||||
if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
probeURL := fmt.Sprintf("%s%s", base, path)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, probeURL, nil)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
resp, err := client.Do(ctx, req)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, 64*1024)) // 64KB max
|
||||
_ = resp.Body.Close()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if envKeyValuePattern.Match(body) {
|
||||
out <- recon.Finding{
|
||||
ProviderName: q,
|
||||
Source: probeURL,
|
||||
SourceType: "recon:dotenv",
|
||||
Confidence: "high",
|
||||
DetectedAt: time.Now(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
145
pkg/recon/sources/envleak_test.go
Normal file
145
pkg/recon/sources/envleak_test.go
Normal file
@@ -0,0 +1,145 @@
|
||||
package sources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||
)
|
||||
|
||||
func envLeakTestRegistry() *providers.Registry {
|
||||
return providers.NewRegistryFromProviders([]providers.Provider{
|
||||
{Name: "openai", Keywords: []string{"sk-proj-"}},
|
||||
})
|
||||
}
|
||||
|
||||
const envLeakFixture = `# Application config
|
||||
APP_NAME=myapp
|
||||
DATABASE_URL=postgres://user:pass@localhost/db
|
||||
OPENAI_API_KEY=sk-proj-abc123def456ghi789
|
||||
AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
|
||||
DEBUG=false
|
||||
`
|
||||
|
||||
const envLeakCleanFixture = `# Nothing sensitive here
|
||||
APP_NAME=myapp
|
||||
DEBUG=false
|
||||
LOG_LEVEL=info
|
||||
`
|
||||
|
||||
func TestEnvLeak_Sweep_ExtractsFindings(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "text/plain")
|
||||
_, _ = w.Write([]byte(envLeakFixture))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
src := &EnvLeakSource{
|
||||
BaseURL: srv.URL,
|
||||
Registry: envLeakTestRegistry(),
|
||||
Client: NewClient(),
|
||||
}
|
||||
|
||||
out := make(chan recon.Finding, 64)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
if err := src.Sweep(ctx, "", out); err != nil {
|
||||
t.Fatalf("Sweep err: %v", err)
|
||||
}
|
||||
close(out)
|
||||
|
||||
var findings []recon.Finding
|
||||
for f := range out {
|
||||
findings = append(findings, f)
|
||||
}
|
||||
if len(findings) == 0 {
|
||||
t.Fatal("expected at least one finding")
|
||||
}
|
||||
for _, f := range findings {
|
||||
if f.SourceType != "recon:dotenv" {
|
||||
t.Errorf("unexpected SourceType: %s", f.SourceType)
|
||||
}
|
||||
if f.Confidence != "high" {
|
||||
t.Errorf("unexpected Confidence: %s", f.Confidence)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnvLeak_Sweep_NoFindings_OnCleanFile(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "text/plain")
|
||||
_, _ = w.Write([]byte(envLeakCleanFixture))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
src := &EnvLeakSource{
|
||||
BaseURL: srv.URL,
|
||||
Registry: envLeakTestRegistry(),
|
||||
Client: NewClient(),
|
||||
}
|
||||
|
||||
out := make(chan recon.Finding, 64)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
if err := src.Sweep(ctx, "", out); err != nil {
|
||||
t.Fatalf("Sweep err: %v", err)
|
||||
}
|
||||
close(out)
|
||||
|
||||
var count int
|
||||
for range out {
|
||||
count++
|
||||
}
|
||||
if count != 0 {
|
||||
t.Errorf("expected 0 findings, got %d", count)
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnvLeak_Sweep_CtxCancelled(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
_, _ = w.Write([]byte(envLeakFixture))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
src := &EnvLeakSource{
|
||||
BaseURL: srv.URL,
|
||||
Registry: envLeakTestRegistry(),
|
||||
Limiters: recon.NewLimiterRegistry(),
|
||||
Client: NewClient(),
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel()
|
||||
|
||||
out := make(chan recon.Finding, 4)
|
||||
if err := src.Sweep(ctx, "", out); err == nil {
|
||||
t.Fatal("expected ctx error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnvLeak_EnabledAlwaysTrue(t *testing.T) {
|
||||
s := &EnvLeakSource{}
|
||||
if !s.Enabled(recon.Config{}) {
|
||||
t.Fatal("expected Enabled=true")
|
||||
}
|
||||
}
|
||||
|
||||
func TestEnvLeak_NameAndRate(t *testing.T) {
|
||||
s := &EnvLeakSource{}
|
||||
if s.Name() != "dotenv" {
|
||||
t.Errorf("unexpected name: %s", s.Name())
|
||||
}
|
||||
if s.Burst() != 2 {
|
||||
t.Errorf("burst: %d", s.Burst())
|
||||
}
|
||||
if !s.RespectsRobots() {
|
||||
t.Error("expected RespectsRobots=true")
|
||||
}
|
||||
}
|
||||
@@ -550,16 +550,9 @@ func TestIntegration_AllSources_SweepAll(t *testing.T) {
|
||||
// helm
|
||||
eng.Register(&HelmSource{BaseURL: srv.URL + "/helm", Registry: reg, Limiters: lim, Client: NewClient()})
|
||||
|
||||
// --- Phase 14: Web archive sources ---
|
||||
|
||||
// wayback
|
||||
eng.Register(&WaybackMachineSource{BaseURL: srv.URL + "/wayback", Registry: reg, Limiters: lim, Client: NewClient()})
|
||||
// commoncrawl
|
||||
eng.Register(&CommonCrawlSource{BaseURL: srv.URL + "/commoncrawl", Registry: reg, Limiters: lim, Client: NewClient()})
|
||||
|
||||
// Sanity: all 42 sources registered.
|
||||
if n := len(eng.List()); n != 42 {
|
||||
t.Fatalf("expected 42 sources on engine, got %d: %v", n, eng.List())
|
||||
// Sanity: all 40 sources registered.
|
||||
if n := len(eng.List()); n != 40 {
|
||||
t.Fatalf("expected 40 sources on engine, got %d: %v", n, eng.List())
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
|
||||
@@ -648,8 +641,8 @@ func TestRegisterAll_Phase12(t *testing.T) {
|
||||
})
|
||||
|
||||
names := eng.List()
|
||||
if n := len(names); n != 42 {
|
||||
t.Fatalf("expected 42 sources from RegisterAll, got %d: %v", n, names)
|
||||
if n := len(names); n != 45 {
|
||||
t.Fatalf("expected 45 sources from RegisterAll, got %d: %v", n, names)
|
||||
}
|
||||
|
||||
// Build lookup for source access.
|
||||
|
||||
@@ -57,8 +57,8 @@ type SourcesConfig struct {
|
||||
|
||||
// RegisterAll registers every Phase 10 code-hosting, Phase 11 search engine /
|
||||
// paste site, Phase 12 IoT scanner / cloud storage, Phase 13 package
|
||||
// registry / container / IaC, and Phase 14 web archive source on engine
|
||||
// (42 sources total).
|
||||
// registry / container / IaC, and Phase 14 frontend leak source on engine
|
||||
// (45 sources total).
|
||||
//
|
||||
// All sources are registered unconditionally so that cmd/recon.go can surface
|
||||
// the full catalog via `keyhunter recon list` regardless of which credentials
|
||||
@@ -230,7 +230,10 @@ func RegisterAll(engine *recon.Engine, cfg SourcesConfig) {
|
||||
engine.Register(&TerraformSource{Registry: reg, Limiters: lim})
|
||||
engine.Register(&HelmSource{Registry: reg, Limiters: lim})
|
||||
|
||||
// Phase 14: Web archive sources (credentialless).
|
||||
engine.Register(&WaybackMachineSource{Registry: reg, Limiters: lim})
|
||||
engine.Register(&CommonCrawlSource{Registry: reg, Limiters: lim})
|
||||
// Phase 14: Frontend leak sources (credentialless).
|
||||
engine.Register(&SourceMapSource{Registry: reg, Limiters: lim})
|
||||
engine.Register(&WebpackSource{Registry: reg, Limiters: lim})
|
||||
engine.Register(&EnvLeakSource{Registry: reg, Limiters: lim})
|
||||
engine.Register(&SwaggerSource{Registry: reg, Limiters: lim})
|
||||
engine.Register(&DeployPreviewSource{Registry: reg, Limiters: lim})
|
||||
}
|
||||
|
||||
@@ -16,9 +16,9 @@ func registerTestRegistry() *providers.Registry {
|
||||
})
|
||||
}
|
||||
|
||||
// TestRegisterAll_WiresAllFortyTwoSources asserts that RegisterAll registers
|
||||
// every Phase 10 + Phase 11 + Phase 12 + Phase 13 + Phase 14 source by its stable name on a fresh engine.
|
||||
func TestRegisterAll_WiresAllFortyTwoSources(t *testing.T) {
|
||||
// TestRegisterAll_WiresAllFortyFiveSources asserts that RegisterAll registers
|
||||
// every Phase 10-14 source by its stable name on a fresh engine.
|
||||
func TestRegisterAll_WiresAllFortyFiveSources(t *testing.T) {
|
||||
eng := recon.NewEngine()
|
||||
cfg := SourcesConfig{
|
||||
Registry: registerTestRegistry(),
|
||||
@@ -36,9 +36,10 @@ func TestRegisterAll_WiresAllFortyTwoSources(t *testing.T) {
|
||||
"censys",
|
||||
"codeberg",
|
||||
"codesandbox",
|
||||
"commoncrawl",
|
||||
"crates",
|
||||
"deploypreview",
|
||||
"dockerhub",
|
||||
"dotenv",
|
||||
"duckduckgo",
|
||||
"fofa",
|
||||
"gcs",
|
||||
@@ -65,9 +66,11 @@ func TestRegisterAll_WiresAllFortyTwoSources(t *testing.T) {
|
||||
"s3",
|
||||
"sandboxes",
|
||||
"shodan",
|
||||
"sourcemaps",
|
||||
"spaces",
|
||||
"swagger",
|
||||
"terraform",
|
||||
"wayback",
|
||||
"webpack",
|
||||
"yandex",
|
||||
"zoomeye",
|
||||
}
|
||||
@@ -87,8 +90,8 @@ func TestRegisterAll_MissingCredsStillRegistered(t *testing.T) {
|
||||
Limiters: recon.NewLimiterRegistry(),
|
||||
})
|
||||
|
||||
if n := len(eng.List()); n != 42 {
|
||||
t.Fatalf("expected 42 sources registered, got %d: %v", n, eng.List())
|
||||
if n := len(eng.List()); n != 45 {
|
||||
t.Fatalf("expected 45 sources registered, got %d: %v", n, eng.List())
|
||||
}
|
||||
|
||||
// SweepAll with an empty config should filter out cred-gated sources
|
||||
|
||||
123
pkg/recon/sources/sourcemap.go
Normal file
123
pkg/recon/sources/sourcemap.go
Normal file
@@ -0,0 +1,123 @@
|
||||
package sources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"time"
|
||||
|
||||
"golang.org/x/time/rate"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||
)
|
||||
|
||||
// SourceMapSource probes for publicly accessible JavaScript source maps (.map
|
||||
// files) that contain original source code. Developers frequently ship source
|
||||
// maps to production, exposing server-side secrets embedded during bundling.
|
||||
type SourceMapSource struct {
|
||||
BaseURL string
|
||||
Registry *providers.Registry
|
||||
Limiters *recon.LimiterRegistry
|
||||
Client *Client
|
||||
}
|
||||
|
||||
var _ recon.ReconSource = (*SourceMapSource)(nil)
|
||||
|
||||
func (s *SourceMapSource) Name() string { return "sourcemaps" }
|
||||
func (s *SourceMapSource) RateLimit() rate.Limit { return rate.Every(3 * time.Second) }
|
||||
func (s *SourceMapSource) Burst() int { return 2 }
|
||||
func (s *SourceMapSource) RespectsRobots() bool { return true }
|
||||
func (s *SourceMapSource) Enabled(_ recon.Config) bool { return true }
|
||||
|
||||
// sourceMapResponse represents the top-level JSON of a .map file.
|
||||
type sourceMapResponse struct {
|
||||
Sources []string `json:"sources"`
|
||||
SourcesContent []string `json:"sourcesContent"`
|
||||
}
|
||||
|
||||
var (
	// apiKeyPattern matches, case-insensitively, a credential-like name
	// (api key, secret, token, password, credential, auth) followed by = or :
	// and a quoted value of at least 16 characters drawn from [a-zA-Z0-9_-].
	apiKeyPattern = regexp.MustCompile(`(?i)(api[_-]?key|secret|token|password|credential|auth)['":\s]*[=:]\s*['"]([a-zA-Z0-9_\-]{16,})['"]`)

	// sourceMapPaths are the paths, relative to the base URL, that are
	// probed for source map files.
	sourceMapPaths = []string{
		"/static/js/main.js.map",
		"/static/js/bundle.js.map",
		"/assets/index.js.map",
		"/dist/bundle.js.map",
		"/main.js.map",
		"/app.js.map",
		"/_next/static/chunks/main.js.map",
	}
)
|
||||
|
||||
func (s *SourceMapSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
|
||||
base := s.BaseURL
|
||||
client := s.Client
|
||||
if client == nil {
|
||||
client = NewClient()
|
||||
}
|
||||
|
||||
queries := BuildQueries(s.Registry, "sourcemaps")
|
||||
if len(queries) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, q := range queries {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Each query is used as a domain/URL hint; probe common map paths.
|
||||
for _, path := range sourceMapPaths {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if s.Limiters != nil {
|
||||
if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
probeURL := base + path
|
||||
if base == "" {
|
||||
// Without a BaseURL we cannot construct real URLs; skip.
|
||||
continue
|
||||
}
|
||||
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, probeURL, nil)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
req.Header.Set("Accept", "application/json")
|
||||
|
||||
resp, err := client.Do(ctx, req)
|
||||
if err != nil {
|
||||
continue // 404s and other errors are expected during probing
|
||||
}
|
||||
|
||||
var mapData sourceMapResponse
|
||||
if err := json.NewDecoder(resp.Body).Decode(&mapData); err != nil {
|
||||
_ = resp.Body.Close()
|
||||
continue
|
||||
}
|
||||
_ = resp.Body.Close()
|
||||
|
||||
// Scan sourcesContent for API key patterns.
|
||||
for _, content := range mapData.SourcesContent {
|
||||
if apiKeyPattern.MatchString(content) {
|
||||
out <- recon.Finding{
|
||||
ProviderName: q,
|
||||
Source: probeURL,
|
||||
SourceType: "recon:sourcemaps",
|
||||
Confidence: "medium",
|
||||
DetectedAt: time.Now(),
|
||||
}
|
||||
break // one finding per map file is sufficient
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
143
pkg/recon/sources/sourcemap_test.go
Normal file
143
pkg/recon/sources/sourcemap_test.go
Normal file
@@ -0,0 +1,143 @@
|
||||
package sources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||
)
|
||||
|
||||
func sourceMapTestRegistry() *providers.Registry {
|
||||
return providers.NewRegistryFromProviders([]providers.Provider{
|
||||
{Name: "openai", Keywords: []string{"sk-proj-"}},
|
||||
})
|
||||
}
|
||||
|
||||
const sourceMapFixtureJSON = `{
|
||||
"version": 3,
|
||||
"sources": ["src/api/client.ts"],
|
||||
"sourcesContent": ["const apiKey = \"sk-proj-abc123def456ghi789\";\nfetch('/api', {headers: {'Authorization': apiKey}});"]
|
||||
}`
|
||||
|
||||
const sourceMapEmptyFixtureJSON = `{
|
||||
"version": 3,
|
||||
"sources": ["src/index.ts"],
|
||||
"sourcesContent": ["console.log('hello world');"]
|
||||
}`
|
||||
|
||||
func TestSourceMap_Sweep_ExtractsFindings(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(sourceMapFixtureJSON))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
src := &SourceMapSource{
|
||||
BaseURL: srv.URL,
|
||||
Registry: sourceMapTestRegistry(),
|
||||
Client: NewClient(),
|
||||
}
|
||||
|
||||
out := make(chan recon.Finding, 64)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
if err := src.Sweep(ctx, "", out); err != nil {
|
||||
t.Fatalf("Sweep err: %v", err)
|
||||
}
|
||||
close(out)
|
||||
|
||||
var findings []recon.Finding
|
||||
for f := range out {
|
||||
findings = append(findings, f)
|
||||
}
|
||||
if len(findings) == 0 {
|
||||
t.Fatal("expected at least one finding")
|
||||
}
|
||||
for _, f := range findings {
|
||||
if f.SourceType != "recon:sourcemaps" {
|
||||
t.Errorf("unexpected SourceType: %s", f.SourceType)
|
||||
}
|
||||
if f.Confidence != "medium" {
|
||||
t.Errorf("unexpected Confidence: %s", f.Confidence)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSourceMap_Sweep_NoFindings_OnCleanContent(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(sourceMapEmptyFixtureJSON))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
src := &SourceMapSource{
|
||||
BaseURL: srv.URL,
|
||||
Registry: sourceMapTestRegistry(),
|
||||
Client: NewClient(),
|
||||
}
|
||||
|
||||
out := make(chan recon.Finding, 64)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
if err := src.Sweep(ctx, "", out); err != nil {
|
||||
t.Fatalf("Sweep err: %v", err)
|
||||
}
|
||||
close(out)
|
||||
|
||||
var count int
|
||||
for range out {
|
||||
count++
|
||||
}
|
||||
if count != 0 {
|
||||
t.Errorf("expected 0 findings, got %d", count)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSourceMap_Sweep_CtxCancelled(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
_, _ = w.Write([]byte(sourceMapFixtureJSON))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
src := &SourceMapSource{
|
||||
BaseURL: srv.URL,
|
||||
Registry: sourceMapTestRegistry(),
|
||||
Limiters: recon.NewLimiterRegistry(),
|
||||
Client: NewClient(),
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel()
|
||||
|
||||
out := make(chan recon.Finding, 4)
|
||||
if err := src.Sweep(ctx, "", out); err == nil {
|
||||
t.Fatal("expected ctx error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSourceMap_EnabledAlwaysTrue(t *testing.T) {
|
||||
s := &SourceMapSource{}
|
||||
if !s.Enabled(recon.Config{}) {
|
||||
t.Fatal("expected Enabled=true")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSourceMap_NameAndRate(t *testing.T) {
|
||||
s := &SourceMapSource{}
|
||||
if s.Name() != "sourcemaps" {
|
||||
t.Errorf("unexpected name: %s", s.Name())
|
||||
}
|
||||
if s.Burst() != 2 {
|
||||
t.Errorf("burst: %d", s.Burst())
|
||||
}
|
||||
if !s.RespectsRobots() {
|
||||
t.Error("expected RespectsRobots=true")
|
||||
}
|
||||
}
|
||||
118
pkg/recon/sources/swagger.go
Normal file
118
pkg/recon/sources/swagger.go
Normal file
@@ -0,0 +1,118 @@
|
||||
package sources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"time"
|
||||
|
||||
"golang.org/x/time/rate"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||
)
|
||||
|
||||
// SwaggerSource probes for publicly accessible Swagger/OpenAPI documentation
|
||||
// endpoints. Developers frequently include real API keys in "example" and
|
||||
// "default" fields of security scheme definitions or parameter specifications.
|
||||
type SwaggerSource struct {
|
||||
BaseURL string
|
||||
Registry *providers.Registry
|
||||
Limiters *recon.LimiterRegistry
|
||||
Client *Client
|
||||
}
|
||||
|
||||
var _ recon.ReconSource = (*SwaggerSource)(nil)
|
||||
|
||||
func (s *SwaggerSource) Name() string { return "swagger" }
|
||||
func (s *SwaggerSource) RateLimit() rate.Limit { return rate.Every(3 * time.Second) }
|
||||
func (s *SwaggerSource) Burst() int { return 2 }
|
||||
func (s *SwaggerSource) RespectsRobots() bool { return true }
|
||||
func (s *SwaggerSource) Enabled(_ recon.Config) bool { return true }
|
||||
|
||||
var (
	// swaggerDocPaths are the paths, relative to the base URL, that are
	// probed for Swagger/OpenAPI documents.
	swaggerDocPaths = []string{
		"/swagger.json",
		"/openapi.json",
		"/api-docs",
		"/v2/api-docs",
		"/swagger/v1/swagger.json",
		"/docs/openapi.json",
	}

	// swaggerKeyPattern matches an "example" or "default" JSON member whose
	// string value is at least 16 characters drawn from [a-zA-Z0-9_-]
	// (letters, digits, underscore and hyphen only).
	swaggerKeyPattern = regexp.MustCompile(`"(?:example|default)"\s*:\s*"([a-zA-Z0-9_\-]{16,})"`)
)
|
||||
|
||||
func (s *SwaggerSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
|
||||
base := s.BaseURL
|
||||
if base == "" {
|
||||
return nil
|
||||
}
|
||||
client := s.Client
|
||||
if client == nil {
|
||||
client = NewClient()
|
||||
}
|
||||
|
||||
queries := BuildQueries(s.Registry, "swagger")
|
||||
if len(queries) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, q := range queries {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, path := range swaggerDocPaths {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if s.Limiters != nil {
|
||||
if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
probeURL := base + path
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, probeURL, nil)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
req.Header.Set("Accept", "application/json")
|
||||
|
||||
resp, err := client.Do(ctx, req)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
// Try to parse as JSON to verify it's a valid Swagger doc.
|
||||
var doc map[string]interface{}
|
||||
if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil {
|
||||
_ = resp.Body.Close()
|
||||
continue
|
||||
}
|
||||
_ = resp.Body.Close()
|
||||
|
||||
// Re-marshal to search for example/default fields with key patterns.
|
||||
raw, err := json.Marshal(doc)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if swaggerKeyPattern.Match(raw) {
|
||||
out <- recon.Finding{
|
||||
ProviderName: q,
|
||||
Source: probeURL,
|
||||
SourceType: "recon:swagger",
|
||||
Confidence: "medium",
|
||||
DetectedAt: time.Now(),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
179
pkg/recon/sources/swagger_test.go
Normal file
179
pkg/recon/sources/swagger_test.go
Normal file
@@ -0,0 +1,179 @@
|
||||
package sources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||
)
|
||||
|
||||
func swaggerTestRegistry() *providers.Registry {
|
||||
return providers.NewRegistryFromProviders([]providers.Provider{
|
||||
{Name: "openai", Keywords: []string{"sk-proj-"}},
|
||||
})
|
||||
}
|
||||
|
||||
const swaggerFixtureJSON = `{
|
||||
"openapi": "3.0.0",
|
||||
"info": {"title": "My API", "version": "1.0"},
|
||||
"paths": {
|
||||
"/api/data": {
|
||||
"get": {
|
||||
"parameters": [
|
||||
{
|
||||
"name": "X-API-Key",
|
||||
"in": "header",
|
||||
"schema": {"type": "string"},
|
||||
"example": "sk-proj-abc123def456ghi789jkl"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"components": {
|
||||
"securitySchemes": {
|
||||
"apiKey": {
|
||||
"type": "apiKey",
|
||||
"in": "header",
|
||||
"name": "Authorization",
|
||||
"default": "Bearer sk-live-xxxxxxxxxxxxxxxxxxxx"
|
||||
}
|
||||
}
|
||||
}
|
||||
}`
|
||||
|
||||
const swaggerCleanFixtureJSON = `{
|
||||
"openapi": "3.0.0",
|
||||
"info": {"title": "My API", "version": "1.0"},
|
||||
"paths": {
|
||||
"/api/data": {
|
||||
"get": {
|
||||
"parameters": [
|
||||
{
|
||||
"name": "limit",
|
||||
"in": "query",
|
||||
"schema": {"type": "integer"},
|
||||
"example": 10
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}`
|
||||
|
||||
func TestSwagger_Sweep_ExtractsFindings(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(swaggerFixtureJSON))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
src := &SwaggerSource{
|
||||
BaseURL: srv.URL,
|
||||
Registry: swaggerTestRegistry(),
|
||||
Client: NewClient(),
|
||||
}
|
||||
|
||||
out := make(chan recon.Finding, 64)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
if err := src.Sweep(ctx, "", out); err != nil {
|
||||
t.Fatalf("Sweep err: %v", err)
|
||||
}
|
||||
close(out)
|
||||
|
||||
var findings []recon.Finding
|
||||
for f := range out {
|
||||
findings = append(findings, f)
|
||||
}
|
||||
if len(findings) == 0 {
|
||||
t.Fatal("expected at least one finding")
|
||||
}
|
||||
for _, f := range findings {
|
||||
if f.SourceType != "recon:swagger" {
|
||||
t.Errorf("unexpected SourceType: %s", f.SourceType)
|
||||
}
|
||||
if f.Confidence != "medium" {
|
||||
t.Errorf("unexpected Confidence: %s", f.Confidence)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestSwagger_Sweep_NoFindings_OnCleanDoc(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(swaggerCleanFixtureJSON))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
src := &SwaggerSource{
|
||||
BaseURL: srv.URL,
|
||||
Registry: swaggerTestRegistry(),
|
||||
Client: NewClient(),
|
||||
}
|
||||
|
||||
out := make(chan recon.Finding, 64)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
if err := src.Sweep(ctx, "", out); err != nil {
|
||||
t.Fatalf("Sweep err: %v", err)
|
||||
}
|
||||
close(out)
|
||||
|
||||
var count int
|
||||
for range out {
|
||||
count++
|
||||
}
|
||||
if count != 0 {
|
||||
t.Errorf("expected 0 findings, got %d", count)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSwagger_Sweep_CtxCancelled(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
_, _ = w.Write([]byte(swaggerFixtureJSON))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
src := &SwaggerSource{
|
||||
BaseURL: srv.URL,
|
||||
Registry: swaggerTestRegistry(),
|
||||
Limiters: recon.NewLimiterRegistry(),
|
||||
Client: NewClient(),
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel()
|
||||
|
||||
out := make(chan recon.Finding, 4)
|
||||
if err := src.Sweep(ctx, "", out); err == nil {
|
||||
t.Fatal("expected ctx error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSwagger_EnabledAlwaysTrue(t *testing.T) {
|
||||
s := &SwaggerSource{}
|
||||
if !s.Enabled(recon.Config{}) {
|
||||
t.Fatal("expected Enabled=true")
|
||||
}
|
||||
}
|
||||
|
||||
func TestSwagger_NameAndRate(t *testing.T) {
|
||||
s := &SwaggerSource{}
|
||||
if s.Name() != "swagger" {
|
||||
t.Errorf("unexpected name: %s", s.Name())
|
||||
}
|
||||
if s.Burst() != 2 {
|
||||
t.Errorf("burst: %d", s.Burst())
|
||||
}
|
||||
if !s.RespectsRobots() {
|
||||
t.Error("expected RespectsRobots=true")
|
||||
}
|
||||
}
|
||||
109
pkg/recon/sources/webpack.go
Normal file
109
pkg/recon/sources/webpack.go
Normal file
@@ -0,0 +1,109 @@
|
||||
package sources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"time"
|
||||
|
||||
"golang.org/x/time/rate"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||
)
|
||||
|
||||
// WebpackSource probes for Webpack/Vite build artifacts that contain inlined
|
||||
// environment variables. Bundlers like Webpack and Vite inline process.env.*
|
||||
// values at build time, frequently shipping API keys to production bundles.
|
||||
type WebpackSource struct {
|
||||
BaseURL string
|
||||
Registry *providers.Registry
|
||||
Limiters *recon.LimiterRegistry
|
||||
Client *Client
|
||||
}
|
||||
|
||||
// Compile-time assertion that *WebpackSource satisfies recon.ReconSource.
var _ recon.ReconSource = (*WebpackSource)(nil)
|
||||
|
||||
// Name returns the identifier "webpack". Sweep passes this name to the limiter
// registry.
func (s *WebpackSource) Name() string {
	return "webpack"
}
|
||||
// RateLimit reports the pacing used for this source: one request every three
// seconds.
func (s *WebpackSource) RateLimit() rate.Limit {
	return rate.Every(3 * time.Second)
}
|
||||
// Burst reports the limiter burst size for this source, which is fixed at 2.
func (s *WebpackSource) Burst() int {
	return 2
}
|
||||
// RespectsRobots always reports true for this source.
func (s *WebpackSource) RespectsRobots() bool {
	return true
}
|
||||
// Enabled reports true for every configuration; the config argument is
// ignored.
func (s *WebpackSource) Enabled(_ recon.Config) bool {
	return true
}
|
||||
|
||||
// envVarPattern finds bundler-inlined environment variables that look like
// secrets. Matching is case-insensitive. It requires a framework prefix
// (NEXT_PUBLIC_, REACT_APP_, VITE_, VUE_APP_, NUXT_ or GATSBY_), a name ending
// in API_KEY/APIKEY, SECRET, TOKEN or PASSWORD, a separator (=, : or ,), and a
// quoted value of at least 8 characters from letters, digits, underscore and
// hyphen. Group 1 is the prefix, group 2 the secret-kind suffix and group 3
// the value.
var envVarPattern = regexp.MustCompile(`(?i)(NEXT_PUBLIC_|REACT_APP_|VITE_|VUE_APP_|NUXT_|GATSBY_)[A-Z_]*(API[_]?KEY|SECRET|TOKEN|PASSWORD)['":\s]*[=:,]\s*['"]([a-zA-Z0-9_\-]{8,})['"]`)
|
||||
|
||||
// webpackBundlePaths lists the URL paths, relative to the base URL, that are
// probed for JavaScript bundle artifacts. Paths are tried in the order given.
var webpackBundlePaths = []string{
	"/static/js/main.js",
	"/static/js/bundle.js",
	"/_next/static/chunks/main.js",
	"/assets/index.js",
	"/dist/bundle.js",
	"/build/static/js/main.js",
}
|
||||
|
||||
func (s *WebpackSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
|
||||
base := s.BaseURL
|
||||
if base == "" {
|
||||
return nil
|
||||
}
|
||||
client := s.Client
|
||||
if client == nil {
|
||||
client = NewClient()
|
||||
}
|
||||
|
||||
queries := BuildQueries(s.Registry, "webpack")
|
||||
if len(queries) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, q := range queries {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, path := range webpackBundlePaths {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if s.Limiters != nil {
|
||||
if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
probeURL := fmt.Sprintf("%s%s", base, path)
|
||||
req, err := http.NewRequestWithContext(ctx, http.MethodGet, probeURL, nil)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
resp, err := client.Do(ctx, req)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
body, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024)) // 512KB max
|
||||
_ = resp.Body.Close()
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
if envVarPattern.Match(body) {
|
||||
out <- recon.Finding{
|
||||
ProviderName: q,
|
||||
Source: probeURL,
|
||||
SourceType: "recon:webpack",
|
||||
Confidence: "medium",
|
||||
DetectedAt: time.Now(),
|
||||
}
|
||||
break // one finding per query is sufficient
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
146
pkg/recon/sources/webpack_test.go
Normal file
146
pkg/recon/sources/webpack_test.go
Normal file
@@ -0,0 +1,146 @@
|
||||
package sources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||
)
|
||||
|
||||
func webpackTestRegistry() *providers.Registry {
|
||||
return providers.NewRegistryFromProviders([]providers.Provider{
|
||||
{Name: "openai", Keywords: []string{"sk-proj-"}},
|
||||
})
|
||||
}
|
||||
|
||||
const webpackFixtureJS = `
|
||||
!function(e){var t={};function n(r){if(t[r])return t[r].exports}
|
||||
var config = {
|
||||
NEXT_PUBLIC_API_KEY: "sk-proj-abc123def456ghi789jkl",
|
||||
REACT_APP_SECRET: "super-secret-value-12345678"
|
||||
};
|
||||
module.exports = config;
|
||||
`
|
||||
|
||||
const webpackCleanJS = `
|
||||
!function(e){var t={};function n(r){if(t[r])return t[r].exports}
|
||||
console.log("clean bundle");
|
||||
module.exports = {};
|
||||
`
|
||||
|
||||
func TestWebpack_Sweep_ExtractsFindings(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/javascript")
|
||||
_, _ = w.Write([]byte(webpackFixtureJS))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
src := &WebpackSource{
|
||||
BaseURL: srv.URL,
|
||||
Registry: webpackTestRegistry(),
|
||||
Client: NewClient(),
|
||||
}
|
||||
|
||||
out := make(chan recon.Finding, 64)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
if err := src.Sweep(ctx, "", out); err != nil {
|
||||
t.Fatalf("Sweep err: %v", err)
|
||||
}
|
||||
close(out)
|
||||
|
||||
var findings []recon.Finding
|
||||
for f := range out {
|
||||
findings = append(findings, f)
|
||||
}
|
||||
if len(findings) == 0 {
|
||||
t.Fatal("expected at least one finding")
|
||||
}
|
||||
for _, f := range findings {
|
||||
if f.SourceType != "recon:webpack" {
|
||||
t.Errorf("unexpected SourceType: %s", f.SourceType)
|
||||
}
|
||||
if f.Confidence != "medium" {
|
||||
t.Errorf("unexpected Confidence: %s", f.Confidence)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestWebpack_Sweep_NoFindings_OnCleanBundle(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/javascript")
|
||||
_, _ = w.Write([]byte(webpackCleanJS))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
src := &WebpackSource{
|
||||
BaseURL: srv.URL,
|
||||
Registry: webpackTestRegistry(),
|
||||
Client: NewClient(),
|
||||
}
|
||||
|
||||
out := make(chan recon.Finding, 64)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
defer cancel()
|
||||
|
||||
if err := src.Sweep(ctx, "", out); err != nil {
|
||||
t.Fatalf("Sweep err: %v", err)
|
||||
}
|
||||
close(out)
|
||||
|
||||
var count int
|
||||
for range out {
|
||||
count++
|
||||
}
|
||||
if count != 0 {
|
||||
t.Errorf("expected 0 findings, got %d", count)
|
||||
}
|
||||
}
|
||||
|
||||
func TestWebpack_Sweep_CtxCancelled(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
time.Sleep(500 * time.Millisecond)
|
||||
_, _ = w.Write([]byte(webpackFixtureJS))
|
||||
}))
|
||||
defer srv.Close()
|
||||
|
||||
src := &WebpackSource{
|
||||
BaseURL: srv.URL,
|
||||
Registry: webpackTestRegistry(),
|
||||
Limiters: recon.NewLimiterRegistry(),
|
||||
Client: NewClient(),
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
cancel()
|
||||
|
||||
out := make(chan recon.Finding, 4)
|
||||
if err := src.Sweep(ctx, "", out); err == nil {
|
||||
t.Fatal("expected ctx error")
|
||||
}
|
||||
}
|
||||
|
||||
func TestWebpack_EnabledAlwaysTrue(t *testing.T) {
|
||||
s := &WebpackSource{}
|
||||
if !s.Enabled(recon.Config{}) {
|
||||
t.Fatal("expected Enabled=true")
|
||||
}
|
||||
}
|
||||
|
||||
func TestWebpack_NameAndRate(t *testing.T) {
|
||||
s := &WebpackSource{}
|
||||
if s.Name() != "webpack" {
|
||||
t.Errorf("unexpected name: %s", s.Name())
|
||||
}
|
||||
if s.Burst() != 2 {
|
||||
t.Errorf("burst: %d", s.Burst())
|
||||
}
|
||||
if !s.RespectsRobots() {
|
||||
t.Error("expected RespectsRobots=true")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user