feat(14-03): implement SourceMapSource, WebpackSource, EnvLeakSource with tests

- SourceMapSource probes .map files for original source containing API keys
- WebpackSource scans JS bundles for inlined NEXT_PUBLIC_/REACT_APP_/VITE_ env vars
- EnvLeakSource probes common .env paths for exposed environment files
- All three implement ReconSource, credentialless, with httptest-based tests
This commit is contained in:
salvacybersec
2026-04-06 13:17:07 +03:00
parent dc90785ab0
commit b57bd5e7d9
6 changed files with 777 additions and 0 deletions

View File

@@ -0,0 +1,111 @@
package sources
import (
"context"
"fmt"
"io"
"net/http"
"regexp"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// EnvLeakSource probes for publicly accessible .env files on web servers.
// Many web frameworks (Laravel, Rails, Node/Express, Django) use .env files
// for configuration. Misconfigured servers frequently serve these files
// directly, exposing API keys and database credentials.
type EnvLeakSource struct {
// BaseURL is the prefix every entry of envFilePaths is appended to. Sweep
// produces no findings when it is empty.
BaseURL string
// Registry is handed to BuildQueries to obtain the queries Sweep reports
// findings under.
Registry *providers.Registry
// Limiters is optional; when non-nil, Sweep waits on it before each probe.
Limiters *recon.LimiterRegistry
// Client performs the HTTP requests; Sweep falls back to NewClient() when
// it is nil.
Client *Client
}
// Compile-time check that *EnvLeakSource satisfies recon.ReconSource.
var _ recon.ReconSource = (*EnvLeakSource)(nil)
// Name returns the identifier of this source, "dotenv".
func (s *EnvLeakSource) Name() string {
	return "dotenv"
}

// RateLimit returns the pacing for this source: one event every two seconds.
func (s *EnvLeakSource) RateLimit() rate.Limit {
	const interval = 2 * time.Second
	return rate.Every(interval)
}

// Burst returns the burst size used together with RateLimit.
func (s *EnvLeakSource) Burst() int {
	return 2
}

// RespectsRobots reports that this source respects robots rules.
func (s *EnvLeakSource) RespectsRobots() bool {
	return true
}

// Enabled reports true for every configuration.
func (s *EnvLeakSource) Enabled(_ recon.Config) bool {
	return true
}
// envKeyValuePattern matches a single KEY=VALUE line whose key contains a
// secret-looking word (API_KEY/APIKEY, SECRET, TOKEN, PASSWORD, CREDENTIAL(S)).
//
// Matching is case-insensitive and anchored at the start of a line. The key
// may contain letters, digits and underscores and may be preceded by a shell
// style "export ". Only spaces and tabs are allowed around "=": using \s
// there would also consume line breaks, so an empty entry such as "API_KEY="
// followed by any other line would be reported as a leaked value.
var envKeyValuePattern = regexp.MustCompile(`(?im)^(?:export[ \t]+)?[A-Z0-9_]*(API[_]?KEY|SECRET|TOKEN|PASSWORD|CREDENTIALS?)[A-Z0-9_]*[ \t]*=[ \t]*\S+`)
// envFilePaths lists the URL paths, relative to the base URL, that are probed
// for exposed .env files: the root file, its common environment-specific and
// backup variants, and copies under /app and /api.
var envFilePaths = []string{
"/.env",
"/.env.local",
"/.env.production",
"/.env.development",
"/.env.backup",
"/.env.example",
"/app/.env",
"/api/.env",
}
// Sweep probes every path in envFilePaths under BaseURL and reports each
// response that contains a secret-looking KEY=VALUE line.
//
// The probe request does not depend on the registry query, so each path is
// fetched exactly once. A hit is then reported once per query returned by
// BuildQueries, with the query as ProviderName. Findings are therefore
// emitted path by path.
//
// Sweep returns nil without probing when BaseURL is empty or no queries are
// available. Failed requests and unreadable bodies are skipped. The only
// errors returned come from the context or the rate limiter.
func (s *EnvLeakSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
	base := s.BaseURL
	if base == "" {
		return nil
	}
	client := s.Client
	if client == nil {
		client = NewClient()
	}
	queries := BuildQueries(s.Registry, "dotenv")
	if len(queries) == 0 {
		return nil
	}

	for _, path := range envFilePaths {
		if err := ctx.Err(); err != nil {
			return err
		}
		if s.Limiters != nil {
			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
				return err
			}
		}

		probeURL := fmt.Sprintf("%s%s", base, path)
		if !envFileLeaks(ctx, client, probeURL) {
			continue
		}

		for _, q := range queries {
			finding := recon.Finding{
				ProviderName: q,
				Source:       probeURL,
				SourceType:   "recon:dotenv",
				Confidence:   "high",
				DetectedAt:   time.Now(),
			}
			// Never block forever on a consumer that went away.
			select {
			case out <- finding:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	}
	return nil
}

// envFileLeaks fetches probeURL and reports whether the first 64KB of the
// response match envKeyValuePattern. Any request or read failure yields false.
func envFileLeaks(ctx context.Context, client *Client, probeURL string) bool {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, probeURL, nil)
	if err != nil {
		return false
	}
	resp, err := client.Do(ctx, req)
	if err != nil {
		return false
	}
	defer func() { _ = resp.Body.Close() }()

	body, err := io.ReadAll(io.LimitReader(resp.Body, 64*1024)) // 64KB max
	if err != nil {
		return false
	}
	return envKeyValuePattern.Match(body)
}

View File

@@ -0,0 +1,145 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// envLeakTestRegistry builds a registry holding a single "openai" provider
// keyed on the "sk-proj-" keyword.
func envLeakTestRegistry() *providers.Registry {
	openai := providers.Provider{
		Name:     "openai",
		Keywords: []string{"sk-proj-"},
	}
	return providers.NewRegistryFromProviders([]providers.Provider{openai})
}
// envLeakFixture is a sample .env body that contains secret-looking entries
// (OPENAI_API_KEY and AWS_SECRET_ACCESS_KEY) next to harmless settings.
const envLeakFixture = `# Application config
APP_NAME=myapp
DATABASE_URL=postgres://user:pass@localhost/db
OPENAI_API_KEY=sk-proj-abc123def456ghi789
AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
DEBUG=false
`
// envLeakCleanFixture is a sample .env body with no secret-looking keys.
const envLeakCleanFixture = `# Nothing sensitive here
APP_NAME=myapp
DEBUG=false
LOG_LEVEL=info
`
// TestEnvLeak_Sweep_ExtractsFindings serves the leaking fixture on every path
// and expects Sweep to emit high-confidence "recon:dotenv" findings.
func TestEnvLeak_Sweep_ExtractsFindings(t *testing.T) {
	handler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/plain")
		_, _ = w.Write([]byte(envLeakFixture))
	}
	server := httptest.NewServer(http.HandlerFunc(handler))
	defer server.Close()

	source := &EnvLeakSource{
		BaseURL:  server.URL,
		Registry: envLeakTestRegistry(),
		Client:   NewClient(),
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	results := make(chan recon.Finding, 64)
	if err := source.Sweep(ctx, "", results); err != nil {
		t.Fatalf("Sweep err: %v", err)
	}
	close(results)

	if len(results) == 0 {
		t.Fatal("expected at least one finding")
	}
	for finding := range results {
		if got := finding.SourceType; got != "recon:dotenv" {
			t.Errorf("unexpected SourceType: %s", got)
		}
		if got := finding.Confidence; got != "high" {
			t.Errorf("unexpected Confidence: %s", got)
		}
	}
}
// TestEnvLeak_Sweep_NoFindings_OnCleanFile serves a body without secret-looking
// keys and expects Sweep to emit nothing.
func TestEnvLeak_Sweep_NoFindings_OnCleanFile(t *testing.T) {
	handler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/plain")
		_, _ = w.Write([]byte(envLeakCleanFixture))
	}
	server := httptest.NewServer(http.HandlerFunc(handler))
	defer server.Close()

	source := &EnvLeakSource{
		BaseURL:  server.URL,
		Registry: envLeakTestRegistry(),
		Client:   NewClient(),
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	results := make(chan recon.Finding, 64)
	if err := source.Sweep(ctx, "", results); err != nil {
		t.Fatalf("Sweep err: %v", err)
	}
	close(results)

	// Sweep has returned, so everything it emitted is sitting in the buffer.
	if emitted := len(results); emitted != 0 {
		t.Errorf("expected 0 findings, got %d", emitted)
	}
}
// TestEnvLeak_Sweep_CtxCancelled expects Sweep to return an error when it is
// called with an already-cancelled context.
func TestEnvLeak_Sweep_CtxCancelled(t *testing.T) {
	slowHandler := func(w http.ResponseWriter, _ *http.Request) {
		time.Sleep(500 * time.Millisecond)
		_, _ = w.Write([]byte(envLeakFixture))
	}
	server := httptest.NewServer(http.HandlerFunc(slowHandler))
	defer server.Close()

	source := &EnvLeakSource{
		BaseURL:  server.URL,
		Registry: envLeakTestRegistry(),
		Limiters: recon.NewLimiterRegistry(),
		Client:   NewClient(),
	}

	// Cancel before Sweep ever runs.
	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	results := make(chan recon.Finding, 4)
	err := source.Sweep(ctx, "", results)
	if err == nil {
		t.Fatal("expected ctx error")
	}
}
// TestEnvLeak_EnabledAlwaysTrue checks that a zero-value source is enabled
// under an empty configuration.
func TestEnvLeak_EnabledAlwaysTrue(t *testing.T) {
	source := new(EnvLeakSource)
	if enabled := source.Enabled(recon.Config{}); !enabled {
		t.Fatal("expected Enabled=true")
	}
}
// TestEnvLeak_NameAndRate pins the source name, burst size and robots policy.
func TestEnvLeak_NameAndRate(t *testing.T) {
	source := new(EnvLeakSource)

	if name := source.Name(); name != "dotenv" {
		t.Errorf("unexpected name: %s", name)
	}
	if burst := source.Burst(); burst != 2 {
		t.Errorf("burst: %d", burst)
	}
	if respects := source.RespectsRobots(); !respects {
		t.Error("expected RespectsRobots=true")
	}
}

View File

@@ -0,0 +1,123 @@
package sources
import (
"context"
"encoding/json"
"net/http"
"regexp"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// SourceMapSource probes for publicly accessible JavaScript source maps (.map
// files) that contain original source code. Developers frequently ship source
// maps to production, exposing server-side secrets embedded during bundling.
type SourceMapSource struct {
// BaseURL is the prefix every entry of sourceMapPaths is appended to. Sweep
// produces no findings when it is empty.
BaseURL string
// Registry is handed to BuildQueries to obtain the queries Sweep reports
// findings under.
Registry *providers.Registry
// Limiters is optional; when non-nil, Sweep waits on it between probes.
Limiters *recon.LimiterRegistry
// Client performs the HTTP requests; Sweep falls back to NewClient() when
// it is nil.
Client *Client
}
// Compile-time check that *SourceMapSource satisfies recon.ReconSource.
var _ recon.ReconSource = (*SourceMapSource)(nil)
// Name returns the identifier of this source, "sourcemaps".
func (s *SourceMapSource) Name() string {
	return "sourcemaps"
}

// RateLimit returns the pacing for this source: one event every three seconds.
func (s *SourceMapSource) RateLimit() rate.Limit {
	const interval = 3 * time.Second
	return rate.Every(interval)
}

// Burst returns the burst size used together with RateLimit.
func (s *SourceMapSource) Burst() int {
	return 2
}

// RespectsRobots reports that this source respects robots rules.
func (s *SourceMapSource) RespectsRobots() bool {
	return true
}

// Enabled reports true for every configuration.
func (s *SourceMapSource) Enabled(_ recon.Config) bool {
	return true
}
// sourceMapResponse holds the two top-level fields decoded from a .map file's
// JSON; every other field in the document is ignored.
type sourceMapResponse struct {
// Sources is the "sources" array of the map.
Sources []string `json:"sources"`
// SourcesContent is the "sourcesContent" array; its entries are the strings
// scanned for API key patterns.
SourcesContent []string `json:"sourcesContent"`
}
// apiKeyPattern matches, case-insensitively, a credential-like name (api key,
// secret, token, password, credential, auth) followed by "=" or ":" and a
// quoted value of at least 16 characters made of letters, digits, "_" and "-".
var apiKeyPattern = regexp.MustCompile(`(?i)(api[_-]?key|secret|token|password|credential|auth)['":\s]*[=:]\s*['"]([a-zA-Z0-9_\-]{16,})['"]`)
// sourceMapPaths lists the URL paths, relative to the base URL, that are
// probed for source maps served next to common bundle locations.
var sourceMapPaths = []string{
"/static/js/main.js.map",
"/static/js/bundle.js.map",
"/assets/index.js.map",
"/dist/bundle.js.map",
"/main.js.map",
"/app.js.map",
"/_next/static/chunks/main.js.map",
}
// Sweep probes every path in sourceMapPaths under BaseURL and reports each
// source map whose sourcesContent contains an API-key-like assignment.
//
// The probe request does not depend on the registry query, so each path is
// fetched exactly once. A hit is then reported once per query returned by
// BuildQueries, with the query as ProviderName. Findings are therefore
// emitted path by path, at most one per query and map file.
//
// Sweep returns nil immediately when BaseURL is empty, before touching the
// rate limiter, and also when no queries are available. Failed requests and
// bodies that are not valid JSON are skipped. The only errors returned come
// from the context or the rate limiter.
func (s *SourceMapSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
	base := s.BaseURL
	if base == "" {
		// Without a BaseURL we cannot construct real URLs.
		return nil
	}
	client := s.Client
	if client == nil {
		client = NewClient()
	}
	queries := BuildQueries(s.Registry, "sourcemaps")
	if len(queries) == 0 {
		return nil
	}

	for _, path := range sourceMapPaths {
		if err := ctx.Err(); err != nil {
			return err
		}
		if s.Limiters != nil {
			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
				return err
			}
		}

		probeURL := base + path
		if !sourceMapLeaks(ctx, client, probeURL) {
			continue
		}

		for _, q := range queries {
			finding := recon.Finding{
				ProviderName: q,
				Source:       probeURL,
				SourceType:   "recon:sourcemaps",
				Confidence:   "medium",
				DetectedAt:   time.Now(),
			}
			// Never block forever on a consumer that went away.
			select {
			case out <- finding:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	}
	return nil
}

// sourceMapLeaks fetches probeURL, decodes it as a source map and reports
// whether any sourcesContent entry matches apiKeyPattern. Request failures
// (404s and other errors are expected during probing) and undecodable bodies
// yield false.
func sourceMapLeaks(ctx context.Context, client *Client, probeURL string) bool {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, probeURL, nil)
	if err != nil {
		return false
	}
	req.Header.Set("Accept", "application/json")

	resp, err := client.Do(ctx, req)
	if err != nil {
		return false
	}
	defer func() { _ = resp.Body.Close() }()

	var mapData sourceMapResponse
	if err := json.NewDecoder(resp.Body).Decode(&mapData); err != nil {
		return false
	}
	for _, content := range mapData.SourcesContent {
		if apiKeyPattern.MatchString(content) {
			return true // one hit per map file is sufficient
		}
	}
	return false
}

View File

@@ -0,0 +1,143 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// sourceMapTestRegistry builds a registry holding a single "openai" provider
// keyed on the "sk-proj-" keyword.
func sourceMapTestRegistry() *providers.Registry {
	openai := providers.Provider{
		Name:     "openai",
		Keywords: []string{"sk-proj-"},
	}
	return providers.NewRegistryFromProviders([]providers.Provider{openai})
}
// sourceMapFixtureJSON is a minimal source map whose single sourcesContent
// entry assigns a quoted key to a variable named apiKey.
const sourceMapFixtureJSON = `{
"version": 3,
"sources": ["src/api/client.ts"],
"sourcesContent": ["const apiKey = \"sk-proj-abc123def456ghi789\";\nfetch('/api', {headers: {'Authorization': apiKey}});"]
}`
// sourceMapEmptyFixtureJSON is a minimal source map whose content holds no
// credential-like assignment.
const sourceMapEmptyFixtureJSON = `{
"version": 3,
"sources": ["src/index.ts"],
"sourcesContent": ["console.log('hello world');"]
}`
// TestSourceMap_Sweep_ExtractsFindings serves the leaking source map on every
// path and expects Sweep to emit medium-confidence "recon:sourcemaps" findings.
func TestSourceMap_Sweep_ExtractsFindings(t *testing.T) {
	handler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(sourceMapFixtureJSON))
	}
	server := httptest.NewServer(http.HandlerFunc(handler))
	defer server.Close()

	source := &SourceMapSource{
		BaseURL:  server.URL,
		Registry: sourceMapTestRegistry(),
		Client:   NewClient(),
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	results := make(chan recon.Finding, 64)
	if err := source.Sweep(ctx, "", results); err != nil {
		t.Fatalf("Sweep err: %v", err)
	}
	close(results)

	if len(results) == 0 {
		t.Fatal("expected at least one finding")
	}
	for finding := range results {
		if got := finding.SourceType; got != "recon:sourcemaps" {
			t.Errorf("unexpected SourceType: %s", got)
		}
		if got := finding.Confidence; got != "medium" {
			t.Errorf("unexpected Confidence: %s", got)
		}
	}
}
// TestSourceMap_Sweep_NoFindings_OnCleanContent serves a source map without
// credential-like content and expects Sweep to emit nothing.
func TestSourceMap_Sweep_NoFindings_OnCleanContent(t *testing.T) {
	handler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(sourceMapEmptyFixtureJSON))
	}
	server := httptest.NewServer(http.HandlerFunc(handler))
	defer server.Close()

	source := &SourceMapSource{
		BaseURL:  server.URL,
		Registry: sourceMapTestRegistry(),
		Client:   NewClient(),
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	results := make(chan recon.Finding, 64)
	if err := source.Sweep(ctx, "", results); err != nil {
		t.Fatalf("Sweep err: %v", err)
	}
	close(results)

	// Sweep has returned, so everything it emitted is sitting in the buffer.
	if emitted := len(results); emitted != 0 {
		t.Errorf("expected 0 findings, got %d", emitted)
	}
}
// TestSourceMap_Sweep_CtxCancelled expects Sweep to return an error when it is
// called with an already-cancelled context.
func TestSourceMap_Sweep_CtxCancelled(t *testing.T) {
	slowHandler := func(w http.ResponseWriter, _ *http.Request) {
		time.Sleep(500 * time.Millisecond)
		_, _ = w.Write([]byte(sourceMapFixtureJSON))
	}
	server := httptest.NewServer(http.HandlerFunc(slowHandler))
	defer server.Close()

	source := &SourceMapSource{
		BaseURL:  server.URL,
		Registry: sourceMapTestRegistry(),
		Limiters: recon.NewLimiterRegistry(),
		Client:   NewClient(),
	}

	// Cancel before Sweep ever runs.
	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	results := make(chan recon.Finding, 4)
	err := source.Sweep(ctx, "", results)
	if err == nil {
		t.Fatal("expected ctx error")
	}
}
// TestSourceMap_EnabledAlwaysTrue checks that a zero-value source is enabled
// under an empty configuration.
func TestSourceMap_EnabledAlwaysTrue(t *testing.T) {
	source := new(SourceMapSource)
	if enabled := source.Enabled(recon.Config{}); !enabled {
		t.Fatal("expected Enabled=true")
	}
}
// TestSourceMap_NameAndRate pins the source name, burst size and robots policy.
func TestSourceMap_NameAndRate(t *testing.T) {
	source := new(SourceMapSource)

	if name := source.Name(); name != "sourcemaps" {
		t.Errorf("unexpected name: %s", name)
	}
	if burst := source.Burst(); burst != 2 {
		t.Errorf("burst: %d", burst)
	}
	if respects := source.RespectsRobots(); !respects {
		t.Error("expected RespectsRobots=true")
	}
}

View File

@@ -0,0 +1,109 @@
package sources
import (
"context"
"fmt"
"io"
"net/http"
"regexp"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// WebpackSource probes for Webpack/Vite build artifacts that contain inlined
// environment variables. Bundlers like Webpack and Vite inline process.env.*
// values at build time, frequently shipping API keys to production bundles.
type WebpackSource struct {
// BaseURL is the prefix every entry of webpackBundlePaths is appended to.
// Sweep produces no findings when it is empty.
BaseURL string
// Registry is handed to BuildQueries to obtain the queries Sweep reports
// findings under.
Registry *providers.Registry
// Limiters is optional; when non-nil, Sweep waits on it before each probe.
Limiters *recon.LimiterRegistry
// Client performs the HTTP requests; Sweep falls back to NewClient() when
// it is nil.
Client *Client
}
// Compile-time check that *WebpackSource satisfies recon.ReconSource.
var _ recon.ReconSource = (*WebpackSource)(nil)
// Name returns the identifier of this source, "webpack".
func (s *WebpackSource) Name() string {
	return "webpack"
}

// RateLimit returns the pacing for this source: one event every three seconds.
func (s *WebpackSource) RateLimit() rate.Limit {
	const interval = 3 * time.Second
	return rate.Every(interval)
}

// Burst returns the burst size used together with RateLimit.
func (s *WebpackSource) Burst() int {
	return 2
}

// RespectsRobots reports that this source respects robots rules.
func (s *WebpackSource) RespectsRobots() bool {
	return true
}

// Enabled reports true for every configuration.
func (s *WebpackSource) Enabled(_ recon.Config) bool {
	return true
}
// envVarPattern matches, case-insensitively, a bundler-exposed variable name
// (prefixed NEXT_PUBLIC_, REACT_APP_, VITE_, VUE_APP_, NUXT_ or GATSBY_ and
// ending in API_KEY/APIKEY, SECRET, TOKEN or PASSWORD) followed by "=", ":"
// or "," and a quoted value of at least 8 characters made of letters, digits,
// "_" and "-".
var envVarPattern = regexp.MustCompile(`(?i)(NEXT_PUBLIC_|REACT_APP_|VITE_|VUE_APP_|NUXT_|GATSBY_)[A-Z_]*(API[_]?KEY|SECRET|TOKEN|PASSWORD)['":\s]*[=:,]\s*['"]([a-zA-Z0-9_\-]{8,})['"]`)
// webpackBundlePaths lists the URL paths, relative to the base URL, that are
// probed for JavaScript bundle artifacts.
var webpackBundlePaths = []string{
"/static/js/main.js",
"/static/js/bundle.js",
"/_next/static/chunks/main.js",
"/assets/index.js",
"/dist/bundle.js",
"/build/static/js/main.js",
}
// Sweep probes the paths in webpackBundlePaths under BaseURL, in order, until
// it finds a bundle containing an inlined secret-looking environment
// variable. That bundle is then reported once per query returned by
// BuildQueries, with the query as ProviderName.
//
// The probe request does not depend on the registry query, so each path is
// fetched at most once, and paths after the first hit are not probed (one
// finding per query is sufficient).
//
// Sweep returns nil without probing when BaseURL is empty or no queries are
// available. Failed requests and unreadable bodies are skipped. The only
// errors returned come from the context or the rate limiter.
func (s *WebpackSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
	base := s.BaseURL
	if base == "" {
		return nil
	}
	client := s.Client
	if client == nil {
		client = NewClient()
	}
	queries := BuildQueries(s.Registry, "webpack")
	if len(queries) == 0 {
		return nil
	}

	for _, path := range webpackBundlePaths {
		if err := ctx.Err(); err != nil {
			return err
		}
		if s.Limiters != nil {
			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
				return err
			}
		}

		probeURL := fmt.Sprintf("%s%s", base, path)
		if !bundleLeaks(ctx, client, probeURL) {
			continue
		}

		for _, q := range queries {
			finding := recon.Finding{
				ProviderName: q,
				Source:       probeURL,
				SourceType:   "recon:webpack",
				Confidence:   "medium",
				DetectedAt:   time.Now(),
			}
			// Never block forever on a consumer that went away.
			select {
			case out <- finding:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
		return nil
	}
	return nil
}

// bundleLeaks fetches probeURL and reports whether the first 512KB of the
// response match envVarPattern. Any request or read failure yields false.
func bundleLeaks(ctx context.Context, client *Client, probeURL string) bool {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, probeURL, nil)
	if err != nil {
		return false
	}
	resp, err := client.Do(ctx, req)
	if err != nil {
		return false
	}
	defer func() { _ = resp.Body.Close() }()

	body, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024)) // 512KB max
	if err != nil {
		return false
	}
	return envVarPattern.Match(body)
}

View File

@@ -0,0 +1,146 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// webpackTestRegistry builds a registry holding a single "openai" provider
// keyed on the "sk-proj-" keyword.
func webpackTestRegistry() *providers.Registry {
	openai := providers.Provider{
		Name:     "openai",
		Keywords: []string{"sk-proj-"},
	}
	return providers.NewRegistryFromProviders([]providers.Provider{openai})
}
// webpackFixtureJS is a bundle snippet whose config object assigns quoted
// values to NEXT_PUBLIC_API_KEY and REACT_APP_SECRET.
const webpackFixtureJS = `
!function(e){var t={};function n(r){if(t[r])return t[r].exports}
var config = {
NEXT_PUBLIC_API_KEY: "sk-proj-abc123def456ghi789jkl",
REACT_APP_SECRET: "super-secret-value-12345678"
};
module.exports = config;
`
// webpackCleanJS is a bundle snippet with no inlined environment variables.
const webpackCleanJS = `
!function(e){var t={};function n(r){if(t[r])return t[r].exports}
console.log("clean bundle");
module.exports = {};
`
// TestWebpack_Sweep_ExtractsFindings serves the leaking bundle on every path
// and expects Sweep to emit medium-confidence "recon:webpack" findings.
func TestWebpack_Sweep_ExtractsFindings(t *testing.T) {
	handler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/javascript")
		_, _ = w.Write([]byte(webpackFixtureJS))
	}
	server := httptest.NewServer(http.HandlerFunc(handler))
	defer server.Close()

	source := &WebpackSource{
		BaseURL:  server.URL,
		Registry: webpackTestRegistry(),
		Client:   NewClient(),
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	results := make(chan recon.Finding, 64)
	if err := source.Sweep(ctx, "", results); err != nil {
		t.Fatalf("Sweep err: %v", err)
	}
	close(results)

	if len(results) == 0 {
		t.Fatal("expected at least one finding")
	}
	for finding := range results {
		if got := finding.SourceType; got != "recon:webpack" {
			t.Errorf("unexpected SourceType: %s", got)
		}
		if got := finding.Confidence; got != "medium" {
			t.Errorf("unexpected Confidence: %s", got)
		}
	}
}
// TestWebpack_Sweep_NoFindings_OnCleanBundle serves a bundle without inlined
// environment variables and expects Sweep to emit nothing.
func TestWebpack_Sweep_NoFindings_OnCleanBundle(t *testing.T) {
	handler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/javascript")
		_, _ = w.Write([]byte(webpackCleanJS))
	}
	server := httptest.NewServer(http.HandlerFunc(handler))
	defer server.Close()

	source := &WebpackSource{
		BaseURL:  server.URL,
		Registry: webpackTestRegistry(),
		Client:   NewClient(),
	}

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	results := make(chan recon.Finding, 64)
	if err := source.Sweep(ctx, "", results); err != nil {
		t.Fatalf("Sweep err: %v", err)
	}
	close(results)

	// Sweep has returned, so everything it emitted is sitting in the buffer.
	if emitted := len(results); emitted != 0 {
		t.Errorf("expected 0 findings, got %d", emitted)
	}
}
// TestWebpack_Sweep_CtxCancelled expects Sweep to return an error when it is
// called with an already-cancelled context.
func TestWebpack_Sweep_CtxCancelled(t *testing.T) {
	slowHandler := func(w http.ResponseWriter, _ *http.Request) {
		time.Sleep(500 * time.Millisecond)
		_, _ = w.Write([]byte(webpackFixtureJS))
	}
	server := httptest.NewServer(http.HandlerFunc(slowHandler))
	defer server.Close()

	source := &WebpackSource{
		BaseURL:  server.URL,
		Registry: webpackTestRegistry(),
		Limiters: recon.NewLimiterRegistry(),
		Client:   NewClient(),
	}

	// Cancel before Sweep ever runs.
	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	results := make(chan recon.Finding, 4)
	err := source.Sweep(ctx, "", results)
	if err == nil {
		t.Fatal("expected ctx error")
	}
}
// TestWebpack_EnabledAlwaysTrue checks that a zero-value source is enabled
// under an empty configuration.
func TestWebpack_EnabledAlwaysTrue(t *testing.T) {
	source := new(WebpackSource)
	if enabled := source.Enabled(recon.Config{}); !enabled {
		t.Fatal("expected Enabled=true")
	}
}
// TestWebpack_NameAndRate pins the source name, burst size and robots policy.
func TestWebpack_NameAndRate(t *testing.T) {
	source := new(WebpackSource)

	if name := source.Name(); name != "webpack" {
		t.Errorf("unexpected name: %s", name)
	}
	if burst := source.Burst(); burst != 2 {
		t.Errorf("burst: %d", burst)
	}
	if respects := source.RespectsRobots(); !respects {
		t.Error("expected RespectsRobots=true")
	}
}