From 7d8a4182d7cb83d235b1eca84a900e2a75ce6e48 Mon Sep 17 00:00:00 2001 From: salvacybersec Date: Mon, 6 Apr 2026 13:18:18 +0300 Subject: [PATCH] feat(14-03): implement SwaggerSource and DeployPreviewSource with tests - SwaggerSource probes OpenAPI doc endpoints for API keys in example/default fields - DeployPreviewSource scans Vercel/Netlify preview URLs for __NEXT_DATA__ env leaks - Both implement ReconSource, credentialless, with httptest-based tests --- pkg/recon/sources/deploypreview.go | 107 ++++++++++++++ pkg/recon/sources/deploypreview_test.go | 158 +++++++++++++++++++++ pkg/recon/sources/swagger.go | 118 ++++++++++++++++ pkg/recon/sources/swagger_test.go | 179 ++++++++++++++++++++++++ 4 files changed, 562 insertions(+) create mode 100644 pkg/recon/sources/deploypreview.go create mode 100644 pkg/recon/sources/deploypreview_test.go create mode 100644 pkg/recon/sources/swagger.go create mode 100644 pkg/recon/sources/swagger_test.go diff --git a/pkg/recon/sources/deploypreview.go b/pkg/recon/sources/deploypreview.go new file mode 100644 index 0000000..628ec79 --- /dev/null +++ b/pkg/recon/sources/deploypreview.go @@ -0,0 +1,107 @@ +package sources + +import ( + "context" + "io" + "net/http" + "regexp" + "time" + + "golang.org/x/time/rate" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +// DeployPreviewSource scans Vercel and Netlify deploy preview URLs for leaked +// API keys. Deploy previews frequently use different (less restrictive) +// environment variables than production, and their URLs are often guessable +// from PR numbers or commit hashes. 
+type DeployPreviewSource struct { + BaseURL string + Registry *providers.Registry + Limiters *recon.LimiterRegistry + Client *Client +} + +var _ recon.ReconSource = (*DeployPreviewSource)(nil) + +func (s *DeployPreviewSource) Name() string { return "deploypreview" } +func (s *DeployPreviewSource) RateLimit() rate.Limit { return rate.Every(3 * time.Second) } +func (s *DeployPreviewSource) Burst() int { return 2 } +func (s *DeployPreviewSource) RespectsRobots() bool { return true } +func (s *DeployPreviewSource) Enabled(_ recon.Config) bool { return true } + +// deployPreviewPaths are paths where deploy previews expose build artifacts. +var deployPreviewPaths = []string{ + "/", + "/_next/data/", + "/static/js/main.js", + "/__nextjs_original-stack-frame", +} + +// nextDataPattern matches, case-insensitively, a name prefixed with __NEXT_DATA__, NEXT_PUBLIC_, REACT_APP_ or VITE_ that is followed by '=', ':' or ',' and a quoted value of 8+ characters from [A-Za-z0-9_-]. +var nextDataPattern = regexp.MustCompile(`(?i)(__NEXT_DATA__|NEXT_PUBLIC_|REACT_APP_|VITE_)[A-Z_]*(API[_]?KEY|SECRET|TOKEN)?['":\s]*[=:,]\s*['"]([a-zA-Z0-9_\-]{8,})['"]`) + +func (s *DeployPreviewSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error { + base := s.BaseURL + if base == "" { + return nil + } + client := s.Client + if client == nil { + client = NewClient() + } + + queries := BuildQueries(s.Registry, "deploypreview") + if len(queries) == 0 { + return nil + } + + for _, q := range queries { + if err := ctx.Err(); err != nil { + return err + } + + for _, path := range deployPreviewPaths { + if err := ctx.Err(); err != nil { + return err + } + + if s.Limiters != nil { + if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil { + return err + } + } + + probeURL := base + path + req, err := http.NewRequestWithContext(ctx, http.MethodGet, probeURL, nil) + if err != nil { + continue + } + + resp, err := client.Do(ctx, req) + if err != nil { + continue + } + + body, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024)) + _ = resp.Body.Close() + if err != nil { + continue + } + 
+ if nextDataPattern.Match(body) { + out <- recon.Finding{ + ProviderName: q, + Source: probeURL, + SourceType: "recon:deploypreview", + Confidence: "medium", + DetectedAt: time.Now(), + } + break // one finding per query is sufficient + } + } + } + return nil +} diff --git a/pkg/recon/sources/deploypreview_test.go b/pkg/recon/sources/deploypreview_test.go new file mode 100644 index 0000000..9bdf2c0 --- /dev/null +++ b/pkg/recon/sources/deploypreview_test.go @@ -0,0 +1,158 @@ +package sources + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +func deployPreviewTestRegistry() *providers.Registry { + return providers.NewRegistryFromProviders([]providers.Provider{ + {Name: "openai", Keywords: []string{"sk-proj-"}}, + }) +} + +const deployPreviewFixtureHTML = ` + +My App + +
+ + +` + +const deployPreviewCleanHTML = ` + +My App + +
Hello World
+ +` + +func TestDeployPreview_Sweep_ExtractsFindings(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + _, _ = w.Write([]byte(deployPreviewFixtureHTML)) + })) + defer srv.Close() + + src := &DeployPreviewSource{ + BaseURL: srv.URL, + Registry: deployPreviewTestRegistry(), + Client: NewClient(), + } + + out := make(chan recon.Finding, 64) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + if err := src.Sweep(ctx, "", out); err != nil { + t.Fatalf("Sweep err: %v", err) + } + close(out) + + var findings []recon.Finding + for f := range out { + findings = append(findings, f) + } + if len(findings) == 0 { + t.Fatal("expected at least one finding") + } + for _, f := range findings { + if f.SourceType != "recon:deploypreview" { + t.Errorf("unexpected SourceType: %s", f.SourceType) + } + if f.Confidence != "medium" { + t.Errorf("unexpected Confidence: %s", f.Confidence) + } + } +} + +func TestDeployPreview_Sweep_NoFindings_OnCleanPage(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + _, _ = w.Write([]byte(deployPreviewCleanHTML)) + })) + defer srv.Close() + + src := &DeployPreviewSource{ + BaseURL: srv.URL, + Registry: deployPreviewTestRegistry(), + Client: NewClient(), + } + + out := make(chan recon.Finding, 64) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + if err := src.Sweep(ctx, "", out); err != nil { + t.Fatalf("Sweep err: %v", err) + } + close(out) + + var count int + for range out { + count++ + } + if count != 0 { + t.Errorf("expected 0 findings, got %d", count) + } +} + +func TestDeployPreview_Sweep_CtxCancelled(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(500 * time.Millisecond) + _, _ = 
w.Write([]byte(deployPreviewFixtureHTML)) + })) + defer srv.Close() + + src := &DeployPreviewSource{ + BaseURL: srv.URL, + Registry: deployPreviewTestRegistry(), + Limiters: recon.NewLimiterRegistry(), + Client: NewClient(), + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + out := make(chan recon.Finding, 4) + if err := src.Sweep(ctx, "", out); err == nil { + t.Fatal("expected ctx error") + } +} + +func TestDeployPreview_EnabledAlwaysTrue(t *testing.T) { + s := &DeployPreviewSource{} + if !s.Enabled(recon.Config{}) { + t.Fatal("expected Enabled=true") + } +} + +func TestDeployPreview_NameAndRate(t *testing.T) { + s := &DeployPreviewSource{} + if s.Name() != "deploypreview" { + t.Errorf("unexpected name: %s", s.Name()) + } + if s.Burst() != 2 { + t.Errorf("burst: %d", s.Burst()) + } + if !s.RespectsRobots() { + t.Error("expected RespectsRobots=true") + } +} diff --git a/pkg/recon/sources/swagger.go b/pkg/recon/sources/swagger.go new file mode 100644 index 0000000..58028d2 --- /dev/null +++ b/pkg/recon/sources/swagger.go @@ -0,0 +1,118 @@ +package sources + +import ( + "context" + "encoding/json" + "net/http" + "regexp" + "time" + + "golang.org/x/time/rate" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +// SwaggerSource probes for publicly accessible Swagger/OpenAPI documentation +// endpoints. Developers frequently include real API keys in "example" and +// "default" fields of security scheme definitions or parameter specifications. 
+type SwaggerSource struct { + BaseURL string + Registry *providers.Registry + Limiters *recon.LimiterRegistry + Client *Client +} + +var _ recon.ReconSource = (*SwaggerSource)(nil) + +func (s *SwaggerSource) Name() string { return "swagger" } +func (s *SwaggerSource) RateLimit() rate.Limit { return rate.Every(3 * time.Second) } +func (s *SwaggerSource) Burst() int { return 2 } +func (s *SwaggerSource) RespectsRobots() bool { return true } +func (s *SwaggerSource) Enabled(_ recon.Config) bool { return true } + +// swaggerDocPaths are common locations for Swagger/OpenAPI documentation. +var swaggerDocPaths = []string{ + "/swagger.json", + "/openapi.json", + "/api-docs", + "/v2/api-docs", + "/swagger/v1/swagger.json", + "/docs/openapi.json", +} + +// swaggerKeyPattern matches potential API keys in example/default fields of +// Swagger JSON. It looks for "example" or "default" keys with string values +// that look like API keys (16+ letters, digits, underscores, or hyphens). +var swaggerKeyPattern = regexp.MustCompile(`"(?:example|default)"\s*:\s*"([a-zA-Z0-9_\-]{16,})"`) + +func (s *SwaggerSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error { + base := s.BaseURL + if base == "" { + return nil + } + client := s.Client + if client == nil { + client = NewClient() + } + + queries := BuildQueries(s.Registry, "swagger") + if len(queries) == 0 { + return nil + } + + for _, q := range queries { + if err := ctx.Err(); err != nil { + return err + } + + for _, path := range swaggerDocPaths { + if err := ctx.Err(); err != nil { + return err + } + + if s.Limiters != nil { + if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil { + return err + } + } + + probeURL := base + path + req, err := http.NewRequestWithContext(ctx, http.MethodGet, probeURL, nil) + if err != nil { + continue + } + req.Header.Set("Accept", "application/json") + + resp, err := client.Do(ctx, req) + if err != nil { + continue + } + + // Try to parse as JSON to verify 
it's a valid Swagger doc. + var doc map[string]interface{} + if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil { + _ = resp.Body.Close() + continue + } + _ = resp.Body.Close() + + // Re-marshal to search for example/default fields with key patterns. + raw, err := json.Marshal(doc) + if err != nil { + continue + } + + if swaggerKeyPattern.Match(raw) { + out <- recon.Finding{ + ProviderName: q, + Source: probeURL, + SourceType: "recon:swagger", + Confidence: "medium", + DetectedAt: time.Now(), + } + } + } + } + return nil +} diff --git a/pkg/recon/sources/swagger_test.go b/pkg/recon/sources/swagger_test.go new file mode 100644 index 0000000..6ffdbaa --- /dev/null +++ b/pkg/recon/sources/swagger_test.go @@ -0,0 +1,179 @@ +package sources + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +func swaggerTestRegistry() *providers.Registry { + return providers.NewRegistryFromProviders([]providers.Provider{ + {Name: "openai", Keywords: []string{"sk-proj-"}}, + }) +} + +const swaggerFixtureJSON = `{ + "openapi": "3.0.0", + "info": {"title": "My API", "version": "1.0"}, + "paths": { + "/api/data": { + "get": { + "parameters": [ + { + "name": "X-API-Key", + "in": "header", + "schema": {"type": "string"}, + "example": "sk-proj-abc123def456ghi789jkl" + } + ] + } + } + }, + "components": { + "securitySchemes": { + "apiKey": { + "type": "apiKey", + "in": "header", + "name": "Authorization", + "default": "Bearer sk-live-xxxxxxxxxxxxxxxxxxxx" + } + } + } +}` + +const swaggerCleanFixtureJSON = `{ + "openapi": "3.0.0", + "info": {"title": "My API", "version": "1.0"}, + "paths": { + "/api/data": { + "get": { + "parameters": [ + { + "name": "limit", + "in": "query", + "schema": {"type": "integer"}, + "example": 10 + } + ] + } + } + } +}` + +func TestSwagger_Sweep_ExtractsFindings(t *testing.T) { + srv := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(swaggerFixtureJSON)) + })) + defer srv.Close() + + src := &SwaggerSource{ + BaseURL: srv.URL, + Registry: swaggerTestRegistry(), + Client: NewClient(), + } + + out := make(chan recon.Finding, 64) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + if err := src.Sweep(ctx, "", out); err != nil { + t.Fatalf("Sweep err: %v", err) + } + close(out) + + var findings []recon.Finding + for f := range out { + findings = append(findings, f) + } + if len(findings) == 0 { + t.Fatal("expected at least one finding") + } + for _, f := range findings { + if f.SourceType != "recon:swagger" { + t.Errorf("unexpected SourceType: %s", f.SourceType) + } + if f.Confidence != "medium" { + t.Errorf("unexpected Confidence: %s", f.Confidence) + } + } +} + +func TestSwagger_Sweep_NoFindings_OnCleanDoc(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(swaggerCleanFixtureJSON)) + })) + defer srv.Close() + + src := &SwaggerSource{ + BaseURL: srv.URL, + Registry: swaggerTestRegistry(), + Client: NewClient(), + } + + out := make(chan recon.Finding, 64) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + if err := src.Sweep(ctx, "", out); err != nil { + t.Fatalf("Sweep err: %v", err) + } + close(out) + + var count int + for range out { + count++ + } + if count != 0 { + t.Errorf("expected 0 findings, got %d", count) + } +} + +func TestSwagger_Sweep_CtxCancelled(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(500 * time.Millisecond) + _, _ = w.Write([]byte(swaggerFixtureJSON)) + })) + defer srv.Close() + + src := &SwaggerSource{ + BaseURL: srv.URL, + 
Registry: swaggerTestRegistry(), + Limiters: recon.NewLimiterRegistry(), + Client: NewClient(), + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + out := make(chan recon.Finding, 4) + if err := src.Sweep(ctx, "", out); err == nil { + t.Fatal("expected ctx error") + } +} + +func TestSwagger_EnabledAlwaysTrue(t *testing.T) { + s := &SwaggerSource{} + if !s.Enabled(recon.Config{}) { + t.Fatal("expected Enabled=true") + } +} + +func TestSwagger_NameAndRate(t *testing.T) { + s := &SwaggerSource{} + if s.Name() != "swagger" { + t.Errorf("unexpected name: %s", s.Name()) + } + if s.Burst() != 2 { + t.Errorf("burst: %d", s.Burst()) + } + if !s.RespectsRobots() { + t.Error("expected RespectsRobots=true") + } +}