feat(14-03): implement SwaggerSource and DeployPreviewSource with tests

- SwaggerSource probes OpenAPI doc endpoints for API keys in example/default fields
- DeployPreviewSource scans Vercel/Netlify preview URLs for __NEXT_DATA__ env leaks
- Both implement ReconSource, credentialless, with httptest-based tests
This commit is contained in:
salvacybersec
2026-04-06 13:18:18 +03:00
parent b57bd5e7d9
commit 7d8a4182d7
4 changed files with 562 additions and 0 deletions

View File

@@ -0,0 +1,107 @@
package sources
import (
"context"
"io"
"net/http"
"regexp"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// DeployPreviewSource scans Vercel and Netlify deploy preview URLs for leaked
// API keys. Deploy previews frequently use different (less restrictive)
// environment variables than production, and their URLs are often guessable
// from PR numbers or commit hashes.
type DeployPreviewSource struct {
// BaseURL is the origin to probe. Sweep appends each entry of
// deployPreviewPaths to it verbatim and does nothing when it is empty.
BaseURL string
// Registry is handed to BuildQueries; the resulting queries are reported
// as the ProviderName of each finding.
Registry *providers.Registry
// Limiters, when non-nil, is waited on before every probe request.
Limiters *recon.LimiterRegistry
// Client performs the HTTP requests. Sweep falls back to NewClient() when
// it is nil.
Client *Client
}
// Compile-time assertion that *DeployPreviewSource satisfies recon.ReconSource.
var _ recon.ReconSource = (*DeployPreviewSource)(nil)
// Name returns the identifier of this source, "deploypreview". Sweep also
// uses it as the key when waiting on the limiter registry.
func (s *DeployPreviewSource) Name() string {
	return "deploypreview"
}
// RateLimit returns the steady-state request rate for this source: one
// request every three seconds.
func (s *DeployPreviewSource) RateLimit() rate.Limit {
	const interval = 3 * time.Second
	return rate.Every(interval)
}
// Burst returns the limiter burst size for this source, which is 2.
func (s *DeployPreviewSource) Burst() int {
	return 2
}
// RespectsRobots reports whether this source declares that it honours
// robots.txt; it always returns true.
func (s *DeployPreviewSource) RespectsRobots() bool {
	return true
}
// Enabled reports whether the source should run. The configuration is
// ignored and the result is always true.
func (s *DeployPreviewSource) Enabled(_ recon.Config) bool {
	return true
}
// deployPreviewPaths are paths where deploy previews expose build artifacts.
// Sweep appends each entry to BaseURL, in the order listed, to form the URLs
// it probes.
var deployPreviewPaths = []string{
"/",
"/_next/data/",
"/static/js/main.js",
"/__nextjs_original-stack-frame",
}
// nextDataPattern matches a quoted value of eight or more characters from
// [a-zA-Z0-9_-] that follows a name beginning with __NEXT_DATA__,
// NEXT_PUBLIC_, REACT_APP_ or VITE_ and one of the separators '=', ':' or ','.
// Matching is case-insensitive. Sweep only tests for a match; the capture
// groups are not read.
// NOTE(review): the (API_KEY|SECRET|TOKEN) group is optional and is preceded
// by [A-Z_]*, so it does not restrict which variable names match — confirm
// that this breadth is intended.
var nextDataPattern = regexp.MustCompile(`(?i)(__NEXT_DATA__|NEXT_PUBLIC_|REACT_APP_|VITE_)[A-Z_]*(API[_]?KEY|SECRET|TOKEN)?['":\s]*[=:,]\s*['"]([a-zA-Z0-9_\-]{8,})['"]`)
// Sweep probes the deploy-preview artifact paths under s.BaseURL and, for the
// first response body matched by nextDataPattern, sends one finding per
// provider query to out.
//
// The second parameter (the sweep query) is unused; probe targets come only
// from BaseURL and deployPreviewPaths. Sweep returns nil without probing when
// BaseURL is empty or BuildQueries yields no queries. Request construction,
// transport and body-read failures skip the affected path. Sweep returns the
// context or limiter error when ctx ends while it is throttled, between
// probes, or while a finding is waiting to be delivered.
//
// The probed URLs do not depend on the provider query, so every path is
// fetched at most once and a match is attributed to all queries. Assuming the
// target answers repeated requests identically, this yields the same findings
// as probing once per query while issuing far fewer requests.
func (s *DeployPreviewSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
	base := s.BaseURL
	if base == "" {
		return nil
	}
	client := s.Client
	if client == nil {
		client = NewClient()
	}
	queries := BuildQueries(s.Registry, "deploypreview")
	if len(queries) == 0 {
		return nil
	}

	for _, path := range deployPreviewPaths {
		if err := ctx.Err(); err != nil {
			return err
		}
		if s.Limiters != nil {
			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
				return err
			}
		}

		probeURL := base + path
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, probeURL, nil)
		if err != nil {
			continue
		}
		resp, err := client.Do(ctx, req)
		if err != nil {
			continue
		}
		// Cap the read at 512 KiB so an oversized page cannot exhaust memory.
		body, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024))
		_ = resp.Body.Close() // best effort: the body has already been consumed
		if err != nil {
			continue
		}
		if !nextDataPattern.Match(body) {
			continue
		}

		// One matching URL is sufficient: report it for every query and stop.
		for _, q := range queries {
			finding := recon.Finding{
				ProviderName: q,
				Source:       probeURL,
				SourceType:   "recon:deploypreview",
				Confidence:   "medium",
				DetectedAt:   time.Now(),
			}
			// Never block forever on a consumer that has gone away.
			select {
			case out <- finding:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
		return nil
	}

	// Nothing matched; surface a cancellation that cut the probing short.
	return ctx.Err()
}

View File

@@ -0,0 +1,158 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// deployPreviewTestRegistry builds a registry holding a single "openai"
// provider with the keyword "sk-proj-" for the DeployPreviewSource tests.
func deployPreviewTestRegistry() *providers.Registry {
	testProviders := []providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	}
	return providers.NewRegistryFromProviders(testProviders)
}
// deployPreviewFixtureHTML is a page whose __NEXT_DATA__ JSON payload contains
// a NEXT_PUBLIC_API_KEY entry with a key-like value; the tests serve it to
// exercise the positive detection path.
const deployPreviewFixtureHTML = `<!DOCTYPE html>
<html>
<head><title>My App</title></head>
<body>
<div id="__next"></div>
<script id="__NEXT_DATA__" type="application/json">
{
"props": {
"pageProps": {
"config": {
"NEXT_PUBLIC_API_KEY": "sk-proj-abc123def456ghi789jkl"
}
}
}
}
</script>
</body>
</html>`
// deployPreviewCleanHTML is a minimal page with no environment-variable
// assignments; the tests serve it to exercise the negative path.
const deployPreviewCleanHTML = `<!DOCTYPE html>
<html>
<head><title>My App</title></head>
<body>
<div id="root">Hello World</div>
</body>
</html>`
// TestDeployPreview_Sweep_ExtractsFindings serves the leaking fixture page and
// checks that Sweep emits at least one finding, each tagged
// "recon:deploypreview" with "medium" confidence.
func TestDeployPreview_Sweep_ExtractsFindings(t *testing.T) {
	handler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/html")
		_, _ = w.Write([]byte(deployPreviewFixtureHTML))
	}
	server := httptest.NewServer(http.HandlerFunc(handler))
	defer server.Close()

	source := &DeployPreviewSource{
		BaseURL:  server.URL,
		Registry: deployPreviewTestRegistry(),
		Client:   NewClient(),
	}

	results := make(chan recon.Finding, 64)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	if err := source.Sweep(ctx, "", results); err != nil {
		t.Fatalf("Sweep err: %v", err)
	}
	close(results)

	var collected []recon.Finding
	for finding := range results {
		collected = append(collected, finding)
	}
	if len(collected) == 0 {
		t.Fatal("expected at least one finding")
	}
	for i := range collected {
		if got := collected[i].SourceType; got != "recon:deploypreview" {
			t.Errorf("unexpected SourceType: %s", got)
		}
		if got := collected[i].Confidence; got != "medium" {
			t.Errorf("unexpected Confidence: %s", got)
		}
	}
}
// TestDeployPreview_Sweep_NoFindings_OnCleanPage serves a page without any
// environment-variable assignments and checks that Sweep emits nothing.
func TestDeployPreview_Sweep_NoFindings_OnCleanPage(t *testing.T) {
	handler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/html")
		_, _ = w.Write([]byte(deployPreviewCleanHTML))
	}
	server := httptest.NewServer(http.HandlerFunc(handler))
	defer server.Close()

	source := &DeployPreviewSource{
		BaseURL:  server.URL,
		Registry: deployPreviewTestRegistry(),
		Client:   NewClient(),
	}

	results := make(chan recon.Finding, 64)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	if err := source.Sweep(ctx, "", results); err != nil {
		t.Fatalf("Sweep err: %v", err)
	}
	close(results)

	// Sweep has returned, so everything it sent is sitting in the buffer.
	if count := len(results); count != 0 {
		t.Errorf("expected 0 findings, got %d", count)
	}
}
// TestDeployPreview_Sweep_CtxCancelled runs Sweep with a context that is
// already cancelled and expects it to return an error.
func TestDeployPreview_Sweep_CtxCancelled(t *testing.T) {
	slowHandler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		time.Sleep(500 * time.Millisecond)
		_, _ = w.Write([]byte(deployPreviewFixtureHTML))
	})
	server := httptest.NewServer(slowHandler)
	defer server.Close()

	source := &DeployPreviewSource{
		BaseURL:  server.URL,
		Registry: deployPreviewTestRegistry(),
		Limiters: recon.NewLimiterRegistry(),
		Client:   NewClient(),
	}

	ctx, cancel := context.WithCancel(context.Background())
	cancel() // cancelled before Sweep starts

	results := make(chan recon.Finding, 4)
	err := source.Sweep(ctx, "", results)
	if err == nil {
		t.Fatal("expected ctx error")
	}
}
// TestDeployPreview_EnabledAlwaysTrue checks that a zero-value source reports
// itself enabled for an empty configuration.
func TestDeployPreview_EnabledAlwaysTrue(t *testing.T) {
	var source DeployPreviewSource
	if enabled := source.Enabled(recon.Config{}); !enabled {
		t.Fatal("expected Enabled=true")
	}
}
// TestDeployPreview_NameAndRate pins the static metadata of the source: its
// name, rate limit, burst size and robots policy.
func TestDeployPreview_NameAndRate(t *testing.T) {
	s := &DeployPreviewSource{}
	if s.Name() != "deploypreview" {
		t.Errorf("unexpected name: %s", s.Name())
	}
	// The test name promises rate coverage; a zero limit would admit no
	// requests at all, so require a positive one.
	if got := s.RateLimit(); got <= 0 {
		t.Errorf("rate limit: %v", got)
	}
	if s.Burst() != 2 {
		t.Errorf("burst: %d", s.Burst())
	}
	if !s.RespectsRobots() {
		t.Error("expected RespectsRobots=true")
	}
}

View File

@@ -0,0 +1,118 @@
package sources
import (
	"context"
	"encoding/json"
	"io"
	"net/http"
	"regexp"
	"time"

	"golang.org/x/time/rate"

	"github.com/salvacybersec/keyhunter/pkg/providers"
	"github.com/salvacybersec/keyhunter/pkg/recon"
)
// SwaggerSource probes for publicly accessible Swagger/OpenAPI documentation
// endpoints. Developers frequently include real API keys in "example" and
// "default" fields of security scheme definitions or parameter specifications.
type SwaggerSource struct {
// BaseURL is the origin to probe. Sweep appends each entry of
// swaggerDocPaths to it verbatim and does nothing when it is empty.
BaseURL string
// Registry is handed to BuildQueries; the resulting queries are reported
// as the ProviderName of each finding.
Registry *providers.Registry
// Limiters, when non-nil, is waited on before every probe request.
Limiters *recon.LimiterRegistry
// Client performs the HTTP requests. Sweep falls back to NewClient() when
// it is nil.
Client *Client
}
// Compile-time assertion that *SwaggerSource satisfies recon.ReconSource.
var _ recon.ReconSource = (*SwaggerSource)(nil)
// Name returns the identifier of this source, "swagger". Sweep also uses it
// as the key when waiting on the limiter registry.
func (s *SwaggerSource) Name() string {
	return "swagger"
}
// RateLimit returns the steady-state request rate for this source: one
// request every three seconds.
func (s *SwaggerSource) RateLimit() rate.Limit {
	const interval = 3 * time.Second
	return rate.Every(interval)
}
// Burst returns the limiter burst size for this source, which is 2.
func (s *SwaggerSource) Burst() int {
	return 2
}
// RespectsRobots reports whether this source declares that it honours
// robots.txt; it always returns true.
func (s *SwaggerSource) RespectsRobots() bool {
	return true
}
// Enabled reports whether the source should run. The configuration is
// ignored and the result is always true.
func (s *SwaggerSource) Enabled(_ recon.Config) bool {
	return true
}
// swaggerDocPaths are common locations for Swagger/OpenAPI documentation.
// Sweep appends each entry to BaseURL, in the order listed, to form the URLs
// it probes.
var swaggerDocPaths = []string{
"/swagger.json",
"/openapi.json",
"/api-docs",
"/v2/api-docs",
"/swagger/v1/swagger.json",
"/docs/openapi.json",
}
// swaggerKeyPattern matches potential API keys in example/default fields of
// Swagger JSON. It looks for an "example" or "default" key whose string value
// consists solely of 16 or more characters from [a-zA-Z0-9_-]. Values that
// contain any other character (a space, for instance) do not match.
var swaggerKeyPattern = regexp.MustCompile(`"(?:example|default)"\s*:\s*"([a-zA-Z0-9_\-]{16,})"`)
// Sweep probes the common Swagger/OpenAPI document paths under s.BaseURL and,
// for every response that decodes as a JSON object and is matched by
// swaggerKeyPattern, sends one finding per provider query to out.
//
// The second parameter (the sweep query) is unused; probe targets come only
// from BaseURL and swaggerDocPaths. Sweep returns nil without probing when
// BaseURL is empty or BuildQueries yields no queries. Request construction,
// transport and JSON failures skip the affected path. Sweep returns the
// context or limiter error when ctx ends while it is throttled, between
// probes, or while a finding is waiting to be delivered.
//
// The probed URLs do not depend on the provider query, so every path is
// fetched once and a match is attributed to all queries. Assuming the target
// answers repeated requests identically, this yields the same set of findings
// as probing once per query while issuing far fewer requests.
func (s *SwaggerSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
	// maxDocBytes bounds how much of a response is decoded so that a hostile
	// or misbehaving server cannot exhaust memory. Documents larger than
	// this fail to decode and are skipped.
	const maxDocBytes = 16 << 20

	base := s.BaseURL
	if base == "" {
		return nil
	}
	client := s.Client
	if client == nil {
		client = NewClient()
	}
	queries := BuildQueries(s.Registry, "swagger")
	if len(queries) == 0 {
		return nil
	}

	for _, path := range swaggerDocPaths {
		if err := ctx.Err(); err != nil {
			return err
		}
		if s.Limiters != nil {
			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
				return err
			}
		}

		probeURL := base + path
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, probeURL, nil)
		if err != nil {
			continue
		}
		req.Header.Set("Accept", "application/json")
		resp, err := client.Do(ctx, req)
		if err != nil {
			continue
		}

		// Decoding doubles as validation: anything that is not a JSON object
		// is not treated as a Swagger document.
		var doc map[string]interface{}
		err = json.NewDecoder(io.LimitReader(resp.Body, maxDocBytes)).Decode(&doc)
		_ = resp.Body.Close() // best effort: nothing more is read from the body
		if err != nil {
			continue
		}

		// Re-marshal to a compact form and search it for example/default
		// fields holding key-like values.
		raw, err := json.Marshal(doc)
		if err != nil {
			continue
		}
		if !swaggerKeyPattern.Match(raw) {
			continue
		}

		for _, q := range queries {
			finding := recon.Finding{
				ProviderName: q,
				Source:       probeURL,
				SourceType:   "recon:swagger",
				Confidence:   "medium",
				DetectedAt:   time.Now(),
			}
			// Never block forever on a consumer that has gone away.
			select {
			case out <- finding:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	}

	// Surface a cancellation that cut the final probe short.
	return ctx.Err()
}

View File

@@ -0,0 +1,179 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// swaggerTestRegistry builds a registry holding a single "openai" provider
// with the keyword "sk-proj-" for the SwaggerSource tests.
func swaggerTestRegistry() *providers.Registry {
	testProviders := []providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	}
	return providers.NewRegistryFromProviders(testProviders)
}
// swaggerFixtureJSON is an OpenAPI 3.0 document with a key-like string in a
// parameter "example" field and a "Bearer ..." string in a security-scheme
// "default" field; the tests serve it to exercise the positive detection path.
const swaggerFixtureJSON = `{
"openapi": "3.0.0",
"info": {"title": "My API", "version": "1.0"},
"paths": {
"/api/data": {
"get": {
"parameters": [
{
"name": "X-API-Key",
"in": "header",
"schema": {"type": "string"},
"example": "sk-proj-abc123def456ghi789jkl"
}
]
}
}
},
"components": {
"securitySchemes": {
"apiKey": {
"type": "apiKey",
"in": "header",
"name": "Authorization",
"default": "Bearer sk-live-xxxxxxxxxxxxxxxxxxxx"
}
}
}
}`
// swaggerCleanFixtureJSON is an OpenAPI 3.0 document whose only "example"
// value is the integer 10, so it holds no key-like string; the tests serve it
// to exercise the negative path.
const swaggerCleanFixtureJSON = `{
"openapi": "3.0.0",
"info": {"title": "My API", "version": "1.0"},
"paths": {
"/api/data": {
"get": {
"parameters": [
{
"name": "limit",
"in": "query",
"schema": {"type": "integer"},
"example": 10
}
]
}
}
}
}`
// TestSwagger_Sweep_ExtractsFindings serves the leaking OpenAPI fixture and
// checks that Sweep emits at least one finding, each tagged "recon:swagger"
// with "medium" confidence.
func TestSwagger_Sweep_ExtractsFindings(t *testing.T) {
	handler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(swaggerFixtureJSON))
	}
	server := httptest.NewServer(http.HandlerFunc(handler))
	defer server.Close()

	source := &SwaggerSource{
		BaseURL:  server.URL,
		Registry: swaggerTestRegistry(),
		Client:   NewClient(),
	}

	results := make(chan recon.Finding, 64)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	if err := source.Sweep(ctx, "", results); err != nil {
		t.Fatalf("Sweep err: %v", err)
	}
	close(results)

	var collected []recon.Finding
	for finding := range results {
		collected = append(collected, finding)
	}
	if len(collected) == 0 {
		t.Fatal("expected at least one finding")
	}
	for i := range collected {
		if got := collected[i].SourceType; got != "recon:swagger" {
			t.Errorf("unexpected SourceType: %s", got)
		}
		if got := collected[i].Confidence; got != "medium" {
			t.Errorf("unexpected Confidence: %s", got)
		}
	}
}
// TestSwagger_Sweep_NoFindings_OnCleanDoc serves an OpenAPI document without
// key-like example/default strings and checks that Sweep emits nothing.
func TestSwagger_Sweep_NoFindings_OnCleanDoc(t *testing.T) {
	handler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(swaggerCleanFixtureJSON))
	}
	server := httptest.NewServer(http.HandlerFunc(handler))
	defer server.Close()

	source := &SwaggerSource{
		BaseURL:  server.URL,
		Registry: swaggerTestRegistry(),
		Client:   NewClient(),
	}

	results := make(chan recon.Finding, 64)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	if err := source.Sweep(ctx, "", results); err != nil {
		t.Fatalf("Sweep err: %v", err)
	}
	close(results)

	// Sweep has returned, so everything it sent is sitting in the buffer.
	if count := len(results); count != 0 {
		t.Errorf("expected 0 findings, got %d", count)
	}
}
// TestSwagger_Sweep_CtxCancelled runs Sweep with a context that is already
// cancelled and expects it to return an error.
func TestSwagger_Sweep_CtxCancelled(t *testing.T) {
	slowHandler := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		time.Sleep(500 * time.Millisecond)
		_, _ = w.Write([]byte(swaggerFixtureJSON))
	})
	server := httptest.NewServer(slowHandler)
	defer server.Close()

	source := &SwaggerSource{
		BaseURL:  server.URL,
		Registry: swaggerTestRegistry(),
		Limiters: recon.NewLimiterRegistry(),
		Client:   NewClient(),
	}

	ctx, cancel := context.WithCancel(context.Background())
	cancel() // cancelled before Sweep starts

	results := make(chan recon.Finding, 4)
	err := source.Sweep(ctx, "", results)
	if err == nil {
		t.Fatal("expected ctx error")
	}
}
// TestSwagger_EnabledAlwaysTrue checks that a zero-value source reports itself
// enabled for an empty configuration.
func TestSwagger_EnabledAlwaysTrue(t *testing.T) {
	var source SwaggerSource
	if enabled := source.Enabled(recon.Config{}); !enabled {
		t.Fatal("expected Enabled=true")
	}
}
// TestSwagger_NameAndRate pins the static metadata of the source: its name,
// rate limit, burst size and robots policy.
func TestSwagger_NameAndRate(t *testing.T) {
	s := &SwaggerSource{}
	if s.Name() != "swagger" {
		t.Errorf("unexpected name: %s", s.Name())
	}
	// The test name promises rate coverage; a zero limit would admit no
	// requests at all, so require a positive one.
	if got := s.RateLimit(); got <= 0 {
		t.Errorf("rate limit: %v", got)
	}
	if s.Burst() != 2 {
		t.Errorf("burst: %d", s.Burst())
	}
	if !s.RespectsRobots() {
		t.Error("expected RespectsRobots=true")
	}
}