diff --git a/.planning/REQUIREMENTS.md b/.planning/REQUIREMENTS.md index 7b82ff6..522008b 100644 --- a/.planning/REQUIREMENTS.md +++ b/.planning/REQUIREMENTS.md @@ -199,8 +199,8 @@ Requirements for initial release. Each maps to roadmap phases. ### OSINT/Recon — API Marketplaces -- [ ] **RECON-API-01**: Postman public collections and workspaces scanning -- [ ] **RECON-API-02**: SwaggerHub published API scanning +- [x] **RECON-API-01**: Postman public collections and workspaces scanning +- [x] **RECON-API-02**: SwaggerHub published API scanning ### OSINT/Recon — Infrastructure diff --git a/.planning/phases/16-osint_threat_intel_mobile_dns_api_marketplaces/16-03-SUMMARY.md b/.planning/phases/16-osint_threat_intel_mobile_dns_api_marketplaces/16-03-SUMMARY.md new file mode 100644 index 0000000..7f96a05 --- /dev/null +++ b/.planning/phases/16-osint_threat_intel_mobile_dns_api_marketplaces/16-03-SUMMARY.md @@ -0,0 +1,59 @@ +--- +phase: 16-osint-threat-intel-mobile-dns-api-marketplaces +plan: 03 +subsystem: recon-sources +tags: [osint, api-marketplace, postman, swaggerhub, rapidapi, recon] +dependency_graph: + requires: [recon.ReconSource interface, sources.Client, BuildQueries, ciLogKeyPattern] + provides: [PostmanSource, SwaggerHubSource, RapidAPISource] + affects: [RegisterAll wiring] +tech_stack: + added: [] + patterns: [credentialless API marketplace scanning, HTML scraping for RapidAPI, JSON API for Postman/SwaggerHub] +key_files: + created: + - pkg/recon/sources/postman.go + - pkg/recon/sources/postman_test.go + - pkg/recon/sources/swaggerhub.go + - pkg/recon/sources/swaggerhub_test.go + - pkg/recon/sources/rapidapi.go + - pkg/recon/sources/rapidapi_test.go + modified: [] +decisions: + - All three sources are credentialless -- Postman and SwaggerHub have public APIs, RapidAPI is scraped + - RapidAPI uses HTML scraping approach since its internal search API is not stable + - SwaggerHub fetches full spec content after search to scan example values for keys +metrics: + 
duration: 2min + completed: 2026-04-06 + tasks: 2 + files: 6 +--- + +# Phase 16 Plan 03: Postman, SwaggerHub, RapidAPI Sources Summary + +API marketplace recon sources scanning public Postman collections, SwaggerHub API specs, and RapidAPI listings for hardcoded API keys in examples and documentation. + +## Task Results + +### Task 1: Postman and SwaggerHub sources +- **Commit:** edde02f +- **PostmanSource:** Searches via Postman internal search proxy (`/ws/proxy`) for key patterns in collection snippets +- **SwaggerHubSource:** Two-phase: search public specs, then fetch each spec and scan for keys in example values, server URLs, security scheme defaults +- **Tests:** 8 tests (Name, Enabled, Sweep with match, Sweep empty) for both sources + +### Task 2: RapidAPI source +- **Commit:** 297ad3d +- **RapidAPISource:** Scrapes public search result pages for key patterns in code examples and descriptions +- **Confidence:** Set to "low" (HTML scraping is less precise than JSON API parsing) +- **Tests:** 4 tests (Name, Enabled, Sweep with match, Sweep clean HTML) + +## Deviations from Plan + +None -- plan executed exactly as written. + +## Known Stubs + +None. All three sources are fully functional with real API endpoint patterns. + +## Self-Check: PASSED diff --git a/pkg/recon/sources/postman.go b/pkg/recon/sources/postman.go new file mode 100644 index 0000000..65b801a --- /dev/null +++ b/pkg/recon/sources/postman.go @@ -0,0 +1,98 @@ +package sources + +import ( + "context" + "fmt" + "io" + "net/http" + "net/url" + "time" + + "golang.org/x/time/rate" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +// PostmanSource searches public Postman collections and workspaces for +// hardcoded API keys. The Postman public network exposes a search proxy +// that does not require authentication. 
+type PostmanSource struct { + BaseURL string + Registry *providers.Registry + Limiters *recon.LimiterRegistry + Client *Client +} + +var _ recon.ReconSource = (*PostmanSource)(nil) + +func (s *PostmanSource) Name() string { return "postman" } +func (s *PostmanSource) RateLimit() rate.Limit { return rate.Every(3 * time.Second) } +func (s *PostmanSource) Burst() int { return 3 } +func (s *PostmanSource) RespectsRobots() bool { return false } +func (s *PostmanSource) Enabled(_ recon.Config) bool { return true } + +func (s *PostmanSource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error { + base := s.BaseURL + if base == "" { + base = "https://www.postman.com/_api" + } + client := s.Client + if client == nil { + client = NewClient() + } + + queries := BuildQueries(s.Registry, "postman") + if len(queries) == 0 { + return nil + } + + for _, q := range queries { + if err := ctx.Err(); err != nil { + return err + } + + if s.Limiters != nil { + if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil { + return err + } + } + + // Use Postman's internal search proxy. The encoded request parameter + // targets /search/all with the query text. + searchPath := fmt.Sprintf("/search/all?querytext=%s&size=10&type=all", + url.QueryEscape(q)) + searchURL := fmt.Sprintf("%s/ws/proxy?request=%s", + base, url.QueryEscape(searchPath)) + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil) + if err != nil { + continue + } + + resp, err := client.Do(ctx, req) + if err != nil { + continue + } + + data, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024)) + _ = resp.Body.Close() + if err != nil { + continue + } + + // Scan the raw response body for key patterns. Postman search results + // include snippets of collection contents where keys may appear. 
+ content := string(data) + if ciLogKeyPattern.MatchString(content) { + out <- recon.Finding{ + ProviderName: q, + Source: fmt.Sprintf("https://www.postman.com/search?q=%s", url.QueryEscape(q)), + SourceType: "recon:postman", + Confidence: "medium", + DetectedAt: time.Now(), + } + } + } + return nil +} diff --git a/pkg/recon/sources/postman_test.go b/pkg/recon/sources/postman_test.go new file mode 100644 index 0000000..f82a2a2 --- /dev/null +++ b/pkg/recon/sources/postman_test.go @@ -0,0 +1,115 @@ +package sources + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +func TestPostman_Name(t *testing.T) { + s := &PostmanSource{} + if s.Name() != "postman" { + t.Fatalf("expected postman, got %s", s.Name()) + } +} + +func TestPostman_Enabled(t *testing.T) { + s := &PostmanSource{} + if !s.Enabled(recon.Config{}) { + t.Fatal("PostmanSource should always be enabled") + } +} + +func TestPostman_Sweep(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/ws/proxy", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{ + "data": [ + { + "id": "coll-001", + "name": "My API Collection", + "summary": "api_key = 'sk-proj-ABCDEF1234567890abcdef'" + } + ] + }`)) + }) + + srv := httptest.NewServer(mux) + defer srv.Close() + + reg := providers.NewRegistryFromProviders([]providers.Provider{ + {Name: "openai", Keywords: []string{"sk-proj-"}}, + }) + + s := &PostmanSource{ + BaseURL: srv.URL, + Registry: reg, + Client: NewClient(), + } + + out := make(chan recon.Finding, 10) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + err := s.Sweep(ctx, "", out) + close(out) + if err != nil { + t.Fatalf("Sweep error: %v", err) + } + + var findings []recon.Finding + for f := range out { + findings = append(findings, f) + } + if 
len(findings) == 0 { + t.Fatal("expected at least one finding from Postman") + } + if findings[0].SourceType != "recon:postman" { + t.Fatalf("expected recon:postman, got %s", findings[0].SourceType) + } +} + +func TestPostman_Sweep_NoResults(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/ws/proxy", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"data": []}`)) + }) + + srv := httptest.NewServer(mux) + defer srv.Close() + + reg := providers.NewRegistryFromProviders([]providers.Provider{ + {Name: "openai", Keywords: []string{"sk-proj-"}}, + }) + + s := &PostmanSource{ + BaseURL: srv.URL, + Registry: reg, + Client: NewClient(), + } + + out := make(chan recon.Finding, 10) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + err := s.Sweep(ctx, "", out) + close(out) + if err != nil { + t.Fatalf("Sweep error: %v", err) + } + + var findings []recon.Finding + for f := range out { + findings = append(findings, f) + } + if len(findings) != 0 { + t.Fatalf("expected no findings, got %d", len(findings)) + } +} diff --git a/pkg/recon/sources/rapidapi.go b/pkg/recon/sources/rapidapi.go new file mode 100644 index 0000000..21254ca --- /dev/null +++ b/pkg/recon/sources/rapidapi.go @@ -0,0 +1,95 @@ +package sources + +import ( + "context" + "fmt" + "io" + "net/http" + "net/url" + "time" + + "golang.org/x/time/rate" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +// RapidAPISource searches public RapidAPI listings for exposed API keys. +// API listings often include code snippets and example requests where +// developers may accidentally paste real credentials. Credentialless. 
+type RapidAPISource struct { + BaseURL string + Registry *providers.Registry + Limiters *recon.LimiterRegistry + Client *Client +} + +var _ recon.ReconSource = (*RapidAPISource)(nil) + +func (s *RapidAPISource) Name() string { return "rapidapi" } +func (s *RapidAPISource) RateLimit() rate.Limit { return rate.Every(3 * time.Second) } +func (s *RapidAPISource) Burst() int { return 3 } +func (s *RapidAPISource) RespectsRobots() bool { return false } +func (s *RapidAPISource) Enabled(_ recon.Config) bool { return true } + +func (s *RapidAPISource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error { + base := s.BaseURL + if base == "" { + base = "https://rapidapi.com" + } + client := s.Client + if client == nil { + client = NewClient() + } + + queries := BuildQueries(s.Registry, "rapidapi") + if len(queries) == 0 { + return nil + } + + for _, q := range queries { + if err := ctx.Err(); err != nil { + return err + } + + if s.Limiters != nil { + if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil { + return err + } + } + + // Search RapidAPI public listings. The search page renders HTML with + // code examples and descriptions that may contain leaked keys. 
+ searchURL := fmt.Sprintf( + "%s/search/%s?sortBy=ByRelevance&page=1", + base, url.PathEscape(q), + ) + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil) + if err != nil { + continue + } + + resp, err := client.Do(ctx, req) + if err != nil { + continue + } + + data, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024)) + _ = resp.Body.Close() + if err != nil { + continue + } + + if ciLogKeyPattern.Match(data) { + out <- recon.Finding{ + ProviderName: q, + Source: fmt.Sprintf("https://rapidapi.com/search/%s", url.PathEscape(q)), + SourceType: "recon:rapidapi", + Confidence: "low", + DetectedAt: time.Now(), + } + } + } + return nil +} diff --git a/pkg/recon/sources/rapidapi_test.go b/pkg/recon/sources/rapidapi_test.go new file mode 100644 index 0000000..daade4d --- /dev/null +++ b/pkg/recon/sources/rapidapi_test.go @@ -0,0 +1,119 @@ +package sources + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +func TestRapidAPI_Name(t *testing.T) { + s := &RapidAPISource{} + if s.Name() != "rapidapi" { + t.Fatalf("expected rapidapi, got %s", s.Name()) + } +} + +func TestRapidAPI_Enabled(t *testing.T) { + s := &RapidAPISource{} + if !s.Enabled(recon.Config{}) { + t.Fatal("RapidAPISource should always be enabled") + } +} + +func TestRapidAPI_Sweep(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/search/", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + _, _ = w.Write([]byte(` + +
+
+ curl -H "Authorization: Bearer YOUR_API_KEY" https://api.example.com
+ api_key = "YOUR_API_KEY"
+
+No results found
`)) + }) + + srv := httptest.NewServer(mux) + defer srv.Close() + + reg := providers.NewRegistryFromProviders([]providers.Provider{ + {Name: "openai", Keywords: []string{"sk-proj-"}}, + }) + + s := &RapidAPISource{ + BaseURL: srv.URL, + Registry: reg, + Client: NewClient(), + } + + out := make(chan recon.Finding, 10) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + err := s.Sweep(ctx, "", out) + close(out) + if err != nil { + t.Fatalf("Sweep error: %v", err) + } + + var findings []recon.Finding + for f := range out { + findings = append(findings, f) + } + if len(findings) != 0 { + t.Fatalf("expected no findings, got %d", len(findings)) + } +} diff --git a/pkg/recon/sources/swaggerhub.go b/pkg/recon/sources/swaggerhub.go new file mode 100644 index 0000000..a0319ef --- /dev/null +++ b/pkg/recon/sources/swaggerhub.go @@ -0,0 +1,158 @@ +package sources + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "net/url" + "time" + + "golang.org/x/time/rate" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +// SwaggerHubSource searches published API definitions on SwaggerHub for +// embedded API keys in example values, server URLs, and security scheme +// defaults. The SwaggerHub specs API is publicly accessible. +type SwaggerHubSource struct { + BaseURL string + Registry *providers.Registry + Limiters *recon.LimiterRegistry + Client *Client +} + +var _ recon.ReconSource = (*SwaggerHubSource)(nil) + +func (s *SwaggerHubSource) Name() string { return "swaggerhub" } +func (s *SwaggerHubSource) RateLimit() rate.Limit { return rate.Every(3 * time.Second) } +func (s *SwaggerHubSource) Burst() int { return 3 } +func (s *SwaggerHubSource) RespectsRobots() bool { return false } +func (s *SwaggerHubSource) Enabled(_ recon.Config) bool { return true } + +// swaggerHubSearchResult represents a single API from the search response. 
+type swaggerHubSearchResult struct { + Name string `json:"name"` + URL string `json:"url"` + Properties []struct { + Type string `json:"type"` + URL string `json:"url"` + } `json:"properties"` +} + +// swaggerHubSearchResponse is the top-level search response from SwaggerHub. +type swaggerHubSearchResponse struct { + APIs []swaggerHubSearchResult `json:"apis"` +} + +func (s *SwaggerHubSource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error { + base := s.BaseURL + if base == "" { + base = "https://app.swaggerhub.com/apiproxy/specs" + } + client := s.Client + if client == nil { + client = NewClient() + } + + queries := BuildQueries(s.Registry, "swaggerhub") + if len(queries) == 0 { + return nil + } + + for _, q := range queries { + if err := ctx.Err(); err != nil { + return err + } + + if s.Limiters != nil { + if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil { + return err + } + } + + // Search public API specs. + searchURL := fmt.Sprintf( + "%s?specType=ANY&visibility=PUBLIC&query=%s&limit=10&page=1", + base, url.QueryEscape(q), + ) + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil) + if err != nil { + continue + } + + resp, err := client.Do(ctx, req) + if err != nil { + continue + } + + data, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024)) + _ = resp.Body.Close() + if err != nil { + continue + } + + var sr swaggerHubSearchResponse + if err := json.Unmarshal(data, &sr); err != nil { + continue + } + + // Fetch each spec and scan for key patterns. + for _, api := range sr.APIs { + if err := ctx.Err(); err != nil { + return err + } + + specURL := api.URL + if specURL == "" { + // Fall back to the first property URL with type "Swagger" or "X-URL". 
+ for _, p := range api.Properties { + if p.URL != "" { + specURL = p.URL + break + } + } + } + if specURL == "" { + continue + } + + if s.Limiters != nil { + if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil { + return err + } + } + + specReq, err := http.NewRequestWithContext(ctx, http.MethodGet, specURL, nil) + if err != nil { + continue + } + + specResp, err := client.Do(ctx, specReq) + if err != nil { + continue + } + + specData, err := io.ReadAll(io.LimitReader(specResp.Body, 512*1024)) + _ = specResp.Body.Close() + if err != nil { + continue + } + + if ciLogKeyPattern.Match(specData) { + out <- recon.Finding{ + ProviderName: q, + Source: specURL, + SourceType: "recon:swaggerhub", + Confidence: "medium", + DetectedAt: time.Now(), + } + } + } + } + return nil +} diff --git a/pkg/recon/sources/swaggerhub_test.go b/pkg/recon/sources/swaggerhub_test.go new file mode 100644 index 0000000..da15f6f --- /dev/null +++ b/pkg/recon/sources/swaggerhub_test.go @@ -0,0 +1,182 @@ +package sources + +import ( + "context" + "net/http" + "net/http/httptest" + "testing" + "time" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +func TestSwaggerHub_Name(t *testing.T) { + s := &SwaggerHubSource{} + if s.Name() != "swaggerhub" { + t.Fatalf("expected swaggerhub, got %s", s.Name()) + } +} + +func TestSwaggerHub_Enabled(t *testing.T) { + s := &SwaggerHubSource{} + if !s.Enabled(recon.Config{}) { + t.Fatal("SwaggerHubSource should always be enabled") + } +} + +func TestSwaggerHub_Sweep(t *testing.T) { + mux := http.NewServeMux() + + // Search endpoint returns one API with a spec URL. 
+ mux.HandleFunc("/specs", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{ + "apis": [ + { + "name": "Payment Gateway", + "url": "", + "properties": [ + {"type": "Swagger", "url": "SPEC_URL_PLACEHOLDER"} + ] + } + ] + }`)) + }) + + // Spec endpoint returns OpenAPI JSON with an embedded key. + mux.HandleFunc("/spec/payment-gateway", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{ + "openapi": "3.0.0", + "info": {"title": "Payment API"}, + "paths": { + "/charge": { + "post": { + "parameters": [ + { + "name": "Authorization", + "in": "header", + "example": "api_key = 'sk-proj-ABCDEF1234567890abcdef'" + } + ] + } + } + } + }`)) + }) + + srv := httptest.NewServer(mux) + defer srv.Close() + + // Patch the spec URL placeholder with the test server URL. + origHandler := mux + _ = origHandler // keep for reference + + // Re-create with the actual server URL known. + mux2 := http.NewServeMux() + mux2.HandleFunc("/specs", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{ + "apis": [ + { + "name": "Payment Gateway", + "url": "` + srv.URL + `/spec/payment-gateway", + "properties": [] + } + ] + }`)) + }) + mux2.HandleFunc("/spec/payment-gateway", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{ + "openapi": "3.0.0", + "paths": { + "/charge": { + "post": { + "parameters": [ + { + "name": "Authorization", + "in": "header", + "example": "api_key = 'sk-proj-ABCDEF1234567890abcdef'" + } + ] + } + } + } + }`)) + }) + + // Replace the handler on the existing server. 
+ srv.Config.Handler = mux2 + + reg := providers.NewRegistryFromProviders([]providers.Provider{ + {Name: "openai", Keywords: []string{"sk-proj-"}}, + }) + + s := &SwaggerHubSource{ + BaseURL: srv.URL + "/specs", + Registry: reg, + Client: NewClient(), + } + + out := make(chan recon.Finding, 10) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + err := s.Sweep(ctx, "", out) + close(out) + if err != nil { + t.Fatalf("Sweep error: %v", err) + } + + var findings []recon.Finding + for f := range out { + findings = append(findings, f) + } + if len(findings) == 0 { + t.Fatal("expected at least one finding from SwaggerHub") + } + if findings[0].SourceType != "recon:swaggerhub" { + t.Fatalf("expected recon:swaggerhub, got %s", findings[0].SourceType) + } +} + +func TestSwaggerHub_Sweep_NoAPIs(t *testing.T) { + mux := http.NewServeMux() + mux.HandleFunc("/specs", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"apis": []}`)) + }) + + srv := httptest.NewServer(mux) + defer srv.Close() + + reg := providers.NewRegistryFromProviders([]providers.Provider{ + {Name: "openai", Keywords: []string{"sk-proj-"}}, + }) + + s := &SwaggerHubSource{ + BaseURL: srv.URL + "/specs", + Registry: reg, + Client: NewClient(), + } + + out := make(chan recon.Finding, 10) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + err := s.Sweep(ctx, "", out) + close(out) + if err != nil { + t.Fatalf("Sweep error: %v", err) + } + + var findings []recon.Finding + for f := range out { + findings = append(findings, f) + } + if len(findings) != 0 { + t.Fatalf("expected no findings, got %d", len(findings)) + } +}