merge: phase 16 resolve conflicts

This commit is contained in:
salvacybersec
2026-04-06 16:47:10 +03:00
8 changed files with 828 additions and 2 deletions

View File

@@ -199,8 +199,8 @@ Requirements for initial release. Each maps to roadmap phases.
### OSINT/Recon — API Marketplaces
- [ ] **RECON-API-01**: Postman public collections and workspaces scanning
- [ ] **RECON-API-02**: SwaggerHub published API scanning
- [x] **RECON-API-01**: Postman public collections and workspaces scanning
- [x] **RECON-API-02**: SwaggerHub published API scanning
### OSINT/Recon — Infrastructure

View File

@@ -0,0 +1,59 @@
---
phase: 16-osint-threat-intel-mobile-dns-api-marketplaces
plan: 03
subsystem: recon-sources
tags: [osint, api-marketplace, postman, swaggerhub, rapidapi, recon]
dependency_graph:
requires: [recon.ReconSource interface, sources.Client, BuildQueries, ciLogKeyPattern]
provides: [PostmanSource, SwaggerHubSource, RapidAPISource]
affects: [RegisterAll wiring]
tech_stack:
added: []
patterns: [credentialless API marketplace scanning, HTML scraping for RapidAPI, JSON API for Postman/SwaggerHub]
key_files:
created:
- pkg/recon/sources/postman.go
- pkg/recon/sources/postman_test.go
- pkg/recon/sources/swaggerhub.go
- pkg/recon/sources/swaggerhub_test.go
- pkg/recon/sources/rapidapi.go
- pkg/recon/sources/rapidapi_test.go
modified: []
decisions:
- All three sources are credentialless -- Postman and SwaggerHub have public APIs, RapidAPI is scraped
- RapidAPI uses HTML scraping approach since its internal search API is not stable
- SwaggerHub fetches full spec content after search to scan example values for keys
metrics:
duration: 2min
completed: 2026-04-06
tasks: 2
files: 6
---
# Phase 16 Plan 03: Postman, SwaggerHub, RapidAPI Sources Summary
API marketplace recon sources scanning public Postman collections, SwaggerHub API specs, and RapidAPI listings for hardcoded API keys in examples and documentation.
## Task Results
### Task 1: Postman and SwaggerHub sources
- **Commit:** edde02f
- **PostmanSource:** Searches via Postman internal search proxy (`/ws/proxy`) for key patterns in collection snippets
- **SwaggerHubSource:** Two-phase: search public specs, then fetch each spec and scan for keys in example values, server URLs, security scheme defaults
- **Tests:** 8 tests in total, 4 per source (Name, Enabled, Sweep with match, Sweep empty)
### Task 2: RapidAPI source
- **Commit:** 297ad3d
- **RapidAPISource:** Scrapes public search result pages for key patterns in code examples and descriptions
- **Confidence:** Set to "low" (HTML scraping is less precise than JSON API parsing)
- **Tests:** 4 tests (Name, Enabled, Sweep with match, Sweep clean HTML)
## Deviations from Plan
None -- plan executed exactly as written.
## Known Stubs
None. All three sources are fully functional with real API endpoint patterns.
## Self-Check: PASSED

View File

@@ -0,0 +1,98 @@
package sources
import (
"context"
"fmt"
"io"
"net/http"
"net/url"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// PostmanSource is a recon source that looks for hardcoded API keys in
// public Postman collections and workspaces. It queries a search proxy on
// the Postman public network and sends no credentials of its own.
type PostmanSource struct {
	// BaseURL overrides the Postman API root. When empty, Sweep falls back
	// to "https://www.postman.com/_api".
	BaseURL string
	// Registry is handed to BuildQueries to derive the search queries.
	Registry *providers.Registry
	// Limiters, when non-nil, is waited on before every outgoing request.
	Limiters *recon.LimiterRegistry
	// Client performs the HTTP requests. When nil, Sweep uses NewClient().
	Client *Client
}
// Compile-time assertion that *PostmanSource implements recon.ReconSource.
var _ recon.ReconSource = (*PostmanSource)(nil)

// Name returns the identifier of this source, "postman".
func (s *PostmanSource) Name() string {
	return "postman"
}

// RateLimit returns the request rate for this source: one event every
// three seconds.
func (s *PostmanSource) RateLimit() rate.Limit {
	const interval = 3 * time.Second
	return rate.Every(interval)
}

// Burst returns the limiter burst size, 3.
func (s *PostmanSource) Burst() int {
	return 3
}

// RespectsRobots always reports false.
func (s *PostmanSource) RespectsRobots() bool {
	return false
}

// Enabled always reports true; the configuration argument is ignored.
func (s *PostmanSource) Enabled(_ recon.Config) bool {
	return true
}
// Sweep runs one Postman search per query produced by
// BuildQueries(s.Registry, "postman") and emits a finding on out for every
// search response whose raw body matches ciLogKeyPattern.
//
// The query argument is not consulted; the queries come from the registry.
// Per-query failures (request construction, transport, body read) are
// skipped so that one bad query does not abort the sweep. Sweep returns a
// non-nil error only when ctx is done or the rate limiter wait fails.
func (s *PostmanSource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error {
	base := s.BaseURL
	if base == "" {
		base = "https://www.postman.com/_api"
	}
	client := s.Client
	if client == nil {
		client = NewClient()
	}

	queries := BuildQueries(s.Registry, "postman")
	if len(queries) == 0 {
		return nil
	}

	for _, q := range queries {
		if err := ctx.Err(); err != nil {
			return err
		}

		if s.Limiters != nil {
			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
				return err
			}
		}

		// Use Postman's internal search proxy. The inner request path is
		// itself URL-encoded and passed as the "request" parameter, so the
		// query text ends up escaped twice on the wire.
		searchPath := fmt.Sprintf("/search/all?querytext=%s&size=10&type=all",
			url.QueryEscape(q))
		searchURL := fmt.Sprintf("%s/ws/proxy?request=%s",
			base, url.QueryEscape(searchPath))

		req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
		if err != nil {
			continue
		}

		resp, err := client.Do(ctx, req)
		if err != nil {
			continue
		}

		// Cap the amount read at 512 KiB per response.
		data, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024))
		_ = resp.Body.Close()
		if err != nil {
			continue
		}

		// Scan the raw response body for key patterns. Postman search results
		// include snippets of collection contents where keys may appear.
		if !ciLogKeyPattern.MatchString(string(data)) {
			continue
		}

		finding := recon.Finding{
			ProviderName: q,
			Source:       fmt.Sprintf("https://www.postman.com/search?q=%s", url.QueryEscape(q)),
			SourceType:   "recon:postman",
			Confidence:   "medium",
			DetectedAt:   time.Now(),
		}

		// Never block on the send once the context is done: if the consumer
		// has stopped draining out, a bare send would hang this goroutine.
		select {
		case out <- finding:
		case <-ctx.Done():
			return ctx.Err()
		}
	}
	return nil
}

View File

@@ -0,0 +1,115 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// TestPostman_Name checks that the source reports "postman" as its name.
func TestPostman_Name(t *testing.T) {
	src := &PostmanSource{}
	if got := src.Name(); got != "postman" {
		t.Fatalf("expected postman, got %s", got)
	}
}
// TestPostman_Enabled checks that the source is enabled for a zero-value
// configuration.
func TestPostman_Enabled(t *testing.T) {
	src := &PostmanSource{}
	enabled := src.Enabled(recon.Config{})
	if !enabled {
		t.Fatal("PostmanSource should always be enabled")
	}
}
// TestPostman_Sweep serves a search-proxy response containing a key-like
// assignment and expects Sweep to emit at least one "recon:postman" finding.
func TestPostman_Sweep(t *testing.T) {
	const body = `{
"data": [
{
"id": "coll-001",
"name": "My API Collection",
"summary": "api_key = 'sk-proj-ABCDEF1234567890abcdef'"
}
]
}`

	handler := http.NewServeMux()
	handler.HandleFunc("/ws/proxy", func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(body))
	})
	server := httptest.NewServer(handler)
	defer server.Close()

	registry := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
	source := &PostmanSource{
		BaseURL:  server.URL,
		Registry: registry,
		Client:   NewClient(),
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Buffered so Sweep can emit without a concurrent reader.
	results := make(chan recon.Finding, 10)
	sweepErr := source.Sweep(ctx, "", results)
	close(results)
	if sweepErr != nil {
		t.Fatalf("Sweep error: %v", sweepErr)
	}

	var got []recon.Finding
	for finding := range results {
		got = append(got, finding)
	}

	if len(got) == 0 {
		t.Fatal("expected at least one finding from Postman")
	}
	if sourceType := got[0].SourceType; sourceType != "recon:postman" {
		t.Fatalf("expected recon:postman, got %s", sourceType)
	}
}
// TestPostman_Sweep_NoResults serves an empty result set and expects Sweep
// to finish without error and without emitting any finding.
func TestPostman_Sweep_NoResults(t *testing.T) {
	handler := http.NewServeMux()
	handler.HandleFunc("/ws/proxy", func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"data": []}`))
	})
	server := httptest.NewServer(handler)
	defer server.Close()

	registry := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
	source := &PostmanSource{
		BaseURL:  server.URL,
		Registry: registry,
		Client:   NewClient(),
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	results := make(chan recon.Finding, 10)
	sweepErr := source.Sweep(ctx, "", results)
	close(results)
	if sweepErr != nil {
		t.Fatalf("Sweep error: %v", sweepErr)
	}

	var got []recon.Finding
	for finding := range results {
		got = append(got, finding)
	}
	if count := len(got); count != 0 {
		t.Fatalf("expected no findings, got %d", count)
	}
}

View File

@@ -0,0 +1,95 @@
package sources
import (
"context"
"fmt"
"io"
"net/http"
"net/url"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// RapidAPISource is a recon source that looks for exposed API keys in public
// RapidAPI listings. Listings often carry code snippets and example requests
// into which developers may have pasted real credentials. The source sends
// no credentials of its own.
type RapidAPISource struct {
	// BaseURL overrides the RapidAPI site root. When empty, Sweep falls back
	// to "https://rapidapi.com".
	BaseURL string
	// Registry is handed to BuildQueries to derive the search queries.
	Registry *providers.Registry
	// Limiters, when non-nil, is waited on before every outgoing request.
	Limiters *recon.LimiterRegistry
	// Client performs the HTTP requests. When nil, Sweep uses NewClient().
	Client *Client
}
// Compile-time assertion that *RapidAPISource implements recon.ReconSource.
var _ recon.ReconSource = (*RapidAPISource)(nil)

// Name returns the identifier of this source, "rapidapi".
func (s *RapidAPISource) Name() string {
	return "rapidapi"
}

// RateLimit returns the request rate for this source: one event every
// three seconds.
func (s *RapidAPISource) RateLimit() rate.Limit {
	const interval = 3 * time.Second
	return rate.Every(interval)
}

// Burst returns the limiter burst size, 3.
func (s *RapidAPISource) Burst() int {
	return 3
}

// RespectsRobots always reports false.
func (s *RapidAPISource) RespectsRobots() bool {
	return false
}

// Enabled always reports true; the configuration argument is ignored.
func (s *RapidAPISource) Enabled(_ recon.Config) bool {
	return true
}
// Sweep fetches one RapidAPI search page per query produced by
// BuildQueries(s.Registry, "rapidapi") and emits a finding on out for every
// page whose raw body matches ciLogKeyPattern.
//
// The query argument is not consulted; the queries come from the registry.
// Per-query failures (request construction, transport, body read) are
// skipped so that one bad query does not abort the sweep. Sweep returns a
// non-nil error only when ctx is done or the rate limiter wait fails.
func (s *RapidAPISource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error {
	base := s.BaseURL
	if base == "" {
		base = "https://rapidapi.com"
	}
	client := s.Client
	if client == nil {
		client = NewClient()
	}

	queries := BuildQueries(s.Registry, "rapidapi")
	if len(queries) == 0 {
		return nil
	}

	for _, q := range queries {
		if err := ctx.Err(); err != nil {
			return err
		}

		if s.Limiters != nil {
			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
				return err
			}
		}

		// Search RapidAPI public listings. The search page renders HTML with
		// code examples and descriptions that may contain leaked keys.
		searchURL := fmt.Sprintf(
			"%s/search/%s?sortBy=ByRelevance&page=1",
			base, url.PathEscape(q),
		)

		req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
		if err != nil {
			continue
		}

		resp, err := client.Do(ctx, req)
		if err != nil {
			continue
		}

		// Cap the amount read at 512 KiB per response.
		data, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024))
		_ = resp.Body.Close()
		if err != nil {
			continue
		}

		if !ciLogKeyPattern.Match(data) {
			continue
		}

		// The reported Source always points at the public rapidapi.com
		// search page, regardless of any BaseURL override.
		finding := recon.Finding{
			ProviderName: q,
			Source:       fmt.Sprintf("https://rapidapi.com/search/%s", url.PathEscape(q)),
			SourceType:   "recon:rapidapi",
			Confidence:   "low",
			DetectedAt:   time.Now(),
		}

		// Never block on the send once the context is done: if the consumer
		// has stopped draining out, a bare send would hang this goroutine.
		select {
		case out <- finding:
		case <-ctx.Done():
			return ctx.Err()
		}
	}
	return nil
}

View File

@@ -0,0 +1,119 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// TestRapidAPI_Name checks that the source reports "rapidapi" as its name.
func TestRapidAPI_Name(t *testing.T) {
	src := &RapidAPISource{}
	if got := src.Name(); got != "rapidapi" {
		t.Fatalf("expected rapidapi, got %s", got)
	}
}
// TestRapidAPI_Enabled checks that the source is enabled for a zero-value
// configuration.
func TestRapidAPI_Enabled(t *testing.T) {
	src := &RapidAPISource{}
	enabled := src.Enabled(recon.Config{})
	if !enabled {
		t.Fatal("RapidAPISource should always be enabled")
	}
}
// TestRapidAPI_Sweep serves a search page whose code sample contains a
// key-like assignment and expects Sweep to emit at least one
// "recon:rapidapi" finding.
func TestRapidAPI_Sweep(t *testing.T) {
	const page = `<!DOCTYPE html>
<html>
<body>
<div class="api-listing">
<h2>OpenAI Helper API</h2>
<pre><code>
curl -H "Authorization: Bearer sk-proj-ABCDEF1234567890abcdef" https://api.example.com
api_key = "sk-proj-ABCDEF1234567890abcdef"
</code></pre>
</div>
</body>
</html>`

	handler := http.NewServeMux()
	handler.HandleFunc("/search/", func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/html")
		_, _ = w.Write([]byte(page))
	})
	server := httptest.NewServer(handler)
	defer server.Close()

	registry := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
	source := &RapidAPISource{
		BaseURL:  server.URL,
		Registry: registry,
		Client:   NewClient(),
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Buffered so Sweep can emit without a concurrent reader.
	results := make(chan recon.Finding, 10)
	sweepErr := source.Sweep(ctx, "", results)
	close(results)
	if sweepErr != nil {
		t.Fatalf("Sweep error: %v", sweepErr)
	}

	var got []recon.Finding
	for finding := range results {
		got = append(got, finding)
	}

	if len(got) == 0 {
		t.Fatal("expected at least one finding from RapidAPI")
	}
	if sourceType := got[0].SourceType; sourceType != "recon:rapidapi" {
		t.Fatalf("expected recon:rapidapi, got %s", sourceType)
	}
}
// TestRapidAPI_Sweep_Clean serves a page without any key-like content and
// expects Sweep to finish without error and without emitting any finding.
func TestRapidAPI_Sweep_Clean(t *testing.T) {
	const page = `<!DOCTYPE html>
<html><body><p>No results found</p></body></html>`

	handler := http.NewServeMux()
	handler.HandleFunc("/search/", func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/html")
		_, _ = w.Write([]byte(page))
	})
	server := httptest.NewServer(handler)
	defer server.Close()

	registry := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
	source := &RapidAPISource{
		BaseURL:  server.URL,
		Registry: registry,
		Client:   NewClient(),
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	results := make(chan recon.Finding, 10)
	sweepErr := source.Sweep(ctx, "", results)
	close(results)
	if sweepErr != nil {
		t.Fatalf("Sweep error: %v", sweepErr)
	}

	var got []recon.Finding
	for finding := range results {
		got = append(got, finding)
	}
	if count := len(got); count != 0 {
		t.Fatalf("expected no findings, got %d", count)
	}
}

View File

@@ -0,0 +1,158 @@
package sources
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// SwaggerHubSource is a recon source that looks for API keys embedded in
// API definitions published on SwaggerHub, such as example values, server
// URLs, and security scheme defaults. It uses the publicly accessible specs
// API and sends no credentials of its own.
type SwaggerHubSource struct {
	// BaseURL overrides the specs search endpoint. When empty, Sweep falls
	// back to "https://app.swaggerhub.com/apiproxy/specs".
	BaseURL string
	// Registry is handed to BuildQueries to derive the search queries.
	Registry *providers.Registry
	// Limiters, when non-nil, is waited on before every outgoing request.
	Limiters *recon.LimiterRegistry
	// Client performs the HTTP requests. When nil, Sweep uses NewClient().
	Client *Client
}
// Compile-time assertion that *SwaggerHubSource implements recon.ReconSource.
var _ recon.ReconSource = (*SwaggerHubSource)(nil)

// Name returns the identifier of this source, "swaggerhub".
func (s *SwaggerHubSource) Name() string {
	return "swaggerhub"
}

// RateLimit returns the request rate for this source: one event every
// three seconds.
func (s *SwaggerHubSource) RateLimit() rate.Limit {
	const interval = 3 * time.Second
	return rate.Every(interval)
}

// Burst returns the limiter burst size, 3.
func (s *SwaggerHubSource) Burst() int {
	return 3
}

// RespectsRobots always reports false.
func (s *SwaggerHubSource) RespectsRobots() bool {
	return false
}

// Enabled always reports true; the configuration argument is ignored.
func (s *SwaggerHubSource) Enabled(_ recon.Config) bool {
	return true
}
// swaggerHubSearchResult is one entry of the "apis" array in a SwaggerHub
// search response. Only the fields read by Sweep are decoded.
type swaggerHubSearchResult struct {
	// Name is decoded from the "name" key.
	Name string `json:"name"`
	// URL is the spec location; Sweep prefers it when it is non-empty.
	URL string `json:"url"`
	// Properties holds typed links attached to the entry. Sweep falls back
	// to the first one carrying a non-empty URL when URL itself is empty.
	Properties []struct {
		Type string `json:"type"`
		URL  string `json:"url"`
	} `json:"properties"`
}
// swaggerHubSearchResponse models the top level of a SwaggerHub search
// response body.
type swaggerHubSearchResponse struct {
	// APIs lists the matching API entries, decoded from the "apis" key.
	APIs []swaggerHubSearchResult `json:"apis"`
}
// Sweep works in two phases for every query produced by
// BuildQueries(s.Registry, "swaggerhub"): it searches public specs, then
// fetches each returned spec and emits a finding on out when the raw spec
// body matches ciLogKeyPattern.
//
// The query argument is not consulted; the queries come from the registry.
// Per-request failures (request construction, transport, body read, JSON
// decoding) are skipped so that one bad result does not abort the sweep.
// Sweep returns a non-nil error only when ctx is done or the rate limiter
// wait fails.
func (s *SwaggerHubSource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error {
	base := s.BaseURL
	if base == "" {
		base = "https://app.swaggerhub.com/apiproxy/specs"
	}
	client := s.Client
	if client == nil {
		client = NewClient()
	}

	queries := BuildQueries(s.Registry, "swaggerhub")
	if len(queries) == 0 {
		return nil
	}

	for _, q := range queries {
		if err := ctx.Err(); err != nil {
			return err
		}

		if s.Limiters != nil {
			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
				return err
			}
		}

		// Search public API specs.
		searchURL := fmt.Sprintf(
			"%s?specType=ANY&visibility=PUBLIC&query=%s&limit=10&page=1",
			base, url.QueryEscape(q),
		)

		req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
		if err != nil {
			continue
		}

		resp, err := client.Do(ctx, req)
		if err != nil {
			continue
		}

		// Cap the amount read at 512 KiB per response.
		data, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024))
		_ = resp.Body.Close()
		if err != nil {
			continue
		}

		var sr swaggerHubSearchResponse
		if err := json.Unmarshal(data, &sr); err != nil {
			continue
		}

		// Fetch each spec and scan for key patterns.
		for _, api := range sr.APIs {
			if err := ctx.Err(); err != nil {
				return err
			}

			specURL := api.URL
			if specURL == "" {
				// Fall back to the first property that carries a non-empty
				// URL. The property type is not inspected.
				for _, p := range api.Properties {
					if p.URL != "" {
						specURL = p.URL
						break
					}
				}
			}
			if specURL == "" {
				continue
			}

			// The spec fetch counts against the same limiter as the search.
			if s.Limiters != nil {
				if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
					return err
				}
			}

			// NOTE(review): specURL is taken verbatim from the search
			// response and requested as-is; confirm that following
			// server-supplied URLs is acceptable for this deployment.
			specReq, err := http.NewRequestWithContext(ctx, http.MethodGet, specURL, nil)
			if err != nil {
				continue
			}

			specResp, err := client.Do(ctx, specReq)
			if err != nil {
				continue
			}

			specData, err := io.ReadAll(io.LimitReader(specResp.Body, 512*1024))
			_ = specResp.Body.Close()
			if err != nil {
				continue
			}

			if !ciLogKeyPattern.Match(specData) {
				continue
			}

			finding := recon.Finding{
				ProviderName: q,
				Source:       specURL,
				SourceType:   "recon:swaggerhub",
				Confidence:   "medium",
				DetectedAt:   time.Now(),
			}

			// Never block on the send once the context is done: if the
			// consumer has stopped draining out, a bare send would hang
			// this goroutine.
			select {
			case out <- finding:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	}
	return nil
}

View File

@@ -0,0 +1,182 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// TestSwaggerHub_Name checks that the source reports "swaggerhub" as its
// name.
func TestSwaggerHub_Name(t *testing.T) {
	src := &SwaggerHubSource{}
	if got := src.Name(); got != "swaggerhub" {
		t.Fatalf("expected swaggerhub, got %s", got)
	}
}
// TestSwaggerHub_Enabled checks that the source is enabled for a zero-value
// configuration.
func TestSwaggerHub_Enabled(t *testing.T) {
	src := &SwaggerHubSource{}
	enabled := src.Enabled(recon.Config{})
	if !enabled {
		t.Fatal("SwaggerHubSource should always be enabled")
	}
}
// TestSwaggerHub_Sweep exercises both phases of Sweep against a single test
// server: the search endpoint returns one API whose spec URL points back at
// the same server, and the spec endpoint returns an OpenAPI document with a
// key-like example value. At least one "recon:swaggerhub" finding is
// expected.
func TestSwaggerHub_Sweep(t *testing.T) {
	mux := http.NewServeMux()

	// Search endpoint. The spec URL is built from the request's Host so the
	// handler can be registered before the server address is known, which
	// avoids swapping handlers on an already running server.
	mux.HandleFunc("/specs", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{
			"apis": [
				{
					"name": "Payment Gateway",
					"url": "http://` + r.Host + `/spec/payment-gateway",
					"properties": []
				}
			]
		}`))
	})

	// Spec endpoint returns OpenAPI JSON with an embedded key.
	mux.HandleFunc("/spec/payment-gateway", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{
			"openapi": "3.0.0",
			"info": {"title": "Payment API"},
			"paths": {
				"/charge": {
					"post": {
						"parameters": [
							{
								"name": "Authorization",
								"in": "header",
								"example": "api_key = 'sk-proj-ABCDEF1234567890abcdef'"
							}
						]
					}
				}
			}
		}`))
	})

	srv := httptest.NewServer(mux)
	defer srv.Close()

	reg := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})

	s := &SwaggerHubSource{
		BaseURL:  srv.URL + "/specs",
		Registry: reg,
		Client:   NewClient(),
	}

	// Buffered so Sweep can emit without a concurrent reader.
	out := make(chan recon.Finding, 10)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	err := s.Sweep(ctx, "", out)
	close(out)
	if err != nil {
		t.Fatalf("Sweep error: %v", err)
	}

	var findings []recon.Finding
	for f := range out {
		findings = append(findings, f)
	}
	if len(findings) == 0 {
		t.Fatal("expected at least one finding from SwaggerHub")
	}
	if findings[0].SourceType != "recon:swaggerhub" {
		t.Fatalf("expected recon:swaggerhub, got %s", findings[0].SourceType)
	}
}
// TestSwaggerHub_Sweep_NoAPIs serves a search response with an empty "apis"
// list and expects Sweep to finish without error and without emitting any
// finding.
func TestSwaggerHub_Sweep_NoAPIs(t *testing.T) {
	handler := http.NewServeMux()
	handler.HandleFunc("/specs", func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"apis": []}`))
	})
	server := httptest.NewServer(handler)
	defer server.Close()

	registry := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
	source := &SwaggerHubSource{
		BaseURL:  server.URL + "/specs",
		Registry: registry,
		Client:   NewClient(),
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	results := make(chan recon.Finding, 10)
	sweepErr := source.Sweep(ctx, "", results)
	close(results)
	if sweepErr != nil {
		t.Fatalf("Sweep error: %v", sweepErr)
	}

	var got []recon.Finding
	for finding := range results {
		got = append(got, finding)
	}
	if count := len(got); count != 0 {
		t.Fatalf("expected no findings, got %d", count)
	}
}