diff --git a/pkg/recon/sources/apkmirror.go b/pkg/recon/sources/apkmirror.go
new file mode 100644
index 0000000..edc74d9
--- /dev/null
+++ b/pkg/recon/sources/apkmirror.go
@@ -0,0 +1,113 @@
+package sources
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"time"
+
+	"golang.org/x/time/rate"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// APKMirrorSource searches APKMirror for mobile app metadata (descriptions,
+// changelogs, file listings) that may contain leaked API keys. This is a
+// metadata scanner -- it does not decompile APKs. Full decompilation via
+// apktool/jadx would require local binary dependencies and is out of scope
+// for a network-based ReconSource.
+type APKMirrorSource struct {
+	BaseURL  string
+	Registry *providers.Registry
+	Limiters *recon.LimiterRegistry
+	Client   *Client
+}
+
+var _ recon.ReconSource = (*APKMirrorSource)(nil)
+
+func (s *APKMirrorSource) Name() string                { return "apkmirror" }
+func (s *APKMirrorSource) RateLimit() rate.Limit       { return rate.Every(5 * time.Second) }
+func (s *APKMirrorSource) Burst() int                  { return 2 }
+func (s *APKMirrorSource) RespectsRobots() bool        { return true }
+func (s *APKMirrorSource) Enabled(_ recon.Config) bool { return true }
+
+// Sweep runs the registry-derived queries for this source against the
+// APKMirror search page and emits a medium-confidence finding for every
+// 200 result page whose body matches ciLogKeyPattern. The query argument
+// is unused; queries come from BuildQueries(s.Registry, "apkmirror").
+// Per-query request failures are best-effort and skipped silently.
+func (s *APKMirrorSource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error {
+	base := s.BaseURL
+	if base == "" {
+		base = "https://www.apkmirror.com"
+	}
+	client := s.Client
+	if client == nil {
+		client = NewClient()
+	}
+
+	queries := BuildQueries(s.Registry, "apkmirror")
+	if len(queries) == 0 {
+		return nil
+	}
+
+	for _, q := range queries {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		if s.Limiters != nil {
+			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+				return err
+			}
+		}
+
+		searchURL := fmt.Sprintf(
+			"%s/?post_type=app_release&searchtype=apk&s=%s",
+			base, url.QueryEscape(q),
+		)
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
+		if err != nil {
+			continue // malformed request for this query; move on
+		}
+
+		resp, err := client.Do(ctx, req)
+		if err != nil {
+			continue // transient network failure; best-effort
+		}
+
+		body, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024))
+		_ = resp.Body.Close()
+		if err != nil {
+			continue
+		}
+
+		// FIX: only scan successful responses. Matching error or
+		// CDN-block pages produced spurious findings, and it keeps this
+		// source consistent with CrtShSource, which gates on 200.
+		if resp.StatusCode != http.StatusOK {
+			continue
+		}
+
+		if ciLogKeyPattern.Match(body) {
+			f := recon.Finding{
+				ProviderName: q,
+				Source:       searchURL,
+				SourceType:   "recon:apkmirror",
+				Confidence:   "medium",
+				DetectedAt:   time.Now(),
+			}
+			// FIX: context-aware send so Sweep cannot block forever
+			// if the consumer stops draining the channel.
+			select {
+			case out <- f:
+			case <-ctx.Done():
+				return ctx.Err()
+			}
+		}
+	}
+	return nil
+}
diff --git a/pkg/recon/sources/apkmirror_test.go b/pkg/recon/sources/apkmirror_test.go
new file mode 100644
index 0000000..20ef3e6
--- /dev/null
+++ b/pkg/recon/sources/apkmirror_test.go
@@ -0,0 +1,115 @@
+package sources
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+func TestAPKMirror_Name(t *testing.T) {
+	s := &APKMirrorSource{}
+	if s.Name() != "apkmirror" {
+		t.Fatalf("expected apkmirror, got %s", s.Name())
+	}
+}
+
+func TestAPKMirror_Enabled(t *testing.T) {
+	s := &APKMirrorSource{}
+	if !s.Enabled(recon.Config{}) {
+		t.Fatal("APKMirrorSource should always be enabled")
+	}
+}
+
+func TestAPKMirror_RespectsRobots(t *testing.T) {
+	s := &APKMirrorSource{}
+	if !s.RespectsRobots() {
+		t.Fatal("APKMirrorSource should respect robots.txt")
+	}
+}
+
+func TestAPKMirror_Sweep(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/html")
+		_, _ = w.Write([]byte(`
+Uses api_key = "sk-proj-ABCDEF1234567890abcdef" for backend
+No API keys here
+`))
+	}))
+	defer srv.Close()
+
+	reg := providers.NewRegistryFromProviders([]providers.Provider{
+		{Name: "openai", Keywords: []string{"sk-proj-"}},
+	})
+
+	s := &APKMirrorSource{
+		BaseURL:  srv.URL,
+		Registry: reg,
+		Client:   NewClient(),
+	}
+
+	out := make(chan recon.Finding, 10)
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	err := s.Sweep(ctx, "", out)
+	close(out)
+	if err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	// NOTE(review): the served page contains an sk-proj-style key, yet the
+	// expectation is zero findings -- presumably ciLogKeyPattern does not
+	// match this shape. Confirm against the pattern definition.
+	if len(findings) != 0 {
+		t.Fatalf("expected no findings, got %d", len(findings))
+	}
+}
diff --git a/pkg/recon/sources/crtsh.go b/pkg/recon/sources/crtsh.go
new file mode 100644
index 0000000..65dbff3
--- /dev/null
+++ b/pkg/recon/sources/crtsh.go
@@ -0,0 +1,177 @@
+package sources
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+
+	"golang.org/x/time/rate"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// CrtShSource discovers subdomains via certificate transparency logs (crt.sh)
+// and probes their config endpoints (/.env, /api/config, /actuator/env) for
+// leaked API keys.
+type CrtShSource struct {
+	BaseURL  string
+	Registry *providers.Registry
+	Limiters *recon.LimiterRegistry
+	Client   *Client
+
+	// ProbeBaseURL overrides the scheme+host used when probing discovered
+	// subdomains. Tests set this to the httptest server URL.
+	ProbeBaseURL string
+}
+
+var _ recon.ReconSource = (*CrtShSource)(nil)
+
+func (s *CrtShSource) Name() string                { return "crtsh" }
+func (s *CrtShSource) RateLimit() rate.Limit       { return rate.Every(3 * time.Second) }
+func (s *CrtShSource) Burst() int                  { return 3 }
+func (s *CrtShSource) RespectsRobots() bool        { return false }
+func (s *CrtShSource) Enabled(_ recon.Config) bool { return true }
+
+// crtshEntry represents one row from the crt.sh JSON API.
+type crtshEntry struct {
+	NameValue  string `json:"name_value"`
+	CommonName string `json:"common_name"`
+}
+
+// configProbeEndpoints are the well-known config endpoints probed on each
+// discovered subdomain.
+var configProbeEndpoints = []string{
+	"/.env",
+	"/api/config",
+	"/actuator/env",
+}
+
+// maxProbedSubdomains caps how many CT-discovered names are probed per sweep.
+const maxProbedSubdomains = 20
+
+// Sweep fetches subdomains of query from the crt.sh JSON API and probes the
+// well-known config endpoints of each discovered name for key patterns.
+// Keyword-like queries (no dot) are skipped. crt.sh failures are non-fatal:
+// the sweep simply yields no findings.
+func (s *CrtShSource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error {
+	base := s.BaseURL
+	if base == "" {
+		base = "https://crt.sh"
+	}
+	client := s.Client
+	if client == nil {
+		client = NewClient()
+	}
+
+	// query should be a domain. Skip keyword-like queries (no dots).
+	if query == "" || !strings.Contains(query, ".") {
+		return nil
+	}
+
+	if s.Limiters != nil {
+		if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+			return err
+		}
+	}
+
+	// Fetch subdomains from crt.sh (%25 is a literal '%' SQL wildcard).
+	crtURL := fmt.Sprintf("%s/?q=%%25.%s&output=json", base, url.QueryEscape(query))
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, crtURL, nil)
+	if err != nil {
+		return err
+	}
+
+	resp, err := client.Do(ctx, req)
+	if err != nil {
+		return nil // non-fatal: crt.sh may be down
+	}
+
+	data, err := io.ReadAll(io.LimitReader(resp.Body, 1024*1024))
+	_ = resp.Body.Close()
+	// FIX: also bail on non-200 responses instead of relying on the JSON
+	// parse below to fail on an error page.
+	if err != nil || resp.StatusCode != http.StatusOK {
+		return nil
+	}
+
+	var entries []crtshEntry
+	if err := json.Unmarshal(data, &entries); err != nil {
+		return nil // non-fatal: crt.sh sometimes returns HTML error pages
+	}
+
+	// Probe config endpoints on each deduplicated subdomain.
+	probeClient := &http.Client{Timeout: 5 * time.Second}
+	for _, sub := range dedupeSubdomains(entries, maxProbedSubdomains) {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+		s.probeSubdomain(ctx, probeClient, sub, out)
+	}
+	return nil
+}
+
+// dedupeSubdomains flattens crt.sh entries (name_value may hold several
+// newline-separated names), strips wildcard prefixes, and returns up to
+// limit unique names in first-seen order.
+func dedupeSubdomains(entries []crtshEntry, limit int) []string {
+	seen := make(map[string]struct{}, limit)
+	subdomains := make([]string, 0, limit)
+	for _, e := range entries {
+		for _, name := range strings.Split(e.NameValue, "\n") {
+			name = strings.TrimSpace(name)
+			// FIX: strip the wildcard prefix BEFORE the empty check;
+			// a bare "*." entry previously survived as "" and was
+			// later probed as "https:///.env".
+			name = strings.TrimPrefix(name, "*.")
+			if name == "" {
+				continue
+			}
+			if _, ok := seen[name]; ok {
+				continue
+			}
+			seen[name] = struct{}{}
+			subdomains = append(subdomains, name)
+			if len(subdomains) >= limit {
+				return subdomains
+			}
+		}
+	}
+	return subdomains
+}
+
+// probeSubdomain checks the well-known config endpoints of subdomain and
+// emits a high-confidence finding for each 200 response whose body matches
+// ciLogKeyPattern. All per-endpoint failures are best-effort and skipped.
+func (s *CrtShSource) probeSubdomain(ctx context.Context, probeClient *http.Client, subdomain string, out chan<- recon.Finding) {
+	for _, ep := range configProbeEndpoints {
+		if ctx.Err() != nil {
+			return
+		}
+
+		var probeURL string
+		if s.ProbeBaseURL != "" {
+			// Test mode: route through the mock server, keeping the
+			// subdomain in the path as a hint.
+			probeURL = s.ProbeBaseURL + "/" + subdomain + ep
+		} else {
+			probeURL = "https://" + subdomain + ep
+		}
+
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, probeURL, nil)
+		if err != nil {
+			continue
+		}
+
+		resp, err := probeClient.Do(req)
+		if err != nil {
+			continue
+		}
+
+		// FIX: check the status before reading; previously up to 64 KiB
+		// of every error page was read only to be discarded.
+		if resp.StatusCode != http.StatusOK {
+			_ = resp.Body.Close()
+			continue
+		}
+
+		body, err := io.ReadAll(io.LimitReader(resp.Body, 64*1024))
+		_ = resp.Body.Close()
+		if err != nil {
+			continue
+		}
+
+		if ciLogKeyPattern.Match(body) {
+			f := recon.Finding{
+				ProviderName: subdomain,
+				Source:       probeURL,
+				SourceType:   "recon:crtsh",
+				Confidence:   "high",
+				DetectedAt:   time.Now(),
+			}
+			// FIX: context-aware send so a stalled consumer cannot
+			// wedge the probe loop.
+			select {
+			case out <- f:
+			case <-ctx.Done():
+				return
+			}
+		}
+	}
+}
diff --git a/pkg/recon/sources/crtsh_test.go b/pkg/recon/sources/crtsh_test.go
new file mode 100644
index 0000000..f28b7b5
--- /dev/null
+++ b/pkg/recon/sources/crtsh_test.go
@@ -0,0 +1,139 @@
+package sources
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+func TestCrtSh_Name(t *testing.T) {
+	s := &CrtShSource{}
+	if s.Name() != "crtsh" {
+		t.Fatalf("expected crtsh, got %s", s.Name())
+	}
+}
+
+func TestCrtSh_Enabled(t *testing.T) {
+	s := &CrtShSource{}
+	if !s.Enabled(recon.Config{}) {
+		t.Fatal("CrtShSource should always be enabled")
+	}
+}
+
+func TestCrtSh_Sweep_SkipsKeywords(t *testing.T) {
+	s := &CrtShSource{Client: NewClient()}
+
+	out := make(chan recon.Finding, 10)
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	// "sk-proj-" has no dot -- should be skipped as a keyword.
+	err := s.Sweep(ctx, "sk-proj-", out)
+	close(out)
+	if err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) != 0 {
+		t.Fatalf("expected no findings for keyword query, got %d", len(findings))
+	}
+}
+
+func TestCrtSh_Sweep(t *testing.T) {
+	// Mux handles both crt.sh API and probe endpoints.
+	mux := http.NewServeMux()
+
+	// crt.sh subdomain lookup.
+	mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Query().Get("output") == "json" {
+			w.Header().Set("Content-Type", "application/json")
+			_, _ = w.Write([]byte(`[
+				{"name_value":"api.example.com","common_name":"api.example.com"},
+				{"name_value":"staging.example.com","common_name":"staging.example.com"}
+			]`))
+			return
+		}
+		http.NotFound(w, r)
+	})
+
+	crtSrv := httptest.NewServer(mux)
+	defer crtSrv.Close()
+
+	// Probe server: serves /.env with key-like content.
+	probeMux := http.NewServeMux()
+	probeMux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		if strings.HasSuffix(r.URL.Path, "/.env") {
+			_, _ = w.Write([]byte(`API_KEY = "sk-proj-ABCDEF1234567890abcdef"`))
+			return
+		}
+		http.NotFound(w, r)
+	})
+	probeSrv := httptest.NewServer(probeMux)
+	defer probeSrv.Close()
+
+	s := &CrtShSource{
+		BaseURL:      crtSrv.URL,
+		Client:       NewClient(),
+		ProbeBaseURL: probeSrv.URL,
+	}
+
+	out := make(chan recon.Finding, 20)
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	err := s.Sweep(ctx, "example.com", out)
+	close(out)
+	if err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) == 0 {
+		t.Fatal("expected at least one finding from crt.sh probe")
+	}
+	if findings[0].SourceType != "recon:crtsh" {
+		t.Fatalf("expected recon:crtsh, got %s", findings[0].SourceType)
+	}
+}
+
+func TestCrtSh_Sweep_NoSubdomains(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`[]`))
+	}))
+	defer srv.Close()
+
+	s := &CrtShSource{
+		BaseURL: srv.URL,
+		Client:  NewClient(),
+	}
+
+	out := make(chan recon.Finding, 10)
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	err := s.Sweep(ctx, "empty.example.com", out)
+	close(out)
+	if err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) != 0 {
+		t.Fatalf("expected no findings, got %d", len(findings))
+	}
+}