feat(16-02): add APKMirror and crt.sh ReconSource modules

- APKMirrorSource searches APK metadata pages for key patterns
- CrtShSource discovers subdomains via CT logs and probes config endpoints
- Both credentialless, emit findings on ciLogKeyPattern match
This commit is contained in:
salvacybersec
2026-04-06 16:44:37 +03:00
parent af284f56f2
commit 09a8d4cb70
4 changed files with 525 additions and 0 deletions

View File

@@ -0,0 +1,94 @@
package sources
import (
"context"
"fmt"
"io"
"net/http"
"net/url"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// APKMirrorSource searches APKMirror for mobile app metadata (descriptions,
// changelogs, file listings) that may contain leaked API keys. This is a
// metadata scanner -- it does not decompile APKs. Full decompilation via
// apktool/jadx would require local binary dependencies and is out of scope
// for a network-based ReconSource.
type APKMirrorSource struct {
	// BaseURL overrides the site root used to build search URLs. When empty,
	// Sweep falls back to "https://www.apkmirror.com".
	BaseURL string
	// Registry is handed to BuildQueries to derive the search terms.
	Registry *providers.Registry
	// Limiters, when non-nil, is waited on before every search request.
	// A nil value disables rate limiting in Sweep.
	Limiters *recon.LimiterRegistry
	// Client performs the HTTP requests. When nil, Sweep uses NewClient().
	Client *Client
}

// Compile-time check that *APKMirrorSource implements recon.ReconSource.
var _ recon.ReconSource = (*APKMirrorSource)(nil)
// Name returns the identifier of this source, "apkmirror".
func (s *APKMirrorSource) Name() string {
	return "apkmirror"
}

// RateLimit returns the request rate for this source: one event every five
// seconds.
func (s *APKMirrorSource) RateLimit() rate.Limit {
	const interval = 5 * time.Second
	return rate.Every(interval)
}

// Burst returns the burst size used together with RateLimit.
func (s *APKMirrorSource) Burst() int {
	return 2
}

// RespectsRobots always reports true for this source.
func (s *APKMirrorSource) RespectsRobots() bool {
	return true
}

// Enabled always reports true; the configuration argument is ignored.
func (s *APKMirrorSource) Enabled(_ recon.Config) bool {
	return true
}
// Sweep runs one APKMirror search per query produced by
// BuildQueries(s.Registry, "apkmirror") and emits a finding on out for every
// search result page whose body matches ciLogKeyPattern.
//
// The query argument is not used by this source; it is accepted only to match
// the Sweep signature shared by the other sources.
//
// Failures that affect a single search (request construction, transport
// error, body read error) are skipped so that one bad query does not abort
// the sweep. Sweep returns a non-nil error only when ctx is done or the rate
// limiter wait fails. It never closes out.
func (s *APKMirrorSource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error {
	// Upper bound on how much of each search page is read and scanned.
	const maxBodyBytes = 512 * 1024

	base := s.BaseURL
	if base == "" {
		base = "https://www.apkmirror.com"
	}
	client := s.Client
	if client == nil {
		client = NewClient()
	}

	queries := BuildQueries(s.Registry, "apkmirror")
	if len(queries) == 0 {
		return nil
	}

	for _, q := range queries {
		if err := ctx.Err(); err != nil {
			return err
		}

		if s.Limiters != nil {
			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
				return err
			}
		}

		searchURL := fmt.Sprintf(
			"%s/?post_type=app_release&searchtype=apk&s=%s",
			base, url.QueryEscape(q),
		)

		req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
		if err != nil {
			continue
		}

		resp, err := client.Do(ctx, req)
		if err != nil {
			continue
		}

		body, err := io.ReadAll(io.LimitReader(resp.Body, maxBodyBytes))
		_ = resp.Body.Close() // best-effort close; the body has already been consumed
		if err != nil {
			continue
		}

		if !ciLogKeyPattern.Match(body) {
			continue
		}

		finding := recon.Finding{
			ProviderName: q,
			Source:       searchURL,
			SourceType:   "recon:apkmirror",
			Confidence:   "medium",
			DetectedAt:   time.Now(),
		}

		// Never block forever on a consumer that has stopped reading: give up
		// as soon as the context is cancelled.
		select {
		case out <- finding:
		case <-ctx.Done():
			return ctx.Err()
		}
	}
	return nil
}

View File

@@ -0,0 +1,115 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// TestAPKMirror_Name checks the identifier reported by the source.
func TestAPKMirror_Name(t *testing.T) {
	src := new(APKMirrorSource)
	if got := src.Name(); got != "apkmirror" {
		t.Fatalf("expected apkmirror, got %s", got)
	}
}
func TestAPKMirror_Enabled(t *testing.T) {
s := &APKMirrorSource{}
if !s.Enabled(recon.Config{}) {
t.Fatal("APKMirrorSource should always be enabled")
}
}
// TestAPKMirror_RespectsRobots checks that the source declares robots.txt
// compliance.
func TestAPKMirror_RespectsRobots(t *testing.T) {
	src := new(APKMirrorSource)
	if honoured := src.RespectsRobots(); !honoured {
		t.Fatal("APKMirrorSource should respect robots.txt")
	}
}
func TestAPKMirror_Sweep(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/html")
_, _ = w.Write([]byte(`
<html><body>
<div class="appRow">
<h5 class="appRowTitle">AI Chat Pro</h5>
<p>Uses api_key = "sk-proj-ABCDEF1234567890abcdef" for backend</p>
</div>
</body></html>
`))
}))
defer srv.Close()
reg := providers.NewRegistryFromProviders([]providers.Provider{
{Name: "openai", Keywords: []string{"sk-proj-"}},
})
s := &APKMirrorSource{
BaseURL: srv.URL,
Registry: reg,
Client: NewClient(),
}
out := make(chan recon.Finding, 10)
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
err := s.Sweep(ctx, "", out)
close(out)
if err != nil {
t.Fatalf("Sweep error: %v", err)
}
var findings []recon.Finding
for f := range out {
findings = append(findings, f)
}
if len(findings) == 0 {
t.Fatal("expected at least one finding from APKMirror")
}
if findings[0].SourceType != "recon:apkmirror" {
t.Fatalf("expected recon:apkmirror, got %s", findings[0].SourceType)
}
}
func TestAPKMirror_Sweep_NoMatch(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/html")
_, _ = w.Write([]byte(`<html><body><p>No API keys here</p></body></html>`))
}))
defer srv.Close()
reg := providers.NewRegistryFromProviders([]providers.Provider{
{Name: "openai", Keywords: []string{"sk-proj-"}},
})
s := &APKMirrorSource{
BaseURL: srv.URL,
Registry: reg,
Client: NewClient(),
}
out := make(chan recon.Finding, 10)
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
err := s.Sweep(ctx, "", out)
close(out)
if err != nil {
t.Fatalf("Sweep error: %v", err)
}
var findings []recon.Finding
for f := range out {
findings = append(findings, f)
}
if len(findings) != 0 {
t.Fatalf("expected no findings, got %d", len(findings))
}
}

177
pkg/recon/sources/crtsh.go Normal file
View File

@@ -0,0 +1,177 @@
package sources
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// CrtShSource discovers subdomains via certificate transparency logs (crt.sh)
// and probes their config endpoints (/.env, /api/config, /actuator/env) for
// leaked API keys.
type CrtShSource struct {
	// BaseURL overrides the crt.sh root used for the subdomain lookup. When
	// empty, Sweep falls back to "https://crt.sh".
	BaseURL string
	// Registry is not read by the Sweep code in this file.
	// NOTE(review): presumably kept for parity with other sources -- confirm.
	Registry *providers.Registry
	// Limiters, when non-nil, is waited on once before the crt.sh lookup.
	// The per-subdomain probes do not go through it.
	Limiters *recon.LimiterRegistry
	// Client performs the crt.sh lookup. When nil, Sweep uses NewClient().
	Client *Client
	// ProbeBaseURL overrides the scheme+host used when probing discovered
	// subdomains. Tests set this to the httptest server URL.
	ProbeBaseURL string
}

// Compile-time check that *CrtShSource implements recon.ReconSource.
var _ recon.ReconSource = (*CrtShSource)(nil)
// Name returns the identifier of this source, "crtsh".
func (s *CrtShSource) Name() string {
	return "crtsh"
}

// RateLimit returns the request rate for this source: one event every three
// seconds.
func (s *CrtShSource) RateLimit() rate.Limit {
	const interval = 3 * time.Second
	return rate.Every(interval)
}

// Burst returns the burst size used together with RateLimit.
func (s *CrtShSource) Burst() int {
	return 3
}

// RespectsRobots always reports false for this source.
func (s *CrtShSource) RespectsRobots() bool {
	return false
}

// Enabled always reports true; the configuration argument is ignored.
func (s *CrtShSource) Enabled(_ recon.Config) bool {
	return true
}
// crtshEntry represents one row from the crt.sh JSON API.
type crtshEntry struct {
	// NameValue is decoded from "name_value"; it can hold several names
	// separated by newlines.
	NameValue string `json:"name_value"`
	// CommonName is decoded from "common_name". Sweep does not read it.
	CommonName string `json:"common_name"`
}
// configProbeEndpoints are the well-known config endpoints probed on each
// discovered subdomain. Each entry is an absolute path (leading slash) that
// is appended directly to the probe host; they are requested in this order.
var configProbeEndpoints = []string{
	"/.env",
	"/api/config",
	"/actuator/env",
}
// crtshMaxSubdomains caps how many distinct host names one Sweep will probe.
const crtshMaxSubdomains = 20

// Sweep looks up certificate-transparency names for the domain in query via
// crt.sh and probes the config endpoints of each discovered host, emitting
// findings on out.
//
// query must look like a domain: empty queries and queries without a dot are
// treated as keywords and skipped. Failures of the crt.sh lookup itself
// (transport error, unreadable body, non-JSON response) are non-fatal and
// yield a nil error with no findings. Sweep returns a non-nil error only when
// the request cannot be built, the rate limiter wait fails, or ctx is done
// between probes. It never closes out.
func (s *CrtShSource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error {
	// query should be a domain. Skip keyword-like queries (no dots).
	if query == "" || !strings.Contains(query, ".") {
		return nil
	}

	base := s.BaseURL
	if base == "" {
		base = "https://crt.sh"
	}
	client := s.Client
	if client == nil {
		client = NewClient()
	}

	if s.Limiters != nil {
		if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
			return err
		}
	}

	// Fetch subdomains from crt.sh. "%%25" renders as "%25", the URL-encoded
	// form of "%", so the lookup is q=%.<domain>.
	crtURL := fmt.Sprintf("%s/?q=%%25.%s&output=json", base, url.QueryEscape(query))
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, crtURL, nil)
	if err != nil {
		return err
	}

	resp, err := client.Do(ctx, req)
	if err != nil {
		return nil // non-fatal: crt.sh may be down
	}
	data, err := io.ReadAll(io.LimitReader(resp.Body, 1024*1024))
	_ = resp.Body.Close() // best-effort close; the body has already been consumed
	if err != nil {
		return nil
	}

	var entries []crtshEntry
	if err := json.Unmarshal(data, &entries); err != nil {
		return nil
	}

	nameValues := make([]string, 0, len(entries))
	for _, e := range entries {
		nameValues = append(nameValues, e.NameValue)
	}
	subdomains := crtshHostnames(nameValues, crtshMaxSubdomains)

	// Probe config endpoints on each subdomain.
	probeClient := &http.Client{Timeout: 5 * time.Second}
	for _, sub := range subdomains {
		if err := ctx.Err(); err != nil {
			return err
		}
		s.probeSubdomain(ctx, probeClient, sub, out)
	}
	return nil
}

// crtshHostnames flattens crt.sh name_value fields into a deduplicated list of
// at most limit host names, preserving first-seen order. Each field may hold
// several newline-separated names; a leading "*." wildcard label is removed.
// Values that cannot be used as a URL host are dropped.
func crtshHostnames(nameValues []string, limit int) []string {
	if limit <= 0 {
		return nil
	}
	seen := make(map[string]struct{})
	var hosts []string
	for _, nv := range nameValues {
		for _, name := range strings.Split(nv, "\n") {
			name = strings.TrimPrefix(strings.TrimSpace(name), "*.")
			if !crtshProbeableHost(name) {
				continue
			}
			if _, dup := seen[name]; dup {
				continue
			}
			seen[name] = struct{}{}
			hosts = append(hosts, name)
			if len(hosts) >= limit {
				return hosts
			}
		}
	}
	return hosts
}

// crtshProbeableHost reports whether name is safe to splice into
// "https://" + name + path. It rejects the empty string and anything holding
// characters that would change how the URL is parsed: "@" (an email identity
// would turn into URL userinfo), path/query/fragment delimiters, ":",
// whitespace and leftover "*" wildcards.
func crtshProbeableHost(name string) bool {
	return name != "" && !strings.ContainsAny(name, "@/\\?#:* \t\r")
}
// probeSubdomain requests every path in configProbeEndpoints on subdomain and
// emits a "recon:crtsh" finding on out for each HTTP 200 response whose body
// matches ciLogKeyPattern.
//
// Requests go to "https://<subdomain><endpoint>" unless s.ProbeBaseURL is
// set, in which case they go to "<ProbeBaseURL>/<subdomain><endpoint>".
// Per-endpoint failures are skipped. The function returns early, without
// reporting an error, as soon as ctx is done.
func (s *CrtShSource) probeSubdomain(ctx context.Context, probeClient *http.Client, subdomain string, out chan<- recon.Finding) {
	// Upper bound on how much of each probe response is read and scanned.
	const maxBodyBytes = 64 * 1024

	for _, ep := range configProbeEndpoints {
		if ctx.Err() != nil {
			return
		}

		probeURL := "https://" + subdomain + ep
		if s.ProbeBaseURL != "" {
			// Test mode: route to the mock server, keeping the subdomain as a
			// path prefix.
			probeURL = s.ProbeBaseURL + "/" + subdomain + ep
		}

		req, err := http.NewRequestWithContext(ctx, http.MethodGet, probeURL, nil)
		if err != nil {
			continue
		}

		resp, err := probeClient.Do(req)
		if err != nil {
			continue
		}

		// Only a 200 can produce a finding, so skip reading other bodies.
		if resp.StatusCode != http.StatusOK {
			_ = resp.Body.Close()
			continue
		}

		body, err := io.ReadAll(io.LimitReader(resp.Body, maxBodyBytes))
		_ = resp.Body.Close() // best-effort close; the body has already been consumed
		if err != nil {
			continue
		}

		if !ciLogKeyPattern.Match(body) {
			continue
		}

		finding := recon.Finding{
			ProviderName: subdomain,
			Source:       probeURL,
			SourceType:   "recon:crtsh",
			Confidence:   "high",
			DetectedAt:   time.Now(),
		}

		// Never block forever on a consumer that has stopped reading: give up
		// as soon as the context is cancelled.
		select {
		case out <- finding:
		case <-ctx.Done():
			return
		}
	}
}

View File

@@ -0,0 +1,139 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// TestCrtSh_Name checks the identifier reported by the source.
func TestCrtSh_Name(t *testing.T) {
	src := new(CrtShSource)
	if got := src.Name(); got != "crtsh" {
		t.Fatalf("expected crtsh, got %s", got)
	}
}
func TestCrtSh_Enabled(t *testing.T) {
s := &CrtShSource{}
if !s.Enabled(recon.Config{}) {
t.Fatal("CrtShSource should always be enabled")
}
}
// TestCrtSh_Sweep_SkipsKeywords checks that a query without a dot is treated
// as a keyword: Sweep succeeds and emits nothing.
func TestCrtSh_Sweep_SkipsKeywords(t *testing.T) {
	src := &CrtShSource{Client: NewClient()}

	results := make(chan recon.Finding, 10)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// "sk-proj-" has no dot -- should be skipped as a keyword.
	sweepErr := src.Sweep(ctx, "sk-proj-", results)
	close(results)
	if sweepErr != nil {
		t.Fatalf("Sweep error: %v", sweepErr)
	}

	// Drain everything the sweep buffered before the channel was closed.
	var got []recon.Finding
	for {
		f, ok := <-results
		if !ok {
			break
		}
		got = append(got, f)
	}

	if n := len(got); n != 0 {
		t.Fatalf("expected no findings for keyword query, got %d", n)
	}
}
// TestCrtSh_Sweep wires a fake crt.sh API and a fake probe target together
// and expects Sweep to emit at least one "recon:crtsh" finding.
func TestCrtSh_Sweep(t *testing.T) {
	const crtJSON = `[
{"name_value":"api.example.com","common_name":"api.example.com"},
{"name_value":"staging.example.com","common_name":"staging.example.com"}
]`
	const envBody = `API_KEY = "sk-proj-ABCDEF1234567890abcdef"`

	// Fake crt.sh: answers the subdomain lookup, 404s everything else.
	lookupMux := http.NewServeMux()
	lookupMux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Query().Get("output") != "json" {
			http.NotFound(w, r)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(crtJSON))
	})
	lookupSrv := httptest.NewServer(lookupMux)
	defer lookupSrv.Close()

	// Fake probe target: serves /.env with key-like content.
	targetMux := http.NewServeMux()
	targetMux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
		if !strings.HasSuffix(r.URL.Path, "/.env") {
			http.NotFound(w, r)
			return
		}
		_, _ = w.Write([]byte(envBody))
	})
	targetSrv := httptest.NewServer(targetMux)
	defer targetSrv.Close()

	src := &CrtShSource{
		BaseURL:      lookupSrv.URL,
		Client:       NewClient(),
		ProbeBaseURL: targetSrv.URL,
	}

	results := make(chan recon.Finding, 20)
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	sweepErr := src.Sweep(ctx, "example.com", results)
	close(results)
	if sweepErr != nil {
		t.Fatalf("Sweep error: %v", sweepErr)
	}

	// Drain everything the sweep buffered before the channel was closed.
	var got []recon.Finding
	for {
		f, ok := <-results
		if !ok {
			break
		}
		got = append(got, f)
	}

	switch {
	case len(got) == 0:
		t.Fatal("expected at least one finding from crt.sh probe")
	case got[0].SourceType != "recon:crtsh":
		t.Fatalf("expected recon:crtsh, got %s", got[0].SourceType)
	}
}
// TestCrtSh_Sweep_NoSubdomains serves an empty crt.sh result set and expects
// Sweep to succeed without emitting anything.
func TestCrtSh_Sweep_NoSubdomains(t *testing.T) {
	handler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`[]`))
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	src := &CrtShSource{
		BaseURL: srv.URL,
		Client:  NewClient(),
	}

	results := make(chan recon.Finding, 10)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	sweepErr := src.Sweep(ctx, "empty.example.com", results)
	close(results)
	if sweepErr != nil {
		t.Fatalf("Sweep error: %v", sweepErr)
	}

	// Drain everything the sweep buffered before the channel was closed.
	var got []recon.Finding
	for {
		f, ok := <-results
		if !ok {
			break
		}
		got = append(got, f)
	}

	if n := len(got); n != 0 {
		t.Fatalf("expected no findings, got %d", n)
	}
}