diff --git a/.planning/REQUIREMENTS.md b/.planning/REQUIREMENTS.md index 5ce70de..7b82ff6 100644 --- a/.planning/REQUIREMENTS.md +++ b/.planning/REQUIREMENTS.md @@ -187,9 +187,9 @@ Requirements for initial release. Each maps to roadmap phases. ### OSINT/Recon — Threat Intelligence -- [ ] **RECON-INTEL-01**: VirusTotal file and URL search -- [ ] **RECON-INTEL-02**: Intelligence X aggregated search -- [ ] **RECON-INTEL-03**: URLhaus search +- [x] **RECON-INTEL-01**: VirusTotal file and URL search +- [x] **RECON-INTEL-02**: Intelligence X aggregated search +- [x] **RECON-INTEL-03**: URLhaus search ### OSINT/Recon — Mobile & DNS diff --git a/.planning/STATE.md b/.planning/STATE.md index 64c3796..81d4f26 100644 --- a/.planning/STATE.md +++ b/.planning/STATE.md @@ -3,14 +3,14 @@ gsd_state_version: 1.0 milestone: v1.0 milestone_name: milestone status: executing -stopped_at: Completed 15-03-PLAN.md -last_updated: "2026-04-06T13:37:48.053Z" +stopped_at: Completed 16-01-PLAN.md +last_updated: "2026-04-06T13:46:09.387Z" last_activity: 2026-04-06 progress: total_phases: 18 completed_phases: 14 total_plans: 81 - completed_plans: 80 + completed_plans: 81 percent: 20 --- @@ -99,6 +99,7 @@ Progress: [██░░░░░░░░] 20% | Phase 14 P01 | 4min | 1 tasks | 14 files | | Phase 15 P01 | 3min | 2 tasks | 13 files | | Phase 15 P03 | 4min | 2 tasks | 11 files | +| Phase 16 P01 | 4min | 2 tasks | 6 files | ## Accumulated Context @@ -148,6 +149,9 @@ Recent decisions affecting current work: - [Phase 14]: RegisterAll extended to 45 sources (40 Phase 10-13 + 5 Phase 14 CI/CD); CircleCI gets dedicated CIRCLECI_TOKEN - [Phase 15]: Discord/Slack use dorking approach (configurable search endpoint) since neither has public message search API - [Phase 15]: Log aggregator sources are credentialless, targeting exposed instances +- [Phase 16]: VT uses x-apikey header per official API v3 spec +- [Phase 16]: IX uses three-step flow: POST search, GET results, GET file content +- [Phase 16]: URLhaus tag lookup 
with payload endpoint fallback ### Pending Todos @@ -162,6 +166,6 @@ None yet. ## Session Continuity -Last session: 2026-04-06T13:32:52.610Z -Stopped at: Completed 15-03-PLAN.md +Last session: 2026-04-06T13:46:09.383Z +Stopped at: Completed 16-01-PLAN.md Resume file: None diff --git a/.planning/phases/16-osint_threat_intel_mobile_dns_api_marketplaces/16-01-SUMMARY.md b/.planning/phases/16-osint_threat_intel_mobile_dns_api_marketplaces/16-01-SUMMARY.md new file mode 100644 index 0000000..585ae28 --- /dev/null +++ b/.planning/phases/16-osint_threat_intel_mobile_dns_api_marketplaces/16-01-SUMMARY.md @@ -0,0 +1,99 @@ +--- +phase: 16-osint-threat-intel-mobile-dns-api-marketplaces +plan: 01 +subsystem: recon +tags: [virustotal, intelligencex, urlhaus, threat-intel, osint] + +requires: + - phase: 09-osint-infrastructure + provides: ReconSource interface, LimiterRegistry, Client, BuildQueries, ciLogKeyPattern +provides: + - VirusTotalSource implementing ReconSource (credential-gated) + - IntelligenceXSource implementing ReconSource (credential-gated) + - URLhausSource implementing ReconSource (credentialless) +affects: [16-osint-wiring, recon-engine-registration] + +tech-stack: + added: [] + patterns: [three-step IX search flow (initiate/results/read), VT x-apikey auth, URLhaus form-encoded POST with tag/payload fallback] + +key-files: + created: + - pkg/recon/sources/virustotal.go + - pkg/recon/sources/virustotal_test.go + - pkg/recon/sources/intelligencex.go + - pkg/recon/sources/intelligencex_test.go + - pkg/recon/sources/urlhaus.go + - pkg/recon/sources/urlhaus_test.go + modified: [] + +key-decisions: + - "VT uses x-apikey header per official API v3 spec" + - "IX uses three-step flow: POST search, GET results, GET file content per record" + - "URLhaus tag lookup with payload endpoint fallback for broader coverage" + +patterns-established: + - "Threat intel sources follow same SentrySource pattern with ciLogKeyPattern matching" + +requirements-completed: 
[RECON-INTEL-01, RECON-INTEL-02, RECON-INTEL-03] + +duration: 4min +completed: 2026-04-06 +--- + +# Phase 16 Plan 01: Threat Intelligence Sources Summary + +**VirusTotal, IntelligenceX, and URLhaus recon sources for detecting API keys in malware samples, breach archives, and malicious URL databases** + +## Performance + +- **Duration:** 4 min +- **Started:** 2026-04-06T13:43:29Z +- **Completed:** 2026-04-06T13:47:29Z +- **Tasks:** 2 +- **Files modified:** 6 + +## Accomplishments +- VirusTotalSource searches VT Intelligence API for files containing API key patterns (credential-gated, 4 req/min rate limit) +- IntelligenceXSource searches IX archive with three-step search/results/content-read flow (credential-gated) +- URLhausSource searches abuse.ch API for malicious URLs with embedded keys (credentialless, always enabled) +- All three sources use ciLogKeyPattern for consistent content matching across the recon framework + +## Task Commits + +Each task was committed atomically: + +1. **Task 1: VirusTotal and IntelligenceX sources** - `e02bad6` (feat) +2. **Task 2: URLhaus source** - `35fa4ad` (feat) + +## Files Created/Modified +- `pkg/recon/sources/virustotal.go` - VT Intelligence API search source +- `pkg/recon/sources/virustotal_test.go` - httptest mocks for VT (4 tests) +- `pkg/recon/sources/intelligencex.go` - IX archive search with three-step flow +- `pkg/recon/sources/intelligencex_test.go` - httptest mocks for IX (4 tests) +- `pkg/recon/sources/urlhaus.go` - abuse.ch URLhaus tag/payload search +- `pkg/recon/sources/urlhaus_test.go` - httptest mocks for URLhaus (4 tests) + +## Decisions Made +- VT uses x-apikey header per official API v3 spec +- IX uses three-step flow: POST search initiation, GET results list, GET file content per record +- URLhaus uses tag lookup endpoint with payload endpoint fallback for broader coverage + +## Deviations from Plan + +None - plan executed exactly as written. 
+
+## Issues Encountered
+None
+
+## User Setup Required
+None - no external service configuration required.
+
+## Next Phase Readiness
+- Three threat intel sources ready for wiring into RegisterAll
+- VT and IX require API keys via config/env; URLhaus works immediately
+- All sources follow established ReconSource pattern
+
+---
+*Phase: 16-osint-threat-intel-mobile-dns-api-marketplaces*
+*Completed: 2026-04-06*
diff --git a/pkg/recon/sources/intelligencex.go b/pkg/recon/sources/intelligencex.go
new file mode 100644
index 0000000..d9109a5
--- /dev/null
+++ b/pkg/recon/sources/intelligencex.go
@@ -0,0 +1,202 @@
+package sources
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"time"
+
+	"golang.org/x/time/rate"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// IntelligenceXSource searches the IntelligenceX archive for leaked credentials.
+// IX indexes breached databases, paste sites, and dark web content, making it
+// a high-value source for discovering leaked API keys.
+type IntelligenceXSource struct {
+	// APIKey is sent as the x-key header on every request; the source is
+	// disabled while it is empty.
+	APIKey string
+	// BaseURL overrides the default endpoint (https://2.intelx.io) when set.
+	BaseURL string
+	// Registry is passed to BuildQueries to derive the search terms.
+	Registry *providers.Registry
+	// Limiters is optional; when nil no rate limiting is applied.
+	Limiters *recon.LimiterRegistry
+	// Client is optional; when nil Sweep uses NewClient().
+	Client *Client
+}
+
+var _ recon.ReconSource = (*IntelligenceXSource)(nil)
+
+// Name returns the identifier used for this source.
+func (s *IntelligenceXSource) Name() string { return "intelligencex" }
+
+// RateLimit allows one request every five seconds.
+func (s *IntelligenceXSource) RateLimit() rate.Limit { return rate.Every(5 * time.Second) }
+
+// Burst is the limiter burst size passed alongside RateLimit.
+func (s *IntelligenceXSource) Burst() int { return 3 }
+
+// RespectsRobots reports false for this source.
+func (s *IntelligenceXSource) RespectsRobots() bool { return false }
+
+// Enabled reports whether an API key has been configured.
+func (s *IntelligenceXSource) Enabled(_ recon.Config) bool {
+	return s.APIKey != ""
+}
+
+// ixSearchRequest is the JSON body for the IX search endpoint.
+type ixSearchRequest struct {
+	Term       string `json:"term"`
+	MaxResults int    `json:"maxresults"`
+	Media      int    `json:"media"`
+	Timeout    int    `json:"timeout"`
+}
+
+// ixSearchResponse is the response from the IX search initiation endpoint.
+type ixSearchResponse struct {
+	ID     string `json:"id"`
+	Status int    `json:"status"`
+}
+
+// ixResultResponse is the response from the IX search results endpoint.
+type ixResultResponse struct {
+	Records []ixRecord `json:"records"`
+}
+
+// ixRecord is a single record in the IX search results.
+type ixRecord struct {
+	SystemID  string `json:"systemid"`
+	Name      string `json:"name"`
+	StorageID string `json:"storageid"`
+	Bucket    string `json:"bucket"`
+}
+
+// Sweep runs every query produced by BuildQueries through the three-step IX
+// flow (initiate search, list results, read each record) and sends a Finding
+// on out for each record whose content matches ciLogKeyPattern.
+//
+// The query argument is not used; search terms come from the registry.
+// Failures of individual requests are best-effort and skip to the next query
+// or record. Sweep returns a non-nil error only when ctx is done or the rate
+// limiter fails.
+func (s *IntelligenceXSource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error {
+	base := s.BaseURL
+	if base == "" {
+		base = "https://2.intelx.io"
+	}
+	client := s.Client
+	if client == nil {
+		client = NewClient()
+	}
+
+	for _, q := range BuildQueries(s.Registry, "intelligencex") {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		// Step 1: initiate the search.
+		if err := s.throttle(ctx); err != nil {
+			return err
+		}
+		searchBody, err := json.Marshal(ixSearchRequest{
+			Term:       q,
+			MaxResults: 10,
+			Media:      0,
+			Timeout:    5,
+		})
+		if err != nil {
+			continue
+		}
+		req, err := http.NewRequestWithContext(ctx, http.MethodPost, base+"/intelligent/search", bytes.NewReader(searchBody))
+		if err != nil {
+			continue
+		}
+		req.Header.Set("Content-Type", "application/json")
+		respData, err := s.fetch(ctx, client, req, 64*1024)
+		if err != nil {
+			continue
+		}
+		var searchResp ixSearchResponse
+		if err := json.Unmarshal(respData, &searchResp); err != nil || searchResp.ID == "" {
+			continue
+		}
+
+		// Step 2: fetch the result list. The search ID comes from the remote
+		// response, so it is escaped before being placed in the query string.
+		if err := s.throttle(ctx); err != nil {
+			return err
+		}
+		resultURL := fmt.Sprintf("%s/intelligent/search/result?id=%s&limit=10", base, url.QueryEscape(searchResp.ID))
+		resReq, err := http.NewRequestWithContext(ctx, http.MethodGet, resultURL, nil)
+		if err != nil {
+			continue
+		}
+		resData, err := s.fetch(ctx, client, resReq, 512*1024)
+		if err != nil {
+			continue
+		}
+		var resultResp ixResultResponse
+		if err := json.Unmarshal(resData, &resultResp); err != nil {
+			continue
+		}
+
+		// Step 3: read each record's content and check it for key patterns.
+		for _, rec := range resultResp.Records {
+			if err := ctx.Err(); err != nil {
+				return err
+			}
+			// Without a storage ID there is nothing to read; do not spend a
+			// rate-limited request on it.
+			if rec.StorageID == "" {
+				continue
+			}
+			if err := s.throttle(ctx); err != nil {
+				return err
+			}
+
+			storageID := url.QueryEscape(rec.StorageID)
+			fileURL := fmt.Sprintf(
+				"%s/file/read?type=0&storageid=%s&bucket=%s",
+				base, storageID, url.QueryEscape(rec.Bucket),
+			)
+			fileReq, err := http.NewRequestWithContext(ctx, http.MethodGet, fileURL, nil)
+			if err != nil {
+				continue
+			}
+			fileData, err := s.fetch(ctx, client, fileReq, 512*1024)
+			if err != nil {
+				continue
+			}
+			if !ciLogKeyPattern.Match(fileData) {
+				continue
+			}
+
+			finding := recon.Finding{
+				ProviderName: q,
+				Source:       fmt.Sprintf("%s/file/read?storageid=%s", base, storageID),
+				SourceType:   "recon:intelligencex",
+				Confidence:   "medium",
+				DetectedAt:   time.Now(),
+			}
+			// Do not block forever if the consumer has gone away.
+			select {
+			case out <- finding:
+			case <-ctx.Done():
+				return ctx.Err()
+			}
+		}
+	}
+	return nil
+}
+
+// throttle waits on the shared limiter for this source; it is a no-op when no
+// limiter registry is configured.
+func (s *IntelligenceXSource) throttle(ctx context.Context) error {
+	if s.Limiters == nil {
+		return nil
+	}
+	return s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false)
+}
+
+// fetch adds the x-key header, executes req, and returns at most limit bytes
+// of the response body. The body is always closed.
+func (s *IntelligenceXSource) fetch(ctx context.Context, client *Client, req *http.Request, limit int64) ([]byte, error) {
+	req.Header.Set("x-key", s.APIKey)
+	resp, err := client.Do(ctx, req)
+	if err != nil {
+		return nil, err
+	}
+	defer func() { _ = resp.Body.Close() }()
+	return io.ReadAll(io.LimitReader(resp.Body, limit))
+}
diff --git a/pkg/recon/sources/intelligencex_test.go b/pkg/recon/sources/intelligencex_test.go
new file mode 100644
index 0000000..3e6542f
--- /dev/null
+++ b/pkg/recon/sources/intelligencex_test.go
@@ -0,0 +1,151 @@
+package sources
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// TestIntelligenceX_Name pins the source identifier.
+func TestIntelligenceX_Name(t *testing.T) {
+	src := &IntelligenceXSource{}
+	if got := src.Name(); got != "intelligencex" {
+		t.Fatalf("expected intelligencex, got %s", got)
+	}
+}
+
+// TestIntelligenceX_Enabled checks that the source is gated on the API key.
+func TestIntelligenceX_Enabled(t *testing.T) {
+	src := &IntelligenceXSource{}
+	if src.Enabled(recon.Config{}) {
+		t.Fatal("IntelligenceXSource should be disabled without API key")
+	}
+
+	src.APIKey = "test-key"
+	if !src.Enabled(recon.Config{}) {
+		t.Fatal("IntelligenceXSource should be enabled with API key")
+	}
+}
+
+// TestIntelligenceX_Sweep drives the full search/result/read flow against a
+// local mock and expects a finding tagged recon:intelligencex.
+func TestIntelligenceX_Sweep(t *testing.T) {
+	mux := http.NewServeMux()
+
+	// Search initiation endpoint: POST only, and the key header must match.
+	mux.HandleFunc("/intelligent/search", func(w http.ResponseWriter, r *http.Request) {
+		if r.Method != http.MethodPost {
+			http.Error(w, "not found", http.StatusNotFound)
+			return
+		}
+		if r.Header.Get("x-key") != "test-key" {
+			http.Error(w, "unauthorized", http.StatusUnauthorized)
+			return
+		}
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"id":"search-42","status":0}`))
+	})
+
+	// Search results endpoint.
+	mux.HandleFunc("/intelligent/search/result", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{
+			"records": [{
+				"systemid": "sys-001",
+				"name": "leak.txt",
+				"storageid": "store-001",
+				"bucket": "bucket-a"
+			}]
+		}`))
+	})
+
+	// File read endpoint.
+	mux.HandleFunc("/file/read", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/plain")
+		_, _ = w.Write([]byte(`config:
+  api_key = "sk-proj-ABCDEF1234567890abcdef"
+  secret_key: "super-secret-value-1234567890ab"
+`))
+	})
+
+	srv := httptest.NewServer(mux)
+	t.Cleanup(srv.Close)
+
+	src := &IntelligenceXSource{
+		APIKey:  "test-key",
+		BaseURL: srv.URL,
+		Registry: providers.NewRegistryFromProviders([]providers.Provider{
+			{Name: "openai", Keywords: []string{"sk-proj-"}},
+		}),
+		Client: NewClient(),
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	out := make(chan recon.Finding, 10)
+	err := src.Sweep(ctx, "", out)
+	close(out)
+	if err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) == 0 {
+		t.Fatal("expected at least one finding from IntelligenceX")
+	}
+	if got := findings[0].SourceType; got != "recon:intelligencex" {
+		t.Fatalf("expected recon:intelligencex, got %s", got)
+	}
+}
+
+// TestIntelligenceX_Sweep_Empty expects no findings when the result list
+// contains no records.
+func TestIntelligenceX_Sweep_Empty(t *testing.T) {
+	mux := http.NewServeMux()
+
+	mux.HandleFunc("/intelligent/search", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"id":"search-empty","status":0}`))
+	})
+
+	mux.HandleFunc("/intelligent/search/result", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"records": []}`))
+	})
+
+	srv := httptest.NewServer(mux)
+	t.Cleanup(srv.Close)
+
+	src := &IntelligenceXSource{
+		APIKey:  "test-key",
+		BaseURL: srv.URL,
+		Registry: providers.NewRegistryFromProviders([]providers.Provider{
+			{Name: "openai", Keywords: []string{"sk-proj-"}},
+		}),
+		Client: NewClient(),
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	out := make(chan recon.Finding, 10)
+	err := src.Sweep(ctx, "", out)
+	close(out)
+	if err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) != 0 {
+		t.Fatalf("expected no findings, got %d", len(findings))
+	}
+}
diff --git a/pkg/recon/sources/urlhaus.go b/pkg/recon/sources/urlhaus.go
new file mode 100644
index 0000000..a0e128a
--- /dev/null
+++ b/pkg/recon/sources/urlhaus.go
@@ -0,0 +1,152 @@
+package sources
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+
+	"golang.org/x/time/rate"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// URLhausSource queries the abuse.ch URLhaus API and reports malicious URL
+// records that match the API key pattern. Threat actors often embed stolen
+// API keys in malware C2 URLs, phishing pages, and credential-harvesting
+// infrastructure. The source carries no credential field and is always
+// enabled.
+type URLhausSource struct {
+	BaseURL  string
+	Registry *providers.Registry
+	Limiters *recon.LimiterRegistry
+	Client   *Client
+}
+
+var _ recon.ReconSource = (*URLhausSource)(nil)
+
+// Name returns the identifier used for this source.
+func (s *URLhausSource) Name() string {
+	return "urlhaus"
+}
+
+// RateLimit allows one request every three seconds.
+func (s *URLhausSource) RateLimit() rate.Limit {
+	return rate.Every(3 * time.Second)
+}
+
+// Burst is the limiter burst size passed alongside RateLimit.
+func (s *URLhausSource) Burst() int {
+	return 2
+}
+
+// RespectsRobots reports false for this source.
+func (s *URLhausSource) RespectsRobots() bool {
+	return false
+}
+
+// Enabled always reports true; no credential is needed.
+func (s *URLhausSource) Enabled(_ recon.Config) bool {
+	return true
+}
+
+// urlhausResponse represents the URLhaus API response for tag/payload lookups.
+type urlhausResponse struct {
+	QueryStatus string         `json:"query_status"`
+	URLs        []urlhausEntry `json:"urls"`
+}
+
+// urlhausEntry is a single URL record from URLhaus.
+type urlhausEntry struct {
+	URL       string   `json:"url"`
+	URLStatus string   `json:"url_status"`
+	Tags      []string `json:"tags"`
+	Reporter  string   `json:"reporter"`
+}
+
+// Sweep looks up every query produced by BuildQueries as a URLhaus tag and
+// sends a Finding on out for each returned URL record that matches
+// ciLogKeyPattern. When the tag lookup fails or yields no URLs, the payload
+// endpoint is tried once as a fallback.
+//
+// The query argument is not used; search terms come from the registry.
+// Failures of individual requests are best-effort and skip to the next query.
+// Sweep returns a non-nil error only when ctx is done or the rate limiter
+// fails.
+func (s *URLhausSource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error {
+	base := s.BaseURL
+	if base == "" {
+		base = "https://urlhaus-api.abuse.ch/v1"
+	}
+	client := s.Client
+	if client == nil {
+		client = NewClient()
+	}
+
+	for _, q := range BuildQueries(s.Registry, "urlhaus") {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		// Try the tag lookup first.
+		if err := s.throttle(ctx); err != nil {
+			return err
+		}
+		result, err := s.decode(s.tagLookup(ctx, client, base, q))
+
+		// Fall back exactly once, whether the tag lookup failed or simply
+		// returned nothing. The fallback is rate limited like any other
+		// request and decodes into a fresh value so no stale URLs survive.
+		if err != nil || result.QueryStatus != "ok" || len(result.URLs) == 0 {
+			if err := ctx.Err(); err != nil {
+				return err
+			}
+			if err := s.throttle(ctx); err != nil {
+				return err
+			}
+			result, err = s.decode(s.payloadFallback(ctx, client, base, q))
+			if err != nil {
+				continue
+			}
+		}
+
+		for _, entry := range result.URLs {
+			// Stringify the record and check for key patterns.
+			record := fmt.Sprintf("url=%s status=%s tags=%v reporter=%s",
+				entry.URL, entry.URLStatus, entry.Tags, entry.Reporter)
+			if !ciLogKeyPattern.MatchString(record) && !ciLogKeyPattern.MatchString(entry.URL) {
+				continue
+			}
+
+			finding := recon.Finding{
+				ProviderName: q,
+				Source:       entry.URL,
+				SourceType:   "recon:urlhaus",
+				Confidence:   "medium",
+				DetectedAt:   time.Now(),
+			}
+			// Do not block forever if the consumer has gone away.
+			select {
+			case out <- finding:
+			case <-ctx.Done():
+				return ctx.Err()
+			}
+		}
+	}
+	return nil
+}
+
+// throttle waits on the shared limiter for this source; it is a no-op when no
+// limiter registry is configured.
+func (s *URLhausSource) throttle(ctx context.Context) error {
+	if s.Limiters == nil {
+		return nil
+	}
+	return s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false)
+}
+
+// tagLookup POSTs the tag as form data to the URLhaus tag endpoint.
+func (s *URLhausSource) tagLookup(ctx context.Context, client *Client, base, tag string) (*http.Response, error) {
+	form := url.Values{"tag": {tag}}.Encode()
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, base+"/tag/", strings.NewReader(form))
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	return client.Do(ctx, req)
+}
+
+// decode reads at most 512 KiB of resp's body, closes it, and unmarshals it
+// into a fresh urlhausResponse. It accepts the (response, error) pair of a
+// request helper directly and passes a request error straight through.
+func (s *URLhausSource) decode(resp *http.Response, err error) (urlhausResponse, error) {
+	var result urlhausResponse
+	if err != nil {
+		return result, err
+	}
+	data, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024))
+	_ = resp.Body.Close()
+	if err != nil {
+		return result, err
+	}
+	if err := json.Unmarshal(data, &result); err != nil {
+		return urlhausResponse{}, err
+	}
+	return result, nil
+}
+
+// payloadFallback tries the URLhaus payload endpoint as a secondary search method.
+// NOTE(review): the request carries empty hash fields plus a tag; confirm
+// against the URLhaus API documentation that the payload endpoint accepts a
+// tag parameter.
+func (s *URLhausSource) payloadFallback(ctx context.Context, client *Client, base, tag string) (*http.Response, error) {
+	payloadURL := fmt.Sprintf("%s/payload/", base)
+	body := fmt.Sprintf("md5_hash=&sha256_hash=&tag=%s", url.QueryEscape(tag))
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, payloadURL, strings.NewReader(body))
+	if err != nil {
+		return nil, err
+	}
+	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
+	return client.Do(ctx, req)
+}
diff --git a/pkg/recon/sources/urlhaus_test.go b/pkg/recon/sources/urlhaus_test.go
new file mode 100644
index 0000000..ad287fc
--- /dev/null
+++ b/pkg/recon/sources/urlhaus_test.go
@@ -0,0 +1,119 @@
+package sources
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// TestURLhaus_Name pins the source identifier.
+func TestURLhaus_Name(t *testing.T) {
+	s := &URLhausSource{}
+	if s.Name() != "urlhaus" {
+		t.Fatalf("expected urlhaus, got %s", s.Name())
+	}
+}
+
+// TestURLhaus_Enabled checks that the source needs no credential.
+func TestURLhaus_Enabled(t *testing.T) {
+	s := &URLhausSource{}
+	if !s.Enabled(recon.Config{}) {
+		t.Fatal("URLhausSource should always be enabled (credentialless)")
+	}
+}
+
+// TestURLhaus_Sweep expects a finding when the tag lookup returns a URL that
+// embeds a key.
+func TestURLhaus_Sweep(t *testing.T) {
+	mux := http.NewServeMux()
+	mux.HandleFunc("/tag/", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{
+			"query_status": "ok",
+			"urls": [{
+				"url": "https://evil.example.com/exfil?token=sk-proj-ABCDEF1234567890abcdef",
+				"url_status": "online",
+				"tags": ["malware", "api_key"],
+				"reporter": "abuse_ch"
+			}]
+		}`))
+	})
+
+	srv := httptest.NewServer(mux)
+	defer srv.Close()
+
+	reg := providers.NewRegistryFromProviders([]providers.Provider{
+		{Name: "openai", Keywords: []string{"sk-proj-"}},
+	})
+
+	s := &URLhausSource{
+		BaseURL:  srv.URL,
+		Registry: reg,
+		Client:   NewClient(),
+	}
+
+	out := make(chan recon.Finding, 10)
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	err := s.Sweep(ctx, "", out)
+	close(out)
+	if err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) == 0 {
+		t.Fatal("expected at least one finding from URLhaus")
+	}
+	if findings[0].SourceType != "recon:urlhaus" {
+		t.Fatalf("expected recon:urlhaus, got %s", findings[0].SourceType)
+	}
+}
+
+// TestURLhaus_Sweep_Empty expects no findings when both the tag lookup and
+// the payload fallback report no results.
+func TestURLhaus_Sweep_Empty(t *testing.T) {
+	mux := http.NewServeMux()
+	mux.HandleFunc("/tag/", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"query_status": "no_results", "urls": []}`))
+	})
+	mux.HandleFunc("/payload/", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"query_status": "no_results", "urls": []}`))
+	})
+
+	srv := httptest.NewServer(mux)
+	defer srv.Close()
+
+	reg := providers.NewRegistryFromProviders([]providers.Provider{
+		{Name: "openai", Keywords: []string{"sk-proj-"}},
+	})
+
+	s := &URLhausSource{
+		BaseURL:  srv.URL,
+		Registry: reg,
+		Client:   NewClient(),
+	}
+
+	out := make(chan recon.Finding, 10)
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	err := s.Sweep(ctx, "", out)
+	close(out)
+	if err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) != 0 {
+		t.Fatalf("expected no findings, got %d", len(findings))
+	}
+}
diff --git a/pkg/recon/sources/virustotal.go b/pkg/recon/sources/virustotal.go
new file mode 100644
index 0000000..86f8059
--- /dev/null
+++ b/pkg/recon/sources/virustotal.go
@@ -0,0 +1,116 @@
+package sources
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"time"
+
+	"golang.org/x/time/rate"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// VirusTotalSource searches the VirusTotal Intelligence API for files and URLs
+// containing API key patterns. Malware samples frequently contain hard-coded
+// API keys used by threat actors to exfiltrate data or proxy requests.
+type VirusTotalSource struct {
+	APIKey   string
+	BaseURL  string
+	Registry *providers.Registry
+	Limiters *recon.LimiterRegistry
+	Client   *Client
+}
+
+var _ recon.ReconSource = (*VirusTotalSource)(nil)
+
+// ReconSource metadata: one request every 15 seconds with a burst of 2; the
+// source is enabled only when an API key is configured.
+func (s *VirusTotalSource) Name() string                { return "virustotal" }
+func (s *VirusTotalSource) RateLimit() rate.Limit       { return rate.Every(15 * time.Second) }
+func (s *VirusTotalSource) Burst() int                  { return 2 }
+func (s *VirusTotalSource) RespectsRobots() bool        { return false }
+func (s *VirusTotalSource) Enabled(_ recon.Config) bool {
+	return s.APIKey != ""
+}
+
+// vtSearchResponse represents the top-level VT intelligence search response.
+type vtSearchResponse struct {
+	Data []vtSearchItem `json:"data"`
+}
+
+// vtSearchItem is a single item in the VT search results.
+type vtSearchItem struct {
+	ID         string          `json:"id"`
+	Attributes json.RawMessage `json:"attributes"`
+}
+
+// Sweep sends every query produced by BuildQueries to the VT intelligence
+// search endpoint and sends a Finding on out for each returned item whose raw
+// attributes JSON matches ciLogKeyPattern.
+//
+// The query argument is not used; search terms come from the registry.
+// Failures of individual requests are best-effort and skip to the next query.
+// Sweep returns a non-nil error only when ctx is done or the rate limiter
+// fails.
+func (s *VirusTotalSource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error {
+	base := s.BaseURL
+	if base == "" {
+		base = "https://www.virustotal.com/api/v3"
+	}
+	client := s.Client
+	if client == nil {
+		client = NewClient()
+	}
+
+	for _, q := range BuildQueries(s.Registry, "virustotal") {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		if s.Limiters != nil {
+			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+				return err
+			}
+		}
+
+		searchURL := fmt.Sprintf(
+			"%s/intelligence/search?query=%s&limit=10",
+			base, url.QueryEscape(q),
+		)
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
+		if err != nil {
+			continue
+		}
+		req.Header.Set("x-apikey", s.APIKey)
+
+		resp, err := client.Do(ctx, req)
+		if err != nil {
+			continue
+		}
+
+		data, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024))
+		_ = resp.Body.Close()
+		if err != nil {
+			continue
+		}
+
+		var result vtSearchResponse
+		if err := json.Unmarshal(data, &result); err != nil {
+			continue
+		}
+
+		for _, item := range result.Data {
+			// Match on the raw bytes; no string copy of the attributes is needed.
+			if !ciLogKeyPattern.Match(item.Attributes) {
+				continue
+			}
+
+			finding := recon.Finding{
+				ProviderName: q,
+				Source:       fmt.Sprintf("https://www.virustotal.com/gui/file/%s", item.ID),
+				SourceType:   "recon:virustotal",
+				Confidence:   "medium",
+				DetectedAt:   time.Now(),
+			}
+			// Do not block forever if the consumer has gone away.
+			select {
+			case out <- finding:
+			case <-ctx.Done():
+				return ctx.Err()
+			}
+		}
+	}
+	return nil
+}
diff --git a/pkg/recon/sources/virustotal_test.go b/pkg/recon/sources/virustotal_test.go
new file mode 100644
index 0000000..65ae75e
--- /dev/null
+++ b/pkg/recon/sources/virustotal_test.go
@@ -0,0 +1,126 @@
+package sources
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// TestVirusTotal_Name pins the source identifier.
+func TestVirusTotal_Name(t *testing.T) {
+	src := &VirusTotalSource{}
+	if got := src.Name(); got != "virustotal" {
+		t.Fatalf("expected virustotal, got %s", got)
+	}
+}
+
+// TestVirusTotal_Enabled checks that the source is gated on the API key.
+func TestVirusTotal_Enabled(t *testing.T) {
+	src := &VirusTotalSource{}
+	if src.Enabled(recon.Config{}) {
+		t.Fatal("VirusTotalSource should be disabled without API key")
+	}
+
+	src.APIKey = "test-key"
+	if !src.Enabled(recon.Config{}) {
+		t.Fatal("VirusTotalSource should be enabled with API key")
+	}
+}
+
+// TestVirusTotal_Sweep expects a finding when the search response carries a
+// key inside an item's attributes; the mock rejects requests without the
+// expected x-apikey header.
+func TestVirusTotal_Sweep(t *testing.T) {
+	mux := http.NewServeMux()
+	mux.HandleFunc("/intelligence/search", func(w http.ResponseWriter, r *http.Request) {
+		if r.Header.Get("x-apikey") != "test-key" {
+			http.Error(w, "unauthorized", http.StatusUnauthorized)
+			return
+		}
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{
+			"data": [{
+				"id": "abc123def456",
+				"attributes": {
+					"meaningful_name": "malware.exe",
+					"tags": ["trojan"],
+					"api_key": "sk-proj-ABCDEF1234567890abcdef"
+				}
+			}]
+		}`))
+	})
+
+	srv := httptest.NewServer(mux)
+	t.Cleanup(srv.Close)
+
+	src := &VirusTotalSource{
+		APIKey:  "test-key",
+		BaseURL: srv.URL,
+		Registry: providers.NewRegistryFromProviders([]providers.Provider{
+			{Name: "openai", Keywords: []string{"sk-proj-"}},
+		}),
+		Client: NewClient(),
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	out := make(chan recon.Finding, 10)
+	err := src.Sweep(ctx, "", out)
+	close(out)
+	if err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) == 0 {
+		t.Fatal("expected at least one finding from VirusTotal")
+	}
+	if got := findings[0].SourceType; got != "recon:virustotal" {
+		t.Fatalf("expected recon:virustotal, got %s", got)
+	}
+}
+
+// TestVirusTotal_Sweep_Empty expects no findings for an empty data array.
+func TestVirusTotal_Sweep_Empty(t *testing.T) {
+	mux := http.NewServeMux()
+	mux.HandleFunc("/intelligence/search", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"data": []}`))
+	})
+
+	srv := httptest.NewServer(mux)
+	t.Cleanup(srv.Close)
+
+	src := &VirusTotalSource{
+		APIKey:  "test-key",
+		BaseURL: srv.URL,
+		Registry: providers.NewRegistryFromProviders([]providers.Provider{
+			{Name: "openai", Keywords: []string{"sk-proj-"}},
+		}),
+		Client: NewClient(),
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	out := make(chan recon.Finding, 10)
+	err := src.Sweep(ctx, "", out)
+	close(out)
+	if err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) != 0 {
+		t.Fatalf("expected no findings, got %d", len(findings))
+	}
+}