From e02bad69ba08e67336eebce46328ae70e7a8c6d9 Mon Sep 17 00:00:00 2001
From: salvacybersec
Date: Mon, 6 Apr 2026 16:44:41 +0300
Subject: [PATCH] feat(16-01): add VirusTotal and IntelligenceX recon sources

- VirusTotalSource searches VT Intelligence API for files containing API keys
- IntelligenceXSource searches IX archive with 3-step flow (search/results/read)
- Both credential-gated (Enabled returns false without API key)
- ciLogKeyPattern used for content matching
- Remote-supplied IDs are URL-escaped; findings are sent with a ctx-aware select
- Tests with httptest mocks for happy path and empty results

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 pkg/recon/sources/intelligencex.go      | 224 ++++++++++++++++++++++++
 pkg/recon/sources/intelligencex_test.go | 151 ++++++++++++++++
 pkg/recon/sources/virustotal.go         | 132 ++++++++++++++
 pkg/recon/sources/virustotal_test.go    | 126 ++++++++++++++
 4 files changed, 633 insertions(+)
 create mode 100644 pkg/recon/sources/intelligencex.go
 create mode 100644 pkg/recon/sources/intelligencex_test.go
 create mode 100644 pkg/recon/sources/virustotal.go
 create mode 100644 pkg/recon/sources/virustotal_test.go

diff --git a/pkg/recon/sources/intelligencex.go b/pkg/recon/sources/intelligencex.go
new file mode 100644
index 0000000..d9109a5
--- /dev/null
+++ b/pkg/recon/sources/intelligencex.go
@@ -0,0 +1,224 @@
+package sources
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"time"
+
+	"golang.org/x/time/rate"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// IntelligenceXSource searches the IntelligenceX archive for leaked credentials.
+// IX indexes breached databases, paste sites, and dark web content, making it
+// a high-value source for discovering leaked API keys.
+type IntelligenceXSource struct {
+	APIKey   string
+	BaseURL  string
+	Registry *providers.Registry
+	Limiters *recon.LimiterRegistry
+	Client   *Client
+}
+
+var _ recon.ReconSource = (*IntelligenceXSource)(nil)
+
+func (s *IntelligenceXSource) Name() string          { return "intelligencex" }
+func (s *IntelligenceXSource) RateLimit() rate.Limit { return rate.Every(5 * time.Second) }
+func (s *IntelligenceXSource) Burst() int            { return 3 }
+func (s *IntelligenceXSource) RespectsRobots() bool  { return false }
+// Enabled reports whether an API key is configured; the config argument is ignored.
+func (s *IntelligenceXSource) Enabled(_ recon.Config) bool {
+	return s.APIKey != ""
+}
+
+// ixSearchRequest is the JSON body for the IX search endpoint.
+type ixSearchRequest struct {
+	Term       string `json:"term"`
+	MaxResults int    `json:"maxresults"`
+	Media      int    `json:"media"`
+	Timeout    int    `json:"timeout"`
+}
+
+// ixSearchResponse is the response from the IX search initiation endpoint.
+type ixSearchResponse struct {
+	ID     string `json:"id"`
+	Status int    `json:"status"`
+}
+
+// ixResultResponse is the response from the IX search results endpoint.
+type ixResultResponse struct {
+	Records []ixRecord `json:"records"`
+}
+
+// ixRecord is a single record in the IX search results.
+type ixRecord struct {
+	SystemID  string `json:"systemid"`
+	Name      string `json:"name"`
+	StorageID string `json:"storageid"`
+	Bucket    string `json:"bucket"`
+}
+
+// Sweep runs every registry-derived query through the three-step IX flow
+// (start search, fetch result records, read each record's content) and sends
+// a Finding on out for each record whose content matches ciLogKeyPattern.
+//
+// Per-request failures (transport, read, or JSON decode errors) skip to the
+// next query or record. Sweep returns a non-nil error only when ctx is done
+// or the rate limiter wait fails. The query argument is not used; the search
+// terms come from BuildQueries.
+func (s *IntelligenceXSource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error {
+	base := s.BaseURL
+	if base == "" {
+		base = "https://2.intelx.io"
+	}
+	client := s.Client
+	if client == nil {
+		client = NewClient()
+	}
+
+	queries := BuildQueries(s.Registry, "intelligencex")
+	if len(queries) == 0 {
+		return nil
+	}
+
+	for _, q := range queries {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		if s.Limiters != nil {
+			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+				return err
+			}
+		}
+
+		// Step 1: Initiate search.
+		searchBody, err := json.Marshal(ixSearchRequest{
+			Term:       q,
+			MaxResults: 10,
+			Media:      0,
+			Timeout:    5,
+		})
+		if err != nil {
+			continue
+		}
+
+		searchURL := fmt.Sprintf("%s/intelligent/search", base)
+		req, err := http.NewRequestWithContext(ctx, http.MethodPost, searchURL, bytes.NewReader(searchBody))
+		if err != nil {
+			continue
+		}
+		req.Header.Set("Content-Type", "application/json")
+		req.Header.Set("x-key", s.APIKey)
+
+		resp, err := client.Do(ctx, req)
+		if err != nil {
+			continue
+		}
+
+		respData, err := io.ReadAll(io.LimitReader(resp.Body, 64*1024))
+		_ = resp.Body.Close()
+		if err != nil {
+			continue
+		}
+
+		var searchResp ixSearchResponse
+		if err := json.Unmarshal(respData, &searchResp); err != nil {
+			continue
+		}
+		if searchResp.ID == "" {
+			continue
+		}
+
+		// Step 2: Fetch results.
+		if s.Limiters != nil {
+			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+				return err
+			}
+		}
+
+		// The search ID comes from the remote response, so escape it before use.
+		resultURL := fmt.Sprintf("%s/intelligent/search/result?id=%s&limit=10", base, url.QueryEscape(searchResp.ID))
+		resReq, err := http.NewRequestWithContext(ctx, http.MethodGet, resultURL, nil)
+		if err != nil {
+			continue
+		}
+		resReq.Header.Set("x-key", s.APIKey)
+
+		resResp, err := client.Do(ctx, resReq)
+		if err != nil {
+			continue
+		}
+
+		resData, err := io.ReadAll(io.LimitReader(resResp.Body, 512*1024))
+		_ = resResp.Body.Close()
+		if err != nil {
+			continue
+		}
+
+		var resultResp ixResultResponse
+		if err := json.Unmarshal(resData, &resultResp); err != nil {
+			continue
+		}
+
+		// Step 3: Fetch content for each record and check for keys.
+		for _, rec := range resultResp.Records {
+			if err := ctx.Err(); err != nil {
+				return err
+			}
+
+			if s.Limiters != nil {
+				if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+					return err
+				}
+			}
+
+			// Storage IDs and bucket names are remote-supplied; escape them.
+			fileURL := fmt.Sprintf(
+				"%s/file/read?type=0&storageid=%s&bucket=%s",
+				base, url.QueryEscape(rec.StorageID), url.QueryEscape(rec.Bucket),
+			)
+			fileReq, err := http.NewRequestWithContext(ctx, http.MethodGet, fileURL, nil)
+			if err != nil {
+				continue
+			}
+			fileReq.Header.Set("x-key", s.APIKey)
+
+			fileResp, err := client.Do(ctx, fileReq)
+			if err != nil {
+				continue
+			}
+
+			fileData, err := io.ReadAll(io.LimitReader(fileResp.Body, 512*1024))
+			_ = fileResp.Body.Close()
+			if err != nil {
+				continue
+			}
+
+			if !ciLogKeyPattern.Match(fileData) {
+				continue
+			}
+			finding := recon.Finding{
+				ProviderName: q,
+				Source:       fmt.Sprintf("%s/file/read?storageid=%s", base, url.QueryEscape(rec.StorageID)),
+				SourceType:   "recon:intelligencex",
+				Confidence:   "medium",
+				DetectedAt:   time.Now(),
+			}
+			// Stop instead of blocking forever if the consumer has gone away.
+			select {
+			case out <- finding:
+			case <-ctx.Done():
+				return ctx.Err()
+			}
+		}
+	}
+	return nil
+}
diff --git a/pkg/recon/sources/intelligencex_test.go b/pkg/recon/sources/intelligencex_test.go
new file mode 100644
index 0000000..3e6542f
--- /dev/null
+++ b/pkg/recon/sources/intelligencex_test.go
@@ -0,0 +1,151 @@
+package sources
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+func TestIntelligenceX_Name(t *testing.T) {
+	s := &IntelligenceXSource{}
+	if s.Name() != "intelligencex" {
+		t.Fatalf("expected intelligencex, got %s", s.Name())
+	}
+}
+
+func TestIntelligenceX_Enabled(t *testing.T) {
+	s := &IntelligenceXSource{}
+	if s.Enabled(recon.Config{}) {
+		t.Fatal("IntelligenceXSource should be disabled without API key")
+	}
+	s.APIKey = "test-key"
+	if !s.Enabled(recon.Config{}) {
+		t.Fatal("IntelligenceXSource should be enabled with API key")
+	}
+}
+
+func TestIntelligenceX_Sweep(t *testing.T) {
+	mux := http.NewServeMux()
+
+	// Search initiation endpoint.
+	mux.HandleFunc("/intelligent/search", func(w http.ResponseWriter, r *http.Request) {
+		if r.Method == http.MethodPost {
+			if r.Header.Get("x-key") != "test-key" {
+				http.Error(w, "unauthorized", http.StatusUnauthorized)
+				return
+			}
+			w.Header().Set("Content-Type", "application/json")
+			_, _ = w.Write([]byte(`{"id":"search-42","status":0}`))
+			return
+		}
+		http.Error(w, "not found", http.StatusNotFound)
+	})
+
+	// Search results endpoint.
+	mux.HandleFunc("/intelligent/search/result", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{
+			"records": [{
+				"systemid": "sys-001",
+				"name": "leak.txt",
+				"storageid": "store-001",
+				"bucket": "bucket-a"
+			}]
+		}`))
+	})
+
+	// File read endpoint.
+	mux.HandleFunc("/file/read", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/plain")
+		_, _ = w.Write([]byte(`config:
+  api_key = "sk-proj-ABCDEF1234567890abcdef"
+  secret_key: "super-secret-value-1234567890ab"
+`))
+	})
+
+	srv := httptest.NewServer(mux)
+	defer srv.Close()
+
+	reg := providers.NewRegistryFromProviders([]providers.Provider{
+		{Name: "openai", Keywords: []string{"sk-proj-"}},
+	})
+
+	s := &IntelligenceXSource{
+		APIKey:   "test-key",
+		BaseURL:  srv.URL,
+		Registry: reg,
+		Client:   NewClient(),
+	}
+
+	out := make(chan recon.Finding, 10)
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	err := s.Sweep(ctx, "", out)
+	close(out)
+	if err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) == 0 {
+		t.Fatal("expected at least one finding from IntelligenceX")
+	}
+	if findings[0].SourceType != "recon:intelligencex" {
+		t.Fatalf("expected recon:intelligencex, got %s", findings[0].SourceType)
+	}
+}
+
+func TestIntelligenceX_Sweep_Empty(t *testing.T) {
+	mux := http.NewServeMux()
+
+	mux.HandleFunc("/intelligent/search", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"id":"search-empty","status":0}`))
+	})
+
+	mux.HandleFunc("/intelligent/search/result", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"records": []}`))
+	})
+
+	srv := httptest.NewServer(mux)
+	defer srv.Close()
+
+	reg := providers.NewRegistryFromProviders([]providers.Provider{
+		{Name: "openai", Keywords: []string{"sk-proj-"}},
+	})
+
+	s := &IntelligenceXSource{
+		APIKey:   "test-key",
+		BaseURL:  srv.URL,
+		Registry: reg,
+		Client:   NewClient(),
+	}
+
+	out := make(chan recon.Finding, 10)
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	err := s.Sweep(ctx, "", out)
+	close(out)
+	if err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) != 0 {
+		t.Fatalf("expected no findings, got %d", len(findings))
+	}
+}
diff --git a/pkg/recon/sources/virustotal.go b/pkg/recon/sources/virustotal.go
new file mode 100644
index 0000000..86f8059
--- /dev/null
+++ b/pkg/recon/sources/virustotal.go
@@ -0,0 +1,132 @@
+package sources
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"time"
+
+	"golang.org/x/time/rate"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// VirusTotalSource searches the VirusTotal Intelligence API for files and URLs
+// containing API key patterns. Malware samples frequently contain hard-coded
+// API keys used by threat actors to exfiltrate data or proxy requests.
+type VirusTotalSource struct {
+	APIKey   string
+	BaseURL  string
+	Registry *providers.Registry
+	Limiters *recon.LimiterRegistry
+	Client   *Client
+}
+
+var _ recon.ReconSource = (*VirusTotalSource)(nil)
+
+func (s *VirusTotalSource) Name() string          { return "virustotal" }
+func (s *VirusTotalSource) RateLimit() rate.Limit { return rate.Every(15 * time.Second) }
+func (s *VirusTotalSource) Burst() int            { return 2 }
+func (s *VirusTotalSource) RespectsRobots() bool  { return false }
+// Enabled reports whether an API key is configured; the config argument is ignored.
+func (s *VirusTotalSource) Enabled(_ recon.Config) bool {
+	return s.APIKey != ""
+}
+
+// vtSearchResponse represents the top-level VT intelligence search response.
+type vtSearchResponse struct {
+	Data []vtSearchItem `json:"data"`
+}
+
+// vtSearchItem is a single item in the VT search results.
+type vtSearchItem struct {
+	ID         string          `json:"id"`
+	Attributes json.RawMessage `json:"attributes"`
+}
+
+// Sweep queries the VT intelligence search endpoint once per registry-derived
+// query and sends a Finding on out for each result item whose raw attributes
+// JSON matches ciLogKeyPattern.
+//
+// Per-request failures (transport, read, or JSON decode errors) skip to the
+// next query. Sweep returns a non-nil error only when ctx is done or the rate
+// limiter wait fails. The query argument is not used; the search terms come
+// from BuildQueries.
+func (s *VirusTotalSource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error {
+	base := s.BaseURL
+	if base == "" {
+		base = "https://www.virustotal.com/api/v3"
+	}
+	client := s.Client
+	if client == nil {
+		client = NewClient()
+	}
+
+	queries := BuildQueries(s.Registry, "virustotal")
+	if len(queries) == 0 {
+		return nil
+	}
+
+	for _, q := range queries {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		if s.Limiters != nil {
+			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+				return err
+			}
+		}
+
+		searchURL := fmt.Sprintf(
+			"%s/intelligence/search?query=%s&limit=10",
+			base, url.QueryEscape(q),
+		)
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
+		if err != nil {
+			continue
+		}
+		req.Header.Set("x-apikey", s.APIKey)
+
+		resp, err := client.Do(ctx, req)
+		if err != nil {
+			continue
+		}
+
+		data, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024))
+		_ = resp.Body.Close()
+		if err != nil {
+			continue
+		}
+
+		var result vtSearchResponse
+		if err := json.Unmarshal(data, &result); err != nil {
+			continue
+		}
+
+		for _, item := range result.Data {
+			// Match the raw attributes JSON directly; no string copy is needed.
+			if !ciLogKeyPattern.Match(item.Attributes) {
+				continue
+			}
+			finding := recon.Finding{
+				ProviderName: q,
+				Source:       fmt.Sprintf("https://www.virustotal.com/gui/file/%s", url.PathEscape(item.ID)),
+				SourceType:   "recon:virustotal",
+				Confidence:   "medium",
+				DetectedAt:   time.Now(),
+			}
+			// Stop instead of blocking forever if the consumer has gone away.
+			select {
+			case out <- finding:
+			case <-ctx.Done():
+				return ctx.Err()
+			}
+		}
+	}
+	return nil
+}
diff --git a/pkg/recon/sources/virustotal_test.go b/pkg/recon/sources/virustotal_test.go
new file mode 100644
index 0000000..65ae75e
--- /dev/null
+++ b/pkg/recon/sources/virustotal_test.go
@@ -0,0 +1,126 @@
+package sources
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+func TestVirusTotal_Name(t *testing.T) {
+	s := &VirusTotalSource{}
+	if s.Name() != "virustotal" {
+		t.Fatalf("expected virustotal, got %s", s.Name())
+	}
+}
+
+func TestVirusTotal_Enabled(t *testing.T) {
+	s := &VirusTotalSource{}
+	if s.Enabled(recon.Config{}) {
+		t.Fatal("VirusTotalSource should be disabled without API key")
+	}
+	s.APIKey = "test-key"
+	if !s.Enabled(recon.Config{}) {
+		t.Fatal("VirusTotalSource should be enabled with API key")
+	}
+}
+
+func TestVirusTotal_Sweep(t *testing.T) {
+	mux := http.NewServeMux()
+	mux.HandleFunc("/intelligence/search", func(w http.ResponseWriter, r *http.Request) {
+		if r.Header.Get("x-apikey") != "test-key" {
+			http.Error(w, "unauthorized", http.StatusUnauthorized)
+			return
+		}
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{
+			"data": [{
+				"id": "abc123def456",
+				"attributes": {
+					"meaningful_name": "malware.exe",
+					"tags": ["trojan"],
+					"api_key": "sk-proj-ABCDEF1234567890abcdef"
+				}
+			}]
+		}`))
+	})
+
+	srv := httptest.NewServer(mux)
+	defer srv.Close()
+
+	reg := providers.NewRegistryFromProviders([]providers.Provider{
+		{Name: "openai", Keywords: []string{"sk-proj-"}},
+	})
+
+	s := &VirusTotalSource{
+		APIKey:   "test-key",
+		BaseURL:  srv.URL,
+		Registry: reg,
+		Client:   NewClient(),
+	}
+
+	out := make(chan recon.Finding, 10)
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	err := s.Sweep(ctx, "", out)
+	close(out)
+	if err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) == 0 {
+		t.Fatal("expected at least one finding from VirusTotal")
+	}
+	if findings[0].SourceType != "recon:virustotal" {
+		t.Fatalf("expected recon:virustotal, got %s", findings[0].SourceType)
+	}
+}
+
+func TestVirusTotal_Sweep_Empty(t *testing.T) {
+	mux := http.NewServeMux()
+	mux.HandleFunc("/intelligence/search", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"data": []}`))
+	})
+
+	srv := httptest.NewServer(mux)
+	defer srv.Close()
+
+	reg := providers.NewRegistryFromProviders([]providers.Provider{
+		{Name: "openai", Keywords: []string{"sk-proj-"}},
+	})
+
+	s := &VirusTotalSource{
+		APIKey:   "test-key",
+		BaseURL:  srv.URL,
+		Registry: reg,
+		Client:   NewClient(),
+	}
+
+	out := make(chan recon.Finding, 10)
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	err := s.Sweep(ctx, "", out)
+	close(out)
+	if err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) != 0 {
+		t.Fatalf("expected no findings, got %d", len(findings))
+	}
+}