feat(16-01): add VirusTotal and IntelligenceX recon sources

- VirusTotalSource searches VT Intelligence API for files containing API keys
- IntelligenceXSource searches IX archive with 3-step flow (search/results/read)
- Both credential-gated (Enabled returns false without API key)
- ciLogKeyPattern used for content matching
- Tests with httptest mocks for happy path and empty results

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
salvacybersec
2026-04-06 16:44:41 +03:00
parent af284f56f2
commit e02bad69ba
4 changed files with 595 additions and 0 deletions

View File

@@ -0,0 +1,202 @@
package sources
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// IntelligenceXSource searches the IntelligenceX archive for leaked credentials.
// IX indexes breached databases, paste sites, and dark web content, making it
// a high-value source for discovering leaked API keys.
//
// The source is credential-gated: Enabled reports false while APIKey is empty.
type IntelligenceXSource struct {
// APIKey is sent in the "x-key" header of every request made by Sweep.
APIKey string
// BaseURL overrides the API root; Sweep falls back to https://2.intelx.io
// when it is empty.
BaseURL string
// Registry is passed to BuildQueries to derive the search terms of a sweep.
Registry *providers.Registry
// Limiters, when non-nil, is waited on before each API request in Sweep.
Limiters *recon.LimiterRegistry
// Client executes the HTTP requests; Sweep calls NewClient when it is nil.
Client *Client
}
// Compile-time assertion that *IntelligenceXSource satisfies recon.ReconSource.
var _ recon.ReconSource = (*IntelligenceXSource)(nil)
// Name returns the identifier of this source. Sweep also passes it to the
// limiter registry as the rate-limiter key.
func (s *IntelligenceXSource) Name() string {
	const sourceName = "intelligencex"
	return sourceName
}
// RateLimit returns the steady-state request rate for this source: one
// request every five seconds.
func (s *IntelligenceXSource) RateLimit() rate.Limit {
	const interval = 5 * time.Second
	return rate.Every(interval)
}
// Burst returns the burst size handed to the rate limiter alongside RateLimit.
func (s *IntelligenceXSource) Burst() int {
	const burst = 3
	return burst
}
// RespectsRobots always reports false for this source.
func (s *IntelligenceXSource) RespectsRobots() bool {
	return false
}
// Enabled reports whether the source can run, which is the case exactly when
// an API key has been set. The recon.Config argument is ignored.
func (s *IntelligenceXSource) Enabled(_ recon.Config) bool {
	hasKey := s.APIKey != ""
	return hasKey
}
// ixSearchRequest is the JSON body for the IX search endpoint
// (POST /intelligent/search).
type ixSearchRequest struct {
// Term is the search term; Sweep fills it with one BuildQueries entry.
Term string `json:"term"`
// MaxResults caps the number of results requested; Sweep sends 10.
MaxResults int `json:"maxresults"`
// Media is sent as 0 by Sweep.
// NOTE(review): presumably 0 means "all media types" — confirm against the IX API docs.
Media int `json:"media"`
// Timeout is sent as 5 by Sweep.
// NOTE(review): presumably seconds — confirm against the IX API docs.
Timeout int `json:"timeout"`
}
// ixSearchResponse is the response from the IX search initiation endpoint.
type ixSearchResponse struct {
// ID identifies the started search; Sweep passes it to the results endpoint
// and skips the query when it is empty.
ID string `json:"id"`
// Status is decoded but not inspected by Sweep.
Status int `json:"status"`
}
// ixResultResponse is the response from the IX search results endpoint
// (GET /intelligent/search/result).
type ixResultResponse struct {
// Records lists the matches; Sweep fetches the content of each one.
Records []ixRecord `json:"records"`
}
// ixRecord is a single record in the IX search results.
type ixRecord struct {
// SystemID is decoded but not used by Sweep.
SystemID string `json:"systemid"`
// Name is decoded but not used by Sweep.
Name string `json:"name"`
// StorageID is sent as the "storageid" parameter of /file/read and is
// embedded in the Source of any resulting finding.
StorageID string `json:"storageid"`
// Bucket is sent as the "bucket" parameter of /file/read.
Bucket string `json:"bucket"`
}
// Sweep runs the three-step IntelligenceX flow for every query that
// BuildQueries derives from s.Registry:
//
//  1. POST /intelligent/search to start a search and obtain its ID.
//  2. GET /intelligent/search/result to list the matching records.
//  3. GET /file/read for each record and test the returned content against
//     ciLogKeyPattern.
//
// A recon.Finding is sent on out for every record whose content matches. The
// query argument is not used; search terms come from the registry.
//
// Individual request failures (request construction, transport error,
// unreadable or malformed body, missing search ID) are best-effort: Sweep
// skips to the next query or record. It returns a non-nil error only when
// ctx is done or the rate limiter's Wait fails.
func (s *IntelligenceXSource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error {
	// maxResults is used for both the search request and the results page so
	// the two limits cannot drift apart.
	const maxResults = 10

	base := s.BaseURL
	if base == "" {
		base = "https://2.intelx.io"
	}
	client := s.Client
	if client == nil {
		client = NewClient()
	}

	queries := BuildQueries(s.Registry, "intelligencex")
	if len(queries) == 0 {
		return nil
	}

	// wait blocks on the limiter, when one is configured, before an API call.
	wait := func() error {
		if s.Limiters == nil {
			return nil
		}
		return s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false)
	}

	// fetch attaches params to req as an escaped query string, authenticates
	// the request, executes it and returns at most maxBytes of the body.
	// Values such as search IDs, storage IDs and buckets come from remote
	// JSON, so they must never be spliced into the URL unescaped.
	fetch := func(req *http.Request, maxBytes int64, params map[string]string) ([]byte, error) {
		if len(params) > 0 {
			values := req.URL.Query()
			for key, value := range params {
				values.Set(key, value)
			}
			req.URL.RawQuery = values.Encode()
		}
		req.Header.Set("x-key", s.APIKey)
		resp, err := client.Do(ctx, req)
		if err != nil {
			return nil, err
		}
		defer func() { _ = resp.Body.Close() }()
		return io.ReadAll(io.LimitReader(resp.Body, maxBytes))
	}

	for _, q := range queries {
		if err := ctx.Err(); err != nil {
			return err
		}

		// Step 1: initiate the search.
		if err := wait(); err != nil {
			return err
		}
		searchBody, err := json.Marshal(ixSearchRequest{
			Term:       q,
			MaxResults: maxResults,
			Media:      0,
			Timeout:    5,
		})
		if err != nil {
			continue
		}
		req, err := http.NewRequestWithContext(ctx, http.MethodPost, base+"/intelligent/search", bytes.NewReader(searchBody))
		if err != nil {
			continue
		}
		req.Header.Set("Content-Type", "application/json")
		respData, err := fetch(req, 64*1024, nil)
		if err != nil {
			continue
		}
		var searchResp ixSearchResponse
		if err := json.Unmarshal(respData, &searchResp); err != nil || searchResp.ID == "" {
			continue
		}

		// Step 2: fetch the result list for the search ID.
		if err := wait(); err != nil {
			return err
		}
		resReq, err := http.NewRequestWithContext(ctx, http.MethodGet, base+"/intelligent/search/result", nil)
		if err != nil {
			continue
		}
		resData, err := fetch(resReq, 512*1024, map[string]string{
			"id":    searchResp.ID,
			"limit": fmt.Sprint(maxResults),
		})
		if err != nil {
			continue
		}
		var resultResp ixResultResponse
		if err := json.Unmarshal(resData, &resultResp); err != nil {
			continue
		}

		// Step 3: fetch the content of each record and check it for keys.
		for _, rec := range resultResp.Records {
			if err := ctx.Err(); err != nil {
				return err
			}
			if err := wait(); err != nil {
				return err
			}
			fileReq, err := http.NewRequestWithContext(ctx, http.MethodGet, base+"/file/read", nil)
			if err != nil {
				continue
			}
			fileData, err := fetch(fileReq, 512*1024, map[string]string{
				"type":      "0",
				"storageid": rec.StorageID,
				"bucket":    rec.Bucket,
			})
			if err != nil {
				continue
			}
			if !ciLogKeyPattern.Match(fileData) {
				continue
			}

			finding := recon.Finding{
				ProviderName: q,
				Source:       fmt.Sprintf("%s/file/read?storageid=%s", base, rec.StorageID),
				SourceType:   "recon:intelligencex",
				Confidence:   "medium",
				DetectedAt:   time.Now(),
			}
			// Never block forever on a consumer that has gone away: give up
			// as soon as the context is cancelled.
			select {
			case out <- finding:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	}
	return nil
}

View File

@@ -0,0 +1,151 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// TestIntelligenceX_Name checks the identifier reported by the source.
func TestIntelligenceX_Name(t *testing.T) {
	const want = "intelligencex"
	got := (&IntelligenceXSource{}).Name()
	if got != want {
		t.Fatalf("expected intelligencex, got %s", got)
	}
}
func TestIntelligenceX_Enabled(t *testing.T) {
s := &IntelligenceXSource{}
if s.Enabled(recon.Config{}) {
t.Fatal("IntelligenceXSource should be disabled without API key")
}
s.APIKey = "test-key"
if !s.Enabled(recon.Config{}) {
t.Fatal("IntelligenceXSource should be enabled with API key")
}
}
// TestIntelligenceX_Sweep drives the search, results and file-read endpoints
// against an httptest server and expects at least one finding whose
// SourceType is "recon:intelligencex".
func TestIntelligenceX_Sweep(t *testing.T) {
	// Search initiation: only a POST carrying the expected key yields an ID.
	startSearch := func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			http.Error(w, "not found", http.StatusNotFound)
			return
		}
		if r.Header.Get("x-key") != "test-key" {
			http.Error(w, "unauthorized", http.StatusUnauthorized)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"id":"search-42","status":0}`))
	}

	// Search results: a single record.
	listResults := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{
"records": [{
"systemid": "sys-001",
"name": "leak.txt",
"storageid": "store-001",
"bucket": "bucket-a"
}]
}`))
	}

	// File read: plain text containing key-like assignments.
	readFile := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/plain")
		_, _ = w.Write([]byte(`config:
api_key = "sk-proj-ABCDEF1234567890abcdef"
secret_key: "super-secret-value-1234567890ab"
`))
	}

	router := http.NewServeMux()
	router.HandleFunc("/intelligent/search", startSearch)
	router.HandleFunc("/intelligent/search/result", listResults)
	router.HandleFunc("/file/read", readFile)
	server := httptest.NewServer(router)
	defer server.Close()

	registry := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
	source := &IntelligenceXSource{
		APIKey:   "test-key",
		BaseURL:  server.URL,
		Registry: registry,
		Client:   NewClient(),
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	results := make(chan recon.Finding, 10)
	sweepErr := source.Sweep(ctx, "", results)
	close(results)
	if sweepErr != nil {
		t.Fatalf("Sweep error: %v", sweepErr)
	}

	var got []recon.Finding
	for finding := range results {
		got = append(got, finding)
	}
	if len(got) == 0 {
		t.Fatal("expected at least one finding from IntelligenceX")
	}
	if first := got[0]; first.SourceType != "recon:intelligencex" {
		t.Fatalf("expected recon:intelligencex, got %s", first.SourceType)
	}
}
// TestIntelligenceX_Sweep_Empty checks that a search returning an empty
// record list produces no findings and no error.
func TestIntelligenceX_Sweep_Empty(t *testing.T) {
	// writeJSON builds a handler that answers every request with body.
	writeJSON := func(body string) http.HandlerFunc {
		return func(w http.ResponseWriter, _ *http.Request) {
			w.Header().Set("Content-Type", "application/json")
			_, _ = w.Write([]byte(body))
		}
	}

	router := http.NewServeMux()
	router.HandleFunc("/intelligent/search", writeJSON(`{"id":"search-empty","status":0}`))
	router.HandleFunc("/intelligent/search/result", writeJSON(`{"records": []}`))
	server := httptest.NewServer(router)
	defer server.Close()

	registry := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
	source := &IntelligenceXSource{
		APIKey:   "test-key",
		BaseURL:  server.URL,
		Registry: registry,
		Client:   NewClient(),
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	results := make(chan recon.Finding, 10)
	sweepErr := source.Sweep(ctx, "", results)
	close(results)
	if sweepErr != nil {
		t.Fatalf("Sweep error: %v", sweepErr)
	}

	count := 0
	for range results {
		count++
	}
	if count != 0 {
		t.Fatalf("expected no findings, got %d", count)
	}
}

View File

@@ -0,0 +1,116 @@
package sources
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// VirusTotalSource searches the VirusTotal Intelligence API for files and URLs
// containing API key patterns. Malware samples frequently contain hard-coded
// API keys used by threat actors to exfiltrate data or proxy requests.
//
// The source is credential-gated: Enabled reports false while APIKey is empty.
type VirusTotalSource struct {
// APIKey is sent in the "x-apikey" header of every request made by Sweep.
APIKey string
// BaseURL overrides the API root; Sweep falls back to
// https://www.virustotal.com/api/v3 when it is empty.
BaseURL string
// Registry is passed to BuildQueries to derive the search terms of a sweep.
Registry *providers.Registry
// Limiters, when non-nil, is waited on before each API request in Sweep.
Limiters *recon.LimiterRegistry
// Client executes the HTTP requests; Sweep calls NewClient when it is nil.
Client *Client
}
// Compile-time assertion that *VirusTotalSource satisfies recon.ReconSource.
var _ recon.ReconSource = (*VirusTotalSource)(nil)
// Name returns the identifier of this source. Sweep also passes it to the
// limiter registry as the rate-limiter key.
func (s *VirusTotalSource) Name() string {
	const sourceName = "virustotal"
	return sourceName
}
// RateLimit returns the steady-state request rate for this source: one
// request every fifteen seconds.
func (s *VirusTotalSource) RateLimit() rate.Limit {
	const interval = 15 * time.Second
	return rate.Every(interval)
}
// Burst returns the burst size handed to the rate limiter alongside RateLimit.
func (s *VirusTotalSource) Burst() int {
	const burst = 2
	return burst
}
// RespectsRobots always reports false for this source.
func (s *VirusTotalSource) RespectsRobots() bool {
	return false
}
// Enabled reports whether the source can run, which is the case exactly when
// an API key has been set. The recon.Config argument is ignored.
func (s *VirusTotalSource) Enabled(_ recon.Config) bool {
	hasKey := s.APIKey != ""
	return hasKey
}
// vtSearchResponse represents the top-level VT intelligence search response.
type vtSearchResponse struct {
// Data holds the search hits; Sweep inspects each item's attributes.
Data []vtSearchItem `json:"data"`
}
// vtSearchItem is a single item in the VT search results.
type vtSearchItem struct {
// ID is appended to https://www.virustotal.com/gui/file/ to form the
// Source of a finding.
ID string `json:"id"`
// Attributes is kept as raw JSON so Sweep can pattern-match the undecoded
// payload without modelling its schema.
Attributes json.RawMessage `json:"attributes"`
}
// Sweep queries the VirusTotal intelligence search endpoint once for every
// query that BuildQueries derives from s.Registry, and sends a recon.Finding
// on out for each returned item whose raw attributes JSON matches
// ciLogKeyPattern. The query argument is not used; search terms come from the
// registry.
//
// Individual request failures (request construction, transport error,
// unreadable or malformed body) are best-effort: Sweep skips to the next
// query. It returns a non-nil error only when ctx is done or the rate
// limiter's Wait fails.
func (s *VirusTotalSource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error {
	base := s.BaseURL
	if base == "" {
		base = "https://www.virustotal.com/api/v3"
	}
	client := s.Client
	if client == nil {
		client = NewClient()
	}

	queries := BuildQueries(s.Registry, "virustotal")
	if len(queries) == 0 {
		return nil
	}

	for _, q := range queries {
		if err := ctx.Err(); err != nil {
			return err
		}
		if s.Limiters != nil {
			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
				return err
			}
		}

		searchURL := fmt.Sprintf(
			"%s/intelligence/search?query=%s&limit=10",
			base, url.QueryEscape(q),
		)
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
		if err != nil {
			continue
		}
		req.Header.Set("x-apikey", s.APIKey)

		resp, err := client.Do(ctx, req)
		if err != nil {
			continue
		}
		data, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024))
		_ = resp.Body.Close()
		if err != nil {
			continue
		}

		var result vtSearchResponse
		if err := json.Unmarshal(data, &result); err != nil {
			continue
		}

		for _, item := range result.Data {
			// json.RawMessage is a []byte, so match it directly instead of
			// copying the whole attributes payload into a string first.
			if !ciLogKeyPattern.Match(item.Attributes) {
				continue
			}

			finding := recon.Finding{
				ProviderName: q,
				Source:       fmt.Sprintf("https://www.virustotal.com/gui/file/%s", item.ID),
				SourceType:   "recon:virustotal",
				Confidence:   "medium",
				DetectedAt:   time.Now(),
			}
			// Never block forever on a consumer that has gone away: give up
			// as soon as the context is cancelled.
			select {
			case out <- finding:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	}
	return nil
}

View File

@@ -0,0 +1,126 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// TestVirusTotal_Name checks the identifier reported by the source.
func TestVirusTotal_Name(t *testing.T) {
	const want = "virustotal"
	got := (&VirusTotalSource{}).Name()
	if got != want {
		t.Fatalf("expected virustotal, got %s", got)
	}
}
func TestVirusTotal_Enabled(t *testing.T) {
s := &VirusTotalSource{}
if s.Enabled(recon.Config{}) {
t.Fatal("VirusTotalSource should be disabled without API key")
}
s.APIKey = "test-key"
if !s.Enabled(recon.Config{}) {
t.Fatal("VirusTotalSource should be enabled with API key")
}
}
// TestVirusTotal_Sweep serves one search hit whose attributes contain a
// key-like value and expects at least one finding whose SourceType is
// "recon:virustotal".
func TestVirusTotal_Sweep(t *testing.T) {
	// Search endpoint: requests without the expected key are rejected.
	search := func(w http.ResponseWriter, r *http.Request) {
		if key := r.Header.Get("x-apikey"); key != "test-key" {
			http.Error(w, "unauthorized", http.StatusUnauthorized)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{
"data": [{
"id": "abc123def456",
"attributes": {
"meaningful_name": "malware.exe",
"tags": ["trojan"],
"api_key": "sk-proj-ABCDEF1234567890abcdef"
}
}]
}`))
	}

	router := http.NewServeMux()
	router.HandleFunc("/intelligence/search", search)
	server := httptest.NewServer(router)
	defer server.Close()

	registry := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
	source := &VirusTotalSource{
		APIKey:   "test-key",
		BaseURL:  server.URL,
		Registry: registry,
		Client:   NewClient(),
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	results := make(chan recon.Finding, 10)
	sweepErr := source.Sweep(ctx, "", results)
	close(results)
	if sweepErr != nil {
		t.Fatalf("Sweep error: %v", sweepErr)
	}

	var got []recon.Finding
	for finding := range results {
		got = append(got, finding)
	}
	if len(got) == 0 {
		t.Fatal("expected at least one finding from VirusTotal")
	}
	if first := got[0]; first.SourceType != "recon:virustotal" {
		t.Fatalf("expected recon:virustotal, got %s", first.SourceType)
	}
}
// TestVirusTotal_Sweep_Empty checks that an empty search response produces no
// findings and no error.
func TestVirusTotal_Sweep_Empty(t *testing.T) {
	emptySearch := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"data": []}`))
	}

	router := http.NewServeMux()
	router.HandleFunc("/intelligence/search", emptySearch)
	server := httptest.NewServer(router)
	defer server.Close()

	registry := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
	source := &VirusTotalSource{
		APIKey:   "test-key",
		BaseURL:  server.URL,
		Registry: registry,
		Client:   NewClient(),
	}

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	results := make(chan recon.Finding, 10)
	sweepErr := source.Sweep(ctx, "", results)
	close(results)
	if sweepErr != nil {
		t.Fatalf("Sweep error: %v", sweepErr)
	}

	count := 0
	for range results {
		count++
	}
	if count != 0 {
		t.Fatalf("expected no findings, got %d", count)
	}
}