Merge branch 'worktree-agent-aa3f0a8f'

This commit is contained in:
salvacybersec
2026-04-06 16:47:10 +03:00
9 changed files with 977 additions and 8 deletions

View File

@@ -187,9 +187,9 @@ Requirements for initial release. Each maps to roadmap phases.
### OSINT/Recon — Threat Intelligence ### OSINT/Recon — Threat Intelligence
- [ ] **RECON-INTEL-01**: VirusTotal file and URL search - [x] **RECON-INTEL-01**: VirusTotal file and URL search
- [ ] **RECON-INTEL-02**: Intelligence X aggregated search - [x] **RECON-INTEL-02**: Intelligence X aggregated search
- [ ] **RECON-INTEL-03**: URLhaus search - [x] **RECON-INTEL-03**: URLhaus search
### OSINT/Recon — Mobile & DNS ### OSINT/Recon — Mobile & DNS

View File

@@ -3,14 +3,14 @@ gsd_state_version: 1.0
milestone: v1.0 milestone: v1.0
milestone_name: milestone milestone_name: milestone
status: executing status: executing
stopped_at: Completed 15-03-PLAN.md stopped_at: Completed 16-01-PLAN.md
last_updated: "2026-04-06T13:37:48.053Z" last_updated: "2026-04-06T13:46:09.387Z"
last_activity: 2026-04-06 last_activity: 2026-04-06
progress: progress:
total_phases: 18 total_phases: 18
completed_phases: 14 completed_phases: 14
total_plans: 81 total_plans: 81
completed_plans: 80 completed_plans: 81
percent: 20 percent: 20
--- ---
@@ -99,6 +99,7 @@ Progress: [██░░░░░░░░] 20%
| Phase 14 P01 | 4min | 1 tasks | 14 files | | Phase 14 P01 | 4min | 1 tasks | 14 files |
| Phase 15 P01 | 3min | 2 tasks | 13 files | | Phase 15 P01 | 3min | 2 tasks | 13 files |
| Phase 15 P03 | 4min | 2 tasks | 11 files | | Phase 15 P03 | 4min | 2 tasks | 11 files |
| Phase 16 P01 | 4min | 2 tasks | 6 files |
## Accumulated Context ## Accumulated Context
@@ -148,6 +149,9 @@ Recent decisions affecting current work:
- [Phase 14]: RegisterAll extended to 45 sources (40 Phase 10-13 + 5 Phase 14 CI/CD); CircleCI gets dedicated CIRCLECI_TOKEN - [Phase 14]: RegisterAll extended to 45 sources (40 Phase 10-13 + 5 Phase 14 CI/CD); CircleCI gets dedicated CIRCLECI_TOKEN
- [Phase 15]: Discord/Slack use dorking approach (configurable search endpoint) since neither has public message search API - [Phase 15]: Discord/Slack use dorking approach (configurable search endpoint) since neither has public message search API
- [Phase 15]: Log aggregator sources are credentialless, targeting exposed instances - [Phase 15]: Log aggregator sources are credentialless, targeting exposed instances
- [Phase 16]: VT uses x-apikey header per official API v3 spec
- [Phase 16]: IX uses three-step flow: POST search, GET results, GET file content
- [Phase 16]: URLhaus tag lookup with payload endpoint fallback
### Pending Todos ### Pending Todos
@@ -162,6 +166,6 @@ None yet.
## Session Continuity ## Session Continuity
Last session: 2026-04-06T13:32:52.610Z Last session: 2026-04-06T13:46:09.383Z
Stopped at: Completed 15-03-PLAN.md Stopped at: Completed 16-01-PLAN.md
Resume file: None Resume file: None

View File

@@ -0,0 +1,99 @@
---
phase: 16-osint-threat-intel-mobile-dns-api-marketplaces
plan: 01
subsystem: recon
tags: [virustotal, intelligencex, urlhaus, threat-intel, osint]
requires:
- phase: 09-osint-infrastructure
provides: ReconSource interface, LimiterRegistry, Client, BuildQueries, ciLogKeyPattern
provides:
- VirusTotalSource implementing ReconSource (credential-gated)
- IntelligenceXSource implementing ReconSource (credential-gated)
- URLhausSource implementing ReconSource (credentialless)
affects: [16-osint-wiring, recon-engine-registration]
tech-stack:
added: []
patterns: [three-step IX search flow (initiate/results/read), VT x-apikey auth, URLhaus form-encoded POST with tag/payload fallback]
key-files:
created:
- pkg/recon/sources/virustotal.go
- pkg/recon/sources/virustotal_test.go
- pkg/recon/sources/intelligencex.go
- pkg/recon/sources/intelligencex_test.go
- pkg/recon/sources/urlhaus.go
- pkg/recon/sources/urlhaus_test.go
modified: []
key-decisions:
- "VT uses x-apikey header per official API v3 spec"
- "IX uses three-step flow: POST search, GET results, GET file content per record"
- "URLhaus tag lookup with payload endpoint fallback for broader coverage"
patterns-established:
- "Threat intel sources follow same SentrySource pattern with ciLogKeyPattern matching"
requirements-completed: [RECON-INTEL-01, RECON-INTEL-02, RECON-INTEL-03]
duration: 4min
completed: 2026-04-06
---
# Phase 16 Plan 01: Threat Intelligence Sources Summary
**VirusTotal, IntelligenceX, and URLhaus recon sources for detecting API keys in malware samples, breach archives, and malicious URL databases**
## Performance
- **Duration:** 4 min
- **Started:** 2026-04-06T13:43:29Z
- **Completed:** 2026-04-06T13:47:29Z
- **Tasks:** 2
- **Files modified:** 6
## Accomplishments
- VirusTotalSource searches VT Intelligence API for files containing API key patterns (credential-gated, 4 req/min rate limit)
- IntelligenceXSource searches IX archive with three-step search/results/content-read flow (credential-gated)
- URLhausSource searches abuse.ch API for malicious URLs with embedded keys (credentialless, always enabled)
- All three sources use ciLogKeyPattern for consistent content matching across the recon framework
## Task Commits
Each task was committed atomically:
1. **Task 1: VirusTotal and IntelligenceX sources** - `e02bad6` (feat)
2. **Task 2: URLhaus source** - `35fa4ad` (feat)
## Files Created/Modified
- `pkg/recon/sources/virustotal.go` - VT Intelligence API search source
- `pkg/recon/sources/virustotal_test.go` - httptest mocks for VT (4 tests)
- `pkg/recon/sources/intelligencex.go` - IX archive search with three-step flow
- `pkg/recon/sources/intelligencex_test.go` - httptest mocks for IX (4 tests)
- `pkg/recon/sources/urlhaus.go` - abuse.ch URLhaus tag/payload search
- `pkg/recon/sources/urlhaus_test.go` - httptest mocks for URLhaus (4 tests)
## Decisions Made
- VT uses x-apikey header per official API v3 spec
- IX uses three-step flow: POST search initiation, GET results list, GET file content per record
- URLhaus uses tag lookup endpoint with payload endpoint fallback for broader coverage
## Deviations from Plan
None - plan executed exactly as written.
## Issues Encountered
None
## User Setup Required
None - no external service configuration required.
## Next Phase Readiness
- Three threat intel sources ready for wiring into RegisterAll
- VT and IX require API keys via config/env; URLhaus works immediately
- All sources follow established ReconSource pattern
---
*Phase: 16-osint-threat-intel-mobile-dns-api-marketplaces*
*Completed: 2026-04-06*

View File

@@ -0,0 +1,202 @@
package sources
import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"time"

	"golang.org/x/time/rate"

	"github.com/salvacybersec/keyhunter/pkg/providers"
	"github.com/salvacybersec/keyhunter/pkg/recon"
)
// IntelligenceXSource searches the IntelligenceX archive for leaked credentials.
// IX indexes breached databases, paste sites, and dark web content, making it
// a high-value source for discovering leaked API keys.
type IntelligenceXSource struct {
	// APIKey authenticates requests via the x-key header; the source is
	// disabled when empty.
	APIKey string
	// BaseURL overrides the default API endpoint (primarily for tests);
	// empty selects the production endpoint.
	BaseURL string
	// Registry supplies the provider keywords that become search queries.
	Registry *providers.Registry
	// Limiters throttles outbound requests per source name; may be nil.
	Limiters *recon.LimiterRegistry
	// Client is the HTTP client wrapper; a default is created when nil.
	Client *Client
}

// Compile-time check that IntelligenceXSource implements recon.ReconSource.
var _ recon.ReconSource = (*IntelligenceXSource)(nil)

// Name returns the registry identifier for this source.
func (s *IntelligenceXSource) Name() string { return "intelligencex" }

// RateLimit allows one request every 5 seconds.
func (s *IntelligenceXSource) RateLimit() rate.Limit { return rate.Every(5 * time.Second) }

// Burst permits up to 3 requests in a burst.
func (s *IntelligenceXSource) Burst() int { return 3 }

// RespectsRobots reports that this API client does not consult robots.txt.
func (s *IntelligenceXSource) RespectsRobots() bool { return false }

// Enabled reports whether the source can run; it is credential-gated on APIKey.
func (s *IntelligenceXSource) Enabled(_ recon.Config) bool {
	return s.APIKey != ""
}
// ixSearchRequest is the JSON body for the IX search endpoint.
type ixSearchRequest struct {
	// Term is the search term (a provider keyword from the registry).
	Term string `json:"term"`
	// MaxResults caps how many records the search may return.
	MaxResults int `json:"maxresults"`
	// Media filters by media type; Sweep sends 0 (presumably "any" — confirm
	// against the IX API docs).
	Media int `json:"media"`
	// Timeout is the server-side search timeout in seconds.
	Timeout int `json:"timeout"`
}

// ixSearchResponse is the response from the IX search initiation endpoint.
type ixSearchResponse struct {
	// ID identifies the search; it is passed to the results endpoint.
	ID string `json:"id"`
	// Status is the API status code for the initiation call.
	Status int `json:"status"`
}

// ixResultResponse is the response from the IX search results endpoint.
type ixResultResponse struct {
	Records []ixRecord `json:"records"`
}

// ixRecord is a single record in the IX search results.
type ixRecord struct {
	SystemID string `json:"systemid"`
	Name     string `json:"name"`
	// StorageID and Bucket together address the record's file content for
	// the /file/read endpoint.
	StorageID string `json:"storageid"`
	Bucket    string `json:"bucket"`
}
// Sweep searches the IntelligenceX archive for content matching provider key
// patterns and emits a recon.Finding for every hit.
//
// Per query the IX API requires a three-step flow:
//  1. POST /intelligent/search initiates a search and returns a search ID.
//  2. GET /intelligent/search/result lists the matching records.
//  3. GET /file/read fetches each record's content, which is scanned with
//     ciLogKeyPattern.
//
// Individual request or decode failures skip to the next query (best-effort
// sweep); only context cancellation or rate-limiter errors abort it. The
// query argument is unused: queries are derived from the Registry via
// BuildQueries.
func (s *IntelligenceXSource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error {
	base := s.BaseURL
	if base == "" {
		base = "https://2.intelx.io"
	}
	client := s.Client
	if client == nil {
		client = NewClient()
	}
	queries := BuildQueries(s.Registry, "intelligencex")
	if len(queries) == 0 {
		return nil
	}
	for _, q := range queries {
		if err := ctx.Err(); err != nil {
			return err
		}
		if err := s.wait(ctx); err != nil {
			return err
		}
		// Step 1: initiate the search.
		searchBody, _ := json.Marshal(ixSearchRequest{
			Term:       q,
			MaxResults: 10,
			Media:      0,
			Timeout:    5,
		})
		req, err := http.NewRequestWithContext(ctx, http.MethodPost, base+"/intelligent/search", bytes.NewReader(searchBody))
		if err != nil {
			continue
		}
		req.Header.Set("Content-Type", "application/json")
		req.Header.Set("x-key", s.APIKey)
		respData, err := s.readResponse(ctx, client, req, 64*1024)
		if err != nil {
			continue
		}
		var searchResp ixSearchResponse
		if err := json.Unmarshal(respData, &searchResp); err != nil || searchResp.ID == "" {
			continue
		}
		// Step 2: fetch the result records for the search ID. The ID is
		// server-supplied, so escape it before placing it in a query string.
		if err := s.wait(ctx); err != nil {
			return err
		}
		resultURL := fmt.Sprintf("%s/intelligent/search/result?id=%s&limit=10", base, url.QueryEscape(searchResp.ID))
		resData, err := s.get(ctx, client, resultURL, 512*1024)
		if err != nil {
			continue
		}
		var resultResp ixResultResponse
		if err := json.Unmarshal(resData, &resultResp); err != nil {
			continue
		}
		// Step 3: read each record's content and scan it for key material.
		for _, rec := range resultResp.Records {
			if err := ctx.Err(); err != nil {
				return err
			}
			if err := s.wait(ctx); err != nil {
				return err
			}
			fileURL := fmt.Sprintf(
				"%s/file/read?type=0&storageid=%s&bucket=%s",
				base, url.QueryEscape(rec.StorageID), url.QueryEscape(rec.Bucket),
			)
			fileData, err := s.get(ctx, client, fileURL, 512*1024)
			if err != nil {
				continue
			}
			if !ciLogKeyPattern.Match(fileData) {
				continue
			}
			finding := recon.Finding{
				ProviderName: q,
				Source:       fmt.Sprintf("%s/file/read?storageid=%s", base, url.QueryEscape(rec.StorageID)),
				SourceType:   "recon:intelligencex",
				Confidence:   "medium",
				DetectedAt:   time.Now(),
			}
			// Honor cancellation while emitting so a stalled consumer cannot
			// block the sweep forever.
			select {
			case out <- finding:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	}
	return nil
}

// wait blocks on the per-source rate limiter when one is configured.
func (s *IntelligenceXSource) wait(ctx context.Context) error {
	if s.Limiters == nil {
		return nil
	}
	return s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false)
}

// get issues an authenticated GET against the IX API and returns at most
// maxBytes of the response body.
func (s *IntelligenceXSource) get(ctx context.Context, client *Client, u string, maxBytes int64) ([]byte, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("x-key", s.APIKey)
	return s.readResponse(ctx, client, req, maxBytes)
}

// readResponse executes req and reads at most maxBytes of the body, always
// closing it.
func (s *IntelligenceXSource) readResponse(ctx context.Context, client *Client, req *http.Request, maxBytes int64) ([]byte, error) {
	resp, err := client.Do(ctx, req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	return io.ReadAll(io.LimitReader(resp.Body, maxBytes))
}

View File

@@ -0,0 +1,151 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// TestIntelligenceX_Name verifies the source registers under "intelligencex".
func TestIntelligenceX_Name(t *testing.T) {
	src := &IntelligenceXSource{}
	if got := src.Name(); got != "intelligencex" {
		t.Fatalf("expected intelligencex, got %s", got)
	}
}
// TestIntelligenceX_Enabled verifies the source is credential-gated: off
// without an API key, on with one.
func TestIntelligenceX_Enabled(t *testing.T) {
	src := &IntelligenceXSource{}
	if got := src.Enabled(recon.Config{}); got {
		t.Fatal("IntelligenceXSource should be disabled without API key")
	}
	src.APIKey = "test-key"
	if got := src.Enabled(recon.Config{}); !got {
		t.Fatal("IntelligenceXSource should be enabled with API key")
	}
}
// TestIntelligenceX_Sweep drives the full search → results → file-read flow
// against a mock server whose file content contains a key, and expects a
// finding with the intelligencex source type.
func TestIntelligenceX_Sweep(t *testing.T) {
	handler := http.NewServeMux()
	// Step 1: search initiation (POST only, key required).
	handler.HandleFunc("/intelligent/search", func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPost {
			http.Error(w, "not found", http.StatusNotFound)
			return
		}
		if r.Header.Get("x-key") != "test-key" {
			http.Error(w, "unauthorized", http.StatusUnauthorized)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"id":"search-42","status":0}`))
	})
	// Step 2: one matching record.
	handler.HandleFunc("/intelligent/search/result", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{
			"records": [{
				"systemid": "sys-001",
				"name": "leak.txt",
				"storageid": "store-001",
				"bucket": "bucket-a"
			}]
		}`))
	})
	// Step 3: file content with a leaked key.
	handler.HandleFunc("/file/read", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/plain")
		_, _ = w.Write([]byte(`config:
api_key = "sk-proj-ABCDEF1234567890abcdef"
secret_key: "super-secret-value-1234567890ab"
`))
	})
	srv := httptest.NewServer(handler)
	defer srv.Close()

	registry := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
	source := &IntelligenceXSource{
		APIKey:   "test-key",
		BaseURL:  srv.URL,
		Registry: registry,
		Client:   NewClient(),
	}

	results := make(chan recon.Finding, 10)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	err := source.Sweep(ctx, "", results)
	close(results)
	if err != nil {
		t.Fatalf("Sweep error: %v", err)
	}

	var findings []recon.Finding
	for f := range results {
		findings = append(findings, f)
	}
	if len(findings) == 0 {
		t.Fatal("expected at least one finding from IntelligenceX")
	}
	if got := findings[0].SourceType; got != "recon:intelligencex" {
		t.Fatalf("expected recon:intelligencex, got %s", got)
	}
}
// TestIntelligenceX_Sweep_Empty confirms that a search yielding zero records
// produces no findings and no error.
func TestIntelligenceX_Sweep_Empty(t *testing.T) {
	handler := http.NewServeMux()
	handler.HandleFunc("/intelligent/search", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"id":"search-empty","status":0}`))
	})
	handler.HandleFunc("/intelligent/search/result", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"records": []}`))
	})
	srv := httptest.NewServer(handler)
	defer srv.Close()

	registry := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
	source := &IntelligenceXSource{
		APIKey:   "test-key",
		BaseURL:  srv.URL,
		Registry: registry,
		Client:   NewClient(),
	}

	results := make(chan recon.Finding, 10)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	err := source.Sweep(ctx, "", results)
	close(results)
	if err != nil {
		t.Fatalf("Sweep error: %v", err)
	}

	var findings []recon.Finding
	for f := range results {
		findings = append(findings, f)
	}
	if len(findings) != 0 {
		t.Fatalf("expected no findings, got %d", len(findings))
	}
}

View File

@@ -0,0 +1,152 @@
package sources
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"strings"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// URLhausSource searches the abuse.ch URLhaus API for malicious URLs that
// contain API key patterns. Threat actors often embed stolen API keys in
// malware C2 URLs, phishing pages, and credential-harvesting infrastructure.
// URLhaus is free and unauthenticated — no API key required.
type URLhausSource struct {
	// BaseURL overrides the default API endpoint (primarily for tests);
	// empty selects the production endpoint.
	BaseURL string
	// Registry supplies the provider keywords used as lookup tags.
	Registry *providers.Registry
	// Limiters throttles outbound requests per source name; may be nil.
	Limiters *recon.LimiterRegistry
	// Client is the HTTP client wrapper; a default is created when nil.
	Client *Client
}

// Compile-time check that URLhausSource implements recon.ReconSource.
var _ recon.ReconSource = (*URLhausSource)(nil)

// Name returns the registry identifier for this source.
func (s *URLhausSource) Name() string { return "urlhaus" }

// RateLimit allows one request every 3 seconds.
func (s *URLhausSource) RateLimit() rate.Limit { return rate.Every(3 * time.Second) }

// Burst permits up to 2 requests in a burst.
func (s *URLhausSource) Burst() int { return 2 }

// RespectsRobots reports that this API client does not consult robots.txt.
func (s *URLhausSource) RespectsRobots() bool { return false }

// Enabled always reports true: URLhaus is credentialless.
func (s *URLhausSource) Enabled(_ recon.Config) bool { return true }
// urlhausResponse represents the URLhaus API response for tag/payload lookups.
type urlhausResponse struct {
	// QueryStatus is "ok" on a successful lookup (other values indicate
	// no results or an error per the URLhaus API).
	QueryStatus string `json:"query_status"`
	// URLs holds the matching URL records.
	URLs []urlhausEntry `json:"urls"`
}

// urlhausEntry is a single URL record from URLhaus.
type urlhausEntry struct {
	URL       string   `json:"url"`
	URLStatus string   `json:"url_status"`
	Tags      []string `json:"tags"`
	Reporter  string   `json:"reporter"`
}
// Sweep queries the URLhaus API for malicious URLs tagged with provider
// keywords and emits a recon.Finding for every record matching the key
// pattern.
//
// For each query the tag-lookup endpoint is tried first; whenever it fails,
// reports a non-ok status, or returns no URLs, the payload endpoint is used
// as a fallback for broader coverage. Failures on a query skip to the next
// one (best-effort sweep); only context cancellation or rate-limiter errors
// abort it. The query argument is unused: queries come from BuildQueries
// over the Registry.
func (s *URLhausSource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error {
	base := s.BaseURL
	if base == "" {
		base = "https://urlhaus-api.abuse.ch/v1"
	}
	client := s.Client
	if client == nil {
		client = NewClient()
	}
	queries := BuildQueries(s.Registry, "urlhaus")
	if len(queries) == 0 {
		return nil
	}
	for _, q := range queries {
		if err := ctx.Err(); err != nil {
			return err
		}
		if s.Limiters != nil {
			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
				return err
			}
		}
		result, err := s.tagLookup(ctx, client, base, q)
		// Fall back on any tag-lookup failure or empty result. Each decode
		// uses a fresh struct, so stale fields from the tag response can
		// never leak into the fallback result.
		if err != nil || result.QueryStatus != "ok" || len(result.URLs) == 0 {
			result, err = s.payloadLookup(ctx, client, base, q)
			if err != nil {
				continue
			}
		}
		for _, entry := range result.URLs {
			// Stringify the record so keys hidden in tags or reporter
			// fields are caught, not only those embedded in the URL itself.
			record := fmt.Sprintf("url=%s status=%s tags=%v reporter=%s",
				entry.URL, entry.URLStatus, entry.Tags, entry.Reporter)
			if !ciLogKeyPattern.MatchString(record) && !ciLogKeyPattern.MatchString(entry.URL) {
				continue
			}
			finding := recon.Finding{
				ProviderName: q,
				Source:       entry.URL,
				SourceType:   "recon:urlhaus",
				Confidence:   "medium",
				DetectedAt:   time.Now(),
			}
			// Honor cancellation while emitting so a stalled consumer cannot
			// block the sweep forever.
			select {
			case out <- finding:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	}
	return nil
}

// tagLookup queries the URLhaus tag endpoint for q and decodes the response.
func (s *URLhausSource) tagLookup(ctx context.Context, client *Client, base, q string) (urlhausResponse, error) {
	tagURL := fmt.Sprintf("%s/tag/%s/", base, url.PathEscape(q))
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, tagURL, nil)
	if err != nil {
		return urlhausResponse{}, err
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	resp, err := client.Do(ctx, req)
	if err != nil {
		return urlhausResponse{}, err
	}
	return decodeURLhausBody(resp)
}

// payloadLookup queries the URLhaus payload endpoint as a secondary search
// and decodes the response.
func (s *URLhausSource) payloadLookup(ctx context.Context, client *Client, base, q string) (urlhausResponse, error) {
	resp, err := s.payloadFallback(ctx, client, base, q)
	if err != nil {
		return urlhausResponse{}, err
	}
	return decodeURLhausBody(resp)
}

// decodeURLhausBody reads at most 512 KiB of resp's body, closes it, and
// decodes the JSON payload into a fresh urlhausResponse.
func decodeURLhausBody(resp *http.Response) (urlhausResponse, error) {
	defer resp.Body.Close()
	data, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024))
	if err != nil {
		return urlhausResponse{}, err
	}
	var parsed urlhausResponse
	if err := json.Unmarshal(data, &parsed); err != nil {
		return urlhausResponse{}, err
	}
	return parsed, nil
}
// payloadFallback tries the URLhaus payload endpoint as a secondary search
// method. The form body carries empty hash fields plus the tag, matching the
// endpoint's expected x-www-form-urlencoded shape.
func (s *URLhausSource) payloadFallback(ctx context.Context, client *Client, base, tag string) (*http.Response, error) {
	// url.Values.Encode sorts keys, yielding md5_hash=&sha256_hash=&tag=...
	form := url.Values{
		"md5_hash":    {""},
		"sha256_hash": {""},
		"tag":         {tag},
	}
	req, err := http.NewRequestWithContext(ctx, http.MethodPost, base+"/payload/", strings.NewReader(form.Encode()))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	return client.Do(ctx, req)
}

View File

@@ -0,0 +1,119 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// TestURLhaus_Name verifies the source registers under "urlhaus".
func TestURLhaus_Name(t *testing.T) {
	src := &URLhausSource{}
	if got := src.Name(); got != "urlhaus" {
		t.Fatalf("expected urlhaus, got %s", got)
	}
}
// TestURLhaus_Enabled verifies the credentialless source is always on.
func TestURLhaus_Enabled(t *testing.T) {
	src := &URLhausSource{}
	if got := src.Enabled(recon.Config{}); !got {
		t.Fatal("URLhausSource should always be enabled (credentialless)")
	}
}
// TestURLhaus_Sweep serves a tag-lookup hit whose URL embeds a key and
// expects that record to surface as a finding.
func TestURLhaus_Sweep(t *testing.T) {
	handler := http.NewServeMux()
	handler.HandleFunc("/tag/", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{
			"query_status": "ok",
			"urls": [{
				"url": "https://evil.example.com/exfil?token=sk-proj-ABCDEF1234567890abcdef",
				"url_status": "online",
				"tags": ["malware", "api_key"],
				"reporter": "abuse_ch"
			}]
		}`))
	})
	srv := httptest.NewServer(handler)
	defer srv.Close()

	registry := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
	source := &URLhausSource{
		BaseURL:  srv.URL,
		Registry: registry,
		Client:   NewClient(),
	}

	results := make(chan recon.Finding, 10)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	err := source.Sweep(ctx, "", results)
	close(results)
	if err != nil {
		t.Fatalf("Sweep error: %v", err)
	}

	var findings []recon.Finding
	for f := range results {
		findings = append(findings, f)
	}
	if len(findings) == 0 {
		t.Fatal("expected at least one finding from URLhaus")
	}
	if got := findings[0].SourceType; got != "recon:urlhaus" {
		t.Fatalf("expected recon:urlhaus, got %s", got)
	}
}
// TestURLhaus_Sweep_Empty confirms that empty tag and payload responses
// produce no findings and no error (the payload fallback is also mocked
// because the tag lookup reports no_results).
func TestURLhaus_Sweep_Empty(t *testing.T) {
	handler := http.NewServeMux()
	handler.HandleFunc("/tag/", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"query_status": "no_results", "urls": []}`))
	})
	handler.HandleFunc("/payload/", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"query_status": "no_results", "urls": []}`))
	})
	srv := httptest.NewServer(handler)
	defer srv.Close()

	registry := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
	source := &URLhausSource{
		BaseURL:  srv.URL,
		Registry: registry,
		Client:   NewClient(),
	}

	results := make(chan recon.Finding, 10)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	err := source.Sweep(ctx, "", results)
	close(results)
	if err != nil {
		t.Fatalf("Sweep error: %v", err)
	}

	var findings []recon.Finding
	for f := range results {
		findings = append(findings, f)
	}
	if len(findings) != 0 {
		t.Fatalf("expected no findings, got %d", len(findings))
	}
}

View File

@@ -0,0 +1,116 @@
package sources
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// VirusTotalSource searches the VirusTotal Intelligence API for files and URLs
// containing API key patterns. Malware samples frequently contain hard-coded
// API keys used by threat actors to exfiltrate data or proxy requests.
type VirusTotalSource struct {
	// APIKey authenticates requests via the x-apikey header (VT API v3);
	// the source is disabled when empty.
	APIKey string
	// BaseURL overrides the default API endpoint (primarily for tests);
	// empty selects the production endpoint.
	BaseURL string
	// Registry supplies the provider keywords that become search queries.
	Registry *providers.Registry
	// Limiters throttles outbound requests per source name; may be nil.
	Limiters *recon.LimiterRegistry
	// Client is the HTTP client wrapper; a default is created when nil.
	Client *Client
}

// Compile-time check that VirusTotalSource implements recon.ReconSource.
var _ recon.ReconSource = (*VirusTotalSource)(nil)

// Name returns the registry identifier for this source.
func (s *VirusTotalSource) Name() string { return "virustotal" }

// RateLimit allows one request every 15 seconds (~4 requests/minute).
func (s *VirusTotalSource) RateLimit() rate.Limit { return rate.Every(15 * time.Second) }

// Burst permits up to 2 requests in a burst.
func (s *VirusTotalSource) Burst() int { return 2 }

// RespectsRobots reports that this API client does not consult robots.txt.
func (s *VirusTotalSource) RespectsRobots() bool { return false }

// Enabled reports whether the source can run; it is credential-gated on APIKey.
func (s *VirusTotalSource) Enabled(_ recon.Config) bool {
	return s.APIKey != ""
}
// vtSearchResponse represents the top-level VT intelligence search response.
type vtSearchResponse struct {
	Data []vtSearchItem `json:"data"`
}

// vtSearchItem is a single item in the VT search results.
type vtSearchItem struct {
	// ID identifies the object (used to build the GUI link in findings).
	ID string `json:"id"`
	// Attributes is kept as raw JSON so Sweep can pattern-match the whole
	// attribute payload without committing to a schema.
	Attributes json.RawMessage `json:"attributes"`
}
// Sweep runs each provider-derived query against the VT Intelligence search
// API and emits a recon.Finding for every file whose attributes match the
// key pattern.
//
// Matching is done on the raw attributes JSON, so a key surfacing in any
// attribute field is caught. Request or decode failures skip to the next
// query (best-effort sweep); only context cancellation or rate-limiter
// errors abort it. The query argument is unused: queries come from
// BuildQueries over the Registry.
func (s *VirusTotalSource) Sweep(ctx context.Context, query string, out chan<- recon.Finding) error {
	base := s.BaseURL
	if base == "" {
		base = "https://www.virustotal.com/api/v3"
	}
	client := s.Client
	if client == nil {
		client = NewClient()
	}
	queries := BuildQueries(s.Registry, "virustotal")
	if len(queries) == 0 {
		return nil
	}
	for _, q := range queries {
		if err := ctx.Err(); err != nil {
			return err
		}
		if s.Limiters != nil {
			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
				return err
			}
		}
		searchURL := fmt.Sprintf(
			"%s/intelligence/search?query=%s&limit=10",
			base, url.QueryEscape(q),
		)
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
		if err != nil {
			continue
		}
		// VT API v3 authenticates with the x-apikey header.
		req.Header.Set("x-apikey", s.APIKey)
		resp, err := client.Do(ctx, req)
		if err != nil {
			continue
		}
		data, err := io.ReadAll(io.LimitReader(resp.Body, 512*1024))
		_ = resp.Body.Close()
		if err != nil {
			continue
		}
		var result vtSearchResponse
		if err := json.Unmarshal(data, &result); err != nil {
			continue
		}
		for _, item := range result.Data {
			// Scan the raw attributes JSON so any field can surface a key.
			if !ciLogKeyPattern.MatchString(string(item.Attributes)) {
				continue
			}
			finding := recon.Finding{
				ProviderName: q,
				// Escape the server-supplied ID so it cannot break the link.
				Source:     fmt.Sprintf("https://www.virustotal.com/gui/file/%s", url.PathEscape(item.ID)),
				SourceType: "recon:virustotal",
				Confidence: "medium",
				DetectedAt: time.Now(),
			}
			// Honor cancellation while emitting so a stalled consumer cannot
			// block the sweep forever.
			select {
			case out <- finding:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	}
	return nil
}

View File

@@ -0,0 +1,126 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// TestVirusTotal_Name verifies the source registers under "virustotal".
func TestVirusTotal_Name(t *testing.T) {
	src := &VirusTotalSource{}
	if got := src.Name(); got != "virustotal" {
		t.Fatalf("expected virustotal, got %s", got)
	}
}
// TestVirusTotal_Enabled verifies the source is credential-gated: off
// without an API key, on with one.
func TestVirusTotal_Enabled(t *testing.T) {
	src := &VirusTotalSource{}
	if got := src.Enabled(recon.Config{}); got {
		t.Fatal("VirusTotalSource should be disabled without API key")
	}
	src.APIKey = "test-key"
	if got := src.Enabled(recon.Config{}); !got {
		t.Fatal("VirusTotalSource should be enabled with API key")
	}
}
// TestVirusTotal_Sweep serves one file record whose attributes embed a key
// and expects it to surface as a finding (with API key auth enforced).
func TestVirusTotal_Sweep(t *testing.T) {
	handler := http.NewServeMux()
	handler.HandleFunc("/intelligence/search", func(w http.ResponseWriter, r *http.Request) {
		if r.Header.Get("x-apikey") != "test-key" {
			http.Error(w, "unauthorized", http.StatusUnauthorized)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{
			"data": [{
				"id": "abc123def456",
				"attributes": {
					"meaningful_name": "malware.exe",
					"tags": ["trojan"],
					"api_key": "sk-proj-ABCDEF1234567890abcdef"
				}
			}]
		}`))
	})
	srv := httptest.NewServer(handler)
	defer srv.Close()

	registry := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
	source := &VirusTotalSource{
		APIKey:   "test-key",
		BaseURL:  srv.URL,
		Registry: registry,
		Client:   NewClient(),
	}

	results := make(chan recon.Finding, 10)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	err := source.Sweep(ctx, "", results)
	close(results)
	if err != nil {
		t.Fatalf("Sweep error: %v", err)
	}

	var findings []recon.Finding
	for f := range results {
		findings = append(findings, f)
	}
	if len(findings) == 0 {
		t.Fatal("expected at least one finding from VirusTotal")
	}
	if got := findings[0].SourceType; got != "recon:virustotal" {
		t.Fatalf("expected recon:virustotal, got %s", got)
	}
}
// TestVirusTotal_Sweep_Empty confirms an empty search result produces no
// findings and no error.
func TestVirusTotal_Sweep_Empty(t *testing.T) {
	handler := http.NewServeMux()
	handler.HandleFunc("/intelligence/search", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"data": []}`))
	})
	srv := httptest.NewServer(handler)
	defer srv.Close()

	registry := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
	source := &VirusTotalSource{
		APIKey:   "test-key",
		BaseURL:  srv.URL,
		Registry: registry,
		Client:   NewClient(),
	}

	results := make(chan recon.Finding, 10)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	err := source.Sweep(ctx, "", results)
	close(results)
	if err != nil {
		t.Fatalf("Sweep error: %v", err)
	}

	var findings []recon.Finding
	for f := range results {
		findings = append(findings, f)
	}
	if len(findings) != 0 {
		t.Fatalf("expected no findings, got %d", len(findings))
	}
}