feat(16-01): add URLhaus recon source

- URLhausSource queries the abuse.ch URLhaus API for malicious URLs that contain leaked API keys
- Credentialless source (Enabled always true, no API key needed)
- Tag lookup with payload endpoint fallback
- ciLogKeyPattern used for content matching
- Tests with httptest mocks for happy path and empty results

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
salvacybersec
2026-04-06 16:45:23 +03:00
parent e02bad69ba
commit 35fa4ad174
2 changed files with 271 additions and 0 deletions

View File

@@ -0,0 +1,119 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// TestURLhaus_Name verifies the source identifies itself as "urlhaus".
func TestURLhaus_Name(t *testing.T) {
	src := &URLhausSource{}
	if got := src.Name(); got != "urlhaus" {
		t.Fatalf("expected urlhaus, got %s", got)
	}
}
// TestURLhaus_Enabled confirms the source reports itself enabled even with
// a zero-value config, since URLhaus requires no credentials.
func TestURLhaus_Enabled(t *testing.T) {
	src := &URLhausSource{}
	enabled := src.Enabled(recon.Config{})
	if !enabled {
		t.Fatal("URLhausSource should always be enabled (credentialless)")
	}
}
// TestURLhaus_Sweep exercises the happy path: the mocked /tag/ endpoint
// serves one online malicious URL embedding an "sk-proj-" token, and the
// sweep must emit at least one finding with SourceType "recon:urlhaus".
func TestURLhaus_Sweep(t *testing.T) {
	mux := http.NewServeMux()
	mux.HandleFunc("/tag/", func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		// Minimal URLhaus tag-query response containing a key-bearing URL.
		_, _ = w.Write([]byte(`{
"query_status": "ok",
"urls": [{
"url": "https://evil.example.com/exfil?token=sk-proj-ABCDEF1234567890abcdef",
"url_status": "online",
"tags": ["malware", "api_key"],
"reporter": "abuse_ch"
}]
}`))
	})
	server := httptest.NewServer(mux)
	defer server.Close()

	// Registry with a single provider whose keyword matches the mocked URL.
	registry := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
	source := &URLhausSource{
		BaseURL:  server.URL,
		Registry: registry,
		Client:   NewClient(),
	}

	results := make(chan recon.Finding, 10)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Sweep is synchronous, so the channel can be closed once it returns.
	sweepErr := source.Sweep(ctx, "", results)
	close(results)
	if sweepErr != nil {
		t.Fatalf("Sweep error: %v", sweepErr)
	}

	var collected []recon.Finding
	for finding := range results {
		collected = append(collected, finding)
	}
	if len(collected) == 0 {
		t.Fatal("expected at least one finding from URLhaus")
	}
	if got := collected[0].SourceType; got != "recon:urlhaus" {
		t.Fatalf("expected recon:urlhaus, got %s", got)
	}
}
// TestURLhaus_Sweep_Empty covers the no-results path: both the /tag/
// endpoint and the /payload/ fallback return "no_results", so the sweep
// must complete without error and without emitting any findings.
func TestURLhaus_Sweep_Empty(t *testing.T) {
	// Same empty body for both endpoints the source may query.
	emptyHandler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{"query_status": "no_results", "urls": []}`))
	}
	mux := http.NewServeMux()
	mux.HandleFunc("/tag/", emptyHandler)
	mux.HandleFunc("/payload/", emptyHandler)
	server := httptest.NewServer(mux)
	defer server.Close()

	registry := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
	source := &URLhausSource{
		BaseURL:  server.URL,
		Registry: registry,
		Client:   NewClient(),
	}

	results := make(chan recon.Finding, 10)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Sweep is synchronous, so the channel can be closed once it returns.
	sweepErr := source.Sweep(ctx, "", results)
	close(results)
	if sweepErr != nil {
		t.Fatalf("Sweep error: %v", sweepErr)
	}

	var collected []recon.Finding
	for finding := range results {
		collected = append(collected, finding)
	}
	if len(collected) != 0 {
		t.Fatalf("expected no findings, got %d", len(collected))
	}
}