Files
keyhunter/pkg/recon/sources/duckduckgo_test.go
salvacybersec 770705302c feat(11-01): add DuckDuckGoSource, YandexSource, and BraveSource
- DuckDuckGoSource scrapes HTML search (no API key, always enabled, RespectsRobots=true)
- YandexSource uses Yandex XML Search API (user+key required, XML response parsing)
- BraveSource uses Brave Search API (X-Subscription-Token header, JSON response)
- All three follow established error handling: 401 aborts, transient continues, ctx cancellation returns
2026-04-06 11:54:42 +03:00

135 lines
3.4 KiB
Go

package sources
import (
"context"
"errors"
"net/http"
"net/http/httptest"
"sync/atomic"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// ddgHTMLFixture is a canned HTML results page served by ddgStubHandler.
// It contains three anchors with class "result__a" (pastebin.com, github.com
// and example.com hrefs), each wrapped in its own <div class="result">.
const ddgHTMLFixture = `<!DOCTYPE html>
<html>
<body>
<div class="results">
<div class="result">
<a class="result__a" href="https://pastebin.com/abc123">Pastebin Result</a>
</div>
<div class="result">
<a class="result__a" href="https://github.com/user/repo/blob/main/.env">GitHub Result</a>
</div>
<div class="result">
<a class="result__a" href="https://example.com/page">Example</a>
</div>
</div>
</body>
</html>`
// ddgStubHandler builds a fake search endpoint for tests. Each request
// atomically increments *calls, any request path other than "/html/" is
// reported through t.Errorf, and ddgHTMLFixture is written back with a
// text/html Content-Type.
func ddgStubHandler(t *testing.T, calls *int32) http.HandlerFunc {
	t.Helper()

	serve := func(rw http.ResponseWriter, req *http.Request) {
		atomic.AddInt32(calls, 1)

		if got := req.URL.Path; got != "/html/" {
			t.Errorf("unexpected path: %s", got)
		}

		rw.Header().Set("Content-Type", "text/html")
		// The write result is intentionally discarded, as in a best-effort stub.
		_, _ = rw.Write([]byte(ddgHTMLFixture))
	}
	return serve
}
// TestDuckDuckGoSource_AlwaysEnabled checks that Enabled reports true when
// given an empty recon.Config.
func TestDuckDuckGoSource_AlwaysEnabled(t *testing.T) {
	src := NewDuckDuckGoSource(syntheticRegistry(), recon.NewLimiterRegistry())

	emptyCfg := recon.Config{}
	if enabled := src.Enabled(emptyCfg); !enabled {
		t.Error("expected Enabled=true always")
	}
}
// TestDuckDuckGoSource_RespectsRobots checks that RespectsRobots reports true
// for a freshly constructed source.
func TestDuckDuckGoSource_RespectsRobots(t *testing.T) {
	src := NewDuckDuckGoSource(syntheticRegistry(), recon.NewLimiterRegistry())

	if honoursRobots := src.RespectsRobots(); !honoursRobots {
		t.Error("expected RespectsRobots=true")
	}
}
// TestDuckDuckGoSource_SweepEmitsFindings points the source at a stub server
// that serves ddgHTMLFixture and drains everything Sweep emits. It expects
// six findings (the test's premise is 2 keywords * 3 fixture links), each
// tagged SourceType "recon:duckduckgo" with Confidence "low", and exactly two
// requests against the stub.
func TestDuckDuckGoSource_SweepEmitsFindings(t *testing.T) {
	registry := syntheticRegistry()
	limiters := recon.NewLimiterRegistry()
	// Register a limiter for "duckduckgo" up front; the returned value is unused here.
	_ = limiters.For("duckduckgo", 1000, 100)

	var requestCount int32
	stub := httptest.NewServer(ddgStubHandler(t, &requestCount))
	defer stub.Close()

	src := NewDuckDuckGoSource(registry, limiters)
	src.BaseURL = stub.URL

	findingsCh := make(chan recon.Finding, 32)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Run Sweep in the background: its result goes to the buffered sweepErr
	// channel, then findingsCh is closed so the drain loop below terminates.
	sweepErr := make(chan error, 1)
	go func() {
		sweepErr <- src.Sweep(ctx, "", findingsCh)
		close(findingsCh)
	}()

	var collected []recon.Finding
	for item := range findingsCh {
		collected = append(collected, item)
	}

	if err := <-sweepErr; err != nil {
		t.Fatalf("Sweep error: %v", err)
	}

	// 2 keywords * 3 links = 6 findings
	if n := len(collected); n != 6 {
		t.Fatalf("expected 6 findings, got %d", n)
	}

	for i := range collected {
		finding := collected[i]
		if gotType := finding.SourceType; gotType != "recon:duckduckgo" {
			t.Errorf("SourceType=%q want recon:duckduckgo", gotType)
		}
		if gotConf := finding.Confidence; gotConf != "low" {
			t.Errorf("Confidence=%q want low", gotConf)
		}
	}

	if hits := atomic.LoadInt32(&requestCount); hits != 2 {
		t.Errorf("expected 2 DDG calls, got %d", hits)
	}
}
// TestDuckDuckGoSource_CtxCancelled calls Sweep with a context that has
// already been cancelled and requires the returned error to match
// context.Canceled.
func TestDuckDuckGoSource_CtxCancelled(t *testing.T) {
	registry := syntheticRegistry()
	limiters := recon.NewLimiterRegistry()
	_ = limiters.For("duckduckgo", 1000, 100)

	src := NewDuckDuckGoSource(registry, limiters)
	src.BaseURL = "http://127.0.0.1:1"

	// Cancel before Sweep is ever invoked.
	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	sink := make(chan recon.Finding, 1)
	sweepErr := src.Sweep(ctx, "", sink)
	if errors.Is(sweepErr, context.Canceled) {
		return
	}
	t.Fatalf("expected context.Canceled, got %v", sweepErr)
}
// TestDuckDuckGoSource_EmptyRegistryNoError builds a source with a nil
// registry, points it at a stub server, and requires Sweep to return nil
// without the stub ever receiving a request.
func TestDuckDuckGoSource_EmptyRegistryNoError(t *testing.T) {
	lim := recon.NewLimiterRegistry()
	s := NewDuckDuckGoSource(nil, lim)
	out := make(chan recon.Finding, 1)

	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The handler runs on the HTTP server's goroutine, not the test
		// goroutine. t.Fatal (FailNow) must only be called from the goroutine
		// running the test, so record the failure with t.Error instead, which
		// is safe to call from any goroutine.
		t.Error("should not be called with nil registry")
	}))
	defer srv.Close()
	s.BaseURL = srv.URL

	if err := s.Sweep(context.Background(), "", out); err != nil {
		t.Fatalf("expected nil, got %v", err)
	}
}