From 770705302cafc46f9a17c09ea0dafb00fc6dcd65 Mon Sep 17 00:00:00 2001 From: salvacybersec Date: Mon, 6 Apr 2026 11:54:42 +0300 Subject: [PATCH] feat(11-01): add DuckDuckGoSource, YandexSource, and BraveSource - DuckDuckGoSource scrapes HTML search (no API key, always enabled, RespectsRobots=true) - YandexSource uses Yandex XML Search API (user+key required, XML response parsing) - BraveSource uses Brave Search API (X-Subscription-Token header, JSON response) - All three follow established error handling: 401 aborts, transient continues, ctx cancellation returns --- pkg/recon/sources/brave.go | 153 +++++++++++++++++++++++ pkg/recon/sources/brave_test.go | 145 ++++++++++++++++++++++ pkg/recon/sources/duckduckgo.go | 116 ++++++++++++++++++ pkg/recon/sources/duckduckgo_test.go | 134 ++++++++++++++++++++ pkg/recon/sources/yandex.go | 177 +++++++++++++++++++++++++++ pkg/recon/sources/yandex_test.go | 171 ++++++++++++++++++++++++++ 6 files changed, 896 insertions(+) create mode 100644 pkg/recon/sources/brave.go create mode 100644 pkg/recon/sources/brave_test.go create mode 100644 pkg/recon/sources/duckduckgo.go create mode 100644 pkg/recon/sources/duckduckgo_test.go create mode 100644 pkg/recon/sources/yandex.go create mode 100644 pkg/recon/sources/yandex_test.go diff --git a/pkg/recon/sources/brave.go b/pkg/recon/sources/brave.go new file mode 100644 index 0000000..06f930c --- /dev/null +++ b/pkg/recon/sources/brave.go @@ -0,0 +1,153 @@ +package sources + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "net/http" + "net/url" + "strings" + "time" + + "golang.org/x/time/rate" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +// BraveSource implements recon.ReconSource against the Brave Search API. +// It requires an API key (X-Subscription-Token) to be enabled. +type BraveSource struct { + APIKey string + BaseURL string + Registry *providers.Registry + Limiters *recon.LimiterRegistry + client *Client +} + +// Compile-time assertion. +var _ recon.ReconSource = (*BraveSource)(nil) + +// NewBraveSource constructs a BraveSource with the shared retry client. +func NewBraveSource(apiKey string, reg *providers.Registry, lim *recon.LimiterRegistry) *BraveSource { + return &BraveSource{ + APIKey: apiKey, + BaseURL: "https://api.search.brave.com", + Registry: reg, + Limiters: lim, + client: NewClient(), + } +} + +func (s *BraveSource) Name() string { return "brave" } +func (s *BraveSource) RateLimit() rate.Limit { return rate.Every(1 * time.Second) } +func (s *BraveSource) Burst() int { return 1 } +func (s *BraveSource) RespectsRobots() bool { return false } + +// Enabled returns true only when APIKey is configured. +func (s *BraveSource) Enabled(_ recon.Config) bool { return s.APIKey != "" } + +// Sweep issues one Brave Search request per provider keyword and emits a +// Finding for every web result. +func (s *BraveSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error { + if s.APIKey == "" { + return nil + } + base := s.BaseURL + if base == "" { + base = "https://api.search.brave.com" + } + + queries := BuildQueries(s.Registry, "brave") + kwIndex := braveKeywordIndex(s.Registry) + + for _, q := range queries { + if err := ctx.Err(); err != nil { + return err + } + if s.Limiters != nil { + if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil { + return err + } + } + + endpoint := fmt.Sprintf("%s/res/v1/web/search?q=%s&count=20", base, url.QueryEscape(q)) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) + if err != nil { + return fmt.Errorf("brave: build request: %w", err) + } + req.Header.Set("X-Subscription-Token", s.APIKey) + req.Header.Set("Accept", "application/json") + req.Header.Set("User-Agent", "keyhunter-recon") + + resp, err := s.client.Do(ctx, req) + if err != nil { + if errors.Is(err, ErrUnauthorized) { + return err + } + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return err + } + continue + } + + var parsed braveSearchResponse + decErr := json.NewDecoder(resp.Body).Decode(&parsed) + _ = resp.Body.Close() + if decErr != nil { + continue + } + + provName := kwIndex[strings.ToLower(extractGoogleKeyword(q))] + for _, it := range parsed.Web.Results { + f := recon.Finding{ + ProviderName: provName, + Confidence: "low", + Source: it.URL, + SourceType: "recon:brave", + DetectedAt: time.Now(), + } + select { + case out <- f: + case <-ctx.Done(): + return ctx.Err() + } + } + } + return nil +} + +type braveSearchResponse struct { + Web braveWebResults `json:"web"` +} + +type braveWebResults struct { + Results []braveWebItem `json:"results"` +} + +type braveWebItem struct { + URL string `json:"url"` + Title string `json:"title"` + Description string `json:"description"` +} + +// braveKeywordIndex maps lowercased keywords to provider names. +func braveKeywordIndex(reg *providers.Registry) map[string]string { + m := make(map[string]string) + if reg == nil { + return m + } + for _, p := range reg.List() { + for _, k := range p.Keywords { + kl := strings.ToLower(strings.TrimSpace(k)) + if kl == "" { + continue + } + if _, exists := m[kl]; !exists { + m[kl] = p.Name + } + } + } + return m +} diff --git a/pkg/recon/sources/brave_test.go b/pkg/recon/sources/brave_test.go new file mode 100644 index 0000000..f60f5a8 --- /dev/null +++ b/pkg/recon/sources/brave_test.go @@ -0,0 +1,145 @@ +package sources + +import ( + "context" + "encoding/json" + "errors" + "net/http" + "net/http/httptest" + "strings" + "sync/atomic" + "testing" + "time" + + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +func braveStubHandler(t *testing.T, calls *int32) http.HandlerFunc { + t.Helper() + return func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(calls, 1) + if !strings.HasPrefix(r.URL.Path, "/res/v1/web/search") { + t.Errorf("unexpected path: %s", r.URL.Path) + } + if got := r.Header.Get("X-Subscription-Token"); got != "testtoken" { + t.Errorf("missing subscription token: %q", got) + } + body := map[string]any{ + "web": map[string]any{ + "results": []map[string]any{ + {"url": "https://pastebin.com/brave1", "title": "Brave Result 1", "description": "found key"}, + {"url": "https://github.com/org/repo/blob/main/config.env", "title": "Brave Result 2", "description": "leaked"}, + }, + }, + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(body) + } +} + +func TestBraveSource_EnabledRequiresAPIKey(t *testing.T) { + reg := syntheticRegistry() + lim := recon.NewLimiterRegistry() + + if s := NewBraveSource("", reg, lim); s.Enabled(recon.Config{}) { + t.Error("expected Enabled=false with empty key") + } + if s := NewBraveSource("key", reg, lim); !s.Enabled(recon.Config{}) { + t.Error("expected Enabled=true with key") + } +} + +func TestBraveSource_SweepEmptyKeyReturnsNil(t *testing.T) { + reg := syntheticRegistry() + lim := recon.NewLimiterRegistry() + s := NewBraveSource("", reg, lim) + + out := make(chan recon.Finding, 10) + if err := s.Sweep(context.Background(), "", out); err != nil { + t.Fatalf("expected nil, got %v", err) + } + close(out) + if n := countFindings(out); n != 0 { + t.Fatalf("expected 0 findings, got %d", n) + } +} + +func TestBraveSource_SweepEmitsFindings(t *testing.T) { + reg := syntheticRegistry() + lim := recon.NewLimiterRegistry() + _ = lim.For("brave", 1000, 100) + + var calls int32 + srv := httptest.NewServer(braveStubHandler(t, &calls)) + defer srv.Close() + + s := NewBraveSource("testtoken", reg, lim) + s.BaseURL = srv.URL + + out := make(chan recon.Finding, 32) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + done := make(chan error, 1) + go func() { done <- s.Sweep(ctx, "", out); close(out) }() + + var findings []recon.Finding + for f := range out { + findings = append(findings, f) + } + if err := <-done; err != nil { + t.Fatalf("Sweep error: %v", err) + } + + // 2 keywords * 2 items = 4 findings + if len(findings) != 4 { + t.Fatalf("expected 4 findings, got %d", len(findings)) + } + for _, f := range findings { + if f.SourceType != "recon:brave" { + t.Errorf("SourceType=%q want recon:brave", f.SourceType) + } + } + if got := atomic.LoadInt32(&calls); got != 2 { + t.Errorf("expected 2 calls, got %d", got) + } +} + +func TestBraveSource_CtxCancelled(t *testing.T) { + reg := syntheticRegistry() + lim := recon.NewLimiterRegistry() + _ = lim.For("brave", 1000, 100) + + s := NewBraveSource("key", reg, lim) + s.BaseURL = "http://127.0.0.1:1" + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + out := make(chan recon.Finding, 1) + err := s.Sweep(ctx, "", out) + if !errors.Is(err, context.Canceled) { + t.Fatalf("expected context.Canceled, got %v", err) + } +} + +func TestBraveSource_Unauthorized(t *testing.T) { + reg := syntheticRegistry() + lim := recon.NewLimiterRegistry() + _ = lim.For("brave", 1000, 100) + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusUnauthorized) + _, _ = w.Write([]byte("bad token")) + })) + defer srv.Close() + + s := NewBraveSource("key", reg, lim) + s.BaseURL = srv.URL + + out := make(chan recon.Finding, 1) + err := s.Sweep(context.Background(), "", out) + if !errors.Is(err, ErrUnauthorized) { + t.Fatalf("expected ErrUnauthorized, got %v", err) + } +} diff --git a/pkg/recon/sources/duckduckgo.go b/pkg/recon/sources/duckduckgo.go new file mode 100644 index 0000000..cc4a69e --- /dev/null +++ b/pkg/recon/sources/duckduckgo.go @@ -0,0 +1,116 @@ +package sources + +import ( + "context" + "fmt" + "net/http" + "net/url" + "regexp" + "time" + + "golang.org/x/time/rate" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +// DuckDuckGoSource implements recon.ReconSource by scraping DuckDuckGo's HTML +// search endpoint. No API key is required -- this source is always enabled. +// +// It operates conservatively (2s per request) and declares RespectsRobots=true. +type DuckDuckGoSource struct { + BaseURL string + Registry *providers.Registry + Limiters *recon.LimiterRegistry + client *Client +} + +// Compile-time assertion. +var _ recon.ReconSource = (*DuckDuckGoSource)(nil) + +// ddgResultRE matches DuckDuckGo HTML result links. The HTML search page uses +// anchors for organic results. +var ddgResultRE = regexp.MustCompile(`^https?://`) + +// NewDuckDuckGoSource constructs a DuckDuckGoSource with the shared retry client. +func NewDuckDuckGoSource(reg *providers.Registry, lim *recon.LimiterRegistry) *DuckDuckGoSource { + return &DuckDuckGoSource{ + BaseURL: "https://html.duckduckgo.com", + Registry: reg, + Limiters: lim, + client: NewClient(), + } +} + +func (s *DuckDuckGoSource) Name() string { return "duckduckgo" } +func (s *DuckDuckGoSource) RateLimit() rate.Limit { return rate.Every(2 * time.Second) } +func (s *DuckDuckGoSource) Burst() int { return 1 } +func (s *DuckDuckGoSource) RespectsRobots() bool { return true } + +// Enabled always returns true -- DuckDuckGo HTML scraping requires no credentials. +func (s *DuckDuckGoSource) Enabled(_ recon.Config) bool { return true } + +// Sweep iterates provider keywords, scrapes DuckDuckGo HTML search, and emits +// a Finding per result link. +func (s *DuckDuckGoSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error { + base := s.BaseURL + if base == "" { + base = "https://html.duckduckgo.com" + } + client := s.client + if client == nil { + client = NewClient() + } + + queries := BuildQueries(s.Registry, "duckduckgo") + if len(queries) == 0 { + return nil + } + + for _, q := range queries { + if err := ctx.Err(); err != nil { + return err + } + if s.Limiters != nil { + if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil { + return err + } + } + + searchURL := fmt.Sprintf("%s/html/?q=%s", base, url.QueryEscape(q)) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil) + if err != nil { + return fmt.Errorf("duckduckgo: build req: %w", err) + } + req.Header.Set("User-Agent", "keyhunter-recon") + + resp, err := client.Do(ctx, req) + if err != nil { + // Transient failures: continue to next query. + continue + } + links, parseErr := extractAnchorHrefs(resp.Body, ddgResultRE) + _ = resp.Body.Close() + if parseErr != nil { + continue + } + + for _, href := range links { + if err := ctx.Err(); err != nil { + return err + } + f := recon.Finding{ + Source: href, + SourceType: "recon:duckduckgo", + Confidence: "low", + DetectedAt: time.Now(), + } + select { + case out <- f: + case <-ctx.Done(): + return ctx.Err() + } + } + } + return nil +} diff --git a/pkg/recon/sources/duckduckgo_test.go b/pkg/recon/sources/duckduckgo_test.go new file mode 100644 index 0000000..86a656a --- /dev/null +++ b/pkg/recon/sources/duckduckgo_test.go @@ -0,0 +1,134 @@ +package sources + +import ( + "context" + "errors" + "net/http" + "net/http/httptest" + "sync/atomic" + "testing" + "time" + + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +const ddgHTMLFixture = ` + + +
+ + +
+ Example +
+
+ +` + +func ddgStubHandler(t *testing.T, calls *int32) http.HandlerFunc { + t.Helper() + return func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(calls, 1) + if r.URL.Path != "/html/" { + t.Errorf("unexpected path: %s", r.URL.Path) + } + w.Header().Set("Content-Type", "text/html") + _, _ = w.Write([]byte(ddgHTMLFixture)) + } +} + +func TestDuckDuckGoSource_AlwaysEnabled(t *testing.T) { + s := NewDuckDuckGoSource(syntheticRegistry(), recon.NewLimiterRegistry()) + if !s.Enabled(recon.Config{}) { + t.Error("expected Enabled=true always") + } +} + +func TestDuckDuckGoSource_RespectsRobots(t *testing.T) { + s := NewDuckDuckGoSource(syntheticRegistry(), recon.NewLimiterRegistry()) + if !s.RespectsRobots() { + t.Error("expected RespectsRobots=true") + } +} + +func TestDuckDuckGoSource_SweepEmitsFindings(t *testing.T) { + reg := syntheticRegistry() + lim := recon.NewLimiterRegistry() + _ = lim.For("duckduckgo", 1000, 100) + + var calls int32 + srv := httptest.NewServer(ddgStubHandler(t, &calls)) + defer srv.Close() + + s := NewDuckDuckGoSource(reg, lim) + s.BaseURL = srv.URL + + out := make(chan recon.Finding, 32) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + done := make(chan error, 1) + go func() { done <- s.Sweep(ctx, "", out); close(out) }() + + var findings []recon.Finding + for f := range out { + findings = append(findings, f) + } + if err := <-done; err != nil { + t.Fatalf("Sweep error: %v", err) + } + + // 2 keywords * 3 links = 6 findings + if len(findings) != 6 { + t.Fatalf("expected 6 findings, got %d", len(findings)) + } + for _, f := range findings { + if f.SourceType != "recon:duckduckgo" { + t.Errorf("SourceType=%q want recon:duckduckgo", f.SourceType) + } + if f.Confidence != "low" { + t.Errorf("Confidence=%q want low", f.Confidence) + } + } + if got := atomic.LoadInt32(&calls); got != 2 { + t.Errorf("expected 2 DDG calls, got %d", got) + } +} + +func TestDuckDuckGoSource_CtxCancelled(t *testing.T) { + reg := syntheticRegistry() + lim := recon.NewLimiterRegistry() + _ = lim.For("duckduckgo", 1000, 100) + + s := NewDuckDuckGoSource(reg, lim) + s.BaseURL = "http://127.0.0.1:1" + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + out := make(chan recon.Finding, 1) + err := s.Sweep(ctx, "", out) + if !errors.Is(err, context.Canceled) { + t.Fatalf("expected context.Canceled, got %v", err) + } +} + +func TestDuckDuckGoSource_EmptyRegistryNoError(t *testing.T) { + lim := recon.NewLimiterRegistry() + s := NewDuckDuckGoSource(nil, lim) + + out := make(chan recon.Finding, 1) + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + t.Fatal("should not be called with nil registry") + })) + defer srv.Close() + s.BaseURL = srv.URL + + if err := s.Sweep(context.Background(), "", out); err != nil { + t.Fatalf("expected nil, got %v", err) + } +} diff --git a/pkg/recon/sources/yandex.go b/pkg/recon/sources/yandex.go new file mode 100644 index 0000000..0d26c00 --- /dev/null +++ b/pkg/recon/sources/yandex.go @@ -0,0 +1,177 @@ +package sources + +import ( + "context" + "encoding/xml" + "errors" + "fmt" + "net/http" + "net/url" + "strings" + "time" + + "golang.org/x/time/rate" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +// YandexSource implements recon.ReconSource against the Yandex XML Search API. +// It requires both a User and APIKey to be enabled. +type YandexSource struct { + User string + APIKey string + BaseURL string + Registry *providers.Registry + Limiters *recon.LimiterRegistry + client *Client +} + +// Compile-time assertion. +var _ recon.ReconSource = (*YandexSource)(nil) + +// NewYandexSource constructs a YandexSource with the shared retry client. +func NewYandexSource(user, apiKey string, reg *providers.Registry, lim *recon.LimiterRegistry) *YandexSource { + return &YandexSource{ + User: user, + APIKey: apiKey, + BaseURL: "https://yandex.com", + Registry: reg, + Limiters: lim, + client: NewClient(), + } +} + +func (s *YandexSource) Name() string { return "yandex" } +func (s *YandexSource) RateLimit() rate.Limit { return rate.Every(1 * time.Second) } +func (s *YandexSource) Burst() int { return 1 } +func (s *YandexSource) RespectsRobots() bool { return false } + +// Enabled returns true only when both User and APIKey are configured. +func (s *YandexSource) Enabled(_ recon.Config) bool { + return s.User != "" && s.APIKey != "" +} + +// Sweep issues one Yandex XML search request per provider keyword and emits a +// Finding for every element in the response. +func (s *YandexSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error { + if s.User == "" || s.APIKey == "" { + return nil + } + base := s.BaseURL + if base == "" { + base = "https://yandex.com" + } + + queries := BuildQueries(s.Registry, "yandex") + kwIndex := yandexKeywordIndex(s.Registry) + + for _, q := range queries { + if err := ctx.Err(); err != nil { + return err + } + if s.Limiters != nil { + if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil { + return err + } + } + + endpoint := fmt.Sprintf("%s/search/xml?user=%s&key=%s&query=%s&l10n=en&sortby=rlv&filter=none&groupby=%s", + base, + url.QueryEscape(s.User), + url.QueryEscape(s.APIKey), + url.QueryEscape(q), + url.QueryEscape(`attr="".mode=flat.groups-on-page=50`)) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) + if err != nil { + return fmt.Errorf("yandex: build request: %w", err) + } + req.Header.Set("User-Agent", "keyhunter-recon") + + resp, err := s.client.Do(ctx, req) + if err != nil { + if errors.Is(err, ErrUnauthorized) { + return err + } + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) { + return err + } + continue + } + + var parsed yandexSearchResponse + decErr := xml.NewDecoder(resp.Body).Decode(&parsed) + _ = resp.Body.Close() + if decErr != nil { + continue + } + + provName := kwIndex[strings.ToLower(extractGoogleKeyword(q))] + for _, grp := range parsed.Response.Results.Grouping.Groups { + for _, doc := range grp.Docs { + if doc.URL == "" { + continue + } + f := recon.Finding{ + ProviderName: provName, + Confidence: "low", + Source: doc.URL, + SourceType: "recon:yandex", + DetectedAt: time.Now(), + } + select { + case out <- f: + case <-ctx.Done(): + return ctx.Err() + } + } + } + } + return nil +} + +// XML response structures for Yandex XML Search API. +type yandexSearchResponse struct { + XMLName xml.Name `xml:"yandexsearch"` + Response yandexResponse `xml:"response"` +} + +type yandexResponse struct { + Results yandexResults `xml:"results"` +} + +type yandexResults struct { + Grouping yandexGrouping `xml:"grouping"` +} + +type yandexGrouping struct { + Groups []yandexGroup `xml:"group"` +} + +type yandexGroup struct { + Docs []yandexDoc `xml:"doc"` +} + +type yandexDoc struct { + URL string `xml:"url"` +} + +// yandexKeywordIndex maps lowercased keywords to provider names. +func yandexKeywordIndex(reg *providers.Registry) map[string]string { + m := make(map[string]string) + if reg == nil { + return m + } + for _, p := range reg.List() { + for _, k := range p.Keywords { + kl := strings.ToLower(strings.TrimSpace(k)) + if kl == "" { + continue + } + if _, exists := m[kl]; !exists { + m[kl] = p.Name + } + } + } + return m +} diff --git a/pkg/recon/sources/yandex_test.go b/pkg/recon/sources/yandex_test.go new file mode 100644 index 0000000..1ec4556 --- /dev/null +++ b/pkg/recon/sources/yandex_test.go @@ -0,0 +1,171 @@ +package sources + +import ( + "context" + "errors" + "net/http" + "net/http/httptest" + "strings" + "sync/atomic" + "testing" + "time" + + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +const yandexXMLFixture = ` + + + + + + + https://pastebin.com/yandex1 + + + + + https://github.com/user/repo/blob/main/secrets.env + + + https://example.com/leaked + + + + + +` + +func yandexStubHandler(t *testing.T, calls *int32) http.HandlerFunc { + t.Helper() + return func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(calls, 1) + if !strings.HasPrefix(r.URL.Path, "/search/xml") { + t.Errorf("unexpected path: %s", r.URL.Path) + } + if r.URL.Query().Get("user") != "testuser" { + t.Errorf("missing user param") + } + if r.URL.Query().Get("key") != "testkey" { + t.Errorf("missing key param") + } + w.Header().Set("Content-Type", "application/xml") + _, _ = w.Write([]byte(yandexXMLFixture)) + } +} + +func TestYandexSource_EnabledRequiresBoth(t *testing.T) { + reg := syntheticRegistry() + lim := recon.NewLimiterRegistry() + + tests := []struct { + user, key string + want bool + }{ + {"", "", false}, + {"user", "", false}, + {"", "key", false}, + {"user", "key", true}, + } + for _, tt := range tests { + s := NewYandexSource(tt.user, tt.key, reg, lim) + if got := s.Enabled(recon.Config{}); got != tt.want { + t.Errorf("Enabled(user=%q, key=%q) = %v, want %v", tt.user, tt.key, got, tt.want) + } + } +} + +func TestYandexSource_SweepEmptyCredsReturnsNil(t *testing.T) { + reg := syntheticRegistry() + lim := recon.NewLimiterRegistry() + s := NewYandexSource("", "", reg, lim) + + out := make(chan recon.Finding, 10) + if err := s.Sweep(context.Background(), "", out); err != nil { + t.Fatalf("expected nil, got %v", err) + } + close(out) + if n := countFindings(out); n != 0 { + t.Fatalf("expected 0 findings, got %d", n) + } +} + +func TestYandexSource_SweepEmitsFindings(t *testing.T) { + reg := syntheticRegistry() + lim := recon.NewLimiterRegistry() + _ = lim.For("yandex", 1000, 100) + + var calls int32 + srv := httptest.NewServer(yandexStubHandler(t, &calls)) + defer srv.Close() + + s := NewYandexSource("testuser", "testkey", reg, lim) + s.BaseURL = srv.URL + + out := make(chan recon.Finding, 32) + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + done := make(chan error, 1) + go func() { done <- s.Sweep(ctx, "", out); close(out) }() + + var findings []recon.Finding + for f := range out { + findings = append(findings, f) + } + if err := <-done; err != nil { + t.Fatalf("Sweep error: %v", err) + } + + // 2 keywords * 3 URLs in XML = 6 findings + if len(findings) != 6 { + t.Fatalf("expected 6 findings, got %d", len(findings)) + } + for _, f := range findings { + if f.SourceType != "recon:yandex" { + t.Errorf("SourceType=%q want recon:yandex", f.SourceType) + } + } + if got := atomic.LoadInt32(&calls); got != 2 { + t.Errorf("expected 2 calls, got %d", got) + } +} + +func TestYandexSource_CtxCancelled(t *testing.T) { + reg := syntheticRegistry() + lim := recon.NewLimiterRegistry() + _ = lim.For("yandex", 1000, 100) + + s := NewYandexSource("user", "key", reg, lim) + s.BaseURL = "http://127.0.0.1:1" + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + out := make(chan recon.Finding, 1) + err := s.Sweep(ctx, "", out) + if !errors.Is(err, context.Canceled) { + t.Fatalf("expected context.Canceled, got %v", err) + } +} + +func TestYandexSource_Unauthorized(t *testing.T) { + reg := syntheticRegistry() + lim := recon.NewLimiterRegistry() + _ = lim.For("yandex", 1000, 100) + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusUnauthorized) + _, _ = w.Write([]byte("bad creds")) + })) + defer srv.Close() + + s := NewYandexSource("user", "key", reg, lim) + s.BaseURL = srv.URL + + out := make(chan recon.Finding, 1) + err := s.Sweep(context.Background(), "", out) + if !errors.Is(err, ErrUnauthorized) { + t.Fatalf("expected ErrUnauthorized, got %v", err) + } +}