package sources

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"net/http"
	"net/http/httptest"
	"strings"
	"sync/atomic"
	"testing"
	"time"

	"github.com/salvacybersec/keyhunter/pkg/providers"
	"github.com/salvacybersec/keyhunter/pkg/recon"
)

// syntheticRegistry builds a two-provider registry for tests: "openai" with
// keyword "sk-proj-" and "anthropic" with keyword "sk-ant-".
func syntheticRegistry() *providers.Registry {
	return providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
		{Name: "anthropic", Keywords: []string{"sk-ant-"}},
	})
}

// ghStubHandler returns a handler that echoes the query back in two items.
//
// Every request atomically increments *calls. The handler reports a test
// error (without aborting the request) when the path is not /search/code,
// when the Authorization header is not "Bearer testtoken", or when the Accept
// header does not contain "text-match". The first item carries a
// text_matches fragment; the second item does not.
func ghStubHandler(t *testing.T, calls *int32) http.HandlerFunc {
	t.Helper()
	return func(w http.ResponseWriter, r *http.Request) {
		atomic.AddInt32(calls, 1)
		if r.URL.Path != "/search/code" {
			t.Errorf("unexpected path: %s", r.URL.Path)
		}
		if got := r.Header.Get("Authorization"); got != "Bearer testtoken" {
			t.Errorf("missing bearer token: %q", got)
		}
		if got := r.Header.Get("Accept"); !strings.Contains(got, "text-match") {
			t.Errorf("missing text-match accept header: %q", got)
		}
		q := r.URL.Query().Get("q")
		body := map[string]any{
			"items": []map[string]any{
				{
					"html_url":   "https://github.com/org/repo/blob/main/a.env#" + q,
					"repository": map[string]any{"full_name": "org/repo"},
					"text_matches": []map[string]any{
						{"fragment": "snippet for " + q},
					},
				},
				{
					"html_url":   "https://github.com/org/repo/blob/main/b.env#" + q,
					"repository": map[string]any{"full_name": "org/repo"},
				},
			},
		}
		w.Header().Set("Content-Type", "application/json")
		// Best effort: a failed write surfaces on the client side as a
		// Sweep error or a wrong finding count, so it is not reported here.
		_ = json.NewEncoder(w).Encode(body)
	}
}

// TestGitHubSource_EnabledReflectsToken checks that Enabled is false for a
// source built with an empty token and true for one built with a token.
func TestGitHubSource_EnabledReflectsToken(t *testing.T) {
	reg := syntheticRegistry()
	lim := recon.NewLimiterRegistry()

	if s := NewGitHubSource("", reg, lim); s.Enabled(recon.Config{}) {
		t.Errorf("expected Enabled=false with empty token")
	}
	if s := NewGitHubSource("tok", reg, lim); !s.Enabled(recon.Config{}) {
		t.Errorf("expected Enabled=true with token")
	}
}

// TestGitHubSource_SweepEmptyTokenReturnsNil checks that Sweep on a source
// with an empty token returns a nil error and emits no findings.
func TestGitHubSource_SweepEmptyTokenReturnsNil(t *testing.T) {
	reg := syntheticRegistry()
	lim := recon.NewLimiterRegistry()
	s := NewGitHubSource("", reg, lim)

	out := make(chan recon.Finding, 10)
	if err := s.Sweep(context.Background(), "", out); err != nil {
		t.Fatalf("expected nil err, got %v", err)
	}
	// Close before draining so countFindings' range loop terminates.
	close(out)
	if n := countFindings(out); n != 0 {
		t.Fatalf("expected 0 findings, got %d", n)
	}
}

// TestGitHubSource_SweepEmitsFindings runs Sweep against the stub server and
// checks the number of findings, their SourceType/Source/ProviderName fields,
// and the number of HTTP calls made.
func TestGitHubSource_SweepEmitsFindings(t *testing.T) {
	reg := syntheticRegistry()
	lim := recon.NewLimiterRegistry()

	var calls int32
	srv := httptest.NewServer(ghStubHandler(t, &calls))
	defer srv.Close()

	s := NewGitHubSource("testtoken", reg, lim)
	s.BaseURL = srv.URL
	// Use a generous limiter so the test doesn't wait seconds for each query.
	// Overwrite via the limiter registry by pre-registering at high rate.
	_ = lim.For(s.Name(), 1000, 100)

	out := make(chan recon.Finding, 32)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	// Sweep runs in its own goroutine; closing out once it returns ends the
	// range loop below. done is buffered so the send never blocks.
	done := make(chan error, 1)
	go func() {
		done <- s.Sweep(ctx, "", out)
		close(out)
	}()

	var findings []recon.Finding
	for f := range out {
		findings = append(findings, f)
	}
	if err := <-done; err != nil {
		t.Fatalf("Sweep error: %v", err)
	}

	// syntheticRegistry has 2 keywords -> 2 queries -> 2 items each = 4 findings.
	if len(findings) != 4 {
		t.Fatalf("expected 4 findings, got %d", len(findings))
	}
	for _, f := range findings {
		if f.SourceType != "recon:github" {
			t.Errorf("SourceType=%q want recon:github", f.SourceType)
		}
		if !strings.HasPrefix(f.Source, "https://github.com/org/repo/blob/main/") {
			t.Errorf("Source=%q unexpected", f.Source)
		}
		if f.ProviderName != "openai" && f.ProviderName != "anthropic" {
			t.Errorf("ProviderName=%q unexpected", f.ProviderName)
		}
	}

	// Two queries -> two calls.
	if got := atomic.LoadInt32(&calls); got != 2 {
		t.Errorf("expected 2 calls, got %d", got)
	}
}

// TestGitHubSource_ProviderNameFromKeyword checks that each finding's
// ProviderName matches the provider owning the keyword that was queried, and
// that findings arrive in the expected order.
func TestGitHubSource_ProviderNameFromKeyword(t *testing.T) {
	reg := syntheticRegistry()
	lim := recon.NewLimiterRegistry()
	_ = lim.For("github", 1000, 100)

	// Track which query came in so we can assert providerName mapping per-call.
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		q := r.URL.Query().Get("q")
		// JSON-encode the URL rather than splicing q in raw: a query that
		// contains a double quote or backslash would otherwise make the stub
		// return malformed JSON.
		htmlURL, err := json.Marshal("https://example/" + q)
		if err != nil {
			t.Errorf("encoding html_url for query %q: %v", q, err)
			http.Error(w, "encode failure", http.StatusInternalServerError)
			return
		}
		body := fmt.Sprintf(`{"items":[{"html_url":%s,"repository":{"full_name":"o/r"}}]}`, htmlURL)
		w.Header().Set("Content-Type", "application/json")
		// Best effort: a failed write shows up as a Sweep error or a wrong
		// finding count in the assertions below.
		_, _ = w.Write([]byte(body))
	}))
	defer srv.Close()

	s := NewGitHubSource("tok", reg, lim)
	s.BaseURL = srv.URL

	// Sweep is called synchronously, so out must be buffered large enough to
	// hold every finding (two are expected).
	out := make(chan recon.Finding, 8)
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()
	if err := s.Sweep(ctx, "", out); err != nil {
		t.Fatalf("Sweep: %v", err)
	}
	close(out)

	// BuildQueries returns sorted keywords: sk-ant- then sk-proj-.
	// So findings should arrive in that order.
	var got []string
	for f := range out {
		got = append(got, f.ProviderName)
	}
	if len(got) != 2 {
		t.Fatalf("expected 2 findings, got %d", len(got))
	}
	if got[0] != "anthropic" || got[1] != "openai" {
		t.Errorf("expected sorted provider names [anthropic openai], got %v", got)
	}
}

// TestGitHubSource_CtxCancelled checks that Sweep returns an error matching
// context.Canceled when the context is already cancelled before the call.
func TestGitHubSource_CtxCancelled(t *testing.T) {
	reg := syntheticRegistry()
	lim := recon.NewLimiterRegistry()
	_ = lim.For("github", 1000, 100)

	s := NewGitHubSource("tok", reg, lim)
	s.BaseURL = "http://127.0.0.1:1" // unused, should never be reached

	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	out := make(chan recon.Finding, 1)
	err := s.Sweep(ctx, "", out)
	if !errors.Is(err, context.Canceled) {
		t.Fatalf("expected context.Canceled, got %v", err)
	}
}

// TestGitHubSource_Unauthorized checks that a 401 response from the server
// makes Sweep return an error matching ErrUnauthorized.
func TestGitHubSource_Unauthorized(t *testing.T) {
	reg := syntheticRegistry()
	lim := recon.NewLimiterRegistry()
	_ = lim.For("github", 1000, 100)

	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusUnauthorized)
		_, _ = w.Write([]byte("bad token"))
	}))
	defer srv.Close()

	s := NewGitHubSource("tok", reg, lim)
	s.BaseURL = srv.URL

	out := make(chan recon.Finding, 1)
	err := s.Sweep(context.Background(), "", out)
	if !errors.Is(err, ErrUnauthorized) {
		t.Fatalf("expected ErrUnauthorized, got %v", err)
	}
}

// Compile-time assertion that GitHubSource satisfies recon.ReconSource.
var _ recon.ReconSource = (*GitHubSource)(nil)

// countFindings drains ch and returns how many findings it received. It
// returns only once ch has been closed.
func countFindings(ch <-chan recon.Finding) int {
	n := 0
	for range ch {
		n++
	}
	return n
}