package sources

import (
	"context"
	"encoding/json"
	"errors"
	"net/http"
	"net/http/httptest"
	"strings"
	"sync/atomic"
	"testing"
	"time"

	"github.com/salvacybersec/keyhunter/pkg/providers"
	"github.com/salvacybersec/keyhunter/pkg/recon"
)

// gitlabTestRegistry returns a synthetic registry with two providers whose
// keywords drive the query loop. Keywords are chosen so BuildQueries output is
// deterministic and map lookups are unambiguous.
func gitlabTestRegistry() *providers.Registry {
	return providers.NewRegistryFromProviders([]providers.Provider{
		{
			Name:     "openai",
			Keywords: []string{"sk-test"},
			Patterns: []providers.Pattern{{Regex: "sk-test[A-Za-z0-9]+", Confidence: "high"}},
		},
		{
			Name:     "demo",
			Keywords: []string{"ghkey"},
			Patterns: []providers.Pattern{{Regex: "ghkey[A-Za-z0-9]+", Confidence: "low"}},
		},
	})
}

// gitlabBlobFixture is the JSON shape the fake search endpoint in these tests
// encodes for each blob hit.
type gitlabBlobFixture struct {
	Basename  string `json:"basename"`
	Data      string `json:"data"`
	Path      string `json:"path"`
	ProjectID int    `json:"project_id"`
	Ref       string `json:"ref"`
	Startline int    `json:"startline"`
}

// TestGitLabSource_EnabledFalseWhenTokenEmpty checks that Enabled reports
// false for an empty token and true for a non-empty one, and pins the values
// returned by Name and RespectsRobots.
func TestGitLabSource_EnabledFalseWhenTokenEmpty(t *testing.T) {
	s := &GitLabSource{Token: "", Registry: gitlabTestRegistry(), Limiters: recon.NewLimiterRegistry()}
	if s.Enabled(recon.Config{}) {
		t.Fatalf("expected Enabled=false when token empty")
	}

	s2 := &GitLabSource{Token: "glpat-xxx", Registry: gitlabTestRegistry(), Limiters: recon.NewLimiterRegistry()}
	if !s2.Enabled(recon.Config{}) {
		t.Fatalf("expected Enabled=true when token set")
	}

	if s.Name() != "gitlab" {
		t.Fatalf("expected Name=gitlab, got %q", s.Name())
	}
	if s.RespectsRobots() {
		t.Fatalf("expected RespectsRobots=false for REST API source")
	}
}

// TestGitLabSource_EmptyToken_NoCallsNoError checks that Sweep with an empty
// token returns nil, issues no HTTP requests against the fake server, and
// emits no findings.
func TestGitLabSource_EmptyToken_NoCallsNoError(t *testing.T) {
	// calls counts requests reaching the fake server; it is only ever
	// accessed through sync/atomic.
	var calls int32
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		atomic.AddInt32(&calls, 1)
		w.WriteHeader(http.StatusOK)
		// Write errors are irrelevant here: any request at all fails the test.
		_, _ = w.Write([]byte("[]"))
	}))
	defer srv.Close()

	s := &GitLabSource{
		Token:    "",
		BaseURL:  srv.URL,
		Registry: gitlabTestRegistry(),
		Limiters: recon.NewLimiterRegistry(),
	}

	out := make(chan recon.Finding, 4)
	if err := s.Sweep(context.Background(), "", out); err != nil {
		t.Fatalf("expected nil err on empty token, got %v", err)
	}
	close(out)

	// Report the atomically loaded value rather than re-reading the
	// variable with a plain (non-atomic) access.
	if n := atomic.LoadInt32(&calls); n != 0 {
		t.Fatalf("expected zero HTTP calls, got %d", n)
	}
	// len on a buffered channel reports the number of queued elements.
	if len(out) != 0 {
		t.Fatalf("expected zero findings, got %d", len(out))
	}
}

// TestGitLabSource_Sweep_EmitsFindings runs Sweep against a fake
// /api/v4/search endpoint that returns the same two blobs for every request
// and verifies the request parameters (PRIVATE-TOKEN header, scope=blobs),
// the number of search calls, and the emitted findings.
func TestGitLabSource_Sweep_EmitsFindings(t *testing.T) {
	// Captured by the handler; read only after Sweep has returned.
	var gotToken string
	var gotScopes []string
	var gotSearches []string

	blobs := []gitlabBlobFixture{
		{Basename: "config.env", Data: "API_KEY=sk-testABCDEF", Path: "app/config.env", ProjectID: 42, Ref: "main", Startline: 3},
		{Basename: "README.md", Data: "use ghkeyXYZ", Path: "docs/README.md", ProjectID: 99, Ref: "master", Startline: 10},
	}

	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/api/v4/search" {
			http.Error(w, "not found", http.StatusNotFound)
			return
		}
		gotToken = r.Header.Get("PRIVATE-TOKEN")
		gotScopes = append(gotScopes, r.URL.Query().Get("scope"))
		gotSearches = append(gotSearches, r.URL.Query().Get("search"))
		w.Header().Set("Content-Type", "application/json")
		// An encode failure would surface below as a wrong finding count.
		_ = json.NewEncoder(w).Encode(blobs)
	}))
	defer srv.Close()

	s := &GitLabSource{
		Token:    "glpat-secret",
		BaseURL:  srv.URL,
		Registry: gitlabTestRegistry(),
		Limiters: recon.NewLimiterRegistry(),
	}

	out := make(chan recon.Finding, 32)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	if err := s.Sweep(ctx, "", out); err != nil {
		t.Fatalf("sweep err: %v", err)
	}
	close(out)

	if gotToken != "glpat-secret" {
		t.Fatalf("expected PRIVATE-TOKEN header, got %q", gotToken)
	}
	for _, sc := range gotScopes {
		if sc != "blobs" {
			t.Fatalf("expected scope=blobs, got %q", sc)
		}
	}

	// Two providers → two queries → two requests → 4 findings (2 blobs each).
	if len(gotSearches) != 2 {
		t.Fatalf("expected 2 search calls, got %d: %v", len(gotSearches), gotSearches)
	}

	findings := gitlabDrain(out)
	if len(findings) != 4 {
		t.Fatalf("expected 4 findings (2 blobs × 2 queries), got %d", len(findings))
	}

	var sawP42, sawP99 bool
	for _, f := range findings {
		if f.SourceType != "recon:gitlab" {
			t.Errorf("bad SourceType: %q", f.SourceType)
		}
		// NOTE(review): every finding is expected to be "low" even though the
		// test registry also declares a "high" pattern — presumably the source
		// assigns a fixed confidence to search hits; confirm against Sweep.
		if f.Confidence != "low" {
			t.Errorf("bad confidence: %q", f.Confidence)
		}
		if strings.Contains(f.Source, "/projects/42/-/blob/main/app/config.env") {
			sawP42 = true
		}
		if strings.Contains(f.Source, "/projects/99/-/blob/master/docs/README.md") {
			sawP99 = true
		}
	}
	if !sawP42 || !sawP99 {
		t.Fatalf("expected both project URLs in Source fields: p42=%v p99=%v", sawP42, sawP99)
	}
}

// TestGitLabSource_Unauthorized checks that a 401 response from the server
// makes Sweep return an error matching ErrUnauthorized.
func TestGitLabSource_Unauthorized(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(http.StatusUnauthorized)
		// The status code is what is under test; the body is best-effort.
		_, _ = w.Write([]byte(`{"message":"401 Unauthorized"}`))
	}))
	defer srv.Close()

	s := &GitLabSource{
		Token:    "bad",
		BaseURL:  srv.URL,
		Registry: gitlabTestRegistry(),
		Limiters: recon.NewLimiterRegistry(),
	}

	out := make(chan recon.Finding, 4)
	err := s.Sweep(context.Background(), "", out)
	close(out)

	if err == nil {
		t.Fatalf("expected error, got nil")
	}
	if !errors.Is(err, ErrUnauthorized) {
		t.Fatalf("expected ErrUnauthorized, got %v", err)
	}
}

// TestGitLabSource_CtxCancellation checks that Sweep returns a non-nil error
// when its context expires (50ms) before the deliberately slow server (2s)
// responds.
func TestGitLabSource_CtxCancellation(t *testing.T) {
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		select {
		case <-r.Context().Done():
			// Request context ended first; return without writing a response.
			return
		case <-time.After(2 * time.Second):
			w.WriteHeader(http.StatusOK)
			// Best-effort body; the test expects this branch not to be reached.
			_, _ = w.Write([]byte("[]"))
		}
	}))
	defer srv.Close()

	s := &GitLabSource{
		Token:    "glpat-x",
		BaseURL:  srv.URL,
		Registry: gitlabTestRegistry(),
		Limiters: recon.NewLimiterRegistry(),
	}

	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
	defer cancel()

	out := make(chan recon.Finding, 4)
	err := s.Sweep(ctx, "", out)
	close(out)

	if err == nil {
		t.Fatalf("expected ctx error, got nil")
	}
}

// TestGitLabSource_InterfaceAssertion is a compile-time check that
// *GitLabSource satisfies recon.ReconSource.
func TestGitLabSource_InterfaceAssertion(t *testing.T) {
	var _ recon.ReconSource = (*GitLabSource)(nil)
}

// gitlabDrain receives from ch until it is closed and returns everything
// received, in order. The result is nil when nothing was received.
func gitlabDrain(ch <-chan recon.Finding) []recon.Finding {
	var out []recon.Finding
	for f := range ch {
		out = append(out, f)
	}
	return out
}