diff --git a/pkg/recon/sources/bitbucket.go b/pkg/recon/sources/bitbucket.go
new file mode 100644
index 0000000..515f2bb
--- /dev/null
+++ b/pkg/recon/sources/bitbucket.go
@@ -0,0 +1,187 @@
+package sources
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/url"
+	"strings"
+	"time"
+
+	"golang.org/x/time/rate"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// BitbucketSource queries the Bitbucket Cloud 2.0 code search API for leaked
+// provider keywords across a configured workspace (RECON-CODE-03).
+//
+// Docs: https://developer.atlassian.com/cloud/bitbucket/rest/api-group-search/
+// Rate: 1000 req/hour → rate.Every(3.6s), burst 1.
+// Scope: requires both a token (app password or OAuth) AND a workspace slug;
+// absent either, the source disables itself cleanly (no error).
+//
+// Token is sent as a Bearer credential. BaseURL overrides the API root and
+// defaults to https://api.bitbucket.org when empty. Limiters is optional; a
+// nil registry skips rate limiting. The unexported client is created lazily
+// on the first Sweep.
+type BitbucketSource struct {
+	Token     string
+	Workspace string
+	BaseURL   string
+	Registry  *providers.Registry
+	Limiters  *recon.LimiterRegistry
+
+	client *Client
+}
+
+var _ recon.ReconSource = (*BitbucketSource)(nil)
+
+// Name returns the stable source identifier.
+func (s *BitbucketSource) Name() string { return "bitbucket" }
+
+// RateLimit reports the per-source token bucket rate (1000/hour).
+func (s *BitbucketSource) RateLimit() rate.Limit {
+	return rate.Every(3600 * time.Millisecond)
+}
+
+// Burst reports the token bucket burst capacity.
+func (s *BitbucketSource) Burst() int { return 1 }
+
+// RespectsRobots reports whether robots.txt applies (REST API → false).
+func (s *BitbucketSource) RespectsRobots() bool { return false }
+
+// Enabled reports whether the source should run. Requires both token and
+// workspace to be non-empty; cfg is not consulted.
+func (s *BitbucketSource) Enabled(cfg recon.Config) bool {
+	return s.Token != "" && s.Workspace != ""
+}
+
+// bitbucketSearchResponse mirrors the subset of the Bitbucket code search
+// response shape this source consumes.
+type bitbucketSearchResponse struct {
+	Values []struct {
+		ContentMatchCount int    `json:"content_match_count"`
+		PageURL           string `json:"page_url"`
+		File              struct {
+			Path   string `json:"path"`
+			Commit struct {
+				Hash string `json:"hash"`
+			} `json:"commit"`
+		} `json:"file"`
+	} `json:"values"`
+}
+
+// Sweep iterates queries built from the provider registry, issues one search
+// request per query (rate-limited via Limiters when set), and emits one
+// Finding per `values` entry in the response. Only the response to that single
+// request is consumed; no further pages are fetched.
+//
+// The second parameter is unused. Sweep stops at the first failure: it returns
+// ctx's error when ctx is done before a request or while a Finding is waiting
+// to be sent on out, the limiter's error unchanged, and a wrapped error when
+// building, sending, or decoding a request fails.
+func (s *BitbucketSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
+	if s.client == nil {
+		s.client = NewClient()
+	}
+	// Drop trailing slashes so a BaseURL such as "https://host/" does not
+	// produce a "//2.0/..." request path.
+	base := strings.TrimRight(s.BaseURL, "/")
+	if base == "" {
+		base = "https://api.bitbucket.org"
+	}
+	// The endpoint does not depend on the query, so build it once.
+	endpoint := fmt.Sprintf("%s/2.0/workspaces/%s/search/code", base, url.PathEscape(s.Workspace))
+
+	for _, q := range BuildQueries(s.Registry, "bitbucket") {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		if s.Limiters != nil {
+			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+				return err
+			}
+		}
+
+		req, err := http.NewRequest(http.MethodGet, endpoint, nil)
+		if err != nil {
+			return fmt.Errorf("bitbucket: build request: %w", err)
+		}
+		vals := req.URL.Query()
+		vals.Set("search_query", q)
+		req.URL.RawQuery = vals.Encode()
+		req.Header.Set("Authorization", "Bearer "+s.Token)
+		req.Header.Set("Accept", "application/json")
+
+		resp, err := s.client.Do(ctx, req)
+		if err != nil {
+			return fmt.Errorf("bitbucket: sweep: %w", err)
+		}
+
+		var body bitbucketSearchResponse
+		decodeErr := json.NewDecoder(resp.Body).Decode(&body)
+		// Close now rather than with defer: a deferred Close inside the loop
+		// would keep every response body open until Sweep returns.
+		_ = resp.Body.Close()
+		if decodeErr != nil {
+			return fmt.Errorf("bitbucket: decode: %w", decodeErr)
+		}
+
+		// Every hit for this query maps to the same provider; resolve it once.
+		provider := providerForQuery(s.Registry, q)
+		for _, v := range body.Values {
+			src := v.PageURL
+			if src == "" {
+				// page_url missing: fall back to a synthetic locator.
+				src = fmt.Sprintf("bitbucket:%s/%s@%s", s.Workspace, v.File.Path, v.File.Commit.Hash)
+			}
+			f := recon.Finding{
+				ProviderName: provider,
+				Source:       src,
+				SourceType:   "recon:bitbucket",
+				DetectedAt:   time.Now().UTC(),
+			}
+			select {
+			case out <- f:
+			case <-ctx.Done():
+				return ctx.Err()
+			}
+		}
+	}
+
+	return nil
+}
+
+// providerForQuery returns the provider name whose keyword appears in q
+// (compared case-insensitively), or empty string if no match is found. Used
+// to label Findings with their source provider when the remote API doesn't
+// echo the original keyword.
+func providerForQuery(reg *providers.Registry, q string) string {
+	if reg == nil {
+		return ""
+	}
+	for _, p := range reg.List() {
+		for _, k := range p.Keywords {
+			if k == "" {
+				continue
+			}
+			if containsFold(q, k) {
+				return p.Name
+			}
+		}
+	}
+	return ""
+}
+
+// containsFold reports whether needle occurs in haystack, ignoring letter
+// case. An empty needle never matches.
+func containsFold(haystack, needle string) bool {
+	if needle == "" {
+		return false
+	}
+	return strings.Contains(strings.ToLower(haystack), strings.ToLower(needle))
+}
diff --git a/pkg/recon/sources/bitbucket_test.go b/pkg/recon/sources/bitbucket_test.go
new file mode 100644
index 0000000..c4e80ad
--- /dev/null
+++ b/pkg/recon/sources/bitbucket_test.go
@@ -0,0 +1,171 @@
+package sources
+
+import (
+	"context"
+	"errors"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// bitbucketTestRegistry returns a registry holding a single provider with one keyword.
+func bitbucketTestRegistry() *providers.Registry {
+	return providers.NewRegistryFromProviders([]providers.Provider{
+		{Name: "openai", Keywords: []string{"sk-proj-"}},
+	})
+}
+
+// newBitbucketSource builds a BitbucketSource wired to the test registry and a
+// fresh limiter registry.
+func newBitbucketSource(baseURL, token, workspace string) *BitbucketSource {
+	return &BitbucketSource{
+		Token:     token,
+		Workspace: workspace,
+		BaseURL:   baseURL,
+		Registry:  bitbucketTestRegistry(),
+		Limiters:  recon.NewLimiterRegistry(),
+	}
+}
+
+// TestBitbucket_EnabledRequiresTokenAndWorkspace checks that Enabled is true
+// only when both token and workspace are set.
+func TestBitbucket_EnabledRequiresTokenAndWorkspace(t *testing.T) {
+	cfg := recon.Config{}
+
+	if newBitbucketSource("", "", "").Enabled(cfg) {
+		t.Fatal("expected disabled when token+workspace empty")
+	}
+	if newBitbucketSource("", "tok", "").Enabled(cfg) {
+		t.Fatal("expected disabled when workspace empty")
+	}
+	if newBitbucketSource("", "", "ws").Enabled(cfg) {
+		t.Fatal("expected disabled when token empty")
+	}
+	if !newBitbucketSource("", "tok", "ws").Enabled(cfg) {
+		t.Fatal("expected enabled when both set")
+	}
+}
+
+// TestBitbucket_SweepEmitsFindings checks the outgoing request (auth header,
+// path, search_query) and that a response entry becomes a Finding.
+func TestBitbucket_SweepEmitsFindings(t *testing.T) {
+	var gotAuth, gotPath string
+	var gotQuery string
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		gotAuth = r.Header.Get("Authorization")
+		gotPath = r.URL.Path
+		gotQuery = r.URL.Query().Get("search_query")
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{
+			"values": [
+				{
+					"content_match_count": 2,
+					"file": {"path": "secrets/.env", "commit": {"hash": "deadbeef"}},
+					"page_url": "https://bitbucket.org/testws/repo/src/deadbeef/secrets/.env"
+				}
+			]
+		}`))
+	}))
+	t.Cleanup(srv.Close)
+
+	src := newBitbucketSource(srv.URL, "tok", "testws")
+	out := make(chan recon.Finding, 16)
+	if err := src.Sweep(context.Background(), "", out); err != nil {
+		t.Fatalf("Sweep: %v", err)
+	}
+	close(out)
+
+	if gotAuth != "Bearer tok" {
+		t.Errorf("Authorization header = %q, want Bearer tok", gotAuth)
+	}
+	if gotPath != "/2.0/workspaces/testws/search/code" {
+		t.Errorf("path = %q", gotPath)
+	}
+	if gotQuery == "" {
+		t.Errorf("expected search_query param to be set")
+	}
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) == 0 {
+		t.Fatal("expected at least 1 finding")
+	}
+	f := findings[0]
+	if f.SourceType != "recon:bitbucket" {
+		t.Errorf("SourceType = %q", f.SourceType)
+	}
+	if !strings.Contains(f.Source, "bitbucket.org/testws/repo") {
+		t.Errorf("Source = %q", f.Source)
+	}
+}
+
+// TestBitbucket_Unauthorized checks that a 401 response surfaces as
+// ErrUnauthorized from Sweep.
+func TestBitbucket_Unauthorized(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		http.Error(w, "nope", http.StatusUnauthorized)
+	}))
+	t.Cleanup(srv.Close)
+
+	src := newBitbucketSource(srv.URL, "tok", "testws")
+	out := make(chan recon.Finding, 4)
+	err := src.Sweep(context.Background(), "", out)
+	if !errors.Is(err, ErrUnauthorized) {
+		t.Fatalf("err = %v, want ErrUnauthorized", err)
+	}
+}
+
+// TestBitbucket_ContextCancellation checks that Sweep returns an error when
+// ctx expires while the server is still holding the response.
+func TestBitbucket_ContextCancellation(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Return as soon as the client gives up so srv.Close (run by
+		// t.Cleanup) does not block for the full delay.
+		select {
+		case <-r.Context().Done():
+			return
+		case <-time.After(2 * time.Second):
+		}
+		w.WriteHeader(200)
+		_, _ = w.Write([]byte(`{"values":[]}`))
+	}))
+	t.Cleanup(srv.Close)
+
+	src := newBitbucketSource(srv.URL, "tok", "testws")
+	ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
+	defer cancel()
+	out := make(chan recon.Finding, 1)
+	err := src.Sweep(ctx, "", out)
+	if err == nil {
+		t.Fatal("expected error from cancelled context")
+	}
+}
+
+// TestContainsFold_IgnoresCase pins the case-insensitive matching used by
+// providerForQuery.
+func TestContainsFold_IgnoresCase(t *testing.T) {
+	cases := []struct {
+		haystack string
+		needle   string
+		want     bool
+	}{
+		{"sk-proj-abc", "sk-proj-", true},
+		{"SK-PROJ-abc", "sk-proj-", true},
+		{"key=sk-proj-", "SK-Proj-", true},
+		{"sk-proj", "sk-proj-", false},
+		{"", "sk-proj-", false},
+		{"anything", "", false},
+	}
+	for _, tc := range cases {
+		if got := containsFold(tc.haystack, tc.needle); got != tc.want {
+			t.Errorf("containsFold(%q, %q) = %v, want %v", tc.haystack, tc.needle, got, tc.want)
+		}
+	}
+}