package sources import ( "context" "encoding/json" "fmt" "net/http" "net/url" "time" "golang.org/x/time/rate" "github.com/salvacybersec/keyhunter/pkg/providers" "github.com/salvacybersec/keyhunter/pkg/recon" ) // BitbucketSource queries the Bitbucket Cloud 2.0 code search API for leaked // provider keywords across a configured workspace (RECON-CODE-03). // // Docs: https://developer.atlassian.com/cloud/bitbucket/rest/api-group-search/ // Rate: 1000 req/hour → rate.Every(3.6s), burst 1. // Scope: requires both a token (app password or OAuth) AND a workspace slug; // absent either, the source disables itself cleanly (no error). type BitbucketSource struct { Token string Workspace string BaseURL string Registry *providers.Registry Limiters *recon.LimiterRegistry client *Client } var _ recon.ReconSource = (*BitbucketSource)(nil) // Name returns the stable source identifier. func (s *BitbucketSource) Name() string { return "bitbucket" } // RateLimit reports the per-source token bucket rate (1000/hour). func (s *BitbucketSource) RateLimit() rate.Limit { return rate.Every(3600 * time.Millisecond) } // Burst reports the token bucket burst capacity. func (s *BitbucketSource) Burst() int { return 1 } // RespectsRobots reports whether robots.txt applies (REST API → false). func (s *BitbucketSource) RespectsRobots() bool { return false } // Enabled reports whether the source should run. Requires both token and // workspace to be non-empty. func (s *BitbucketSource) Enabled(cfg recon.Config) bool { return s.Token != "" && s.Workspace != "" } // bitbucketSearchResponse mirrors the subset of the Bitbucket code search // response shape this source consumes. type bitbucketSearchResponse struct { Values []struct { ContentMatchCount int `json:"content_match_count"` PageURL string `json:"page_url"` File struct { Path string `json:"path"` Commit struct { Hash string `json:"hash"` } `json:"commit"` } `json:"file"` } `json:"values"` } // Sweep iterates queries built from the provider registry, issues one search // request per query (rate-limited via Limiters), and emits one Finding per // `values` entry in the response. func (s *BitbucketSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error { if s.client == nil { s.client = NewClient() } base := s.BaseURL if base == "" { base = "https://api.bitbucket.org" } queries := BuildQueries(s.Registry, "bitbucket") for _, q := range queries { if err := ctx.Err(); err != nil { return err } if s.Limiters != nil { if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil { return err } } endpoint := fmt.Sprintf("%s/2.0/workspaces/%s/search/code", base, url.PathEscape(s.Workspace)) req, err := http.NewRequest(http.MethodGet, endpoint, nil) if err != nil { return fmt.Errorf("bitbucket: build request: %w", err) } vals := req.URL.Query() vals.Set("search_query", q) req.URL.RawQuery = vals.Encode() req.Header.Set("Authorization", "Bearer "+s.Token) req.Header.Set("Accept", "application/json") resp, err := s.client.Do(ctx, req) if err != nil { return fmt.Errorf("bitbucket: sweep: %w", err) } var body bitbucketSearchResponse dec := json.NewDecoder(resp.Body) decodeErr := dec.Decode(&body) _ = resp.Body.Close() if decodeErr != nil { return fmt.Errorf("bitbucket: decode: %w", decodeErr) } for _, v := range body.Values { src := v.PageURL if src == "" { src = fmt.Sprintf("bitbucket:%s/%s@%s", s.Workspace, v.File.Path, v.File.Commit.Hash) } f := recon.Finding{ ProviderName: providerForQuery(s.Registry, q), Source: src, SourceType: "recon:bitbucket", DetectedAt: time.Now().UTC(), } select { case out <- f: case <-ctx.Done(): return ctx.Err() } } } return nil } // providerForQuery returns the provider name whose keyword appears in q, or // empty string if no match is found. Used to label Findings with their source // provider when the remote API doesn't echo the original keyword. func providerForQuery(reg *providers.Registry, q string) string { if reg == nil { return "" } for _, p := range reg.List() { for _, k := range p.Keywords { if k == "" { continue } if containsFold(q, k) { return p.Name } } } return "" } func containsFold(haystack, needle string) bool { if needle == "" { return false } if len(needle) > len(haystack) { return false } for i := 0; i+len(needle) <= len(haystack); i++ { if haystack[i:i+len(needle)] == needle { return true } } return false }