- BitbucketSource implements recon.ReconSource (RECON-CODE-03)
- Queries /2.0/workspaces/{ws}/search/code with Bearer auth
- Disabled when token OR workspace empty
- Rate: rate.Every(3.6s), burst 1 (Bitbucket 1000/hr limit)
- httptest coverage: enable gating, sweep, 401, ctx cancel
175 lines
4.6 KiB
Go
175 lines
4.6 KiB
Go
package sources
|
|
|
|
import (
	"context"
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
	"strings"
	"time"

	"golang.org/x/time/rate"

	"github.com/salvacybersec/keyhunter/pkg/providers"
	"github.com/salvacybersec/keyhunter/pkg/recon"
)
|
|
|
|
// BitbucketSource queries the Bitbucket Cloud 2.0 code search API for leaked
|
|
// provider keywords across a configured workspace (RECON-CODE-03).
|
|
//
|
|
// Docs: https://developer.atlassian.com/cloud/bitbucket/rest/api-group-search/
|
|
// Rate: 1000 req/hour → rate.Every(3.6s), burst 1.
|
|
// Scope: requires both a token (app password or OAuth) AND a workspace slug;
|
|
// absent either, the source disables itself cleanly (no error).
|
|
type BitbucketSource struct {
|
|
Token string
|
|
Workspace string
|
|
BaseURL string
|
|
Registry *providers.Registry
|
|
Limiters *recon.LimiterRegistry
|
|
|
|
client *Client
|
|
}
|
|
|
|
var _ recon.ReconSource = (*BitbucketSource)(nil)
|
|
|
|
// Name returns the stable source identifier.
|
|
func (s *BitbucketSource) Name() string { return "bitbucket" }
|
|
|
|
// RateLimit reports the per-source token bucket rate (1000/hour).
|
|
func (s *BitbucketSource) RateLimit() rate.Limit {
|
|
return rate.Every(3600 * time.Millisecond)
|
|
}
|
|
|
|
// Burst reports the token bucket burst capacity.
|
|
func (s *BitbucketSource) Burst() int { return 1 }
|
|
|
|
// RespectsRobots reports whether robots.txt applies (REST API → false).
|
|
func (s *BitbucketSource) RespectsRobots() bool { return false }
|
|
|
|
// Enabled reports whether the source should run. Requires both token and
|
|
// workspace to be non-empty.
|
|
func (s *BitbucketSource) Enabled(cfg recon.Config) bool {
|
|
return s.Token != "" && s.Workspace != ""
|
|
}
|
|
|
|
// bitbucketSearchResponse is the decode target for a Bitbucket code search
// reply. Only the fields this source reads are declared; everything else in
// the payload is ignored by the JSON decoder.
type bitbucketSearchResponse struct {
	// Values holds one entry per search hit.
	Values []struct {
		// ContentMatchCount is decoded from "content_match_count".
		ContentMatchCount int `json:"content_match_count"`
		// PageURL is decoded from "page_url"; used as the finding's source
		// when present.
		PageURL string `json:"page_url"`
		// File identifies the matched file and the commit it was found in.
		File struct {
			Path   string `json:"path"`
			Commit struct {
				Hash string `json:"hash"`
			} `json:"commit"`
		} `json:"file"`
	} `json:"values"`
}
|
|
|
|
// Sweep iterates queries built from the provider registry, issues one search
|
|
// request per query (rate-limited via Limiters), and emits one Finding per
|
|
// `values` entry in the response.
|
|
func (s *BitbucketSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
|
|
if s.client == nil {
|
|
s.client = NewClient()
|
|
}
|
|
base := s.BaseURL
|
|
if base == "" {
|
|
base = "https://api.bitbucket.org"
|
|
}
|
|
|
|
queries := BuildQueries(s.Registry, "bitbucket")
|
|
for _, q := range queries {
|
|
if err := ctx.Err(); err != nil {
|
|
return err
|
|
}
|
|
|
|
if s.Limiters != nil {
|
|
if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
endpoint := fmt.Sprintf("%s/2.0/workspaces/%s/search/code", base, url.PathEscape(s.Workspace))
|
|
req, err := http.NewRequest(http.MethodGet, endpoint, nil)
|
|
if err != nil {
|
|
return fmt.Errorf("bitbucket: build request: %w", err)
|
|
}
|
|
vals := req.URL.Query()
|
|
vals.Set("search_query", q)
|
|
req.URL.RawQuery = vals.Encode()
|
|
req.Header.Set("Authorization", "Bearer "+s.Token)
|
|
req.Header.Set("Accept", "application/json")
|
|
|
|
resp, err := s.client.Do(ctx, req)
|
|
if err != nil {
|
|
return fmt.Errorf("bitbucket: sweep: %w", err)
|
|
}
|
|
|
|
var body bitbucketSearchResponse
|
|
dec := json.NewDecoder(resp.Body)
|
|
decodeErr := dec.Decode(&body)
|
|
_ = resp.Body.Close()
|
|
if decodeErr != nil {
|
|
return fmt.Errorf("bitbucket: decode: %w", decodeErr)
|
|
}
|
|
|
|
for _, v := range body.Values {
|
|
src := v.PageURL
|
|
if src == "" {
|
|
src = fmt.Sprintf("bitbucket:%s/%s@%s", s.Workspace, v.File.Path, v.File.Commit.Hash)
|
|
}
|
|
f := recon.Finding{
|
|
ProviderName: providerForQuery(s.Registry, q),
|
|
Source: src,
|
|
SourceType: "recon:bitbucket",
|
|
DetectedAt: time.Now().UTC(),
|
|
}
|
|
select {
|
|
case out <- f:
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// providerForQuery returns the provider name whose keyword appears in q, or
|
|
// empty string if no match is found. Used to label Findings with their source
|
|
// provider when the remote API doesn't echo the original keyword.
|
|
func providerForQuery(reg *providers.Registry, q string) string {
|
|
if reg == nil {
|
|
return ""
|
|
}
|
|
for _, p := range reg.List() {
|
|
for _, k := range p.Keywords {
|
|
if k == "" {
|
|
continue
|
|
}
|
|
if containsFold(q, k) {
|
|
return p.Name
|
|
}
|
|
}
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// containsFold reports whether needle occurs within haystack, ignoring
// letter case. An empty needle never matches.
//
// Both strings are lower-cased before the substring search, so "OPENAI" and
// "openai" are treated as equal.
func containsFold(haystack, needle string) bool {
	if needle == "" {
		return false
	}
	return strings.Contains(strings.ToLower(haystack), strings.ToLower(needle))
}
|