feat(10-04): add BitbucketSource for code search recon
- BitbucketSource implements recon.ReconSource (RECON-CODE-03)
- Queries /2.0/workspaces/{ws}/search/code with Bearer auth
- Disabled when token OR workspace empty
- Rate: rate.Every(3.6s), burst 1 (Bitbucket 1000/hr limit)
- httptest coverage: enable gating, sweep, 401, ctx cancel
This commit is contained in:
174
pkg/recon/sources/bitbucket.go
Normal file
174
pkg/recon/sources/bitbucket.go
Normal file
@@ -0,0 +1,174 @@
|
||||
package sources
|
||||
|
||||
import (
	"context"
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
	"strings"
	"time"

	"golang.org/x/time/rate"

	"github.com/salvacybersec/keyhunter/pkg/providers"
	"github.com/salvacybersec/keyhunter/pkg/recon"
)
|
||||
|
||||
// BitbucketSource queries the Bitbucket Cloud 2.0 code search API for leaked
|
||||
// provider keywords across a configured workspace (RECON-CODE-03).
|
||||
//
|
||||
// Docs: https://developer.atlassian.com/cloud/bitbucket/rest/api-group-search/
|
||||
// Rate: 1000 req/hour → rate.Every(3.6s), burst 1.
|
||||
// Scope: requires both a token (app password or OAuth) AND a workspace slug;
|
||||
// absent either, the source disables itself cleanly (no error).
|
||||
type BitbucketSource struct {
|
||||
Token string
|
||||
Workspace string
|
||||
BaseURL string
|
||||
Registry *providers.Registry
|
||||
Limiters *recon.LimiterRegistry
|
||||
|
||||
client *Client
|
||||
}
|
||||
|
||||
// Compile-time assertion that *BitbucketSource implements recon.ReconSource.
var _ recon.ReconSource = (*BitbucketSource)(nil)
|
||||
|
||||
// Name returns the stable source identifier.
|
||||
func (s *BitbucketSource) Name() string { return "bitbucket" }
|
||||
|
||||
// RateLimit reports the per-source token bucket rate (1000/hour).
|
||||
func (s *BitbucketSource) RateLimit() rate.Limit {
|
||||
return rate.Every(3600 * time.Millisecond)
|
||||
}
|
||||
|
||||
// Burst reports the token bucket burst capacity.
|
||||
func (s *BitbucketSource) Burst() int { return 1 }
|
||||
|
||||
// RespectsRobots reports whether robots.txt applies (REST API → false).
|
||||
func (s *BitbucketSource) RespectsRobots() bool { return false }
|
||||
|
||||
// Enabled reports whether the source should run. Requires both token and
|
||||
// workspace to be non-empty.
|
||||
func (s *BitbucketSource) Enabled(cfg recon.Config) bool {
|
||||
return s.Token != "" && s.Workspace != ""
|
||||
}
|
||||
|
||||
// bitbucketSearchResponse models the part of the Bitbucket code search reply
// that this source reads. JSON members without a matching field are ignored
// by the decoder.
type bitbucketSearchResponse struct {
	Values []bitbucketSearchHit `json:"values"`
}

// bitbucketSearchHit is a single element of the reply's "values" array.
type bitbucketSearchHit struct {
	ContentMatchCount int                 `json:"content_match_count"`
	PageURL           string              `json:"page_url"`
	File              bitbucketSearchFile `json:"file"`
}

// bitbucketSearchFile is the "file" object of a search hit.
type bitbucketSearchFile struct {
	Path   string                `json:"path"`
	Commit bitbucketSearchCommit `json:"commit"`
}

// bitbucketSearchCommit is the "commit" object nested inside a hit's file.
type bitbucketSearchCommit struct {
	Hash string `json:"hash"`
}
|
||||
|
||||
// Sweep iterates queries built from the provider registry, issues one search
|
||||
// request per query (rate-limited via Limiters), and emits one Finding per
|
||||
// `values` entry in the response.
|
||||
func (s *BitbucketSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
|
||||
if s.client == nil {
|
||||
s.client = NewClient()
|
||||
}
|
||||
base := s.BaseURL
|
||||
if base == "" {
|
||||
base = "https://api.bitbucket.org"
|
||||
}
|
||||
|
||||
queries := BuildQueries(s.Registry, "bitbucket")
|
||||
for _, q := range queries {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if s.Limiters != nil {
|
||||
if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
endpoint := fmt.Sprintf("%s/2.0/workspaces/%s/search/code", base, url.PathEscape(s.Workspace))
|
||||
req, err := http.NewRequest(http.MethodGet, endpoint, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("bitbucket: build request: %w", err)
|
||||
}
|
||||
vals := req.URL.Query()
|
||||
vals.Set("search_query", q)
|
||||
req.URL.RawQuery = vals.Encode()
|
||||
req.Header.Set("Authorization", "Bearer "+s.Token)
|
||||
req.Header.Set("Accept", "application/json")
|
||||
|
||||
resp, err := s.client.Do(ctx, req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("bitbucket: sweep: %w", err)
|
||||
}
|
||||
|
||||
var body bitbucketSearchResponse
|
||||
dec := json.NewDecoder(resp.Body)
|
||||
decodeErr := dec.Decode(&body)
|
||||
_ = resp.Body.Close()
|
||||
if decodeErr != nil {
|
||||
return fmt.Errorf("bitbucket: decode: %w", decodeErr)
|
||||
}
|
||||
|
||||
for _, v := range body.Values {
|
||||
src := v.PageURL
|
||||
if src == "" {
|
||||
src = fmt.Sprintf("bitbucket:%s/%s@%s", s.Workspace, v.File.Path, v.File.Commit.Hash)
|
||||
}
|
||||
f := recon.Finding{
|
||||
ProviderName: providerForQuery(s.Registry, q),
|
||||
Source: src,
|
||||
SourceType: "recon:bitbucket",
|
||||
DetectedAt: time.Now().UTC(),
|
||||
}
|
||||
select {
|
||||
case out <- f:
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// providerForQuery returns the provider name whose keyword appears in q, or
|
||||
// empty string if no match is found. Used to label Findings with their source
|
||||
// provider when the remote API doesn't echo the original keyword.
|
||||
func providerForQuery(reg *providers.Registry, q string) string {
|
||||
if reg == nil {
|
||||
return ""
|
||||
}
|
||||
for _, p := range reg.List() {
|
||||
for _, k := range p.Keywords {
|
||||
if k == "" {
|
||||
continue
|
||||
}
|
||||
if containsFold(q, k) {
|
||||
return p.Name
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// containsFold reports whether needle occurs anywhere in haystack, ignoring
// letter case. Unlike strings.Contains, an empty needle never matches.
//
// Both strings are lower-cased before the substring test so that the
// behaviour matches the "Fold" in the name; the previous hand-rolled scan
// compared bytes exactly and was therefore case-sensitive.
func containsFold(haystack, needle string) bool {
	if needle == "" {
		return false
	}
	return strings.Contains(strings.ToLower(haystack), strings.ToLower(needle))
}
|
||||
Reference in New Issue
Block a user