Files
keyhunter/pkg/recon/sources/bitbucket.go
salvacybersec d279abf449 feat(10-04): add BitbucketSource for code search recon
- BitbucketSource implements recon.ReconSource (RECON-CODE-03)
- Queries /2.0/workspaces/{ws}/search/code with Bearer auth
- Disabled when token OR workspace empty
- Rate: rate.Every(3.6s), burst 1 (Bitbucket 1000/hr limit)
- httptest coverage: enable gating, sweep, 401, ctx cancel
2026-04-06 01:15:42 +03:00

175 lines
4.6 KiB
Go

package sources
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/url"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// BitbucketSource is the recon source for Bitbucket Cloud 2.0 code search: it
// looks for leaked provider keywords inside one configured workspace
// (RECON-CODE-03).
//
// API reference: https://developer.atlassian.com/cloud/bitbucket/rest/api-group-search/
// Throttling: 1000 requests per hour, i.e. one request every 3.6s, burst 1.
// Requirements: Token and Workspace must both be set; when either one is
// empty, Enabled reports false and no error is raised.
type BitbucketSource struct {
	// Token is sent as the Bearer credential on every search request.
	Token string
	// Workspace is the workspace slug placed in the search endpoint path.
	Workspace string
	// BaseURL overrides the API root; empty means https://api.bitbucket.org.
	BaseURL string
	// Registry supplies the providers from which search queries are built.
	Registry *providers.Registry
	// Limiters, when non-nil, is waited on before each request made by Sweep.
	Limiters *recon.LimiterRegistry

	// client is created lazily by Sweep on first use.
	client *Client
}
// Compile-time assertion that *BitbucketSource implements recon.ReconSource.
var _ recon.ReconSource = (*BitbucketSource)(nil)
// Name reports the constant identifier of this source. Sweep also uses it as
// the key when waiting on the limiter registry.
func (s *BitbucketSource) Name() string {
	const sourceName = "bitbucket"
	return sourceName
}
// RateLimit returns the token-bucket refill rate for this source: one request
// every 3.6 seconds, which spreads 1000 requests evenly over an hour.
func (s *BitbucketSource) RateLimit() rate.Limit {
	const perRequest = time.Hour / 1000 // 3.6s
	return rate.Every(perRequest)
}
// Burst returns the token-bucket burst size for this source, which is 1.
func (s *BitbucketSource) Burst() int {
	const burstSize = 1
	return burstSize
}
// RespectsRobots always returns false: this source talks to a REST API, so
// robots.txt does not apply.
func (s *BitbucketSource) RespectsRobots() bool {
	return false
}
// Enabled tells whether this source is configured well enough to run. It is
// true only when both Token and Workspace are non-empty; cfg is not consulted.
func (s *BitbucketSource) Enabled(cfg recon.Config) bool {
	missingConfig := s.Token == "" || s.Workspace == ""
	return !missingConfig
}
// bitbucketSearchResponse is the decoding target for a Bitbucket code search
// reply. Only the fields declared here are decoded; any other keys in the
// payload are ignored.
type bitbucketSearchResponse struct {
	// Values holds one entry per element of the reply's "values" array.
	Values []struct {
		// ContentMatchCount is decoded from "content_match_count".
		ContentMatchCount int `json:"content_match_count"`
		// PageURL is decoded from "page_url".
		PageURL string `json:"page_url"`
		// File carries the path and commit hash of the matched file.
		File struct {
			Path   string `json:"path"`
			Commit struct {
				Hash string `json:"hash"`
			} `json:"commit"`
		} `json:"file"`
	} `json:"values"`
}
// Sweep runs one Bitbucket code search per query built from the provider
// registry and sends a Finding on out for every entry in the response's
// `values` array.
//
// The second parameter is ignored. Before each request the loop checks ctx
// and, when Limiters is non-nil, waits on the limiter registered under Name().
// Each Finding's Source is the entry's page_url, or a synthetic
// "bitbucket:<workspace>/<path>@<commit>" reference when page_url is empty.
//
// Sweep returns nil once every query has been processed. It returns ctx.Err()
// when the context ends (before a request or while sending to out), the
// limiter's error when waiting fails, and a wrapped "bitbucket: ..." error
// when building the request, performing it, or decoding the body fails. The
// first error aborts the sweep.
func (s *BitbucketSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
	if s.client == nil {
		s.client = NewClient()
	}
	base := s.BaseURL
	if base == "" {
		base = "https://api.bitbucket.org"
	}
	// The endpoint depends only on the source configuration, so build it once
	// rather than once per query.
	endpoint := fmt.Sprintf("%s/2.0/workspaces/%s/search/code", base, url.PathEscape(s.Workspace))

	for _, q := range BuildQueries(s.Registry, "bitbucket") {
		if err := ctx.Err(); err != nil {
			return err
		}
		if s.Limiters != nil {
			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
				return err
			}
		}

		// Attach ctx to the request itself so cancellation reaches the
		// transport regardless of how the client treats its ctx argument.
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
		if err != nil {
			return fmt.Errorf("bitbucket: build request: %w", err)
		}
		vals := req.URL.Query()
		vals.Set("search_query", q)
		req.URL.RawQuery = vals.Encode()
		req.Header.Set("Authorization", "Bearer "+s.Token)
		req.Header.Set("Accept", "application/json")

		resp, err := s.client.Do(ctx, req)
		if err != nil {
			return fmt.Errorf("bitbucket: sweep: %w", err)
		}
		var body bitbucketSearchResponse
		decodeErr := json.NewDecoder(resp.Body).Decode(&body)
		// Close before inspecting decodeErr so the body is released on every
		// path; a close error after reading carries nothing actionable here.
		_ = resp.Body.Close()
		if decodeErr != nil {
			return fmt.Errorf("bitbucket: decode: %w", decodeErr)
		}
		if len(body.Values) == 0 {
			continue
		}

		// Every hit of one query maps to the same provider; resolve it once
		// instead of once per hit.
		provider := providerForQuery(s.Registry, q)
		for _, v := range body.Values {
			src := v.PageURL
			if src == "" {
				src = fmt.Sprintf("bitbucket:%s/%s@%s", s.Workspace, v.File.Path, v.File.Commit.Hash)
			}
			f := recon.Finding{
				ProviderName: provider,
				Source:       src,
				SourceType:   "recon:bitbucket",
				DetectedAt:   time.Now().UTC(),
			}
			select {
			case out <- f:
			case <-ctx.Done():
				return ctx.Err()
			}
		}
	}
	return nil
}
// providerForQuery maps a search query back to a provider name. It walks the
// registry's providers in List order and returns the Name of the first one
// that has a non-empty keyword which containsFold finds in q. The result is ""
// when reg is nil or no keyword matches. It exists to label Findings when the
// remote API does not echo the keyword that produced a hit.
func providerForQuery(reg *providers.Registry, q string) string {
	if reg == nil {
		return ""
	}
	for _, candidate := range reg.List() {
		for _, keyword := range candidate.Keywords {
			if keyword != "" && containsFold(q, keyword) {
				return candidate.Name
			}
		}
	}
	return ""
}
// containsFold reports whether needle occurs within haystack, ignoring the
// case of ASCII letters. Bytes outside A-Z/a-z (including every byte of a
// multi-byte UTF-8 sequence) must match exactly. An empty needle never
// matches, and a needle longer than haystack cannot match.
//
// The comparison works on the strings in place and allocates nothing.
func containsFold(haystack, needle string) bool {
	if needle == "" || len(needle) > len(haystack) {
		return false
	}
	// lower maps ASCII upper-case letters to lower case and leaves every
	// other byte untouched.
	lower := func(c byte) byte {
		if 'A' <= c && c <= 'Z' {
			return c + ('a' - 'A')
		}
		return c
	}
	last := len(haystack) - len(needle)
	for start := 0; start <= last; start++ {
		matched := true
		for j := 0; j < len(needle); j++ {
			if lower(haystack[start+j]) != lower(needle[j]) {
				matched = false
				break
			}
		}
		if matched {
			return true
		}
	}
	return false
}