feat(10-04): add BitbucketSource for code search recon
- BitbucketSource implements recon.ReconSource (RECON-CODE-03)
- Queries /2.0/workspaces/{ws}/search/code with Bearer auth
- Disabled when token OR workspace empty
- Rate: rate.Every(3.6s), burst 1 (Bitbucket 1000/hr limit)
- httptest coverage: enable gating, sweep, 401, ctx cancel
This commit is contained in:
174
pkg/recon/sources/bitbucket.go
Normal file
174
pkg/recon/sources/bitbucket.go
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
package sources
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"golang.org/x/time/rate"
|
||||||
|
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||||
|
)
|
||||||
|
|
||||||
|
// BitbucketSource queries the Bitbucket Cloud 2.0 code search API for leaked
|
||||||
|
// provider keywords across a configured workspace (RECON-CODE-03).
|
||||||
|
//
|
||||||
|
// Docs: https://developer.atlassian.com/cloud/bitbucket/rest/api-group-search/
|
||||||
|
// Rate: 1000 req/hour → rate.Every(3.6s), burst 1.
|
||||||
|
// Scope: requires both a token (app password or OAuth) AND a workspace slug;
|
||||||
|
// absent either, the source disables itself cleanly (no error).
|
||||||
|
type BitbucketSource struct {
|
||||||
|
Token string
|
||||||
|
Workspace string
|
||||||
|
BaseURL string
|
||||||
|
Registry *providers.Registry
|
||||||
|
Limiters *recon.LimiterRegistry
|
||||||
|
|
||||||
|
client *Client
|
||||||
|
}
|
||||||
|
|
||||||
|
var _ recon.ReconSource = (*BitbucketSource)(nil)
|
||||||
|
|
||||||
|
// Name returns the stable source identifier.
|
||||||
|
func (s *BitbucketSource) Name() string { return "bitbucket" }
|
||||||
|
|
||||||
|
// RateLimit reports the per-source token bucket rate (1000/hour).
|
||||||
|
func (s *BitbucketSource) RateLimit() rate.Limit {
|
||||||
|
return rate.Every(3600 * time.Millisecond)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Burst reports the token bucket burst capacity.
|
||||||
|
func (s *BitbucketSource) Burst() int { return 1 }
|
||||||
|
|
||||||
|
// RespectsRobots reports whether robots.txt applies (REST API → false).
|
||||||
|
func (s *BitbucketSource) RespectsRobots() bool { return false }
|
||||||
|
|
||||||
|
// Enabled reports whether the source should run. Requires both token and
|
||||||
|
// workspace to be non-empty.
|
||||||
|
func (s *BitbucketSource) Enabled(cfg recon.Config) bool {
|
||||||
|
return s.Token != "" && s.Workspace != ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// bitbucketSearchResponse is the decoding target for a Bitbucket code search
// reply. Only the fields listed here are decoded; anything else in the JSON
// payload is ignored.
type bitbucketSearchResponse struct {
	// Values holds one entry per search hit.
	Values []struct {
		// ContentMatchCount is decoded from "content_match_count".
		ContentMatchCount int `json:"content_match_count"`
		// PageURL is decoded from "page_url"; Sweep prefers it as the
		// Finding source when it is non-empty.
		PageURL string `json:"page_url"`
		// File identifies the matched file and the commit it was found at.
		File struct {
			Path   string `json:"path"`
			Commit struct {
				Hash string `json:"hash"`
			} `json:"commit"`
		} `json:"file"`
	} `json:"values"`
}
|
||||||
|
|
||||||
|
// Sweep iterates queries built from the provider registry, issues one search
|
||||||
|
// request per query (rate-limited via Limiters), and emits one Finding per
|
||||||
|
// `values` entry in the response.
|
||||||
|
func (s *BitbucketSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
|
||||||
|
if s.client == nil {
|
||||||
|
s.client = NewClient()
|
||||||
|
}
|
||||||
|
base := s.BaseURL
|
||||||
|
if base == "" {
|
||||||
|
base = "https://api.bitbucket.org"
|
||||||
|
}
|
||||||
|
|
||||||
|
queries := BuildQueries(s.Registry, "bitbucket")
|
||||||
|
for _, q := range queries {
|
||||||
|
if err := ctx.Err(); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
if s.Limiters != nil {
|
||||||
|
if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
endpoint := fmt.Sprintf("%s/2.0/workspaces/%s/search/code", base, url.PathEscape(s.Workspace))
|
||||||
|
req, err := http.NewRequest(http.MethodGet, endpoint, nil)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("bitbucket: build request: %w", err)
|
||||||
|
}
|
||||||
|
vals := req.URL.Query()
|
||||||
|
vals.Set("search_query", q)
|
||||||
|
req.URL.RawQuery = vals.Encode()
|
||||||
|
req.Header.Set("Authorization", "Bearer "+s.Token)
|
||||||
|
req.Header.Set("Accept", "application/json")
|
||||||
|
|
||||||
|
resp, err := s.client.Do(ctx, req)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("bitbucket: sweep: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var body bitbucketSearchResponse
|
||||||
|
dec := json.NewDecoder(resp.Body)
|
||||||
|
decodeErr := dec.Decode(&body)
|
||||||
|
_ = resp.Body.Close()
|
||||||
|
if decodeErr != nil {
|
||||||
|
return fmt.Errorf("bitbucket: decode: %w", decodeErr)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, v := range body.Values {
|
||||||
|
src := v.PageURL
|
||||||
|
if src == "" {
|
||||||
|
src = fmt.Sprintf("bitbucket:%s/%s@%s", s.Workspace, v.File.Path, v.File.Commit.Hash)
|
||||||
|
}
|
||||||
|
f := recon.Finding{
|
||||||
|
ProviderName: providerForQuery(s.Registry, q),
|
||||||
|
Source: src,
|
||||||
|
SourceType: "recon:bitbucket",
|
||||||
|
DetectedAt: time.Now().UTC(),
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case out <- f:
|
||||||
|
case <-ctx.Done():
|
||||||
|
return ctx.Err()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// providerForQuery returns the provider name whose keyword appears in q, or
|
||||||
|
// empty string if no match is found. Used to label Findings with their source
|
||||||
|
// provider when the remote API doesn't echo the original keyword.
|
||||||
|
func providerForQuery(reg *providers.Registry, q string) string {
|
||||||
|
if reg == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
for _, p := range reg.List() {
|
||||||
|
for _, k := range p.Keywords {
|
||||||
|
if k == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if containsFold(q, k) {
|
||||||
|
return p.Name
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// containsFold reports whether needle occurs within haystack, ignoring ASCII
// letter case. An empty needle never matches, and a needle longer than the
// haystack cannot match.
//
// Only the ASCII letters A-Z are folded; every other byte (including the
// bytes of multi-byte UTF-8 sequences) must match exactly. Folding is done
// byte by byte without allocating.
func containsFold(haystack, needle string) bool {
	n := len(needle)
	if n == 0 || n > len(haystack) {
		return false
	}

	// lower maps an ASCII upper-case letter to lower case and leaves any
	// other byte untouched.
	lower := func(c byte) byte {
		if 'A' <= c && c <= 'Z' {
			return c + ('a' - 'A')
		}
		return c
	}

	for start := 0; start+n <= len(haystack); start++ {
		matched := true
		for j := 0; j < n; j++ {
			if lower(haystack[start+j]) != lower(needle[j]) {
				matched = false
				break
			}
		}
		if matched {
			return true
		}
	}
	return false
}
|
||||||
132
pkg/recon/sources/bitbucket_test.go
Normal file
132
pkg/recon/sources/bitbucket_test.go
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
package sources
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||||
|
)
|
||||||
|
|
||||||
|
func bitbucketTestRegistry() *providers.Registry {
|
||||||
|
return providers.NewRegistryFromProviders([]providers.Provider{
|
||||||
|
{Name: "openai", Keywords: []string{"sk-proj-"}},
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func newBitbucketSource(baseURL, token, workspace string) *BitbucketSource {
|
||||||
|
return &BitbucketSource{
|
||||||
|
Token: token,
|
||||||
|
Workspace: workspace,
|
||||||
|
BaseURL: baseURL,
|
||||||
|
Registry: bitbucketTestRegistry(),
|
||||||
|
Limiters: recon.NewLimiterRegistry(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBitbucket_EnabledRequiresTokenAndWorkspace(t *testing.T) {
|
||||||
|
cfg := recon.Config{}
|
||||||
|
|
||||||
|
if newBitbucketSource("", "", "").Enabled(cfg) {
|
||||||
|
t.Fatal("expected disabled when token+workspace empty")
|
||||||
|
}
|
||||||
|
if newBitbucketSource("", "tok", "").Enabled(cfg) {
|
||||||
|
t.Fatal("expected disabled when workspace empty")
|
||||||
|
}
|
||||||
|
if newBitbucketSource("", "", "ws").Enabled(cfg) {
|
||||||
|
t.Fatal("expected disabled when token empty")
|
||||||
|
}
|
||||||
|
if !newBitbucketSource("", "tok", "ws").Enabled(cfg) {
|
||||||
|
t.Fatal("expected enabled when both set")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBitbucket_SweepEmitsFindings(t *testing.T) {
|
||||||
|
var gotAuth, gotPath string
|
||||||
|
var gotQuery string
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
gotAuth = r.Header.Get("Authorization")
|
||||||
|
gotPath = r.URL.Path
|
||||||
|
gotQuery = r.URL.Query().Get("search_query")
|
||||||
|
w.Header().Set("Content-Type", "application/json")
|
||||||
|
_, _ = w.Write([]byte(`{
|
||||||
|
"values": [
|
||||||
|
{
|
||||||
|
"content_match_count": 2,
|
||||||
|
"file": {"path": "secrets/.env", "commit": {"hash": "deadbeef"}},
|
||||||
|
"page_url": "https://bitbucket.org/testws/repo/src/deadbeef/secrets/.env"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}`))
|
||||||
|
}))
|
||||||
|
t.Cleanup(srv.Close)
|
||||||
|
|
||||||
|
src := newBitbucketSource(srv.URL, "tok", "testws")
|
||||||
|
out := make(chan recon.Finding, 16)
|
||||||
|
if err := src.Sweep(context.Background(), "", out); err != nil {
|
||||||
|
t.Fatalf("Sweep: %v", err)
|
||||||
|
}
|
||||||
|
close(out)
|
||||||
|
|
||||||
|
if gotAuth != "Bearer tok" {
|
||||||
|
t.Errorf("Authorization header = %q, want Bearer tok", gotAuth)
|
||||||
|
}
|
||||||
|
if gotPath != "/2.0/workspaces/testws/search/code" {
|
||||||
|
t.Errorf("path = %q", gotPath)
|
||||||
|
}
|
||||||
|
if gotQuery == "" {
|
||||||
|
t.Errorf("expected search_query param to be set")
|
||||||
|
}
|
||||||
|
|
||||||
|
var findings []recon.Finding
|
||||||
|
for f := range out {
|
||||||
|
findings = append(findings, f)
|
||||||
|
}
|
||||||
|
if len(findings) == 0 {
|
||||||
|
t.Fatal("expected at least 1 finding")
|
||||||
|
}
|
||||||
|
f := findings[0]
|
||||||
|
if f.SourceType != "recon:bitbucket" {
|
||||||
|
t.Errorf("SourceType = %q", f.SourceType)
|
||||||
|
}
|
||||||
|
if !strings.Contains(f.Source, "bitbucket.org/testws/repo") {
|
||||||
|
t.Errorf("Source = %q", f.Source)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBitbucket_Unauthorized(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
http.Error(w, "nope", http.StatusUnauthorized)
|
||||||
|
}))
|
||||||
|
t.Cleanup(srv.Close)
|
||||||
|
|
||||||
|
src := newBitbucketSource(srv.URL, "tok", "testws")
|
||||||
|
out := make(chan recon.Finding, 4)
|
||||||
|
err := src.Sweep(context.Background(), "", out)
|
||||||
|
if !errors.Is(err, ErrUnauthorized) {
|
||||||
|
t.Fatalf("err = %v, want ErrUnauthorized", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBitbucket_ContextCancellation(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
time.Sleep(2 * time.Second)
|
||||||
|
w.WriteHeader(200)
|
||||||
|
_, _ = w.Write([]byte(`{"values":[]}`))
|
||||||
|
}))
|
||||||
|
t.Cleanup(srv.Close)
|
||||||
|
|
||||||
|
src := newBitbucketSource(srv.URL, "tok", "testws")
|
||||||
|
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
|
||||||
|
defer cancel()
|
||||||
|
out := make(chan recon.Finding, 1)
|
||||||
|
err := src.Sweep(ctx, "", out)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error from cancelled context")
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user