feat(10-04): add BitbucketSource for code search recon
- BitbucketSource implements recon.ReconSource (RECON-CODE-03)
- Queries /2.0/workspaces/{ws}/search/code with Bearer auth
- Disabled when token OR workspace empty
- Rate: rate.Every(3.6s), burst 1 (Bitbucket 1000/hr limit)
- httptest coverage: enable gating, sweep, 401, ctx cancel
This commit is contained in:
174
pkg/recon/sources/bitbucket.go
Normal file
174
pkg/recon/sources/bitbucket.go
Normal file
@@ -0,0 +1,174 @@
|
||||
package sources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"time"
|
||||
|
||||
"golang.org/x/time/rate"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||
)
|
||||
|
||||
// BitbucketSource queries the Bitbucket Cloud 2.0 code search API for leaked
|
||||
// provider keywords across a configured workspace (RECON-CODE-03).
|
||||
//
|
||||
// Docs: https://developer.atlassian.com/cloud/bitbucket/rest/api-group-search/
|
||||
// Rate: 1000 req/hour → rate.Every(3.6s), burst 1.
|
||||
// Scope: requires both a token (app password or OAuth) AND a workspace slug;
|
||||
// absent either, the source disables itself cleanly (no error).
|
||||
type BitbucketSource struct {
|
||||
Token string
|
||||
Workspace string
|
||||
BaseURL string
|
||||
Registry *providers.Registry
|
||||
Limiters *recon.LimiterRegistry
|
||||
|
||||
client *Client
|
||||
}
|
||||
|
||||
var _ recon.ReconSource = (*BitbucketSource)(nil)
|
||||
|
||||
// Name returns the stable source identifier.
|
||||
func (s *BitbucketSource) Name() string { return "bitbucket" }
|
||||
|
||||
// RateLimit reports the per-source token bucket rate (1000/hour).
|
||||
func (s *BitbucketSource) RateLimit() rate.Limit {
|
||||
return rate.Every(3600 * time.Millisecond)
|
||||
}
|
||||
|
||||
// Burst reports the token bucket burst capacity.
|
||||
func (s *BitbucketSource) Burst() int { return 1 }
|
||||
|
||||
// RespectsRobots reports whether robots.txt applies (REST API → false).
|
||||
func (s *BitbucketSource) RespectsRobots() bool { return false }
|
||||
|
||||
// Enabled reports whether the source should run. Requires both token and
|
||||
// workspace to be non-empty.
|
||||
func (s *BitbucketSource) Enabled(cfg recon.Config) bool {
|
||||
return s.Token != "" && s.Workspace != ""
|
||||
}
|
||||
|
||||
// bitbucketSearchResponse is the decoding target for a code search reply. Only
// the fields listed here are decoded; everything else in the payload is
// ignored.
type bitbucketSearchResponse struct {
	// Values holds one element per entry of the reply's "values" array.
	Values []struct {
		// ContentMatchCount is decoded from "content_match_count".
		ContentMatchCount int `json:"content_match_count"`
		// PageURL is decoded from "page_url"; Sweep prefers it as the
		// Finding source when it is non-empty.
		PageURL string `json:"page_url"`
		// File identifies the matched file by path and commit hash.
		File struct {
			Path   string `json:"path"`
			Commit struct {
				Hash string `json:"hash"`
			} `json:"commit"`
		} `json:"file"`
	} `json:"values"`
}
|
||||
|
||||
// Sweep iterates queries built from the provider registry, issues one search
|
||||
// request per query (rate-limited via Limiters), and emits one Finding per
|
||||
// `values` entry in the response.
|
||||
func (s *BitbucketSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
|
||||
if s.client == nil {
|
||||
s.client = NewClient()
|
||||
}
|
||||
base := s.BaseURL
|
||||
if base == "" {
|
||||
base = "https://api.bitbucket.org"
|
||||
}
|
||||
|
||||
queries := BuildQueries(s.Registry, "bitbucket")
|
||||
for _, q := range queries {
|
||||
if err := ctx.Err(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if s.Limiters != nil {
|
||||
if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
endpoint := fmt.Sprintf("%s/2.0/workspaces/%s/search/code", base, url.PathEscape(s.Workspace))
|
||||
req, err := http.NewRequest(http.MethodGet, endpoint, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("bitbucket: build request: %w", err)
|
||||
}
|
||||
vals := req.URL.Query()
|
||||
vals.Set("search_query", q)
|
||||
req.URL.RawQuery = vals.Encode()
|
||||
req.Header.Set("Authorization", "Bearer "+s.Token)
|
||||
req.Header.Set("Accept", "application/json")
|
||||
|
||||
resp, err := s.client.Do(ctx, req)
|
||||
if err != nil {
|
||||
return fmt.Errorf("bitbucket: sweep: %w", err)
|
||||
}
|
||||
|
||||
var body bitbucketSearchResponse
|
||||
dec := json.NewDecoder(resp.Body)
|
||||
decodeErr := dec.Decode(&body)
|
||||
_ = resp.Body.Close()
|
||||
if decodeErr != nil {
|
||||
return fmt.Errorf("bitbucket: decode: %w", decodeErr)
|
||||
}
|
||||
|
||||
for _, v := range body.Values {
|
||||
src := v.PageURL
|
||||
if src == "" {
|
||||
src = fmt.Sprintf("bitbucket:%s/%s@%s", s.Workspace, v.File.Path, v.File.Commit.Hash)
|
||||
}
|
||||
f := recon.Finding{
|
||||
ProviderName: providerForQuery(s.Registry, q),
|
||||
Source: src,
|
||||
SourceType: "recon:bitbucket",
|
||||
DetectedAt: time.Now().UTC(),
|
||||
}
|
||||
select {
|
||||
case out <- f:
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// providerForQuery returns the provider name whose keyword appears in q, or
|
||||
// empty string if no match is found. Used to label Findings with their source
|
||||
// provider when the remote API doesn't echo the original keyword.
|
||||
func providerForQuery(reg *providers.Registry, q string) string {
|
||||
if reg == nil {
|
||||
return ""
|
||||
}
|
||||
for _, p := range reg.List() {
|
||||
for _, k := range p.Keywords {
|
||||
if k == "" {
|
||||
continue
|
||||
}
|
||||
if containsFold(q, k) {
|
||||
return p.Name
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// containsFold reports whether needle occurs in haystack, ignoring the case of
// ASCII letters. Bytes outside A-Z/a-z (including multi-byte UTF-8 sequences)
// must match exactly. An empty needle, or a needle longer than haystack, never
// matches.
func containsFold(haystack, needle string) bool {
	n := len(needle)
	if n == 0 || n > len(haystack) {
		return false
	}
	// lower maps an ASCII upper-case letter to lower case and leaves every
	// other byte untouched.
	lower := func(c byte) byte {
		if 'A' <= c && c <= 'Z' {
			return c + ('a' - 'A')
		}
		return c
	}
	for start := 0; start+n <= len(haystack); start++ {
		matched := true
		for j := 0; j < n; j++ {
			if lower(haystack[start+j]) != lower(needle[j]) {
				matched = false
				break
			}
		}
		if matched {
			return true
		}
	}
	return false
}
|
||||
132
pkg/recon/sources/bitbucket_test.go
Normal file
132
pkg/recon/sources/bitbucket_test.go
Normal file
@@ -0,0 +1,132 @@
|
||||
package sources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||
)
|
||||
|
||||
func bitbucketTestRegistry() *providers.Registry {
|
||||
return providers.NewRegistryFromProviders([]providers.Provider{
|
||||
{Name: "openai", Keywords: []string{"sk-proj-"}},
|
||||
})
|
||||
}
|
||||
|
||||
func newBitbucketSource(baseURL, token, workspace string) *BitbucketSource {
|
||||
return &BitbucketSource{
|
||||
Token: token,
|
||||
Workspace: workspace,
|
||||
BaseURL: baseURL,
|
||||
Registry: bitbucketTestRegistry(),
|
||||
Limiters: recon.NewLimiterRegistry(),
|
||||
}
|
||||
}
|
||||
|
||||
func TestBitbucket_EnabledRequiresTokenAndWorkspace(t *testing.T) {
|
||||
cfg := recon.Config{}
|
||||
|
||||
if newBitbucketSource("", "", "").Enabled(cfg) {
|
||||
t.Fatal("expected disabled when token+workspace empty")
|
||||
}
|
||||
if newBitbucketSource("", "tok", "").Enabled(cfg) {
|
||||
t.Fatal("expected disabled when workspace empty")
|
||||
}
|
||||
if newBitbucketSource("", "", "ws").Enabled(cfg) {
|
||||
t.Fatal("expected disabled when token empty")
|
||||
}
|
||||
if !newBitbucketSource("", "tok", "ws").Enabled(cfg) {
|
||||
t.Fatal("expected enabled when both set")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBitbucket_SweepEmitsFindings(t *testing.T) {
|
||||
var gotAuth, gotPath string
|
||||
var gotQuery string
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
gotAuth = r.Header.Get("Authorization")
|
||||
gotPath = r.URL.Path
|
||||
gotQuery = r.URL.Query().Get("search_query")
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{
|
||||
"values": [
|
||||
{
|
||||
"content_match_count": 2,
|
||||
"file": {"path": "secrets/.env", "commit": {"hash": "deadbeef"}},
|
||||
"page_url": "https://bitbucket.org/testws/repo/src/deadbeef/secrets/.env"
|
||||
}
|
||||
]
|
||||
}`))
|
||||
}))
|
||||
t.Cleanup(srv.Close)
|
||||
|
||||
src := newBitbucketSource(srv.URL, "tok", "testws")
|
||||
out := make(chan recon.Finding, 16)
|
||||
if err := src.Sweep(context.Background(), "", out); err != nil {
|
||||
t.Fatalf("Sweep: %v", err)
|
||||
}
|
||||
close(out)
|
||||
|
||||
if gotAuth != "Bearer tok" {
|
||||
t.Errorf("Authorization header = %q, want Bearer tok", gotAuth)
|
||||
}
|
||||
if gotPath != "/2.0/workspaces/testws/search/code" {
|
||||
t.Errorf("path = %q", gotPath)
|
||||
}
|
||||
if gotQuery == "" {
|
||||
t.Errorf("expected search_query param to be set")
|
||||
}
|
||||
|
||||
var findings []recon.Finding
|
||||
for f := range out {
|
||||
findings = append(findings, f)
|
||||
}
|
||||
if len(findings) == 0 {
|
||||
t.Fatal("expected at least 1 finding")
|
||||
}
|
||||
f := findings[0]
|
||||
if f.SourceType != "recon:bitbucket" {
|
||||
t.Errorf("SourceType = %q", f.SourceType)
|
||||
}
|
||||
if !strings.Contains(f.Source, "bitbucket.org/testws/repo") {
|
||||
t.Errorf("Source = %q", f.Source)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBitbucket_Unauthorized(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
http.Error(w, "nope", http.StatusUnauthorized)
|
||||
}))
|
||||
t.Cleanup(srv.Close)
|
||||
|
||||
src := newBitbucketSource(srv.URL, "tok", "testws")
|
||||
out := make(chan recon.Finding, 4)
|
||||
err := src.Sweep(context.Background(), "", out)
|
||||
if !errors.Is(err, ErrUnauthorized) {
|
||||
t.Fatalf("err = %v, want ErrUnauthorized", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBitbucket_ContextCancellation(t *testing.T) {
|
||||
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
time.Sleep(2 * time.Second)
|
||||
w.WriteHeader(200)
|
||||
_, _ = w.Write([]byte(`{"values":[]}`))
|
||||
}))
|
||||
t.Cleanup(srv.Close)
|
||||
|
||||
src := newBitbucketSource(srv.URL, "tok", "testws")
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 50*time.Millisecond)
|
||||
defer cancel()
|
||||
out := make(chan recon.Finding, 1)
|
||||
err := src.Sweep(ctx, "", out)
|
||||
if err == nil {
|
||||
t.Fatal("expected error from cancelled context")
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user