From 3a8123edc6e8422caae248a7ba2ca1e27e885c0a Mon Sep 17 00:00:00 2001 From: salvacybersec Date: Mon, 6 Apr 2026 12:52:45 +0300 Subject: [PATCH] feat(13-03): implement DockerHubSource and KubernetesSource - DockerHub searches hub.docker.com v2 search API for repos matching provider keywords - Kubernetes searches Artifact Hub for operators/manifests with kind-aware URL paths - Both sources: context cancellation, nil registry, httptest-based tests --- pkg/recon/sources/dockerhub.go | 125 +++++++++++++++++ pkg/recon/sources/dockerhub_test.go | 130 +++++++++++++++++ pkg/recon/sources/kubernetes.go | 156 +++++++++++++++++++++ pkg/recon/sources/kubernetes_test.go | 200 +++++++++++++++++++++++++++ 4 files changed, 611 insertions(+) create mode 100644 pkg/recon/sources/dockerhub.go create mode 100644 pkg/recon/sources/dockerhub_test.go create mode 100644 pkg/recon/sources/kubernetes.go create mode 100644 pkg/recon/sources/kubernetes_test.go diff --git a/pkg/recon/sources/dockerhub.go b/pkg/recon/sources/dockerhub.go new file mode 100644 index 0000000..57cd15c --- /dev/null +++ b/pkg/recon/sources/dockerhub.go @@ -0,0 +1,125 @@ +package sources + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "net/url" + "time" + + "golang.org/x/time/rate" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +// DockerHubSource searches Docker Hub for public images matching provider +// keywords. Unauthenticated search is rate-limited but freely accessible. +// +// Emits one Finding per repository result, tagged SourceType=recon:dockerhub. +type DockerHubSource struct { + // BaseURL defaults to https://hub.docker.com. Tests override with httptest URL. + BaseURL string + // Registry drives the keyword query list via BuildQueries. + Registry *providers.Registry + // Limiters is the shared recon.LimiterRegistry. + Limiters *recon.LimiterRegistry + // Client is the shared retry HTTP wrapper. If nil, a default is used. + Client *Client +} + +// Compile-time assertion that DockerHubSource satisfies recon.ReconSource. +var _ recon.ReconSource = (*DockerHubSource)(nil) + +func (s *DockerHubSource) Name() string { return "dockerhub" } +func (s *DockerHubSource) RateLimit() rate.Limit { return rate.Every(2 * time.Second) } +func (s *DockerHubSource) Burst() int { return 2 } +func (s *DockerHubSource) RespectsRobots() bool { return false } + +// Enabled always returns true: Docker Hub search is unauthenticated. +func (s *DockerHubSource) Enabled(_ recon.Config) bool { return true } + +// Sweep iterates provider keywords, searches Docker Hub for matching +// repositories, and emits a Finding for each result. +func (s *DockerHubSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error { + base := s.BaseURL + if base == "" { + base = "https://hub.docker.com" + } + client := s.Client + if client == nil { + client = NewClient() + } + + queries := BuildQueries(s.Registry, "dockerhub") + if len(queries) == 0 { + return nil + } + + for _, q := range queries { + if err := ctx.Err(); err != nil { + return err + } + + if s.Limiters != nil { + if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil { + return err + } + } + + endpoint := fmt.Sprintf("%s/v2/search/repositories/?query=%s&page_size=20", + base, url.QueryEscape(q)) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) + if err != nil { + return fmt.Errorf("dockerhub: build req: %w", err) + } + req.Header.Set("Accept", "application/json") + + resp, err := client.Do(ctx, req) + if err != nil { + // Non-fatal: skip this keyword on transient errors. + continue + } + + var parsed dockerHubSearchResponse + decErr := json.NewDecoder(resp.Body).Decode(&parsed) + _ = resp.Body.Close() + if decErr != nil { + continue + } + + for _, repo := range parsed.Results { + if err := ctx.Err(); err != nil { + return err + } + sourceURL := fmt.Sprintf("https://hub.docker.com/r/%s", repo.RepoName) + if base != "https://hub.docker.com" { + sourceURL = fmt.Sprintf("%s/r/%s", base, repo.RepoName) + } + f := recon.Finding{ + ProviderName: "", + Source: sourceURL, + SourceType: "recon:dockerhub", + Confidence: "low", + DetectedAt: time.Now(), + } + select { + case out <- f: + case <-ctx.Done(): + return ctx.Err() + } + } + } + return nil +} + +type dockerHubSearchResponse struct { + Results []dockerHubRepo `json:"results"` +} + +type dockerHubRepo struct { + RepoName string `json:"repo_name"` + Description string `json:"description"` + IsOfficial bool `json:"is_official"` +} diff --git a/pkg/recon/sources/dockerhub_test.go b/pkg/recon/sources/dockerhub_test.go new file mode 100644 index 0000000..9f290c2 --- /dev/null +++ b/pkg/recon/sources/dockerhub_test.go @@ -0,0 +1,130 @@ +package sources + +import ( + "context" + "encoding/json" + "errors" + "net/http" + "net/http/httptest" + "sync/atomic" + "testing" + "time" + + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +func dockerHubStubHandler(t *testing.T, calls *int32) http.HandlerFunc { + t.Helper() + return func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(calls, 1) + if r.URL.Path != "/v2/search/repositories/" { + t.Errorf("unexpected path: %s", r.URL.Path) + } + if r.URL.Query().Get("query") == "" { + t.Errorf("missing query param") + } + body := dockerHubSearchResponse{ + Results: []dockerHubRepo{ + {RepoName: "alice/openai-proxy", Description: "OpenAI proxy", IsOfficial: false}, + {RepoName: "bob/llm-gateway", Description: "LLM gateway", IsOfficial: false}, + }, + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(body) + } +} + +func TestDockerHub_SweepEmitsFindings(t *testing.T) { + reg := syntheticRegistry() + lim := recon.NewLimiterRegistry() + _ = lim.For("dockerhub", 1000, 100) + + var calls int32 + srv := httptest.NewServer(dockerHubStubHandler(t, &calls)) + defer srv.Close() + + src := &DockerHubSource{ + BaseURL: srv.URL, + Registry: reg, + Limiters: lim, + Client: NewClient(), + } + + out := make(chan recon.Finding, 32) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + done := make(chan error, 1) + go func() { done <- src.Sweep(ctx, "", out); close(out) }() + + var findings []recon.Finding + for f := range out { + findings = append(findings, f) + } + if err := <-done; err != nil { + t.Fatalf("Sweep error: %v", err) + } + + // 2 keywords * 2 results = 4 findings + if len(findings) != 4 { + t.Fatalf("expected 4 findings, got %d", len(findings)) + } + for _, f := range findings { + if f.SourceType != "recon:dockerhub" { + t.Errorf("SourceType=%q want recon:dockerhub", f.SourceType) + } + } + if got := atomic.LoadInt32(&calls); got != 2 { + t.Errorf("expected 2 server calls, got %d", got) + } +} + +func TestDockerHub_EnabledAlwaysTrue(t *testing.T) { + s := &DockerHubSource{} + if !s.Enabled(recon.Config{}) { + t.Fatal("expected Enabled=true") + } +} + +func TestDockerHub_NameAndRate(t *testing.T) { + s := &DockerHubSource{} + if s.Name() != "dockerhub" { + t.Errorf("unexpected name: %s", s.Name()) + } + if s.Burst() != 2 { + t.Errorf("burst: %d", s.Burst()) + } + if s.RespectsRobots() { + t.Error("expected RespectsRobots=false") + } +} + +func TestDockerHub_CtxCancelled(t *testing.T) { + reg := syntheticRegistry() + lim := recon.NewLimiterRegistry() + _ = lim.For("dockerhub", 1000, 100) + + src := &DockerHubSource{ + BaseURL: "http://127.0.0.1:1", + Registry: reg, + Limiters: lim, + Client: NewClient(), + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + out := make(chan recon.Finding, 1) + err := src.Sweep(ctx, "", out) + if !errors.Is(err, context.Canceled) { + t.Fatalf("expected context.Canceled, got %v", err) + } +} + +func TestDockerHub_NilRegistryNoError(t *testing.T) { + src := &DockerHubSource{Client: NewClient()} + out := make(chan recon.Finding, 1) + if err := src.Sweep(context.Background(), "", out); err != nil { + t.Fatalf("expected nil, got %v", err) + } +} diff --git a/pkg/recon/sources/kubernetes.go b/pkg/recon/sources/kubernetes.go new file mode 100644 index 0000000..af87b7d --- /dev/null +++ b/pkg/recon/sources/kubernetes.go @@ -0,0 +1,156 @@ +package sources + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "net/url" + "time" + + "golang.org/x/time/rate" + + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +// KubernetesSource searches Artifact Hub for Kubernetes operators and manifests +// matching provider keywords. This discovers publicly published K8s packages +// that may embed API keys in their manifests or values files. +// +// Emits one Finding per package result, tagged SourceType=recon:k8s. +type KubernetesSource struct { + // BaseURL defaults to https://artifacthub.io. Tests override with httptest URL. + BaseURL string + // Registry drives the keyword query list via BuildQueries. + Registry *providers.Registry + // Limiters is the shared recon.LimiterRegistry. + Limiters *recon.LimiterRegistry + // Client is the shared retry HTTP wrapper. If nil, a default is used. + Client *Client +} + +// Compile-time assertion that KubernetesSource satisfies recon.ReconSource. +var _ recon.ReconSource = (*KubernetesSource)(nil) + +func (s *KubernetesSource) Name() string { return "k8s" } +func (s *KubernetesSource) RateLimit() rate.Limit { return rate.Every(3 * time.Second) } +func (s *KubernetesSource) Burst() int { return 1 } +func (s *KubernetesSource) RespectsRobots() bool { return true } + +// Enabled always returns true: Artifact Hub search is unauthenticated. +func (s *KubernetesSource) Enabled(_ recon.Config) bool { return true } + +// Sweep iterates provider keywords, searches Artifact Hub for Kubernetes +// operators (kind=6), and emits a Finding for each result. +func (s *KubernetesSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error { + base := s.BaseURL + if base == "" { + base = "https://artifacthub.io" + } + client := s.Client + if client == nil { + client = NewClient() + } + + queries := BuildQueries(s.Registry, "k8s") + if len(queries) == 0 { + return nil + } + + for _, q := range queries { + if err := ctx.Err(); err != nil { + return err + } + + if s.Limiters != nil { + if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil { + return err + } + } + + // kind left empty to search across all Kubernetes-related package types. + endpoint := fmt.Sprintf("%s/api/v1/packages/search?ts_query_web=%s&limit=20", + base, url.QueryEscape(q)) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) + if err != nil { + return fmt.Errorf("k8s: build req: %w", err) + } + req.Header.Set("Accept", "application/json") + + resp, err := client.Do(ctx, req) + if err != nil { + continue + } + + var parsed k8sSearchResponse + decErr := json.NewDecoder(resp.Body).Decode(&parsed) + _ = resp.Body.Close() + if decErr != nil { + continue + } + + for _, pkg := range parsed.Packages { + if err := ctx.Err(); err != nil { + return err + } + + repoName := "" + if pkg.Repository.Name != "" { + repoName = pkg.Repository.Name + } + + kindPath := k8sKindPath(pkg.Repository.Kind) + sourceURL := fmt.Sprintf("https://artifacthub.io/packages/%s/%s/%s", + kindPath, repoName, pkg.NormalizedName) + if base != "https://artifacthub.io" { + sourceURL = fmt.Sprintf("%s/packages/%s/%s/%s", + base, kindPath, repoName, pkg.NormalizedName) + } + + f := recon.Finding{ + ProviderName: "", + Source: sourceURL, + SourceType: "recon:k8s", + Confidence: "low", + DetectedAt: time.Now(), + } + select { + case out <- f: + case <-ctx.Done(): + return ctx.Err() + } + } + } + return nil +} + +type k8sSearchResponse struct { + Packages []k8sPackage `json:"packages"` +} + +type k8sPackage struct { + PackageID string `json:"package_id"` + Name string `json:"name"` + NormalizedName string `json:"normalized_name"` + Repository k8sRepo `json:"repository"` +} + +type k8sRepo struct { + Name string `json:"name"` + Kind int `json:"kind"` +} + +// k8sKindPath maps Artifact Hub kind integers to URL path segments. +func k8sKindPath(kind int) string { + switch kind { + case 0: + return "helm" + case 6: + return "kube-operator" + case 7: + return "kubectl" + default: + return "other" + } +} diff --git a/pkg/recon/sources/kubernetes_test.go b/pkg/recon/sources/kubernetes_test.go new file mode 100644 index 0000000..416aa41 --- /dev/null +++ b/pkg/recon/sources/kubernetes_test.go @@ -0,0 +1,200 @@ +package sources + +import ( + "context" + "encoding/json" + "errors" + "net/http" + "net/http/httptest" + "sync/atomic" + "testing" + "time" + + "github.com/salvacybersec/keyhunter/pkg/recon" +) + +func k8sStubHandler(t *testing.T, calls *int32) http.HandlerFunc { + t.Helper() + return func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(calls, 1) + if r.URL.Path != "/api/v1/packages/search" { + t.Errorf("unexpected path: %s", r.URL.Path) + } + if r.URL.Query().Get("ts_query_web") == "" { + t.Errorf("missing ts_query_web param") + } + body := k8sSearchResponse{ + Packages: []k8sPackage{ + { + PackageID: "pkg-1", + Name: "openai-operator", + NormalizedName: "openai-operator", + Repository: k8sRepo{Name: "community", Kind: 6}, + }, + { + PackageID: "pkg-2", + Name: "llm-secrets", + NormalizedName: "llm-secrets", + Repository: k8sRepo{Name: "stable", Kind: 0}, + }, + }, + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(body) + } +} + +func TestKubernetes_SweepEmitsFindings(t *testing.T) { + reg := syntheticRegistry() + lim := recon.NewLimiterRegistry() + _ = lim.For("k8s", 1000, 100) + + var calls int32 + srv := httptest.NewServer(k8sStubHandler(t, &calls)) + defer srv.Close() + + src := &KubernetesSource{ + BaseURL: srv.URL, + Registry: reg, + Limiters: lim, + Client: NewClient(), + } + + out := make(chan recon.Finding, 32) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + done := make(chan error, 1) + go func() { done <- src.Sweep(ctx, "", out); close(out) }() + + var findings []recon.Finding + for f := range out { + findings = append(findings, f) + } + if err := <-done; err != nil { + t.Fatalf("Sweep error: %v", err) + } + + // 2 keywords * 2 results = 4 findings + if len(findings) != 4 { + t.Fatalf("expected 4 findings, got %d", len(findings)) + } + for _, f := range findings { + if f.SourceType != "recon:k8s" { + t.Errorf("SourceType=%q want recon:k8s", f.SourceType) + } + } + if got := atomic.LoadInt32(&calls); got != 2 { + t.Errorf("expected 2 server calls, got %d", got) + } +} + +func TestKubernetes_KindPaths(t *testing.T) { + reg := syntheticRegistry() + lim := recon.NewLimiterRegistry() + _ = lim.For("k8s", 1000, 100) + + var calls int32 + srv := httptest.NewServer(k8sStubHandler(t, &calls)) + defer srv.Close() + + src := &KubernetesSource{ + BaseURL: srv.URL, + Registry: reg, + Limiters: lim, + Client: NewClient(), + } + + out := make(chan recon.Finding, 32) + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + done := make(chan error, 1) + go func() { done <- src.Sweep(ctx, "", out); close(out) }() + + var findings []recon.Finding + for f := range out { + findings = append(findings, f) + } + if err := <-done; err != nil { + t.Fatalf("Sweep error: %v", err) + } + + // Check that kind=6 maps to kube-operator and kind=0 maps to helm in URLs. + hasOperator := false + hasHelm := false + for _, f := range findings { + if contains(f.Source, "/kube-operator/") { + hasOperator = true + } + if contains(f.Source, "/helm/") { + hasHelm = true + } + } + if !hasOperator { + t.Error("expected at least one finding with kube-operator path") + } + if !hasHelm { + t.Error("expected at least one finding with helm path") + } +} + +func TestKubernetes_EnabledAlwaysTrue(t *testing.T) { + s := &KubernetesSource{} + if !s.Enabled(recon.Config{}) { + t.Fatal("expected Enabled=true") + } +} + +func TestKubernetes_NameAndRate(t *testing.T) { + s := &KubernetesSource{} + if s.Name() != "k8s" { + t.Errorf("unexpected name: %s", s.Name()) + } + if s.Burst() != 1 { + t.Errorf("burst: %d", s.Burst()) + } + if !s.RespectsRobots() { + t.Error("expected RespectsRobots=true") + } +} + +func TestKubernetes_CtxCancelled(t *testing.T) { + reg := syntheticRegistry() + lim := recon.NewLimiterRegistry() + _ = lim.For("k8s", 1000, 100) + + src := &KubernetesSource{ + BaseURL: "http://127.0.0.1:1", + Registry: reg, + Limiters: lim, + Client: NewClient(), + } + + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + out := make(chan recon.Finding, 1) + err := src.Sweep(ctx, "", out) + if !errors.Is(err, context.Canceled) { + t.Fatalf("expected context.Canceled, got %v", err) + } +} + +func TestKubernetes_NilRegistryNoError(t *testing.T) { + src := &KubernetesSource{Client: NewClient()} + out := make(chan recon.Finding, 1) + if err := src.Sweep(context.Background(), "", out); err != nil { + t.Fatalf("expected nil, got %v", err) + } +} + +// contains checks if substr is in s. Avoids importing strings in test. +func contains(s, substr string) bool { + for i := 0; i+len(substr) <= len(s); i++ { + if s[i:i+len(substr)] == substr { + return true + } + } + return false +}