feat(13-03): implement DockerHubSource and KubernetesSource

- DockerHub searches hub.docker.com v2 search API for repos matching provider keywords
- Kubernetes searches Artifact Hub for operators/manifests with kind-aware URL paths
- Both sources: context cancellation, nil registry, httptest-based tests
This commit is contained in:
salvacybersec
2026-04-06 12:52:45 +03:00
parent 23613150f6
commit 3a8123edc6
4 changed files with 611 additions and 0 deletions

View File

@@ -0,0 +1,125 @@
package sources
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/url"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// DockerHubSource searches Docker Hub for public images matching provider
// keywords. Unauthenticated search is rate-limited but freely accessible.
//
// Emits one Finding per repository result, tagged SourceType=recon:dockerhub.
type DockerHubSource struct {
// BaseURL defaults to https://hub.docker.com. Tests override with httptest URL.
BaseURL string
// Registry drives the keyword query list via BuildQueries.
Registry *providers.Registry
// Limiters is the shared recon.LimiterRegistry.
Limiters *recon.LimiterRegistry
// Client is the shared retry HTTP wrapper. If nil, a default is used.
Client *Client
}
// Compile-time assertion that DockerHubSource satisfies recon.ReconSource.
var _ recon.ReconSource = (*DockerHubSource)(nil)
func (s *DockerHubSource) Name() string { return "dockerhub" }
func (s *DockerHubSource) RateLimit() rate.Limit { return rate.Every(2 * time.Second) }
func (s *DockerHubSource) Burst() int { return 2 }
func (s *DockerHubSource) RespectsRobots() bool { return false }
// Enabled always returns true: Docker Hub search is unauthenticated.
func (s *DockerHubSource) Enabled(_ recon.Config) bool { return true }
// Sweep iterates provider keywords, searches Docker Hub for matching
// repositories, and emits a Finding for each result.
func (s *DockerHubSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
base := s.BaseURL
if base == "" {
base = "https://hub.docker.com"
}
client := s.Client
if client == nil {
client = NewClient()
}
queries := BuildQueries(s.Registry, "dockerhub")
if len(queries) == 0 {
return nil
}
for _, q := range queries {
if err := ctx.Err(); err != nil {
return err
}
if s.Limiters != nil {
if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
return err
}
}
endpoint := fmt.Sprintf("%s/v2/search/repositories/?query=%s&page_size=20",
base, url.QueryEscape(q))
req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
if err != nil {
return fmt.Errorf("dockerhub: build req: %w", err)
}
req.Header.Set("Accept", "application/json")
resp, err := client.Do(ctx, req)
if err != nil {
// Non-fatal: skip this keyword on transient errors.
continue
}
var parsed dockerHubSearchResponse
decErr := json.NewDecoder(resp.Body).Decode(&parsed)
_ = resp.Body.Close()
if decErr != nil {
continue
}
for _, repo := range parsed.Results {
if err := ctx.Err(); err != nil {
return err
}
sourceURL := fmt.Sprintf("https://hub.docker.com/r/%s", repo.RepoName)
if base != "https://hub.docker.com" {
sourceURL = fmt.Sprintf("%s/r/%s", base, repo.RepoName)
}
f := recon.Finding{
ProviderName: "",
Source: sourceURL,
SourceType: "recon:dockerhub",
Confidence: "low",
DetectedAt: time.Now(),
}
select {
case out <- f:
case <-ctx.Done():
return ctx.Err()
}
}
}
return nil
}
type dockerHubSearchResponse struct {
Results []dockerHubRepo `json:"results"`
}
type dockerHubRepo struct {
RepoName string `json:"repo_name"`
Description string `json:"description"`
IsOfficial bool `json:"is_official"`
}

View File

@@ -0,0 +1,130 @@
package sources
import (
"context"
"encoding/json"
"errors"
"net/http"
"net/http/httptest"
"sync/atomic"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
func dockerHubStubHandler(t *testing.T, calls *int32) http.HandlerFunc {
t.Helper()
return func(w http.ResponseWriter, r *http.Request) {
atomic.AddInt32(calls, 1)
if r.URL.Path != "/v2/search/repositories/" {
t.Errorf("unexpected path: %s", r.URL.Path)
}
if r.URL.Query().Get("query") == "" {
t.Errorf("missing query param")
}
body := dockerHubSearchResponse{
Results: []dockerHubRepo{
{RepoName: "alice/openai-proxy", Description: "OpenAI proxy", IsOfficial: false},
{RepoName: "bob/llm-gateway", Description: "LLM gateway", IsOfficial: false},
},
}
w.Header().Set("Content-Type", "application/json")
_ = json.NewEncoder(w).Encode(body)
}
}
func TestDockerHub_SweepEmitsFindings(t *testing.T) {
reg := syntheticRegistry()
lim := recon.NewLimiterRegistry()
_ = lim.For("dockerhub", 1000, 100)
var calls int32
srv := httptest.NewServer(dockerHubStubHandler(t, &calls))
defer srv.Close()
src := &DockerHubSource{
BaseURL: srv.URL,
Registry: reg,
Limiters: lim,
Client: NewClient(),
}
out := make(chan recon.Finding, 32)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
done := make(chan error, 1)
go func() { done <- src.Sweep(ctx, "", out); close(out) }()
var findings []recon.Finding
for f := range out {
findings = append(findings, f)
}
if err := <-done; err != nil {
t.Fatalf("Sweep error: %v", err)
}
// 2 keywords * 2 results = 4 findings
if len(findings) != 4 {
t.Fatalf("expected 4 findings, got %d", len(findings))
}
for _, f := range findings {
if f.SourceType != "recon:dockerhub" {
t.Errorf("SourceType=%q want recon:dockerhub", f.SourceType)
}
}
if got := atomic.LoadInt32(&calls); got != 2 {
t.Errorf("expected 2 server calls, got %d", got)
}
}
func TestDockerHub_EnabledAlwaysTrue(t *testing.T) {
s := &DockerHubSource{}
if !s.Enabled(recon.Config{}) {
t.Fatal("expected Enabled=true")
}
}
func TestDockerHub_NameAndRate(t *testing.T) {
s := &DockerHubSource{}
if s.Name() != "dockerhub" {
t.Errorf("unexpected name: %s", s.Name())
}
if s.Burst() != 2 {
t.Errorf("burst: %d", s.Burst())
}
if s.RespectsRobots() {
t.Error("expected RespectsRobots=false")
}
}
func TestDockerHub_CtxCancelled(t *testing.T) {
reg := syntheticRegistry()
lim := recon.NewLimiterRegistry()
_ = lim.For("dockerhub", 1000, 100)
src := &DockerHubSource{
BaseURL: "http://127.0.0.1:1",
Registry: reg,
Limiters: lim,
Client: NewClient(),
}
ctx, cancel := context.WithCancel(context.Background())
cancel()
out := make(chan recon.Finding, 1)
err := src.Sweep(ctx, "", out)
if !errors.Is(err, context.Canceled) {
t.Fatalf("expected context.Canceled, got %v", err)
}
}
func TestDockerHub_NilRegistryNoError(t *testing.T) {
src := &DockerHubSource{Client: NewClient()}
out := make(chan recon.Finding, 1)
if err := src.Sweep(context.Background(), "", out); err != nil {
t.Fatalf("expected nil, got %v", err)
}
}

View File

@@ -0,0 +1,156 @@
package sources
import (
"context"
"encoding/json"
"fmt"
"net/http"
"net/url"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// KubernetesSource searches Artifact Hub for Kubernetes operators and manifests
// matching provider keywords. This discovers publicly published K8s packages
// that may embed API keys in their manifests or values files.
//
// Emits one Finding per package result, tagged SourceType=recon:k8s.
type KubernetesSource struct {
// BaseURL defaults to https://artifacthub.io. Tests override with httptest URL.
BaseURL string
// Registry drives the keyword query list via BuildQueries.
Registry *providers.Registry
// Limiters is the shared recon.LimiterRegistry.
Limiters *recon.LimiterRegistry
// Client is the shared retry HTTP wrapper. If nil, a default is used.
Client *Client
}
// Compile-time assertion that KubernetesSource satisfies recon.ReconSource.
var _ recon.ReconSource = (*KubernetesSource)(nil)
func (s *KubernetesSource) Name() string { return "k8s" }
func (s *KubernetesSource) RateLimit() rate.Limit { return rate.Every(3 * time.Second) }
func (s *KubernetesSource) Burst() int { return 1 }
func (s *KubernetesSource) RespectsRobots() bool { return true }
// Enabled always returns true: Artifact Hub search is unauthenticated.
func (s *KubernetesSource) Enabled(_ recon.Config) bool { return true }
// Sweep iterates provider keywords, searches Artifact Hub for Kubernetes
// operators (kind=6), and emits a Finding for each result.
func (s *KubernetesSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
base := s.BaseURL
if base == "" {
base = "https://artifacthub.io"
}
client := s.Client
if client == nil {
client = NewClient()
}
queries := BuildQueries(s.Registry, "k8s")
if len(queries) == 0 {
return nil
}
for _, q := range queries {
if err := ctx.Err(); err != nil {
return err
}
if s.Limiters != nil {
if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
return err
}
}
// kind left empty to search across all Kubernetes-related package types.
endpoint := fmt.Sprintf("%s/api/v1/packages/search?ts_query_web=%s&limit=20",
base, url.QueryEscape(q))
req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
if err != nil {
return fmt.Errorf("k8s: build req: %w", err)
}
req.Header.Set("Accept", "application/json")
resp, err := client.Do(ctx, req)
if err != nil {
continue
}
var parsed k8sSearchResponse
decErr := json.NewDecoder(resp.Body).Decode(&parsed)
_ = resp.Body.Close()
if decErr != nil {
continue
}
for _, pkg := range parsed.Packages {
if err := ctx.Err(); err != nil {
return err
}
repoName := ""
if pkg.Repository.Name != "" {
repoName = pkg.Repository.Name
}
kindPath := k8sKindPath(pkg.Repository.Kind)
sourceURL := fmt.Sprintf("https://artifacthub.io/packages/%s/%s/%s",
kindPath, repoName, pkg.NormalizedName)
if base != "https://artifacthub.io" {
sourceURL = fmt.Sprintf("%s/packages/%s/%s/%s",
base, kindPath, repoName, pkg.NormalizedName)
}
f := recon.Finding{
ProviderName: "",
Source: sourceURL,
SourceType: "recon:k8s",
Confidence: "low",
DetectedAt: time.Now(),
}
select {
case out <- f:
case <-ctx.Done():
return ctx.Err()
}
}
}
return nil
}
type k8sSearchResponse struct {
Packages []k8sPackage `json:"packages"`
}
type k8sPackage struct {
PackageID string `json:"package_id"`
Name string `json:"name"`
NormalizedName string `json:"normalized_name"`
Repository k8sRepo `json:"repository"`
}
type k8sRepo struct {
Name string `json:"name"`
Kind int `json:"kind"`
}
// k8sKindPath maps Artifact Hub kind integers to URL path segments.
func k8sKindPath(kind int) string {
switch kind {
case 0:
return "helm"
case 6:
return "kube-operator"
case 7:
return "kubectl"
default:
return "other"
}
}

View File

@@ -0,0 +1,200 @@
package sources
import (
"context"
"encoding/json"
"errors"
"net/http"
"net/http/httptest"
"sync/atomic"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
func k8sStubHandler(t *testing.T, calls *int32) http.HandlerFunc {
t.Helper()
return func(w http.ResponseWriter, r *http.Request) {
atomic.AddInt32(calls, 1)
if r.URL.Path != "/api/v1/packages/search" {
t.Errorf("unexpected path: %s", r.URL.Path)
}
if r.URL.Query().Get("ts_query_web") == "" {
t.Errorf("missing ts_query_web param")
}
body := k8sSearchResponse{
Packages: []k8sPackage{
{
PackageID: "pkg-1",
Name: "openai-operator",
NormalizedName: "openai-operator",
Repository: k8sRepo{Name: "community", Kind: 6},
},
{
PackageID: "pkg-2",
Name: "llm-secrets",
NormalizedName: "llm-secrets",
Repository: k8sRepo{Name: "stable", Kind: 0},
},
},
}
w.Header().Set("Content-Type", "application/json")
_ = json.NewEncoder(w).Encode(body)
}
}
func TestKubernetes_SweepEmitsFindings(t *testing.T) {
reg := syntheticRegistry()
lim := recon.NewLimiterRegistry()
_ = lim.For("k8s", 1000, 100)
var calls int32
srv := httptest.NewServer(k8sStubHandler(t, &calls))
defer srv.Close()
src := &KubernetesSource{
BaseURL: srv.URL,
Registry: reg,
Limiters: lim,
Client: NewClient(),
}
out := make(chan recon.Finding, 32)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
done := make(chan error, 1)
go func() { done <- src.Sweep(ctx, "", out); close(out) }()
var findings []recon.Finding
for f := range out {
findings = append(findings, f)
}
if err := <-done; err != nil {
t.Fatalf("Sweep error: %v", err)
}
// 2 keywords * 2 results = 4 findings
if len(findings) != 4 {
t.Fatalf("expected 4 findings, got %d", len(findings))
}
for _, f := range findings {
if f.SourceType != "recon:k8s" {
t.Errorf("SourceType=%q want recon:k8s", f.SourceType)
}
}
if got := atomic.LoadInt32(&calls); got != 2 {
t.Errorf("expected 2 server calls, got %d", got)
}
}
func TestKubernetes_KindPaths(t *testing.T) {
reg := syntheticRegistry()
lim := recon.NewLimiterRegistry()
_ = lim.For("k8s", 1000, 100)
var calls int32
srv := httptest.NewServer(k8sStubHandler(t, &calls))
defer srv.Close()
src := &KubernetesSource{
BaseURL: srv.URL,
Registry: reg,
Limiters: lim,
Client: NewClient(),
}
out := make(chan recon.Finding, 32)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
done := make(chan error, 1)
go func() { done <- src.Sweep(ctx, "", out); close(out) }()
var findings []recon.Finding
for f := range out {
findings = append(findings, f)
}
if err := <-done; err != nil {
t.Fatalf("Sweep error: %v", err)
}
// Check that kind=6 maps to kube-operator and kind=0 maps to helm in URLs.
hasOperator := false
hasHelm := false
for _, f := range findings {
if contains(f.Source, "/kube-operator/") {
hasOperator = true
}
if contains(f.Source, "/helm/") {
hasHelm = true
}
}
if !hasOperator {
t.Error("expected at least one finding with kube-operator path")
}
if !hasHelm {
t.Error("expected at least one finding with helm path")
}
}
func TestKubernetes_EnabledAlwaysTrue(t *testing.T) {
s := &KubernetesSource{}
if !s.Enabled(recon.Config{}) {
t.Fatal("expected Enabled=true")
}
}
func TestKubernetes_NameAndRate(t *testing.T) {
s := &KubernetesSource{}
if s.Name() != "k8s" {
t.Errorf("unexpected name: %s", s.Name())
}
if s.Burst() != 1 {
t.Errorf("burst: %d", s.Burst())
}
if !s.RespectsRobots() {
t.Error("expected RespectsRobots=true")
}
}
func TestKubernetes_CtxCancelled(t *testing.T) {
reg := syntheticRegistry()
lim := recon.NewLimiterRegistry()
_ = lim.For("k8s", 1000, 100)
src := &KubernetesSource{
BaseURL: "http://127.0.0.1:1",
Registry: reg,
Limiters: lim,
Client: NewClient(),
}
ctx, cancel := context.WithCancel(context.Background())
cancel()
out := make(chan recon.Finding, 1)
err := src.Sweep(ctx, "", out)
if !errors.Is(err, context.Canceled) {
t.Fatalf("expected context.Canceled, got %v", err)
}
}
func TestKubernetes_NilRegistryNoError(t *testing.T) {
src := &KubernetesSource{Client: NewClient()}
out := make(chan recon.Finding, 1)
if err := src.Sweep(context.Background(), "", out); err != nil {
t.Fatalf("expected nil, got %v", err)
}
}
// contains checks if substr is in s. Avoids importing strings in test.
func contains(s, substr string) bool {
for i := 0; i+len(substr) <= len(s); i++ {
if s[i:i+len(substr)] == substr {
return true
}
}
return false
}