From 23613150f624dfd94eb029f8e488057695a8c97c Mon Sep 17 00:00:00 2001
From: salvacybersec
Date: Mon, 6 Apr 2026 12:52:27 +0300
Subject: [PATCH] feat(13-02): implement MavenSource and NuGetSource with tests

- MavenSource queries Maven Central Solr API for provider keyword matches
- NuGetSource queries NuGet gallery search API with projectUrl fallback
- Both sources: httptest fixtures, ctx cancellation, metadata tests
---
 pkg/recon/sources/maven.go      | 134 ++++++++++++++++++++++++++++++++
 pkg/recon/sources/maven_test.go | 118 ++++++++++++++++++++++++++++
 pkg/recon/sources/nuget.go      | 131 +++++++++++++++++++++++++++++++
 pkg/recon/sources/nuget_test.go | 124 +++++++++++++++++++++++++++++
 4 files changed, 507 insertions(+)
 create mode 100644 pkg/recon/sources/maven.go
 create mode 100644 pkg/recon/sources/maven_test.go
 create mode 100644 pkg/recon/sources/nuget.go
 create mode 100644 pkg/recon/sources/nuget_test.go

diff --git a/pkg/recon/sources/maven.go b/pkg/recon/sources/maven.go
new file mode 100644
index 0000000..9f47be7
--- /dev/null
+++ b/pkg/recon/sources/maven.go
@@ -0,0 +1,134 @@
+package sources
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/url"
+	"time"
+
+	"golang.org/x/time/rate"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// MavenSource searches Maven Central for artifacts matching provider keywords.
+// Maven Central exposes a Solr-based JSON search API that requires no
+// authentication.
+type MavenSource struct {
+	// BaseURL overrides the search host; empty means https://search.maven.org.
+	BaseURL string
+	// Registry is handed to BuildQueries to derive the search queries.
+	Registry *providers.Registry
+	// Limiters, when non-nil, is waited on before every request.
+	Limiters *recon.LimiterRegistry
+	// Client performs the HTTP requests; nil means a fresh NewClient().
+	Client *Client
+}
+
+// Compile-time assertion that MavenSource satisfies recon.ReconSource.
+var _ recon.ReconSource = (*MavenSource)(nil)
+
+// Name, RateLimit, Burst and RespectsRobots report the source's fixed
+// metadata: the identifier "maven", one request every two seconds, a burst
+// of 2, and RespectsRobots=false.
+func (s *MavenSource) Name() string          { return "maven" }
+func (s *MavenSource) RateLimit() rate.Limit { return rate.Every(2 * time.Second) }
+func (s *MavenSource) Burst() int            { return 2 }
+func (s *MavenSource) RespectsRobots() bool  { return false }
+
+// Enabled always returns true: Maven Central requires no credentials.
+func (s *MavenSource) Enabled(_ recon.Config) bool { return true }
+
+// Sweep queries Maven Central's Solr search for each provider keyword and
+// emits a Finding per matching artifact.
+//
+// A query whose request or JSON decoding fails is skipped so that one bad
+// keyword does not abort the sweep. Sweep returns the context's error when it
+// observes that ctx is done, and the limiter's error if waiting for a
+// rate-limit slot fails. The second argument is ignored.
+func (s *MavenSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
+	base := s.BaseURL
+	if base == "" {
+		base = "https://search.maven.org"
+	}
+	client := s.Client
+	if client == nil {
+		client = NewClient()
+	}
+
+	queries := BuildQueries(s.Registry, "maven")
+	if len(queries) == 0 {
+		return nil
+	}
+
+	for _, q := range queries {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+		if s.Limiters != nil {
+			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+				return err
+			}
+		}
+
+		endpoint := fmt.Sprintf("%s/solrsearch/select?q=%s&rows=20&wt=json",
+			base, url.QueryEscape(q))
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
+		if err != nil {
+			return fmt.Errorf("maven: build request: %w", err)
+		}
+		req.Header.Set("Accept", "application/json")
+
+		resp, err := client.Do(ctx, req)
+		if err != nil {
+			continue // non-fatal: skip keyword on HTTP error
+		}
+
+		var parsed mavenSearchResponse
+		decErr := json.NewDecoder(resp.Body).Decode(&parsed)
+		_ = resp.Body.Close()
+		if decErr != nil {
+			continue
+		}
+
+		for _, doc := range parsed.Response.Docs {
+			if err := ctx.Err(); err != nil {
+				return err
+			}
+			// Coordinates come from the remote API, so escape each one
+			// before placing it in a URL path segment.
+			src := fmt.Sprintf("https://search.maven.org/artifact/%s/%s/%s/jar",
+				url.PathEscape(doc.Group), url.PathEscape(doc.Artifact), url.PathEscape(doc.LatestVersion))
+			select {
+			case out <- recon.Finding{
+				Source:     src,
+				SourceType: "recon:maven",
+				Confidence: "low",
+				DetectedAt: time.Now(),
+			}:
+			case <-ctx.Done():
+				return ctx.Err()
+			}
+		}
+	}
+	return nil
+}
+
+// mavenSearchResponse mirrors the part of the Solr search reply that Sweep
+// reads: response.docs[].{g,a,latestVersion}.
+type mavenSearchResponse struct {
+	Response mavenResponseBody `json:"response"`
+}
+
+type mavenResponseBody struct {
+	Docs []mavenDoc `json:"docs"`
+}
+
+type mavenDoc struct {
+	Group         string `json:"g"`
+	Artifact      string `json:"a"`
+	LatestVersion string `json:"latestVersion"`
+}
diff --git a/pkg/recon/sources/maven_test.go b/pkg/recon/sources/maven_test.go
new file mode 100644
index 0000000..5e2ea69
--- /dev/null
+++ b/pkg/recon/sources/maven_test.go
@@ -0,0 +1,118 @@
+package sources
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+func mavenTestRegistry() *providers.Registry {
+	return providers.NewRegistryFromProviders([]providers.Provider{
+		{Name: "openai", Keywords: []string{"sk-proj-"}},
+	})
+}
+
+const mavenFixtureJSON = `{
+	"response": {
+		"numFound": 2,
+		"docs": [
+			{"g": "com.example", "a": "openai-sdk", "latestVersion": "1.2.3"},
+			{"g": "org.test", "a": "llm-client", "latestVersion": "0.9.0"}
+		]
+	}
+}`
+
+func newMavenTestSource(srvURL string) *MavenSource {
+	return &MavenSource{
+		BaseURL:  srvURL,
+		Registry: mavenTestRegistry(),
+		Limiters: recon.NewLimiterRegistry(),
+		Client:   NewClient(),
+	}
+}
+
+func TestMaven_Sweep_ExtractsFindings(t *testing.T) {
+	// The handler runs on the server's goroutine, so count hits atomically.
+	var hits int32
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path != "/solrsearch/select" {
+			t.Errorf("unexpected path: %s", r.URL.Path)
+		}
+		if r.URL.Query().Get("q") == "" {
+			t.Errorf("missing q param")
+		}
+		atomic.AddInt32(&hits, 1)
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(mavenFixtureJSON))
+	}))
+	defer srv.Close()
+
+	src := newMavenTestSource(srv.URL)
+	out := make(chan recon.Finding, 16)
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	if err := src.Sweep(ctx, "", out); err != nil {
+		t.Fatalf("Sweep err: %v", err)
+	}
+	close(out)
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) != 2 {
+		t.Fatalf("expected 2 findings, got %d", len(findings))
+	}
+	for _, f := range findings {
+		if f.SourceType != "recon:maven" {
+			t.Errorf("unexpected SourceType: %s", f.SourceType)
+		}
+	}
+	if atomic.LoadInt32(&hits) == 0 {
+		t.Fatal("server was never hit")
+	}
+}
+
+func TestMaven_NameAndRate(t *testing.T) {
+	s := &MavenSource{}
+	if s.Name() != "maven" {
+		t.Errorf("unexpected name: %s", s.Name())
+	}
+	if s.Burst() != 2 {
+		t.Errorf("burst: %d", s.Burst())
+	}
+	if s.RespectsRobots() {
+		t.Error("expected RespectsRobots=false")
+	}
+}
+
+func TestMaven_EnabledAlwaysTrue(t *testing.T) {
+	s := &MavenSource{}
+	if !s.Enabled(recon.Config{}) {
+		t.Fatal("expected Enabled=true")
+	}
+}
+
+func TestMaven_Sweep_CtxCancelled(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		time.Sleep(500 * time.Millisecond)
+		_, _ = w.Write([]byte(mavenFixtureJSON))
+	}))
+	defer srv.Close()
+
+	src := newMavenTestSource(srv.URL)
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+
+	out := make(chan recon.Finding, 4)
+	if err := src.Sweep(ctx, "", out); err == nil {
+		t.Fatal("expected ctx error")
+	}
+}
diff --git a/pkg/recon/sources/nuget.go b/pkg/recon/sources/nuget.go
new file mode 100644
index 0000000..a77ccae
--- /dev/null
+++ b/pkg/recon/sources/nuget.go
@@ -0,0 +1,131 @@
+package sources
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"net/http"
+	"net/url"
+	"time"
+
+	"golang.org/x/time/rate"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// NuGetSource searches the NuGet gallery for .NET packages matching provider
+// keywords. The NuGet search API is public and requires no authentication.
+type NuGetSource struct {
+	// BaseURL overrides the search host; empty means https://azuresearch-usnc.nuget.org.
+	BaseURL string
+	// Registry is handed to BuildQueries to derive the search queries.
+	Registry *providers.Registry
+	// Limiters, when non-nil, is waited on before every request.
+	Limiters *recon.LimiterRegistry
+	// Client performs the HTTP requests; nil means a fresh NewClient().
+	Client *Client
+}
+
+// Compile-time assertion that NuGetSource satisfies recon.ReconSource.
+var _ recon.ReconSource = (*NuGetSource)(nil)
+
+// Name, RateLimit, Burst and RespectsRobots report the source's fixed
+// metadata: the identifier "nuget", one request per second, a burst of 3,
+// and RespectsRobots=false.
+func (s *NuGetSource) Name() string          { return "nuget" }
+func (s *NuGetSource) RateLimit() rate.Limit { return rate.Every(1 * time.Second) }
+func (s *NuGetSource) Burst() int            { return 3 }
+func (s *NuGetSource) RespectsRobots() bool  { return false }
+
+// Enabled always returns true: NuGet search requires no credentials.
+func (s *NuGetSource) Enabled(_ recon.Config) bool { return true }
+
+// Sweep queries NuGet's search API for each provider keyword and emits a
+// Finding per matching package.
+//
+// A query whose request or JSON decoding fails is skipped so that one bad
+// keyword does not abort the sweep. Sweep returns the context's error when it
+// observes that ctx is done, and the limiter's error if waiting for a
+// rate-limit slot fails. The second argument is ignored.
+func (s *NuGetSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
+	base := s.BaseURL
+	if base == "" {
+		base = "https://azuresearch-usnc.nuget.org"
+	}
+	client := s.Client
+	if client == nil {
+		client = NewClient()
+	}
+
+	queries := BuildQueries(s.Registry, "nuget")
+	if len(queries) == 0 {
+		return nil
+	}
+
+	for _, q := range queries {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+		if s.Limiters != nil {
+			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+				return err
+			}
+		}
+
+		endpoint := fmt.Sprintf("%s/query?q=%s&take=20",
+			base, url.QueryEscape(q))
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
+		if err != nil {
+			return fmt.Errorf("nuget: build request: %w", err)
+		}
+		req.Header.Set("Accept", "application/json")
+
+		resp, err := client.Do(ctx, req)
+		if err != nil {
+			continue
+		}
+
+		var parsed nugetSearchResponse
+		decErr := json.NewDecoder(resp.Body).Decode(&parsed)
+		_ = resp.Body.Close()
+		if decErr != nil {
+			continue
+		}
+
+		for _, pkg := range parsed.Data {
+			if err := ctx.Err(); err != nil {
+				return err
+			}
+			src := pkg.ProjectURL
+			if src == "" {
+				// No project URL published: fall back to the gallery page,
+				// escaping the package ID taken from the remote API.
+				src = fmt.Sprintf("https://www.nuget.org/packages/%s", url.PathEscape(pkg.ID))
+			}
+			select {
+			case out <- recon.Finding{
+				Source:     src,
+				SourceType: "recon:nuget",
+				Confidence: "low",
+				DetectedAt: time.Now(),
+			}:
+			case <-ctx.Done():
+				return ctx.Err()
+			}
+		}
+	}
+	return nil
+}
+
+// nugetSearchResponse mirrors the part of the search reply that Sweep reads:
+// data[].{id,projectUrl}; version is decoded but currently unused.
+type nugetSearchResponse struct {
+	Data []nugetPackage `json:"data"`
+}
+
+type nugetPackage struct {
+	ID         string `json:"id"`
+	Version    string `json:"version"`
+	ProjectURL string `json:"projectUrl"`
+}
diff --git a/pkg/recon/sources/nuget_test.go b/pkg/recon/sources/nuget_test.go
new file mode 100644
index 0000000..03ed8ca
--- /dev/null
+++ b/pkg/recon/sources/nuget_test.go
@@ -0,0 +1,124 @@
+package sources
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+func nugetTestRegistry() *providers.Registry {
+	return providers.NewRegistryFromProviders([]providers.Provider{
+		{Name: "openai", Keywords: []string{"sk-proj-"}},
+	})
+}
+
+const nugetFixtureJSON = `{
+	"data": [
+		{"id": "OpenAI.SDK", "version": "2.1.0", "projectUrl": "https://github.com/example/openai-sdk"},
+		{"id": "LLM.Client", "version": "1.0.0", "projectUrl": ""}
+	]
+}`
+
+func newNuGetTestSource(srvURL string) *NuGetSource {
+	return &NuGetSource{
+		BaseURL:  srvURL,
+		Registry: nugetTestRegistry(),
+		Limiters: recon.NewLimiterRegistry(),
+		Client:   NewClient(),
+	}
+}
+
+func TestNuGet_Sweep_ExtractsFindings(t *testing.T) {
+	// The handler runs on the server's goroutine, so count hits atomically.
+	var hits int32
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		if r.URL.Path != "/query" {
+			t.Errorf("unexpected path: %s", r.URL.Path)
+		}
+		if r.URL.Query().Get("q") == "" {
+			t.Errorf("missing q param")
+		}
+		atomic.AddInt32(&hits, 1)
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(nugetFixtureJSON))
+	}))
+	defer srv.Close()
+
+	src := newNuGetTestSource(srv.URL)
+	out := make(chan recon.Finding, 16)
+	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+	defer cancel()
+
+	if err := src.Sweep(ctx, "", out); err != nil {
+		t.Fatalf("Sweep err: %v", err)
+	}
+	close(out)
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) != 2 {
+		t.Fatalf("expected 2 findings, got %d", len(findings))
+	}
+
+	// First package has projectUrl set
+	if findings[0].Source != "https://github.com/example/openai-sdk" {
+		t.Errorf("expected projectUrl for first finding, got: %s", findings[0].Source)
+	}
+	// Second package has empty projectUrl -> fallback
+	if findings[1].Source != "https://www.nuget.org/packages/LLM.Client" {
+		t.Errorf("expected nuget.org fallback for second finding, got: %s", findings[1].Source)
+	}
+	for _, f := range findings {
+		if f.SourceType != "recon:nuget" {
+			t.Errorf("unexpected SourceType: %s", f.SourceType)
+		}
+	}
+	if atomic.LoadInt32(&hits) == 0 {
+		t.Fatal("server was never hit")
+	}
+}
+
+func TestNuGet_NameAndRate(t *testing.T) {
+	s := &NuGetSource{}
+	if s.Name() != "nuget" {
+		t.Errorf("unexpected name: %s", s.Name())
+	}
+	if s.Burst() != 3 {
+		t.Errorf("burst: %d", s.Burst())
+	}
+	if s.RespectsRobots() {
+		t.Error("expected RespectsRobots=false")
+	}
+}
+
+func TestNuGet_EnabledAlwaysTrue(t *testing.T) {
+	s := &NuGetSource{}
+	if !s.Enabled(recon.Config{}) {
+		t.Fatal("expected Enabled=true")
+	}
+}
+
+func TestNuGet_Sweep_CtxCancelled(t *testing.T) {
+	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		time.Sleep(500 * time.Millisecond)
+		_, _ = w.Write([]byte(nugetFixtureJSON))
+	}))
+	defer srv.Close()
+
+	src := newNuGetTestSource(srv.URL)
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+
+	out := make(chan recon.Finding, 4)
+	if err := src.Sweep(ctx, "", out); err == nil {
+		t.Fatal("expected ctx error")
+	}
+}