test(10-09): add end-to-end SweepAll integration test across all ten sources
This commit is contained in:
13
.planning/phases/10-osint-code-hosting/deferred-items.md
Normal file
13
.planning/phases/10-osint-code-hosting/deferred-items.md
Normal file
@@ -0,0 +1,13 @@
|
||||
# Phase 10 — Deferred Items
|
||||
|
||||
Out-of-scope findings discovered during plan execution. These are NOT fixed in
the current plan but are tracked here for future work.
|
||||
|
||||
## 10-09
|
||||
|
||||
- **GitHubSource struct-literal panic risk.** `GitHubSource.Sweep` dereferences
  `s.client` without a nil check (pkg/recon/sources/github.go:106). `NewGitHubSource`
  initializes `client`, so `RegisterAll` is safe, but any future caller that builds
  a `GitHubSource` from a struct literal (the way sibling sources are built) will
  panic. Fix: add `if s.client == nil { s.client = NewClient() }` at the top of
  `Sweep`. The siblings (GitLab, Bitbucket, Gist, Codeberg, HuggingFace, Kaggle)
  already lazy-init their client.
|
||||
240
pkg/recon/sources/integration_test.go
Normal file
240
pkg/recon/sources/integration_test.go
Normal file
@@ -0,0 +1,240 @@
|
||||
package sources
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||
)
|
||||
|
||||
// TestIntegration_AllSources_SweepAll spins up a single multiplexed httptest
|
||||
// server that serves canned fixtures for every Phase 10 code-hosting source,
|
||||
// registers the sources (with BaseURL overrides pointing at the test server)
|
||||
// onto a fresh recon.Engine, runs SweepAll, and asserts at least one Finding
|
||||
// was emitted per SourceType across all ten sources.
|
||||
//
|
||||
// RegisterAll cannot be used directly because it wires production URLs; the
|
||||
// test exercises the same code paths by constructing each source identically
|
||||
// to RegisterAll but with BaseURL/Platforms overrides.
|
||||
func TestIntegration_AllSources_SweepAll(t *testing.T) {
|
||||
mux := http.NewServeMux()
|
||||
|
||||
// ---- GitHub /search/code ----
|
||||
mux.HandleFunc("/search/code", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_ = json.NewEncoder(w).Encode(ghSearchResponse{
|
||||
Items: []ghCodeItem{
|
||||
{HTMLURL: "https://github.com/alice/leak/blob/main/.env"},
|
||||
},
|
||||
})
|
||||
})
|
||||
|
||||
// ---- GitLab /api/v4/search ----
|
||||
mux.HandleFunc("/api/v4/search", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`[{"basename":"keys","data":"sk-proj-abc","path":"keys.env","project_id":42,"ref":"main","startline":1}]`))
|
||||
})
|
||||
|
||||
// ---- Bitbucket /2.0/workspaces/<ws>/search/code ----
|
||||
mux.HandleFunc("/2.0/workspaces/kh-test/search/code", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{"values":[{"content_match_count":1,"page_url":"https://bitbucket.org/kh-test/repo/src/main/keys.env","file":{"path":"keys.env","commit":{"hash":"deadbeef"}}}]}`))
|
||||
})
|
||||
|
||||
// ---- Gist /gists/public + raw content ----
|
||||
mux.HandleFunc("/gists/public", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
body := fmt.Sprintf(`[{"html_url":"https://gist.github.com/alice/gistleak","files":{"f.py":{"filename":"f.py","raw_url":"%s/raw/gist1"}}}]`, baseFromReq(r))
|
||||
_, _ = w.Write([]byte(body))
|
||||
})
|
||||
mux.HandleFunc("/raw/gist1", func(w http.ResponseWriter, r *http.Request) {
|
||||
_, _ = w.Write([]byte("api_key = sk-proj-ABCDEF"))
|
||||
})
|
||||
|
||||
// ---- Codeberg /api/v1/repos/search ----
|
||||
mux.HandleFunc("/api/v1/repos/search", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{"ok":true,"data":[{"full_name":"bob/keys","html_url":"https://codeberg.org/bob/keys"}]}`))
|
||||
})
|
||||
|
||||
// ---- HuggingFace /api/spaces + /api/models ----
|
||||
hfHandler := func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`[{"id":"alice/leaky-space"}]`))
|
||||
}
|
||||
mux.HandleFunc("/api/spaces", hfHandler)
|
||||
mux.HandleFunc("/api/models", hfHandler)
|
||||
|
||||
// ---- Replit /search?q=...&type=repls (HTML) ----
|
||||
// ---- CodeSandbox /search?query=...&type=sandboxes (HTML) ----
|
||||
// Both hit the same /search path; distinguish on query params.
|
||||
mux.HandleFunc("/search", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "text/html")
|
||||
switch r.URL.Query().Get("type") {
|
||||
case "repls":
|
||||
_, _ = w.Write([]byte(`<html><body>
|
||||
<a href="/@alice/leaky-repl">hit</a>
|
||||
<a href="/other/path">skip</a>
|
||||
</body></html>`))
|
||||
case "sandboxes":
|
||||
_, _ = w.Write([]byte(`<html><body>
|
||||
<a href="/s/leaky-sandbox">hit</a>
|
||||
<a href="/other">skip</a>
|
||||
</body></html>`))
|
||||
default:
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
}
|
||||
})
|
||||
|
||||
// ---- SandboxesSource sub-platforms ----
|
||||
mux.HandleFunc("/codepen-search", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "text/html")
|
||||
_, _ = w.Write([]byte(`<html><body><a href="/alice/pen/AbCd1234">hit</a></body></html>`))
|
||||
})
|
||||
mux.HandleFunc("/jsfiddle-search", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`{"results":[{"url":"https://jsfiddle.net/u/leaky/"}]}`))
|
||||
})
|
||||
|
||||
// ---- Kaggle /api/v1/kernels/list ----
|
||||
mux.HandleFunc("/api/v1/kernels/list", func(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`[{"ref":"alice/leaky-notebook"}]`))
|
||||
})
|
||||
|
||||
srv := httptest.NewServer(mux)
|
||||
defer srv.Close()
|
||||
|
||||
reg := providers.NewRegistryFromProviders([]providers.Provider{
|
||||
{Name: "openai", Keywords: []string{"sk-proj-"}},
|
||||
})
|
||||
lim := recon.NewLimiterRegistry()
|
||||
|
||||
eng := recon.NewEngine()
|
||||
|
||||
// GitHub — token + BaseURL override. Use the real constructor so `client`
|
||||
// is initialized, then retarget BaseURL at the test server.
|
||||
ghs := NewGitHubSource("ghp-test", reg, lim)
|
||||
ghs.BaseURL = srv.URL
|
||||
eng.Register(ghs)
|
||||
// GitLab
|
||||
eng.Register(&GitLabSource{
|
||||
Token: "glpat-test",
|
||||
BaseURL: srv.URL,
|
||||
Registry: reg,
|
||||
Limiters: lim,
|
||||
})
|
||||
// Bitbucket
|
||||
eng.Register(&BitbucketSource{
|
||||
Token: "bb-test",
|
||||
Workspace: "kh-test",
|
||||
BaseURL: srv.URL,
|
||||
Registry: reg,
|
||||
Limiters: lim,
|
||||
})
|
||||
// Gist — uses same BaseURL for /gists/public; raw URLs are absolute in fixture.
|
||||
eng.Register(&GistSource{
|
||||
Token: "ghp-test",
|
||||
BaseURL: srv.URL,
|
||||
Registry: reg,
|
||||
Limiters: lim,
|
||||
})
|
||||
// Codeberg
|
||||
eng.Register(&CodebergSource{
|
||||
BaseURL: srv.URL,
|
||||
Registry: reg,
|
||||
Limiters: lim,
|
||||
})
|
||||
// HuggingFace
|
||||
eng.Register(NewHuggingFaceSource(HuggingFaceConfig{
|
||||
BaseURL: srv.URL,
|
||||
Registry: reg,
|
||||
Limiters: lim,
|
||||
}))
|
||||
// Replit
|
||||
eng.Register(&ReplitSource{
|
||||
BaseURL: srv.URL,
|
||||
Registry: reg,
|
||||
Limiters: lim,
|
||||
})
|
||||
// CodeSandbox
|
||||
eng.Register(&CodeSandboxSource{
|
||||
BaseURL: srv.URL,
|
||||
Registry: reg,
|
||||
Limiters: lim,
|
||||
})
|
||||
// Sandboxes — inject test sub-platforms that hit srv.URL.
|
||||
eng.Register(&SandboxesSource{
|
||||
Platforms: []subPlatform{
|
||||
{Name: "codepen", SearchPath: "/codepen-search?q=%s", ResultLinkRegex: `^/[^/]+/pen/[a-zA-Z0-9]+$`, IsJSON: false},
|
||||
{Name: "jsfiddle", SearchPath: "/jsfiddle-search?q=%s", IsJSON: true, JSONItemsKey: "results", JSONURLKey: "url"},
|
||||
},
|
||||
Registry: reg,
|
||||
Limiters: lim,
|
||||
Client: NewClient(),
|
||||
BaseURL: srv.URL,
|
||||
})
|
||||
// Kaggle
|
||||
eng.Register(&KaggleSource{
|
||||
User: "kh-user",
|
||||
Key: "kh-key",
|
||||
BaseURL: srv.URL,
|
||||
WebBaseURL: "https://www.kaggle.com",
|
||||
Registry: reg,
|
||||
Limiters: lim,
|
||||
client: NewClient(),
|
||||
})
|
||||
|
||||
// Sanity: all 10 sources registered.
|
||||
if n := len(eng.List()); n != 10 {
|
||||
t.Fatalf("expected 10 sources on engine, got %d: %v", n, eng.List())
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
findings, err := eng.SweepAll(ctx, recon.Config{Query: "ignored"})
|
||||
if err != nil {
|
||||
t.Fatalf("SweepAll returned error: %v", err)
|
||||
}
|
||||
|
||||
// Group findings by SourceType and assert every expected bucket is present.
|
||||
byType := make(map[string]int)
|
||||
for _, f := range findings {
|
||||
byType[f.SourceType]++
|
||||
}
|
||||
|
||||
wantTypes := []string{
|
||||
"recon:github",
|
||||
"recon:gitlab",
|
||||
"recon:bitbucket",
|
||||
"recon:gist",
|
||||
"recon:codeberg",
|
||||
"recon:huggingface",
|
||||
"recon:replit",
|
||||
"recon:codesandbox",
|
||||
"recon:sandboxes",
|
||||
"recon:kaggle",
|
||||
}
|
||||
for _, st := range wantTypes {
|
||||
if byType[st] == 0 {
|
||||
t.Errorf("expected at least one finding with SourceType=%q, got none\nall findings: %+v", st, findings)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// baseFromReq returns "<scheme>://<host>" for the inbound request, where the
// scheme is "https" if the request carries TLS state and "http" otherwise, and
// the host is taken verbatim from r.Host. Handlers use it to build absolute
// URLs that point back at the httptest server that received the request.
func baseFromReq(r *http.Request) string {
	if r.TLS == nil {
		return "http://" + r.Host
	}
	return "https://" + r.Host
}
|
||||
Reference in New Issue
Block a user