feat(11-03): wire 18 sources into RegisterAll + credential wiring in cmd/recon.go

- Extend SourcesConfig with GoogleAPIKey, GoogleCX, BingAPIKey, YandexUser, YandexAPIKey, BraveAPIKey
- RegisterAll registers 8 Phase 11 sources alongside 10 Phase 10 sources (18 total)
- cmd/recon.go reads search engine API keys from env vars and viper config
- Guardrail tests updated to assert 18 sources
This commit is contained in:
salvacybersec
2026-04-06 12:02:11 +03:00
parent a53d952518
commit 3250408f23
3 changed files with 80 additions and 12 deletions

View File

@@ -26,7 +26,7 @@ var (
var reconCmd = &cobra.Command{ var reconCmd = &cobra.Command{
Use: "recon", Use: "recon",
Short: "Run OSINT recon across internet sources", Short: "Run OSINT recon across internet sources",
Long: "Run OSINT recon sweeps across registered sources. Phase 10 adds ten code-hosting sources (GitHub/GitLab/Bitbucket/Gist/Codeberg/HuggingFace/Replit/CodeSandbox/Sandboxes/Kaggle). Further phases add pastebins, search engines, etc.", Long: "Run OSINT recon sweeps across registered sources. Phase 10 adds ten code-hosting sources (GitHub/GitLab/Bitbucket/Gist/Codeberg/HuggingFace/Replit/CodeSandbox/Sandboxes/Kaggle). Phase 11 adds search engine dorking (Google/Bing/DuckDuckGo/Yandex/Brave) and paste site scanning (Pastebin/GistPaste/PasteSites).",
} }
var reconFullCmd = &cobra.Command{ var reconFullCmd = &cobra.Command{
@@ -153,6 +153,12 @@ func buildReconEngine() *recon.Engine {
HuggingFaceToken: firstNonEmpty(os.Getenv("HUGGINGFACE_TOKEN"), viper.GetString("recon.huggingface.token")), HuggingFaceToken: firstNonEmpty(os.Getenv("HUGGINGFACE_TOKEN"), viper.GetString("recon.huggingface.token")),
KaggleUser: firstNonEmpty(os.Getenv("KAGGLE_USERNAME"), viper.GetString("recon.kaggle.username")), KaggleUser: firstNonEmpty(os.Getenv("KAGGLE_USERNAME"), viper.GetString("recon.kaggle.username")),
KaggleKey: firstNonEmpty(os.Getenv("KAGGLE_KEY"), viper.GetString("recon.kaggle.key")), KaggleKey: firstNonEmpty(os.Getenv("KAGGLE_KEY"), viper.GetString("recon.kaggle.key")),
GoogleAPIKey: firstNonEmpty(os.Getenv("GOOGLE_API_KEY"), viper.GetString("recon.google.api_key")),
GoogleCX: firstNonEmpty(os.Getenv("GOOGLE_CX"), viper.GetString("recon.google.cx")),
BingAPIKey: firstNonEmpty(os.Getenv("BING_API_KEY"), viper.GetString("recon.bing.api_key")),
YandexUser: firstNonEmpty(os.Getenv("YANDEX_USER"), viper.GetString("recon.yandex.user")),
YandexAPIKey: firstNonEmpty(os.Getenv("YANDEX_API_KEY"), viper.GetString("recon.yandex.api_key")),
BraveAPIKey: firstNonEmpty(os.Getenv("BRAVE_API_KEY"), viper.GetString("recon.brave.api_key")),
} }
sources.RegisterAll(e, cfg) sources.RegisterAll(e, cfg)
return e return e

View File

@@ -28,20 +28,32 @@ type SourcesConfig struct {
KaggleUser string KaggleUser string
KaggleKey string KaggleKey string
// Google Custom Search API key and search engine ID (CX).
GoogleAPIKey string
GoogleCX string
// Bing Web Search API subscription key.
BingAPIKey string
// Yandex XML Search user and API key.
YandexUser string
YandexAPIKey string
// Brave Search API subscription token.
BraveAPIKey string
// Registry drives query generation for every source via BuildQueries. // Registry drives query generation for every source via BuildQueries.
Registry *providers.Registry Registry *providers.Registry
// Limiters is the shared per-source rate-limiter registry. // Limiters is the shared per-source rate-limiter registry.
Limiters *recon.LimiterRegistry Limiters *recon.LimiterRegistry
} }
// RegisterAll registers every Phase 10 code-hosting source on engine. // RegisterAll registers every Phase 10 code-hosting and Phase 11 search
// engine / paste site source on engine (18 sources total).
// //
// All ten sources are registered unconditionally so that cmd/recon.go can // All sources are registered unconditionally so that cmd/recon.go can surface
// surface the full catalog via `keyhunter recon list` regardless of which // the full catalog via `keyhunter recon list` regardless of which credentials
// credentials are configured. Sources without required credentials return // are configured. Sources without required credentials return Enabled()==false
// Enabled()==false so SweepAll skips them without erroring. // so SweepAll skips them without erroring.
// //
// A nil engine is treated as a no-op (not an error) — callers in broken init // A nil engine is treated as a no-op (not an error) -- callers in broken init
// paths shouldn't panic. // paths shouldn't panic.
func RegisterAll(engine *recon.Engine, cfg SourcesConfig) { func RegisterAll(engine *recon.Engine, cfg SourcesConfig) {
if engine == nil { if engine == nil {
@@ -95,4 +107,46 @@ func RegisterAll(engine *recon.Engine, cfg SourcesConfig) {
Registry: reg, Registry: reg,
Limiters: lim, Limiters: lim,
}) })
// Phase 11: Search engine dorking sources.
engine.Register(&GoogleDorkSource{
APIKey: cfg.GoogleAPIKey,
CX: cfg.GoogleCX,
Registry: reg,
Limiters: lim,
})
engine.Register(&BingDorkSource{
APIKey: cfg.BingAPIKey,
Registry: reg,
Limiters: lim,
})
engine.Register(&DuckDuckGoSource{
Registry: reg,
Limiters: lim,
})
engine.Register(&YandexSource{
User: cfg.YandexUser,
APIKey: cfg.YandexAPIKey,
Registry: reg,
Limiters: lim,
})
engine.Register(&BraveSource{
APIKey: cfg.BraveAPIKey,
Registry: reg,
Limiters: lim,
})
// Phase 11: Paste site sources.
engine.Register(&PastebinSource{
Registry: reg,
Limiters: lim,
})
engine.Register(&GistPasteSource{
Registry: reg,
Limiters: lim,
})
engine.Register(&PasteSitesSource{
Registry: reg,
Limiters: lim,
})
} }

View File

@@ -16,9 +16,9 @@ func registerTestRegistry() *providers.Registry {
}) })
} }
// TestRegisterAll_WiresAllTenSources asserts that RegisterAll registers every // TestRegisterAll_WiresAllEighteenSources asserts that RegisterAll registers
// Phase 10 code-hosting source by its stable name on a fresh engine. // every Phase 10 + Phase 11 source by its stable name on a fresh engine.
func TestRegisterAll_WiresAllTenSources(t *testing.T) { func TestRegisterAll_WiresAllEighteenSources(t *testing.T) {
eng := recon.NewEngine() eng := recon.NewEngine()
cfg := SourcesConfig{ cfg := SourcesConfig{
Registry: registerTestRegistry(), Registry: registerTestRegistry(),
@@ -28,16 +28,24 @@ func TestRegisterAll_WiresAllTenSources(t *testing.T) {
got := eng.List() got := eng.List()
want := []string{ want := []string{
"bing",
"bitbucket", "bitbucket",
"brave",
"codeberg", "codeberg",
"codesandbox", "codesandbox",
"duckduckgo",
"gist", "gist",
"gistpaste",
"github", "github",
"gitlab", "gitlab",
"google",
"huggingface", "huggingface",
"kaggle", "kaggle",
"pastebin",
"pastesites",
"replit", "replit",
"sandboxes", "sandboxes",
"yandex",
} }
if !reflect.DeepEqual(got, want) { if !reflect.DeepEqual(got, want) {
t.Fatalf("RegisterAll names mismatch\n got: %v\nwant: %v", got, want) t.Fatalf("RegisterAll names mismatch\n got: %v\nwant: %v", got, want)
@@ -55,8 +63,8 @@ func TestRegisterAll_MissingCredsStillRegistered(t *testing.T) {
Limiters: recon.NewLimiterRegistry(), Limiters: recon.NewLimiterRegistry(),
}) })
if n := len(eng.List()); n != 10 { if n := len(eng.List()); n != 18 {
t.Fatalf("expected 10 sources registered, got %d: %v", n, eng.List()) t.Fatalf("expected 18 sources registered, got %d: %v", n, eng.List())
} }
// SweepAll with an empty config should filter out cred-gated sources // SweepAll with an empty config should filter out cred-gated sources