package cmd

import (
	"context"
	"fmt"
	"os"
	"strings"

	"github.com/salvacybersec/keyhunter/pkg/engine"
	"github.com/salvacybersec/keyhunter/pkg/providers"
	"github.com/salvacybersec/keyhunter/pkg/recon"
	"github.com/salvacybersec/keyhunter/pkg/recon/sources"
	"github.com/salvacybersec/keyhunter/pkg/storage"
	"github.com/spf13/cobra"
	"github.com/spf13/viper"
)

// Flag storage for `recon full`; bound to the command's flags in init.
var (
	reconStealth       bool
	reconRespectRobots bool
	reconQuery         string
	reconSourcesFilter []string
	reconNoPersist     bool
)

// reconCmd is the parent `recon` command. It has no run function of its own;
// the `full` and `list` subcommands are attached in init.
var reconCmd = &cobra.Command{
	Use:   "recon",
	Short: "Run OSINT recon across internet sources",
	Long: "Run OSINT recon sweeps across registered sources. " +
		"Phase 10 adds ten code-hosting sources (GitHub/GitLab/Bitbucket/Gist/Codeberg/HuggingFace/Replit/CodeSandbox/Sandboxes/Kaggle). " +
		"Phase 11 adds search engine dorking (Google/Bing/DuckDuckGo/Yandex/Brave) and paste site scanning (Pastebin/GistPaste/PasteSites). " +
		"Phase 12 adds IoT scanners (Shodan/Censys/ZoomEye/FOFA/Netlas/BinaryEdge) and cloud storage scanners (S3/GCS/AzureBlob/Spaces).",
}

// reconFullCmd sweeps the registered sources (optionally narrowed by
// --sources), deduplicates the findings, prints a summary plus one line per
// finding, and persists the deduplicated set unless --no-persist is given.
var reconFullCmd = &cobra.Command{
	Use:   "full",
	Short: "Sweep enabled sources in parallel, deduplicate findings, and persist to DB",
	RunE: func(cmd *cobra.Command, args []string) error {
		eng := buildReconEngine()
		if len(reconSourcesFilter) > 0 {
			eng = filterEngineSources(eng, reconSourcesFilter)
		}

		cfg := recon.Config{
			Stealth:       reconStealth,
			RespectRobots: reconRespectRobots,
			Query:         reconQuery,
		}

		// Use the command's context so a context handed to the root command
		// reaches the sweep. It is nil when RunE is invoked directly without
		// going through Execute, hence the fallback.
		ctx := cmd.Context()
		if ctx == nil {
			ctx = context.Background()
		}

		all, err := eng.SweepAll(ctx, cfg)
		if err != nil {
			return fmt.Errorf("recon sweep: %w", err)
		}

		deduped := recon.Dedup(all)
		fmt.Printf("recon: swept %d sources, %d findings (%d after dedup)\n", len(eng.List()), len(all), len(deduped))
		for _, f := range deduped {
			fmt.Printf(" [%s] %s %s %s\n", f.SourceType, f.ProviderName, f.KeyMasked, f.Source)
		}

		if reconNoPersist || len(deduped) == 0 {
			return nil
		}
		// Persistence is best-effort: the findings were already printed, so a
		// storage failure is reported as a warning rather than failing the run.
		if err := persistReconFindings(deduped); err != nil {
			fmt.Fprintf(os.Stderr, "recon: warning: failed to persist findings: %v\n", err)
		}
		return nil
	},
}

// filterEngineSources returns a new Engine holding only those sources of eng
// whose names appear in filter. Names are trimmed of surrounding whitespace
// before matching. Unknown names are silently skipped to avoid breaking on
// typos — the user sees the remaining count in the sweep summary.
//
// A nil eng falls back to a freshly built engine (buildReconEngine), which is
// what every call effectively used before this function honoured its eng
// argument.
func filterEngineSources(eng *recon.Engine, filter []string) *recon.Engine {
	if eng == nil {
		eng = buildReconEngine()
	}

	want := make(map[string]struct{}, len(filter))
	for _, name := range filter {
		want[strings.TrimSpace(name)] = struct{}{}
	}

	filtered := recon.NewEngine()
	for _, name := range eng.List() {
		if _, ok := want[name]; !ok {
			continue
		}
		if src, ok := eng.Get(name); ok {
			filtered.Register(src)
		}
	}
	return filtered
}

// persistReconFindings writes deduplicated findings to the SQLite findings table.
// Uses the same encryption key derivation as the scan command (via
// openDBWithKey). It stops at, and returns, the first save error; findings
// saved before that point are not rolled back here.
func persistReconFindings(findings []engine.Finding) error {
	db, encKey, err := openDBWithKey()
	if err != nil {
		return err
	}
	defer db.Close()

	for _, f := range findings {
		sf := storage.Finding{
			ProviderName:   f.ProviderName,
			KeyValue:       f.KeyValue,
			KeyMasked:      f.KeyMasked,
			Confidence:     f.Confidence,
			SourcePath:     f.Source,
			SourceType:     f.SourceType,
			LineNumber:     f.LineNumber,
			Verified:       f.Verified,
			VerifyStatus:   f.VerifyStatus,
			VerifyHTTPCode: f.VerifyHTTPCode,
			VerifyMetadata: f.VerifyMetadata,
		}
		if _, err := db.SaveFinding(sf, encKey); err != nil {
			return fmt.Errorf("save finding: %w", err)
		}
	}
	return nil
}

// reconListCmd prints the name of every registered recon source, one per line.
var reconListCmd = &cobra.Command{
	Use:   "list",
	Short: "List registered recon sources",
	RunE: func(cmd *cobra.Command, args []string) error {
		for _, name := range buildReconEngine().List() {
			fmt.Println(name)
		}
		return nil
	},
}

// buildReconEngine constructs the recon Engine with all registered sources.
// recon.ExampleSource is always registered; every other source is contributed
// by sources.RegisterAll from the SourcesConfig assembled here. Credentials are
// read from environment variables first, then from viper config keys under
// `recon.<source>.*`. Sources whose credentials are missing are still
// registered but Enabled() will report false so SweepAll skips them cleanly.
//
// If the provider registry cannot be loaded, the error is written to stderr
// and the engine is returned with only ExampleSource registered.
func buildReconEngine() *recon.Engine {
	e := recon.NewEngine()
	e.Register(recon.ExampleSource{})

	reg, err := providers.NewRegistry()
	if err != nil {
		fmt.Fprintf(os.Stderr, "recon: failed to load providers: %v\n", err)
		return e
	}

	// cred resolves one credential: the environment variable wins, the viper
	// key is the fallback.
	cred := func(envVar, viperKey string) string {
		return firstNonEmpty(os.Getenv(envVar), viper.GetString(viperKey))
	}

	cfg := sources.SourcesConfig{
		Registry: reg,
		Limiters: recon.NewLimiterRegistry(),

		// Code-hosting sources.
		GitHubToken:        cred("GITHUB_TOKEN", "recon.github.token"),
		GitLabToken:        cred("GITLAB_TOKEN", "recon.gitlab.token"),
		BitbucketToken:     cred("BITBUCKET_TOKEN", "recon.bitbucket.token"),
		BitbucketWorkspace: cred("BITBUCKET_WORKSPACE", "recon.bitbucket.workspace"),
		CodebergToken:      cred("CODEBERG_TOKEN", "recon.codeberg.token"),
		HuggingFaceToken:   cred("HUGGINGFACE_TOKEN", "recon.huggingface.token"),
		KaggleUser:         cred("KAGGLE_USERNAME", "recon.kaggle.username"),
		KaggleKey:          cred("KAGGLE_KEY", "recon.kaggle.key"),

		// Search engines.
		GoogleAPIKey: cred("GOOGLE_API_KEY", "recon.google.api_key"),
		GoogleCX:     cred("GOOGLE_CX", "recon.google.cx"),
		BingAPIKey:   cred("BING_API_KEY", "recon.bing.api_key"),
		YandexUser:   cred("YANDEX_USER", "recon.yandex.user"),
		YandexAPIKey: cred("YANDEX_API_KEY", "recon.yandex.api_key"),
		BraveAPIKey:  cred("BRAVE_API_KEY", "recon.brave.api_key"),

		// IoT scanners.
		ShodanAPIKey:     cred("SHODAN_API_KEY", "recon.shodan.api_key"),
		CensysAPIId:      cred("CENSYS_API_ID", "recon.censys.api_id"),
		CensysAPISecret:  cred("CENSYS_API_SECRET", "recon.censys.api_secret"),
		ZoomEyeAPIKey:    cred("ZOOMEYE_API_KEY", "recon.zoomeye.api_key"),
		FOFAEmail:        cred("FOFA_EMAIL", "recon.fofa.email"),
		FOFAAPIKey:       cred("FOFA_API_KEY", "recon.fofa.api_key"),
		NetlasAPIKey:     cred("NETLAS_API_KEY", "recon.netlas.api_key"),
		BinaryEdgeAPIKey: cred("BINARYEDGE_API_KEY", "recon.binaryedge.api_key"),

		// Other API-backed sources.
		CircleCIToken:        cred("CIRCLECI_TOKEN", "recon.circleci.token"),
		SecurityTrailsAPIKey: cred("SECURITYTRAILS_API_KEY", "recon.securitytrails.api_key"),
	}
	sources.RegisterAll(e, cfg)
	return e
}

// firstNonEmpty returns a if non-empty, otherwise b. Used to implement the
// env-var → viper-config precedence chain for credential lookup.
func firstNonEmpty(a, b string) string {
	if a != "" {
		return a
	}
	return b
}

// init binds the `recon full` flags to their package-level variables and
// attaches the `full` and `list` subcommands to reconCmd.
func init() {
	flags := reconFullCmd.Flags()
	flags.BoolVar(&reconStealth, "stealth", false, "enable UA rotation and jitter delays")
	flags.BoolVar(&reconRespectRobots, "respect-robots", true, "respect robots.txt for web-scraping sources")
	flags.StringVar(&reconQuery, "query", "", "override query sent to each source")
	flags.StringSliceVar(&reconSourcesFilter, "sources", nil, "comma-separated list of sources to run (e.g., github,gitlab)")
	flags.BoolVar(&reconNoPersist, "no-persist", false, "do not write findings to the database (print only)")

	reconCmd.AddCommand(reconFullCmd)
	reconCmd.AddCommand(reconListCmd)
}