- GitHubActionsSource: searches GitHub code search for workflow files with provider keywords (token-gated)
- TravisCISource: queries Travis CI v3 API for public build logs (credentialless)
- CircleCISource: queries CircleCI v2 pipeline API for build pipelines (token-gated)
- JenkinsSource: queries open Jenkins /api/json for job build consoles (credentialless)
- GitLabCISource: queries GitLab projects API for CI-enabled projects (token-gated)
- RegisterAll extended to 45 sources (40 Phase 10-13 + 5 Phase 14)
- Integration test updated with fixtures for all 5 new sources
- cmd/recon.go wires CIRCLECI_TOKEN env var
194 lines
8.1 KiB
Go
194 lines
8.1 KiB
Go
package cmd
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"strings"
|
|
|
|
"github.com/salvacybersec/keyhunter/pkg/engine"
|
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
|
"github.com/salvacybersec/keyhunter/pkg/recon"
|
|
"github.com/salvacybersec/keyhunter/pkg/recon/sources"
|
|
"github.com/salvacybersec/keyhunter/pkg/storage"
|
|
"github.com/spf13/cobra"
|
|
"github.com/spf13/viper"
|
|
)
|
|
|
|
// Flag values for "recon full"; each is bound to its command-line flag in init.
var (
	// reconStealth backs --stealth.
	reconStealth bool
	// reconRespectRobots backs --respect-robots.
	reconRespectRobots bool
	// reconNoPersist backs --no-persist.
	reconNoPersist bool

	// reconQuery backs --query.
	reconQuery string

	// reconSourcesFilter backs --sources.
	reconSourcesFilter []string
)
|
|
|
|
var reconCmd = &cobra.Command{
|
|
Use: "recon",
|
|
Short: "Run OSINT recon across internet sources",
|
|
Long: "Run OSINT recon sweeps across registered sources. Phase 10 adds ten code-hosting sources (GitHub/GitLab/Bitbucket/Gist/Codeberg/HuggingFace/Replit/CodeSandbox/Sandboxes/Kaggle). Phase 11 adds search engine dorking (Google/Bing/DuckDuckGo/Yandex/Brave) and paste site scanning (Pastebin/GistPaste/PasteSites). Phase 12 adds IoT scanners (Shodan/Censys/ZoomEye/FOFA/Netlas/BinaryEdge) and cloud storage scanners (S3/GCS/AzureBlob/Spaces).",
|
|
}
|
|
|
|
var reconFullCmd = &cobra.Command{
|
|
Use: "full",
|
|
Short: "Sweep enabled sources in parallel, deduplicate findings, and persist to DB",
|
|
RunE: func(cmd *cobra.Command, args []string) error {
|
|
eng := buildReconEngine()
|
|
if len(reconSourcesFilter) > 0 {
|
|
eng = filterEngineSources(eng, reconSourcesFilter)
|
|
}
|
|
cfg := recon.Config{
|
|
Stealth: reconStealth,
|
|
RespectRobots: reconRespectRobots,
|
|
Query: reconQuery,
|
|
}
|
|
ctx := context.Background()
|
|
all, err := eng.SweepAll(ctx, cfg)
|
|
if err != nil {
|
|
return fmt.Errorf("recon sweep: %w", err)
|
|
}
|
|
deduped := recon.Dedup(all)
|
|
fmt.Printf("recon: swept %d sources, %d findings (%d after dedup)\n", len(eng.List()), len(all), len(deduped))
|
|
for _, f := range deduped {
|
|
fmt.Printf(" [%s] %s %s %s\n", f.SourceType, f.ProviderName, f.KeyMasked, f.Source)
|
|
}
|
|
if !reconNoPersist && len(deduped) > 0 {
|
|
if err := persistReconFindings(deduped); err != nil {
|
|
fmt.Fprintf(os.Stderr, "recon: warning: failed to persist findings: %v\n", err)
|
|
}
|
|
}
|
|
return nil
|
|
},
|
|
}
|
|
|
|
// filterEngineSources rebuilds an Engine containing only the sources named in filter.
|
|
// Unknown names are silently skipped to avoid breaking on typos — the user sees the
|
|
// remaining count in the sweep summary.
|
|
func filterEngineSources(eng *recon.Engine, filter []string) *recon.Engine {
|
|
want := make(map[string]bool, len(filter))
|
|
for _, name := range filter {
|
|
want[strings.TrimSpace(name)] = true
|
|
}
|
|
filtered := recon.NewEngine()
|
|
// We can't introspect source structs out of the original engine, so rebuild
|
|
// fresh and re-register only what matches. This relies on buildReconEngine
|
|
// being idempotent and cheap.
|
|
fresh := buildReconEngine()
|
|
for _, name := range fresh.List() {
|
|
if want[name] {
|
|
if src, ok := fresh.Get(name); ok {
|
|
filtered.Register(src)
|
|
}
|
|
}
|
|
}
|
|
_ = eng // parameter kept for API symmetry; filtered engine replaces it
|
|
return filtered
|
|
}
|
|
|
|
// persistReconFindings writes deduplicated findings to the SQLite findings table.
|
|
// Uses the same encryption key derivation as the scan command.
|
|
func persistReconFindings(findings []engine.Finding) error {
|
|
db, encKey, err := openDBWithKey()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer db.Close()
|
|
for _, f := range findings {
|
|
sf := storage.Finding{
|
|
ProviderName: f.ProviderName,
|
|
KeyValue: f.KeyValue,
|
|
KeyMasked: f.KeyMasked,
|
|
Confidence: f.Confidence,
|
|
SourcePath: f.Source,
|
|
SourceType: f.SourceType,
|
|
LineNumber: f.LineNumber,
|
|
Verified: f.Verified,
|
|
VerifyStatus: f.VerifyStatus,
|
|
VerifyHTTPCode: f.VerifyHTTPCode,
|
|
VerifyMetadata: f.VerifyMetadata,
|
|
}
|
|
if _, err := db.SaveFinding(sf, encKey); err != nil {
|
|
return fmt.Errorf("save finding: %w", err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
var reconListCmd = &cobra.Command{
|
|
Use: "list",
|
|
Short: "List registered recon sources",
|
|
RunE: func(cmd *cobra.Command, args []string) error {
|
|
eng := buildReconEngine()
|
|
for _, name := range eng.List() {
|
|
fmt.Println(name)
|
|
}
|
|
return nil
|
|
},
|
|
}
|
|
|
|
// buildReconEngine constructs the recon Engine with all registered sources.
|
|
// Phase 9 contributes ExampleSource; Phase 10 contributes ten code-hosting
|
|
// sources via sources.RegisterAll. Credentials are read from environment
|
|
// variables first, then from viper config keys under `recon.<source>.*`.
|
|
// Sources whose credentials are missing are still registered but Enabled()
|
|
// will report false so SweepAll skips them cleanly.
|
|
func buildReconEngine() *recon.Engine {
|
|
e := recon.NewEngine()
|
|
e.Register(recon.ExampleSource{})
|
|
|
|
reg, err := providers.NewRegistry()
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "recon: failed to load providers: %v\n", err)
|
|
return e
|
|
}
|
|
|
|
cfg := sources.SourcesConfig{
|
|
Registry: reg,
|
|
Limiters: recon.NewLimiterRegistry(),
|
|
GitHubToken: firstNonEmpty(os.Getenv("GITHUB_TOKEN"), viper.GetString("recon.github.token")),
|
|
GitLabToken: firstNonEmpty(os.Getenv("GITLAB_TOKEN"), viper.GetString("recon.gitlab.token")),
|
|
BitbucketToken: firstNonEmpty(os.Getenv("BITBUCKET_TOKEN"), viper.GetString("recon.bitbucket.token")),
|
|
BitbucketWorkspace: firstNonEmpty(os.Getenv("BITBUCKET_WORKSPACE"), viper.GetString("recon.bitbucket.workspace")),
|
|
CodebergToken: firstNonEmpty(os.Getenv("CODEBERG_TOKEN"), viper.GetString("recon.codeberg.token")),
|
|
HuggingFaceToken: firstNonEmpty(os.Getenv("HUGGINGFACE_TOKEN"), viper.GetString("recon.huggingface.token")),
|
|
KaggleUser: firstNonEmpty(os.Getenv("KAGGLE_USERNAME"), viper.GetString("recon.kaggle.username")),
|
|
KaggleKey: firstNonEmpty(os.Getenv("KAGGLE_KEY"), viper.GetString("recon.kaggle.key")),
|
|
GoogleAPIKey: firstNonEmpty(os.Getenv("GOOGLE_API_KEY"), viper.GetString("recon.google.api_key")),
|
|
GoogleCX: firstNonEmpty(os.Getenv("GOOGLE_CX"), viper.GetString("recon.google.cx")),
|
|
BingAPIKey: firstNonEmpty(os.Getenv("BING_API_KEY"), viper.GetString("recon.bing.api_key")),
|
|
YandexUser: firstNonEmpty(os.Getenv("YANDEX_USER"), viper.GetString("recon.yandex.user")),
|
|
YandexAPIKey: firstNonEmpty(os.Getenv("YANDEX_API_KEY"), viper.GetString("recon.yandex.api_key")),
|
|
BraveAPIKey: firstNonEmpty(os.Getenv("BRAVE_API_KEY"), viper.GetString("recon.brave.api_key")),
|
|
ShodanAPIKey: firstNonEmpty(os.Getenv("SHODAN_API_KEY"), viper.GetString("recon.shodan.api_key")),
|
|
CensysAPIId: firstNonEmpty(os.Getenv("CENSYS_API_ID"), viper.GetString("recon.censys.api_id")),
|
|
CensysAPISecret: firstNonEmpty(os.Getenv("CENSYS_API_SECRET"), viper.GetString("recon.censys.api_secret")),
|
|
ZoomEyeAPIKey: firstNonEmpty(os.Getenv("ZOOMEYE_API_KEY"), viper.GetString("recon.zoomeye.api_key")),
|
|
FOFAEmail: firstNonEmpty(os.Getenv("FOFA_EMAIL"), viper.GetString("recon.fofa.email")),
|
|
FOFAAPIKey: firstNonEmpty(os.Getenv("FOFA_API_KEY"), viper.GetString("recon.fofa.api_key")),
|
|
NetlasAPIKey: firstNonEmpty(os.Getenv("NETLAS_API_KEY"), viper.GetString("recon.netlas.api_key")),
|
|
BinaryEdgeAPIKey: firstNonEmpty(os.Getenv("BINARYEDGE_API_KEY"), viper.GetString("recon.binaryedge.api_key")),
|
|
CircleCIToken: firstNonEmpty(os.Getenv("CIRCLECI_TOKEN"), viper.GetString("recon.circleci.token")),
|
|
}
|
|
sources.RegisterAll(e, cfg)
|
|
return e
|
|
}
|
|
|
|
// firstNonEmpty picks between two candidate strings: a wins whenever it is
// not the empty string, and b is the fallback (returned as-is, even if it is
// empty too). It implements the env-var → viper-config precedence used for
// credential lookup. No trimming is applied, so a whitespace-only a still
// counts as non-empty.
func firstNonEmpty(a, b string) string {
	if a == "" {
		return b
	}
	return a
}
|
|
|
|
func init() {
|
|
reconFullCmd.Flags().BoolVar(&reconStealth, "stealth", false, "enable UA rotation and jitter delays")
|
|
reconFullCmd.Flags().BoolVar(&reconRespectRobots, "respect-robots", true, "respect robots.txt for web-scraping sources")
|
|
reconFullCmd.Flags().StringVar(&reconQuery, "query", "", "override query sent to each source")
|
|
reconFullCmd.Flags().StringSliceVar(&reconSourcesFilter, "sources", nil, "comma-separated list of sources to run (e.g., github,gitlab)")
|
|
reconFullCmd.Flags().BoolVar(&reconNoPersist, "no-persist", false, "do not write findings to the database (print only)")
|
|
reconCmd.AddCommand(reconFullCmd)
|
|
reconCmd.AddCommand(reconListCmd)
|
|
}
|