Closes two verification gaps: (1) the --sources=github,gitlab flag now filters the registered sources before the sweep; (2) findings are persisted to SQLite via storage.SaveFinding after dedup. Also adds an Engine.Get() method for looking up a source by name.
179 lines
6.3 KiB
Go
179 lines
6.3 KiB
Go
package cmd
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"strings"
|
|
|
|
"github.com/salvacybersec/keyhunter/pkg/engine"
|
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
|
"github.com/salvacybersec/keyhunter/pkg/recon"
|
|
"github.com/salvacybersec/keyhunter/pkg/recon/sources"
|
|
"github.com/salvacybersec/keyhunter/pkg/storage"
|
|
"github.com/spf13/cobra"
|
|
"github.com/spf13/viper"
|
|
)
|
|
|
|
// Flag-backed settings for `recon full`; each is bound to its flag in init.

// reconQuery backs --query: overrides the query sent to each source.
var reconQuery string

// reconSourcesFilter backs --sources: names of the sources to run.
// An empty slice means no filtering is applied.
var reconSourcesFilter []string

// reconStealth backs --stealth (UA rotation and jitter delays).
var reconStealth bool

// reconRespectRobots backs --respect-robots (robots.txt handling for
// web-scraping sources).
var reconRespectRobots bool

// reconNoPersist backs --no-persist: print findings without writing them
// to the database.
var reconNoPersist bool
|
|
|
|
var reconCmd = &cobra.Command{
|
|
Use: "recon",
|
|
Short: "Run OSINT recon across internet sources",
|
|
Long: "Run OSINT recon sweeps across registered sources. Phase 10 adds ten code-hosting sources (GitHub/GitLab/Bitbucket/Gist/Codeberg/HuggingFace/Replit/CodeSandbox/Sandboxes/Kaggle). Further phases add pastebins, search engines, etc.",
|
|
}
|
|
|
|
var reconFullCmd = &cobra.Command{
|
|
Use: "full",
|
|
Short: "Sweep enabled sources in parallel, deduplicate findings, and persist to DB",
|
|
RunE: func(cmd *cobra.Command, args []string) error {
|
|
eng := buildReconEngine()
|
|
if len(reconSourcesFilter) > 0 {
|
|
eng = filterEngineSources(eng, reconSourcesFilter)
|
|
}
|
|
cfg := recon.Config{
|
|
Stealth: reconStealth,
|
|
RespectRobots: reconRespectRobots,
|
|
Query: reconQuery,
|
|
}
|
|
ctx := context.Background()
|
|
all, err := eng.SweepAll(ctx, cfg)
|
|
if err != nil {
|
|
return fmt.Errorf("recon sweep: %w", err)
|
|
}
|
|
deduped := recon.Dedup(all)
|
|
fmt.Printf("recon: swept %d sources, %d findings (%d after dedup)\n", len(eng.List()), len(all), len(deduped))
|
|
for _, f := range deduped {
|
|
fmt.Printf(" [%s] %s %s %s\n", f.SourceType, f.ProviderName, f.KeyMasked, f.Source)
|
|
}
|
|
if !reconNoPersist && len(deduped) > 0 {
|
|
if err := persistReconFindings(deduped); err != nil {
|
|
fmt.Fprintf(os.Stderr, "recon: warning: failed to persist findings: %v\n", err)
|
|
}
|
|
}
|
|
return nil
|
|
},
|
|
}
|
|
|
|
// filterEngineSources rebuilds an Engine containing only the sources named in filter.
|
|
// Unknown names are silently skipped to avoid breaking on typos — the user sees the
|
|
// remaining count in the sweep summary.
|
|
func filterEngineSources(eng *recon.Engine, filter []string) *recon.Engine {
|
|
want := make(map[string]bool, len(filter))
|
|
for _, name := range filter {
|
|
want[strings.TrimSpace(name)] = true
|
|
}
|
|
filtered := recon.NewEngine()
|
|
// We can't introspect source structs out of the original engine, so rebuild
|
|
// fresh and re-register only what matches. This relies on buildReconEngine
|
|
// being idempotent and cheap.
|
|
fresh := buildReconEngine()
|
|
for _, name := range fresh.List() {
|
|
if want[name] {
|
|
if src, ok := fresh.Get(name); ok {
|
|
filtered.Register(src)
|
|
}
|
|
}
|
|
}
|
|
_ = eng // parameter kept for API symmetry; filtered engine replaces it
|
|
return filtered
|
|
}
|
|
|
|
// persistReconFindings writes deduplicated findings to the SQLite findings table.
|
|
// Uses the same encryption key derivation as the scan command.
|
|
func persistReconFindings(findings []engine.Finding) error {
|
|
db, encKey, err := openDBWithKey()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer db.Close()
|
|
for _, f := range findings {
|
|
sf := storage.Finding{
|
|
ProviderName: f.ProviderName,
|
|
KeyValue: f.KeyValue,
|
|
KeyMasked: f.KeyMasked,
|
|
Confidence: f.Confidence,
|
|
SourcePath: f.Source,
|
|
SourceType: f.SourceType,
|
|
LineNumber: f.LineNumber,
|
|
Verified: f.Verified,
|
|
VerifyStatus: f.VerifyStatus,
|
|
VerifyHTTPCode: f.VerifyHTTPCode,
|
|
VerifyMetadata: f.VerifyMetadata,
|
|
}
|
|
if _, err := db.SaveFinding(sf, encKey); err != nil {
|
|
return fmt.Errorf("save finding: %w", err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
var reconListCmd = &cobra.Command{
|
|
Use: "list",
|
|
Short: "List registered recon sources",
|
|
RunE: func(cmd *cobra.Command, args []string) error {
|
|
eng := buildReconEngine()
|
|
for _, name := range eng.List() {
|
|
fmt.Println(name)
|
|
}
|
|
return nil
|
|
},
|
|
}
|
|
|
|
// buildReconEngine constructs the recon Engine with all registered sources.
|
|
// Phase 9 contributes ExampleSource; Phase 10 contributes ten code-hosting
|
|
// sources via sources.RegisterAll. Credentials are read from environment
|
|
// variables first, then from viper config keys under `recon.<source>.*`.
|
|
// Sources whose credentials are missing are still registered but Enabled()
|
|
// will report false so SweepAll skips them cleanly.
|
|
func buildReconEngine() *recon.Engine {
|
|
e := recon.NewEngine()
|
|
e.Register(recon.ExampleSource{})
|
|
|
|
reg, err := providers.NewRegistry()
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "recon: failed to load providers: %v\n", err)
|
|
return e
|
|
}
|
|
|
|
cfg := sources.SourcesConfig{
|
|
Registry: reg,
|
|
Limiters: recon.NewLimiterRegistry(),
|
|
GitHubToken: firstNonEmpty(os.Getenv("GITHUB_TOKEN"), viper.GetString("recon.github.token")),
|
|
GitLabToken: firstNonEmpty(os.Getenv("GITLAB_TOKEN"), viper.GetString("recon.gitlab.token")),
|
|
BitbucketToken: firstNonEmpty(os.Getenv("BITBUCKET_TOKEN"), viper.GetString("recon.bitbucket.token")),
|
|
BitbucketWorkspace: firstNonEmpty(os.Getenv("BITBUCKET_WORKSPACE"), viper.GetString("recon.bitbucket.workspace")),
|
|
CodebergToken: firstNonEmpty(os.Getenv("CODEBERG_TOKEN"), viper.GetString("recon.codeberg.token")),
|
|
HuggingFaceToken: firstNonEmpty(os.Getenv("HUGGINGFACE_TOKEN"), viper.GetString("recon.huggingface.token")),
|
|
KaggleUser: firstNonEmpty(os.Getenv("KAGGLE_USERNAME"), viper.GetString("recon.kaggle.username")),
|
|
KaggleKey: firstNonEmpty(os.Getenv("KAGGLE_KEY"), viper.GetString("recon.kaggle.key")),
|
|
}
|
|
sources.RegisterAll(e, cfg)
|
|
return e
|
|
}
|
|
|
|
// firstNonEmpty picks a unless it is the empty string, in which case it
// yields b (which may itself be empty). It implements the
// env-var → viper-config precedence used for credential lookup.
func firstNonEmpty(a, b string) string {
	if len(a) == 0 {
		return b
	}
	return a
}
|
|
|
|
func init() {
|
|
reconFullCmd.Flags().BoolVar(&reconStealth, "stealth", false, "enable UA rotation and jitter delays")
|
|
reconFullCmd.Flags().BoolVar(&reconRespectRobots, "respect-robots", true, "respect robots.txt for web-scraping sources")
|
|
reconFullCmd.Flags().StringVar(&reconQuery, "query", "", "override query sent to each source")
|
|
reconFullCmd.Flags().StringSliceVar(&reconSourcesFilter, "sources", nil, "comma-separated list of sources to run (e.g., github,gitlab)")
|
|
reconFullCmd.Flags().BoolVar(&reconNoPersist, "no-persist", false, "do not write findings to the database (print only)")
|
|
reconCmd.AddCommand(reconFullCmd)
|
|
reconCmd.AddCommand(reconListCmd)
|
|
}
|