fix(phase-10): add --sources filter flag and DB persistence to recon full
Closes 2 verification gaps:
1. The --sources=github,gitlab flag filters registered sources before the sweep.
2. Findings are persisted to SQLite via storage.SaveFinding after dedup.

Also adds an Engine.Get() method for source lookup by name.
This commit is contained in:
70
cmd/recon.go
70
cmd/recon.go
@@ -4,10 +4,13 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/engine"
|
||||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||||
"github.com/salvacybersec/keyhunter/pkg/recon"
|
"github.com/salvacybersec/keyhunter/pkg/recon"
|
||||||
"github.com/salvacybersec/keyhunter/pkg/recon/sources"
|
"github.com/salvacybersec/keyhunter/pkg/recon/sources"
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/storage"
|
||||||
"github.com/spf13/cobra"
|
"github.com/spf13/cobra"
|
||||||
"github.com/spf13/viper"
|
"github.com/spf13/viper"
|
||||||
)
|
)
|
||||||
@@ -16,6 +19,8 @@ var (
|
|||||||
reconStealth bool
|
reconStealth bool
|
||||||
reconRespectRobots bool
|
reconRespectRobots bool
|
||||||
reconQuery string
|
reconQuery string
|
||||||
|
reconSourcesFilter []string
|
||||||
|
reconNoPersist bool
|
||||||
)
|
)
|
||||||
|
|
||||||
var reconCmd = &cobra.Command{
|
var reconCmd = &cobra.Command{
|
||||||
@@ -26,9 +31,12 @@ var reconCmd = &cobra.Command{
|
|||||||
|
|
||||||
var reconFullCmd = &cobra.Command{
|
var reconFullCmd = &cobra.Command{
|
||||||
Use: "full",
|
Use: "full",
|
||||||
Short: "Sweep all enabled sources in parallel and deduplicate findings",
|
Short: "Sweep enabled sources in parallel, deduplicate findings, and persist to DB",
|
||||||
RunE: func(cmd *cobra.Command, args []string) error {
|
RunE: func(cmd *cobra.Command, args []string) error {
|
||||||
eng := buildReconEngine()
|
eng := buildReconEngine()
|
||||||
|
if len(reconSourcesFilter) > 0 {
|
||||||
|
eng = filterEngineSources(eng, reconSourcesFilter)
|
||||||
|
}
|
||||||
cfg := recon.Config{
|
cfg := recon.Config{
|
||||||
Stealth: reconStealth,
|
Stealth: reconStealth,
|
||||||
RespectRobots: reconRespectRobots,
|
RespectRobots: reconRespectRobots,
|
||||||
@@ -44,10 +52,68 @@ var reconFullCmd = &cobra.Command{
|
|||||||
for _, f := range deduped {
|
for _, f := range deduped {
|
||||||
fmt.Printf(" [%s] %s %s %s\n", f.SourceType, f.ProviderName, f.KeyMasked, f.Source)
|
fmt.Printf(" [%s] %s %s %s\n", f.SourceType, f.ProviderName, f.KeyMasked, f.Source)
|
||||||
}
|
}
|
||||||
|
if !reconNoPersist && len(deduped) > 0 {
|
||||||
|
if err := persistReconFindings(deduped); err != nil {
|
||||||
|
fmt.Fprintf(os.Stderr, "recon: warning: failed to persist findings: %v\n", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// filterEngineSources rebuilds an Engine containing only the sources named in filter.
|
||||||
|
// Unknown names are silently skipped to avoid breaking on typos — the user sees the
|
||||||
|
// remaining count in the sweep summary.
|
||||||
|
func filterEngineSources(eng *recon.Engine, filter []string) *recon.Engine {
|
||||||
|
want := make(map[string]bool, len(filter))
|
||||||
|
for _, name := range filter {
|
||||||
|
want[strings.TrimSpace(name)] = true
|
||||||
|
}
|
||||||
|
filtered := recon.NewEngine()
|
||||||
|
// We can't introspect source structs out of the original engine, so rebuild
|
||||||
|
// fresh and re-register only what matches. This relies on buildReconEngine
|
||||||
|
// being idempotent and cheap.
|
||||||
|
fresh := buildReconEngine()
|
||||||
|
for _, name := range fresh.List() {
|
||||||
|
if want[name] {
|
||||||
|
if src, ok := fresh.Get(name); ok {
|
||||||
|
filtered.Register(src)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ = eng // parameter kept for API symmetry; filtered engine replaces it
|
||||||
|
return filtered
|
||||||
|
}
|
||||||
|
|
||||||
|
// persistReconFindings writes deduplicated findings to the SQLite findings table.
|
||||||
|
// Uses the same encryption key derivation as the scan command.
|
||||||
|
func persistReconFindings(findings []engine.Finding) error {
|
||||||
|
db, encKey, err := openDBWithKey()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
defer db.Close()
|
||||||
|
for _, f := range findings {
|
||||||
|
sf := storage.Finding{
|
||||||
|
ProviderName: f.ProviderName,
|
||||||
|
KeyValue: f.KeyValue,
|
||||||
|
KeyMasked: f.KeyMasked,
|
||||||
|
Confidence: f.Confidence,
|
||||||
|
SourcePath: f.Source,
|
||||||
|
SourceType: f.SourceType,
|
||||||
|
LineNumber: f.LineNumber,
|
||||||
|
Verified: f.Verified,
|
||||||
|
VerifyStatus: f.VerifyStatus,
|
||||||
|
VerifyHTTPCode: f.VerifyHTTPCode,
|
||||||
|
VerifyMetadata: f.VerifyMetadata,
|
||||||
|
}
|
||||||
|
if _, err := db.SaveFinding(sf, encKey); err != nil {
|
||||||
|
return fmt.Errorf("save finding: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
var reconListCmd = &cobra.Command{
|
var reconListCmd = &cobra.Command{
|
||||||
Use: "list",
|
Use: "list",
|
||||||
Short: "List registered recon sources",
|
Short: "List registered recon sources",
|
||||||
@@ -105,6 +171,8 @@ func init() {
|
|||||||
reconFullCmd.Flags().BoolVar(&reconStealth, "stealth", false, "enable UA rotation and jitter delays")
|
reconFullCmd.Flags().BoolVar(&reconStealth, "stealth", false, "enable UA rotation and jitter delays")
|
||||||
reconFullCmd.Flags().BoolVar(&reconRespectRobots, "respect-robots", true, "respect robots.txt for web-scraping sources")
|
reconFullCmd.Flags().BoolVar(&reconRespectRobots, "respect-robots", true, "respect robots.txt for web-scraping sources")
|
||||||
reconFullCmd.Flags().StringVar(&reconQuery, "query", "", "override query sent to each source")
|
reconFullCmd.Flags().StringVar(&reconQuery, "query", "", "override query sent to each source")
|
||||||
|
reconFullCmd.Flags().StringSliceVar(&reconSourcesFilter, "sources", nil, "comma-separated list of sources to run (e.g., github,gitlab)")
|
||||||
|
reconFullCmd.Flags().BoolVar(&reconNoPersist, "no-persist", false, "do not write findings to the database (print only)")
|
||||||
reconCmd.AddCommand(reconFullCmd)
|
reconCmd.AddCommand(reconFullCmd)
|
||||||
reconCmd.AddCommand(reconListCmd)
|
reconCmd.AddCommand(reconListCmd)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -33,6 +33,14 @@ func (e *Engine) Register(s ReconSource) {
|
|||||||
e.sources[s.Name()] = s
|
e.sources[s.Name()] = s
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Get returns a registered source by name and true, or nil and false.
|
||||||
|
func (e *Engine) Get(name string) (ReconSource, bool) {
|
||||||
|
e.mu.RLock()
|
||||||
|
defer e.mu.RUnlock()
|
||||||
|
s, ok := e.sources[name]
|
||||||
|
return s, ok
|
||||||
|
}
|
||||||
|
|
||||||
// List returns registered source names in sorted order.
|
// List returns registered source names in sorted order.
|
||||||
func (e *Engine) List() []string {
|
func (e *Engine) List() []string {
|
||||||
e.mu.RLock()
|
e.mu.RLock()
|
||||||
|
|||||||
Reference in New Issue
Block a user