package cmd import ( "fmt" "os" "time" "github.com/spf13/cobra" "github.com/salvacybersec/keyhunter/pkg/engine" "github.com/salvacybersec/keyhunter/pkg/importer" "github.com/salvacybersec/keyhunter/pkg/storage" ) // importFormat holds the --format flag value for `keyhunter import`. It is // a package-level var so tests can reset it between runs. var importFormat string // importCmd wires the pkg/importer adapters to the SQLite storage layer so // users can consolidate external scanner output (TruffleHog, Gitleaks) into // the unified KeyHunter database. Delivers IMP-01/02/03 end-to-end. var importCmd = &cobra.Command{ Use: "import ", Short: "Import findings from TruffleHog or Gitleaks output", Long: `Import scan output from external secret scanners into the KeyHunter database. Supported formats: trufflehog TruffleHog v3 JSON (trufflehog ... --json) gitleaks Gitleaks native JSON (gitleaks detect -f json) gitleaks-csv Gitleaks CSV (gitleaks detect -f csv) Imports are idempotent: repeated invocations against the same file deduplicate against both in-file duplicates and findings already persisted in the database.`, Args: cobra.ExactArgs(1), RunE: runImport, } func init() { importCmd.Flags().StringVar(&importFormat, "format", "", "input format: trufflehog | gitleaks | gitleaks-csv (required)") _ = importCmd.MarkFlagRequired("format") } // runImport is the RunE for importCmd. Extracted so tests can exercise it // without going through cobra's full argument parsing stack. func runImport(cmd *cobra.Command, args []string) error { path := args[0] imp, err := selectImporter(importFormat) if err != nil { return err } f, err := os.Open(path) if err != nil { return fmt.Errorf("opening %s: %w", path, err) } defer f.Close() findings, err := imp.Import(f) if err != nil { return fmt.Errorf("parsing %s output: %w", imp.Name(), err) } total := len(findings) unique, inFileDupes := importer.Dedup(findings) db, encKey, err := openDBWithKey() if err != nil { return err } defer db.Close() newCount := 0 dbDupes := 0 for _, finding := range unique { exists, err := db.FindingExistsByKey(finding.ProviderName, finding.KeyMasked, finding.Source, finding.LineNumber) if err != nil { return fmt.Errorf("checking existing findings: %w", err) } if exists { dbDupes++ continue } sf := engineToStorage(finding) if _, err := db.SaveFinding(sf, encKey); err != nil { return fmt.Errorf("saving finding: %w", err) } newCount++ } totalDupes := inFileDupes + dbDupes fmt.Fprintf(cmd.OutOrStdout(), "Imported %d findings (%d new, %d duplicates)\n", total, newCount, totalDupes) return nil } // selectImporter resolves the --format flag to a concrete Importer. func selectImporter(format string) (importer.Importer, error) { switch format { case "trufflehog": return importer.TruffleHogImporter{}, nil case "gitleaks": return importer.GitleaksImporter{}, nil case "gitleaks-csv": return importer.GitleaksCSVImporter{}, nil default: return nil, fmt.Errorf("unknown format %q (want trufflehog | gitleaks | gitleaks-csv)", format) } } // engineToStorage converts an engine.Finding (importer output shape) into a // storage.Finding suitable for db.SaveFinding. The field name difference // between engine.Source and storage.SourcePath is the main reason this // conversion exists. DetectedAt is defaulted to now if unset so imported // records always carry a timestamp. func engineToStorage(f engine.Finding) storage.Finding { if f.DetectedAt.IsZero() { f.DetectedAt = time.Now() } return storage.Finding{ ProviderName: f.ProviderName, KeyValue: f.KeyValue, KeyMasked: f.KeyMasked, Confidence: f.Confidence, SourcePath: f.Source, SourceType: f.SourceType, LineNumber: f.LineNumber, Verified: f.Verified, VerifyStatus: f.VerifyStatus, VerifyHTTPCode: f.VerifyHTTPCode, VerifyMetadata: f.VerifyMetadata, } }