- Replace import stub with cmd/import.go dispatching to pkg/importer (trufflehog, gitleaks, gitleaks-csv) via --format flag
- Reuse openDBWithKey helper so encryption + path resolution match scan/keys
- engineToStorage converts engine.Finding -> storage.Finding (Source -> SourcePath)
- Add pkg/storage.FindingExistsByKey for idempotent cross-import dedup keyed on (provider, masked key, source path, line number)
- cmd/import_test.go: selector table, field conversion, end-to-end trufflehog import with re-run duplicate assertion, unknown-format + missing-file errors
- pkg/storage queries_test: FindingExistsByKey hit and four miss cases

Delivers IMP-01/02/03 end-to-end.
133 lines
3.9 KiB
Go
133 lines
3.9 KiB
Go
package cmd
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"time"
|
|
|
|
"github.com/spf13/cobra"
|
|
|
|
"github.com/salvacybersec/keyhunter/pkg/engine"
|
|
"github.com/salvacybersec/keyhunter/pkg/importer"
|
|
"github.com/salvacybersec/keyhunter/pkg/storage"
|
|
)
|
|
|
|
var (
	// importFormat receives the value of the --format flag of
	// `keyhunter import`. It lives at package scope so that tests can
	// reset it between runs.
	importFormat string
)
|
|
|
|
// importCmd wires the pkg/importer adapters to the SQLite storage layer so
|
|
// users can consolidate external scanner output (TruffleHog, Gitleaks) into
|
|
// the unified KeyHunter database. Delivers IMP-01/02/03 end-to-end.
|
|
var importCmd = &cobra.Command{
|
|
Use: "import <file>",
|
|
Short: "Import findings from TruffleHog or Gitleaks output",
|
|
Long: `Import scan output from external secret scanners into the KeyHunter database.
|
|
|
|
Supported formats:
|
|
trufflehog TruffleHog v3 JSON (trufflehog ... --json)
|
|
gitleaks Gitleaks native JSON (gitleaks detect -f json)
|
|
gitleaks-csv Gitleaks CSV (gitleaks detect -f csv)
|
|
|
|
Imports are idempotent: repeated invocations against the same file deduplicate
|
|
against both in-file duplicates and findings already persisted in the database.`,
|
|
Args: cobra.ExactArgs(1),
|
|
RunE: runImport,
|
|
}
|
|
|
|
func init() {
|
|
importCmd.Flags().StringVar(&importFormat, "format", "", "input format: trufflehog | gitleaks | gitleaks-csv (required)")
|
|
_ = importCmd.MarkFlagRequired("format")
|
|
}
|
|
|
|
// runImport is the RunE for importCmd. Extracted so tests can exercise it
|
|
// without going through cobra's full argument parsing stack.
|
|
func runImport(cmd *cobra.Command, args []string) error {
|
|
path := args[0]
|
|
|
|
imp, err := selectImporter(importFormat)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
f, err := os.Open(path)
|
|
if err != nil {
|
|
return fmt.Errorf("opening %s: %w", path, err)
|
|
}
|
|
defer f.Close()
|
|
|
|
findings, err := imp.Import(f)
|
|
if err != nil {
|
|
return fmt.Errorf("parsing %s output: %w", imp.Name(), err)
|
|
}
|
|
|
|
total := len(findings)
|
|
unique, inFileDupes := importer.Dedup(findings)
|
|
|
|
db, encKey, err := openDBWithKey()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer db.Close()
|
|
|
|
newCount := 0
|
|
dbDupes := 0
|
|
for _, finding := range unique {
|
|
exists, err := db.FindingExistsByKey(finding.ProviderName, finding.KeyMasked, finding.Source, finding.LineNumber)
|
|
if err != nil {
|
|
return fmt.Errorf("checking existing findings: %w", err)
|
|
}
|
|
if exists {
|
|
dbDupes++
|
|
continue
|
|
}
|
|
sf := engineToStorage(finding)
|
|
if _, err := db.SaveFinding(sf, encKey); err != nil {
|
|
return fmt.Errorf("saving finding: %w", err)
|
|
}
|
|
newCount++
|
|
}
|
|
|
|
totalDupes := inFileDupes + dbDupes
|
|
fmt.Fprintf(cmd.OutOrStdout(), "Imported %d findings (%d new, %d duplicates)\n", total, newCount, totalDupes)
|
|
return nil
|
|
}
|
|
|
|
// selectImporter resolves the --format flag to a concrete Importer.
|
|
func selectImporter(format string) (importer.Importer, error) {
|
|
switch format {
|
|
case "trufflehog":
|
|
return importer.TruffleHogImporter{}, nil
|
|
case "gitleaks":
|
|
return importer.GitleaksImporter{}, nil
|
|
case "gitleaks-csv":
|
|
return importer.GitleaksCSVImporter{}, nil
|
|
default:
|
|
return nil, fmt.Errorf("unknown format %q (want trufflehog | gitleaks | gitleaks-csv)", format)
|
|
}
|
|
}
|
|
|
|
// engineToStorage converts an engine.Finding (importer output shape) into a
|
|
// storage.Finding suitable for db.SaveFinding. The field name difference
|
|
// between engine.Source and storage.SourcePath is the main reason this
|
|
// conversion exists. DetectedAt is defaulted to now if unset so imported
|
|
// records always carry a timestamp.
|
|
func engineToStorage(f engine.Finding) storage.Finding {
|
|
if f.DetectedAt.IsZero() {
|
|
f.DetectedAt = time.Now()
|
|
}
|
|
return storage.Finding{
|
|
ProviderName: f.ProviderName,
|
|
KeyValue: f.KeyValue,
|
|
KeyMasked: f.KeyMasked,
|
|
Confidence: f.Confidence,
|
|
SourcePath: f.Source,
|
|
SourceType: f.SourceType,
|
|
LineNumber: f.LineNumber,
|
|
Verified: f.Verified,
|
|
VerifyStatus: f.VerifyStatus,
|
|
VerifyHTTPCode: f.VerifyHTTPCode,
|
|
VerifyMetadata: f.VerifyMetadata,
|
|
}
|
|
}
|