package importer import ( "encoding/csv" "encoding/json" "fmt" "io" "strconv" "strings" "time" "github.com/salvacybersec/keyhunter/pkg/engine" ) // GitleaksImporter parses Gitleaks native JSON output (an array of finding // records) and normalizes each record into an engine.Finding. type GitleaksImporter struct{} // GitleaksCSVImporter parses Gitleaks CSV output with a mandatory header row. // Columns are resolved by header name so Gitleaks version drift in column // order does not break ingestion. type GitleaksCSVImporter struct{} // gitleaksRecord mirrors the JSON object emitted by `gitleaks detect -f json`. type gitleaksRecord struct { Description string `json:"Description"` StartLine int `json:"StartLine"` EndLine int `json:"EndLine"` StartColumn int `json:"StartColumn"` EndColumn int `json:"EndColumn"` Match string `json:"Match"` Secret string `json:"Secret"` File string `json:"File"` SymlinkFile string `json:"SymlinkFile"` Commit string `json:"Commit"` Entropy float64 `json:"Entropy"` Author string `json:"Author"` Email string `json:"Email"` Date string `json:"Date"` Message string `json:"Message"` Tags []string `json:"Tags"` RuleID string `json:"RuleID"` Fingerprint string `json:"Fingerprint"` } // Name returns the importer identifier used by the CLI --format flag. func (GitleaksImporter) Name() string { return "gitleaks" } // Import decodes a Gitleaks JSON array from r and returns the normalized // findings. An empty array returns (nil, nil). func (GitleaksImporter) Import(r io.Reader) ([]engine.Finding, error) { var records []gitleaksRecord dec := json.NewDecoder(r) if err := dec.Decode(&records); err != nil { return nil, fmt.Errorf("gitleaks: decode json: %w", err) } if len(records) == 0 { return nil, nil } findings := make([]engine.Finding, 0, len(records)) for _, rec := range records { findings = append(findings, buildGitleaksFinding(rec.RuleID, rec.Secret, rec.File, rec.SymlinkFile, rec.StartLine)) } return findings, nil } // Name returns the importer identifier used by the CLI --format flag. func (GitleaksCSVImporter) Name() string { return "gitleaks-csv" } // Import decodes Gitleaks CSV output with a mandatory header row. Columns are // resolved by header name; missing optional fields default to zero values. // A header-only input returns (nil, nil). func (GitleaksCSVImporter) Import(r io.Reader) ([]engine.Finding, error) { reader := csv.NewReader(r) reader.FieldsPerRecord = -1 // tolerate ragged rows header, err := reader.Read() if err == io.EOF { return nil, nil } if err != nil { return nil, fmt.Errorf("gitleaks-csv: read header: %w", err) } index := make(map[string]int, len(header)) for i, col := range header { index[strings.TrimSpace(col)] = i } get := func(row []string, name string) string { i, ok := index[name] if !ok || i >= len(row) { return "" } return row[i] } var findings []engine.Finding for { row, err := reader.Read() if err == io.EOF { break } if err != nil { return nil, fmt.Errorf("gitleaks-csv: read row: %w", err) } startLine, _ := strconv.Atoi(get(row, "StartLine")) findings = append(findings, buildGitleaksFinding( get(row, "RuleID"), get(row, "Secret"), get(row, "File"), get(row, "SymlinkFile"), startLine, )) } return findings, nil } // normalizeGitleaksRuleID maps a Gitleaks rule identifier to a short // KeyHunter-style provider name. It lowercases the input and strips common // trailing tokens ("-api-key", "-access-token", ...). Unknown patterns are // returned lowercased but otherwise unchanged. func normalizeGitleaksRuleID(id string) string { id = strings.ToLower(strings.TrimSpace(id)) suffixes := []string{"-api-key", "-access-token", "-secret-key", "-secret", "-token", "-key"} for _, s := range suffixes { if strings.HasSuffix(id, s) { return strings.TrimSuffix(id, s) } } return id } // buildGitleaksFinding assembles an engine.Finding from fields common to both // the JSON and CSV Gitleaks code paths so the two importers stay in lockstep. func buildGitleaksFinding(ruleID, secret, file, symlink string, startLine int) engine.Finding { source := file if source == "" { source = symlink } return engine.Finding{ ProviderName: normalizeGitleaksRuleID(ruleID), KeyValue: secret, KeyMasked: engine.MaskKey(secret), Confidence: "medium", Source: source, SourceType: "import:gitleaks", LineNumber: startLine, DetectedAt: time.Now(), Verified: false, VerifyStatus: "unverified", } }