Files
keyhunter/pkg/importer/gitleaks.go
salvacybersec 83640ac200 feat(07-02): add Gitleaks JSON + CSV importers
- GitleaksImporter parses native JSON array output to []engine.Finding
- GitleaksCSVImporter parses CSV with header-based column resolution
- normalizeGitleaksRuleID strips suffixes (-api-key, -access-token, ...)
- Shared buildGitleaksFinding helper keeps JSON/CSV paths in lockstep
- Test fixtures + 8 tests covering happy path, empty, invalid, symlink fallback
2026-04-05 23:55:36 +03:00

154 lines
4.6 KiB
Go

package importer
import (
"encoding/csv"
"encoding/json"
"fmt"
"io"
"strconv"
"strings"
"time"
"github.com/salvacybersec/keyhunter/pkg/engine"
)
// GitleaksImporter imports findings from Gitleaks' native JSON report, which
// is a single JSON array of finding records. Each record is normalized into
// an engine.Finding. The type carries no state, so its zero value is ready
// to use.
type GitleaksImporter struct{}
// GitleaksCSVImporter imports findings from Gitleaks CSV output. The first
// row must be a header: columns are looked up by header name rather than by
// position, so a change in column order between Gitleaks versions does not
// break ingestion. The type carries no state, so its zero value is ready to
// use.
type GitleaksCSVImporter struct{}
// gitleaksRecord mirrors the JSON object emitted by `gitleaks detect -f json`.
// All listed fields are decoded, but GitleaksImporter.Import only forwards
// RuleID, Secret, File, SymlinkFile and StartLine when it builds an
// engine.Finding.
type gitleaksRecord struct {
Description string `json:"Description"`
// StartLine is forwarded as the finding's line number.
StartLine int `json:"StartLine"`
EndLine int `json:"EndLine"`
StartColumn int `json:"StartColumn"`
EndColumn int `json:"EndColumn"`
Match string `json:"Match"`
// Secret is forwarded as the finding's key value.
Secret string `json:"Secret"`
// File is the preferred source path; SymlinkFile is used as the source only
// when File is empty.
File string `json:"File"`
SymlinkFile string `json:"SymlinkFile"`
Commit string `json:"Commit"`
Entropy float64 `json:"Entropy"`
Author string `json:"Author"`
Email string `json:"Email"`
Date string `json:"Date"`
Message string `json:"Message"`
Tags []string `json:"Tags"`
// RuleID is normalized into the finding's provider name.
RuleID string `json:"RuleID"`
Fingerprint string `json:"Fingerprint"`
}
// Name reports the format identifier under which this importer is selected
// by the CLI --format flag.
func (GitleaksImporter) Name() string {
	const formatName = "gitleaks"
	return formatName
}
// Import reads one Gitleaks JSON array from r and converts every record into
// an engine.Finding, preserving record order.
//
// A decode failure is returned wrapped with the "gitleaks: decode json"
// prefix. An array with no records yields (nil, nil).
func (GitleaksImporter) Import(r io.Reader) ([]engine.Finding, error) {
	var parsed []gitleaksRecord
	if err := json.NewDecoder(r).Decode(&parsed); err != nil {
		return nil, fmt.Errorf("gitleaks: decode json: %w", err)
	}

	total := len(parsed)
	if total == 0 {
		return nil, nil
	}

	// Exactly one finding is produced per record, so fill a fixed-size slice
	// by index instead of appending.
	out := make([]engine.Finding, total)
	for i := range parsed {
		item := &parsed[i]
		out[i] = buildGitleaksFinding(item.RuleID, item.Secret, item.File, item.SymlinkFile, item.StartLine)
	}
	return out, nil
}
// Name reports the format identifier under which this importer is selected
// by the CLI --format flag.
func (GitleaksCSVImporter) Name() string {
	const formatName = "gitleaks-csv"
	return formatName
}
// Import decodes Gitleaks CSV output from r into findings.
//
// The first row must be a header. Columns are resolved by header name, with
// surrounding whitespace and a leading UTF-8 byte order mark ignored, so
// column order does not matter. A column that is absent from the header, or
// a row too short to reach it, yields the empty string for that field. Rows
// may have differing field counts.
//
// Completely empty input and header-only input both return (nil, nil). A CSV
// syntax error is returned wrapped with a "gitleaks-csv: read header" or
// "gitleaks-csv: read row" prefix, and no partial result is returned.
func (GitleaksCSVImporter) Import(r io.Reader) ([]engine.Finding, error) {
	reader := csv.NewReader(r)
	reader.FieldsPerRecord = -1 // tolerate ragged rows

	header, err := reader.Read()
	if err == io.EOF {
		return nil, nil
	}
	if err != nil {
		return nil, fmt.Errorf("gitleaks-csv: read header: %w", err)
	}

	index := make(map[string]int, len(header))
	for i, col := range header {
		if i == 0 {
			// encoding/csv passes a UTF-8 byte order mark through as part of
			// the first field, and TrimSpace does not treat U+FEFF as
			// whitespace. Without this, a BOM-prefixed file would make the
			// first column unresolvable and silently blank it in every
			// finding.
			col = strings.TrimPrefix(col, "\ufeff")
		}
		index[strings.TrimSpace(col)] = i
	}

	// get returns the named column of row, or "" when the column is not in
	// the header or the row is too short to contain it.
	get := func(row []string, name string) string {
		i, ok := index[name]
		if !ok || i >= len(row) {
			return ""
		}
		return row[i]
	}

	var findings []engine.Finding
	for {
		row, err := reader.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, fmt.Errorf("gitleaks-csv: read row: %w", err)
		}

		// StartLine is optional: a missing or non-numeric value deliberately
		// falls back to 0 rather than rejecting the whole import.
		startLine, _ := strconv.Atoi(get(row, "StartLine"))

		findings = append(findings, buildGitleaksFinding(
			get(row, "RuleID"),
			get(row, "Secret"),
			get(row, "File"),
			get(row, "SymlinkFile"),
			startLine,
		))
	}
	return findings, nil
}
// normalizeGitleaksRuleID converts a Gitleaks rule ID into a short
// KeyHunter-style provider name. The ID is trimmed of surrounding whitespace
// and lowercased, then the first matching trailing token from a fixed list
// ("-api-key", "-access-token", "-secret-key", "-secret", "-token", "-key")
// is removed. Longer tokens are tried before the shorter ones they contain,
// and at most one token is stripped. IDs without any of these endings are
// returned trimmed and lowercased but otherwise unchanged.
func normalizeGitleaksRuleID(id string) string {
	name := strings.TrimSpace(id)
	name = strings.ToLower(name)

	for _, tail := range [...]string{
		"-api-key",
		"-access-token",
		"-secret-key",
		"-secret",
		"-token",
		"-key",
	} {
		// cut is where the tail would begin if name ends with it.
		if cut := len(name) - len(tail); cut >= 0 && name[cut:] == tail {
			return name[:cut]
		}
	}
	return name
}
// buildGitleaksFinding is the single construction point for findings imported
// from Gitleaks, shared by the JSON and CSV importers so both produce
// identical output for identical fields.
//
// ruleID is normalized into the provider name, secret is stored both raw and
// masked, and startLine becomes the line number. The source is file, or
// symlink when file is empty. Every finding is stamped with the current time,
// "medium" confidence, the "import:gitleaks" source type, and an unverified
// status.
func buildGitleaksFinding(ruleID, secret, file, symlink string, startLine int) engine.Finding {
	var f engine.Finding

	f.ProviderName = normalizeGitleaksRuleID(ruleID)
	f.KeyValue = secret
	f.KeyMasked = engine.MaskKey(secret)
	f.Confidence = "medium"

	// Prefer the regular path; use the symlink path only when it is missing.
	if file != "" {
		f.Source = file
	} else {
		f.Source = symlink
	}
	f.SourceType = "import:gitleaks"
	f.LineNumber = startLine

	f.DetectedAt = time.Now()
	f.Verified = false
	f.VerifyStatus = "unverified"
	return f
}