feat(07-02): add Gitleaks JSON + CSV importers
- GitleaksImporter parses native JSON array output to []engine.Finding
- GitleaksCSVImporter parses CSV with header-based column resolution
- normalizeGitleaksRuleID strips suffixes (-api-key, -access-token, ...)
- Shared buildGitleaksFinding helper keeps JSON/CSV paths in lockstep
- Test fixtures + 8 tests covering happy path, empty, invalid, symlink fallback
This commit is contained in:
153
pkg/importer/gitleaks.go
Normal file
153
pkg/importer/gitleaks.go
Normal file
@@ -0,0 +1,153 @@
|
||||
package importer
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/engine"
|
||||
)
|
||||
|
||||
// GitleaksImporter ingests the native JSON report written by Gitleaks (a
// single array of finding records) and converts every record into an
// engine.Finding. The type carries no state, so its zero value is ready to
// use.
type GitleaksImporter struct{}
|
||||
|
||||
// GitleaksCSVImporter ingests the CSV report written by Gitleaks. The first
// row must be a header; every column is looked up by its header name rather
// than by position, so a change in column order between Gitleaks versions
// does not break ingestion. The type carries no state, so its zero value is
// ready to use.
type GitleaksCSVImporter struct{}
|
||||
|
||||
// gitleaksRecord is the decoding target for one element of the JSON array
// produced by `gitleaks detect -f json`. Each field is tagged with the exact
// key name used in that report.
//
// Of these fields, the JSON importer only forwards RuleID, Secret, File,
// SymlinkFile and StartLine to buildGitleaksFinding; the remaining fields are
// decoded but not consumed by the importer code visible here.
type gitleaksRecord struct {
	Description string   `json:"Description"`
	StartLine   int      `json:"StartLine"`
	EndLine     int      `json:"EndLine"`
	StartColumn int      `json:"StartColumn"`
	EndColumn   int      `json:"EndColumn"`
	Match       string   `json:"Match"`
	Secret      string   `json:"Secret"`
	File        string   `json:"File"`
	SymlinkFile string   `json:"SymlinkFile"`
	Commit      string   `json:"Commit"`
	Entropy     float64  `json:"Entropy"`
	Author      string   `json:"Author"`
	Email       string   `json:"Email"`
	Date        string   `json:"Date"`
	Message     string   `json:"Message"`
	Tags        []string `json:"Tags"`
	RuleID      string   `json:"RuleID"`
	Fingerprint string   `json:"Fingerprint"`
}
|
||||
|
||||
// Name returns the importer identifier used by the CLI --format flag.
|
||||
func (GitleaksImporter) Name() string { return "gitleaks" }
|
||||
|
||||
// Import decodes a Gitleaks JSON array from r and returns the normalized
|
||||
// findings. An empty array returns (nil, nil).
|
||||
func (GitleaksImporter) Import(r io.Reader) ([]engine.Finding, error) {
|
||||
var records []gitleaksRecord
|
||||
dec := json.NewDecoder(r)
|
||||
if err := dec.Decode(&records); err != nil {
|
||||
return nil, fmt.Errorf("gitleaks: decode json: %w", err)
|
||||
}
|
||||
if len(records) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
findings := make([]engine.Finding, 0, len(records))
|
||||
for _, rec := range records {
|
||||
findings = append(findings, buildGitleaksFinding(rec.RuleID, rec.Secret, rec.File, rec.SymlinkFile, rec.StartLine))
|
||||
}
|
||||
return findings, nil
|
||||
}
|
||||
|
||||
// Name returns the importer identifier used by the CLI --format flag.
|
||||
func (GitleaksCSVImporter) Name() string { return "gitleaks-csv" }
|
||||
|
||||
// Import decodes Gitleaks CSV output with a mandatory header row. Columns are
|
||||
// resolved by header name; missing optional fields default to zero values.
|
||||
// A header-only input returns (nil, nil).
|
||||
func (GitleaksCSVImporter) Import(r io.Reader) ([]engine.Finding, error) {
|
||||
reader := csv.NewReader(r)
|
||||
reader.FieldsPerRecord = -1 // tolerate ragged rows
|
||||
|
||||
header, err := reader.Read()
|
||||
if err == io.EOF {
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("gitleaks-csv: read header: %w", err)
|
||||
}
|
||||
|
||||
index := make(map[string]int, len(header))
|
||||
for i, col := range header {
|
||||
index[strings.TrimSpace(col)] = i
|
||||
}
|
||||
|
||||
get := func(row []string, name string) string {
|
||||
i, ok := index[name]
|
||||
if !ok || i >= len(row) {
|
||||
return ""
|
||||
}
|
||||
return row[i]
|
||||
}
|
||||
|
||||
var findings []engine.Finding
|
||||
for {
|
||||
row, err := reader.Read()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("gitleaks-csv: read row: %w", err)
|
||||
}
|
||||
startLine, _ := strconv.Atoi(get(row, "StartLine"))
|
||||
findings = append(findings, buildGitleaksFinding(
|
||||
get(row, "RuleID"),
|
||||
get(row, "Secret"),
|
||||
get(row, "File"),
|
||||
get(row, "SymlinkFile"),
|
||||
startLine,
|
||||
))
|
||||
}
|
||||
return findings, nil
|
||||
}
|
||||
|
||||
// normalizeGitleaksRuleID maps a Gitleaks rule identifier to a short
// KeyHunter-style provider name.
//
// The input is trimmed of surrounding whitespace and lowercased, then at most
// one trailing token ("-api-key", "-access-token", "-secret-key", "-secret",
// "-token", "-key") is removed. Identifiers without such a suffix are
// returned lowercased but otherwise unchanged. An identifier that consists of
// nothing but a suffix token is also returned unchanged, so the result is
// empty only when the trimmed input itself is empty.
func normalizeGitleaksRuleID(id string) string {
	id = strings.ToLower(strings.TrimSpace(id))
	// Longer tokens come before the shorter tokens they end with, so that
	// e.g. "-api-key" is stripped as a whole rather than only its "-key" tail.
	suffixes := []string{"-api-key", "-access-token", "-secret-key", "-secret", "-token", "-key"}
	for _, s := range suffixes {
		if !strings.HasSuffix(id, s) {
			continue
		}
		if base := id[:len(id)-len(s)]; base != "" {
			return base
		}
		// Stripping would leave an empty provider name; keep the ID as is.
		return id
	}
	return id
}
|
||||
|
||||
// buildGitleaksFinding assembles an engine.Finding from fields common to both
|
||||
// the JSON and CSV Gitleaks code paths so the two importers stay in lockstep.
|
||||
func buildGitleaksFinding(ruleID, secret, file, symlink string, startLine int) engine.Finding {
|
||||
source := file
|
||||
if source == "" {
|
||||
source = symlink
|
||||
}
|
||||
return engine.Finding{
|
||||
ProviderName: normalizeGitleaksRuleID(ruleID),
|
||||
KeyValue: secret,
|
||||
KeyMasked: engine.MaskKey(secret),
|
||||
Confidence: "medium",
|
||||
Source: source,
|
||||
SourceType: "import:gitleaks",
|
||||
LineNumber: startLine,
|
||||
DetectedAt: time.Now(),
|
||||
Verified: false,
|
||||
VerifyStatus: "unverified",
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user