- GitleaksImporter parses native JSON array output to []engine.Finding - GitleaksCSVImporter parses CSV with header-based column resolution - normalizeGitleaksRuleID strips suffixes (-api-key, -access-token, ...) - Shared buildGitleaksFinding helper keeps JSON/CSV paths in lockstep - Test fixtures + 8 tests covering happy path, empty, invalid, symlink fallback
154 lines
4.6 KiB
Go
154 lines
4.6 KiB
Go
package importer
|
|
|
|
import (
|
|
"encoding/csv"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/salvacybersec/keyhunter/pkg/engine"
|
|
)
|
|
|
|
// GitleaksImporter parses Gitleaks native JSON output (an array of finding
// records) and normalizes each record into an engine.Finding.
//
// The type has no fields, so its zero value is ready to use.
type GitleaksImporter struct{}
|
|
|
|
// GitleaksCSVImporter parses Gitleaks CSV output with a mandatory header row.
// Columns are resolved by header name so Gitleaks version drift in column
// order does not break ingestion.
//
// The type has no fields, so its zero value is ready to use.
type GitleaksCSVImporter struct{}
|
|
|
|
// gitleaksRecord mirrors the JSON object emitted by `gitleaks detect -f json`.
// Every field is bound to the identically named JSON key through its struct
// tag.
//
// Of these fields, the JSON importer visible in this file reads only RuleID,
// Secret, File, SymlinkFile and StartLine; the others are decoded but not
// referenced by the code shown here.
type gitleaksRecord struct {
	Description string   `json:"Description"`
	StartLine   int      `json:"StartLine"`
	EndLine     int      `json:"EndLine"`
	StartColumn int      `json:"StartColumn"`
	EndColumn   int      `json:"EndColumn"`
	Match       string   `json:"Match"`
	Secret      string   `json:"Secret"`
	File        string   `json:"File"`
	SymlinkFile string   `json:"SymlinkFile"`
	Commit      string   `json:"Commit"`
	Entropy     float64  `json:"Entropy"`
	Author      string   `json:"Author"`
	Email       string   `json:"Email"`
	Date        string   `json:"Date"`
	Message     string   `json:"Message"`
	Tags        []string `json:"Tags"`
	RuleID      string   `json:"RuleID"`
	Fingerprint string   `json:"Fingerprint"`
}
|
|
|
|
// Name returns the importer identifier used by the CLI --format flag.
|
|
func (GitleaksImporter) Name() string { return "gitleaks" }
|
|
|
|
// Import decodes a Gitleaks JSON array from r and returns the normalized
|
|
// findings. An empty array returns (nil, nil).
|
|
func (GitleaksImporter) Import(r io.Reader) ([]engine.Finding, error) {
|
|
var records []gitleaksRecord
|
|
dec := json.NewDecoder(r)
|
|
if err := dec.Decode(&records); err != nil {
|
|
return nil, fmt.Errorf("gitleaks: decode json: %w", err)
|
|
}
|
|
if len(records) == 0 {
|
|
return nil, nil
|
|
}
|
|
findings := make([]engine.Finding, 0, len(records))
|
|
for _, rec := range records {
|
|
findings = append(findings, buildGitleaksFinding(rec.RuleID, rec.Secret, rec.File, rec.SymlinkFile, rec.StartLine))
|
|
}
|
|
return findings, nil
|
|
}
|
|
|
|
// Name returns the importer identifier used by the CLI --format flag.
|
|
func (GitleaksCSVImporter) Name() string { return "gitleaks-csv" }
|
|
|
|
// Import decodes Gitleaks CSV output with a mandatory header row. Columns are
|
|
// resolved by header name; missing optional fields default to zero values.
|
|
// A header-only input returns (nil, nil).
|
|
func (GitleaksCSVImporter) Import(r io.Reader) ([]engine.Finding, error) {
|
|
reader := csv.NewReader(r)
|
|
reader.FieldsPerRecord = -1 // tolerate ragged rows
|
|
|
|
header, err := reader.Read()
|
|
if err == io.EOF {
|
|
return nil, nil
|
|
}
|
|
if err != nil {
|
|
return nil, fmt.Errorf("gitleaks-csv: read header: %w", err)
|
|
}
|
|
|
|
index := make(map[string]int, len(header))
|
|
for i, col := range header {
|
|
index[strings.TrimSpace(col)] = i
|
|
}
|
|
|
|
get := func(row []string, name string) string {
|
|
i, ok := index[name]
|
|
if !ok || i >= len(row) {
|
|
return ""
|
|
}
|
|
return row[i]
|
|
}
|
|
|
|
var findings []engine.Finding
|
|
for {
|
|
row, err := reader.Read()
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
return nil, fmt.Errorf("gitleaks-csv: read row: %w", err)
|
|
}
|
|
startLine, _ := strconv.Atoi(get(row, "StartLine"))
|
|
findings = append(findings, buildGitleaksFinding(
|
|
get(row, "RuleID"),
|
|
get(row, "Secret"),
|
|
get(row, "File"),
|
|
get(row, "SymlinkFile"),
|
|
startLine,
|
|
))
|
|
}
|
|
return findings, nil
|
|
}
|
|
|
|
// normalizeGitleaksRuleID converts a Gitleaks rule identifier into a short
// KeyHunter-style provider name.
//
// The input is trimmed of surrounding whitespace and lowercased. Then the
// first matching suffix from a fixed list ("-api-key", "-access-token",
// "-secret-key", "-secret", "-token", "-key") is removed. At most one suffix
// is stripped. Identifiers that end in none of them are returned trimmed and
// lowercased but otherwise unchanged.
func normalizeGitleaksRuleID(id string) string {
	normalized := strings.ToLower(strings.TrimSpace(id))

	// Longer suffixes come before the shorter ones they end with, so that
	// e.g. "x-api-key" becomes "x" rather than "x-api".
	for _, suffix := range [...]string{
		"-api-key",
		"-access-token",
		"-secret-key",
		"-secret",
		"-token",
		"-key",
	} {
		if stripped := strings.TrimSuffix(normalized, suffix); len(stripped) != len(normalized) {
			return stripped
		}
	}
	return normalized
}
|
|
|
|
// buildGitleaksFinding assembles an engine.Finding from fields common to both
|
|
// the JSON and CSV Gitleaks code paths so the two importers stay in lockstep.
|
|
func buildGitleaksFinding(ruleID, secret, file, symlink string, startLine int) engine.Finding {
|
|
source := file
|
|
if source == "" {
|
|
source = symlink
|
|
}
|
|
return engine.Finding{
|
|
ProviderName: normalizeGitleaksRuleID(ruleID),
|
|
KeyValue: secret,
|
|
KeyMasked: engine.MaskKey(secret),
|
|
Confidence: "medium",
|
|
Source: source,
|
|
SourceType: "import:gitleaks",
|
|
LineNumber: startLine,
|
|
DetectedAt: time.Now(),
|
|
Verified: false,
|
|
VerifyStatus: "unverified",
|
|
}
|
|
}
|