feat(07-02): add Gitleaks JSON + CSV importers
- GitleaksImporter parses native JSON array output to []engine.Finding
- GitleaksCSVImporter parses CSV with header-based column resolution
- normalizeGitleaksRuleID strips suffixes (-api-key, -access-token, ...)
- Shared buildGitleaksFinding helper keeps JSON/CSV paths in lockstep
- Test fixtures + 8 tests covering happy path, empty, invalid, symlink fallback
This commit is contained in:
153
pkg/importer/gitleaks.go
Normal file
153
pkg/importer/gitleaks.go
Normal file
@@ -0,0 +1,153 @@
|
|||||||
|
package importer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/csv"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/engine"
|
||||||
|
)
|
||||||
|
|
||||||
|
// GitleaksImporter turns the native JSON report written by Gitleaks (a single
// array of finding records) into engine.Finding values. The type holds no
// state, so its zero value is ready to use.
type GitleaksImporter struct{}
|
||||||
|
|
||||||
|
// GitleaksCSVImporter turns the CSV report written by Gitleaks into
// engine.Finding values. The input must start with a header row; columns are
// looked up by header name rather than by position, so a change in column
// order between Gitleaks versions does not break ingestion. The type holds no
// state, so its zero value is ready to use.
type GitleaksCSVImporter struct{}
|
||||||
|
|
||||||
|
// gitleaksRecord is the decoding target for one element of the JSON array
// emitted by `gitleaks detect -f json`. Field names and JSON keys match one
// to one; only a subset of the fields is consumed when building a finding.
type gitleaksRecord struct {
	// Human-readable description of the rule that fired.
	Description string `json:"Description"`

	// Position of the match inside the file.
	StartLine   int `json:"StartLine"`
	EndLine     int `json:"EndLine"`
	StartColumn int `json:"StartColumn"`
	EndColumn   int `json:"EndColumn"`

	// Matched text and the secret extracted from it.
	Match  string `json:"Match"`
	Secret string `json:"Secret"`

	// Path of the file, plus the symlink path when one is reported.
	File        string `json:"File"`
	SymlinkFile string `json:"SymlinkFile"`

	// Commit metadata and the entropy reported for the secret.
	Commit  string  `json:"Commit"`
	Entropy float64 `json:"Entropy"`
	Author  string  `json:"Author"`
	Email   string  `json:"Email"`
	Date    string  `json:"Date"`
	Message string  `json:"Message"`

	// Rule classification and the per-finding fingerprint.
	Tags        []string `json:"Tags"`
	RuleID      string   `json:"RuleID"`
	Fingerprint string   `json:"Fingerprint"`
}
|
||||||
|
|
||||||
|
// Name returns the importer identifier used by the CLI --format flag.
|
||||||
|
func (GitleaksImporter) Name() string { return "gitleaks" }
|
||||||
|
|
||||||
|
// Import decodes a Gitleaks JSON array from r and returns the normalized
|
||||||
|
// findings. An empty array returns (nil, nil).
|
||||||
|
func (GitleaksImporter) Import(r io.Reader) ([]engine.Finding, error) {
|
||||||
|
var records []gitleaksRecord
|
||||||
|
dec := json.NewDecoder(r)
|
||||||
|
if err := dec.Decode(&records); err != nil {
|
||||||
|
return nil, fmt.Errorf("gitleaks: decode json: %w", err)
|
||||||
|
}
|
||||||
|
if len(records) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
findings := make([]engine.Finding, 0, len(records))
|
||||||
|
for _, rec := range records {
|
||||||
|
findings = append(findings, buildGitleaksFinding(rec.RuleID, rec.Secret, rec.File, rec.SymlinkFile, rec.StartLine))
|
||||||
|
}
|
||||||
|
return findings, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Name returns the importer identifier used by the CLI --format flag.
|
||||||
|
func (GitleaksCSVImporter) Name() string { return "gitleaks-csv" }
|
||||||
|
|
||||||
|
// Import decodes Gitleaks CSV output with a mandatory header row. Columns are
|
||||||
|
// resolved by header name; missing optional fields default to zero values.
|
||||||
|
// A header-only input returns (nil, nil).
|
||||||
|
func (GitleaksCSVImporter) Import(r io.Reader) ([]engine.Finding, error) {
|
||||||
|
reader := csv.NewReader(r)
|
||||||
|
reader.FieldsPerRecord = -1 // tolerate ragged rows
|
||||||
|
|
||||||
|
header, err := reader.Read()
|
||||||
|
if err == io.EOF {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("gitleaks-csv: read header: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
index := make(map[string]int, len(header))
|
||||||
|
for i, col := range header {
|
||||||
|
index[strings.TrimSpace(col)] = i
|
||||||
|
}
|
||||||
|
|
||||||
|
get := func(row []string, name string) string {
|
||||||
|
i, ok := index[name]
|
||||||
|
if !ok || i >= len(row) {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
return row[i]
|
||||||
|
}
|
||||||
|
|
||||||
|
var findings []engine.Finding
|
||||||
|
for {
|
||||||
|
row, err := reader.Read()
|
||||||
|
if err == io.EOF {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("gitleaks-csv: read row: %w", err)
|
||||||
|
}
|
||||||
|
startLine, _ := strconv.Atoi(get(row, "StartLine"))
|
||||||
|
findings = append(findings, buildGitleaksFinding(
|
||||||
|
get(row, "RuleID"),
|
||||||
|
get(row, "Secret"),
|
||||||
|
get(row, "File"),
|
||||||
|
get(row, "SymlinkFile"),
|
||||||
|
startLine,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
return findings, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeGitleaksRuleID maps a Gitleaks rule identifier to a short
// KeyHunter-style provider name.
//
// The input is trimmed of surrounding whitespace and lowercased, then the
// first matching trailing token from a fixed list ("-api-key",
// "-access-token", "-secret-key", "-secret", "-token", "-key") is removed.
// At most one suffix is stripped. Identifiers that match no suffix are
// returned lowercased but otherwise unchanged, and so are identifiers that
// consist of nothing but a suffix (for example "-token"), so a non-empty
// rule ID never normalizes to an empty provider name.
func normalizeGitleaksRuleID(id string) string {
	id = strings.ToLower(strings.TrimSpace(id))

	// Order matters: longer, more specific suffixes are listed before the
	// shorter ones they end with ("-api-key" before "-key").
	suffixes := []string{"-api-key", "-access-token", "-secret-key", "-secret", "-token", "-key"}
	for _, suffix := range suffixes {
		if !strings.HasSuffix(id, suffix) {
			continue
		}
		if provider := strings.TrimSuffix(id, suffix); provider != "" {
			return provider
		}
		// Nothing would remain after stripping; keep the identifier as is.
		return id
	}
	return id
}
|
||||||
|
|
||||||
|
// buildGitleaksFinding assembles an engine.Finding from fields common to both
|
||||||
|
// the JSON and CSV Gitleaks code paths so the two importers stay in lockstep.
|
||||||
|
func buildGitleaksFinding(ruleID, secret, file, symlink string, startLine int) engine.Finding {
|
||||||
|
source := file
|
||||||
|
if source == "" {
|
||||||
|
source = symlink
|
||||||
|
}
|
||||||
|
return engine.Finding{
|
||||||
|
ProviderName: normalizeGitleaksRuleID(ruleID),
|
||||||
|
KeyValue: secret,
|
||||||
|
KeyMasked: engine.MaskKey(secret),
|
||||||
|
Confidence: "medium",
|
||||||
|
Source: source,
|
||||||
|
SourceType: "import:gitleaks",
|
||||||
|
LineNumber: startLine,
|
||||||
|
DetectedAt: time.Now(),
|
||||||
|
Verified: false,
|
||||||
|
VerifyStatus: "unverified",
|
||||||
|
}
|
||||||
|
}
|
||||||
159
pkg/importer/gitleaks_test.go
Normal file
159
pkg/importer/gitleaks_test.go
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
package importer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// loadFixture returns the contents of the named file under testdata/. It
// aborts the calling test with t.Fatalf when the file cannot be read.
func loadFixture(t *testing.T, name string) []byte {
	t.Helper()

	path := "testdata/" + name
	contents, readErr := os.ReadFile(path)
	if readErr != nil {
		t.Fatalf("read fixture %s: %v", name, readErr)
	}
	return contents
}
|
||||||
|
|
||||||
|
func TestGitleaksImporter_Name(t *testing.T) {
|
||||||
|
if (GitleaksImporter{}).Name() != "gitleaks" {
|
||||||
|
t.Errorf("GitleaksImporter.Name() = %q, want %q", (GitleaksImporter{}).Name(), "gitleaks")
|
||||||
|
}
|
||||||
|
if (GitleaksCSVImporter{}).Name() != "gitleaks-csv" {
|
||||||
|
t.Errorf("GitleaksCSVImporter.Name() = %q, want %q", (GitleaksCSVImporter{}).Name(), "gitleaks-csv")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitleaksImporter_JSON(t *testing.T) {
|
||||||
|
data := loadFixture(t, "gitleaks-sample.json")
|
||||||
|
findings, err := (GitleaksImporter{}).Import(bytes.NewReader(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Import: %v", err)
|
||||||
|
}
|
||||||
|
if len(findings) != 3 {
|
||||||
|
t.Fatalf("expected 3 findings, got %d", len(findings))
|
||||||
|
}
|
||||||
|
if findings[0].ProviderName != "openai" {
|
||||||
|
t.Errorf("findings[0].ProviderName = %q, want %q", findings[0].ProviderName, "openai")
|
||||||
|
}
|
||||||
|
if findings[0].KeyValue != "sk-proj-1234567890abcdef1234" {
|
||||||
|
t.Errorf("findings[0].KeyValue mismatch: %q", findings[0].KeyValue)
|
||||||
|
}
|
||||||
|
if findings[0].Source != "config/app.yml" {
|
||||||
|
t.Errorf("findings[0].Source = %q", findings[0].Source)
|
||||||
|
}
|
||||||
|
if findings[0].LineNumber != 12 {
|
||||||
|
t.Errorf("findings[0].LineNumber = %d, want 12", findings[0].LineNumber)
|
||||||
|
}
|
||||||
|
if findings[0].SourceType != "import:gitleaks" {
|
||||||
|
t.Errorf("findings[0].SourceType = %q", findings[0].SourceType)
|
||||||
|
}
|
||||||
|
if findings[0].Confidence != "medium" {
|
||||||
|
t.Errorf("findings[0].Confidence = %q, want medium", findings[0].Confidence)
|
||||||
|
}
|
||||||
|
if findings[0].VerifyStatus != "unverified" {
|
||||||
|
t.Errorf("findings[0].VerifyStatus = %q, want unverified", findings[0].VerifyStatus)
|
||||||
|
}
|
||||||
|
if findings[0].Verified {
|
||||||
|
t.Errorf("findings[0].Verified should be false")
|
||||||
|
}
|
||||||
|
if findings[0].KeyMasked == "" {
|
||||||
|
t.Errorf("findings[0].KeyMasked should be set")
|
||||||
|
}
|
||||||
|
if findings[1].ProviderName != "aws" {
|
||||||
|
t.Errorf("findings[1].ProviderName = %q, want aws", findings[1].ProviderName)
|
||||||
|
}
|
||||||
|
if findings[1].LineNumber != 55 {
|
||||||
|
t.Errorf("findings[1].LineNumber = %d, want 55", findings[1].LineNumber)
|
||||||
|
}
|
||||||
|
if findings[2].ProviderName != "generic" {
|
||||||
|
t.Errorf("findings[2].ProviderName = %q, want generic", findings[2].ProviderName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitleaksImporter_CSV(t *testing.T) {
|
||||||
|
data := loadFixture(t, "gitleaks-sample.csv")
|
||||||
|
findings, err := (GitleaksCSVImporter{}).Import(bytes.NewReader(data))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Import: %v", err)
|
||||||
|
}
|
||||||
|
if len(findings) != 3 {
|
||||||
|
t.Fatalf("expected 3 findings, got %d", len(findings))
|
||||||
|
}
|
||||||
|
if findings[0].ProviderName != "openai" {
|
||||||
|
t.Errorf("findings[0].ProviderName = %q, want openai", findings[0].ProviderName)
|
||||||
|
}
|
||||||
|
if findings[0].KeyValue != "sk-proj-1234567890abcdef1234" {
|
||||||
|
t.Errorf("findings[0].KeyValue = %q", findings[0].KeyValue)
|
||||||
|
}
|
||||||
|
if findings[0].Source != "config/app.yml" {
|
||||||
|
t.Errorf("findings[0].Source = %q", findings[0].Source)
|
||||||
|
}
|
||||||
|
if findings[0].LineNumber != 12 {
|
||||||
|
t.Errorf("findings[0].LineNumber = %d, want 12", findings[0].LineNumber)
|
||||||
|
}
|
||||||
|
if findings[1].ProviderName != "aws" {
|
||||||
|
t.Errorf("findings[1].ProviderName = %q, want aws", findings[1].ProviderName)
|
||||||
|
}
|
||||||
|
if findings[2].ProviderName != "generic" {
|
||||||
|
t.Errorf("findings[2].ProviderName = %q, want generic", findings[2].ProviderName)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitleaksImporter_NormalizeRuleID(t *testing.T) {
|
||||||
|
cases := []struct{ in, out string }{
|
||||||
|
{"openai-api-key", "openai"},
|
||||||
|
{"aws-access-token", "aws"},
|
||||||
|
{"anthropic-api-key", "anthropic"},
|
||||||
|
{"generic-api-key", "generic"},
|
||||||
|
{"github-pat", "github-pat"},
|
||||||
|
{"Some-Secret", "some"},
|
||||||
|
{"AWS-Access-Token", "aws"},
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
got := normalizeGitleaksRuleID(c.in)
|
||||||
|
if got != c.out {
|
||||||
|
t.Errorf("normalizeGitleaksRuleID(%q) = %q, want %q", c.in, got, c.out)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitleaksImporter_EmptyArray(t *testing.T) {
|
||||||
|
findings, err := (GitleaksImporter{}).Import(strings.NewReader("[]"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Import: %v", err)
|
||||||
|
}
|
||||||
|
if len(findings) != 0 {
|
||||||
|
t.Errorf("expected 0 findings, got %d", len(findings))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitleaksImporter_EmptyCSV(t *testing.T) {
|
||||||
|
header := "RuleID,Commit,File,SymlinkFile,Secret,Match,StartLine,EndLine,StartColumn,EndColumn,Author,Message,Date,Email,Fingerprint,Tags\n"
|
||||||
|
findings, err := (GitleaksCSVImporter{}).Import(strings.NewReader(header))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Import: %v", err)
|
||||||
|
}
|
||||||
|
if len(findings) != 0 {
|
||||||
|
t.Errorf("expected 0 findings, got %d", len(findings))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitleaksImporter_InvalidJSON(t *testing.T) {
|
||||||
|
_, err := (GitleaksImporter{}).Import(strings.NewReader("{not json"))
|
||||||
|
if err == nil {
|
||||||
|
t.Errorf("expected error for invalid JSON")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitleaksImporter_SymlinkFallback(t *testing.T) {
|
||||||
|
jsonInput := `[{"RuleID":"openai-api-key","Secret":"sk-proj-1234567890abcdef1234","File":"","SymlinkFile":"link/config.yml","StartLine":1}]`
|
||||||
|
findings, err := (GitleaksImporter{}).Import(strings.NewReader(jsonInput))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Import: %v", err)
|
||||||
|
}
|
||||||
|
if len(findings) != 1 || findings[0].Source != "link/config.yml" {
|
||||||
|
t.Errorf("expected symlink fallback source, got %+v", findings)
|
||||||
|
}
|
||||||
|
}
|
||||||
4
pkg/importer/testdata/gitleaks-sample.csv
vendored
Normal file
4
pkg/importer/testdata/gitleaks-sample.csv
vendored
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
RuleID,Commit,File,SymlinkFile,Secret,Match,StartLine,EndLine,StartColumn,EndColumn,Author,Message,Date,Email,Fingerprint,Tags
|
||||||
|
openai-api-key,abc123,config/app.yml,,sk-proj-1234567890abcdef1234,key: sk-proj-1234567890abcdef1234,12,12,10,60,dev,add config,2026-04-01T12:00:00Z,dev@example.com,abc123:config/app.yml:openai-api-key:12,"key,openai"
|
||||||
|
aws-access-token,def456,terraform/main.tf,,AKIAIOSFODNN7EXAMPLE,access_key = AKIAIOSFODNN7EXAMPLE,55,55,20,40,ops,tf update,2026-04-02T09:30:00Z,ops@example.com,def456:terraform/main.tf:aws-access-token:55,"key,aws"
|
||||||
|
generic-api-key,ghi789,scripts/deploy.sh,,xoxp-abcdefghijklmnopqrstuvwxyz,TOKEN=xoxp-abcdefghijklmnopqrstuvwxyz,3,3,8,50,dev,deploy script,2026-04-03T15:45:00Z,dev@example.com,ghi789:scripts/deploy.sh:generic-api-key:3,"key,generic"
|
||||||
|
62
pkg/importer/testdata/gitleaks-sample.json
vendored
Normal file
62
pkg/importer/testdata/gitleaks-sample.json
vendored
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"Description": "OpenAI API Key",
|
||||||
|
"StartLine": 12,
|
||||||
|
"EndLine": 12,
|
||||||
|
"StartColumn": 10,
|
||||||
|
"EndColumn": 60,
|
||||||
|
"Match": "key: sk-proj-1234567890abcdef1234",
|
||||||
|
"Secret": "sk-proj-1234567890abcdef1234",
|
||||||
|
"File": "config/app.yml",
|
||||||
|
"SymlinkFile": "",
|
||||||
|
"Commit": "abc123",
|
||||||
|
"Entropy": 4.5,
|
||||||
|
"Author": "dev",
|
||||||
|
"Email": "dev@example.com",
|
||||||
|
"Date": "2026-04-01T12:00:00Z",
|
||||||
|
"Message": "add config",
|
||||||
|
"Tags": ["key", "openai"],
|
||||||
|
"RuleID": "openai-api-key",
|
||||||
|
"Fingerprint": "abc123:config/app.yml:openai-api-key:12"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Description": "AWS Access Token",
|
||||||
|
"StartLine": 55,
|
||||||
|
"EndLine": 55,
|
||||||
|
"StartColumn": 20,
|
||||||
|
"EndColumn": 40,
|
||||||
|
"Match": "access_key = AKIAIOSFODNN7EXAMPLE",
|
||||||
|
"Secret": "AKIAIOSFODNN7EXAMPLE",
|
||||||
|
"File": "terraform/main.tf",
|
||||||
|
"SymlinkFile": "",
|
||||||
|
"Commit": "def456",
|
||||||
|
"Entropy": 4.2,
|
||||||
|
"Author": "ops",
|
||||||
|
"Email": "ops@example.com",
|
||||||
|
"Date": "2026-04-02T09:30:00Z",
|
||||||
|
"Message": "tf update",
|
||||||
|
"Tags": ["key", "aws"],
|
||||||
|
"RuleID": "aws-access-token",
|
||||||
|
"Fingerprint": "def456:terraform/main.tf:aws-access-token:55"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"Description": "Generic API Key",
|
||||||
|
"StartLine": 3,
|
||||||
|
"EndLine": 3,
|
||||||
|
"StartColumn": 8,
|
||||||
|
"EndColumn": 50,
|
||||||
|
"Match": "TOKEN=xoxp-abcdefghijklmnopqrstuvwxyz",
|
||||||
|
"Secret": "xoxp-abcdefghijklmnopqrstuvwxyz",
|
||||||
|
"File": "scripts/deploy.sh",
|
||||||
|
"SymlinkFile": "",
|
||||||
|
"Commit": "ghi789",
|
||||||
|
"Entropy": 3.8,
|
||||||
|
"Author": "dev",
|
||||||
|
"Email": "dev@example.com",
|
||||||
|
"Date": "2026-04-03T15:45:00Z",
|
||||||
|
"Message": "deploy script",
|
||||||
|
"Tags": ["key", "generic"],
|
||||||
|
"RuleID": "generic-api-key",
|
||||||
|
"Fingerprint": "ghi789:scripts/deploy.sh:generic-api-key:3"
|
||||||
|
}
|
||||||
|
]
|
||||||
Reference in New Issue
Block a user