feat(07-02): add Gitleaks JSON + CSV importers
- GitleaksImporter parses native JSON array output to []engine.Finding
- GitleaksCSVImporter parses CSV with header-based column resolution
- normalizeGitleaksRuleID strips suffixes (-api-key, -access-token, ...)
- Shared buildGitleaksFinding helper keeps JSON/CSV paths in lockstep
- Test fixtures + 8 tests covering happy path, empty, invalid, symlink fallback
This commit is contained in:
153
pkg/importer/gitleaks.go
Normal file
153
pkg/importer/gitleaks.go
Normal file
@@ -0,0 +1,153 @@
|
||||
package importer
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/engine"
|
||||
)
|
||||
|
||||
// GitleaksImporter reads a Gitleaks JSON report — a single array of finding
// objects — and turns every element into an engine.Finding. It carries no
// state, so the zero value is ready to use.
type GitleaksImporter struct{}
|
||||
|
||||
// GitleaksCSVImporter reads a Gitleaks CSV report whose first row is a header.
// Fields are looked up by header name rather than by position, so a change in
// column order between Gitleaks versions does not break ingestion. It carries
// no state, so the zero value is ready to use.
type GitleaksCSVImporter struct{}
|
||||
|
||||
// gitleaksRecord is the decoding target for one element of the JSON array
// written by `gitleaks detect -f json`. Every field is decoded, but the JSON
// importer in this file only reads RuleID, Secret, File, SymlinkFile and
// StartLine; the remaining fields are kept so the struct mirrors the report.
type gitleaksRecord struct {
	Description string `json:"Description"`

	// Position of the match inside File.
	StartLine   int `json:"StartLine"`
	EndLine     int `json:"EndLine"`
	StartColumn int `json:"StartColumn"`
	EndColumn   int `json:"EndColumn"`

	// Matched text and the secret extracted from it.
	Match  string `json:"Match"`
	Secret string `json:"Secret"`

	// File is the preferred source path; SymlinkFile is used as a fallback
	// when File is empty.
	File        string `json:"File"`
	SymlinkFile string `json:"SymlinkFile"`

	Commit  string   `json:"Commit"`
	Entropy float64  `json:"Entropy"`
	Author  string   `json:"Author"`
	Email   string   `json:"Email"`
	Date    string   `json:"Date"`
	Message string   `json:"Message"`
	Tags    []string `json:"Tags"`

	// RuleID is normalized into the finding's provider name.
	RuleID      string `json:"RuleID"`
	Fingerprint string `json:"Fingerprint"`
}
|
||||
|
||||
// Name returns the importer identifier used by the CLI --format flag.
|
||||
func (GitleaksImporter) Name() string { return "gitleaks" }
|
||||
|
||||
// Import decodes a Gitleaks JSON array from r and returns the normalized
|
||||
// findings. An empty array returns (nil, nil).
|
||||
func (GitleaksImporter) Import(r io.Reader) ([]engine.Finding, error) {
|
||||
var records []gitleaksRecord
|
||||
dec := json.NewDecoder(r)
|
||||
if err := dec.Decode(&records); err != nil {
|
||||
return nil, fmt.Errorf("gitleaks: decode json: %w", err)
|
||||
}
|
||||
if len(records) == 0 {
|
||||
return nil, nil
|
||||
}
|
||||
findings := make([]engine.Finding, 0, len(records))
|
||||
for _, rec := range records {
|
||||
findings = append(findings, buildGitleaksFinding(rec.RuleID, rec.Secret, rec.File, rec.SymlinkFile, rec.StartLine))
|
||||
}
|
||||
return findings, nil
|
||||
}
|
||||
|
||||
// Name returns the importer identifier used by the CLI --format flag.
|
||||
func (GitleaksCSVImporter) Name() string { return "gitleaks-csv" }
|
||||
|
||||
// Import decodes Gitleaks CSV output with a mandatory header row. Columns are
|
||||
// resolved by header name; missing optional fields default to zero values.
|
||||
// A header-only input returns (nil, nil).
|
||||
func (GitleaksCSVImporter) Import(r io.Reader) ([]engine.Finding, error) {
|
||||
reader := csv.NewReader(r)
|
||||
reader.FieldsPerRecord = -1 // tolerate ragged rows
|
||||
|
||||
header, err := reader.Read()
|
||||
if err == io.EOF {
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("gitleaks-csv: read header: %w", err)
|
||||
}
|
||||
|
||||
index := make(map[string]int, len(header))
|
||||
for i, col := range header {
|
||||
index[strings.TrimSpace(col)] = i
|
||||
}
|
||||
|
||||
get := func(row []string, name string) string {
|
||||
i, ok := index[name]
|
||||
if !ok || i >= len(row) {
|
||||
return ""
|
||||
}
|
||||
return row[i]
|
||||
}
|
||||
|
||||
var findings []engine.Finding
|
||||
for {
|
||||
row, err := reader.Read()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("gitleaks-csv: read row: %w", err)
|
||||
}
|
||||
startLine, _ := strconv.Atoi(get(row, "StartLine"))
|
||||
findings = append(findings, buildGitleaksFinding(
|
||||
get(row, "RuleID"),
|
||||
get(row, "Secret"),
|
||||
get(row, "File"),
|
||||
get(row, "SymlinkFile"),
|
||||
startLine,
|
||||
))
|
||||
}
|
||||
return findings, nil
|
||||
}
|
||||
|
||||
// normalizeGitleaksRuleID converts a Gitleaks rule identifier into the short
// provider-style name used by KeyHunter. The input is trimmed of surrounding
// whitespace and lowercased; then the first matching entry from a fixed list
// of trailing tokens ("-api-key", "-access-token", "-secret-key", "-secret",
// "-token", "-key") is cut off. Longer tokens are listed before their shorter
// tails so that, for example, "-api-key" wins over "-key". Identifiers with
// no listed suffix come back trimmed and lowercased only.
func normalizeGitleaksRuleID(id string) string {
	normalized := strings.TrimSpace(id)
	normalized = strings.ToLower(normalized)

	for _, suffix := range [...]string{
		"-api-key",
		"-access-token",
		"-secret-key",
		"-secret",
		"-token",
		"-key",
	} {
		if !strings.HasSuffix(normalized, suffix) {
			continue
		}
		return normalized[:len(normalized)-len(suffix)]
	}
	return normalized
}
|
||||
|
||||
// buildGitleaksFinding assembles an engine.Finding from fields common to both
|
||||
// the JSON and CSV Gitleaks code paths so the two importers stay in lockstep.
|
||||
func buildGitleaksFinding(ruleID, secret, file, symlink string, startLine int) engine.Finding {
|
||||
source := file
|
||||
if source == "" {
|
||||
source = symlink
|
||||
}
|
||||
return engine.Finding{
|
||||
ProviderName: normalizeGitleaksRuleID(ruleID),
|
||||
KeyValue: secret,
|
||||
KeyMasked: engine.MaskKey(secret),
|
||||
Confidence: "medium",
|
||||
Source: source,
|
||||
SourceType: "import:gitleaks",
|
||||
LineNumber: startLine,
|
||||
DetectedAt: time.Now(),
|
||||
Verified: false,
|
||||
VerifyStatus: "unverified",
|
||||
}
|
||||
}
|
||||
159
pkg/importer/gitleaks_test.go
Normal file
159
pkg/importer/gitleaks_test.go
Normal file
@@ -0,0 +1,159 @@
|
||||
package importer
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// loadFixture returns the contents of testdata/<name>, failing the calling
// test immediately if the file cannot be read.
func loadFixture(t *testing.T, name string) []byte {
	t.Helper()

	path := "testdata/" + name
	contents, readErr := os.ReadFile(path)
	if readErr != nil {
		t.Fatalf("read fixture %s: %v", name, readErr)
	}
	return contents
}
|
||||
|
||||
func TestGitleaksImporter_Name(t *testing.T) {
|
||||
if (GitleaksImporter{}).Name() != "gitleaks" {
|
||||
t.Errorf("GitleaksImporter.Name() = %q, want %q", (GitleaksImporter{}).Name(), "gitleaks")
|
||||
}
|
||||
if (GitleaksCSVImporter{}).Name() != "gitleaks-csv" {
|
||||
t.Errorf("GitleaksCSVImporter.Name() = %q, want %q", (GitleaksCSVImporter{}).Name(), "gitleaks-csv")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGitleaksImporter_JSON(t *testing.T) {
|
||||
data := loadFixture(t, "gitleaks-sample.json")
|
||||
findings, err := (GitleaksImporter{}).Import(bytes.NewReader(data))
|
||||
if err != nil {
|
||||
t.Fatalf("Import: %v", err)
|
||||
}
|
||||
if len(findings) != 3 {
|
||||
t.Fatalf("expected 3 findings, got %d", len(findings))
|
||||
}
|
||||
if findings[0].ProviderName != "openai" {
|
||||
t.Errorf("findings[0].ProviderName = %q, want %q", findings[0].ProviderName, "openai")
|
||||
}
|
||||
if findings[0].KeyValue != "sk-proj-1234567890abcdef1234" {
|
||||
t.Errorf("findings[0].KeyValue mismatch: %q", findings[0].KeyValue)
|
||||
}
|
||||
if findings[0].Source != "config/app.yml" {
|
||||
t.Errorf("findings[0].Source = %q", findings[0].Source)
|
||||
}
|
||||
if findings[0].LineNumber != 12 {
|
||||
t.Errorf("findings[0].LineNumber = %d, want 12", findings[0].LineNumber)
|
||||
}
|
||||
if findings[0].SourceType != "import:gitleaks" {
|
||||
t.Errorf("findings[0].SourceType = %q", findings[0].SourceType)
|
||||
}
|
||||
if findings[0].Confidence != "medium" {
|
||||
t.Errorf("findings[0].Confidence = %q, want medium", findings[0].Confidence)
|
||||
}
|
||||
if findings[0].VerifyStatus != "unverified" {
|
||||
t.Errorf("findings[0].VerifyStatus = %q, want unverified", findings[0].VerifyStatus)
|
||||
}
|
||||
if findings[0].Verified {
|
||||
t.Errorf("findings[0].Verified should be false")
|
||||
}
|
||||
if findings[0].KeyMasked == "" {
|
||||
t.Errorf("findings[0].KeyMasked should be set")
|
||||
}
|
||||
if findings[1].ProviderName != "aws" {
|
||||
t.Errorf("findings[1].ProviderName = %q, want aws", findings[1].ProviderName)
|
||||
}
|
||||
if findings[1].LineNumber != 55 {
|
||||
t.Errorf("findings[1].LineNumber = %d, want 55", findings[1].LineNumber)
|
||||
}
|
||||
if findings[2].ProviderName != "generic" {
|
||||
t.Errorf("findings[2].ProviderName = %q, want generic", findings[2].ProviderName)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGitleaksImporter_CSV(t *testing.T) {
|
||||
data := loadFixture(t, "gitleaks-sample.csv")
|
||||
findings, err := (GitleaksCSVImporter{}).Import(bytes.NewReader(data))
|
||||
if err != nil {
|
||||
t.Fatalf("Import: %v", err)
|
||||
}
|
||||
if len(findings) != 3 {
|
||||
t.Fatalf("expected 3 findings, got %d", len(findings))
|
||||
}
|
||||
if findings[0].ProviderName != "openai" {
|
||||
t.Errorf("findings[0].ProviderName = %q, want openai", findings[0].ProviderName)
|
||||
}
|
||||
if findings[0].KeyValue != "sk-proj-1234567890abcdef1234" {
|
||||
t.Errorf("findings[0].KeyValue = %q", findings[0].KeyValue)
|
||||
}
|
||||
if findings[0].Source != "config/app.yml" {
|
||||
t.Errorf("findings[0].Source = %q", findings[0].Source)
|
||||
}
|
||||
if findings[0].LineNumber != 12 {
|
||||
t.Errorf("findings[0].LineNumber = %d, want 12", findings[0].LineNumber)
|
||||
}
|
||||
if findings[1].ProviderName != "aws" {
|
||||
t.Errorf("findings[1].ProviderName = %q, want aws", findings[1].ProviderName)
|
||||
}
|
||||
if findings[2].ProviderName != "generic" {
|
||||
t.Errorf("findings[2].ProviderName = %q, want generic", findings[2].ProviderName)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGitleaksImporter_NormalizeRuleID(t *testing.T) {
|
||||
cases := []struct{ in, out string }{
|
||||
{"openai-api-key", "openai"},
|
||||
{"aws-access-token", "aws"},
|
||||
{"anthropic-api-key", "anthropic"},
|
||||
{"generic-api-key", "generic"},
|
||||
{"github-pat", "github-pat"},
|
||||
{"Some-Secret", "some"},
|
||||
{"AWS-Access-Token", "aws"},
|
||||
}
|
||||
for _, c := range cases {
|
||||
got := normalizeGitleaksRuleID(c.in)
|
||||
if got != c.out {
|
||||
t.Errorf("normalizeGitleaksRuleID(%q) = %q, want %q", c.in, got, c.out)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestGitleaksImporter_EmptyArray(t *testing.T) {
|
||||
findings, err := (GitleaksImporter{}).Import(strings.NewReader("[]"))
|
||||
if err != nil {
|
||||
t.Fatalf("Import: %v", err)
|
||||
}
|
||||
if len(findings) != 0 {
|
||||
t.Errorf("expected 0 findings, got %d", len(findings))
|
||||
}
|
||||
}
|
||||
|
||||
func TestGitleaksImporter_EmptyCSV(t *testing.T) {
|
||||
header := "RuleID,Commit,File,SymlinkFile,Secret,Match,StartLine,EndLine,StartColumn,EndColumn,Author,Message,Date,Email,Fingerprint,Tags\n"
|
||||
findings, err := (GitleaksCSVImporter{}).Import(strings.NewReader(header))
|
||||
if err != nil {
|
||||
t.Fatalf("Import: %v", err)
|
||||
}
|
||||
if len(findings) != 0 {
|
||||
t.Errorf("expected 0 findings, got %d", len(findings))
|
||||
}
|
||||
}
|
||||
|
||||
func TestGitleaksImporter_InvalidJSON(t *testing.T) {
|
||||
_, err := (GitleaksImporter{}).Import(strings.NewReader("{not json"))
|
||||
if err == nil {
|
||||
t.Errorf("expected error for invalid JSON")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGitleaksImporter_SymlinkFallback(t *testing.T) {
|
||||
jsonInput := `[{"RuleID":"openai-api-key","Secret":"sk-proj-1234567890abcdef1234","File":"","SymlinkFile":"link/config.yml","StartLine":1}]`
|
||||
findings, err := (GitleaksImporter{}).Import(strings.NewReader(jsonInput))
|
||||
if err != nil {
|
||||
t.Fatalf("Import: %v", err)
|
||||
}
|
||||
if len(findings) != 1 || findings[0].Source != "link/config.yml" {
|
||||
t.Errorf("expected symlink fallback source, got %+v", findings)
|
||||
}
|
||||
}
|
||||
4
pkg/importer/testdata/gitleaks-sample.csv
vendored
Normal file
4
pkg/importer/testdata/gitleaks-sample.csv
vendored
Normal file
@@ -0,0 +1,4 @@
|
||||
RuleID,Commit,File,SymlinkFile,Secret,Match,StartLine,EndLine,StartColumn,EndColumn,Author,Message,Date,Email,Fingerprint,Tags
|
||||
openai-api-key,abc123,config/app.yml,,sk-proj-1234567890abcdef1234,key: sk-proj-1234567890abcdef1234,12,12,10,60,dev,add config,2026-04-01T12:00:00Z,dev@example.com,abc123:config/app.yml:openai-api-key:12,"key,openai"
|
||||
aws-access-token,def456,terraform/main.tf,,AKIAIOSFODNN7EXAMPLE,access_key = AKIAIOSFODNN7EXAMPLE,55,55,20,40,ops,tf update,2026-04-02T09:30:00Z,ops@example.com,def456:terraform/main.tf:aws-access-token:55,"key,aws"
|
||||
generic-api-key,ghi789,scripts/deploy.sh,,xoxp-abcdefghijklmnopqrstuvwxyz,TOKEN=xoxp-abcdefghijklmnopqrstuvwxyz,3,3,8,50,dev,deploy script,2026-04-03T15:45:00Z,dev@example.com,ghi789:scripts/deploy.sh:generic-api-key:3,"key,generic"
|
||||
|
62
pkg/importer/testdata/gitleaks-sample.json
vendored
Normal file
62
pkg/importer/testdata/gitleaks-sample.json
vendored
Normal file
@@ -0,0 +1,62 @@
|
||||
[
|
||||
{
|
||||
"Description": "OpenAI API Key",
|
||||
"StartLine": 12,
|
||||
"EndLine": 12,
|
||||
"StartColumn": 10,
|
||||
"EndColumn": 60,
|
||||
"Match": "key: sk-proj-1234567890abcdef1234",
|
||||
"Secret": "sk-proj-1234567890abcdef1234",
|
||||
"File": "config/app.yml",
|
||||
"SymlinkFile": "",
|
||||
"Commit": "abc123",
|
||||
"Entropy": 4.5,
|
||||
"Author": "dev",
|
||||
"Email": "dev@example.com",
|
||||
"Date": "2026-04-01T12:00:00Z",
|
||||
"Message": "add config",
|
||||
"Tags": ["key", "openai"],
|
||||
"RuleID": "openai-api-key",
|
||||
"Fingerprint": "abc123:config/app.yml:openai-api-key:12"
|
||||
},
|
||||
{
|
||||
"Description": "AWS Access Token",
|
||||
"StartLine": 55,
|
||||
"EndLine": 55,
|
||||
"StartColumn": 20,
|
||||
"EndColumn": 40,
|
||||
"Match": "access_key = AKIAIOSFODNN7EXAMPLE",
|
||||
"Secret": "AKIAIOSFODNN7EXAMPLE",
|
||||
"File": "terraform/main.tf",
|
||||
"SymlinkFile": "",
|
||||
"Commit": "def456",
|
||||
"Entropy": 4.2,
|
||||
"Author": "ops",
|
||||
"Email": "ops@example.com",
|
||||
"Date": "2026-04-02T09:30:00Z",
|
||||
"Message": "tf update",
|
||||
"Tags": ["key", "aws"],
|
||||
"RuleID": "aws-access-token",
|
||||
"Fingerprint": "def456:terraform/main.tf:aws-access-token:55"
|
||||
},
|
||||
{
|
||||
"Description": "Generic API Key",
|
||||
"StartLine": 3,
|
||||
"EndLine": 3,
|
||||
"StartColumn": 8,
|
||||
"EndColumn": 50,
|
||||
"Match": "TOKEN=xoxp-abcdefghijklmnopqrstuvwxyz",
|
||||
"Secret": "xoxp-abcdefghijklmnopqrstuvwxyz",
|
||||
"File": "scripts/deploy.sh",
|
||||
"SymlinkFile": "",
|
||||
"Commit": "ghi789",
|
||||
"Entropy": 3.8,
|
||||
"Author": "dev",
|
||||
"Email": "dev@example.com",
|
||||
"Date": "2026-04-03T15:45:00Z",
|
||||
"Message": "deploy script",
|
||||
"Tags": ["key", "generic"],
|
||||
"RuleID": "generic-api-key",
|
||||
"Fingerprint": "ghi789:scripts/deploy.sh:generic-api-key:3"
|
||||
}
|
||||
]
|
||||
Reference in New Issue
Block a user