feat(07-01): Importer interface and TruffleHog v3 JSON adapter
- pkg/importer/importer.go: shared Importer interface (Name, Import) - pkg/importer/trufflehog.go: TruffleHogImporter with v3 JSON decoding, detector-name normalization (OpenAI/GithubV2/AWS -> canonical ids), SourceMetadata path+line extraction for Git/Filesystem/Github - pkg/importer/testdata/trufflehog-sample.json: 3-record fixture - pkg/importer/trufflehog_test.go: Name, Import, NormalizeName, EmptyArray, InvalidJSON tests -- all passing
This commit is contained in:
24
pkg/importer/importer.go
Normal file
24
pkg/importer/importer.go
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
// Package importer provides adapters that parse output from external secret
|
||||||
|
// scanners (TruffleHog, Gitleaks, ...) and normalize them into KeyHunter's
|
||||||
|
// engine.Finding model so they can be inserted into the unified storage layer.
|
||||||
|
package importer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"io"
|
||||||
|
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/engine"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Importer parses output from an external secret scanner and returns
|
||||||
|
// normalized engine.Finding records. Implementations must be stateless
|
||||||
|
// and safe for reuse across calls.
|
||||||
|
type Importer interface {
|
||||||
|
// Name returns the short identifier of the source format
|
||||||
|
// (e.g. "trufflehog", "gitleaks"). Used by the CLI --format flag.
|
||||||
|
Name() string
|
||||||
|
|
||||||
|
// Import reads scanner output from r and returns the normalized findings.
|
||||||
|
// Implementations should return a wrapped error on malformed input and an
|
||||||
|
// empty slice with nil error on empty input.
|
||||||
|
Import(r io.Reader) ([]engine.Finding, error)
|
||||||
|
}
|
||||||
57
pkg/importer/testdata/trufflehog-sample.json
vendored
Normal file
57
pkg/importer/testdata/trufflehog-sample.json
vendored
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
[
|
||||||
|
{
|
||||||
|
"SourceID": 1,
|
||||||
|
"SourceName": "git-scan",
|
||||||
|
"SourceMetadata": {
|
||||||
|
"Data": {
|
||||||
|
"Git": {
|
||||||
|
"commit": "deadbeef1234",
|
||||||
|
"file": "src/config.py",
|
||||||
|
"repository": "https://github.com/example/repo",
|
||||||
|
"line": 42
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"DetectorName": "OpenAI",
|
||||||
|
"DetectorType": 17,
|
||||||
|
"Verified": true,
|
||||||
|
"Raw": "sk-proj-abcdef1234567890abcdef",
|
||||||
|
"Redacted": "sk-proj-abcd...cdef",
|
||||||
|
"ExtraData": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"SourceID": 2,
|
||||||
|
"SourceName": "fs-scan",
|
||||||
|
"SourceMetadata": {
|
||||||
|
"Data": {
|
||||||
|
"Filesystem": {
|
||||||
|
"file": "/tmp/leaked.env"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"DetectorName": "AnthropicV2",
|
||||||
|
"DetectorType": 92,
|
||||||
|
"Verified": false,
|
||||||
|
"Raw": "sk-ant-api03-xxxxxxxxxxxxxxxx",
|
||||||
|
"Redacted": "sk-ant-api03-xxxx",
|
||||||
|
"ExtraData": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"SourceID": 3,
|
||||||
|
"SourceName": "github-scan",
|
||||||
|
"SourceMetadata": {
|
||||||
|
"Data": {
|
||||||
|
"Github": {
|
||||||
|
"link": "https://github.com/foo/bar/blob/main/a.yml",
|
||||||
|
"repository": "https://github.com/foo/bar"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"DetectorName": "AWS",
|
||||||
|
"DetectorType": 2,
|
||||||
|
"Verified": true,
|
||||||
|
"Raw": "AKIAIOSFODNN7EXAMPLE",
|
||||||
|
"Redacted": "AKIA****EXAMPLE",
|
||||||
|
"ExtraData": {}
|
||||||
|
}
|
||||||
|
]
|
||||||
174
pkg/importer/trufflehog.go
Normal file
174
pkg/importer/trufflehog.go
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
package importer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/engine"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TruffleHogImporter converts TruffleHog v3 JSON output
// (`trufflehog ... --json`) into engine.Finding records.
//
// Each input record describes one detected secret: detector metadata,
// verification status, and a nested SourceMetadata object whose shape
// depends on the scan source (git, filesystem, github, ...). The top-level
// framing that is accepted is documented on Import. See 07-CONTEXT.md for
// the field decisions.
//
// NOTE(review): upstream TruffleHog v3 appears to write `--json` results as
// one JSON object per line rather than as a single array; confirm that the
// framing Import accepts matches the files users will actually feed it.
//
// The type carries no state, so its zero value is ready to use.
type TruffleHogImporter struct{}
|
||||||
|
|
||||||
|
// trufflehogRecord mirrors one TruffleHog v3 JSON result.
//
// SourceMetadata is kept as raw JSON because its shape depends on the scan
// source; it is decoded separately when the location is extracted. The
// fields this package does not consume (the DetectorType identifier and the
// ExtraData blob) are also kept as raw JSON, so an unexpected shape in
// either of them (for example a detector type encoded as a string instead
// of a number) cannot make decoding of the whole record fail.
type trufflehogRecord struct {
	SourceID       int             `json:"SourceID"`
	SourceName     string          `json:"SourceName"`
	SourceMetadata json.RawMessage `json:"SourceMetadata"`
	DetectorName   string          `json:"DetectorName"`
	DetectorType   json.RawMessage `json:"DetectorType"` // unused; raw so any JSON shape is tolerated
	Verified       bool            `json:"Verified"`
	Raw            string          `json:"Raw"`
	Redacted       string          `json:"Redacted"`
	ExtraData      json.RawMessage `json:"ExtraData"` // unused; raw so any JSON shape is tolerated
}
|
||||||
|
|
||||||
|
// tfhGitSource is the SourceMetadata.Data.Git entry of a record.
type tfhGitSource struct {
	File       string `json:"file"`
	Repository string `json:"repository"`
	Commit     string `json:"commit"`
	Line       int    `json:"line"`
}

// tfhFilesystemSource is the SourceMetadata.Data.Filesystem entry of a record.
type tfhFilesystemSource struct {
	File string `json:"file"`
}

// tfhGithubSource is the SourceMetadata.Data.Github entry of a record.
type tfhGithubSource struct {
	File       string `json:"file"`
	Link       string `json:"link"`
	Repository string `json:"repository"`
}

// tfhSourceMetadata holds the part of SourceMetadata.Data that is needed to
// derive a source path and line number. Each per-source entry is a pointer
// so that an entry missing from the JSON (nil) can be told apart from one
// that is present but empty.
//
// NOTE(review): only the Git entry captures a line number here; upstream
// Filesystem and Github metadata may carry one as well. Confirm whether
// leaving it out is intentional.
type tfhSourceMetadata struct {
	Data struct {
		Git        *tfhGitSource        `json:"Git"`
		Filesystem *tfhFilesystemSource `json:"Filesystem"`
		Github     *tfhGithubSource     `json:"Github"`
	} `json:"Data"`
}
|
||||||
|
|
||||||
|
var (
	// tfhVersionSuffix matches a trailing version marker: a lowercase "v"
	// followed by one or more digits at the very end of the string. The
	// pattern is case-sensitive, so it only fits names that have already
	// been lowercased ("githubv2" -> "github", "anthropicv2" -> "anthropic").
	tfhVersionSuffix = regexp.MustCompile(`v\d+$`)

	// tfhAliases translates lowercased, de-versioned TruffleHog detector
	// names into KeyHunter's canonical provider identifiers. Identity
	// entries are spelled out on purpose so the set of recognized
	// providers is visible at a glance.
	tfhAliases = map[string]string{
		"aws":         "aws",
		"gcp":         "gcp",
		"openai":      "openai",
		"anthropic":   "anthropic",
		"huggingface": "huggingface",
		"github":      "github",
	}
)
|
||||||
|
|
||||||
|
// Name implements Importer.
|
||||||
|
func (TruffleHogImporter) Name() string { return "trufflehog" }
|
||||||
|
|
||||||
|
// Import decodes a TruffleHog v3 JSON array from r and returns the findings
|
||||||
|
// in the same order they appear in the input. Records with an empty Raw
|
||||||
|
// value are skipped silently because they carry no usable key material.
|
||||||
|
func (TruffleHogImporter) Import(r io.Reader) ([]engine.Finding, error) {
|
||||||
|
var records []trufflehogRecord
|
||||||
|
if err := json.NewDecoder(r).Decode(&records); err != nil {
|
||||||
|
return nil, fmt.Errorf("decoding trufflehog json: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
findings := make([]engine.Finding, 0, len(records))
|
||||||
|
now := time.Now()
|
||||||
|
|
||||||
|
for _, rec := range records {
|
||||||
|
if rec.Raw == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
source, line := extractSourcePath(rec.SourceMetadata)
|
||||||
|
if source == "" {
|
||||||
|
source = rec.SourceName
|
||||||
|
}
|
||||||
|
|
||||||
|
confidence := "medium"
|
||||||
|
verifyStatus := "unverified"
|
||||||
|
if rec.Verified {
|
||||||
|
confidence = "high"
|
||||||
|
verifyStatus = "live"
|
||||||
|
}
|
||||||
|
|
||||||
|
findings = append(findings, engine.Finding{
|
||||||
|
ProviderName: normalizeTruffleHogName(rec.DetectorName),
|
||||||
|
KeyValue: rec.Raw,
|
||||||
|
KeyMasked: engine.MaskKey(rec.Raw),
|
||||||
|
Confidence: confidence,
|
||||||
|
Source: source,
|
||||||
|
SourceType: "import:trufflehog",
|
||||||
|
LineNumber: line,
|
||||||
|
DetectedAt: now,
|
||||||
|
Verified: rec.Verified,
|
||||||
|
VerifyStatus: verifyStatus,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
return findings, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// normalizeTruffleHogName converts a TruffleHog detector name
|
||||||
|
// ("OpenAI", "GithubV2", "AWS") to the lowercase KeyHunter provider id.
|
||||||
|
// Unknown detectors fall through as their lowercased, de-versioned form.
|
||||||
|
func normalizeTruffleHogName(detector string) string {
|
||||||
|
lowered := strings.ToLower(strings.TrimSpace(detector))
|
||||||
|
lowered = tfhVersionSuffix.ReplaceAllString(lowered, "")
|
||||||
|
if alias, ok := tfhAliases[lowered]; ok {
|
||||||
|
return alias
|
||||||
|
}
|
||||||
|
return lowered
|
||||||
|
}
|
||||||
|
|
||||||
|
// extractSourcePath walks SourceMetadata.Data in priority order and returns
|
||||||
|
// the first non-empty location string together with a line number when one
|
||||||
|
// is available. Any unmarshal error is non-fatal and yields ("", 0).
|
||||||
|
func extractSourcePath(meta json.RawMessage) (string, int) {
|
||||||
|
if len(meta) == 0 {
|
||||||
|
return "", 0
|
||||||
|
}
|
||||||
|
var sm tfhSourceMetadata
|
||||||
|
if err := json.Unmarshal(meta, &sm); err != nil {
|
||||||
|
return "", 0
|
||||||
|
}
|
||||||
|
|
||||||
|
line := 0
|
||||||
|
if sm.Data.Git != nil {
|
||||||
|
line = sm.Data.Git.Line
|
||||||
|
if sm.Data.Git.File != "" {
|
||||||
|
return sm.Data.Git.File, line
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if sm.Data.Filesystem != nil && sm.Data.Filesystem.File != "" {
|
||||||
|
return sm.Data.Filesystem.File, line
|
||||||
|
}
|
||||||
|
if sm.Data.Github != nil {
|
||||||
|
if sm.Data.Github.File != "" {
|
||||||
|
return sm.Data.Github.File, line
|
||||||
|
}
|
||||||
|
if sm.Data.Github.Link != "" {
|
||||||
|
return sm.Data.Github.Link, line
|
||||||
|
}
|
||||||
|
if sm.Data.Github.Repository != "" {
|
||||||
|
return sm.Data.Github.Repository, line
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if sm.Data.Git != nil && sm.Data.Git.Repository != "" {
|
||||||
|
return sm.Data.Git.Repository, line
|
||||||
|
}
|
||||||
|
return "", line
|
||||||
|
}
|
||||||
128
pkg/importer/trufflehog_test.go
Normal file
128
pkg/importer/trufflehog_test.go
Normal file
@@ -0,0 +1,128 @@
|
|||||||
|
package importer
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestTruffleHogImporter_Name(t *testing.T) {
|
||||||
|
var imp TruffleHogImporter
|
||||||
|
if got := imp.Name(); got != "trufflehog" {
|
||||||
|
t.Fatalf("Name() = %q, want %q", got, "trufflehog")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTruffleHogImporter_Import(t *testing.T) {
|
||||||
|
f, err := os.Open("testdata/trufflehog-sample.json")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("open fixture: %v", err)
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
var imp TruffleHogImporter
|
||||||
|
findings, err := imp.Import(f)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Import returned error: %v", err)
|
||||||
|
}
|
||||||
|
if len(findings) != 3 {
|
||||||
|
t.Fatalf("expected 3 findings, got %d", len(findings))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Record 1: OpenAI / Git / verified.
|
||||||
|
f0 := findings[0]
|
||||||
|
if f0.ProviderName != "openai" {
|
||||||
|
t.Errorf("findings[0].ProviderName = %q, want openai", f0.ProviderName)
|
||||||
|
}
|
||||||
|
if f0.Confidence != "high" {
|
||||||
|
t.Errorf("findings[0].Confidence = %q, want high", f0.Confidence)
|
||||||
|
}
|
||||||
|
if !f0.Verified {
|
||||||
|
t.Error("findings[0].Verified = false, want true")
|
||||||
|
}
|
||||||
|
if f0.VerifyStatus != "live" {
|
||||||
|
t.Errorf("findings[0].VerifyStatus = %q, want live", f0.VerifyStatus)
|
||||||
|
}
|
||||||
|
if f0.Source != "src/config.py" {
|
||||||
|
t.Errorf("findings[0].Source = %q, want src/config.py", f0.Source)
|
||||||
|
}
|
||||||
|
if f0.LineNumber != 42 {
|
||||||
|
t.Errorf("findings[0].LineNumber = %d, want 42", f0.LineNumber)
|
||||||
|
}
|
||||||
|
if f0.SourceType != "import:trufflehog" {
|
||||||
|
t.Errorf("findings[0].SourceType = %q, want import:trufflehog", f0.SourceType)
|
||||||
|
}
|
||||||
|
if f0.KeyValue != "sk-proj-abcdef1234567890abcdef" {
|
||||||
|
t.Errorf("findings[0].KeyValue unexpected: %q", f0.KeyValue)
|
||||||
|
}
|
||||||
|
if f0.KeyMasked == "" || f0.KeyMasked == f0.KeyValue {
|
||||||
|
t.Errorf("findings[0].KeyMasked not populated: %q", f0.KeyMasked)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Record 2: AnthropicV2 / Filesystem / unverified.
|
||||||
|
f1 := findings[1]
|
||||||
|
if f1.ProviderName != "anthropic" {
|
||||||
|
t.Errorf("findings[1].ProviderName = %q, want anthropic", f1.ProviderName)
|
||||||
|
}
|
||||||
|
if f1.Confidence != "medium" {
|
||||||
|
t.Errorf("findings[1].Confidence = %q, want medium", f1.Confidence)
|
||||||
|
}
|
||||||
|
if f1.Verified {
|
||||||
|
t.Error("findings[1].Verified = true, want false")
|
||||||
|
}
|
||||||
|
if f1.VerifyStatus != "unverified" {
|
||||||
|
t.Errorf("findings[1].VerifyStatus = %q, want unverified", f1.VerifyStatus)
|
||||||
|
}
|
||||||
|
if f1.Source != "/tmp/leaked.env" {
|
||||||
|
t.Errorf("findings[1].Source = %q, want /tmp/leaked.env", f1.Source)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Record 3: AWS / Github link.
|
||||||
|
f2 := findings[2]
|
||||||
|
if f2.ProviderName != "aws" {
|
||||||
|
t.Errorf("findings[2].ProviderName = %q, want aws", f2.ProviderName)
|
||||||
|
}
|
||||||
|
if !f2.Verified {
|
||||||
|
t.Error("findings[2].Verified = false, want true")
|
||||||
|
}
|
||||||
|
if f2.Source != "https://github.com/foo/bar/blob/main/a.yml" {
|
||||||
|
t.Errorf("findings[2].Source = %q, want github link", f2.Source)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTruffleHogImporter_NormalizeName(t *testing.T) {
|
||||||
|
cases := []struct {
|
||||||
|
in, want string
|
||||||
|
}{
|
||||||
|
{"OpenAI", "openai"},
|
||||||
|
{"GithubV2", "github"},
|
||||||
|
{"AnthropicV2", "anthropic"},
|
||||||
|
{"AWS", "aws"},
|
||||||
|
{"GCP", "gcp"},
|
||||||
|
{"HuggingFace", "huggingface"},
|
||||||
|
{"UnknownDetector", "unknowndetector"},
|
||||||
|
}
|
||||||
|
for _, c := range cases {
|
||||||
|
if got := normalizeTruffleHogName(c.in); got != c.want {
|
||||||
|
t.Errorf("normalizeTruffleHogName(%q) = %q, want %q", c.in, got, c.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTruffleHogImporter_EmptyArray(t *testing.T) {
|
||||||
|
var imp TruffleHogImporter
|
||||||
|
findings, err := imp.Import(strings.NewReader("[]"))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if len(findings) != 0 {
|
||||||
|
t.Fatalf("expected 0 findings, got %d", len(findings))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestTruffleHogImporter_InvalidJSON(t *testing.T) {
|
||||||
|
var imp TruffleHogImporter
|
||||||
|
if _, err := imp.Import(strings.NewReader("not json")); err == nil {
|
||||||
|
t.Fatal("expected error for invalid JSON, got nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user