feat(07-01): Importer interface and TruffleHog v3 JSON adapter
- pkg/importer/importer.go: shared Importer interface (Name, Import)
- pkg/importer/trufflehog.go: TruffleHogImporter with v3 JSON decoding, detector-name normalization (OpenAI/GithubV2/AWS -> canonical ids), SourceMetadata path+line extraction for Git/Filesystem/Github
- pkg/importer/testdata/trufflehog-sample.json: 3-record fixture
- pkg/importer/trufflehog_test.go: Name, Import, NormalizeName, EmptyArray, InvalidJSON tests -- all passing
This commit is contained in:
174
pkg/importer/trufflehog.go
Normal file
174
pkg/importer/trufflehog.go
Normal file
@@ -0,0 +1,174 @@
|
||||
package importer
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/engine"
|
||||
)
|
||||
|
||||
// TruffleHogImporter converts the JSON report produced by TruffleHog v3
// (`trufflehog ... --json`) into engine.Finding values.
//
// Every TruffleHog record describes one detected secret: the detector that
// fired, whether the secret was verified, and a nested SourceMetadata object
// whose layout varies with the scan source (git, filesystem, github, ...).
// The input framing that is accepted is documented on Import. See
// 07-CONTEXT.md for the field decisions.
//
// The type carries no state, so its zero value is ready to use.
type TruffleHogImporter struct{}
|
||||
|
||||
// trufflehogRecord is the decoding target for one element of TruffleHog v3
// JSON output.
//
// SourceMetadata and ExtraData are held as json.RawMessage: their layout
// depends on the scan source / detector, so they are captured verbatim
// instead of being decoded into a fixed shape. The remaining fields are
// decoded into concrete Go types, even those the importer does not read
// (SourceID, DetectorType, Redacted, ExtraData).
type trufflehogRecord struct {
	// Where the secret was found.
	SourceID       int             `json:"SourceID"`
	SourceName     string          `json:"SourceName"`
	SourceMetadata json.RawMessage `json:"SourceMetadata"` // decoded lazily by extractSourcePath

	// Which detector fired and whether TruffleHog verified the secret.
	DetectorName string `json:"DetectorName"`
	DetectorType int    `json:"DetectorType"` // numeric detector id
	Verified     bool   `json:"Verified"`

	// The secret itself.
	Raw      string `json:"Raw"` // records with an empty Raw are skipped by Import
	Redacted string `json:"Redacted"`

	// Detector-specific extras, kept undecoded.
	ExtraData json.RawMessage `json:"ExtraData"`
}
|
||||
|
||||
// tfhSourceMetadata is the slice of a record's SourceMetadata that is
// needed to derive a source path and line number.
//
// Only the Git, Filesystem and Github variants under Data are modelled. Each
// variant is a pointer, so after decoding a nil pointer means the variant
// was absent from the JSON while a non-nil pointer with zero-valued fields
// means it was present but empty.
type tfhSourceMetadata struct {
	Data struct {
		// Git is set for findings from a git scan.
		Git *struct {
			File       string `json:"file"`
			Repository string `json:"repository"`
			Commit     string `json:"commit"`
			Line       int    `json:"line"`
		} `json:"Git"`

		// Filesystem is set for findings from a filesystem scan.
		Filesystem *struct {
			File string `json:"file"`
		} `json:"Filesystem"`

		// Github is set for findings from a GitHub scan.
		Github *struct {
			File       string `json:"file"`
			Link       string `json:"link"`
			Repository string `json:"repository"`
		} `json:"Github"`
	} `json:"Data"`
}
|
||||
|
||||
var (
	// tfhVersionSuffix matches a trailing version marker ("v2", "v10") at
	// the very end of a detector name. The pattern is lowercase-only, so it
	// is meant to run on names that have already been lowercased:
	// "githubv2" -> "github", "anthropicv2" -> "anthropic".
	tfhVersionSuffix = regexp.MustCompile(`v\d+$`)

	// tfhAliases translates lowercase TruffleHog detector names into
	// KeyHunter's canonical provider identifiers. Identity mappings are
	// spelled out on purpose so the set of recognised providers is explicit.
	tfhAliases = map[string]string{
		"aws":         "aws",
		"gcp":         "gcp",
		"openai":      "openai",
		"anthropic":   "anthropic",
		"huggingface": "huggingface",
		"github":      "github",
	}
)
|
||||
|
||||
// Name implements Importer.
|
||||
func (TruffleHogImporter) Name() string { return "trufflehog" }
|
||||
|
||||
// Import decodes a TruffleHog v3 JSON array from r and returns the findings
|
||||
// in the same order they appear in the input. Records with an empty Raw
|
||||
// value are skipped silently because they carry no usable key material.
|
||||
func (TruffleHogImporter) Import(r io.Reader) ([]engine.Finding, error) {
|
||||
var records []trufflehogRecord
|
||||
if err := json.NewDecoder(r).Decode(&records); err != nil {
|
||||
return nil, fmt.Errorf("decoding trufflehog json: %w", err)
|
||||
}
|
||||
|
||||
findings := make([]engine.Finding, 0, len(records))
|
||||
now := time.Now()
|
||||
|
||||
for _, rec := range records {
|
||||
if rec.Raw == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
source, line := extractSourcePath(rec.SourceMetadata)
|
||||
if source == "" {
|
||||
source = rec.SourceName
|
||||
}
|
||||
|
||||
confidence := "medium"
|
||||
verifyStatus := "unverified"
|
||||
if rec.Verified {
|
||||
confidence = "high"
|
||||
verifyStatus = "live"
|
||||
}
|
||||
|
||||
findings = append(findings, engine.Finding{
|
||||
ProviderName: normalizeTruffleHogName(rec.DetectorName),
|
||||
KeyValue: rec.Raw,
|
||||
KeyMasked: engine.MaskKey(rec.Raw),
|
||||
Confidence: confidence,
|
||||
Source: source,
|
||||
SourceType: "import:trufflehog",
|
||||
LineNumber: line,
|
||||
DetectedAt: now,
|
||||
Verified: rec.Verified,
|
||||
VerifyStatus: verifyStatus,
|
||||
})
|
||||
}
|
||||
|
||||
return findings, nil
|
||||
}
|
||||
|
||||
// normalizeTruffleHogName converts a TruffleHog detector name
|
||||
// ("OpenAI", "GithubV2", "AWS") to the lowercase KeyHunter provider id.
|
||||
// Unknown detectors fall through as their lowercased, de-versioned form.
|
||||
func normalizeTruffleHogName(detector string) string {
|
||||
lowered := strings.ToLower(strings.TrimSpace(detector))
|
||||
lowered = tfhVersionSuffix.ReplaceAllString(lowered, "")
|
||||
if alias, ok := tfhAliases[lowered]; ok {
|
||||
return alias
|
||||
}
|
||||
return lowered
|
||||
}
|
||||
|
||||
// extractSourcePath inspects a record's SourceMetadata and returns the best
// available location string together with a line number.
//
// The location is the first non-empty value in this priority order:
// Git.file, Filesystem.file, Github.file, Github.link, Github.repository,
// Git.repository. It is "" when none of them is set.
//
// The line number is the first non-zero "line" among the Git, Filesystem and
// Github objects, in that order, or 0 when none reports one. Git therefore
// still takes precedence, while filesystem and GitHub findings no longer
// lose their line number.
//
// Empty metadata and any unmarshal error are non-fatal and yield ("", 0).
func extractSourcePath(meta json.RawMessage) (string, int) {
	if len(meta) == 0 {
		return "", 0
	}

	// location is the union of the per-source fields read below. The three
	// source objects overlap enough that one shape can decode all of them;
	// keys a given source does not emit simply stay at their zero value.
	type location struct {
		File       string `json:"file"`
		Link       string `json:"link"`
		Repository string `json:"repository"`
		Line       int    `json:"line"`
	}
	// Pointers distinguish "source object absent" from "present but empty".
	var sm struct {
		Data struct {
			Git        *location `json:"Git"`
			Filesystem *location `json:"Filesystem"`
			Github     *location `json:"Github"`
		} `json:"Data"`
	}
	if err := json.Unmarshal(meta, &sm); err != nil {
		return "", 0
	}
	git, fs, gh := sm.Data.Git, sm.Data.Filesystem, sm.Data.Github

	line := 0
	for _, loc := range []*location{git, fs, gh} {
		if loc != nil && loc.Line != 0 {
			line = loc.Line
			break
		}
	}

	switch {
	case git != nil && git.File != "":
		return git.File, line
	case fs != nil && fs.File != "":
		return fs.File, line
	case gh != nil && gh.File != "":
		return gh.File, line
	case gh != nil && gh.Link != "":
		return gh.Link, line
	case gh != nil && gh.Repository != "":
		return gh.Repository, line
	case git != nil && git.Repository != "":
		// A bare repository URL is the least specific location, so it is
		// only used when nothing else is available.
		return git.Repository, line
	}
	return "", line
}
|
||||
Reference in New Issue
Block a user