package importer

import (
	"encoding/json"
	"fmt"
	"io"
	"regexp"
	"strings"
	"time"

	"github.com/salvacybersec/keyhunter/pkg/engine"
)

// TruffleHogImporter parses TruffleHog v3 JSON output
// (`trufflehog ... --json`) into engine.Finding records.
//
// Each input record describes one detected secret with detector metadata,
// verification status, and a nested SourceMetadata object whose shape
// depends on the scan source (git, filesystem, github, ...). See Import for
// the input framing that is accepted and 07-CONTEXT.md for the field
// decisions.
type TruffleHogImporter struct{}

// trufflehogRecord mirrors the part of the v3 JSON schema this importer
// decodes. SourceMetadata and ExtraData are kept as json.RawMessage so their
// source-dependent shapes never make the outer decode fail; SourceMetadata is
// parsed later by extractSourcePath. SourceID, DetectorType, Redacted and
// ExtraData are decoded but not read by Import.
//
// NOTE(review): DetectorType is declared as int (not raw JSON), so a
// non-numeric DetectorType value makes the decode fail — confirm that this
// strictness is intended.
type trufflehogRecord struct {
	SourceID       int             `json:"SourceID"`
	SourceName     string          `json:"SourceName"`     // fallback location when SourceMetadata yields none
	SourceMetadata json.RawMessage `json:"SourceMetadata"` // decoded lazily into tfhSourceMetadata
	DetectorName   string          `json:"DetectorName"`   // normalized into the provider name
	DetectorType   int             `json:"DetectorType"`
	Verified       bool            `json:"Verified"` // drives confidence and verify status
	Raw            string          `json:"Raw"`      // the secret itself; records without it are skipped
	Redacted       string          `json:"Redacted"`
	ExtraData      json.RawMessage `json:"ExtraData"`
}

// tfhSourceMetadata captures the subset of SourceMetadata.Data we extract a
// source path / line number from. All sub-objects are pointers so we can tell
// "not present" from "empty". Only the Git variant carries a line number
// here; Git.Commit is decoded but not consulted by extractSourcePath.
type tfhSourceMetadata struct {
	Data struct {
		Git *struct {
			File       string `json:"file"`
			Repository string `json:"repository"`
			Commit     string `json:"commit"`
			Line       int    `json:"line"`
		} `json:"Git"`
		Filesystem *struct {
			File string `json:"file"`
		} `json:"Filesystem"`
		Github *struct {
			File       string `json:"file"`
			Link       string `json:"link"`
			Repository string `json:"repository"`
		} `json:"Github"`
	} `json:"Data"`
}

// tfhVersionSuffix matches a trailing version marker ("v" followed by
// digits) on a detector name. The pattern uses a lowercase "v" because it is
// applied to the already-lowercased name, e.g. "githubv2" -> "github",
// "anthropicv2" -> "anthropic".
var tfhVersionSuffix = regexp.MustCompile(`v\d+$`) // tfhAliases maps known lowercase TruffleHog detector names to KeyHunter's // canonical provider identifiers. Entries that are the same on both sides // are listed explicitly so intent is clear. var tfhAliases = map[string]string{ "aws": "aws", "gcp": "gcp", "openai": "openai", "anthropic": "anthropic", "huggingface": "huggingface", "github": "github", } // Name implements Importer. func (TruffleHogImporter) Name() string { return "trufflehog" } // Import decodes a TruffleHog v3 JSON array from r and returns the findings // in the same order they appear in the input. Records with an empty Raw // value are skipped silently because they carry no usable key material. func (TruffleHogImporter) Import(r io.Reader) ([]engine.Finding, error) { var records []trufflehogRecord if err := json.NewDecoder(r).Decode(&records); err != nil { return nil, fmt.Errorf("decoding trufflehog json: %w", err) } findings := make([]engine.Finding, 0, len(records)) now := time.Now() for _, rec := range records { if rec.Raw == "" { continue } source, line := extractSourcePath(rec.SourceMetadata) if source == "" { source = rec.SourceName } confidence := "medium" verifyStatus := "unverified" if rec.Verified { confidence = "high" verifyStatus = "live" } findings = append(findings, engine.Finding{ ProviderName: normalizeTruffleHogName(rec.DetectorName), KeyValue: rec.Raw, KeyMasked: engine.MaskKey(rec.Raw), Confidence: confidence, Source: source, SourceType: "import:trufflehog", LineNumber: line, DetectedAt: now, Verified: rec.Verified, VerifyStatus: verifyStatus, }) } return findings, nil } // normalizeTruffleHogName converts a TruffleHog detector name // ("OpenAI", "GithubV2", "AWS") to the lowercase KeyHunter provider id. // Unknown detectors fall through as their lowercased, de-versioned form. 
func normalizeTruffleHogName(detector string) string { lowered := strings.ToLower(strings.TrimSpace(detector)) lowered = tfhVersionSuffix.ReplaceAllString(lowered, "") if alias, ok := tfhAliases[lowered]; ok { return alias } return lowered } // extractSourcePath walks SourceMetadata.Data in priority order and returns // the first non-empty location string together with a line number when one // is available. Any unmarshal error is non-fatal and yields ("", 0). func extractSourcePath(meta json.RawMessage) (string, int) { if len(meta) == 0 { return "", 0 } var sm tfhSourceMetadata if err := json.Unmarshal(meta, &sm); err != nil { return "", 0 } line := 0 if sm.Data.Git != nil { line = sm.Data.Git.Line if sm.Data.Git.File != "" { return sm.Data.Git.File, line } } if sm.Data.Filesystem != nil && sm.Data.Filesystem.File != "" { return sm.Data.Filesystem.File, line } if sm.Data.Github != nil { if sm.Data.Github.File != "" { return sm.Data.Github.File, line } if sm.Data.Github.Link != "" { return sm.Data.Github.Link, line } if sm.Data.Github.Repository != "" { return sm.Data.Github.Repository, line } } if sm.Data.Git != nil && sm.Data.Git.Repository != "" { return sm.Data.Git.Repository, line } return "", line }