package importer

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"

	"github.com/salvacybersec/keyhunter/pkg/engine"
)

// FindingKey computes the dedup identity of a finding: the hex-encoded
// SHA-256 digest of its provider name, masked key, source path, and line
// number, joined with NUL separators.
//
// Import pipelines use this identity so that re-importing the same scanner
// output does not insert the same underlying secret twice. Only those four
// fields participate; everything else on the finding (DetectedAt,
// Confidence, VerifyStatus, ...) is deliberately left out, so a later
// re-run of the same import collapses onto the original finding.
func FindingKey(f engine.Finding) string {
	digest := sha256.New()
	// Stream the formatted tuple straight into the hasher. The returned
	// error is ignored because hash.Hash documents that Write never fails.
	_, _ = fmt.Fprintf(digest, "%s\x00%s\x00%s\x00%d",
		f.ProviderName, f.KeyMasked, f.Source, f.LineNumber)
	return hex.EncodeToString(digest.Sum(nil))
}

// Dedup filters an in-memory batch of findings down to one entry per
// FindingKey before insert. The first occurrence of each key is kept and
// the relative order of the kept findings matches the input. It returns
// the filtered slice together with the count of findings that were
// discarded as duplicates.
func Dedup(in []engine.Finding) ([]engine.Finding, int) {
	unique := make([]engine.Finding, 0, len(in))
	known := make(map[string]struct{}, len(in))

	for _, finding := range in {
		key := FindingKey(finding)
		if _, dup := known[key]; !dup {
			known[key] = struct{}{}
			unique = append(unique, finding)
		}
	}

	// Every input finding is either kept or dropped, so the number of
	// duplicates is simply the difference in lengths.
	return unique, len(in) - len(unique)
}