package recon

import (
	"crypto/sha256"
	"encoding/hex"

	"github.com/salvacybersec/keyhunter/pkg/engine"
)

// Dedup removes duplicate findings from a recon sweep. Two findings are
// duplicates when their ProviderName, KeyMasked, and Source are all equal.
//
// The dedup key is the hex-encoded SHA-256 digest of the three fields, each
// written behind a fixed-width length prefix. The prefix makes the encoding
// unambiguous: joining the fields with a "|" delimiter would let
// ("a|b", "c", s) and ("a", "b|c", s) collide, silently dropping a distinct
// finding.
//
// The operation is stable: input order is preserved and the first-seen
// finding is the one kept, so its metadata (DetectedAt, Confidence,
// VerifyStatus, etc.) wins over any later duplicate. Two findings with the
// same provider and masked key but different Source values are kept
// separate, so callers can see every distinct location where a leaked key
// was found.
//
// Callers (e.g. Engine.SweepAll from Plan 09-01) should invoke Dedup on the
// aggregated finding slice before persisting to storage. A nil or empty
// input returns nil. The input slice is not modified.
//
// Note: this package uses engine.Finding directly rather than a local alias
// so it compiles independently of Plan 09-01 during parallel execution.
func Dedup(in []engine.Finding) []engine.Finding {
	if len(in) == 0 {
		return nil
	}

	seen := make(map[string]struct{}, len(in))
	out := make([]engine.Finding, 0, len(in))

	// One hasher and one length buffer are reused across all findings.
	h := sha256.New()
	var lenBuf [8]byte

	for _, f := range in {
		h.Reset()
		for _, field := range [...]string{f.ProviderName, f.KeyMasked, f.Source} {
			// Little-endian 64-bit length prefix, so field boundaries
			// cannot be forged by the field contents.
			n := uint64(len(field))
			for i := range lenBuf {
				lenBuf[i] = byte(n)
				n >>= 8
			}
			// hash.Hash.Write is documented to never return an error.
			h.Write(lenBuf[:])
			h.Write([]byte(field))
		}
		key := hex.EncodeToString(h.Sum(nil))

		if _, dup := seen[key]; dup {
			continue
		}
		seen[key] = struct{}{}
		out = append(out, f)
	}
	return out
}