Files
keyhunter/pkg/recon/dedup.go
salvacybersec 2988fdf9b3 feat(09-03): implement stable cross-source finding Dedup
- Dedup drops duplicates keyed by sha256(ProviderName|KeyMasked|Source)
- Preserves input order and first-seen metadata (stable dedup)
- Same provider+masked with different Source URLs are kept separate
- Uses engine.Finding directly to avoid alias collision with Plan 09-01
2026-04-06 00:43:07 +03:00

42 lines
1.3 KiB
Go

package recon
import (
"crypto/sha256"
"encoding/hex"
"github.com/salvacybersec/keyhunter/pkg/engine"
)
// Dedup removes duplicate findings from a recon sweep. Two findings are
// duplicates when their ProviderName, KeyMasked and Source are all equal.
//
// The dedup key is the SHA-256 digest of the three fields joined with "|",
// with each field hex-encoded before joining. Hex output never contains "|",
// so a separator embedded in a field cannot shift the field boundaries:
// ("a|b", "c", src) and ("a", "b|c", src) yield different keys, whereas
// hashing the raw concatenation would merge them and silently drop a
// distinct finding.
//
// The operation is stable: input order is preserved and the first-seen
// finding is kept unchanged, so later duplicates never overwrite any of its
// fields. Two findings with the same provider and masked key but different
// Source values are kept separate, so callers can see every distinct
// location where a leaked key was found.
//
// Callers (e.g. Engine.SweepAll from Plan 09-01) should invoke Dedup on the
// aggregated finding slice before persisting to storage. A nil or empty
// input returns nil. The input slice is not modified.
//
// Note: this package uses engine.Finding directly rather than a local alias
// so it compiles independently of Plan 09-01 during parallel execution.
func Dedup(in []engine.Finding) []engine.Finding {
	if len(in) == 0 {
		return nil
	}
	// The fixed-size digest array is comparable, so it can be the map key
	// directly; no per-finding hex string of the digest is needed.
	seen := make(map[[sha256.Size]byte]struct{}, len(in))
	out := make([]engine.Finding, 0, len(in))
	for i := range in {
		// Index into the slice rather than ranging by value so findings that
		// turn out to be duplicates are never copied.
		f := &in[i]
		// Hex-encode each field so the "|" separator is unambiguous even
		// when a field itself contains "|".
		material := hex.EncodeToString([]byte(f.ProviderName)) + "|" +
			hex.EncodeToString([]byte(f.KeyMasked)) + "|" +
			hex.EncodeToString([]byte(f.Source))
		key := sha256.Sum256([]byte(material))
		if _, dup := seen[key]; dup {
			continue
		}
		seen[key] = struct{}{}
		out = append(out, *f)
	}
	return out
}