feat(09-03): implement stable cross-source finding Dedup
- Dedup drops duplicates keyed by sha256(ProviderName|KeyMasked|Source).
- Preserves input order and first-seen metadata (stable dedup).
- Findings with the same provider and masked key but different Source URLs are kept separate.
- Uses engine.Finding directly to avoid an alias collision with Plan 09-01.
This commit is contained in:
41
pkg/recon/dedup.go
Normal file
41
pkg/recon/dedup.go
Normal file
@@ -0,0 +1,41 @@
|
||||
package recon
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/engine"
|
||||
)
|
||||
|
||||
// Dedup removes duplicate findings from a recon sweep using
|
||||
// SHA256(ProviderName|KeyMasked|Source) as the dedup key.
|
||||
//
|
||||
// The operation is stable: input order is preserved and first-seen metadata
|
||||
// (DetectedAt, Confidence, VerifyStatus, etc.) wins when a later duplicate
|
||||
// would otherwise overwrite it. Two findings with the same provider and
|
||||
// masked key but different Source URLs are kept separate, so callers can
|
||||
// see every distinct location where a leaked key was found.
|
||||
//
|
||||
// Callers (e.g. Engine.SweepAll from Plan 09-01) should invoke Dedup on the
|
||||
// aggregated finding slice before persisting to storage. A nil or empty
|
||||
// input returns nil.
|
||||
//
|
||||
// Note: this package uses engine.Finding directly rather than a local alias
|
||||
// so it compiles independently of Plan 09-01 during parallel execution.
|
||||
func Dedup(in []engine.Finding) []engine.Finding {
|
||||
if len(in) == 0 {
|
||||
return nil
|
||||
}
|
||||
seen := make(map[string]struct{}, len(in))
|
||||
out := make([]engine.Finding, 0, len(in))
|
||||
for _, f := range in {
|
||||
sum := sha256.Sum256([]byte(f.ProviderName + "|" + f.KeyMasked + "|" + f.Source))
|
||||
key := hex.EncodeToString(sum[:])
|
||||
if _, dup := seen[key]; dup {
|
||||
continue
|
||||
}
|
||||
seen[key] = struct{}{}
|
||||
out = append(out, f)
|
||||
}
|
||||
return out
|
||||
}
|
||||
Reference in New Issue
Block a user