- FindingKey: stable SHA-256 over provider+masked+source+line - Dedup: preserves first-seen order, returns drop count - 8 unit tests covering stability, field sensitivity, order preservation
43 lines
1.3 KiB
Go
43 lines
1.3 KiB
Go
package importer
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
"encoding/hex"
|
|
"fmt"
|
|
|
|
"github.com/salvacybersec/keyhunter/pkg/engine"
|
|
)
|
|
|
|
// FindingKey returns a stable identity hash for a finding based on the
|
|
// provider name, masked key, source path, and line number. This is the
|
|
// dedup identity used by import pipelines so the same underlying secret
|
|
// is not inserted twice when re-importing the same scanner output.
|
|
//
|
|
// Fields outside this tuple (DetectedAt, Confidence, VerifyStatus, ...)
|
|
// intentionally do not contribute to the key: re-running the same import
|
|
// at a later time must collapse onto the original finding.
|
|
func FindingKey(f engine.Finding) string {
|
|
payload := fmt.Sprintf("%s\x00%s\x00%s\x00%d", f.ProviderName, f.KeyMasked, f.Source, f.LineNumber)
|
|
sum := sha256.Sum256([]byte(payload))
|
|
return hex.EncodeToString(sum[:])
|
|
}
|
|
|
|
// Dedup removes duplicate findings from in-memory slices before insert.
|
|
// Order of first-seen findings is preserved. Returns the deduplicated
|
|
// slice and the number of duplicates dropped.
|
|
func Dedup(in []engine.Finding) ([]engine.Finding, int) {
|
|
seen := make(map[string]struct{}, len(in))
|
|
out := make([]engine.Finding, 0, len(in))
|
|
dropped := 0
|
|
for _, f := range in {
|
|
k := FindingKey(f)
|
|
if _, ok := seen[k]; ok {
|
|
dropped++
|
|
continue
|
|
}
|
|
seen[k] = struct{}{}
|
|
out = append(out, f)
|
|
}
|
|
return out, dropped
|
|
}
|