feat(07-03): dedup helper for imported findings

- FindingKey: stable SHA-256 over provider+masked+source+line
- Dedup: preserves first-seen order, returns drop count
- 8 unit tests covering stability, field sensitivity, order preservation
This commit is contained in:
salvacybersec
2026-04-05 23:54:44 +03:00
parent 779c5b3d6f
commit 6a3d5b0cb7
2 changed files with 148 additions and 0 deletions

106
pkg/importer/dedup_test.go Normal file
View File

@@ -0,0 +1,106 @@
package importer
import (
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/engine"
)
// mkFinding builds an engine.Finding with only the four fields these tests
// vary (provider name, masked key, source and line number) populated; every
// other field is left at its zero value.
func mkFinding(provider, masked, source string, line int) engine.Finding {
	var f engine.Finding
	f.ProviderName = provider
	f.KeyMasked = masked
	f.Source = source
	f.LineNumber = line
	return f
}
// TestFindingKey_Stable checks that calling FindingKey twice on the same
// finding produces equal results.
func TestFindingKey_Stable(t *testing.T) {
	finding := mkFinding("openai", "sk-abc12...wxyz", "src/app.go", 42)

	first := FindingKey(finding)
	second := FindingKey(finding)
	if first != second {
		t.Fatal("FindingKey must be deterministic for identical input")
	}
}
// TestFindingKey_DiffersByProvider checks that two findings that differ only
// in ProviderName do not share a key.
func TestFindingKey_DiffersByProvider(t *testing.T) {
	left := mkFinding("openai", "sk-abc12...wxyz", "src/app.go", 42)
	right := mkFinding("anthropic", "sk-abc12...wxyz", "src/app.go", 42)

	if leftKey, rightKey := FindingKey(left), FindingKey(right); leftKey == rightKey {
		t.Fatal("different providers must yield different keys")
	}
}
// TestFindingKey_DiffersByMasked checks that two findings that differ only
// in KeyMasked do not share a key.
func TestFindingKey_DiffersByMasked(t *testing.T) {
	left := mkFinding("openai", "sk-aaaaa...wxyz", "src/app.go", 42)
	right := mkFinding("openai", "sk-bbbbb...wxyz", "src/app.go", 42)

	if leftKey, rightKey := FindingKey(left), FindingKey(right); leftKey == rightKey {
		t.Fatal("different masked keys must yield different keys")
	}
}
// TestFindingKey_DiffersBySource checks that two findings that differ only
// in Source do not share a key.
func TestFindingKey_DiffersBySource(t *testing.T) {
	left := mkFinding("openai", "sk-abc12...wxyz", "src/app.go", 42)
	right := mkFinding("openai", "sk-abc12...wxyz", "src/other.go", 42)

	if leftKey, rightKey := FindingKey(left), FindingKey(right); leftKey == rightKey {
		t.Fatal("different sources must yield different keys")
	}
}
// TestFindingKey_DiffersByLine checks that two findings that differ only in
// LineNumber do not share a key.
func TestFindingKey_DiffersByLine(t *testing.T) {
	left := mkFinding("openai", "sk-abc12...wxyz", "src/app.go", 42)
	right := mkFinding("openai", "sk-abc12...wxyz", "src/app.go", 43)

	if leftKey, rightKey := FindingKey(left), FindingKey(right); leftKey == rightKey {
		t.Fatal("different line numbers must yield different keys")
	}
}
// TestDedup_PreservesOrder feeds Dedup the sequence a, b, a, c, b and expects
// the two repeats to be reported as dropped, with the three survivors
// returned in the order they were first seen (a, b, c).
func TestDedup_PreservesOrder(t *testing.T) {
	a := mkFinding("openai", "sk-aaa...0001", "file_a.go", 1)
	b := mkFinding("openai", "sk-bbb...0002", "file_b.go", 2)
	c := mkFinding("openai", "sk-ccc...0003", "file_c.go", 3)

	out, dropped := Dedup([]engine.Finding{a, b, a, c, b})

	if dropped != 2 {
		t.Fatalf("expected 2 dropped, got %d", dropped)
	}
	if len(out) != 3 {
		t.Fatalf("expected 3 unique, got %d", len(out))
	}

	// Order is judged by KeyMasked alone; each fixture has a distinct value.
	want := []engine.Finding{a, b, c}
	for i := range want {
		if out[i].KeyMasked != want[i].KeyMasked {
			t.Fatalf("dedup did not preserve first-seen order: %+v", out)
		}
	}
}
// TestDedup_Empty checks that Dedup on nil input reports zero drops and
// returns a result of length zero.
func TestDedup_Empty(t *testing.T) {
	kept, removed := Dedup(nil)

	if removed != 0 {
		t.Fatalf("expected 0 dropped, got %d", removed)
	}
	if n := len(kept); n != 0 {
		t.Fatalf("expected empty output, got %d", n)
	}
}
// TestDedup_IgnoresUnrelatedFields builds two findings with identical
// provider, masked key, source and line but different DetectedAt and
// Confidence values. It expects Dedup to treat them as duplicates and keep
// the one that appeared first.
func TestDedup_IgnoresUnrelatedFields(t *testing.T) {
	seenAt := time.Now()

	first := mkFinding("openai", "sk-abc12...wxyz", "src/app.go", 42)
	first.DetectedAt = seenAt
	first.Confidence = "high"

	second := mkFinding("openai", "sk-abc12...wxyz", "src/app.go", 42)
	second.DetectedAt = seenAt.Add(24 * time.Hour)
	second.Confidence = "low"

	kept, removed := Dedup([]engine.Finding{first, second})

	if removed != 1 {
		t.Fatalf("expected 1 dropped, got %d", removed)
	}
	if n := len(kept); n != 1 {
		t.Fatalf("expected 1 kept, got %d", n)
	}
	// The surviving entry must be the first one, recognisable by its Confidence.
	if kept[0].Confidence != "high" {
		t.Fatalf("expected first-seen to be kept, got Confidence=%s", kept[0].Confidence)
	}
}