diff --git a/pkg/recon/dedup_test.go b/pkg/recon/dedup_test.go new file mode 100644 index 0000000..819004c --- /dev/null +++ b/pkg/recon/dedup_test.go @@ -0,0 +1,55 @@ +package recon + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/salvacybersec/keyhunter/pkg/engine" +) + +func TestDedupEmpty(t *testing.T) { + require.Nil(t, Dedup(nil)) + require.Nil(t, Dedup([]engine.Finding{})) +} + +func TestDedupNoDuplicates(t *testing.T) { + in := []engine.Finding{ + {ProviderName: "openai", KeyMasked: "sk-abc12...9xyz", Source: "https://example.com/a"}, + {ProviderName: "anthropic", KeyMasked: "sk-ant-1...2def", Source: "https://example.com/b"}, + {ProviderName: "cohere", KeyMasked: "co-abcde...wxyz", Source: "https://example.com/c"}, + } + out := Dedup(in) + require.Len(t, out, 3) + require.Equal(t, in, out, "order must be preserved") +} + +func TestDedupAllDuplicates(t *testing.T) { + f := engine.Finding{ProviderName: "openai", KeyMasked: "sk-abc12...9xyz", Source: "https://example.com/a"} + out := Dedup([]engine.Finding{f, f, f}) + require.Len(t, out, 1) + require.Equal(t, f, out[0]) +} + +func TestDedupPreservesFirstSeen(t *testing.T) { + first := time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC) + second := time.Date(2026, 2, 2, 0, 0, 0, 0, time.UTC) + in := []engine.Finding{ + {ProviderName: "openai", KeyMasked: "sk-abc12...9xyz", Source: "https://example.com/a", DetectedAt: first, Confidence: "high"}, + {ProviderName: "openai", KeyMasked: "sk-abc12...9xyz", Source: "https://example.com/a", DetectedAt: second, Confidence: "low"}, + } + out := Dedup(in) + require.Len(t, out, 1) + require.Equal(t, first, out[0].DetectedAt, "first-seen timestamp must win") + require.Equal(t, "high", out[0].Confidence, "first-seen metadata must win") +} + +func TestDedupDifferentSource(t *testing.T) { + in := []engine.Finding{ + {ProviderName: "openai", KeyMasked: "sk-abc12...9xyz", Source: "https://example.com/a"}, + {ProviderName: "openai", KeyMasked: "sk-abc12...9xyz", Source: "https://example.com/b"}, + } + out := Dedup(in) + require.Len(t, out, 2, "same provider+masked but different Source URLs must both be kept") +}