merge: plan 09-03 stealth+dedup

This commit is contained in:
salvacybersec
2026-04-06 00:45:13 +03:00
5 changed files with 297 additions and 0 deletions

41
pkg/recon/dedup.go Normal file
View File

@@ -0,0 +1,41 @@
package recon
import (
"crypto/sha256"
"encoding/hex"
"github.com/salvacybersec/keyhunter/pkg/engine"
)
// Dedup returns the findings from in with duplicates dropped. Two findings
// are considered duplicates when the hex-encoded SHA-256 of
// ProviderName + "|" + KeyMasked + "|" + Source is identical.
//
// The result keeps the input order, and for every group of duplicates only
// the first occurrence is retained, so its remaining fields (DetectedAt,
// Confidence, and so on) are the ones that survive. Findings that share a
// provider and masked key but differ in Source hash differently and are
// therefore all kept.
//
// A nil or empty input yields nil. The input slice is not modified; the
// result is a freshly allocated slice.
//
// The function is intended to be run over an aggregated finding slice (for
// example by the sweep engine from Plan 09-01) before persisting results.
// It operates on engine.Finding directly rather than on a package-local
// alias.
//
// NOTE(review): the fields are joined with "|" before hashing, so values
// that themselves contain "|" could in principle produce the same key for
// different field triples — confirm provider names and masked keys never
// contain that character.
func Dedup(in []engine.Finding) []engine.Finding {
	if len(in) == 0 {
		return nil
	}

	unique := make([]engine.Finding, 0, len(in))
	visited := make(map[string]struct{}, len(in))
	for i := range in {
		digest := sha256.Sum256([]byte(in[i].ProviderName + "|" + in[i].KeyMasked + "|" + in[i].Source))
		fingerprint := hex.EncodeToString(digest[:])
		if _, known := visited[fingerprint]; !known {
			// First time this fingerprint shows up: record it and keep the finding.
			visited[fingerprint] = struct{}{}
			unique = append(unique, in[i])
		}
	}
	return unique
}

55
pkg/recon/dedup_test.go Normal file
View File

@@ -0,0 +1,55 @@
package recon
import (
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/salvacybersec/keyhunter/pkg/engine"
)
func TestDedupEmpty(t *testing.T) {
require.Nil(t, Dedup(nil))
require.Nil(t, Dedup([]engine.Finding{}))
}
// TestDedupNoDuplicates checks that an input without duplicates comes back
// unchanged: same length, same elements, same order.
func TestDedupNoDuplicates(t *testing.T) {
	findings := []engine.Finding{
		{ProviderName: "openai", KeyMasked: "sk-abc12...9xyz", Source: "https://example.com/a"},
		{ProviderName: "anthropic", KeyMasked: "sk-ant-1...2def", Source: "https://example.com/b"},
		{ProviderName: "cohere", KeyMasked: "co-abcde...wxyz", Source: "https://example.com/c"},
	}

	got := Dedup(findings)

	require.Len(t, got, 3)
	require.Equal(t, findings, got, "order must be preserved")
}
// TestDedupAllDuplicates checks that three identical findings collapse to a
// single result equal to the repeated finding.
func TestDedupAllDuplicates(t *testing.T) {
	repeated := engine.Finding{ProviderName: "openai", KeyMasked: "sk-abc12...9xyz", Source: "https://example.com/a"}
	input := []engine.Finding{repeated, repeated, repeated}

	got := Dedup(input)

	require.Len(t, got, 1)
	require.Equal(t, repeated, got[0])
}
// TestDedupPreservesFirstSeen checks that when two findings share provider,
// masked key and source but differ in DetectedAt and Confidence, the fields
// of the earlier slice element are the ones kept.
func TestDedupPreservesFirstSeen(t *testing.T) {
	earlier := time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC)
	later := time.Date(2026, 2, 2, 0, 0, 0, 0, time.UTC)

	original := engine.Finding{
		ProviderName: "openai",
		KeyMasked:    "sk-abc12...9xyz",
		Source:       "https://example.com/a",
		DetectedAt:   earlier,
		Confidence:   "high",
	}
	// Same identity fields, different metadata.
	repeat := original
	repeat.DetectedAt = later
	repeat.Confidence = "low"

	got := Dedup([]engine.Finding{original, repeat})

	require.Len(t, got, 1)
	require.Equal(t, earlier, got[0].DetectedAt, "first-seen timestamp must win")
	require.Equal(t, "high", got[0].Confidence, "first-seen metadata must win")
}
// TestDedupDifferentSource checks that findings with the same provider and
// masked key but different Source values are both retained.
func TestDedupDifferentSource(t *testing.T) {
	atA := engine.Finding{ProviderName: "openai", KeyMasked: "sk-abc12...9xyz", Source: "https://example.com/a"}
	atB := engine.Finding{ProviderName: "openai", KeyMasked: "sk-abc12...9xyz", Source: "https://example.com/b"}

	got := Dedup([]engine.Finding{atA, atB})

	require.Len(t, got, 2, "same provider+masked but different Source URLs must both be kept")
}

36
pkg/recon/stealth.go Normal file
View File

@@ -0,0 +1,36 @@
package recon
import "math/rand"
// userAgents is the fixed pool of browser User-Agent strings that
// RandomUserAgent draws from. It holds 10 entries: Chrome on Windows, macOS,
// Linux and Android; Firefox on Windows, macOS and Linux; Safari on macOS and
// iOS; and Edge on Windows.
//
// The pool is meant to be used when Config.Stealth is enabled, so that
// outbound requests do not all carry one identifiable User-Agent.
//
// NOTE(review): the browser versions are pinned (Chrome 120, Firefox 121,
// Safari 17.2) and will age; also confirm that the "Intel Mac OS X 14.2"
// platform token in the macOS Firefox entry matches what that browser
// actually sends.
var userAgents = []string{
// Chrome 120 on Windows, macOS and Linux.
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
// Firefox 121 on Windows, macOS and Linux.
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 14.2; rv:121.0) Gecko/20100101 Firefox/121.0",
"Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0",
// Safari 17.2 on macOS and iPhone.
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
"Mozilla/5.0 (iPhone; CPU iPhone OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1",
// Edge 120 on Windows.
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.2210.61",
// Chrome 120 on Android.
"Mozilla/5.0 (Linux; Android 14; Pixel 8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36",
}
// RandomUserAgent picks one entry from the userAgents pool using the
// package-level math/rand source and returns it.
//
// It is meant to be called once per outbound request when Config.Stealth is
// true, so that the User-Agent rotates between requests.
func RandomUserAgent() string {
	idx := rand.Intn(len(userAgents))
	return userAgents[idx]
}
// StealthHeaders builds a new two-entry header map on every call:
// "User-Agent" is set to the result of RandomUserAgent, and
// "Accept-Language" is always "en-US,en;q=0.9".
//
// Recon sources are expected to merge the returned map into their outbound
// requests when stealth mode is enabled.
func StealthHeaders() map[string]string {
	headers := make(map[string]string, 2)
	headers["User-Agent"] = RandomUserAgent()
	headers["Accept-Language"] = "en-US,en;q=0.9"
	return headers
}

38
pkg/recon/stealth_test.go Normal file
View File

@@ -0,0 +1,38 @@
package recon
import (
"testing"
"github.com/stretchr/testify/require"
)
// TestUAPoolSize pins the size of the userAgents pool.
func TestUAPoolSize(t *testing.T) {
	const wantEntries = 10
	require.Len(t, userAgents, wantEntries, "UA pool must contain exactly 10 entries")
}
// TestRandomUserAgentInPool calls RandomUserAgent 100 times and checks that
// every returned value is one of the entries in userAgents.
func TestRandomUserAgentInPool(t *testing.T) {
	// Index the pool for constant-time membership checks.
	known := make(map[string]struct{}, len(userAgents))
	for i := range userAgents {
		known[userAgents[i]] = struct{}{}
	}

	const draws = 100
	for n := 0; n < draws; n++ {
		picked := RandomUserAgent()
		_, found := known[picked]
		require.True(t, found, "RandomUserAgent returned value not in pool: %q", picked)
	}
}
// TestStealthHeadersHasUA checks that StealthHeaders returns a non-empty
// "User-Agent" drawn from userAgents and the fixed "Accept-Language" value.
func TestStealthHeadersHasUA(t *testing.T) {
	headers := StealthHeaders()

	agent, present := headers["User-Agent"]
	require.True(t, present, "StealthHeaders missing User-Agent")
	require.NotEmpty(t, agent)
	require.Equal(t, "en-US,en;q=0.9", headers["Accept-Language"])

	// The User-Agent must be one of the pool entries.
	known := make(map[string]struct{}, len(userAgents))
	for i := range userAgents {
		known[userAgents[i]] = struct{}{}
	}
	_, fromPool := known[agent]
	require.True(t, fromPool, "StealthHeaders User-Agent not in pool: %q", agent)
}