From bbbc05fa464b9b58b7360e2637b38ef97325eccd Mon Sep 17 00:00:00 2001 From: salvacybersec Date: Mon, 6 Apr 2026 00:41:55 +0300 Subject: [PATCH 1/5] test(09-03): add failing test for stealth UA pool --- pkg/recon/stealth_test.go | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 pkg/recon/stealth_test.go diff --git a/pkg/recon/stealth_test.go b/pkg/recon/stealth_test.go new file mode 100644 index 0000000..b31884a --- /dev/null +++ b/pkg/recon/stealth_test.go @@ -0,0 +1,38 @@ +package recon + +import ( + "testing" + + "github.com/stretchr/testify/require" +) + +func TestUAPoolSize(t *testing.T) { + require.Len(t, userAgents, 10, "UA pool must contain exactly 10 entries") +} + +func TestRandomUserAgentInPool(t *testing.T) { + pool := make(map[string]struct{}, len(userAgents)) + for _, ua := range userAgents { + pool[ua] = struct{}{} + } + for i := 0; i < 100; i++ { + got := RandomUserAgent() + _, ok := pool[got] + require.True(t, ok, "RandomUserAgent returned value not in pool: %q", got) + } +} + +func TestStealthHeadersHasUA(t *testing.T) { + h := StealthHeaders() + ua, ok := h["User-Agent"] + require.True(t, ok, "StealthHeaders missing User-Agent") + require.NotEmpty(t, ua) + require.Equal(t, "en-US,en;q=0.9", h["Accept-Language"]) + + pool := make(map[string]struct{}, len(userAgents)) + for _, u := range userAgents { + pool[u] = struct{}{} + } + _, inPool := pool[ua] + require.True(t, inPool, "StealthHeaders User-Agent not in pool: %q", ua) +} From 2c140e9661a44f45ba434b92f046bbcc64f44e07 Mon Sep 17 00:00:00 2001 From: salvacybersec Date: Mon, 6 Apr 2026 00:42:22 +0300 Subject: [PATCH 2/5] feat(09-03): implement stealth UA pool and StealthHeaders - Pool of 10 realistic browser User-Agents (Chrome/Firefox/Safari/Edge) - Covers Windows, macOS, Linux, iOS, Android - RandomUserAgent returns a random pool entry - StealthHeaders returns UA + Accept-Language header map --- pkg/recon/stealth.go | 36 
++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 pkg/recon/stealth.go diff --git a/pkg/recon/stealth.go b/pkg/recon/stealth.go new file mode 100644 index 0000000..ec11e77 --- /dev/null +++ b/pkg/recon/stealth.go @@ -0,0 +1,36 @@ +package recon + +import "math/rand" + +// userAgents is a curated pool of 10 realistic desktop/mobile browser +// User-Agent strings used when Config.Stealth is enabled. The pool covers +// Chrome/Firefox/Safari/Edge across Windows, macOS, Linux, iOS, and Android +// to avoid UA-fingerprint blocking by OSINT targets. +var userAgents = []string{ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 14.2; rv:121.0) Gecko/20100101 Firefox/121.0", + "Mozilla/5.0 (X11; Linux x86_64; rv:121.0) Gecko/20100101 Firefox/121.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15", + "Mozilla/5.0 (iPhone; CPU iPhone OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.2210.61", + "Mozilla/5.0 (Linux; Android 14; Pixel 8) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36", +} + +// RandomUserAgent returns a pseudo-random browser User-Agent from the pool. +// Used when Config.Stealth is true to rotate UA per outbound request. 
+func RandomUserAgent() string { + return userAgents[rand.Intn(len(userAgents))] +} + +// StealthHeaders returns a minimal header map carrying a rotated User-Agent +// plus a stable Accept-Language. Recon sources merge this into their +// outbound requests when stealth mode is enabled. +func StealthHeaders() map[string]string { + return map[string]string{ + "User-Agent": RandomUserAgent(), + "Accept-Language": "en-US,en;q=0.9", + } +} From ecfa2bff28d5a87ded72425dacd4ff56484eba20 Mon Sep 17 00:00:00 2001 From: salvacybersec Date: Mon, 6 Apr 2026 00:42:45 +0300 Subject: [PATCH 3/5] test(09-03): add failing test for cross-source Dedup --- pkg/recon/dedup_test.go | 55 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 pkg/recon/dedup_test.go diff --git a/pkg/recon/dedup_test.go b/pkg/recon/dedup_test.go new file mode 100644 index 0000000..819004c --- /dev/null +++ b/pkg/recon/dedup_test.go @@ -0,0 +1,55 @@ +package recon + +import ( + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/salvacybersec/keyhunter/pkg/engine" +) + +func TestDedupEmpty(t *testing.T) { + require.Nil(t, Dedup(nil)) + require.Nil(t, Dedup([]engine.Finding{})) +} + +func TestDedupNoDuplicates(t *testing.T) { + in := []engine.Finding{ + {ProviderName: "openai", KeyMasked: "sk-abc12...9xyz", Source: "https://example.com/a"}, + {ProviderName: "anthropic", KeyMasked: "sk-ant-1...2def", Source: "https://example.com/b"}, + {ProviderName: "cohere", KeyMasked: "co-abcde...wxyz", Source: "https://example.com/c"}, + } + out := Dedup(in) + require.Len(t, out, 3) + require.Equal(t, in, out, "order must be preserved") +} + +func TestDedupAllDuplicates(t *testing.T) { + f := engine.Finding{ProviderName: "openai", KeyMasked: "sk-abc12...9xyz", Source: "https://example.com/a"} + out := Dedup([]engine.Finding{f, f, f}) + require.Len(t, out, 1) + require.Equal(t, f, out[0]) +} + +func TestDedupPreservesFirstSeen(t *testing.T) { + first := 
time.Date(2026, 1, 1, 0, 0, 0, 0, time.UTC) + second := time.Date(2026, 2, 2, 0, 0, 0, 0, time.UTC) + in := []engine.Finding{ + {ProviderName: "openai", KeyMasked: "sk-abc12...9xyz", Source: "https://example.com/a", DetectedAt: first, Confidence: "high"}, + {ProviderName: "openai", KeyMasked: "sk-abc12...9xyz", Source: "https://example.com/a", DetectedAt: second, Confidence: "low"}, + } + out := Dedup(in) + require.Len(t, out, 1) + require.Equal(t, first, out[0].DetectedAt, "first-seen timestamp must win") + require.Equal(t, "high", out[0].Confidence, "first-seen metadata must win") +} + +func TestDedupDifferentSource(t *testing.T) { + in := []engine.Finding{ + {ProviderName: "openai", KeyMasked: "sk-abc12...9xyz", Source: "https://example.com/a"}, + {ProviderName: "openai", KeyMasked: "sk-abc12...9xyz", Source: "https://example.com/b"}, + } + out := Dedup(in) + require.Len(t, out, 2, "same provider+masked but different Source URLs must both be kept") +} From 2988fdf9b3bd8afd3fa52a7570a860219d063c3f Mon Sep 17 00:00:00 2001 From: salvacybersec Date: Mon, 6 Apr 2026 00:43:07 +0300 Subject: [PATCH 4/5] feat(09-03): implement stable cross-source finding Dedup - Dedup drops duplicates keyed by sha256(ProviderName|KeyMasked|Source) - Preserves input order and first-seen metadata (stable dedup) - Same provider+masked with different Source URLs are kept separate - Uses engine.Finding directly to avoid alias collision with Plan 09-01 --- pkg/recon/dedup.go | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 pkg/recon/dedup.go diff --git a/pkg/recon/dedup.go b/pkg/recon/dedup.go new file mode 100644 index 0000000..64806ab --- /dev/null +++ b/pkg/recon/dedup.go @@ -0,0 +1,41 @@ +package recon + +import ( + "crypto/sha256" + "encoding/hex" + + "github.com/salvacybersec/keyhunter/pkg/engine" +) + +// Dedup removes duplicate findings from a recon sweep using +// SHA256(ProviderName|KeyMasked|Source) as the dedup key. 
+// +// The operation is stable: input order is preserved and first-seen metadata +// (DetectedAt, Confidence, VerifyStatus, etc.) wins when a later duplicate +// would otherwise overwrite it. Two findings with the same provider and +// masked key but different Source URLs are kept separate, so callers can +// see every distinct location where a leaked key was found. +// +// Callers (e.g. Engine.SweepAll from Plan 09-01) should invoke Dedup on the +// aggregated finding slice before persisting to storage. A nil or empty +// input returns nil. +// +// Note: this package uses engine.Finding directly rather than a local alias +// so it compiles independently of Plan 09-01 during parallel execution. +func Dedup(in []engine.Finding) []engine.Finding { + if len(in) == 0 { + return nil + } + seen := make(map[string]struct{}, len(in)) + out := make([]engine.Finding, 0, len(in)) + for _, f := range in { + sum := sha256.Sum256([]byte(f.ProviderName + "|" + f.KeyMasked + "|" + f.Source)) + key := hex.EncodeToString(sum[:]) + if _, dup := seen[key]; dup { + continue + } + seen[key] = struct{}{} + out = append(out, f) + } + return out +} From 1eb86ca308169f65a450ab487fb52042a44b70d9 Mon Sep 17 00:00:00 2001 From: salvacybersec Date: Mon, 6 Apr 2026 00:44:37 +0300 Subject: [PATCH 5/5] docs(09-03): complete stealth UA pool and dedup plan - Stealth UA pool (10 browsers) + RandomUserAgent/StealthHeaders - Stable cross-source Dedup keyed by sha256(provider|masked|source) - Mark RECON-INFRA-06 complete --- .planning/REQUIREMENTS.md | 2 +- .planning/ROADMAP.md | 2 +- .planning/STATE.md | 16 ++- .../09-osint-infrastructure/09-03-SUMMARY.md | 127 ++++++++++++++++++ 4 files changed, 138 insertions(+), 9 deletions(-) create mode 100644 .planning/phases/09-osint-infrastructure/09-03-SUMMARY.md diff --git a/.planning/REQUIREMENTS.md b/.planning/REQUIREMENTS.md index d99f3cc..1d8ca6f 100644 --- a/.planning/REQUIREMENTS.md +++ b/.planning/REQUIREMENTS.md @@ -205,7 +205,7 @@ Requirements for 
initial release. Each maps to roadmap phases. ### OSINT/Recon — Infrastructure - [ ] **RECON-INFRA-05**: Per-source rate limiter with configurable limits -- [ ] **RECON-INFRA-06**: Stealth mode (--stealth) with UA rotation and increased delays +- [x] **RECON-INFRA-06**: Stealth mode (--stealth) with UA rotation and increased delays - [ ] **RECON-INFRA-07**: robots.txt respect (--respect-robots, default on) - [ ] **RECON-INFRA-08**: Recon full command — parallel sweep across all sources with deduplication diff --git a/.planning/ROADMAP.md b/.planning/ROADMAP.md index e8c7cf1..83ff587 100644 --- a/.planning/ROADMAP.md +++ b/.planning/ROADMAP.md @@ -200,7 +200,7 @@ Plans: **Plans**: 6 plans - [ ] 09-01-PLAN.md — ReconSource interface + Engine skeleton + ExampleSource stub - [ ] 09-02-PLAN.md — LimiterRegistry per-source rate.Limiter + jitter -- [ ] 09-03-PLAN.md — Stealth UA pool + cross-source dedup +- [x] 09-03-PLAN.md — Stealth UA pool + cross-source dedup - [ ] 09-04-PLAN.md — robots.txt parser with 1h per-host cache - [ ] 09-05-PLAN.md — cmd/recon.go CLI tree (full, list) - [ ] 09-06-PLAN.md — Integration test + phase summary diff --git a/.planning/STATE.md b/.planning/STATE.md index b51ea73..492e963 100644 --- a/.planning/STATE.md +++ b/.planning/STATE.md @@ -3,14 +3,14 @@ gsd_state_version: 1.0 milestone: v1.0 milestone_name: milestone status: executing -stopped_at: Completed 08-07-PLAN.md -last_updated: "2026-04-05T21:32:47.810Z" +stopped_at: Completed 09-03-PLAN.md +last_updated: "2026-04-05T21:44:25.836Z" last_activity: 2026-04-05 progress: total_phases: 18 - completed_phases: 8 - total_plans: 47 - completed_plans: 47 + completed_phases: 7 + total_plans: 48 + completed_plans: 48 percent: 20 --- @@ -82,6 +82,7 @@ Progress: [██░░░░░░░░] 20% | Phase 08-dork-engine P02 | 12min | 2 tasks | 11 files | | Phase 08-dork-engine P03 | 10m | 2 tasks | 10 files | | Phase 08-dork-engine P07 | 3m | 1 tasks | 1 files | +| Phase 09 P03 | 8min | 2 tasks | 4 files | ## 
Accumulated Context @@ -115,6 +116,7 @@ Recent decisions affecting current work: - [Phase 06-output-reporting]: keys export rejects SARIF (scan-only); keys show always unmasked; keys verify updates findings inline via db.SQL().Exec - [Phase 08-dork-engine]: pkg/dorks mirrors pkg/providers go:embed pattern; //go:embed definitions/* tolerates empty .gitkeep-only tree - [Phase 08-dork-engine]: Runner + Executor interface separate from Registry so 08-05 GitHub executor registers without touching YAML loader +- [Phase 09]: Plan 09-03: Dedup uses engine.Finding directly to avoid parallel-wave alias collision with Plan 09-01 ### Pending Todos @@ -129,6 +131,6 @@ None yet. ## Session Continuity -Last session: 2026-04-05T21:25:47.469Z -Stopped at: Completed 08-07-PLAN.md +Last session: 2026-04-05T21:44:25.833Z +Stopped at: Completed 09-03-PLAN.md Resume file: None diff --git a/.planning/phases/09-osint-infrastructure/09-03-SUMMARY.md b/.planning/phases/09-osint-infrastructure/09-03-SUMMARY.md new file mode 100644 index 0000000..7a2c428 --- /dev/null +++ b/.planning/phases/09-osint-infrastructure/09-03-SUMMARY.md @@ -0,0 +1,127 @@ +--- +phase: 09-osint-infrastructure +plan: 03 +subsystem: recon +tags: [stealth, user-agent, dedup, sha256, osint] + +requires: + - phase: 09-osint-infrastructure + provides: "pkg/recon package namespace (Plan 09-01, parallel wave 1)" +provides: + - "pkg/recon/stealth.go: 10-entry browser UA pool with RandomUserAgent/StealthHeaders helpers" + - "pkg/recon/dedup.go: stable cross-source Finding dedup keyed by sha256(provider|masked|source)" +affects: [09-01, 09-02, 10-sources, 11-sources, 12-sources, 13-sources, 14-sources, 15-sources, 16-sources] + +tech-stack: + added: [] + patterns: + - "stdlib-only dedup (crypto/sha256 + encoding/hex)" + - "first-seen-wins stable dedup preserving input order" + - "cross-platform UA pool covering desktop + mobile" + +key-files: + created: + - pkg/recon/stealth.go + - pkg/recon/stealth_test.go + - 
pkg/recon/dedup.go + - pkg/recon/dedup_test.go + modified: [] + +key-decisions: + - "Use engine.Finding directly in dedup.go instead of a local Finding alias to avoid duplicate type declaration with Plan 09-01's source.go in parallel wave 1" + - "Hash key = sha256(ProviderName|KeyMasked|Source) so same key found at different URLs is retained" + - "Stable dedup: first-seen metadata (DetectedAt, Confidence) wins over later duplicates" + +patterns-established: + - "Stealth mode helpers: exported RandomUserAgent + StealthHeaders for recon sources to merge into requests" + - "Stable dedup primitive: Dedup([]engine.Finding) []engine.Finding, stdlib only, O(n)" + +requirements-completed: [RECON-INFRA-06] + +duration: 8min +completed: 2026-04-05 +--- + +# Phase 09 Plan 03: Stealth UA Pool + Cross-Source Dedup Summary + +**10-entry browser User-Agent pool with RandomUserAgent/StealthHeaders and a stable SHA256-keyed Finding Dedup primitive ready for SweepAll orchestration.** + +## Performance + +- **Duration:** ~8 min +- **Started:** 2026-04-05T21:35:00Z +- **Completed:** 2026-04-05T21:43:18Z +- **Tasks:** 2 (both TDD) +- **Files created:** 4 + +## Accomplishments +- Stealth UA pool with 10 realistic browser User-Agents covering Chrome/Firefox/Safari/Edge on Windows, macOS, Linux, iOS, and Android +- `RandomUserAgent()` + `StealthHeaders()` helpers returning rotated UA + `Accept-Language: en-US,en;q=0.9` +- Stable cross-source `Dedup([]engine.Finding) []engine.Finding` keyed by `sha256(ProviderName|KeyMasked|Source)` +- First-seen metadata preserved; different Source URLs keep the same provider+masked key as distinct findings +- `go test ./pkg/recon/` green, `go vet ./pkg/recon/...` clean + +## Task Commits + +TDD flow (test → feat per task): + +1. **Task 1: Stealth UA pool + RandomUserAgent** + - RED: `bbbc05f` (test: add failing test for stealth UA pool) + - GREEN: `2c140e9` (feat: implement stealth UA pool and StealthHeaders) +2. 
**Task 2: Cross-source finding dedup** + - RED: `ecfa2bf` (test: add failing test for cross-source Dedup) + - GREEN: `2988fdf` (feat: implement stable cross-source finding Dedup) + +## Files Created/Modified +- `pkg/recon/stealth.go` — 10-entry UA pool, `RandomUserAgent`, `StealthHeaders` +- `pkg/recon/stealth_test.go` — `TestUAPoolSize`, `TestRandomUserAgentInPool` (100 iterations), `TestStealthHeadersHasUA` +- `pkg/recon/dedup.go` — `Dedup([]engine.Finding) []engine.Finding` with sha256 key + stable first-seen semantics +- `pkg/recon/dedup_test.go` — `TestDedupEmpty`, `TestDedupNoDuplicates`, `TestDedupAllDuplicates`, `TestDedupPreservesFirstSeen`, `TestDedupDifferentSource` + +## Decisions Made +- **Use `engine.Finding` directly in `dedup.go` rather than a local `recon.Finding` alias.** Plan 09-01 (same wave, parallel) will declare `type Finding = engine.Finding` in `pkg/recon/source.go`. Declaring it again here would cause a post-merge duplicate declaration. Importing `engine.Finding` explicitly is forward-compatible — when 09-01 merges, `recon.Finding` becomes available and this file continues to compile either way. +- **Dedup key = `sha256(ProviderName|KeyMasked|Source)`.** Masked key avoids hashing plaintext; including `Source` ensures a leaked key found at multiple URLs is reported at every location rather than collapsed to one. +- **Stable first-seen wins.** Iteration is single-pass with a `seen` map; output order matches input order. + +## Deviations from Plan + +### Auto-fixed Issues + +**1. [Rule 3 - Blocking] Use `engine.Finding` instead of local `Finding` alias** +- **Found during:** Task 2 (Dedup implementation) +- **Issue:** Plan 09-03 executes in wave 1 parallel with Plan 09-01. Plan 09-01 declares `type Finding = engine.Finding` in `pkg/recon/source.go`. 
The original plan body for 09-03 referenced bare `Finding` in `dedup.go`, which would require either a duplicate alias (post-merge conflict/duplicate declaration) or a dependency on 09-01's file that does not yet exist on this branch. +- **Fix:** Imported `github.com/salvacybersec/keyhunter/pkg/engine` in `dedup.go` and `dedup_test.go` and used `engine.Finding` directly. Behavior and test coverage are identical; signature is `Dedup([]engine.Finding) []engine.Finding`. A doc comment in `dedup.go` records the rationale. +- **Files modified:** `pkg/recon/dedup.go`, `pkg/recon/dedup_test.go` +- **Verification:** `go test ./pkg/recon/ -count=1` passes; `go vet ./pkg/recon/...` clean. +- **Committed in:** `2988fdf` (Task 2 GREEN commit) + +--- + +**Total deviations:** 1 auto-fixed (1 blocking / parallel-safety) +**Impact on plan:** No scope change. The public signature matches downstream expectations because `recon.Finding` is a type alias — `[]recon.Finding` and `[]engine.Finding` are interchangeable, so SweepAll (Plan 09-01) can still call `Dedup` without any adapter. + +## Issues Encountered +None beyond the deviation above. + +## User Setup Required +None. + +## Next Phase Readiness +- Plan 09-02 (rate limiter + jitter) can call `StealthHeaders` for outbound requests when `Config.Stealth` is true. +- Plan 09-01's `Engine.SweepAll` can call `recon.Dedup(all)` before returning to satisfy RECON-INFRA-08's "deduplicates findings before persisting" criterion. +- RECON-INFRA-06: the UA-rotation part is satisfied by this plan; the requirement's "increased delays" part is not implemented here. + +## Self-Check: PASSED +- FOUND: pkg/recon/stealth.go +- FOUND: pkg/recon/stealth_test.go +- FOUND: pkg/recon/dedup.go +- FOUND: pkg/recon/dedup_test.go +- FOUND commit: bbbc05f +- FOUND commit: 2c140e9 +- FOUND commit: ecfa2bf +- FOUND commit: 2988fdf + +--- +*Phase: 09-osint-infrastructure* +*Plan: 03* +*Completed: 2026-04-05*