// Package engine_test exercises the public API of the engine package:
// the Shannon entropy helper, the registry's keyword matcher, and the
// end-to-end scanning pipeline run against sample files on disk.
package engine_test

import (
	"context"
	"testing"

	"github.com/salvacybersec/keyhunter/pkg/engine"
	"github.com/salvacybersec/keyhunter/pkg/engine/sources"
	"github.com/salvacybersec/keyhunter/pkg/providers"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// newTestRegistry builds a provider registry with providers.NewRegistry and
// aborts the calling test immediately if construction fails.
func newTestRegistry(t *testing.T) *providers.Registry {
	t.Helper()
	reg, err := providers.NewRegistry()
	require.NoError(t, err)
	return reg
}

// scanSampleFile runs the engine over the file at path with a fresh registry
// and two workers, then returns every finding the scan emitted. The test is
// aborted if the scan cannot be started.
func scanSampleFile(t *testing.T, path string) []engine.Finding {
	t.Helper()
	eng := engine.NewEngine(newTestRegistry(t))
	src := sources.NewFileSource(path)

	ch, err := eng.Scan(context.Background(), src, engine.ScanConfig{Workers: 2})
	require.NoError(t, err)

	// The range loop only ends once the channel is closed, so the returned
	// slice holds everything the scan produced.
	var findings []engine.Finding
	for f := range ch {
		findings = append(findings, f)
	}
	return findings
}

// findingProviderNames returns the ProviderName of each finding, in order.
func findingProviderNames(findings []engine.Finding) []string {
	names := make([]string, 0, len(findings))
	for _, f := range findings {
		names = append(names, f.ProviderName)
	}
	return names
}

// TestShannonEntropy checks engine.Shannon on three inputs: a string of one
// repeated character (entropy about 0), a key-like string with many distinct
// characters (entropy above 3.5), and the empty string (exactly 0).
func TestShannonEntropy(t *testing.T) {
	assert.InDelta(t, 0.0, engine.Shannon("aaaaaaa"), 0.01)
	assert.Greater(t, engine.Shannon("sk-proj-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqr"), 3.5)
	assert.Equal(t, 0.0, engine.Shannon(""))
}

// TestKeywordPreFilter checks that the registry's matcher (reg.AC()) reports
// matches for text containing a provider keyword and none for unrelated text.
func TestKeywordPreFilter(t *testing.T) {
	reg := newTestRegistry(t)
	ac := reg.AC()

	// Chunk with OpenAI keyword should pass
	matches := ac.FindAll("export OPENAI_API_KEY=sk-proj-test")
	assert.NotEmpty(t, matches)

	// Chunk with no keywords should be dropped
	noMatches := ac.FindAll("hello world no secrets here")
	assert.Empty(t, noMatches)
}

// TestScannerPipelineOpenAI scans the OpenAI sample file and expects at least
// one finding attributed to the "openai" provider.
func TestScannerPipelineOpenAI(t *testing.T) {
	findings := scanSampleFile(t, "../../testdata/samples/openai_key.txt")
	require.NotEmpty(t, findings, "expected at least 1 finding in openai_key.txt")

	names := findingProviderNames(findings)
	assert.Contains(t, names, "openai", "expected openai provider in findings")
}

// TestScannerPipelineAnthropic scans the Anthropic sample file and expects at
// least one finding attributed to the "anthropic" provider.
func TestScannerPipelineAnthropic(t *testing.T) {
	findings := scanSampleFile(t, "../../testdata/samples/anthropic_key.txt")
	require.NotEmpty(t, findings, "expected at least 1 finding in anthropic_key.txt")

	names := findingProviderNames(findings)
	assert.Contains(t, names, "anthropic", "expected anthropic provider in findings")
}

// TestScannerPipelineNoKeys scans a sample file without keys and expects the
// scan to emit no findings.
func TestScannerPipelineNoKeys(t *testing.T) {
	findings := scanSampleFile(t, "../../testdata/samples/no_keys.txt")
	assert.Empty(t, findings, "expected zero findings in no_keys.txt")
}

// TestScannerPipelineMultipleKeys scans a sample file with several keys and
// expects at least two findings, covering both the "openai" and "anthropic"
// providers.
func TestScannerPipelineMultipleKeys(t *testing.T) {
	findings := scanSampleFile(t, "../../testdata/samples/multiple_keys.txt")
	assert.GreaterOrEqual(t, len(findings), 2, "expected at least 2 findings in multiple_keys.txt")

	names := findingProviderNames(findings)
	assert.Contains(t, names, "openai")
	assert.Contains(t, names, "anthropic")
}