- pkg/engine/sources/source.go: Source interface using pkg/types.Chunk
- pkg/engine/sources/file.go: FileSource with overlapping chunk reads
- pkg/engine/filter.go: KeywordFilter using Aho-Corasick pre-filter
- pkg/engine/detector.go: Detect with regex matching + Shannon entropy check
- pkg/engine/engine.go: Engine.Scan orchestrating 3-stage pipeline with ants pool
- pkg/engine/scanner_test.go: filled test stubs with pipeline integration tests
- testdata/samples: fixed anthropic key lengths to match {93,} regex pattern
117 lines
3.2 KiB
Go
117 lines
3.2 KiB
Go
package engine_test
|
|
|
|
import (
|
|
"context"
|
|
"testing"
|
|
|
|
"github.com/salvacybersec/keyhunter/pkg/engine"
|
|
"github.com/salvacybersec/keyhunter/pkg/engine/sources"
|
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
func newTestRegistry(t *testing.T) *providers.Registry {
|
|
t.Helper()
|
|
reg, err := providers.NewRegistry()
|
|
require.NoError(t, err)
|
|
return reg
|
|
}
|
|
|
|
func TestShannonEntropy(t *testing.T) {
|
|
assert.InDelta(t, 0.0, engine.Shannon("aaaaaaa"), 0.01)
|
|
assert.Greater(t, engine.Shannon("sk-proj-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqr"), 3.5)
|
|
assert.Equal(t, 0.0, engine.Shannon(""))
|
|
}
|
|
|
|
func TestKeywordPreFilter(t *testing.T) {
|
|
reg := newTestRegistry(t)
|
|
ac := reg.AC()
|
|
|
|
// Chunk with OpenAI keyword should pass
|
|
matches := ac.FindAll("export OPENAI_API_KEY=sk-proj-test")
|
|
assert.NotEmpty(t, matches)
|
|
|
|
// Chunk with no keywords should be dropped
|
|
noMatches := ac.FindAll("hello world no secrets here")
|
|
assert.Empty(t, noMatches)
|
|
}
|
|
|
|
func TestScannerPipelineOpenAI(t *testing.T) {
|
|
reg := newTestRegistry(t)
|
|
eng := engine.NewEngine(reg)
|
|
src := sources.NewFileSource("../../testdata/samples/openai_key.txt")
|
|
cfg := engine.ScanConfig{Workers: 2}
|
|
|
|
ch, err := eng.Scan(context.Background(), src, cfg)
|
|
require.NoError(t, err)
|
|
|
|
var findings []engine.Finding
|
|
for f := range ch {
|
|
findings = append(findings, f)
|
|
}
|
|
|
|
require.Len(t, findings, 1, "expected exactly 1 finding in openai_key.txt")
|
|
assert.Equal(t, "openai", findings[0].ProviderName)
|
|
assert.Contains(t, findings[0].KeyValue, "sk-proj-")
|
|
}
|
|
|
|
func TestScannerPipelineAnthropic(t *testing.T) {
|
|
reg := newTestRegistry(t)
|
|
eng := engine.NewEngine(reg)
|
|
src := sources.NewFileSource("../../testdata/samples/anthropic_key.txt")
|
|
cfg := engine.ScanConfig{Workers: 2}
|
|
|
|
ch, err := eng.Scan(context.Background(), src, cfg)
|
|
require.NoError(t, err)
|
|
|
|
var findings []engine.Finding
|
|
for f := range ch {
|
|
findings = append(findings, f)
|
|
}
|
|
|
|
require.Len(t, findings, 1, "expected exactly 1 finding in anthropic_key.txt")
|
|
assert.Equal(t, "anthropic", findings[0].ProviderName)
|
|
}
|
|
|
|
func TestScannerPipelineNoKeys(t *testing.T) {
|
|
reg := newTestRegistry(t)
|
|
eng := engine.NewEngine(reg)
|
|
src := sources.NewFileSource("../../testdata/samples/no_keys.txt")
|
|
cfg := engine.ScanConfig{Workers: 2}
|
|
|
|
ch, err := eng.Scan(context.Background(), src, cfg)
|
|
require.NoError(t, err)
|
|
|
|
var findings []engine.Finding
|
|
for f := range ch {
|
|
findings = append(findings, f)
|
|
}
|
|
|
|
assert.Empty(t, findings, "expected zero findings in no_keys.txt")
|
|
}
|
|
|
|
func TestScannerPipelineMultipleKeys(t *testing.T) {
|
|
reg := newTestRegistry(t)
|
|
eng := engine.NewEngine(reg)
|
|
src := sources.NewFileSource("../../testdata/samples/multiple_keys.txt")
|
|
cfg := engine.ScanConfig{Workers: 2}
|
|
|
|
ch, err := eng.Scan(context.Background(), src, cfg)
|
|
require.NoError(t, err)
|
|
|
|
var findings []engine.Finding
|
|
for f := range ch {
|
|
findings = append(findings, f)
|
|
}
|
|
|
|
assert.GreaterOrEqual(t, len(findings), 2, "expected at least 2 findings in multiple_keys.txt")
|
|
|
|
var names []string
|
|
for _, f := range findings {
|
|
names = append(names, f.ProviderName)
|
|
}
|
|
assert.Contains(t, names, "openai")
|
|
assert.Contains(t, names, "anthropic")
|
|
}
|