feat(01-04): implement three-stage scanning pipeline with ants worker pool
- pkg/engine/sources/source.go: Source interface using pkg/types.Chunk
- pkg/engine/sources/file.go: FileSource with overlapping chunk reads
- pkg/engine/filter.go: KeywordFilter using Aho-Corasick pre-filter
- pkg/engine/detector.go: Detect with regex matching + Shannon entropy check
- pkg/engine/engine.go: Engine.Scan orchestrating 3-stage pipeline with ants pool
- pkg/engine/scanner_test.go: filled test stubs with pipeline integration tests
- testdata/samples: fixed anthropic key lengths to match {93,} regex pattern
This commit is contained in:
@@ -1,23 +1,116 @@
|
||||
package engine_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/engine"
|
||||
"github.com/salvacybersec/keyhunter/pkg/engine/sources"
|
||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestShannonEntropy verifies the entropy function returns expected values.
|
||||
// Stub: will be implemented when entropy.go exists (Plan 04).
|
||||
func newTestRegistry(t *testing.T) *providers.Registry {
|
||||
t.Helper()
|
||||
reg, err := providers.NewRegistry()
|
||||
require.NoError(t, err)
|
||||
return reg
|
||||
}
|
||||
|
||||
func TestShannonEntropy(t *testing.T) {
|
||||
t.Skip("stub — implement after entropy.go exists")
|
||||
assert.InDelta(t, 0.0, engine.Shannon("aaaaaaa"), 0.01)
|
||||
assert.Greater(t, engine.Shannon("sk-proj-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqr"), 3.5)
|
||||
assert.Equal(t, 0.0, engine.Shannon(""))
|
||||
}
|
||||
|
||||
// TestKeywordPreFilter verifies Aho-Corasick pre-filter rejects files without keywords.
|
||||
// Stub: will be implemented when filter.go exists (Plan 04).
|
||||
func TestKeywordPreFilter(t *testing.T) {
|
||||
t.Skip("stub — implement after filter.go exists")
|
||||
reg := newTestRegistry(t)
|
||||
ac := reg.AC()
|
||||
|
||||
// Chunk with OpenAI keyword should pass
|
||||
matches := ac.FindAll("export OPENAI_API_KEY=sk-proj-test")
|
||||
assert.NotEmpty(t, matches)
|
||||
|
||||
// Chunk with no keywords should be dropped
|
||||
noMatches := ac.FindAll("hello world no secrets here")
|
||||
assert.Empty(t, noMatches)
|
||||
}
|
||||
|
||||
// TestScannerPipeline verifies end-to-end scan of testdata returns expected findings.
|
||||
// Stub: will be implemented when engine.go exists (Plan 04).
|
||||
func TestScannerPipeline(t *testing.T) {
|
||||
t.Skip("stub — implement after engine.go exists")
|
||||
func TestScannerPipelineOpenAI(t *testing.T) {
|
||||
reg := newTestRegistry(t)
|
||||
eng := engine.NewEngine(reg)
|
||||
src := sources.NewFileSource("../../testdata/samples/openai_key.txt")
|
||||
cfg := engine.ScanConfig{Workers: 2}
|
||||
|
||||
ch, err := eng.Scan(context.Background(), src, cfg)
|
||||
require.NoError(t, err)
|
||||
|
||||
var findings []engine.Finding
|
||||
for f := range ch {
|
||||
findings = append(findings, f)
|
||||
}
|
||||
|
||||
require.Len(t, findings, 1, "expected exactly 1 finding in openai_key.txt")
|
||||
assert.Equal(t, "openai", findings[0].ProviderName)
|
||||
assert.Contains(t, findings[0].KeyValue, "sk-proj-")
|
||||
}
|
||||
|
||||
func TestScannerPipelineAnthropic(t *testing.T) {
|
||||
reg := newTestRegistry(t)
|
||||
eng := engine.NewEngine(reg)
|
||||
src := sources.NewFileSource("../../testdata/samples/anthropic_key.txt")
|
||||
cfg := engine.ScanConfig{Workers: 2}
|
||||
|
||||
ch, err := eng.Scan(context.Background(), src, cfg)
|
||||
require.NoError(t, err)
|
||||
|
||||
var findings []engine.Finding
|
||||
for f := range ch {
|
||||
findings = append(findings, f)
|
||||
}
|
||||
|
||||
require.Len(t, findings, 1, "expected exactly 1 finding in anthropic_key.txt")
|
||||
assert.Equal(t, "anthropic", findings[0].ProviderName)
|
||||
}
|
||||
|
||||
func TestScannerPipelineNoKeys(t *testing.T) {
|
||||
reg := newTestRegistry(t)
|
||||
eng := engine.NewEngine(reg)
|
||||
src := sources.NewFileSource("../../testdata/samples/no_keys.txt")
|
||||
cfg := engine.ScanConfig{Workers: 2}
|
||||
|
||||
ch, err := eng.Scan(context.Background(), src, cfg)
|
||||
require.NoError(t, err)
|
||||
|
||||
var findings []engine.Finding
|
||||
for f := range ch {
|
||||
findings = append(findings, f)
|
||||
}
|
||||
|
||||
assert.Empty(t, findings, "expected zero findings in no_keys.txt")
|
||||
}
|
||||
|
||||
func TestScannerPipelineMultipleKeys(t *testing.T) {
|
||||
reg := newTestRegistry(t)
|
||||
eng := engine.NewEngine(reg)
|
||||
src := sources.NewFileSource("../../testdata/samples/multiple_keys.txt")
|
||||
cfg := engine.ScanConfig{Workers: 2}
|
||||
|
||||
ch, err := eng.Scan(context.Background(), src, cfg)
|
||||
require.NoError(t, err)
|
||||
|
||||
var findings []engine.Finding
|
||||
for f := range ch {
|
||||
findings = append(findings, f)
|
||||
}
|
||||
|
||||
assert.GreaterOrEqual(t, len(findings), 2, "expected at least 2 findings in multiple_keys.txt")
|
||||
|
||||
var names []string
|
||||
for _, f := range findings {
|
||||
names = append(names, f.ProviderName)
|
||||
}
|
||||
assert.Contains(t, names, "openai")
|
||||
assert.Contains(t, names, "anthropic")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user