feat(01-04): implement three-stage scanning pipeline with ants worker pool
- pkg/engine/sources/source.go: Source interface using pkg/types.Chunk
- pkg/engine/sources/file.go: FileSource with overlapping chunk reads
- pkg/engine/filter.go: KeywordFilter using Aho-Corasick pre-filter
- pkg/engine/detector.go: Detect with regex matching + Shannon entropy check
- pkg/engine/engine.go: Engine.Scan orchestrating 3-stage pipeline with ants pool
- pkg/engine/scanner_test.go: filled test stubs with pipeline integration tests
- testdata/samples: fixed anthropic key lengths to match {93,} regex pattern
This commit is contained in:
17
pkg/engine/filter.go
Normal file
17
pkg/engine/filter.go
Normal file
@@ -0,0 +1,17 @@
|
||||
package engine
|
||||
|
||||
import (
|
||||
ahocorasick "github.com/petar-dambovaliev/aho-corasick"
|
||||
"github.com/salvacybersec/keyhunter/pkg/types"
|
||||
)
|
||||
|
||||
// KeywordFilter filters a stream of chunks using an Aho-Corasick automaton.
|
||||
// Only chunks that contain at least one provider keyword are sent to out.
|
||||
// This is Stage 2 of the pipeline (runs after Source, before Detector).
|
||||
func KeywordFilter(ac ahocorasick.AhoCorasick, in <-chan types.Chunk, out chan<- types.Chunk) {
|
||||
for chunk := range in {
|
||||
if len(ac.FindAll(string(chunk.Data))) > 0 {
|
||||
out <- chunk
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user