- pkg/engine/sources/source.go: Source interface using pkg/types.Chunk
- pkg/engine/sources/file.go: FileSource with overlapping chunk reads
- pkg/engine/filter.go: KeywordFilter using Aho-Corasick pre-filter
- pkg/engine/detector.go: Detect with regex matching + Shannon entropy check
- pkg/engine/engine.go: Engine.Scan orchestrating 3-stage pipeline with ants pool
- pkg/engine/scanner_test.go: filled test stubs with pipeline integration tests
- testdata/samples: fixed anthropic key lengths to match {93,} regex pattern
18 lines
551 B
Go
18 lines
551 B
Go
package engine
|
|
|
|
import (
|
|
ahocorasick "github.com/petar-dambovaliev/aho-corasick"
|
|
"github.com/salvacybersec/keyhunter/pkg/types"
|
|
)
|
|
|
|
// KeywordFilter filters a stream of chunks using an Aho-Corasick automaton.
|
|
// Only chunks that contain at least one provider keyword are sent to out.
|
|
// This is Stage 2 of the pipeline (runs after Source, before Detector).
|
|
func KeywordFilter(ac ahocorasick.AhoCorasick, in <-chan types.Chunk, out chan<- types.Chunk) {
|
|
for chunk := range in {
|
|
if len(ac.FindAll(string(chunk.Data))) > 0 {
|
|
out <- chunk
|
|
}
|
|
}
|
|
}
|