- pkg/engine/sources/source.go: Source interface using pkg/types.Chunk
- pkg/engine/sources/file.go: FileSource with overlapping chunk reads
- pkg/engine/filter.go: KeywordFilter using Aho-Corasick pre-filter
- pkg/engine/detector.go: Detect with regex matching + Shannon entropy check
- pkg/engine/engine.go: Engine.Scan orchestrating 3-stage pipeline with ants pool
- pkg/engine/scanner_test.go: filled test stubs with pipeline integration tests
- testdata/samples: fixed anthropic key lengths to match {93,} regex pattern
56 lines
1.4 KiB
Go
56 lines
1.4 KiB
Go
package engine
|
|
|
|
import (
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
|
"github.com/salvacybersec/keyhunter/pkg/types"
|
|
)
|
|
|
|
// Detect applies provider regex patterns and optional entropy checks to a chunk.
|
|
// It returns all findings from the chunk.
|
|
func Detect(chunk types.Chunk, providerList []providers.Provider) []Finding {
|
|
var findings []Finding
|
|
content := string(chunk.Data)
|
|
|
|
for _, p := range providerList {
|
|
for _, pat := range p.Patterns {
|
|
re, err := regexp.Compile(pat.Regex)
|
|
if err != nil {
|
|
continue // invalid regex -- skip silently
|
|
}
|
|
matches := re.FindAllString(content, -1)
|
|
for _, match := range matches {
|
|
// Apply entropy check if threshold is set
|
|
if pat.EntropyMin > 0 && Shannon(match) < pat.EntropyMin {
|
|
continue // too low entropy -- likely a placeholder
|
|
}
|
|
line := lineNumber(content, match)
|
|
findings = append(findings, Finding{
|
|
ProviderName: p.Name,
|
|
KeyValue: match,
|
|
KeyMasked: MaskKey(match),
|
|
Confidence: pat.Confidence,
|
|
Source: chunk.Source,
|
|
SourceType: "file",
|
|
LineNumber: line,
|
|
Offset: chunk.Offset,
|
|
DetectedAt: time.Now(),
|
|
})
|
|
}
|
|
}
|
|
}
|
|
return findings
|
|
}
|
|
|
|
// lineNumber reports the 1-based line on which the first occurrence of match
// begins inside content. It returns 0 when match does not occur in content.
func lineNumber(content, match string) int {
	pos := strings.Index(content, match)
	if pos == -1 {
		return 0
	}

	// Every newline byte before the match start pushes it one line down.
	line := 1
	for i := 0; i < pos; i++ {
		if content[i] == '\n' {
			line++
		}
	}
	return line
}
|