Files
keyhunter/pkg/engine/detector.go
salvacybersec cea2e371cc feat(01-04): implement three-stage scanning pipeline with ants worker pool
- pkg/engine/sources/source.go: Source interface using pkg/types.Chunk
- pkg/engine/sources/file.go: FileSource with overlapping chunk reads
- pkg/engine/filter.go: KeywordFilter using Aho-Corasick pre-filter
- pkg/engine/detector.go: Detect with regex matching + Shannon entropy check
- pkg/engine/engine.go: Engine.Scan orchestrating 3-stage pipeline with ants pool
- pkg/engine/scanner_test.go: filled test stubs with pipeline integration tests
- testdata/samples: fixed anthropic key lengths to match {93,} regex pattern
2026-04-05 12:21:17 +03:00

56 lines
1.4 KiB
Go

package engine
import (
"regexp"
"strings"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/types"
)
// Detect applies every provider's regex patterns to a chunk and returns one
// Finding per match.
//
// For a pattern whose EntropyMin is greater than zero, matches whose Shannon
// entropy is below that threshold are dropped as likely placeholders.
// Patterns whose regex fails to compile are skipped without reporting an error.
//
// LineNumber is the 1-based line, within the chunk, on which that particular
// match starts. It is derived from the match's own offset rather than from a
// text search, so a key repeated on several lines yields a distinct line
// number for each occurrence. Offset is the chunk's offset, not the match's.
//
// The result is nil when nothing matches.
func Detect(chunk types.Chunk, providerList []providers.Provider) []Finding {
	var findings []Finding
	content := string(chunk.Data)
	for _, p := range providerList {
		for _, pat := range p.Patterns {
			// NOTE(review): the pattern is recompiled on every call; if Detect
			// is invoked once per chunk, precompiling at provider load time
			// would avoid the repeated work.
			re, err := regexp.Compile(pat.Regex)
			if err != nil {
				continue // invalid regex -- skip silently
			}
			// FindAllStringIndex yields matches in ascending offset order, so
			// the line counter only needs to scan the text between the
			// previous match start and the current one.
			line, scanned := 1, 0
			for _, loc := range re.FindAllStringIndex(content, -1) {
				start, end := loc[0], loc[1]
				line += strings.Count(content[scanned:start], "\n")
				scanned = start
				match := content[start:end]
				// Apply entropy check if threshold is set.
				if pat.EntropyMin > 0 && Shannon(match) < pat.EntropyMin {
					continue // too low entropy -- likely a placeholder
				}
				findings = append(findings, Finding{
					ProviderName: p.Name,
					KeyValue:     match,
					KeyMasked:    MaskKey(match),
					Confidence:   pat.Confidence,
					Source:       chunk.Source,
					SourceType:   "file",
					LineNumber:   line,
					Offset:       chunk.Offset,
					DetectedAt:   time.Now(),
				})
			}
		}
	}
	return findings
}
// lineNumber reports the 1-based line on which the first occurrence of match
// begins inside content. It returns 0 when match does not occur at all.
func lineNumber(content, match string) int {
	// Everything before the first occurrence determines the line: one line
	// per newline seen, plus the line the match itself sits on.
	prefix, _, found := strings.Cut(content, match)
	if !found {
		return 0
	}
	return 1 + strings.Count(prefix, "\n")
}