- TestVerifyAll_MultipleFindings: 5 findings via 3-worker pool
- TestVerifyAll_MissingProvider: unknown provider yields StatusUnknown
- TestVerifyAll_ContextCancellation: cancellation closes channel early
- Add providers.NewRegistryFromProviders test helper
91 lines
2.5 KiB
Go
91 lines
2.5 KiB
Go
package providers
|
|
|
|
import (
|
|
"fmt"
|
|
|
|
ahocorasick "github.com/petar-dambovaliev/aho-corasick"
|
|
)
|
|
|
|
// Registry is the in-memory store of all loaded provider definitions.
|
|
// It is initialized once at startup and is safe for concurrent reads.
|
|
type Registry struct {
|
|
providers []Provider
|
|
index map[string]int // name -> slice index
|
|
ac ahocorasick.AhoCorasick // pre-built automaton for keyword pre-filter
|
|
}
|
|
|
|
// NewRegistry loads all embedded provider YAML files, validates them, builds the
|
|
// Aho-Corasick automaton from all provider keywords, and returns the Registry.
|
|
func NewRegistry() (*Registry, error) {
|
|
providers, err := loadProviders()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("loading providers: %w", err)
|
|
}
|
|
|
|
index := make(map[string]int, len(providers))
|
|
var keywords []string
|
|
for i, p := range providers {
|
|
index[p.Name] = i
|
|
keywords = append(keywords, p.Keywords...)
|
|
}
|
|
|
|
builder := ahocorasick.NewAhoCorasickBuilder(ahocorasick.Opts{DFA: true})
|
|
ac := builder.Build(keywords)
|
|
|
|
return &Registry{
|
|
providers: providers,
|
|
index: index,
|
|
ac: ac,
|
|
}, nil
|
|
}
|
|
|
|
// NewRegistryFromProviders builds a Registry from an explicit slice of providers
|
|
// without touching the embedded YAML files. Intended for tests that need a
|
|
// minimal registry with synthetic providers.
|
|
func NewRegistryFromProviders(ps []Provider) *Registry {
|
|
index := make(map[string]int, len(ps))
|
|
var keywords []string
|
|
for i, p := range ps {
|
|
index[p.Name] = i
|
|
keywords = append(keywords, p.Keywords...)
|
|
}
|
|
builder := ahocorasick.NewAhoCorasickBuilder(ahocorasick.Opts{DFA: true})
|
|
ac := builder.Build(keywords)
|
|
return &Registry{providers: ps, index: index, ac: ac}
|
|
}
|
|
|
|
// List returns all loaded providers.
|
|
func (r *Registry) List() []Provider {
|
|
return r.providers
|
|
}
|
|
|
|
// Get returns a provider by name and a boolean indicating whether it was found.
|
|
func (r *Registry) Get(name string) (Provider, bool) {
|
|
idx, ok := r.index[name]
|
|
if !ok {
|
|
return Provider{}, false
|
|
}
|
|
return r.providers[idx], true
|
|
}
|
|
|
|
// Stats returns aggregate statistics about the loaded providers.
|
|
func (r *Registry) Stats() RegistryStats {
|
|
stats := RegistryStats{
|
|
Total: len(r.providers),
|
|
ByTier: make(map[int]int),
|
|
ByConfidence: make(map[string]int),
|
|
}
|
|
for _, p := range r.providers {
|
|
stats.ByTier[p.Tier]++
|
|
for _, pat := range p.Patterns {
|
|
stats.ByConfidence[pat.Confidence]++
|
|
}
|
|
}
|
|
return stats
|
|
}
|
|
|
|
// AC returns the pre-built Aho-Corasick automaton for keyword pre-filtering.
|
|
func (r *Registry) AC() ahocorasick.AhoCorasick {
|
|
return r.ac
|
|
}
|