package providers import ( "fmt" ahocorasick "github.com/petar-dambovaliev/aho-corasick" ) // Registry is the in-memory store of all loaded provider definitions. // It is initialized once at startup and is safe for concurrent reads. type Registry struct { providers []Provider index map[string]int // name -> slice index ac ahocorasick.AhoCorasick // pre-built automaton for keyword pre-filter } // NewRegistry loads all embedded provider YAML files, validates them, builds the // Aho-Corasick automaton from all provider keywords, and returns the Registry. func NewRegistry() (*Registry, error) { providers, err := loadProviders() if err != nil { return nil, fmt.Errorf("loading providers: %w", err) } index := make(map[string]int, len(providers)) var keywords []string for i, p := range providers { index[p.Name] = i keywords = append(keywords, p.Keywords...) } builder := ahocorasick.NewAhoCorasickBuilder(ahocorasick.Opts{DFA: true}) ac := builder.Build(keywords) return &Registry{ providers: providers, index: index, ac: ac, }, nil } // NewRegistryFromProviders builds a Registry from an explicit slice of providers // without touching the embedded YAML files. Intended for tests that need a // minimal registry with synthetic providers. func NewRegistryFromProviders(ps []Provider) *Registry { index := make(map[string]int, len(ps)) var keywords []string for i, p := range ps { index[p.Name] = i keywords = append(keywords, p.Keywords...) } builder := ahocorasick.NewAhoCorasickBuilder(ahocorasick.Opts{DFA: true}) ac := builder.Build(keywords) return &Registry{providers: ps, index: index, ac: ac} } // List returns all loaded providers. func (r *Registry) List() []Provider { return r.providers } // Get returns a provider by name and a boolean indicating whether it was found. func (r *Registry) Get(name string) (Provider, bool) { idx, ok := r.index[name] if !ok { return Provider{}, false } return r.providers[idx], true } // Stats returns aggregate statistics about the loaded providers. func (r *Registry) Stats() RegistryStats { stats := RegistryStats{ Total: len(r.providers), ByTier: make(map[int]int), ByConfidence: make(map[string]int), } for _, p := range r.providers { stats.ByTier[p.Tier]++ for _, pat := range p.Patterns { stats.ByConfidence[pat.Confidence]++ } } return stats } // AC returns the pre-built Aho-Corasick automaton for keyword pre-filtering. func (r *Registry) AC() ahocorasick.AhoCorasick { return r.ac }