feat(01-02): embed loader, registry with Aho-Corasick, and filled test stubs
- loader.go with go:embed definitions/*.yaml for compile-time embedding
- registry.go with List(), Get(), Stats(), AC() methods
- Aho-Corasick automaton built from all provider keywords at NewRegistry()
- pkg/providers/definitions/ with 3 YAML files for embed
- All 5 provider tests pass: load, get, stats, AC, schema validation
This commit is contained in:
75
pkg/providers/registry.go
Normal file
75
pkg/providers/registry.go
Normal file
@@ -0,0 +1,75 @@
|
||||
package providers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
ahocorasick "github.com/petar-dambovaliev/aho-corasick"
|
||||
)
|
||||
|
||||
// Registry is the in-memory store of all loaded provider definitions.
|
||||
// It is initialized once at startup and is safe for concurrent reads.
|
||||
type Registry struct {
|
||||
providers []Provider
|
||||
index map[string]int // name -> slice index
|
||||
ac ahocorasick.AhoCorasick // pre-built automaton for keyword pre-filter
|
||||
}
|
||||
|
||||
// NewRegistry loads all embedded provider YAML files, validates them, builds the
|
||||
// Aho-Corasick automaton from all provider keywords, and returns the Registry.
|
||||
func NewRegistry() (*Registry, error) {
|
||||
providers, err := loadProviders()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("loading providers: %w", err)
|
||||
}
|
||||
|
||||
index := make(map[string]int, len(providers))
|
||||
var keywords []string
|
||||
for i, p := range providers {
|
||||
index[p.Name] = i
|
||||
keywords = append(keywords, p.Keywords...)
|
||||
}
|
||||
|
||||
builder := ahocorasick.NewAhoCorasickBuilder(ahocorasick.Opts{DFA: true})
|
||||
ac := builder.Build(keywords)
|
||||
|
||||
return &Registry{
|
||||
providers: providers,
|
||||
index: index,
|
||||
ac: ac,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// List returns all loaded providers.
|
||||
func (r *Registry) List() []Provider {
|
||||
return r.providers
|
||||
}
|
||||
|
||||
// Get returns a provider by name and a boolean indicating whether it was found.
|
||||
func (r *Registry) Get(name string) (Provider, bool) {
|
||||
idx, ok := r.index[name]
|
||||
if !ok {
|
||||
return Provider{}, false
|
||||
}
|
||||
return r.providers[idx], true
|
||||
}
|
||||
|
||||
// Stats returns aggregate statistics about the loaded providers.
|
||||
func (r *Registry) Stats() RegistryStats {
|
||||
stats := RegistryStats{
|
||||
Total: len(r.providers),
|
||||
ByTier: make(map[int]int),
|
||||
ByConfidence: make(map[string]int),
|
||||
}
|
||||
for _, p := range r.providers {
|
||||
stats.ByTier[p.Tier]++
|
||||
for _, pat := range p.Patterns {
|
||||
stats.ByConfidence[pat.Confidence]++
|
||||
}
|
||||
}
|
||||
return stats
|
||||
}
|
||||
|
||||
// AC returns the pre-built Aho-Corasick automaton for keyword pre-filtering.
|
||||
func (r *Registry) AC() ahocorasick.AhoCorasick {
|
||||
return r.ac
|
||||
}
|
||||
Reference in New Issue
Block a user