feat(01-02): embed loader, registry with Aho-Corasick, and filled test stubs
- loader.go with go:embed definitions/*.yaml for compile-time embedding
- registry.go with List(), Get(), Stats(), AC() methods
- Aho-Corasick automaton built from all provider keywords at NewRegistry()
- pkg/providers/definitions/ with 3 YAML files for embed
- All 5 provider tests pass: load, get, stats, AC, schema validation
This commit is contained in:
1
go.mod
1
go.mod
@@ -3,6 +3,7 @@ module github.com/salvacybersec/keyhunter
|
|||||||
go 1.26.1
|
go 1.26.1
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
github.com/petar-dambovaliev/aho-corasick v0.0.0-20250424160509-463d218d4745
|
||||||
github.com/stretchr/testify v1.11.1
|
github.com/stretchr/testify v1.11.1
|
||||||
gopkg.in/yaml.v3 v3.0.1
|
gopkg.in/yaml.v3 v3.0.1
|
||||||
)
|
)
|
||||||
|
|||||||
2
go.sum
2
go.sum
@@ -1,5 +1,7 @@
|
|||||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/petar-dambovaliev/aho-corasick v0.0.0-20250424160509-463d218d4745 h1:Vpr4VgAizEgEZsaMohpw6JYDP+i9Of9dmdY4ufNP6HI=
|
||||||
|
github.com/petar-dambovaliev/aho-corasick v0.0.0-20250424160509-463d218d4745/go.mod h1:EHPiTAKtiFmrMldLUNswFwfZ2eJIYBHktdaUTZxYWRw=
|
||||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||||
|
|||||||
20
pkg/providers/definitions/anthropic.yaml
Normal file
20
pkg/providers/definitions/anthropic.yaml
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
format_version: 1
|
||||||
|
name: anthropic
|
||||||
|
display_name: Anthropic
|
||||||
|
tier: 1
|
||||||
|
last_verified: "2026-04-04"
|
||||||
|
keywords:
|
||||||
|
- "sk-ant-api03-"
|
||||||
|
- "anthropic"
|
||||||
|
patterns:
|
||||||
|
- regex: 'sk-ant-api03-[A-Za-z0-9_\-]{93,}'
|
||||||
|
entropy_min: 3.5
|
||||||
|
confidence: high
|
||||||
|
verify:
|
||||||
|
method: GET
|
||||||
|
url: https://api.anthropic.com/v1/models
|
||||||
|
headers:
|
||||||
|
x-api-key: "{KEY}"
|
||||||
|
anthropic-version: "2023-06-01"
|
||||||
|
valid_status: [200]
|
||||||
|
invalid_status: [401, 403]
|
||||||
19
pkg/providers/definitions/huggingface.yaml
Normal file
19
pkg/providers/definitions/huggingface.yaml
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
format_version: 1
|
||||||
|
name: huggingface
|
||||||
|
display_name: HuggingFace
|
||||||
|
tier: 3
|
||||||
|
last_verified: "2026-04-04"
|
||||||
|
keywords:
|
||||||
|
- "hf_"
|
||||||
|
- "huggingface"
|
||||||
|
patterns:
|
||||||
|
- regex: 'hf_[A-Za-z0-9]{34,}'
|
||||||
|
entropy_min: 3.5
|
||||||
|
confidence: high
|
||||||
|
verify:
|
||||||
|
method: GET
|
||||||
|
url: https://huggingface.co/api/whoami-v2
|
||||||
|
headers:
|
||||||
|
Authorization: "Bearer {KEY}"
|
||||||
|
valid_status: [200]
|
||||||
|
invalid_status: [401, 403]
|
||||||
19
pkg/providers/definitions/openai.yaml
Normal file
19
pkg/providers/definitions/openai.yaml
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
format_version: 1
|
||||||
|
name: openai
|
||||||
|
display_name: OpenAI
|
||||||
|
tier: 1
|
||||||
|
last_verified: "2026-04-04"
|
||||||
|
keywords:
|
||||||
|
- "sk-proj-"
|
||||||
|
- "openai"
|
||||||
|
patterns:
|
||||||
|
- regex: 'sk-proj-[A-Za-z0-9_\-]{48,}'
|
||||||
|
entropy_min: 3.5
|
||||||
|
confidence: high
|
||||||
|
verify:
|
||||||
|
method: GET
|
||||||
|
url: https://api.openai.com/v1/models
|
||||||
|
headers:
|
||||||
|
Authorization: "Bearer {KEY}"
|
||||||
|
valid_status: [200]
|
||||||
|
invalid_status: [401, 403]
|
||||||
37
pkg/providers/loader.go
Normal file
37
pkg/providers/loader.go
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
package providers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"embed"
|
||||||
|
"fmt"
|
||||||
|
"io/fs"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"gopkg.in/yaml.v3"
|
||||||
|
)
|
||||||
|
|
||||||
|
//go:embed definitions/*.yaml
|
||||||
|
var definitionsFS embed.FS
|
||||||
|
|
||||||
|
// loadProviders reads all YAML files from the embedded definitions FS.
|
||||||
|
func loadProviders() ([]Provider, error) {
|
||||||
|
var providers []Provider
|
||||||
|
err := fs.WalkDir(definitionsFS, "definitions", func(path string, d fs.DirEntry, err error) error {
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if d.IsDir() || filepath.Ext(path) != ".yaml" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
data, err := definitionsFS.ReadFile(path)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("reading provider file %s: %w", path, err)
|
||||||
|
}
|
||||||
|
var p Provider
|
||||||
|
if err := yaml.Unmarshal(data, &p); err != nil {
|
||||||
|
return fmt.Errorf("parsing provider %s: %w", path, err)
|
||||||
|
}
|
||||||
|
providers = append(providers, p)
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
return providers, err
|
||||||
|
}
|
||||||
75
pkg/providers/registry.go
Normal file
75
pkg/providers/registry.go
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
package providers
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
ahocorasick "github.com/petar-dambovaliev/aho-corasick"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Registry is the in-memory store of all loaded provider definitions.
|
||||||
|
// It is initialized once at startup and is safe for concurrent reads.
|
||||||
|
type Registry struct {
|
||||||
|
providers []Provider
|
||||||
|
index map[string]int // name -> slice index
|
||||||
|
ac ahocorasick.AhoCorasick // pre-built automaton for keyword pre-filter
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewRegistry loads all embedded provider YAML files, validates them, builds the
|
||||||
|
// Aho-Corasick automaton from all provider keywords, and returns the Registry.
|
||||||
|
func NewRegistry() (*Registry, error) {
|
||||||
|
providers, err := loadProviders()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("loading providers: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
index := make(map[string]int, len(providers))
|
||||||
|
var keywords []string
|
||||||
|
for i, p := range providers {
|
||||||
|
index[p.Name] = i
|
||||||
|
keywords = append(keywords, p.Keywords...)
|
||||||
|
}
|
||||||
|
|
||||||
|
builder := ahocorasick.NewAhoCorasickBuilder(ahocorasick.Opts{DFA: true})
|
||||||
|
ac := builder.Build(keywords)
|
||||||
|
|
||||||
|
return &Registry{
|
||||||
|
providers: providers,
|
||||||
|
index: index,
|
||||||
|
ac: ac,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// List returns all loaded providers.
|
||||||
|
func (r *Registry) List() []Provider {
|
||||||
|
return r.providers
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get returns a provider by name and a boolean indicating whether it was found.
|
||||||
|
func (r *Registry) Get(name string) (Provider, bool) {
|
||||||
|
idx, ok := r.index[name]
|
||||||
|
if !ok {
|
||||||
|
return Provider{}, false
|
||||||
|
}
|
||||||
|
return r.providers[idx], true
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stats returns aggregate statistics about the loaded providers.
|
||||||
|
func (r *Registry) Stats() RegistryStats {
|
||||||
|
stats := RegistryStats{
|
||||||
|
Total: len(r.providers),
|
||||||
|
ByTier: make(map[int]int),
|
||||||
|
ByConfidence: make(map[string]int),
|
||||||
|
}
|
||||||
|
for _, p := range r.providers {
|
||||||
|
stats.ByTier[p.Tier]++
|
||||||
|
for _, pat := range p.Patterns {
|
||||||
|
stats.ByConfidence[pat.Confidence]++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return stats
|
||||||
|
}
|
||||||
|
|
||||||
|
// AC returns the pre-built Aho-Corasick automaton for keyword pre-filtering.
|
||||||
|
func (r *Registry) AC() ahocorasick.AhoCorasick {
|
||||||
|
return r.ac
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user