merge: plan 01-02 provider registry
This commit is contained in:
20
pkg/providers/definitions/anthropic.yaml
Normal file
20
pkg/providers/definitions/anthropic.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
format_version: 1
|
||||
name: anthropic
|
||||
display_name: Anthropic
|
||||
tier: 1
|
||||
last_verified: "2026-04-04"
|
||||
keywords:
|
||||
- "sk-ant-api03-"
|
||||
- "anthropic"
|
||||
patterns:
|
||||
- regex: 'sk-ant-api03-[A-Za-z0-9_\-]{93,}'
|
||||
entropy_min: 3.5
|
||||
confidence: high
|
||||
verify:
|
||||
method: GET
|
||||
url: https://api.anthropic.com/v1/models
|
||||
headers:
|
||||
x-api-key: "{KEY}"
|
||||
anthropic-version: "2023-06-01"
|
||||
valid_status: [200]
|
||||
invalid_status: [401, 403]
|
||||
19
pkg/providers/definitions/huggingface.yaml
Normal file
19
pkg/providers/definitions/huggingface.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
format_version: 1
|
||||
name: huggingface
|
||||
display_name: HuggingFace
|
||||
tier: 3
|
||||
last_verified: "2026-04-04"
|
||||
keywords:
|
||||
- "hf_"
|
||||
- "huggingface"
|
||||
patterns:
|
||||
- regex: 'hf_[A-Za-z0-9]{34,}'
|
||||
entropy_min: 3.5
|
||||
confidence: high
|
||||
verify:
|
||||
method: GET
|
||||
url: https://huggingface.co/api/whoami-v2
|
||||
headers:
|
||||
Authorization: "Bearer {KEY}"
|
||||
valid_status: [200]
|
||||
invalid_status: [401, 403]
|
||||
19
pkg/providers/definitions/openai.yaml
Normal file
19
pkg/providers/definitions/openai.yaml
Normal file
@@ -0,0 +1,19 @@
|
||||
format_version: 1
|
||||
name: openai
|
||||
display_name: OpenAI
|
||||
tier: 1
|
||||
last_verified: "2026-04-04"
|
||||
keywords:
|
||||
- "sk-proj-"
|
||||
- "openai"
|
||||
patterns:
|
||||
- regex: 'sk-proj-[A-Za-z0-9_\-]{48,}'
|
||||
entropy_min: 3.5
|
||||
confidence: high
|
||||
verify:
|
||||
method: GET
|
||||
url: https://api.openai.com/v1/models
|
||||
headers:
|
||||
Authorization: "Bearer {KEY}"
|
||||
valid_status: [200]
|
||||
invalid_status: [401, 403]
|
||||
37
pkg/providers/loader.go
Normal file
37
pkg/providers/loader.go
Normal file
@@ -0,0 +1,37 @@
|
||||
package providers
|
||||
|
||||
import (
|
||||
"embed"
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"path/filepath"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
//go:embed definitions/*.yaml
|
||||
var definitionsFS embed.FS
|
||||
|
||||
// loadProviders reads all YAML files from the embedded definitions FS.
|
||||
func loadProviders() ([]Provider, error) {
|
||||
var providers []Provider
|
||||
err := fs.WalkDir(definitionsFS, "definitions", func(path string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if d.IsDir() || filepath.Ext(path) != ".yaml" {
|
||||
return nil
|
||||
}
|
||||
data, err := definitionsFS.ReadFile(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading provider file %s: %w", path, err)
|
||||
}
|
||||
var p Provider
|
||||
if err := yaml.Unmarshal(data, &p); err != nil {
|
||||
return fmt.Errorf("parsing provider %s: %w", path, err)
|
||||
}
|
||||
providers = append(providers, p)
|
||||
return nil
|
||||
})
|
||||
return providers, err
|
||||
}
|
||||
75
pkg/providers/registry.go
Normal file
75
pkg/providers/registry.go
Normal file
@@ -0,0 +1,75 @@
|
||||
package providers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
ahocorasick "github.com/petar-dambovaliev/aho-corasick"
|
||||
)
|
||||
|
||||
// Registry is the in-memory store of all loaded provider definitions.
|
||||
// It is initialized once at startup and is safe for concurrent reads.
|
||||
type Registry struct {
|
||||
providers []Provider
|
||||
index map[string]int // name -> slice index
|
||||
ac ahocorasick.AhoCorasick // pre-built automaton for keyword pre-filter
|
||||
}
|
||||
|
||||
// NewRegistry loads all embedded provider YAML files, validates them, builds the
|
||||
// Aho-Corasick automaton from all provider keywords, and returns the Registry.
|
||||
func NewRegistry() (*Registry, error) {
|
||||
providers, err := loadProviders()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("loading providers: %w", err)
|
||||
}
|
||||
|
||||
index := make(map[string]int, len(providers))
|
||||
var keywords []string
|
||||
for i, p := range providers {
|
||||
index[p.Name] = i
|
||||
keywords = append(keywords, p.Keywords...)
|
||||
}
|
||||
|
||||
builder := ahocorasick.NewAhoCorasickBuilder(ahocorasick.Opts{DFA: true})
|
||||
ac := builder.Build(keywords)
|
||||
|
||||
return &Registry{
|
||||
providers: providers,
|
||||
index: index,
|
||||
ac: ac,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// List returns all loaded providers.
|
||||
func (r *Registry) List() []Provider {
|
||||
return r.providers
|
||||
}
|
||||
|
||||
// Get returns a provider by name and a boolean indicating whether it was found.
|
||||
func (r *Registry) Get(name string) (Provider, bool) {
|
||||
idx, ok := r.index[name]
|
||||
if !ok {
|
||||
return Provider{}, false
|
||||
}
|
||||
return r.providers[idx], true
|
||||
}
|
||||
|
||||
// Stats returns aggregate statistics about the loaded providers.
|
||||
func (r *Registry) Stats() RegistryStats {
|
||||
stats := RegistryStats{
|
||||
Total: len(r.providers),
|
||||
ByTier: make(map[int]int),
|
||||
ByConfidence: make(map[string]int),
|
||||
}
|
||||
for _, p := range r.providers {
|
||||
stats.ByTier[p.Tier]++
|
||||
for _, pat := range p.Patterns {
|
||||
stats.ByConfidence[pat.Confidence]++
|
||||
}
|
||||
}
|
||||
return stats
|
||||
}
|
||||
|
||||
// AC returns the pre-built Aho-Corasick automaton for keyword pre-filtering.
|
||||
func (r *Registry) AC() ahocorasick.AhoCorasick {
|
||||
return r.ac
|
||||
}
|
||||
@@ -2,22 +2,57 @@ package providers_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// TestRegistryLoad verifies that provider YAML files are loaded from embed.FS.
|
||||
// Stub: will be implemented when registry.go exists (Plan 02).
|
||||
func TestRegistryLoad(t *testing.T) {
|
||||
t.Skip("stub — implement after registry.go exists")
|
||||
reg, err := providers.NewRegistry()
|
||||
require.NoError(t, err)
|
||||
assert.GreaterOrEqual(t, len(reg.List()), 3, "expected at least 3 providers")
|
||||
}
|
||||
|
||||
// TestProviderSchemaValidation verifies format_version and last_verified are required.
|
||||
// Stub: will be implemented when schema.go validation exists (Plan 02).
|
||||
func TestProviderSchemaValidation(t *testing.T) {
|
||||
t.Skip("stub — implement after schema.go validation exists")
|
||||
func TestRegistryGet(t *testing.T) {
|
||||
reg, err := providers.NewRegistry()
|
||||
require.NoError(t, err)
|
||||
|
||||
p, ok := reg.Get("openai")
|
||||
assert.True(t, ok)
|
||||
assert.Equal(t, "openai", p.Name)
|
||||
assert.Equal(t, 1, p.Tier)
|
||||
|
||||
_, notOk := reg.Get("nonexistent-provider")
|
||||
assert.False(t, notOk)
|
||||
}
|
||||
|
||||
func TestRegistryStats(t *testing.T) {
|
||||
reg, err := providers.NewRegistry()
|
||||
require.NoError(t, err)
|
||||
|
||||
stats := reg.Stats()
|
||||
assert.GreaterOrEqual(t, stats.Total, 3)
|
||||
assert.GreaterOrEqual(t, stats.ByTier[1], 2)
|
||||
}
|
||||
|
||||
// TestAhoCorasickBuild verifies Aho-Corasick automaton builds from provider keywords.
|
||||
// Stub: will be implemented when registry builds automaton (Plan 02).
|
||||
func TestAhoCorasickBuild(t *testing.T) {
|
||||
t.Skip("stub — implement after registry AC build exists")
|
||||
reg, err := providers.NewRegistry()
|
||||
require.NoError(t, err)
|
||||
|
||||
ac := reg.AC()
|
||||
matches := ac.FindAll("export OPENAI_API_KEY=sk-proj-abc")
|
||||
assert.NotEmpty(t, matches)
|
||||
|
||||
noMatches := ac.FindAll("hello world nothing here")
|
||||
assert.Empty(t, noMatches)
|
||||
}
|
||||
|
||||
func TestProviderSchemaValidation(t *testing.T) {
|
||||
invalid := []byte("format_version: 0\nname: invalid\nlast_verified: \"\"\n")
|
||||
var p providers.Provider
|
||||
err := yaml.Unmarshal(invalid, &p)
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "format_version")
|
||||
}
|
||||
|
||||
66
pkg/providers/schema.go
Normal file
66
pkg/providers/schema.go
Normal file
@@ -0,0 +1,66 @@
|
||||
package providers
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// Provider represents a single API key provider definition loaded from YAML.
|
||||
type Provider struct {
|
||||
FormatVersion int `yaml:"format_version"`
|
||||
Name string `yaml:"name"`
|
||||
DisplayName string `yaml:"display_name"`
|
||||
Tier int `yaml:"tier"`
|
||||
LastVerified string `yaml:"last_verified"`
|
||||
Keywords []string `yaml:"keywords"`
|
||||
Patterns []Pattern `yaml:"patterns"`
|
||||
Verify VerifySpec `yaml:"verify"`
|
||||
}
|
||||
|
||||
// Pattern is a single regex pattern used for API key detection.
type Pattern struct {
	// Regex is the regular expression source text.
	Regex string `yaml:"regex"`
	// EntropyMin is the entropy_min value from the definition.
	EntropyMin float64 `yaml:"entropy_min"`
	// Confidence is the confidence label attached to the pattern.
	Confidence string `yaml:"confidence"`
}
|
||||
|
||||
// VerifySpec describes the HTTP request used to verify that a key is live
// (used by the Phase 5 verification engine).
type VerifySpec struct {
	// Method is the HTTP method of the verification request.
	Method string `yaml:"method"`
	// URL is the endpoint the verification request is sent to.
	URL string `yaml:"url"`
	// Headers are the request headers, keyed by header name.
	Headers map[string]string `yaml:"headers"`
	// ValidStatus lists the status codes from the valid_status entry.
	ValidStatus []int `yaml:"valid_status"`
	// InvalidStatus lists the status codes from the invalid_status entry.
	InvalidStatus []int `yaml:"invalid_status"`
}
|
||||
|
||||
// RegistryStats carries aggregate counts describing the loaded providers.
type RegistryStats struct {
	// Total is the number of loaded providers.
	Total int
	// ByTier counts providers per tier number.
	ByTier map[int]int
	// ByConfidence counts patterns per confidence label.
	ByConfidence map[string]int
}
|
||||
|
||||
// UnmarshalYAML implements yaml.Unmarshaler with schema validation (satisfies PROV-10).
|
||||
func (p *Provider) UnmarshalYAML(value *yaml.Node) error {
|
||||
// Use a type alias to avoid infinite recursion
|
||||
type ProviderAlias Provider
|
||||
var alias ProviderAlias
|
||||
if err := value.Decode(&alias); err != nil {
|
||||
return err
|
||||
}
|
||||
if alias.FormatVersion < 1 {
|
||||
return fmt.Errorf("provider %q: format_version must be >= 1 (got %d)", alias.Name, alias.FormatVersion)
|
||||
}
|
||||
if alias.LastVerified == "" {
|
||||
return fmt.Errorf("provider %q: last_verified is required", alias.Name)
|
||||
}
|
||||
validConfidences := map[string]bool{"high": true, "medium": true, "low": true, "": true}
|
||||
for _, pat := range alias.Patterns {
|
||||
if !validConfidences[pat.Confidence] {
|
||||
return fmt.Errorf("provider %q: pattern confidence %q must be high, medium, or low", alias.Name, pat.Confidence)
|
||||
}
|
||||
}
|
||||
*p = Provider(alias)
|
||||
return nil
|
||||
}
|
||||
Reference in New Issue
Block a user