feat(01-02): provider YAML schema structs with validation and reference YAML files

- Provider, Pattern, VerifySpec, RegistryStats structs in schema.go
- UnmarshalYAML validates format_version >= 1, last_verified non-empty, and pattern confidence (high, medium, low, or empty)
- Three reference YAML files: openai, anthropic, huggingface
This commit is contained in:
salvacybersec
2026-04-05 00:04:29 +03:00
parent ebaf7d7c2d
commit 4fcdc42c70
4 changed files with 124 additions and 0 deletions

66
pkg/providers/schema.go Normal file
View File

@@ -0,0 +1,66 @@
package providers
import (
"fmt"
"gopkg.in/yaml.v3"
)
// Provider is one API key provider definition as decoded from a YAML
// document. Decoding goes through (*Provider).UnmarshalYAML, which rejects
// documents that fail schema validation.
type Provider struct {
	// FormatVersion is the schema version of the document; UnmarshalYAML
	// requires it to be >= 1.
	FormatVersion int `yaml:"format_version"`

	// Name identifies the provider and is quoted in validation errors.
	Name string `yaml:"name"`

	// DisplayName is read from the display_name key (presumably the
	// human-readable label; confirm against consumers).
	DisplayName string `yaml:"display_name"`

	// Tier is read from the tier key; its scale is not defined in this file.
	Tier int `yaml:"tier"`

	// LastVerified must be non-empty (enforced by UnmarshalYAML). It is kept
	// as a plain string; no date format is checked here.
	LastVerified string `yaml:"last_verified"`

	// Keywords is read from the keywords key (presumably terms associated
	// with the provider; confirm against the scanner).
	Keywords []string `yaml:"keywords"`

	// Patterns lists the detection patterns declared for this provider.
	Patterns []Pattern `yaml:"patterns"`

	// Verify describes how a key for this provider is checked for liveness.
	Verify VerifySpec `yaml:"verify"`
}
// Pattern describes one regular expression used to detect an API key.
type Pattern struct {
	// Regex is the pattern source text. It is stored as a string and is not
	// compiled or validated in this file.
	Regex string `yaml:"regex"`

	// EntropyMin is read from the entropy_min key (presumably the minimum
	// entropy a match must have; confirm against the scanner).
	EntropyMin float64 `yaml:"entropy_min"`

	// Confidence is "high", "medium", "low", or empty; any other value is
	// rejected by Provider.UnmarshalYAML.
	Confidence string `yaml:"confidence"`
}
// VerifySpec describes how to check whether a key is live. It is consumed by
// the Phase 5 verification engine; nothing in this file interprets it.
type VerifySpec struct {
	// Method is read from the method key (presumably an HTTP method; confirm
	// against the verification engine).
	Method string `yaml:"method"`

	// URL is the endpoint named by the url key.
	URL string `yaml:"url"`

	// Headers maps header names to values, as given under the headers key.
	Headers map[string]string `yaml:"headers"`

	// ValidStatus is read from valid_status (presumably the status codes
	// that mark a key as live; confirm against the verification engine).
	ValidStatus []int `yaml:"valid_status"`

	// InvalidStatus is read from invalid_status (presumably the status codes
	// that mark a key as rejected; confirm against the verification engine).
	InvalidStatus []int `yaml:"invalid_status"`
}
// RegistryStats carries aggregate counts over a set of loaded providers.
// It has no YAML tags and is not populated anywhere in this file.
type RegistryStats struct {
	// Total is the overall count.
	Total int

	// ByTier holds counts keyed by an int (presumably Provider.Tier; confirm
	// against the code that fills it).
	ByTier map[int]int

	// ByConfidence holds counts keyed by a string (presumably
	// Pattern.Confidence; confirm against the code that fills it).
	ByConfidence map[string]int
}
// UnmarshalYAML implements yaml.Unmarshaler and applies schema validation
// while decoding (satisfies PROV-10).
//
// The node is decoded into a method-less copy of Provider and then checked,
// in this order:
//   - format_version must be >= 1
//   - last_verified must be non-empty
//   - each pattern's confidence must be "high", "medium", "low", or empty
//
// The first failing check is returned as an error and *p is left unmodified.
// When every check passes, *p is overwritten with the decoded value.
func (p *Provider) UnmarshalYAML(value *yaml.Node) error {
	// ProviderAlias is a distinct defined type with Provider's fields but
	// none of its methods, so decoding into it does not re-enter this method.
	type ProviderAlias Provider

	var decoded ProviderAlias
	err := value.Decode(&decoded)
	if err != nil {
		// Passed through unchanged so the caller receives the decoder's own
		// error value.
		return err
	}

	switch {
	case decoded.FormatVersion < 1:
		return fmt.Errorf("provider %q: format_version must be >= 1 (got %d)", decoded.Name, decoded.FormatVersion)
	case decoded.LastVerified == "":
		return fmt.Errorf("provider %q: last_verified is required", decoded.Name)
	}

	for i := range decoded.Patterns {
		switch confidence := decoded.Patterns[i].Confidence; confidence {
		case "high", "medium", "low", "":
			// Accepted; an omitted confidence is allowed.
		default:
			return fmt.Errorf("provider %q: pattern confidence %q must be high, medium, or low", decoded.Name, confidence)
		}
	}

	*p = Provider(decoded)
	return nil
}