From 4fcdc42c70e69100db8b4c3cb729e3af6090b0a0 Mon Sep 17 00:00:00 2001
From: salvacybersec
Date: Sun, 5 Apr 2026 00:04:29 +0300
Subject: [PATCH] feat(01-02): provider YAML schema structs with validation and reference YAML files

- Provider, Pattern, VerifySpec, RegistryStats structs in schema.go
- UnmarshalYAML validates format_version >= 1 and last_verified non-empty
- Three reference YAML files: openai, anthropic, huggingface
---
 pkg/providers/schema.go    | 66 ++++++++++++++++++++++++++++++++++++++
 providers/anthropic.yaml   | 20 ++++++++++++
 providers/huggingface.yaml | 19 +++++++++++
 providers/openai.yaml      | 19 +++++++++++
 4 files changed, 124 insertions(+)
 create mode 100644 pkg/providers/schema.go
 create mode 100644 providers/anthropic.yaml
 create mode 100644 providers/huggingface.yaml
 create mode 100644 providers/openai.yaml

diff --git a/pkg/providers/schema.go b/pkg/providers/schema.go
new file mode 100644
index 0000000..8a4491b
--- /dev/null
+++ b/pkg/providers/schema.go
@@ -0,0 +1,66 @@
+package providers
+
+import (
+	"fmt"
+
+	"gopkg.in/yaml.v3"
+)
+
+// Provider is a single API key provider definition decoded from a provider YAML file; its UnmarshalYAML method validates the document while decoding.
+type Provider struct {
+	FormatVersion int        `yaml:"format_version"` // schema version; UnmarshalYAML rejects values below 1
+	Name          string     `yaml:"name"`           // identifier quoted in every validation error message
+	DisplayName   string     `yaml:"display_name"`   // human-readable name, e.g. "OpenAI" in providers/openai.yaml
+	Tier          int        `yaml:"tier"`           // NOTE(review): meaning of tier values is not defined in this file
+	LastVerified  string     `yaml:"last_verified"`  // required; UnmarshalYAML rejects an empty value (format itself is not checked)
+	Keywords      []string   `yaml:"keywords"`       // NOTE(review): presumably literal hints used to pre-filter input - confirm with the scanner
+	Patterns      []Pattern  `yaml:"patterns"`       // detection patterns; each Confidence is validated by UnmarshalYAML
+	Verify        VerifySpec `yaml:"verify"`         // request description for checking a key; see VerifySpec
+}
+
+// Pattern describes one regular expression used to detect an API key, plus the metadata attached to a match.
+type Pattern struct {
+	Regex      string  `yaml:"regex"`       // expression source text; it is not compiled or validated in this file
+	EntropyMin float64 `yaml:"entropy_min"` // NOTE(review): presumably a minimum entropy threshold for a match - confirm algorithm and units
+	Confidence string  `yaml:"confidence"`  // "high", "medium", "low", or empty; enforced by Provider.UnmarshalYAML
+}
+
+// VerifySpec describes the request (method, URL, headers, expected status codes) used to check whether a key is live (used by the Phase 5 verification engine).
+type VerifySpec struct {
+	Method        string            `yaml:"method"`         // request method, e.g. GET in the reference YAML files
+	URL           string            `yaml:"url"`            // endpoint the verification request is sent to
+	Headers       map[string]string `yaml:"headers"`        // header name -> value; NOTE(review): "{KEY}" is presumably replaced with the candidate key - confirm
+	ValidStatus   []int             `yaml:"valid_status"`   // NOTE(review): presumably response codes that mean the key is live (200 in the reference files)
+	InvalidStatus []int             `yaml:"invalid_status"` // NOTE(review): presumably response codes that mean the key is rejected (401, 403 in the reference files)
+}
+
+// RegistryStats holds aggregate counts about loaded providers; nothing in this file populates it.
+type RegistryStats struct {
+	Total        int            // NOTE(review): presumably the number of loaded providers - confirm with the registry
+	ByTier       map[int]int    // NOTE(review): presumably provider counts keyed by Provider.Tier
+	ByConfidence map[string]int // NOTE(review): presumably counts keyed by Pattern.Confidence
+}
+
+// UnmarshalYAML implements yaml.Unmarshaler with schema validation (satisfies PROV-10): it rejects format_version < 1, an empty last_verified, and any pattern confidence other than "high", "medium", "low", or "".
+func (p *Provider) UnmarshalYAML(value *yaml.Node) error {
+	// ProviderAlias is a new defined type (not a Go alias declaration): it has Provider's fields but not its methods, so Decode below does not re-enter UnmarshalYAML.
+	type ProviderAlias Provider
+	var alias ProviderAlias
+	if err := value.Decode(&alias); err != nil {
+		return err // decode errors are returned as-is, without wrapping
+	}
+	if alias.FormatVersion < 1 {
+		return fmt.Errorf("provider %q: format_version must be >= 1 (got %d)", alias.Name, alias.FormatVersion)
+	}
+	if alias.LastVerified == "" {
+		return fmt.Errorf("provider %q: last_verified is required", alias.Name)
+	}
+	validConfidences := map[string]bool{"high": true, "medium": true, "low": true, "": true} // "" is accepted, so a pattern may leave confidence unset
+	for _, pat := range alias.Patterns {
+		if !validConfidences[pat.Confidence] {
+			return fmt.Errorf("provider %q: pattern confidence %q must be high, medium, or low", alias.Name, pat.Confidence)
+		}
+	}
+	*p = Provider(alias) // the receiver is written only after every check has passed
+	return nil
+}
diff --git a/providers/anthropic.yaml b/providers/anthropic.yaml
new file mode 100644
index 0000000..23aa116
--- /dev/null
+++ b/providers/anthropic.yaml
@@ -0,0 +1,20 @@
+format_version: 1  # must be >= 1; enforced by Provider.UnmarshalYAML
+name: anthropic
+display_name: Anthropic
+tier: 1
+last_verified: "2026-04-04"  # required; an empty value fails decoding
+keywords:
+  - "sk-ant-api03-"
+  - "anthropic"
+patterns:
+  - regex: 'sk-ant-api03-[A-Za-z0-9_\-]{93,}'  # fixed prefix, then at least 93 characters from [A-Za-z0-9_-]
+    entropy_min: 3.5
+    confidence: high  # allowed values: high, medium, low (or omitted)
+verify:
+  method: GET
+  url: https://api.anthropic.com/v1/models
+  headers:
+    x-api-key: "{KEY}"  # NOTE(review): {KEY} is presumably replaced with the candidate key - confirm against the verification engine
+    anthropic-version: "2023-06-01"
+  valid_status: [200]
+  invalid_status: [401, 403]
diff --git a/providers/huggingface.yaml b/providers/huggingface.yaml
new file mode 100644
index 0000000..db7a0b9
--- /dev/null
+++ b/providers/huggingface.yaml
@@ -0,0 +1,19 @@
+format_version: 1  # must be >= 1; enforced by Provider.UnmarshalYAML
+name: huggingface
+display_name: HuggingFace
+tier: 3
+last_verified: "2026-04-04"  # required; an empty value fails decoding
+keywords:
+  - "hf_"
+  - "huggingface"
+patterns:
+  - regex: 'hf_[A-Za-z0-9]{34,}'  # fixed prefix, then at least 34 alphanumeric characters
+    entropy_min: 3.5
+    confidence: high  # allowed values: high, medium, low (or omitted)
+verify:
+  method: GET
+  url: https://huggingface.co/api/whoami-v2
+  headers:
+    Authorization: "Bearer {KEY}"  # NOTE(review): {KEY} is presumably replaced with the candidate key - confirm against the verification engine
+  valid_status: [200]
+  invalid_status: [401, 403]
diff --git a/providers/openai.yaml b/providers/openai.yaml
new file mode 100644
index 0000000..6a6fdd9
--- /dev/null
+++ b/providers/openai.yaml
@@ -0,0 +1,19 @@
+format_version: 1  # must be >= 1; enforced by Provider.UnmarshalYAML
+name: openai
+display_name: OpenAI
+tier: 1
+last_verified: "2026-04-04"  # required; an empty value fails decoding
+keywords:
+  - "sk-proj-"
+  - "openai"
+patterns:
+  - regex: 'sk-proj-[A-Za-z0-9_\-]{48,}'  # fixed prefix, then at least 48 characters from [A-Za-z0-9_-]
+    entropy_min: 3.5
+    confidence: high  # allowed values: high, medium, low (or omitted)
+verify:
+  method: GET
+  url: https://api.openai.com/v1/models
+  headers:
+    Authorization: "Bearer {KEY}"  # NOTE(review): {KEY} is presumably replaced with the candidate key - confirm against the verification engine
+  valid_status: [200]
+  invalid_status: [401, 403]