Files
keyhunter/pkg/providers/schema.go
salvacybersec 30c0e9871b feat(05-01): extend VerifySpec and Finding, add gjson dep
- VerifySpec: add SuccessCodes, FailureCodes, RateLimitCodes, MetadataPaths, Body
- Preserve legacy ValidStatus/InvalidStatus for backward compat
- Add EffectiveSuccessCodes/FailureCodes/RateLimitCodes fallback helpers
- Add ExtractMetadata helper using gjson (skeleton for Plan 05-03)
- Finding: add Verified, VerifyStatus, VerifyHTTPCode, VerifyMetadata, VerifyError
- Add github.com/tidwall/gjson v1.18.0 as direct dependency
2026-04-05 15:41:13 +03:00

130 lines
4.1 KiB
Go

package providers
import (
"fmt"
"github.com/tidwall/gjson"
"gopkg.in/yaml.v3"
)
// Provider is the in-memory form of one API key provider definition. Every
// field is filled from the YAML key named in its struct tag.
type Provider struct {
	// Definition bookkeeping and identity. UnmarshalYAML rejects a
	// FormatVersion below 1 and an empty LastVerified.
	FormatVersion int    `yaml:"format_version"`
	Name          string `yaml:"name"`
	DisplayName   string `yaml:"display_name"`
	Tier          int    `yaml:"tier"`
	LastVerified  string `yaml:"last_verified"`

	// Detection inputs: free-form keywords and the regex patterns for this
	// provider's keys.
	Keywords []string  `yaml:"keywords"`
	Patterns []Pattern `yaml:"patterns"`

	// Verify describes how a detected key can be checked for liveness.
	Verify VerifySpec `yaml:"verify"`
}
// Pattern is one regex-based detection rule for an API key, as decoded from a
// provider's YAML definition.
type Pattern struct {
	// Regex holds the expression text; it is kept as a plain string here.
	Regex string `yaml:"regex"`
	// EntropyMin is read from `entropy_min`.
	// NOTE(review): presumably a minimum-entropy threshold for a match; confirm
	// against the scanner that consumes it.
	EntropyMin float64 `yaml:"entropy_min"`
	// Confidence is read from `confidence`. Provider.UnmarshalYAML accepts
	// "high", "medium", "low", or the empty string.
	Confidence string `yaml:"confidence"`
}
// VerifySpec describes the request used to check whether a key is live and how
// the response status should be classified (consumed by the Phase 5
// verification engine).
type VerifySpec struct {
	// Request line and headers.
	Method  string            `yaml:"method"`
	URL     string            `yaml:"url"`
	Headers map[string]string `yaml:"headers"`

	// Body is an optional request body template; supports {{KEY}} substitution.
	Body string `yaml:"body"`

	// Canonical status code lists (Phase 5). The Effective*Codes methods
	// prefer these when they are non-empty.
	SuccessCodes   []int `yaml:"success_codes"`
	FailureCodes   []int `yaml:"failure_codes"`
	RateLimitCodes []int `yaml:"rate_limit_codes"`

	// MetadataPaths maps display-name -> gjson path
	// (e.g. "org" -> "organization.name").
	MetadataPaths map[string]string `yaml:"metadata_paths"`

	// Legacy status lists kept so existing YAMLs (Phase 2-3 providers) still
	// load; used as fallbacks for SuccessCodes and FailureCodes.
	ValidStatus   []int `yaml:"valid_status"`
	InvalidStatus []int `yaml:"invalid_status"`
}
// EffectiveSuccessCodes picks the status codes that count as a successful
// verification. Precedence: SuccessCodes, then the legacy ValidStatus, then the
// built-in default [200]. A list is considered set only when it has at least
// one element.
func (v VerifySpec) EffectiveSuccessCodes() []int {
	switch {
	case len(v.SuccessCodes) != 0:
		return v.SuccessCodes
	case len(v.ValidStatus) != 0:
		return v.ValidStatus
	default:
		return []int{200}
	}
}
// EffectiveFailureCodes picks the status codes that mark a key as rejected.
// Precedence: FailureCodes, then the legacy InvalidStatus, then the built-in
// default [401, 403]. A list is considered set only when it has at least one
// element.
func (v VerifySpec) EffectiveFailureCodes() []int {
	switch {
	case len(v.FailureCodes) != 0:
		return v.FailureCodes
	case len(v.InvalidStatus) != 0:
		return v.InvalidStatus
	default:
		return []int{401, 403}
	}
}
// EffectiveRateLimitCodes picks the status codes treated as rate limiting:
// RateLimitCodes when it has at least one element, otherwise the default [429].
// There is no legacy field to fall back to.
func (v VerifySpec) EffectiveRateLimitCodes() []int {
	if len(v.RateLimitCodes) == 0 {
		return []int{429}
	}
	return v.RateLimitCodes
}
// ExtractMetadata evaluates every configured MetadataPaths entry (a gjson
// expression) against jsonBody and collects the hits into a
// display-name -> value map, using each result's string form.
//
// Entries whose path does not exist in the body are left out. The result is nil
// when no paths are configured, when jsonBody is empty, or when no path
// resolved. Plan 05-03 may extend this with type coercion and nested
// extraction.
func (v VerifySpec) ExtractMetadata(jsonBody []byte) map[string]string {
	if len(jsonBody) == 0 || len(v.MetadataPaths) == 0 {
		return nil
	}

	// The map is created on the first hit, so "nothing resolved" falls out as
	// a nil return without a separate emptiness check.
	var metadata map[string]string
	for label, expr := range v.MetadataPaths {
		hit := gjson.GetBytes(jsonBody, expr)
		if !hit.Exists() {
			continue
		}
		if metadata == nil {
			metadata = make(map[string]string, len(v.MetadataPaths))
		}
		metadata[label] = hit.String()
	}
	return metadata
}
// RegistryStats carries aggregate figures describing the set of loaded
// providers.
type RegistryStats struct {
	// Total is an overall count.
	// NOTE(review): presumably the number of loaded providers; confirm against
	// the registry code that fills it.
	Total int
	// ByTier holds integer counts keyed by an integer.
	// NOTE(review): the key is presumably Provider.Tier; confirm.
	ByTier map[int]int
	// ByConfidence holds integer counts keyed by a string.
	// NOTE(review): the key is presumably Pattern.Confidence; confirm.
	ByConfidence map[string]int
}
// UnmarshalYAML implements yaml.Unmarshaler and validates the decoded
// definition before accepting it (satisfies PROV-10).
//
// It returns the decoder's error unchanged if decoding fails, and a descriptive
// error when format_version is below 1, last_verified is empty, or any pattern
// has a confidence other than "high", "medium", "low", or the empty string. The
// receiver is only overwritten once every check has passed.
func (p *Provider) UnmarshalYAML(value *yaml.Node) error {
	// plain shares Provider's fields but not its methods, so decoding into it
	// does not call back into this function.
	type plain Provider
	var decoded plain
	if err := value.Decode(&decoded); err != nil {
		return err
	}

	switch {
	case decoded.FormatVersion < 1:
		return fmt.Errorf("provider %q: format_version must be >= 1 (got %d)", decoded.Name, decoded.FormatVersion)
	case decoded.LastVerified == "":
		return fmt.Errorf("provider %q: last_verified is required", decoded.Name)
	}

	for i := range decoded.Patterns {
		switch level := decoded.Patterns[i].Confidence; level {
		case "", "high", "medium", "low":
			// Accepted; an omitted confidence is allowed.
		default:
			return fmt.Errorf("provider %q: pattern confidence %q must be high, medium, or low", decoded.Name, level)
		}
	}

	*p = Provider(decoded)
	return nil
}