- frontier.yaml: 15 dorks covering Tier 1/2 providers (OpenAI, Anthropic, Google AI, Azure OpenAI, AWS Bedrock, xAI, Cohere, Mistral, Groq, Together, Replicate)
- specialized.yaml: 10 dorks covering Tier 3 providers (Perplexity, Voyage, Jina, AssemblyAI, Deepgram, ElevenLabs, Stability, HuggingFace)
- Extend loader to accept YAML list format in addition to single-dork mapping, enabling multi-dork files for Wave 2+ plans
- Mirror all YAMLs into dorks/github/ (user-visible) and pkg/dorks/definitions/github/ (go:embed target)
78 lines
2.2 KiB
Go
78 lines
2.2 KiB
Go
package dorks
|
|
|
|
import (
|
|
"embed"
|
|
"errors"
|
|
"fmt"
|
|
"io/fs"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"gopkg.in/yaml.v3"
|
|
)
|
|
|
|
// definitionsFS embeds every file under pkg/dorks/definitions; loadDorks
// walks it starting at the "definitions" root. The trailing
|
|
// `/*` is deliberate: it tolerates an empty tree containing only a .gitkeep
// placeholder (presumably because a wildcard pattern also matches dot-files,
// which embedding the bare directory would skip — see the embed package docs),
|
|
// which is the case for this foundation plan before Wave 2
|
|
// plans drop in 150+ real dork YAML files.
// NOTE(review): the "foundation plan / before Wave 2" wording may be stale now
// that YAML definitions are mirrored into definitions/github/ — confirm.
|
|
//
|
|
//go:embed definitions/*
|
|
var definitionsFS embed.FS
|
|
|
|
// loadDorks walks the embedded definitions tree and returns every Dork found
|
|
// in a *.yaml file. Non-YAML files (e.g. .gitkeep) are ignored, empty trees
|
|
// return (nil, nil), and parse or validation errors are wrapped with the
|
|
// offending file path.
|
|
func loadDorks() ([]Dork, error) {
|
|
var dorks []Dork
|
|
err := fs.WalkDir(definitionsFS, "definitions", func(path string, d fs.DirEntry, err error) error {
|
|
if err != nil {
|
|
// Empty definitions directory (only .gitkeep) is valid.
|
|
if errors.Is(err, fs.ErrNotExist) {
|
|
return fs.SkipAll
|
|
}
|
|
return err
|
|
}
|
|
if d.IsDir() {
|
|
return nil
|
|
}
|
|
if !strings.EqualFold(filepath.Ext(path), ".yaml") && !strings.EqualFold(filepath.Ext(path), ".yml") {
|
|
return nil
|
|
}
|
|
data, err := definitionsFS.ReadFile(path)
|
|
if err != nil {
|
|
return fmt.Errorf("reading dork file %s: %w", path, err)
|
|
}
|
|
// Support two YAML shapes:
|
|
// 1. A top-level list of Dork entries (preferred, used by Wave 2+ plans).
|
|
// 2. A single Dork as a top-level mapping (legacy one-per-file form).
|
|
var list []Dork
|
|
if err := yaml.Unmarshal(data, &list); err == nil && len(list) > 0 {
|
|
for _, dk := range list {
|
|
if err := dk.Validate(); err != nil {
|
|
return fmt.Errorf("validating dork %s (%s): %w", path, dk.ID, err)
|
|
}
|
|
dorks = append(dorks, dk)
|
|
}
|
|
return nil
|
|
}
|
|
var dk Dork
|
|
if err := yaml.Unmarshal(data, &dk); err != nil {
|
|
return fmt.Errorf("parsing dork %s: %w", path, err)
|
|
}
|
|
if strings.TrimSpace(dk.ID) == "" {
|
|
// Empty file or placeholder (e.g., .gitkeep-adjacent empty YAML) — ignore.
|
|
return nil
|
|
}
|
|
if err := dk.Validate(); err != nil {
|
|
return fmt.Errorf("validating dork %s: %w", path, err)
|
|
}
|
|
dorks = append(dorks, dk)
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return dorks, nil
|
|
}
|