From 09722eaec4bc44dc928f53c527ceda09e26c4456 Mon Sep 17 00:00:00 2001 From: salvacybersec Date: Mon, 6 Apr 2026 00:20:43 +0300 Subject: [PATCH] feat(08-02): add 25 GitHub dorks for frontier and specialized categories - frontier.yaml: 15 dorks covering Tier 1/2 providers (OpenAI, Anthropic, Google AI, Azure OpenAI, AWS Bedrock, xAI, Cohere, Mistral, Groq, Together, Replicate) - specialized.yaml: 10 dorks covering Tier 3 providers (Perplexity, Voyage, Jina, AssemblyAI, Deepgram, ElevenLabs, Stability, HuggingFace) - Extend loader to accept YAML list format in addition to single-dork mapping, enabling multi-dork files for Wave 2+ plans - Mirror all YAMLs into dorks/github/ (user-visible) and pkg/dorks/definitions/github/ (go:embed target) --- dorks/github/frontier.yaml | 105 ++++++++++++++++++ dorks/github/specialized.yaml | 70 ++++++++++++ pkg/dorks/definitions/github/frontier.yaml | 105 ++++++++++++++++++ pkg/dorks/definitions/github/specialized.yaml | 70 ++++++++++++ pkg/dorks/loader.go | 17 +++ 5 files changed, 367 insertions(+) create mode 100644 dorks/github/frontier.yaml create mode 100644 dorks/github/specialized.yaml create mode 100644 pkg/dorks/definitions/github/frontier.yaml create mode 100644 pkg/dorks/definitions/github/specialized.yaml diff --git a/dorks/github/frontier.yaml b/dorks/github/frontier.yaml new file mode 100644 index 0000000..2766033 --- /dev/null +++ b/dorks/github/frontier.yaml @@ -0,0 +1,105 @@ +- id: openai-github-envfile + name: "OpenAI Project Key in .env files" + source: github + category: frontier + query: 'sk-proj- extension:env' + description: "Finds OpenAI project keys exposed in committed .env files" + tags: [openai, env, tier1] +- id: openai-github-pyfile + name: "OpenAI Project Key in Python files" + source: github + category: frontier + query: 'sk-proj- extension:py' + description: "Finds OpenAI project keys hard-coded in Python source" + tags: [openai, python, tier1] +- id: openai-github-jsonfile + name: "OpenAI Project Key in JSON files" + source: github + category: frontier + query: 'sk-proj- extension:json' + description: "Finds OpenAI project keys in JSON configs and fixtures" + tags: [openai, json, tier1] +- id: anthropic-github-envfile + name: "Anthropic API Key in .env files" + source: github + category: frontier + query: 'sk-ant-api03- extension:env' + description: "Finds Anthropic Claude API keys exposed in committed .env files" + tags: [anthropic, env, tier1] +- id: anthropic-github-pyfile + name: "Anthropic API Key in Python files" + source: github + category: frontier + query: 'sk-ant-api03- extension:py' + description: "Finds Anthropic Claude API keys hard-coded in Python source" + tags: [anthropic, python, tier1] +- id: google-ai-github-envfile + name: "Google AI Studio Key in .env files" + source: github + category: frontier + query: 'AIzaSy extension:env "GOOGLE_API_KEY"' + description: "Finds Google Generative AI / Gemini keys in .env files" + tags: [google, gemini, env, tier1] +- id: google-ai-github-jsonfile + name: "Google Generative Language Key in JSON" + source: github + category: frontier + query: 'AIzaSy extension:json "generativelanguage"' + description: "Finds Gemini keys adjacent to generativelanguage.googleapis.com references" + tags: [google, gemini, json, tier1] +- id: azure-openai-envfile + name: "Azure OpenAI Key in .env files" + source: github + category: frontier + query: 'AZURE_OPENAI_KEY extension:env' + description: "Finds Azure OpenAI deployment keys in .env files" + tags: [azure, openai, env, tier1] +- id: aws-bedrock-envfile + name: "AWS Bedrock Access Key in .env files" + source: github + category: frontier + query: 'AKIA extension:env "bedrock"' + description: "Finds AWS access keys adjacent to Bedrock references in .env files" + tags: [aws, bedrock, env, tier1] +- id: xai-envfile + name: "xAI Grok Key in .env files" + source: github + category: frontier + query: 'xai- extension:env' + description: "Finds xAI Grok API keys in .env files" + tags: [xai, grok, env, tier2] +- id: cohere-envfile + name: "Cohere API Key in .env files" + source: github + category: frontier + query: 'COHERE_API_KEY extension:env' + description: "Finds Cohere API keys in .env files" + tags: [cohere, env, tier2] +- id: mistral-envfile + name: "Mistral API Key in .env files" + source: github + category: frontier + query: 'MISTRAL_API_KEY extension:env' + description: "Finds Mistral platform keys in .env files" + tags: [mistral, env, tier2] +- id: groq-envfile + name: "Groq API Key in .env files" + source: github + category: frontier + query: 'gsk_ extension:env' + description: "Finds Groq API keys (gsk_ prefix) in .env files" + tags: [groq, env, tier2] +- id: together-envfile + name: "Together AI API Key in .env files" + source: github + category: frontier + query: 'TOGETHER_API_KEY extension:env' + description: "Finds Together.ai inference keys in .env files" + tags: [together, env, tier2] +- id: replicate-envfile + name: "Replicate API Token in .env files" + source: github + category: frontier + query: 'r8_ extension:env' + description: "Finds Replicate API tokens (r8_ prefix) in .env files" + tags: [replicate, env, tier2] diff --git a/dorks/github/specialized.yaml b/dorks/github/specialized.yaml new file mode 100644 index 0000000..7e7512d --- /dev/null +++ b/dorks/github/specialized.yaml @@ -0,0 +1,70 @@ +- id: perplexity-envfile + name: "Perplexity API Key in .env files" + source: github + category: specialized + query: 'pplx- extension:env' + description: "Finds Perplexity API keys (pplx- prefix) in .env files" + tags: [perplexity, env, tier3] +- id: voyage-envfile + name: "Voyage AI API Key in .env files" + source: github + category: specialized + query: 'VOYAGE_API_KEY extension:env' + description: "Finds Voyage AI embedding keys in .env files" + tags: [voyage, embeddings, env, tier3] +- id: jina-envfile + name: "Jina AI API Key in .env files" + source: github + category: specialized + query: 'jina_ extension:env' + description: "Finds Jina AI API keys (jina_ prefix) in .env files" + tags: [jina, embeddings, env, tier3] +- id: assemblyai-envfile + name: "AssemblyAI API Key in .env files" + source: github + category: specialized + query: 'ASSEMBLYAI_API_KEY extension:env' + description: "Finds AssemblyAI speech-to-text keys in .env files" + tags: [assemblyai, speech, env, tier3] +- id: deepgram-envfile + name: "Deepgram API Key in .env files" + source: github + category: specialized + query: 'DEEPGRAM_API_KEY extension:env' + description: "Finds Deepgram speech API keys in .env files" + tags: [deepgram, speech, env, tier3] +- id: elevenlabs-envfile + name: "ElevenLabs API Key in .env files" + source: github + category: specialized + query: 'ELEVENLABS_API_KEY extension:env' + description: "Finds ElevenLabs voice synthesis keys in .env files" + tags: [elevenlabs, voice, env, tier3] +- id: stability-envfile + name: "Stability AI Key in .env files" + source: github + category: specialized + query: 'sk-stability- extension:env' + description: "Finds Stability AI image generation keys in .env files" + tags: [stability, image, env, tier3] +- id: huggingface-envfile + name: "Hugging Face Token in .env files" + source: github + category: specialized + query: 'hf_ extension:env' + description: "Finds Hugging Face access tokens (hf_ prefix) in .env files" + tags: [huggingface, env, tier3] +- id: perplexity-config + name: "Perplexity Key in config.yaml" + source: github + category: specialized + query: 'pplx- filename:config.yaml' + description: "Finds Perplexity API keys hard-coded into config.yaml files" + tags: [perplexity, config, tier3] +- id: deepgram-config + name: "Deepgram in .env.local" + source: github + category: specialized + query: 'DEEPGRAM filename:.env.local' + description: "Finds Deepgram references in .env.local files (Next.js style)" + tags: [deepgram, env, nextjs, tier3] diff --git a/pkg/dorks/definitions/github/frontier.yaml b/pkg/dorks/definitions/github/frontier.yaml new file mode 100644 index 0000000..2766033 --- /dev/null +++ b/pkg/dorks/definitions/github/frontier.yaml @@ -0,0 +1,105 @@ +- id: openai-github-envfile + name: "OpenAI Project Key in .env files" + source: github + category: frontier + query: 'sk-proj- extension:env' + description: "Finds OpenAI project keys exposed in committed .env files" + tags: [openai, env, tier1] +- id: openai-github-pyfile + name: "OpenAI Project Key in Python files" + source: github + category: frontier + query: 'sk-proj- extension:py' + description: "Finds OpenAI project keys hard-coded in Python source" + tags: [openai, python, tier1] +- id: openai-github-jsonfile + name: "OpenAI Project Key in JSON files" + source: github + category: frontier + query: 'sk-proj- extension:json' + description: "Finds OpenAI project keys in JSON configs and fixtures" + tags: [openai, json, tier1] +- id: anthropic-github-envfile + name: "Anthropic API Key in .env files" + source: github + category: frontier + query: 'sk-ant-api03- extension:env' + description: "Finds Anthropic Claude API keys exposed in committed .env files" + tags: [anthropic, env, tier1] +- id: anthropic-github-pyfile + name: "Anthropic API Key in Python files" + source: github + category: frontier + query: 'sk-ant-api03- extension:py' + description: "Finds Anthropic Claude API keys hard-coded in Python source" + tags: [anthropic, python, tier1] +- id: google-ai-github-envfile + name: "Google AI Studio Key in .env files" + source: github + category: frontier + query: 'AIzaSy extension:env "GOOGLE_API_KEY"' + description: "Finds Google Generative AI / Gemini keys in .env files" + tags: [google, gemini, env, tier1] +- id: google-ai-github-jsonfile + name: "Google Generative Language Key in JSON" + source: github + category: frontier + query: 'AIzaSy extension:json "generativelanguage"' + description: "Finds Gemini keys adjacent to generativelanguage.googleapis.com references" + tags: [google, gemini, json, tier1] +- id: azure-openai-envfile + name: "Azure OpenAI Key in .env files" + source: github + category: frontier + query: 'AZURE_OPENAI_KEY extension:env' + description: "Finds Azure OpenAI deployment keys in .env files" + tags: [azure, openai, env, tier1] +- id: aws-bedrock-envfile + name: "AWS Bedrock Access Key in .env files" + source: github + category: frontier + query: 'AKIA extension:env "bedrock"' + description: "Finds AWS access keys adjacent to Bedrock references in .env files" + tags: [aws, bedrock, env, tier1] +- id: xai-envfile + name: "xAI Grok Key in .env files" + source: github + category: frontier + query: 'xai- extension:env' + description: "Finds xAI Grok API keys in .env files" + tags: [xai, grok, env, tier2] +- id: cohere-envfile + name: "Cohere API Key in .env files" + source: github + category: frontier + query: 'COHERE_API_KEY extension:env' + description: "Finds Cohere API keys in .env files" + tags: [cohere, env, tier2] +- id: mistral-envfile + name: "Mistral API Key in .env files" + source: github + category: frontier + query: 'MISTRAL_API_KEY extension:env' + description: "Finds Mistral platform keys in .env files" + tags: [mistral, env, tier2] +- id: groq-envfile + name: "Groq API Key in .env files" + source: github + category: frontier + query: 'gsk_ extension:env' + description: "Finds Groq API keys (gsk_ prefix) in .env files" + tags: [groq, env, tier2] +- id: together-envfile + name: "Together AI API Key in .env files" + source: github + category: frontier + query: 'TOGETHER_API_KEY extension:env' + description: "Finds Together.ai inference keys in .env files" + tags: [together, env, tier2] +- id: replicate-envfile + name: "Replicate API Token in .env files" + source: github + category: frontier + query: 'r8_ extension:env' + description: "Finds Replicate API tokens (r8_ prefix) in .env files" + tags: [replicate, env, tier2] diff --git a/pkg/dorks/definitions/github/specialized.yaml b/pkg/dorks/definitions/github/specialized.yaml new file mode 100644 index 0000000..7e7512d --- /dev/null +++ b/pkg/dorks/definitions/github/specialized.yaml @@ -0,0 +1,70 @@ +- id: perplexity-envfile + name: "Perplexity API Key in .env files" + source: github + category: specialized + query: 'pplx- extension:env' + description: "Finds Perplexity API keys (pplx- prefix) in .env files" + tags: [perplexity, env, tier3] +- id: voyage-envfile + name: "Voyage AI API Key in .env files" + source: github + category: specialized + query: 'VOYAGE_API_KEY extension:env' + description: "Finds Voyage AI embedding keys in .env files" + tags: [voyage, embeddings, env, tier3] +- id: jina-envfile + name: "Jina AI API Key in .env files" + source: github + category: specialized + query: 'jina_ extension:env' + description: "Finds Jina AI API keys (jina_ prefix) in .env files" + tags: [jina, embeddings, env, tier3] +- id: assemblyai-envfile + name: "AssemblyAI API Key in .env files" + source: github + category: specialized + query: 'ASSEMBLYAI_API_KEY extension:env' + description: "Finds AssemblyAI speech-to-text keys in .env files" + tags: [assemblyai, speech, env, tier3] +- id: deepgram-envfile + name: "Deepgram API Key in .env files" + source: github + category: specialized + query: 'DEEPGRAM_API_KEY extension:env' + description: "Finds Deepgram speech API keys in .env files" + tags: [deepgram, speech, env, tier3] +- id: elevenlabs-envfile + name: "ElevenLabs API Key in .env files" + source: github + category: specialized + query: 'ELEVENLABS_API_KEY extension:env' + description: "Finds ElevenLabs voice synthesis keys in .env files" + tags: [elevenlabs, voice, env, tier3] +- id: stability-envfile + name: "Stability AI Key in .env files" + source: github + category: specialized + query: 'sk-stability- extension:env' + description: "Finds Stability AI image generation keys in .env files" + tags: [stability, image, env, tier3] +- id: huggingface-envfile + name: "Hugging Face Token in .env files" + source: github + category: specialized + query: 'hf_ extension:env' + description: "Finds Hugging Face access tokens (hf_ prefix) in .env files" + tags: [huggingface, env, tier3] +- id: perplexity-config + name: "Perplexity Key in config.yaml" + source: github + category: specialized + query: 'pplx- filename:config.yaml' + description: "Finds Perplexity API keys hard-coded into config.yaml files" + tags: [perplexity, config, tier3] +- id: deepgram-config + name: "Deepgram in .env.local" + source: github + category: specialized + query: 'DEEPGRAM filename:.env.local' + description: "Finds Deepgram references in .env.local files (Next.js style)" + tags: [deepgram, env, nextjs, tier3] diff --git a/pkg/dorks/loader.go b/pkg/dorks/loader.go index c28b3f4..a3c16d0 100644 --- a/pkg/dorks/loader.go +++ b/pkg/dorks/loader.go @@ -43,10 +43,27 @@ func loadDorks() ([]Dork, error) { if err != nil { return fmt.Errorf("reading dork file %s: %w", path, err) } + // Support two YAML shapes: + // 1. A top-level list of Dork entries (preferred, used by Wave 2+ plans). + // 2. A single Dork as a top-level mapping (legacy one-per-file form). + var list []Dork + if err := yaml.Unmarshal(data, &list); err == nil && len(list) > 0 { + for _, dk := range list { + if err := dk.Validate(); err != nil { + return fmt.Errorf("validating dork %s (%s): %w", path, dk.ID, err) + } + dorks = append(dorks, dk) + } + return nil + } var dk Dork if err := yaml.Unmarshal(data, &dk); err != nil { return fmt.Errorf("parsing dork %s: %w", path, err) } + if strings.TrimSpace(dk.ID) == "" { + // Empty file or placeholder (e.g., .gitkeep-adjacent empty YAML) — ignore. + return nil + } if err := dk.Validate(); err != nil { return fmt.Errorf("validating dork %s: %w", path, err) }