diff --git a/.planning/ROADMAP.md b/.planning/ROADMAP.md
index 47344de..3121d67 100644
--- a/.planning/ROADMAP.md
+++ b/.planning/ROADMAP.md
@@ -177,7 +177,16 @@ Plans:
2. `keyhunter dorks run --source=github --category=frontier` executes all Tier 1 frontier provider dorks against GitHub code search
3. `keyhunter dorks add --source=google --query='site:pastebin.com "sk-ant-api03-"'` persists a custom dork that appears in subsequent `dorks list` output
4. `keyhunter dorks export --format=json` exports all dorks including custom additions
-**Plans**: TBD
+**Plans**: 7 plans
+
+Plans:
+- [ ] 08-01-PLAN.md — Dork schema, go:embed loader, registry, executor interface, custom_dorks storage table
+- [ ] 08-02-PLAN.md — 50 GitHub dork YAML definitions across 5 categories
+- [ ] 08-03-PLAN.md — 30 Google + 20 Shodan dork YAML definitions
+- [ ] 08-04-PLAN.md — 15 Censys + 10 ZoomEye + 10 FOFA + 10 GitLab + 5 Bing dork YAML definitions
+- [ ] 08-05-PLAN.md — Live GitHub Code Search executor (net/http, Retry-After, limit cap)
+- [ ] 08-06-PLAN.md — cmd/dorks.go Cobra tree: list/run/add/export/info/delete
+- [ ] 08-07-PLAN.md — Dork count guardrail test (>=150 total, per-source minimums, ID uniqueness)
### Phase 9: OSINT Infrastructure
**Goal**: The recon engine's `ReconSource` interface, per-source rate limiter architecture, stealth mode, and parallel sweep orchestrator exist and are validated — all individual source modules build on this foundation
diff --git a/.planning/phases/08-dork-engine/08-01-PLAN.md b/.planning/phases/08-dork-engine/08-01-PLAN.md
new file mode 100644
index 0000000..4f81f54
--- /dev/null
+++ b/.planning/phases/08-dork-engine/08-01-PLAN.md
@@ -0,0 +1,327 @@
+---
+phase: 08-dork-engine
+plan: 01
+type: execute
+wave: 1
+depends_on: []
+files_modified:
+ - pkg/dorks/schema.go
+ - pkg/dorks/loader.go
+ - pkg/dorks/registry.go
+ - pkg/dorks/executor.go
+ - pkg/dorks/registry_test.go
+ - pkg/dorks/definitions/.gitkeep
+ - dorks/.gitkeep
+ - pkg/storage/schema.sql
+ - pkg/storage/custom_dorks.go
+ - pkg/storage/custom_dorks_test.go
+autonomous: true
+requirements:
+ - DORK-01
+ - DORK-03
+must_haves:
+ truths:
+ - "pkg/dorks.NewRegistry() loads embedded YAML files without error"
+ - "Registry.List(), Get(id), Stats(), ListBySource(), ListByCategory() return correct data"
+ - "ExecuteDork interface defined and per-source Executor map exists (all stubbed except placeholder)"
+ - "custom_dorks table exists and SaveCustomDork/ListCustomDorks/DeleteCustomDork work round-trip"
+ artifacts:
+ - path: "pkg/dorks/schema.go"
+ provides: "Dork struct matching 08-CONTEXT YAML schema"
+ contains: "type Dork struct"
+ - path: "pkg/dorks/loader.go"
+ provides: "go:embed loader mirroring pkg/providers/loader.go"
+ contains: "//go:embed definitions"
+ - path: "pkg/dorks/registry.go"
+ provides: "Registry with List/Get/Stats/ListBySource/ListByCategory"
+ contains: "func NewRegistry"
+ - path: "pkg/dorks/executor.go"
+ provides: "Executor interface + source dispatch + ErrSourceNotImplemented"
+ contains: "type Executor interface"
+ - path: "pkg/storage/custom_dorks.go"
+ provides: "SaveCustomDork/ListCustomDorks/DeleteCustomDork/GetCustomDork"
+ contains: "custom_dorks"
+ key_links:
+ - from: "pkg/dorks/loader.go"
+ to: "pkg/dorks/definitions/*/*.yaml"
+ via: "go:embed"
+ pattern: "embed.FS"
+ - from: "pkg/storage/schema.sql"
+ to: "custom_dorks table"
+ via: "CREATE TABLE"
+ pattern: "CREATE TABLE IF NOT EXISTS custom_dorks"
+---
+
+
+Foundation of the dork engine: schema, go:embed loader, registry, executor interface,
+and storage table for user-added custom dorks. Mirrors the proven pkg/providers pattern
+from Phase 1 so downstream plans can drop YAML files (150+ dorks total) into pkg/dorks/definitions/{source}/
+and have them immediately load at startup.
+
+Purpose: Unblock parallel Wave 2 plans (50-dork YAML batches and GitHub live executor).
+Output: pkg/dorks package with passing tests + custom_dorks table migration.
+
+
+
+@$HOME/.claude/get-shit-done/workflows/execute-plan.md
+@$HOME/.claude/get-shit-done/templates/summary.md
+
+
+
+@.planning/PROJECT.md
+@.planning/ROADMAP.md
+@.planning/STATE.md
+@.planning/phases/08-dork-engine/08-CONTEXT.md
+@pkg/providers/loader.go
+@pkg/providers/registry.go
+@pkg/storage/db.go
+@pkg/storage/schema.sql
+
+
+
+
+From pkg/providers/loader.go:
+```go
+//go:embed definitions/*.yaml
+var definitionsFS embed.FS
+
+func loadProviders() ([]Provider, error) {
+ fs.WalkDir(definitionsFS, "definitions", func(path string, d fs.DirEntry, err error) error { ... })
+}
+```
+
+From pkg/providers/registry.go:
+```go
+type Registry struct { providers []Provider; index map[string]int; ... }
+func NewRegistry() (*Registry, error)
+func (r *Registry) List() []Provider
+func (r *Registry) Get(name string) (Provider, bool)
+func (r *Registry) Stats() RegistryStats
+```
+
+From pkg/storage/db.go:
+```go
+type DB struct { sql *sql.DB }
+func (db *DB) SQL() *sql.DB
+```
+
+
+
+
+
+
+ Task 1: Dork schema, go:embed loader, registry, executor interface
+
+ pkg/dorks/schema.go,
+ pkg/dorks/loader.go,
+ pkg/dorks/registry.go,
+ pkg/dorks/executor.go,
+ pkg/dorks/registry_test.go,
+ pkg/dorks/definitions/.gitkeep,
+ dorks/.gitkeep
+
+
+ - Test: registry with two synthetic YAMLs under definitions/ loads 2 dorks
+ - Test: Registry.Get("openai-github-envfile") returns the correct Dork
+ - Test: Registry.ListBySource("github") returns only github dorks
+ - Test: Registry.ListByCategory("frontier") returns only frontier dorks
+ - Test: Registry.Stats() returns ByCategory + BySource counts
+ - Test: executor.ExecuteDork with source "shodan" returns ErrSourceNotImplemented
+ - Test: Dork.Validate() rejects empty id/source/query
+
+
+ 1. Create pkg/dorks/schema.go:
+ ```go
+ package dorks
+
+ type Dork struct {
+ ID string `yaml:"id"`
+ Name string `yaml:"name"`
+ Source string `yaml:"source"` // github|google|shodan|censys|zoomeye|fofa|gitlab|bing
+ Category string `yaml:"category"` // frontier|specialized|infrastructure|emerging|enterprise
+ Query string `yaml:"query"`
+ Description string `yaml:"description"`
+ Tags []string `yaml:"tags"`
+ }
+
+ var ValidSources = []string{"github","google","shodan","censys","zoomeye","fofa","gitlab","bing"}
+
+ func (d Dork) Validate() error { /* non-empty id/source/query + source in ValidSources */ }
+ ```
+
+ 2. Create pkg/dorks/loader.go mirroring pkg/providers/loader.go:
+ ```go
+ //go:embed definitions
+ var definitionsFS embed.FS
+
+ func loadDorks() ([]Dork, error) {
+ // fs.WalkDir on "definitions", descend into {source}/ subdirs, parse *.yaml
+ }
+ ```
+ Walk pattern: definitions/github/*.yaml, definitions/google/*.yaml, etc.
+ Every file decoded via yaml.Unmarshal into Dork. Call Validate() per file; wrap
+ errors with file path. Return combined slice.
+
+ 3. Create pkg/dorks/registry.go:
+ ```go
+ type Registry struct {
+ dorks []Dork
+ byID map[string]int
+ bySource map[string][]int
+ byCategory map[string][]int
+ }
+
+ func NewRegistry() (*Registry, error) // uses loadDorks()
+ func NewRegistryFromDorks(ds []Dork) *Registry // for tests
+ func (r *Registry) List() []Dork
+ func (r *Registry) Get(id string) (Dork, bool)
+ func (r *Registry) ListBySource(src string) []Dork
+ func (r *Registry) ListByCategory(cat string) []Dork
+ func (r *Registry) Stats() Stats // {Total int; BySource map[string]int; ByCategory map[string]int}
+ ```
+
+ 4. Create pkg/dorks/executor.go (interface + source dispatcher, stubs only —
+ GitHub real impl comes in Plan 08-05):
+ ```go
+ var ErrSourceNotImplemented = errors.New("dork source not yet implemented")
+ var ErrMissingAuth = errors.New("dork source requires auth credentials")
+
+ type Match struct {
+ DorkID string
+ Source string
+ URL string
+ Snippet string // content chunk to feed into engine detector
+ Path string // file path in repo, if applicable
+ }
+
+ type Executor interface {
+ Source() string
+ Execute(ctx context.Context, d Dork, limit int) ([]Match, error)
+ }
+
+ type Runner struct {
+ executors map[string]Executor
+ }
+
+ func NewRunner() *Runner { return &Runner{executors: map[string]Executor{}} }
+ func (r *Runner) Register(e Executor) { r.executors[e.Source()] = e }
+ func (r *Runner) Run(ctx context.Context, d Dork, limit int) ([]Match, error) {
+ ex, ok := r.executors[d.Source]
+ if !ok { return nil, fmt.Errorf("%w: %s (coming Phase 9-16)", ErrSourceNotImplemented, d.Source) }
+ return ex.Execute(ctx, d, limit)
+ }
+ ```
+ No real executors are registered here — Plan 08-05 wires the GitHub executor via
+ a separate constructor (NewRunnerWithGitHub or similar).
+
+ 5. Create pkg/dorks/registry_test.go with the behavior cases listed above.
+ Use NewRegistryFromDorks for synthetic fixtures — do NOT touch the real
+ embedded FS (downstream plans populate it). One test MAY call NewRegistry()
+ and only assert err is nil or "definitions directory empty" — acceptable
+ either way pre-YAML.
+
+ 6. Create placeholder files to make go:embed succeed with empty tree:
+ - pkg/dorks/definitions/.gitkeep (empty)
+ - dorks/.gitkeep (empty)
+
+ IMPORTANT: go:embed needs at least one matching file, and a directory
+ pattern (`//go:embed definitions`) excludes names starting with '.' or '_',
+ so it fails when only .gitkeep exists. In that case switch the directive
+ to `//go:embed definitions/*` (an explicit glob does match dot-files) and
+ handle the empty case by returning nil dorks (no error) when WalkDir sees
+ only .gitkeep. Test must pass with zero real YAML present.
+
+
+ cd /home/salva/Documents/apikey && go test ./pkg/dorks/... -v
+
+
+ pkg/dorks builds, all registry + executor tests pass, loader tolerates empty
+ definitions tree, ErrSourceNotImplemented returned for unknown source.
+
+
+
+
+ Task 2: custom_dorks storage table + CRUD
+
+ pkg/storage/schema.sql,
+ pkg/storage/custom_dorks.go,
+ pkg/storage/custom_dorks_test.go
+
+
+ - Test: SaveCustomDork inserts a row and returns an auto-increment ID
+ - Test: ListCustomDorks returns all saved custom dorks newest first
+ - Test: GetCustomDork(id) returns the dork or sql.ErrNoRows
+ - Test: DeleteCustomDork(id) removes it; subsequent Get returns ErrNoRows
+ - Test: schema migration is idempotent — since Open(":memory:") creates a fresh DB on each call, verify instead that re-executing schema.sql on the same *sql.DB succeeds (proving the CREATE TABLE IF NOT EXISTS form)
+
+
+ 1. Append to pkg/storage/schema.sql:
+ ```sql
+ CREATE TABLE IF NOT EXISTS custom_dorks (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ dork_id TEXT NOT NULL UNIQUE,
+ name TEXT NOT NULL,
+ source TEXT NOT NULL,
+ category TEXT NOT NULL,
+ query TEXT NOT NULL,
+ description TEXT,
+ tags TEXT, -- JSON array
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP
+ );
+
+ CREATE INDEX IF NOT EXISTS idx_custom_dorks_source ON custom_dorks(source);
+ CREATE INDEX IF NOT EXISTS idx_custom_dorks_category ON custom_dorks(category);
+ ```
+
+ 2. Create pkg/storage/custom_dorks.go:
+ ```go
+ type CustomDork struct {
+ ID int64
+ DorkID string
+ Name string
+ Source string
+ Category string
+ Query string
+ Description string
+ Tags []string
+ CreatedAt time.Time
+ }
+
+ func (db *DB) SaveCustomDork(d CustomDork) (int64, error)
+ func (db *DB) ListCustomDorks() ([]CustomDork, error)
+ func (db *DB) GetCustomDork(id int64) (CustomDork, error) // returns sql.ErrNoRows if missing
+ func (db *DB) GetCustomDorkByDorkID(dorkID string) (CustomDork, error)
+ func (db *DB) DeleteCustomDork(id int64) (int64, error)
+ ```
+ Tags round-tripped via encoding/json (TEXT column). The dork_id column
+ is UNIQUE so a user cannot create duplicate custom dork IDs.
+
+ 3. Create pkg/storage/custom_dorks_test.go covering the behavior cases above.
+ Use storage.Open(":memory:") as the existing storage tests do.
+
+
+ cd /home/salva/Documents/apikey && go test ./pkg/storage/... -run CustomDork -v
+
+
+ custom_dorks table created on Open(), CRUD round-trip tests pass, no
+ regressions in the existing storage test suite.
+
+
+
+
+
+
+- `go build ./...` succeeds
+- `go test ./pkg/dorks/... ./pkg/storage/...` passes
+- `grep -r "//go:embed" pkg/dorks/` shows the definitions embed directive
+
+
+
+- pkg/dorks.NewRegistry() compiles and runs (zero or more embedded dorks)
+- Executor interface + ErrSourceNotImplemented in place for Plan 08-05 and 08-06
+- custom_dorks CRUD functional; downstream `dorks add`/`dorks delete` commands have
+ a storage backend to call
+
+
+
diff --git a/.planning/phases/08-dork-engine/08-02-PLAN.md b/.planning/phases/08-dork-engine/08-02-PLAN.md
new file mode 100644
index 0000000..1621afa
--- /dev/null
+++ b/.planning/phases/08-dork-engine/08-02-PLAN.md
@@ -0,0 +1,227 @@
+---
+phase: 08-dork-engine
+plan: 02
+type: execute
+wave: 2
+depends_on: [08-01]
+files_modified:
+ - pkg/dorks/definitions/github/frontier.yaml
+ - pkg/dorks/definitions/github/specialized.yaml
+ - pkg/dorks/definitions/github/infrastructure.yaml
+ - pkg/dorks/definitions/github/emerging.yaml
+ - pkg/dorks/definitions/github/enterprise.yaml
+ - dorks/github/frontier.yaml
+ - dorks/github/specialized.yaml
+ - dorks/github/infrastructure.yaml
+ - dorks/github/emerging.yaml
+ - dorks/github/enterprise.yaml
+autonomous: true
+requirements:
+ - DORK-01
+ - DORK-02
+ - DORK-04
+must_haves:
+ truths:
+ - "pkg/dorks.NewRegistry() loads at least 50 github dorks"
+ - "Dorks cover all 5 categories (frontier, specialized, infrastructure, emerging, enterprise)"
+ - "Registry.ListBySource(\"github\") returns >= 50 entries"
+ - "All dork IDs are unique and pass Dork.Validate()"
+ artifacts:
+ - path: "pkg/dorks/definitions/github/frontier.yaml"
+ provides: "~15 GitHub dorks for Tier 1/2 frontier providers"
+ contains: "source: github"
+ - path: "pkg/dorks/definitions/github/specialized.yaml"
+ provides: "~10 GitHub dorks for Tier 3 specialized providers"
+ contains: "category: specialized"
+ key_links:
+ - from: "pkg/dorks/definitions/github/*.yaml"
+ to: "pkg/dorks/loader.go go:embed"
+ via: "compile-time embed"
+ pattern: "source: github"
+---
+
+
+Populate the GitHub source with 50 production dork queries covering every provider
+category. Each dork is a real GitHub Code Search query formatted per the Dork schema
+from Plan 08-01. Mirrored into `dorks/github/` (user-visible) and
+`pkg/dorks/definitions/github/` (go:embed target) per the Phase 1 dual-location
+pattern.
+
+Purpose: Half of the 150+ dork requirement (DORK-02) lives here. GitHub is the
+largest single source because it is the primary live executor (Plan 08-05) and
+because leaked keys overwhelmingly show up in .env/config files.
+Output: 50 GitHub dorks, embedded and loadable.
+
+
+
+@$HOME/.claude/get-shit-done/workflows/execute-plan.md
+@$HOME/.claude/get-shit-done/templates/summary.md
+
+
+
+@.planning/phases/08-dork-engine/08-CONTEXT.md
+@.planning/phases/08-dork-engine/08-01-PLAN.md
+@pkg/providers/definitions/openai.yaml
+@pkg/dorks/schema.go
+
+
+
+
+
+ Task 1: 25 GitHub dorks — frontier + specialized categories
+
+ pkg/dorks/definitions/github/frontier.yaml,
+ pkg/dorks/definitions/github/specialized.yaml,
+ dorks/github/frontier.yaml,
+ dorks/github/specialized.yaml
+
+
+ Create both files with the YAML list format supported by the loader. Each file
+ is a YAML document containing a top-level list of Dork entries. If the loader
+ in 08-01 was written to expect one-Dork-per-file, update it here to also
+ accept a list — check pkg/dorks/loader.go and adapt. Preferred: the loader
+ accepts either a wrapper (`type dorkFile struct { Dorks []Dork }`) or a
+ top-level list; the YAML files written in this plan use the top-level
+ list form.
+
+ File format (list of Dork):
+ ```yaml
+ - id: openai-github-envfile
+ name: "OpenAI API Key in .env files"
+ source: github
+ category: frontier
+ query: 'sk-proj- extension:env'
+ description: "Finds OpenAI project keys committed in .env files"
+ tags: [openai, env, tier1]
+ - id: openai-github-pyfile
+ ...
+ ```
+
+ **frontier.yaml — 15 dorks** covering Tier 1/2 providers. Each provider gets
+ 1-2 dorks. Use real, validated prefixes from pkg/providers/definitions/*.yaml:
+ - openai-github-envfile: `sk-proj- extension:env`
+ - openai-github-pyfile: `sk-proj- extension:py`
+ - openai-github-jsonfile: `sk-proj- extension:json`
+ - anthropic-github-envfile: `sk-ant-api03- extension:env`
+ - anthropic-github-pyfile: `sk-ant-api03- extension:py`
+ - google-ai-github-envfile: `AIzaSy extension:env "GOOGLE_API_KEY"`
+ - google-ai-github-jsonfile: `AIzaSy extension:json "generativelanguage"`
+ - azure-openai-envfile: `AZURE_OPENAI_KEY extension:env`
+ - aws-bedrock-envfile: `AKIA extension:env "bedrock"`
+ - xai-envfile: `xai- extension:env`
+ - cohere-envfile: `COHERE_API_KEY extension:env`
+ - mistral-envfile: `MISTRAL_API_KEY extension:env`
+ - groq-envfile: `gsk_ extension:env`
+ - together-envfile: `TOGETHER_API_KEY extension:env`
+ - replicate-envfile: `r8_ extension:env`
+
+ All with category: frontier, appropriate tags. Each query MUST be a literal
+ GitHub Code Search query — no templating.
+
+ **specialized.yaml — 10 dorks** covering Tier 3 providers:
+ - perplexity-envfile: `pplx- extension:env`
+ - voyage-envfile: `VOYAGE_API_KEY extension:env`
+ - jina-envfile: `jina_ extension:env`
+ - assemblyai-envfile: `ASSEMBLYAI_API_KEY extension:env`
+ - deepgram-envfile: `DEEPGRAM_API_KEY extension:env`
+ - elevenlabs-envfile: `ELEVENLABS_API_KEY extension:env`
+ - stability-envfile: `sk-stability- extension:env`
+ - huggingface-envfile: `hf_ extension:env`
+ - perplexity-config: `pplx- filename:config.yaml`
+ - deepgram-config: `DEEPGRAM filename:.env.local`
+
+ category: specialized.
+
+ Write identical content to both `pkg/dorks/definitions/github/{file}.yaml`
+ and `dorks/github/{file}.yaml`. The pkg/ copy is for go:embed, the dorks/
+ copy is user-visible.
+
+ **Adapt loader if needed.** If 08-01 wrote `yaml.Unmarshal(data, &Dork{})`
+ (single dork per file), change to:
+ ```go
+ var list []Dork
+ if err := yaml.Unmarshal(data, &list); err != nil { return err }
+ dorks = append(dorks, list...)
+ ```
+ Run `go test ./pkg/dorks/...` to confirm.
+
+
+ cd /home/salva/Documents/apikey && go test ./pkg/dorks/... && go run ./cmd/... 2>&1 || true; awk 'FNR==1{print FILENAME}/^- id:/{c++}END{print "count:",c}' pkg/dorks/definitions/github/frontier.yaml pkg/dorks/definitions/github/specialized.yaml
+
+ 25 dorks loaded, all pass Validate(), tests pass.
+
+
+
+ Task 2: 25 GitHub dorks — infrastructure + emerging + enterprise
+
+ pkg/dorks/definitions/github/infrastructure.yaml,
+ pkg/dorks/definitions/github/emerging.yaml,
+ pkg/dorks/definitions/github/enterprise.yaml,
+ dorks/github/infrastructure.yaml,
+ dorks/github/emerging.yaml,
+ dorks/github/enterprise.yaml
+
+
+ Create six YAML files (three pairs) using the same list format as Task 1.
+
+ **infrastructure.yaml — 10 dorks** (Tier 5 gateways + Tier 8 self-hosted):
+ - openrouter-envfile: `sk-or-v1- extension:env`
+ - openrouter-pyfile: `sk-or-v1- extension:py`
+ - litellm-envfile: `LITELLM_MASTER_KEY extension:env`
+ - portkey-envfile: `PORTKEY_API_KEY extension:env`
+ - helicone-envfile: `sk-helicone- extension:env`
+ - cloudflare-ai-envfile: `CF_API_TOKEN "ai.run"`
+ - vercel-ai-envfile: `VERCEL_AI extension:env`
+ - ollama-config: `OLLAMA_HOST filename:docker-compose.yaml`
+ - vllm-config: `vllm.entrypoints filename:config.yaml`
+ - localai-envfile: `LOCALAI_API_KEY extension:env`
+
+ category: infrastructure.
+
+ **emerging.yaml — 10 dorks** (Tier 4 Chinese + Tier 6 niche + vector DBs):
+ - deepseek-envfile: `sk- extension:env "deepseek"`
+ - moonshot-envfile: `sk- extension:env "moonshot"`
+ - qwen-envfile: `DASHSCOPE_API_KEY extension:env`
+ - zhipu-envfile: `ZHIPU_API_KEY extension:env`
+ - minimax-envfile: `MINIMAX_API_KEY extension:env`
+ - pinecone-envfile: `PINECONE_API_KEY extension:env`
+ - weaviate-envfile: `WEAVIATE_API_KEY extension:env`
+ - qdrant-envfile: `QDRANT_API_KEY extension:env`
+ - chroma-envfile: `CHROMA_API_KEY extension:env`
+ - writer-envfile: `WRITER_API_KEY extension:env`
+
+ category: emerging.
+
+ **enterprise.yaml — 5 dorks** (Tier 7 dev tools + Tier 9 enterprise):
+ - codeium-envfile: `CODEIUM_API_KEY extension:env`
+ - tabnine-envfile: `TABNINE_TOKEN extension:env`
+ - databricks-envfile: `DATABRICKS_TOKEN extension:env`
+ - snowflake-cortex: `SNOWFLAKE_PASSWORD "cortex"`
+ - watsonx-envfile: `WATSONX_APIKEY extension:env`
+
+ category: enterprise.
+
+ Write each YAML to both pkg/dorks/definitions/github/ and dorks/github/.
+ All dorks use source: github.
+
+
+ cd /home/salva/Documents/apikey && go test ./pkg/dorks/... && grep -c '^- id:' pkg/dorks/definitions/github/*.yaml | awk -F: '{s+=$NF}END{print "total github dorks:",s; if(s<50) exit 1}'
+
+ 50 total GitHub dorks across 5 category files, loader picks all up, counts pass.
+
+
+
+
+
+`cd /home/salva/Documents/apikey && go test ./pkg/dorks/...` passes
+Registry reports >= 50 dorks via a throwaway main or test assertion.
+
+
+
+- 50 GitHub dorks loadable via pkg/dorks.NewRegistry()
+- All 5 categories represented
+- Dual location (dorks/ + pkg/dorks/definitions/) maintained
+
+
+
diff --git a/.planning/phases/08-dork-engine/08-03-PLAN.md b/.planning/phases/08-dork-engine/08-03-PLAN.md
new file mode 100644
index 0000000..d4c6177
--- /dev/null
+++ b/.planning/phases/08-dork-engine/08-03-PLAN.md
@@ -0,0 +1,192 @@
+---
+phase: 08-dork-engine
+plan: 03
+type: execute
+wave: 2
+depends_on: [08-01]
+files_modified:
+ - pkg/dorks/definitions/google/frontier.yaml
+ - pkg/dorks/definitions/google/specialized.yaml
+ - pkg/dorks/definitions/google/infrastructure.yaml
+ - pkg/dorks/definitions/shodan/frontier.yaml
+ - pkg/dorks/definitions/shodan/infrastructure.yaml
+ - dorks/google/frontier.yaml
+ - dorks/google/specialized.yaml
+ - dorks/google/infrastructure.yaml
+ - dorks/shodan/frontier.yaml
+ - dorks/shodan/infrastructure.yaml
+autonomous: true
+requirements:
+ - DORK-01
+ - DORK-02
+ - DORK-04
+must_haves:
+ truths:
+ - "Registry.ListBySource(\"google\") returns >= 30 entries"
+ - "Registry.ListBySource(\"shodan\") returns >= 20 entries"
+ - "All google dorks use real site:/filetype: operators"
+ - "All shodan dorks use real shodan query syntax (ssl.cert, http.title, etc.)"
+ artifacts:
+ - path: "pkg/dorks/definitions/google/"
+ provides: "30 Google dorks across 3 categories"
+ contains: "source: google"
+ - path: "pkg/dorks/definitions/shodan/"
+ provides: "20 Shodan dorks"
+ contains: "source: shodan"
+ key_links:
+ - from: "pkg/dorks/definitions/{google,shodan}/*.yaml"
+ to: "pkg/dorks/loader.go"
+ via: "go:embed subdir walk"
+ pattern: "source: (google|shodan)"
+---
+
+
+Populate Google (30) and Shodan (20) sources — 50 dorks total. Google uses site:
+and filetype: operators to find leaked keys on non-GitHub platforms (pastebin,
+gitlab raw, etc.). Shodan uses its banner/certificate query syntax to surface
+exposed self-hosted LLM services (Ollama, vLLM, LocalAI, LiteLLM dashboards).
+
+Purpose: Broadens DORK-02 coverage beyond GitHub into the two next-most-important
+sources. Execution of these queries is deferred to Phase 11 (Google) and Phase 12
+(Shodan) — this plan only delivers the definitions.
+Output: 50 loadable Google + Shodan dorks.
+
+
+
+@$HOME/.claude/get-shit-done/workflows/execute-plan.md
+@$HOME/.claude/get-shit-done/templates/summary.md
+
+
+
+@.planning/phases/08-dork-engine/08-CONTEXT.md
+@.planning/phases/08-dork-engine/08-01-PLAN.md
+@pkg/dorks/schema.go
+
+
+
+
+
+ Task 1: 30 Google dorks (site/filetype operators)
+
+ pkg/dorks/definitions/google/frontier.yaml,
+ pkg/dorks/definitions/google/specialized.yaml,
+ pkg/dorks/definitions/google/infrastructure.yaml,
+ dorks/google/frontier.yaml,
+ dorks/google/specialized.yaml,
+ dorks/google/infrastructure.yaml
+
+
+ Create three frontier/specialized/infrastructure files (dual location).
+ All entries use source: google. Queries are real Google search operators
+ (site:, filetype:, intext:, inurl:). No HTML escaping — raw text.
+
+ **frontier.yaml — 12 dorks:**
+ - google-openai-pastebin: `site:pastebin.com "sk-proj-"`
+ - google-openai-gitlab-raw: `site:gitlab.com/*/raw/* "sk-proj-"`
+ - google-openai-env-leak: `intext:"OPENAI_API_KEY=sk-proj-" filetype:env`
+ - google-anthropic-pastebin: `site:pastebin.com "sk-ant-api03-"`
+ - google-anthropic-env: `intext:"ANTHROPIC_API_KEY=sk-ant-" filetype:env`
+ - google-ai-pastebin: `site:pastebin.com "AIzaSy" "generativelanguage"`
+ - google-ai-gitlab: `site:gitlab.com "AIzaSy" inurl:config`
+ - google-groq-pastebin: `site:pastebin.com "gsk_"`
+ - google-cohere-env: `intext:"COHERE_API_KEY" filetype:env`
+ - google-mistral-env: `intext:"MISTRAL_API_KEY=" filetype:env`
+ - google-xai-pastebin: `site:pastebin.com "xai-"`
+ - google-replicate-env: `intext:"r8_" filetype:env -site:github.com`
+
+ category: frontier.
+
+ **specialized.yaml — 10 dorks:**
+ - google-perplexity-pastebin: `site:pastebin.com "pplx-"`
+ - google-hf-pastebin: `site:pastebin.com "hf_"`
+ - google-elevenlabs-env: `intext:"ELEVENLABS_API_KEY" filetype:env`
+ - google-deepgram-env: `intext:"DEEPGRAM_API_KEY" filetype:env`
+ - google-assemblyai-pastebin: `site:pastebin.com "ASSEMBLYAI_API_KEY"`
+ - google-stability-env: `intext:"sk-stability-" filetype:env`
+ - google-jina-env: `intext:"jina_" filetype:env`
+ - google-voyage-env: `intext:"VOYAGE_API_KEY" filetype:env`
+ - google-hf-notebook: `site:colab.research.google.com "hf_"`
+ - google-hf-kaggle: `site:kaggle.com "hf_" inurl:notebook`
+
+ category: specialized.
+
+ **infrastructure.yaml — 8 dorks:**
+ - google-openrouter-pastebin: `site:pastebin.com "sk-or-v1-"`
+ - google-openrouter-env: `intext:"sk-or-v1-" filetype:env`
+ - google-litellm-config: `intext:"LITELLM_MASTER_KEY" filetype:yaml`
+ - google-helicone-env: `intext:"sk-helicone-" filetype:env`
+ - google-portkey-env: `intext:"PORTKEY_API_KEY" filetype:env`
+ - google-ollama-exposed: `intitle:"Ollama" inurl:":11434"`
+ - google-vllm-exposed: `intitle:"vLLM" "/v1/models"`
+ - google-localai-exposed: `intitle:"LocalAI" "/v1/chat/completions"`
+
+ category: infrastructure.
+
+
+ cd /home/salva/Documents/apikey && go test ./pkg/dorks/... && grep -c '^- id:' pkg/dorks/definitions/google/*.yaml | awk -F: '{s+=$NF}END{print "google:",s; if(s<30) exit 1}'
+
+ 30 Google dorks loaded, all pass Validate(), tests pass.
+
+
+
+ Task 2: 20 Shodan dorks (banner/cert queries)
+
+ pkg/dorks/definitions/shodan/frontier.yaml,
+ pkg/dorks/definitions/shodan/infrastructure.yaml,
+ dorks/shodan/frontier.yaml,
+ dorks/shodan/infrastructure.yaml
+
+
+ Two Shodan files, dual location. All entries source: shodan. Queries use
+ real Shodan syntax: http.title, http.html, ssl.cert.subject.cn, product,
+ port, org, http.favicon.hash.
+
+ **frontier.yaml — 6 dorks** (exposed frontier-adjacent APIs):
+ - shodan-openai-proxy: `http.title:"openai" http.html:"/v1/chat/completions"`
+ - shodan-litellm-proxy: `http.title:"LiteLLM" port:4000`
+ - shodan-openai-nginx: `http.html:"OPENAI_API_KEY" http.component:nginx`
+ - shodan-azure-openai: `ssl.cert.subject.cn:"openai.azure.com"`
+ - shodan-bedrock-runtime: `ssl.cert.subject.cn:"bedrock-runtime"`
+ - shodan-anthropic-proxy: `http.html:"anthropic" http.html:"messages"`
+
+ category: frontier.
+
+ **infrastructure.yaml — 14 dorks** (the bulk: self-hosted + gateway):
+ - shodan-ollama-default: `product:"Ollama" port:11434`
+ - shodan-ollama-tags: `http.html:"/api/tags" http.title:"Ollama"`
+ - shodan-vllm: `http.html:"vLLM" http.html:"/v1/models"`
+ - shodan-localai: `http.title:"LocalAI"`
+ - shodan-lmstudio: `http.title:"LM Studio"`
+ - shodan-textgenwebui: `http.title:"text-generation-webui"`
+ - shodan-openwebui: `http.title:"Open WebUI" http.html:"/api/chat"`
+ - shodan-openrouter-proxy: `http.html:"openrouter.ai" port:443`
+ - shodan-portkey-gateway: `http.title:"Portkey"`
+ - shodan-helicone-gateway: `http.html:"helicone" http.html:"/v1"`
+ - shodan-triton-server: `http.html:"NVIDIA Triton" http.html:"/v2/models"`
+ - shodan-tgi-hf: `http.html:"text-generation-inference" "/generate"`
+ - shodan-langserve: `http.title:"LangServe"`
+ - shodan-fastchat: `http.title:"FastChat"`
+
+ category: infrastructure.
+
+
+ cd /home/salva/Documents/apikey && go test ./pkg/dorks/... && grep -c '^- id:' pkg/dorks/definitions/shodan/*.yaml | awk -F: '{s+=$NF}END{print "shodan:",s; if(s<20) exit 1}'
+
+ 20 Shodan dorks loaded, all pass Validate().
+
+
+
+
+
+`go test ./pkg/dorks/...` passes; Google+Shodan sources visible in registry stats.
+
+
+
+- 30 Google dorks + 20 Shodan dorks = 50 new dorks
+- Cumulative total (with Plan 08-02) reaches 100
+- All dorks pass schema validation
+
+
+
diff --git a/.planning/phases/08-dork-engine/08-04-PLAN.md b/.planning/phases/08-dork-engine/08-04-PLAN.md
new file mode 100644
index 0000000..229dc1e
--- /dev/null
+++ b/.planning/phases/08-dork-engine/08-04-PLAN.md
@@ -0,0 +1,201 @@
+---
+phase: 08-dork-engine
+plan: 04
+type: execute
+wave: 2
+depends_on: [08-01]
+files_modified:
+ - pkg/dorks/definitions/censys/all.yaml
+ - pkg/dorks/definitions/zoomeye/all.yaml
+ - pkg/dorks/definitions/fofa/all.yaml
+ - pkg/dorks/definitions/gitlab/all.yaml
+ - pkg/dorks/definitions/bing/all.yaml
+ - dorks/censys/all.yaml
+ - dorks/zoomeye/all.yaml
+ - dorks/fofa/all.yaml
+ - dorks/gitlab/all.yaml
+ - dorks/bing/all.yaml
+autonomous: true
+requirements:
+ - DORK-01
+ - DORK-02
+ - DORK-04
+must_haves:
+ truths:
+ - "Registry.ListBySource(\"censys\") returns >= 15"
+ - "Registry.ListBySource(\"zoomeye\") returns >= 10"
+ - "Registry.ListBySource(\"fofa\") returns >= 10"
+ - "Registry.ListBySource(\"gitlab\") returns >= 10"
+ - "Registry.ListBySource(\"bing\") returns >= 5"
+ - "Combined total from plans 02+03+04 is >= 150"
+ artifacts:
+ - path: "pkg/dorks/definitions/censys/all.yaml"
+ provides: "15 Censys search queries"
+ contains: "source: censys"
+ - path: "pkg/dorks/definitions/zoomeye/all.yaml"
+ provides: "10 ZoomEye queries"
+ contains: "source: zoomeye"
+ - path: "pkg/dorks/definitions/fofa/all.yaml"
+ provides: "10 FOFA queries"
+ contains: "source: fofa"
+ - path: "pkg/dorks/definitions/gitlab/all.yaml"
+ provides: "10 GitLab code search queries"
+ contains: "source: gitlab"
+ - path: "pkg/dorks/definitions/bing/all.yaml"
+ provides: "5 Bing dorks"
+ contains: "source: bing"
+ key_links:
+ - from: "pkg/dorks/definitions/{censys,zoomeye,fofa,gitlab,bing}/all.yaml"
+ to: "pkg/dorks/loader.go"
+ via: "go:embed subdir walk"
+ pattern: "source: (censys|zoomeye|fofa|gitlab|bing)"
+---
+
+
+Fill the remaining 5 sources to hit the 150-dork total: Censys (15), ZoomEye (10),
+FOFA (10), GitLab (10), Bing (5). Each source uses its own native query syntax.
+Execution of these is deferred to later OSINT phases (9-16); this plan only
+delivers the definitions.
+
+Purpose: Closes DORK-02 (150+ dorks) and ensures DORK-04 (category filtering)
+has coverage across every source.
+Output: 50 dorks across 5 sources, hitting the 150 total when combined with
+Plans 08-02 (50 GitHub) + 08-03 (50 Google/Shodan).
+
+
+
+@$HOME/.claude/get-shit-done/workflows/execute-plan.md
+@$HOME/.claude/get-shit-done/templates/summary.md
+
+
+
+@.planning/phases/08-dork-engine/08-CONTEXT.md
+@pkg/dorks/schema.go
+
+
+
+
+
+ Task 1: 25 Censys + ZoomEye dorks
+
+ pkg/dorks/definitions/censys/all.yaml,
+ pkg/dorks/definitions/zoomeye/all.yaml,
+ dorks/censys/all.yaml,
+ dorks/zoomeye/all.yaml
+
+
+ Dual location. List YAML format identical to plans 02-03.
+
+ **censys/all.yaml — 15 dorks** using Censys Search 2.0 syntax
+ (services.http.response.*, services.tls.certificates.*):
+ - censys-ollama-11434: `services.port: 11434 and services.http.response.body: "Ollama"`
+ - censys-vllm: `services.http.response.body: "vLLM" and services.http.response.body: "/v1/models"`
+ - censys-localai: `services.http.response.html_title: "LocalAI"`
+ - censys-openwebui: `services.http.response.html_title: "Open WebUI"`
+ - censys-lmstudio: `services.http.response.html_title: "LM Studio"`
+ - censys-triton: `services.http.response.body: "NVIDIA Triton" and services.http.response.body: "/v2/models"`
+ - censys-tgi: `services.http.response.body: "text-generation-inference"`
+ - censys-litellm: `services.http.response.html_title: "LiteLLM" and services.port: 4000`
+ - censys-portkey: `services.http.response.html_title: "Portkey"`
+ - censys-langserve: `services.http.response.html_title: "LangServe"`
+ - censys-openai-azure-cert: `services.tls.certificates.leaf_data.subject.common_name: "openai.azure.com"`
+ - censys-bedrock-cert: `services.tls.certificates.leaf_data.subject.common_name: "bedrock-runtime"`
+ - censys-fastchat: `services.http.response.html_title: "FastChat"`
+ - censys-textgen-webui: `services.http.response.html_title: "text-generation-webui"`
+ - censys-openai-proxy: `services.http.response.body: "/v1/chat/completions" and services.http.response.body: "OPENAI_API_KEY"`
+
+ Category mix: 12 infrastructure, 3 frontier (the azure/bedrock/openai-proxy entries).
+
+ **zoomeye/all.yaml — 10 dorks** using ZoomEye query syntax
+ (app:, title:, service:, port:):
+ - zoomeye-ollama: `port:11434 +app:"Ollama"`
+ - zoomeye-vllm: `title:"vLLM" +app:"nginx"`
+ - zoomeye-localai: `title:"LocalAI"`
+ - zoomeye-openwebui: `title:"Open WebUI"`
+ - zoomeye-litellm: `title:"LiteLLM" +port:4000`
+ - zoomeye-lmstudio: `title:"LM Studio"`
+ - zoomeye-triton-grpc: `port:8001 +service:"triton"`
+ - zoomeye-fastchat: `title:"FastChat"`
+ - zoomeye-langserve: `title:"LangServe"`
+ - zoomeye-openai-proxy: `title:"openai" +"/v1/chat/completions"`
+
+ Nine are category: infrastructure; the last (zoomeye-openai-proxy) is category: frontier.
+
+
+ cd /home/salva/Documents/apikey && go test ./pkg/dorks/... && grep -c '^- id:' pkg/dorks/definitions/censys/*.yaml pkg/dorks/definitions/zoomeye/*.yaml | awk -F: '{s+=$NF}END{print "censys+zoomeye:",s; if(s<25) exit 1}'
+
+ 25 dorks loaded across Censys + ZoomEye.
+
+
+
+ Task 2: 25 FOFA + GitLab + Bing dorks
+
+ pkg/dorks/definitions/fofa/all.yaml,
+ pkg/dorks/definitions/gitlab/all.yaml,
+ pkg/dorks/definitions/bing/all.yaml,
+ dorks/fofa/all.yaml,
+ dorks/gitlab/all.yaml,
+ dorks/bing/all.yaml
+
+
+ Dual location. List YAML format.
+
+ **fofa/all.yaml — 10 dorks** using FOFA syntax (title=, body=, port=, cert=):
+ - fofa-ollama: `port="11434" && body="Ollama"`
+ - fofa-vllm: `title="vLLM"`
+ - fofa-localai: `title="LocalAI"`
+ - fofa-openwebui: `title="Open WebUI"`
+ - fofa-litellm: `title="LiteLLM" && port="4000"`
+ - fofa-openai-azure: `cert="openai.azure.com"`
+ - fofa-triton: `body="NVIDIA Triton" && body="/v2/models"`
+ - fofa-langserve: `title="LangServe"`
+ - fofa-tgi-hf: `body="text-generation-inference"`
+ - fofa-openai-proxy: `body="/v1/chat/completions" && body="api_key"`
+
+ Mix: 8 infrastructure, 2 frontier (azure cert, openai-proxy).
+
+ **gitlab/all.yaml — 10 dorks** for GitLab code search API:
+ - gitlab-openai-envfile: `sk-proj- extension:env`
+ - gitlab-anthropic-envfile: `sk-ant-api03- extension:env`
+ - gitlab-google-ai-json: `AIzaSy extension:json "generativelanguage"`
+ - gitlab-groq-envfile: `gsk_ extension:env`
+ - gitlab-cohere-envfile: `COHERE_API_KEY extension:env`
+ - gitlab-hf-pyfile: `hf_ extension:py`
+ - gitlab-openrouter-env: `sk-or-v1- extension:env`
+ - gitlab-perplexity-env: `pplx- extension:env`
+ - gitlab-deepseek-env: `DEEPSEEK_API_KEY extension:env`
+ - gitlab-pinecone-env: `PINECONE_API_KEY extension:env`
+
+ Categories spread across frontier/specialized/infrastructure/emerging.
+
+ **bing/all.yaml — 5 dorks** using Bing operators (site:, filetype:,
+ intitle:, inbody:):
+ - bing-openai-pastebin: `site:pastebin.com "sk-proj-"`
+ - bing-anthropic-pastebin: `site:pastebin.com "sk-ant-api03-"`
+ - bing-ollama-exposed: `intitle:"Ollama" inbody:"/api/tags"`
+ - bing-openai-env: `filetype:env "OPENAI_API_KEY=sk-proj-"`
+ - bing-hf-pastebin: `site:pastebin.com "hf_"`
+
+ Mix: 3 frontier, 1 specialized (hf), 1 infrastructure (ollama).
+
+
+ cd /home/salva/Documents/apikey && go test ./pkg/dorks/... && grep -rhc '^- id:' pkg/dorks/definitions/ | awk '{s+=$1}END{print "grand total:",s; if(s<150) exit 1}'
+
+ 25 more dorks loaded; grand total across all sources >= 150.
+
+
+
+
+
+`go test ./pkg/dorks/...` passes; `grep -rhc '^- id:' pkg/dorks/definitions/ | paste -sd+ | bc` >= 150.
+
+
+
+- Censys 15 + ZoomEye 10 + FOFA 10 + GitLab 10 + Bing 5 = 50 dorks added
+- Cumulative phase total reaches 150+ (DORK-02 satisfied)
+- All 8 sources present in registry stats
+
+
+
diff --git a/.planning/phases/08-dork-engine/08-05-PLAN.md b/.planning/phases/08-dork-engine/08-05-PLAN.md
new file mode 100644
index 0000000..9371dfe
--- /dev/null
+++ b/.planning/phases/08-dork-engine/08-05-PLAN.md
@@ -0,0 +1,264 @@
+---
+phase: 08-dork-engine
+plan: 05
+type: execute
+wave: 2
+depends_on: [08-01]
+files_modified:
+ - pkg/dorks/github.go
+ - pkg/dorks/github_test.go
+autonomous: true
+requirements:
+ - DORK-02
+must_haves:
+ truths:
+ - "GitHubExecutor.Source() returns \"github\""
+ - "GitHubExecutor.Execute runs GitHub Code Search against api.github.com and returns []Match"
+ - "Missing token returns ErrMissingAuth with setup instructions"
+ - "Retry-After header is honored (sleep + retry once) for 403/429"
+ - "Response items mapped to Match with URL, Path, Snippet (text_matches)"
+ artifacts:
+ - path: "pkg/dorks/github.go"
+ provides: "GitHubExecutor implementing Executor interface"
+ contains: "type GitHubExecutor struct"
+ - path: "pkg/dorks/github_test.go"
+ provides: "httptest server exercising success/auth/rate-limit paths"
+ contains: "httptest.NewServer"
+ key_links:
+ - from: "pkg/dorks/github.go"
+ to: "https://api.github.com/search/code"
+ via: "net/http client"
+ pattern: "api.github.com/search/code"
+ - from: "pkg/dorks/github.go"
+ to: "pkg/dorks/executor.go Executor interface"
+ via: "interface satisfaction"
+ pattern: "Execute\\(ctx"
+---
+
+
+Implement the live GitHub Code Search executor — the only source that actually
+runs in Phase 8 (all other executors stay stubbed with ErrSourceNotImplemented).
+Hits `GET https://api.github.com/search/code?q={query}`, authenticated via
+GITHUB_TOKEN env var / viper config. Honors rate-limit response codes. Maps
+response items to pkg/dorks.Match entries consumable by the engine pipeline in
+downstream phases.
+
+Purpose: Satisfies the "GitHub live" slice of DORK-02 and unblocks `keyhunter
+dorks run --source=github` in Plan 08-06.
+Output: Working pkg/dorks.GitHubExecutor + httptest-backed test suite.
+
+
+
+@$HOME/.claude/get-shit-done/workflows/execute-plan.md
+@$HOME/.claude/get-shit-done/templates/summary.md
+
+
+
+@.planning/phases/08-dork-engine/08-CONTEXT.md
+@.planning/phases/08-dork-engine/08-01-PLAN.md
+@pkg/dorks/executor.go
+
+
+
+```go
+type Executor interface {
+ Source() string
+ Execute(ctx context.Context, d Dork, limit int) ([]Match, error)
+}
+
+type Match struct {
+ DorkID string
+ Source string
+ URL string
+ Snippet string
+ Path string
+}
+
+var ErrMissingAuth = errors.New("dork source requires auth credentials")
+```
+
+
+
+
+
+
+ Task 1: GitHubExecutor with net/http + Retry-After handling
+ pkg/dorks/github.go, pkg/dorks/github_test.go
+
+ - Test: Execute with empty token returns ErrMissingAuth (wrapped) without hitting HTTP
+ - Test: Execute with httptest server returning 200 + items parses response into []Match with URL/Path/Snippet
+ - Test: limit=5 caps returned Match count at 5 even if API returns 10
+ - Test: 403 with X-RateLimit-Remaining=0 and Retry-After=1 sleeps and retries once, then succeeds
+ - Test: 401 returns ErrMissingAuth (token rejected)
+ - Test: 422 (invalid query) returns a descriptive error containing the status code
+ - Test: Source() returns "github"
+
+
+ Create pkg/dorks/github.go:
+
+ ```go
+ package dorks
+
+ import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "io"
+ "net/http"
+ "strconv"
+ "time"
+ )
+
+ type GitHubExecutor struct {
+ Token string
+ BaseURL string // default "https://api.github.com", overridable for tests
+ HTTPClient *http.Client
+ MaxRetries int // default 1
+ }
+
+ func NewGitHubExecutor(token string) *GitHubExecutor {
+ return &GitHubExecutor{
+ Token: token,
+ BaseURL: "https://api.github.com",
+ HTTPClient: &http.Client{Timeout: 30 * time.Second},
+ MaxRetries: 1,
+ }
+ }
+
+ func (g *GitHubExecutor) Source() string { return "github" }
+
+ type ghSearchResponse struct {
+ TotalCount int `json:"total_count"`
+ Items []struct {
+ Name string `json:"name"`
+ Path string `json:"path"`
+ HTMLURL string `json:"html_url"`
+ Repository struct {
+ FullName string `json:"full_name"`
+ } `json:"repository"`
+ TextMatches []struct {
+ Fragment string `json:"fragment"`
+ } `json:"text_matches"`
+ } `json:"items"`
+ }
+
+ func (g *GitHubExecutor) Execute(ctx context.Context, d Dork, limit int) ([]Match, error) {
+ if g.Token == "" {
+ return nil, fmt.Errorf("%w: set GITHUB_TOKEN env var or `keyhunter config set dorks.github.token <token>` (needs public_repo scope)", ErrMissingAuth)
+ }
+ if limit <= 0 || limit > 100 {
+ limit = 30
+ }
+
+ endpoint := fmt.Sprintf("%s/search/code?q=%s&per_page=%d", g.BaseURL, urlQueryEscape(d.Query), limit)
+
+ var resp *http.Response
+ for attempt := 0; attempt <= g.MaxRetries; attempt++ {
+ req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
+ if err != nil { return nil, err }
+ req.Header.Set("Accept", "application/vnd.github.v3.text-match+json")
+ req.Header.Set("Authorization", "Bearer "+g.Token)
+ req.Header.Set("User-Agent", "keyhunter-dork-engine")
+
+ r, err := g.HTTPClient.Do(req)
+ if err != nil { return nil, fmt.Errorf("github search: %w", err) }
+
+ if r.StatusCode == http.StatusOK {
+ resp = r
+ break
+ }
+
+ body, _ := io.ReadAll(r.Body)
+ r.Body.Close()
+
+ switch r.StatusCode {
+ case http.StatusUnauthorized:
+ return nil, fmt.Errorf("%w: github token rejected (401)", ErrMissingAuth)
+ case http.StatusForbidden, http.StatusTooManyRequests:
+ if attempt < g.MaxRetries {
+ sleep := parseRetryAfter(r.Header.Get("Retry-After"))
+ select {
+ case <-time.After(sleep):
+ continue
+ case <-ctx.Done():
+ return nil, ctx.Err()
+ }
+ }
+ return nil, fmt.Errorf("github rate limit: %d %s", r.StatusCode, string(body))
+ default:
+ return nil, fmt.Errorf("github search failed: %d %s", r.StatusCode, string(body))
+ }
+ }
+ defer resp.Body.Close()
+
+ var parsed ghSearchResponse
+ if err := json.NewDecoder(resp.Body).Decode(&parsed); err != nil {
+ return nil, fmt.Errorf("decoding github response: %w", err)
+ }
+
+ out := make([]Match, 0, len(parsed.Items))
+ for _, it := range parsed.Items {
+ snippet := ""
+ if len(it.TextMatches) > 0 {
+ snippet = it.TextMatches[0].Fragment
+ }
+ out = append(out, Match{
+ DorkID: d.ID,
+ Source: "github",
+ URL: it.HTMLURL,
+ Path: it.Repository.FullName + "/" + it.Path,
+ Snippet: snippet,
+ })
+ if len(out) >= limit { break }
+ }
+ return out, nil
+ }
+
+ func parseRetryAfter(v string) time.Duration {
+ if v == "" { return time.Second }
+ if secs, err := strconv.Atoi(v); err == nil {
+ return time.Duration(secs) * time.Second
+ }
+ return time.Second
+ }
+
+ func urlQueryEscape(s string) string {
+ return url.QueryEscape(s)
+ }
+ ```
+
+ Note: the helper needs "net/url" added to the import block above — use the stdlib escaper, do NOT hand-roll percent-encoding.
+
+ Create pkg/dorks/github_test.go using httptest.NewServer. Override
+ executor.BaseURL to the test server URL. One subtest per behavior case.
+ For Retry-After test: server returns 403 with Retry-After: 1 on first
+ request, 200 with fake items on second.
+
+ Do NOT register GitHubExecutor into a global Runner here — Plan 08-06 does
+ the wiring inside cmd/dorks.go via NewGitHubExecutor(viper.GetString(...)).
+
+
+ cd /home/salva/Documents/apikey && go test ./pkg/dorks/... -run GitHub -v
+
+
+ All GitHub executor test cases pass; Execute honors token, rate limit, and
+ limit cap; Match fields populated from real response shape.
+
+
+
+
+
+
+`go test ./pkg/dorks/...` passes including all new GitHub cases.
+
+
+
+- pkg/dorks.GitHubExecutor implements Executor interface
+- Live GitHub Code Search calls are testable via httptest (BaseURL override)
+- ErrMissingAuth surfaces with actionable setup instructions
+- Retry-After respected once before giving up
+
+
+
diff --git a/.planning/phases/08-dork-engine/08-06-PLAN.md b/.planning/phases/08-dork-engine/08-06-PLAN.md
new file mode 100644
index 0000000..ae8755d
--- /dev/null
+++ b/.planning/phases/08-dork-engine/08-06-PLAN.md
@@ -0,0 +1,258 @@
+---
+phase: 08-dork-engine
+plan: 06
+type: execute
+wave: 3
+depends_on: [08-01, 08-02, 08-03, 08-04, 08-05]
+files_modified:
+ - cmd/dorks.go
+ - cmd/dorks_test.go
+ - cmd/stubs.go
+ - cmd/root.go
+autonomous: true
+requirements:
+ - DORK-03
+ - DORK-04
+must_haves:
+ truths:
+ - "`keyhunter dorks list` prints a table of embedded + custom dorks"
+ - "`keyhunter dorks list --source=github --category=frontier` filters correctly"
+ - "`keyhunter dorks info ` prints full dork detail"
+ - "`keyhunter dorks run --source=github --id=<dork-id>` calls GitHubExecutor"
+ - "`keyhunter dorks run --source=shodan` returns ErrSourceNotImplemented with a helpful message"
+ - "`keyhunter dorks add --source=github --category=frontier --query=... --description=...` persists to custom_dorks table"
+ - "`keyhunter dorks delete <dork-id>` removes from custom_dorks and refuses embedded IDs"
+ - "`keyhunter dorks export --format=yaml` dumps embedded+custom"
+ artifacts:
+ - path: "cmd/dorks.go"
+ provides: "dorks Cobra command tree replacing stub"
+ contains: "dorksCmd.AddCommand"
+ key_links:
+ - from: "cmd/dorks.go"
+ to: "pkg/dorks.Registry"
+ via: "dorks.NewRegistry()"
+ pattern: "dorks.NewRegistry"
+ - from: "cmd/dorks.go run"
+ to: "pkg/dorks.GitHubExecutor"
+ via: "NewGitHubExecutor(viper.GetString(\"dorks.github.token\"))"
+ pattern: "NewGitHubExecutor"
+ - from: "cmd/dorks.go add/delete"
+ to: "storage.DB custom_dorks CRUD"
+ via: "db.SaveCustomDork / DeleteCustomDork"
+ pattern: "SaveCustomDork|DeleteCustomDork"
+---
+
+
+Replace the Phase-8 dorks stub in cmd/stubs.go with a full Cobra command tree:
+list / run / add / export / info / delete. Wires the Registry (embedded dorks)
+together with storage custom_dorks (user dorks) and the GitHubExecutor (live
+execution). Satisfies DORK-03 (all four CLI verbs) and DORK-04 (category
+filtering via --category flag).
+
+Purpose: User-facing surface of the dork engine — everything built in 08-01
+through 08-05 becomes usable from the CLI.
+Output: Working `keyhunter dorks ...` subcommands.
+
+
+
+@$HOME/.claude/get-shit-done/workflows/execute-plan.md
+@$HOME/.claude/get-shit-done/templates/summary.md
+
+
+
+@.planning/phases/08-dork-engine/08-CONTEXT.md
+@.planning/phases/08-dork-engine/08-01-PLAN.md
+@.planning/phases/08-dork-engine/08-05-PLAN.md
+@cmd/stubs.go
+@cmd/root.go
+@cmd/keys.go
+@pkg/dorks/schema.go
+@pkg/dorks/registry.go
+@pkg/dorks/executor.go
+@pkg/storage/custom_dorks.go
+
+
+
+```go
+// pkg/dorks
+func NewRegistry() (*Registry, error)
+func (r *Registry) List() []Dork
+func (r *Registry) Get(id string) (Dork, bool)
+func (r *Registry) ListBySource(src string) []Dork
+func (r *Registry) ListByCategory(cat string) []Dork
+func NewGitHubExecutor(token string) *GitHubExecutor
+
+// pkg/storage
+func (db *DB) SaveCustomDork(d CustomDork) (int64, error)
+func (db *DB) ListCustomDorks() ([]CustomDork, error)
+func (db *DB) GetCustomDorkByDorkID(dorkID string) (CustomDork, error)
+func (db *DB) DeleteCustomDork(id int64) (int64, error)
+```
+
+
+
+
+
+
+ Task 1: cmd/dorks.go command tree (list/info/export)
+ cmd/dorks.go, cmd/stubs.go, cmd/root.go
+
+ 1. Remove `var dorksCmd` from cmd/stubs.go (keep the rest of the stubs file
+ untouched).
+
+ 2. Create cmd/dorks.go with the full command tree. Wire into root.go by
+ leaving the existing `rootCmd.AddCommand(dorksCmd)` in place — only the
+ declaration moves.
+
+ Structure:
+ ```go
+ package cmd
+
+ import (
+ "context"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "os"
+ "strings"
+
+ "github.com/spf13/cobra"
+ "github.com/spf13/viper"
+ "gopkg.in/yaml.v3"
+
+ "github.com/<org>/<repo>/pkg/dorks"
+ "github.com/<org>/<repo>/pkg/storage"
+ )
+
+ var (
+ dorksFilterSource string
+ dorksFilterCategory string
+ dorksRunID string
+ dorksRunLimit int
+ dorksAddSource string
+ dorksAddCategory string
+ dorksAddID string
+ dorksAddName string
+ dorksAddQuery string
+ dorksAddDescription string
+ dorksExportFormat string
+ )
+
+ var dorksCmd = &cobra.Command{
+ Use: "dorks",
+ Short: "Manage and run dork queries (DORK-01..DORK-04)",
+ }
+
+ // list, info, run, add, delete, export subcommands declared here.
+ ```
+
+ Replace the `<org>/<repo>` placeholder by reading the existing `module` line in
+ /home/salva/Documents/apikey/go.mod before writing the file (use the same
+ import path that cmd/keys.go uses).
+
+ **dorks list** — merges Registry.List() + db.ListCustomDorks(). Applies
+ --source and --category filters in memory. Prints a lipgloss table
+ (reuse pkg/output helpers if present, else a simple tab-aligned fmt). Mark
+ custom rows with a `*` prefix on the ID column.
+
+ **dorks info <id>** — looks up by dork_id first in Registry.Get, falls back
+ to db.GetCustomDorkByDorkID. Prints all fields including tags.
+
+ **dorks export [--format=yaml|json]** — emits a single combined list of
+ embedded + custom dorks in the requested format. Default yaml.
+
+ Use `initDorksDB()` helper that opens storage.DB using the same path logic
+ cmd/keys.go uses (viper.GetString("database.path")) so tests can inject a
+ temp path.
+
+
+ cd /home/salva/Documents/apikey && go build ./... && go run ./ dorks list --source=github --category=frontier 2>&1 | grep -Ei '(openai|anthropic)'
+
+
+ list/info/export commands compile and run. Filtering by source/category
+ works. Stub removed from cmd/stubs.go.
+
+
+
+
+ Task 2: dorks run/add/delete + command tests
+ cmd/dorks.go, cmd/dorks_test.go
+
+ - Test: `dorks add` persists a custom dork and `dorks list` shows it marked with `*`
+ - Test: `dorks delete <custom-dork-id>` removes it; `dorks delete <embedded-dork-id>` errors "embedded dorks cannot be deleted"
+ - Test: `dorks run --source=shodan --id=<shodan-dork-id>` returns error wrapping ErrSourceNotImplemented
+ - Test: `dorks run --source=github --id=<github-dork-id>` with empty GITHUB_TOKEN returns ErrMissingAuth setup message
+ - Test: `dorks run --source=github --id=<github-dork-id>` with injected fake GitHubExecutor returns mocked matches
+ - Use cobra.Command.SetArgs + bytes.Buffer to capture stdout in tests; inject
+ a tempdir DB path via viper.Set("database.path", tmp).
+
+
+ 1. Add **dorks add** subcommand to cmd/dorks.go. Builds a storage.CustomDork
+ from flags, validates source and category against dorks.ValidSources /
+ valid categories (frontier, specialized, infrastructure, emerging,
+ enterprise), calls db.SaveCustomDork. Fails if dork_id collides with an
+ embedded dork ID.
+
+ 2. Add **dorks delete <dork-id>** subcommand. If the ID matches an embedded
+ dork, print "embedded dorks cannot be deleted" and exit 2. Otherwise
+ call db.GetCustomDorkByDorkID -> db.DeleteCustomDork.
+
+ 3. Add **dorks run** subcommand:
+ - Required flags: --source; optional: --id, --category, --limit (default 10)
+ - If --id is set, run just that single dork (from registry or custom)
+ - Else, run all dorks matching --source and --category filters
+ - For each dork: dispatch via a Runner wired only with GitHub executor.
+ Non-github sources return fmt.Errorf wrapping ErrSourceNotImplemented
+ with message: "source %q not yet implemented (coming Phase 9-16)"
+ - Print matches as a simple list: `[dork-id] URL (path) snippet`
+ - Exit code 0 on success (even with zero matches). Exit code 2 on any
+ executor error; when the error is ErrSourceNotImplemented, print the
+ friendly "coming Phase 9-16" message before exiting.
+
+ 4. Extract the executor factory into a package-level var so tests can inject:
+ ```go
+ var newGitHubExecutor = func() dorks.Executor {
+ return dorks.NewGitHubExecutor(viper.GetString("dorks.github.token"))
+ }
+ ```
+ Tests override `newGitHubExecutor` to return a fake Executor.
+
+ 5. Create cmd/dorks_test.go:
+ - Helper `setupDorksTest(t)` — creates tempdir, viper.Set("database.path", ...),
+ opens DB via initDorksDB, runs schema, returns cleanup func.
+ - Tests execute rootCmd with SetArgs([]string{"dorks", "add", "--source=github", ...}).
+ - Capture output via rootCmd.SetOut(&buf).
+ - FakeExecutor type implements dorks.Executor with a stubbed Execute()
+ returning canned Match slice.
+
+ 6. **Config binding:** In cmd/root.go initConfig, ensure
+ `viper.BindEnv("dorks.github.token", "GITHUB_TOKEN")` is set so the
+ executor picks up the env var.
+
+
+ cd /home/salva/Documents/apikey && go test ./cmd/... -run Dorks -v
+
+
+ run/add/delete subcommands work end-to-end. All cmd/dorks_test.go tests pass.
+ GITHUB_TOKEN env var bound. Embedded dork deletion refused.
+
+
+
+
+
+
+- `go build ./...` succeeds
+- `go test ./cmd/... ./pkg/dorks/... ./pkg/storage/...` passes
+- `./keyhunter dorks list --source=github --category=frontier` shows real dorks
+- `./keyhunter dorks run --source=shodan --id=shodan-ollama-default` emits ErrSourceNotImplemented message
+
+
+
+- DORK-03 satisfied: list, run, add, export, info, delete all functional
+- DORK-04 satisfied: --source and --category filters applied in both list and run
+- dorks stub removed from cmd/stubs.go
+
+
+
diff --git a/.planning/phases/08-dork-engine/08-07-PLAN.md b/.planning/phases/08-dork-engine/08-07-PLAN.md
new file mode 100644
index 0000000..30c8e57
--- /dev/null
+++ b/.planning/phases/08-dork-engine/08-07-PLAN.md
@@ -0,0 +1,158 @@
+---
+phase: 08-dork-engine
+plan: 07
+type: execute
+wave: 3
+depends_on: [08-02, 08-03, 08-04, 08-06]
+files_modified:
+ - pkg/dorks/count_test.go
+autonomous: true
+requirements:
+ - DORK-02
+must_haves:
+ truths:
+ - "A guardrail test asserts pkg/dorks.NewRegistry() loads >= 150 dorks"
+ - "Per-source minimums enforced: github>=50, google>=30, shodan>=20, censys>=15, zoomeye>=10, fofa>=10, gitlab>=10, bing>=5"
+ - "All 8 sources present in Stats().BySource"
+ - "All 5 categories present in Stats().ByCategory"
+ - "All dork IDs are unique (no collisions between source files)"
+ artifacts:
+ - path: "pkg/dorks/count_test.go"
+ provides: "Guardrail test preventing regression below 150-dork threshold"
+ contains: "TestDorkCountGuardrail"
+ key_links:
+ - from: "pkg/dorks/count_test.go"
+ to: "pkg/dorks.NewRegistry()"
+ via: "direct call to real embedded FS"
+ pattern: "NewRegistry\\(\\)"
+---
+
+
+Lock in the DORK-02 "150+ built-in dorks" requirement with a guardrail test
+that runs against the real embedded filesystem. If a future contributor removes
+a dork file or breaks a source's YAML, CI fails loudly instead of silently
+dropping coverage. Also asserts per-source minimums and ID uniqueness so partial
+regressions are caught.
+
+Purpose: Prevents silent regression of the requirement that took plans 02-04
+to satisfy.
+Output: One test file with a few targeted assertions.
+
+
+
+@$HOME/.claude/get-shit-done/workflows/execute-plan.md
+@$HOME/.claude/get-shit-done/templates/summary.md
+
+
+
+@.planning/phases/08-dork-engine/08-CONTEXT.md
+@pkg/dorks/registry.go
+
+
+
+
+
+ Task 1: Dork count + uniqueness guardrail test
+ pkg/dorks/count_test.go
+
+ - TestDorkCountGuardrail: NewRegistry() returns >= 150 dorks total
+ - TestDorkCountPerSource: each of the 8 sources meets its minimum
+ - TestDorkCategoriesPresent: all 5 categories appear in Stats().ByCategory
+ - TestDorkIDsUnique: walking Registry.List(), no duplicate IDs
+
+
+ Create pkg/dorks/count_test.go:
+
+ ```go
+ package dorks
+
+ import "testing"
+
+ func TestDorkCountGuardrail(t *testing.T) {
+ r, err := NewRegistry()
+ if err != nil {
+ t.Fatalf("NewRegistry: %v", err)
+ }
+ const minTotal = 150
+ if got := len(r.List()); got < minTotal {
+ t.Fatalf("dork count regression: got %d, want >= %d (see DORK-02)", got, minTotal)
+ }
+ }
+
+ func TestDorkCountPerSource(t *testing.T) {
+ r, err := NewRegistry()
+ if err != nil { t.Fatal(err) }
+
+ minimums := map[string]int{
+ "github": 50,
+ "google": 30,
+ "shodan": 20,
+ "censys": 15,
+ "zoomeye": 10,
+ "fofa": 10,
+ "gitlab": 10,
+ "bing": 5,
+ }
+ stats := r.Stats()
+ for src, want := range minimums {
+ if stats.BySource[src] < want {
+ t.Errorf("source %s: got %d, want >= %d", src, stats.BySource[src], want)
+ }
+ }
+ }
+
+ func TestDorkCategoriesPresent(t *testing.T) {
+ r, err := NewRegistry()
+ if err != nil { t.Fatal(err) }
+ required := []string{"frontier", "specialized", "infrastructure", "emerging", "enterprise"}
+ stats := r.Stats()
+ for _, c := range required {
+ if stats.ByCategory[c] == 0 {
+ t.Errorf("category %q missing", c)
+ }
+ }
+ }
+
+ func TestDorkIDsUnique(t *testing.T) {
+ r, err := NewRegistry()
+ if err != nil { t.Fatal(err) }
+ seen := map[string]string{}
+ for _, d := range r.List() {
+ if existing, ok := seen[d.ID]; ok {
+ t.Errorf("duplicate dork id %q (second occurrence: %s, first: %s)", d.ID, d.Source, existing)
+ }
+ seen[d.ID] = d.Source
+ }
+ }
+ ```
+
+ Note: Stats struct was defined in 08-01 with BySource and ByCategory
+ map[string]int fields — confirm the exact field names match. If the
+ implementation used different names, align test accordingly (do not
+ modify 08-01; adjust the test).
+
+
+ cd /home/salva/Documents/apikey && go test ./pkg/dorks/ -run 'TestDork(Count|Categories|IDs)' -v
+
+
+ All four guardrail tests pass against the real embedded filesystem. The
+ DORK-02 150+ floor is now CI-enforced.
+
+
+
+
+
+
+`go test ./pkg/dorks/...` passes (full suite).
+`go test ./...` for the repo passes.
+
+
+
+- DORK-02 enforced by test (not just counted by grep)
+- Per-source minimums match the 50/30/20/15/10/10/10/5 distribution
+- No duplicate dork IDs across the embedded corpus
+
+
+