diff --git a/.planning/ROADMAP.md b/.planning/ROADMAP.md index 47344de..3121d67 100644 --- a/.planning/ROADMAP.md +++ b/.planning/ROADMAP.md @@ -177,7 +177,16 @@ Plans: 2. `keyhunter dorks run --source=github --category=frontier` executes all Tier 1 frontier provider dorks against GitHub code search 3. `keyhunter dorks add --source=google --query='site:pastebin.com "sk-ant-api03-"'` persists a custom dork that appears in subsequent `dorks list` output 4. `keyhunter dorks export --format=json` exports all dorks including custom additions -**Plans**: TBD +**Plans**: 7 plans + +Plans: +- [ ] 08-01-PLAN.md — Dork schema, go:embed loader, registry, executor interface, custom_dorks storage table +- [ ] 08-02-PLAN.md — 50 GitHub dork YAML definitions across 5 categories +- [ ] 08-03-PLAN.md — 30 Google + 20 Shodan dork YAML definitions +- [ ] 08-04-PLAN.md — 15 Censys + 10 ZoomEye + 10 FOFA + 10 GitLab + 5 Bing dork YAML definitions +- [ ] 08-05-PLAN.md — Live GitHub Code Search executor (net/http, Retry-After, limit cap) +- [ ] 08-06-PLAN.md — cmd/dorks.go Cobra tree: list/run/add/export/info/delete +- [ ] 08-07-PLAN.md — Dork count guardrail test (>=150 total, per-source minimums, ID uniqueness) ### Phase 9: OSINT Infrastructure **Goal**: The recon engine's `ReconSource` interface, per-source rate limiter architecture, stealth mode, and parallel sweep orchestrator exist and are validated — all individual source modules build on this foundation diff --git a/.planning/phases/08-dork-engine/08-01-PLAN.md b/.planning/phases/08-dork-engine/08-01-PLAN.md new file mode 100644 index 0000000..4f81f54 --- /dev/null +++ b/.planning/phases/08-dork-engine/08-01-PLAN.md @@ -0,0 +1,327 @@ +--- +phase: 08-dork-engine +plan: 01 +type: execute +wave: 1 +depends_on: [] +files_modified: + - pkg/dorks/schema.go + - pkg/dorks/loader.go + - pkg/dorks/registry.go + - pkg/dorks/executor.go + - pkg/dorks/registry_test.go + - pkg/dorks/definitions/.gitkeep + - dorks/.gitkeep + - 
pkg/storage/schema.sql + - pkg/storage/custom_dorks.go + - pkg/storage/custom_dorks_test.go +autonomous: true +requirements: + - DORK-01 + - DORK-03 +must_haves: + truths: + - "pkg/dorks.NewRegistry() loads embedded YAML files without error" + - "Registry.List(), Get(id), Stats(), ListBySource(), ListByCategory() return correct data" + - "ExecuteDork interface defined and per-source Executor map exists (all stubbed except placeholder)" + - "custom_dorks table exists and SaveCustomDork/ListCustomDorks/DeleteCustomDork work round-trip" + artifacts: + - path: "pkg/dorks/schema.go" + provides: "Dork struct matching 08-CONTEXT YAML schema" + contains: "type Dork struct" + - path: "pkg/dorks/loader.go" + provides: "go:embed loader mirroring pkg/providers/loader.go" + contains: "//go:embed definitions" + - path: "pkg/dorks/registry.go" + provides: "Registry with List/Get/Stats/ListBySource/ListByCategory" + contains: "func NewRegistry" + - path: "pkg/dorks/executor.go" + provides: "Executor interface + source dispatch + ErrSourceNotImplemented" + contains: "type Executor interface" + - path: "pkg/storage/custom_dorks.go" + provides: "SaveCustomDork/ListCustomDorks/DeleteCustomDork/GetCustomDork" + contains: "custom_dorks" + key_links: + - from: "pkg/dorks/loader.go" + to: "pkg/dorks/definitions/*/*.yaml" + via: "go:embed" + pattern: "embed.FS" + - from: "pkg/storage/schema.sql" + to: "custom_dorks table" + via: "CREATE TABLE" + pattern: "CREATE TABLE IF NOT EXISTS custom_dorks" +--- + + +Foundation of the dork engine: schema, go:embed loader, registry, executor interface, +and storage table for user-added custom dorks. Mirrors the proven pkg/providers pattern +from Phase 1 so downstream plans can drop 150+ YAML files into pkg/dorks/definitions/{source}/ +and have them immediately load at startup. + +Purpose: Unblock parallel Wave 2 plans (50-dork YAML batches and GitHub live executor). +Output: pkg/dorks package with passing tests + custom_dorks table migration. 
+ + + +@$HOME/.claude/get-shit-done/workflows/execute-plan.md +@$HOME/.claude/get-shit-done/templates/summary.md + + + +@.planning/PROJECT.md +@.planning/ROADMAP.md +@.planning/STATE.md +@.planning/phases/08-dork-engine/08-CONTEXT.md +@pkg/providers/loader.go +@pkg/providers/registry.go +@pkg/storage/db.go +@pkg/storage/schema.sql + + + + +From pkg/providers/loader.go: +```go +//go:embed definitions/*.yaml +var definitionsFS embed.FS + +func loadProviders() ([]Provider, error) { + fs.WalkDir(definitionsFS, "definitions", func(path string, d fs.DirEntry, err error) error { ... }) +} +``` + +From pkg/providers/registry.go: +```go +type Registry struct { providers []Provider; index map[string]int; ... } +func NewRegistry() (*Registry, error) +func (r *Registry) List() []Provider +func (r *Registry) Get(name string) (Provider, bool) +func (r *Registry) Stats() RegistryStats +``` + +From pkg/storage/db.go: +```go +type DB struct { sql *sql.DB } +func (db *DB) SQL() *sql.DB +``` + + + + + + + Task 1: Dork schema, go:embed loader, registry, executor interface + + pkg/dorks/schema.go, + pkg/dorks/loader.go, + pkg/dorks/registry.go, + pkg/dorks/executor.go, + pkg/dorks/registry_test.go, + pkg/dorks/definitions/.gitkeep, + dorks/.gitkeep + + + - Test: registry with two synthetic YAMLs under definitions/ loads 2 dorks + - Test: Registry.Get("openai-github-envfile") returns the correct Dork + - Test: Registry.ListBySource("github") returns only github dorks + - Test: Registry.ListByCategory("frontier") returns only frontier dorks + - Test: Registry.Stats() returns ByCategory + BySource counts + - Test: executor.ExecuteDork with source "shodan" returns ErrSourceNotImplemented + - Test: Dork.Validate() rejects empty id/source/query + + + 1. 
Create pkg/dorks/schema.go: + ```go + package dorks + + type Dork struct { + ID string `yaml:"id"` + Name string `yaml:"name"` + Source string `yaml:"source"` // github|google|shodan|censys|zoomeye|fofa|gitlab|bing + Category string `yaml:"category"` // frontier|specialized|infrastructure|emerging|enterprise + Query string `yaml:"query"` + Description string `yaml:"description"` + Tags []string `yaml:"tags"` + } + + var ValidSources = []string{"github","google","shodan","censys","zoomeye","fofa","gitlab","bing"} + + func (d Dork) Validate() error { /* non-empty id/source/query + source in ValidSources */ } + ``` + + 2. Create pkg/dorks/loader.go mirroring pkg/providers/loader.go: + ```go + //go:embed definitions + var definitionsFS embed.FS + + func loadDorks() ([]Dork, error) { + // fs.WalkDir on "definitions", descend into {source}/ subdirs, parse *.yaml + } + ``` + Walk pattern: definitions/github/*.yaml, definitions/google/*.yaml, etc. + Every file decoded via yaml.Unmarshal into Dork. Call Validate() per file; wrap + errors with file path. Return combined slice. + + 3. Create pkg/dorks/registry.go: + ```go + type Registry struct { + dorks []Dork + byID map[string]int + bySource map[string][]int + byCategory map[string][]int + } + + func NewRegistry() (*Registry, error) // uses loadDorks() + func NewRegistryFromDorks(ds []Dork) *Registry // for tests + func (r *Registry) List() []Dork + func (r *Registry) Get(id string) (Dork, bool) + func (r *Registry) ListBySource(src string) []Dork + func (r *Registry) ListByCategory(cat string) []Dork + func (r *Registry) Stats() Stats // {Total int; BySource map[string]int; ByCategory map[string]int} + ``` + + 4. 
Create pkg/dorks/executor.go (interface + source dispatcher, stubs only — + GitHub real impl comes in Plan 08-05): + ```go + var ErrSourceNotImplemented = errors.New("dork source not yet implemented") + var ErrMissingAuth = errors.New("dork source requires auth credentials") + + type Match struct { + DorkID string + Source string + URL string + Snippet string // content chunk to feed into engine detector + Path string // file path in repo, if applicable + } + + type Executor interface { + Source() string + Execute(ctx context.Context, d Dork, limit int) ([]Match, error) + } + + type Runner struct { + executors map[string]Executor + } + + func NewRunner() *Runner { return &Runner{executors: map[string]Executor{}} } + func (r *Runner) Register(e Executor) { r.executors[e.Source()] = e } + func (r *Runner) Run(ctx context.Context, d Dork, limit int) ([]Match, error) { + ex, ok := r.executors[d.Source] + if !ok { return nil, fmt.Errorf("%w: %s (coming Phase 9-16)", ErrSourceNotImplemented, d.Source) } + return ex.Execute(ctx, d, limit) + } + ``` + No real executors are registered here — Plan 08-05 wires the GitHub executor via + a separate constructor (NewRunnerWithGitHub or similar). + + 5. Create pkg/dorks/registry_test.go with the behavior cases listed above. + Use NewRegistryFromDorks for synthetic fixtures — do NOT touch the real + embedded FS (downstream plans populate it). One test MAY call NewRegistry() + and only assert err is nil or "definitions directory empty" — acceptable + either way pre-YAML. + + 6. Create placeholder files to make go:embed succeed with empty tree: + - pkg/dorks/definitions/.gitkeep (empty) + - dorks/.gitkeep (empty) + + IMPORTANT: go:embed requires at least one matching file. If + `//go:embed definitions` fails when only .gitkeep exists, switch the directive + to `//go:embed definitions/*` and handle the empty case by returning nil + dorks (no error) when WalkDir sees only .gitkeep. Test must pass with + zero real YAML present. 
+ + + cd /home/salva/Documents/apikey && go test ./pkg/dorks/... -v + + + pkg/dorks builds, all registry + executor tests pass, loader tolerates empty + definitions tree, ErrSourceNotImplemented returned for unknown source. + + + + + Task 2: custom_dorks storage table + CRUD + + pkg/storage/schema.sql, + pkg/storage/custom_dorks.go, + pkg/storage/custom_dorks_test.go + + + - Test: SaveCustomDork inserts a row and returns an auto-increment ID + - Test: ListCustomDorks returns all saved custom dorks newest first + - Test: GetCustomDork(id) returns the dork or sql.ErrNoRows + - Test: DeleteCustomDork(id) removes it; subsequent Get returns ErrNoRows + - Test: schema migration is idempotent (Open twice on same :memory: is fine — new DB each call, so instead verify CREATE TABLE IF NOT EXISTS form via re-exec on same *sql.DB) + + + 1. Append to pkg/storage/schema.sql: + ```sql + CREATE TABLE IF NOT EXISTS custom_dorks ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + dork_id TEXT NOT NULL UNIQUE, + name TEXT NOT NULL, + source TEXT NOT NULL, + category TEXT NOT NULL, + query TEXT NOT NULL, + description TEXT, + tags TEXT, -- JSON array + created_at DATETIME DEFAULT CURRENT_TIMESTAMP + ); + + CREATE INDEX IF NOT EXISTS idx_custom_dorks_source ON custom_dorks(source); + CREATE INDEX IF NOT EXISTS idx_custom_dorks_category ON custom_dorks(category); + ``` + + 2. Create pkg/storage/custom_dorks.go: + ```go + type CustomDork struct { + ID int64 + DorkID string + Name string + Source string + Category string + Query string + Description string + Tags []string + CreatedAt time.Time + } + + func (db *DB) SaveCustomDork(d CustomDork) (int64, error) + func (db *DB) ListCustomDorks() ([]CustomDork, error) + func (db *DB) GetCustomDork(id int64) (CustomDork, error) // returns sql.ErrNoRows if missing + func (db *DB) GetCustomDorkByDorkID(dorkID string) (CustomDork, error) + func (db *DB) DeleteCustomDork(id int64) (int64, error) + ``` + Tags round-tripped via encoding/json (TEXT column). 
Dork_id UNIQUE so + user cannot create duplicate custom IDs. + + 3. Create pkg/storage/custom_dorks_test.go covering the behavior cases above. + Use storage.Open(":memory:") as the existing storage tests do. + + + cd /home/salva/Documents/apikey && go test ./pkg/storage/... -run CustomDork -v + + + custom_dorks table created on Open(), CRUD round-trip tests pass, no + regressions in the existing storage test suite. + + + + + + +- `go build ./...` succeeds +- `go test ./pkg/dorks/... ./pkg/storage/...` passes +- `grep -r "//go:embed" pkg/dorks/` shows the definitions embed directive + + + +- pkg/dorks.NewRegistry() compiles and runs (zero or more embedded dorks) +- Executor interface + ErrSourceNotImplemented in place for Plan 08-05 and 08-06 +- custom_dorks CRUD functional; downstream `dorks add`/`dorks delete` commands have + a storage backend to call + + + +After completion, create `.planning/phases/08-dork-engine/08-01-SUMMARY.md` + diff --git a/.planning/phases/08-dork-engine/08-02-PLAN.md b/.planning/phases/08-dork-engine/08-02-PLAN.md new file mode 100644 index 0000000..1621afa --- /dev/null +++ b/.planning/phases/08-dork-engine/08-02-PLAN.md @@ -0,0 +1,227 @@ +--- +phase: 08-dork-engine +plan: 02 +type: execute +wave: 2 +depends_on: [08-01] +files_modified: + - pkg/dorks/definitions/github/frontier.yaml + - pkg/dorks/definitions/github/specialized.yaml + - pkg/dorks/definitions/github/infrastructure.yaml + - pkg/dorks/definitions/github/emerging.yaml + - pkg/dorks/definitions/github/enterprise.yaml + - dorks/github/frontier.yaml + - dorks/github/specialized.yaml + - dorks/github/infrastructure.yaml + - dorks/github/emerging.yaml + - dorks/github/enterprise.yaml +autonomous: true +requirements: + - DORK-01 + - DORK-02 + - DORK-04 +must_haves: + truths: + - "pkg/dorks.NewRegistry() loads at least 50 github dorks" + - "Dorks cover all 5 categories (frontier, specialized, infrastructure, emerging, enterprise)" + - "Registry.ListBySource(\"github\") returns >= 50 
entries" + - "All dork IDs are unique and pass Dork.Validate()" + artifacts: + - path: "pkg/dorks/definitions/github/frontier.yaml" + provides: "~15 GitHub dorks for Tier 1/2 frontier providers" + contains: "source: github" + - path: "pkg/dorks/definitions/github/specialized.yaml" + provides: "~10 GitHub dorks for Tier 3 specialized providers" + contains: "category: specialized" + key_links: + - from: "pkg/dorks/definitions/github/*.yaml" + to: "pkg/dorks/loader.go go:embed" + via: "compile-time embed" + pattern: "source: github" +--- + + +Populate the GitHub source with 50 production dork queries covering every provider +category. Each dork is a real GitHub Code Search query formatted per the Dork schema +from Plan 08-01. Mirrored into `dorks/github/` (user-visible) and +`pkg/dorks/definitions/github/` (go:embed target) per the Phase 1 dual-location +pattern. + +Purpose: Half of the 150+ dork requirement (DORK-02) lives here. GitHub is the +largest single source because it is the primary live executor (Plan 08-05) and +because leaked keys overwhelmingly show up in .env/config files. +Output: 50 GitHub dorks, embedded and loadable. + + + +@$HOME/.claude/get-shit-done/workflows/execute-plan.md +@$HOME/.claude/get-shit-done/templates/summary.md + + + +@.planning/phases/08-dork-engine/08-CONTEXT.md +@.planning/phases/08-dork-engine/08-01-PLAN.md +@pkg/providers/definitions/openai.yaml +@pkg/dorks/schema.go + + + + + + Task 1: 25 GitHub dorks — frontier + specialized categories + + pkg/dorks/definitions/github/frontier.yaml, + pkg/dorks/definitions/github/specialized.yaml, + dorks/github/frontier.yaml, + dorks/github/specialized.yaml + + + Create both files with the YAML list format supported by the loader. Each file + is a YAML document containing a top-level list of Dork entries. 
If the loader + in 08-01 was written to expect one-Dork-per-file, update it here to also + accept a list — check pkg/dorks/loader.go and adapt (preferred: loader accepts + both `type dorkFile struct { Dorks []Dork }` wrapper OR top-level list). Use + the list form. + + File format (list of Dork): + ```yaml + - id: openai-github-envfile + name: "OpenAI API Key in .env files" + source: github + category: frontier + query: 'sk-proj- extension:env' + description: "Finds OpenAI project keys committed in .env files" + tags: [openai, env, tier1] + - id: openai-github-pyfile + ... + ``` + + **frontier.yaml — 15 dorks** covering Tier 1/2 providers. Each provider gets + 1-2 dorks. Use real, validated prefixes from pkg/providers/definitions/*.yaml: + - openai-github-envfile: `sk-proj- extension:env` + - openai-github-pyfile: `sk-proj- extension:py` + - openai-github-jsonfile: `sk-proj- extension:json` + - anthropic-github-envfile: `sk-ant-api03- extension:env` + - anthropic-github-pyfile: `sk-ant-api03- extension:py` + - google-ai-github-envfile: `AIzaSy extension:env "GOOGLE_API_KEY"` + - google-ai-github-jsonfile: `AIzaSy extension:json "generativelanguage"` + - azure-openai-envfile: `AZURE_OPENAI_KEY extension:env` + - aws-bedrock-envfile: `AKIA extension:env "bedrock"` + - xai-envfile: `xai- extension:env` + - cohere-envfile: `COHERE_API_KEY extension:env` + - mistral-envfile: `MISTRAL_API_KEY extension:env` + - groq-envfile: `gsk_ extension:env` + - together-envfile: `TOGETHER_API_KEY extension:env` + - replicate-envfile: `r8_ extension:env` + + All with category: frontier, appropriate tags. Each query MUST be a literal + GitHub Code Search query — no templating. 
+ + **specialized.yaml — 10 dorks** covering Tier 3 providers: + - perplexity-envfile: `pplx- extension:env` + - voyage-envfile: `VOYAGE_API_KEY extension:env` + - jina-envfile: `jina_ extension:env` + - assemblyai-envfile: `ASSEMBLYAI_API_KEY extension:env` + - deepgram-envfile: `DEEPGRAM_API_KEY extension:env` + - elevenlabs-envfile: `ELEVENLABS_API_KEY extension:env` + - stability-envfile: `sk-stability- extension:env` + - huggingface-envfile: `hf_ extension:env` + - perplexity-config: `pplx- filename:config.yaml` + - deepgram-config: `DEEPGRAM filename:.env.local` + + category: specialized. + + Write identical content to both `pkg/dorks/definitions/github/{file}.yaml` + and `dorks/github/{file}.yaml`. The pkg/ copy is for go:embed, the dorks/ + copy is user-visible. + + **Adapt loader if needed.** If 08-01 wrote `yaml.Unmarshal(data, &Dork{})` + (single dork per file), change to: + ```go + var list []Dork + if err := yaml.Unmarshal(data, &list); err != nil { return err } + dorks = append(dorks, list...) + ``` + Run `go test ./pkg/dorks/...` to confirm. + + + cd /home/salva/Documents/apikey && go test ./pkg/dorks/... && go run ./cmd/... 2>&1 || true; awk 'FNR==1{print FILENAME}/^- id:/{c++}END{print "count:",c}' pkg/dorks/definitions/github/frontier.yaml pkg/dorks/definitions/github/specialized.yaml + + 25 dorks loaded, all pass Validate(), tests pass. + + + + Task 2: 25 GitHub dorks — infrastructure + emerging + enterprise + + pkg/dorks/definitions/github/infrastructure.yaml, + pkg/dorks/definitions/github/emerging.yaml, + pkg/dorks/definitions/github/enterprise.yaml, + dorks/github/infrastructure.yaml, + dorks/github/emerging.yaml, + dorks/github/enterprise.yaml + + + Create six YAML files (three pairs) using the same list format as Task 1. 
+ + **infrastructure.yaml — 10 dorks** (Tier 5 gateways + Tier 8 self-hosted): + - openrouter-envfile: `sk-or-v1- extension:env` + - openrouter-pyfile: `sk-or-v1- extension:py` + - litellm-envfile: `LITELLM_MASTER_KEY extension:env` + - portkey-envfile: `PORTKEY_API_KEY extension:env` + - helicone-envfile: `sk-helicone- extension:env` + - cloudflare-ai-envfile: `CF_API_TOKEN "ai.run"` + - vercel-ai-envfile: `VERCEL_AI extension:env` + - ollama-config: `OLLAMA_HOST filename:docker-compose.yaml` + - vllm-config: `vllm.entrypoints filename:config.yaml` + - localai-envfile: `LOCALAI_API_KEY extension:env` + + category: infrastructure. + + **emerging.yaml — 10 dorks** (Tier 4 Chinese + Tier 6 niche + vector DBs): + - deepseek-envfile: `sk- extension:env "deepseek"` + - moonshot-envfile: `sk- extension:env "moonshot"` + - qwen-envfile: `DASHSCOPE_API_KEY extension:env` + - zhipu-envfile: `ZHIPU_API_KEY extension:env` + - minimax-envfile: `MINIMAX_API_KEY extension:env` + - pinecone-envfile: `PINECONE_API_KEY extension:env` + - weaviate-envfile: `WEAVIATE_API_KEY extension:env` + - qdrant-envfile: `QDRANT_API_KEY extension:env` + - chroma-envfile: `CHROMA_API_KEY extension:env` + - writer-envfile: `WRITER_API_KEY extension:env` + + category: emerging. + + **enterprise.yaml — 5 dorks** (Tier 7 dev tools + Tier 9 enterprise): + - codeium-envfile: `CODEIUM_API_KEY extension:env` + - tabnine-envfile: `TABNINE_TOKEN extension:env` + - databricks-envfile: `DATABRICKS_TOKEN extension:env` + - snowflake-cortex: `SNOWFLAKE_PASSWORD "cortex"` + - watsonx-envfile: `WATSONX_APIKEY extension:env` + + category: enterprise. + + Write each YAML to both pkg/dorks/definitions/github/ and dorks/github/. + All dorks use source: github. + + + cd /home/salva/Documents/apikey && go test ./pkg/dorks/... 
&& grep -c '^- id:' pkg/dorks/definitions/github/*.yaml | awk -F: '{s+=$NF}END{print "total github dorks:",s; if(s<50) exit 1}' + + 50 total GitHub dorks across 5 category files, loader picks all up, counts pass. + + + + + +`cd /home/salva/Documents/apikey && go test ./pkg/dorks/...` passes +Registry reports >= 50 dorks via a throwaway main or test assertion. + + + +- 50 GitHub dorks loadable via pkg/dorks.NewRegistry() +- All 5 categories represented +- Dual location (dorks/ + pkg/dorks/definitions/) maintained + + + +After completion, create `.planning/phases/08-dork-engine/08-02-SUMMARY.md` + diff --git a/.planning/phases/08-dork-engine/08-03-PLAN.md b/.planning/phases/08-dork-engine/08-03-PLAN.md new file mode 100644 index 0000000..d4c6177 --- /dev/null +++ b/.planning/phases/08-dork-engine/08-03-PLAN.md @@ -0,0 +1,192 @@ +--- +phase: 08-dork-engine +plan: 03 +type: execute +wave: 2 +depends_on: [08-01] +files_modified: + - pkg/dorks/definitions/google/frontier.yaml + - pkg/dorks/definitions/google/specialized.yaml + - pkg/dorks/definitions/google/infrastructure.yaml + - pkg/dorks/definitions/shodan/frontier.yaml + - pkg/dorks/definitions/shodan/infrastructure.yaml + - dorks/google/frontier.yaml + - dorks/google/specialized.yaml + - dorks/google/infrastructure.yaml + - dorks/shodan/frontier.yaml + - dorks/shodan/infrastructure.yaml +autonomous: true +requirements: + - DORK-01 + - DORK-02 + - DORK-04 +must_haves: + truths: + - "Registry.ListBySource(\"google\") returns >= 30 entries" + - "Registry.ListBySource(\"shodan\") returns >= 20 entries" + - "All google dorks use real site:/filetype: operators" + - "All shodan dorks use real shodan query syntax (ssl.cert, http.title, etc.)" + artifacts: + - path: "pkg/dorks/definitions/google/" + provides: "30 Google dorks across 3 categories" + contains: "source: google" + - path: "pkg/dorks/definitions/shodan/" + provides: "20 Shodan dorks" + contains: "source: shodan" + key_links: + - from: 
"pkg/dorks/definitions/{google,shodan}/*.yaml" + to: "pkg/dorks/loader.go" + via: "go:embed subdir walk" + pattern: "source: (google|shodan)" +--- + + +Populate Google (30) and Shodan (20) sources — 50 dorks total. Google uses site: +and filetype: operators to find leaked keys on non-GitHub platforms (pastebin, +gitlab raw, etc.). Shodan uses its banner/certificate query syntax to surface +exposed self-hosted LLM services (Ollama, vLLM, LocalAI, LiteLLM dashboards). + +Purpose: Broadens DORK-02 coverage beyond GitHub into the two next-most-important +sources. Execution of these queries is deferred to Phase 11 (Google) and Phase 12 +(Shodan) — this plan only delivers the definitions. +Output: 50 loadable Google + Shodan dorks. + + + +@$HOME/.claude/get-shit-done/workflows/execute-plan.md +@$HOME/.claude/get-shit-done/templates/summary.md + + + +@.planning/phases/08-dork-engine/08-CONTEXT.md +@.planning/phases/08-dork-engine/08-01-PLAN.md +@pkg/dorks/schema.go + + + + + + Task 1: 30 Google dorks (site/filetype operators) + + pkg/dorks/definitions/google/frontier.yaml, + pkg/dorks/definitions/google/specialized.yaml, + pkg/dorks/definitions/google/infrastructure.yaml, + dorks/google/frontier.yaml, + dorks/google/specialized.yaml, + dorks/google/infrastructure.yaml + + + Create three frontier/specialized/infrastructure files (dual location). + All entries use source: google. Queries are real Google search operators + (site:, filetype:, intext:, inurl:). No HTML escaping — raw text. 
+ + **frontier.yaml — 12 dorks:** + - google-openai-pastebin: `site:pastebin.com "sk-proj-"` + - google-openai-gitlab-raw: `site:gitlab.com/*/raw/* "sk-proj-"` + - google-openai-env-leak: `intext:"OPENAI_API_KEY=sk-proj-" filetype:env` + - google-anthropic-pastebin: `site:pastebin.com "sk-ant-api03-"` + - google-anthropic-env: `intext:"ANTHROPIC_API_KEY=sk-ant-" filetype:env` + - google-ai-pastebin: `site:pastebin.com "AIzaSy" "generativelanguage"` + - google-ai-gitlab: `site:gitlab.com "AIzaSy" inurl:config` + - google-groq-pastebin: `site:pastebin.com "gsk_"` + - google-cohere-env: `intext:"COHERE_API_KEY" filetype:env` + - google-mistral-env: `intext:"MISTRAL_API_KEY=" filetype:env` + - google-xai-pastebin: `site:pastebin.com "xai-"` + - google-replicate-env: `intext:"r8_" filetype:env -site:github.com` + + category: frontier. + + **specialized.yaml — 10 dorks:** + - google-perplexity-pastebin: `site:pastebin.com "pplx-"` + - google-hf-pastebin: `site:pastebin.com "hf_"` + - google-elevenlabs-env: `intext:"ELEVENLABS_API_KEY" filetype:env` + - google-deepgram-env: `intext:"DEEPGRAM_API_KEY" filetype:env` + - google-assemblyai-pastebin: `site:pastebin.com "ASSEMBLYAI_API_KEY"` + - google-stability-env: `intext:"sk-stability-" filetype:env` + - google-jina-env: `intext:"jina_" filetype:env` + - google-voyage-env: `intext:"VOYAGE_API_KEY" filetype:env` + - google-hf-notebook: `site:colab.research.google.com "hf_"` + - google-hf-kaggle: `site:kaggle.com "hf_" inurl:notebook` + + category: specialized. 
+ + **infrastructure.yaml — 8 dorks:** + - google-openrouter-pastebin: `site:pastebin.com "sk-or-v1-"` + - google-openrouter-env: `intext:"sk-or-v1-" filetype:env` + - google-litellm-config: `intext:"LITELLM_MASTER_KEY" filetype:yaml` + - google-helicone-env: `intext:"sk-helicone-" filetype:env` + - google-portkey-env: `intext:"PORTKEY_API_KEY" filetype:env` + - google-ollama-exposed: `intitle:"Ollama" inurl:":11434"` + - google-vllm-exposed: `intitle:"vLLM" "/v1/models"` + - google-localai-exposed: `intitle:"LocalAI" "/v1/chat/completions"` + + category: infrastructure. + + + cd /home/salva/Documents/apikey && go test ./pkg/dorks/... && grep -c '^- id:' pkg/dorks/definitions/google/*.yaml | awk -F: '{s+=$NF}END{print "google:",s; if(s<30) exit 1}' + + 30 Google dorks loaded, all pass Validate(), tests pass. + + + + Task 2: 20 Shodan dorks (banner/cert queries) + + pkg/dorks/definitions/shodan/frontier.yaml, + pkg/dorks/definitions/shodan/infrastructure.yaml, + dorks/shodan/frontier.yaml, + dorks/shodan/infrastructure.yaml + + + Two Shodan files, dual location. All entries source: shodan. Queries use + real Shodan syntax: http.title, http.html, ssl.cert.subject.cn, product, + port, org, http.favicon.hash. + + **frontier.yaml — 6 dorks** (exposed frontier-adjacent APIs): + - shodan-openai-proxy: `http.title:"openai" http.html:"/v1/chat/completions"` + - shodan-litellm-proxy: `http.title:"LiteLLM" port:4000` + - shodan-openai-nginx: `http.html:"OPENAI_API_KEY" http.component:nginx` + - shodan-azure-openai: `ssl.cert.subject.cn:"openai.azure.com"` + - shodan-bedrock-runtime: `ssl.cert.subject.cn:"bedrock-runtime"` + - shodan-anthropic-proxy: `http.html:"anthropic" http.html:"messages"` + + category: frontier. 
+ + **infrastructure.yaml — 14 dorks** (the bulk: self-hosted + gateway): + - shodan-ollama-default: `product:"Ollama" port:11434` + - shodan-ollama-tags: `http.html:"/api/tags" http.title:"Ollama"` + - shodan-vllm: `http.html:"vLLM" http.html:"/v1/models"` + - shodan-localai: `http.title:"LocalAI"` + - shodan-lmstudio: `http.title:"LM Studio"` + - shodan-textgenwebui: `http.title:"text-generation-webui"` + - shodan-openwebui: `http.title:"Open WebUI" http.html:"/api/chat"` + - shodan-openrouter-proxy: `http.html:"openrouter.ai" port:443` + - shodan-portkey-gateway: `http.title:"Portkey"` + - shodan-helicone-gateway: `http.html:"helicone" http.html:"/v1"` + - shodan-triton-server: `http.html:"NVIDIA Triton" http.html:"/v2/models"` + - shodan-tgi-hf: `http.html:"text-generation-inference" "/generate"` + - shodan-langserve: `http.title:"LangServe"` + - shodan-fastchat: `http.title:"FastChat"` + + category: infrastructure. + + + cd /home/salva/Documents/apikey && go test ./pkg/dorks/... && grep -c '^- id:' pkg/dorks/definitions/shodan/*.yaml | awk -F: '{s+=$NF}END{print "shodan:",s; if(s<20) exit 1}' + + 20 Shodan dorks loaded, all pass Validate(). + + + + + +`go test ./pkg/dorks/...` passes; Google+Shodan sources visible in registry stats. 
+ + + +- 30 Google dorks + 20 Shodan dorks = 50 new dorks +- Cumulative total (with Plan 08-02) reaches 100 +- All dorks pass schema validation + + + +After completion, create `.planning/phases/08-dork-engine/08-03-SUMMARY.md` + diff --git a/.planning/phases/08-dork-engine/08-04-PLAN.md b/.planning/phases/08-dork-engine/08-04-PLAN.md new file mode 100644 index 0000000..229dc1e --- /dev/null +++ b/.planning/phases/08-dork-engine/08-04-PLAN.md @@ -0,0 +1,201 @@ +--- +phase: 08-dork-engine +plan: 04 +type: execute +wave: 2 +depends_on: [08-01] +files_modified: + - pkg/dorks/definitions/censys/all.yaml + - pkg/dorks/definitions/zoomeye/all.yaml + - pkg/dorks/definitions/fofa/all.yaml + - pkg/dorks/definitions/gitlab/all.yaml + - pkg/dorks/definitions/bing/all.yaml + - dorks/censys/all.yaml + - dorks/zoomeye/all.yaml + - dorks/fofa/all.yaml + - dorks/gitlab/all.yaml + - dorks/bing/all.yaml +autonomous: true +requirements: + - DORK-01 + - DORK-02 + - DORK-04 +must_haves: + truths: + - "Registry.ListBySource(\"censys\") returns >= 15" + - "Registry.ListBySource(\"zoomeye\") returns >= 10" + - "Registry.ListBySource(\"fofa\") returns >= 10" + - "Registry.ListBySource(\"gitlab\") returns >= 10" + - "Registry.ListBySource(\"bing\") returns >= 5" + - "Combined total from plans 02+03+04 is >= 150" + artifacts: + - path: "pkg/dorks/definitions/censys/all.yaml" + provides: "15 Censys search queries" + contains: "source: censys" + - path: "pkg/dorks/definitions/zoomeye/all.yaml" + provides: "10 ZoomEye queries" + contains: "source: zoomeye" + - path: "pkg/dorks/definitions/fofa/all.yaml" + provides: "10 FOFA queries" + contains: "source: fofa" + - path: "pkg/dorks/definitions/gitlab/all.yaml" + provides: "10 GitLab code search queries" + contains: "source: gitlab" + - path: "pkg/dorks/definitions/bing/all.yaml" + provides: "5 Bing dorks" + contains: "source: bing" + key_links: + - from: "pkg/dorks/definitions/{censys,zoomeye,fofa,gitlab,bing}/all.yaml" + to: "pkg/dorks/loader.go" 
+ via: "go:embed subdir walk" + pattern: "source: (censys|zoomeye|fofa|gitlab|bing)" +--- + + +Fill the remaining 5 sources to hit the 150-dork total: Censys (15), ZoomEye (10), +FOFA (10), GitLab (10), Bing (5). Each source uses its own native query syntax. +Execution of these is deferred to later OSINT phases (9-16); this plan only +delivers the definitions. + +Purpose: Closes DORK-02 (150+ dorks) and ensures DORK-04 (category filtering) +has coverage across every source. +Output: 50 dorks across 5 sources, hitting the 150 total when combined with +Plans 08-02 (50 GitHub) + 08-03 (50 Google/Shodan). + + + +@$HOME/.claude/get-shit-done/workflows/execute-plan.md +@$HOME/.claude/get-shit-done/templates/summary.md + + + +@.planning/phases/08-dork-engine/08-CONTEXT.md +@pkg/dorks/schema.go + + + + + + Task 1: 25 Censys + ZoomEye dorks + + pkg/dorks/definitions/censys/all.yaml, + pkg/dorks/definitions/zoomeye/all.yaml, + dorks/censys/all.yaml, + dorks/zoomeye/all.yaml + + + Dual location. List YAML format identical to plans 02-03. 
+ + **censys/all.yaml — 15 dorks** using Censys Search 2.0 syntax + (services.http.response.*, services.tls.certificates.*): + - censys-ollama-11434: `services.port: 11434 and services.http.response.body: "Ollama"` + - censys-vllm: `services.http.response.body: "vLLM" and services.http.response.body: "/v1/models"` + - censys-localai: `services.http.response.html_title: "LocalAI"` + - censys-openwebui: `services.http.response.html_title: "Open WebUI"` + - censys-lmstudio: `services.http.response.html_title: "LM Studio"` + - censys-triton: `services.http.response.body: "NVIDIA Triton" and services.http.response.body: "/v2/models"` + - censys-tgi: `services.http.response.body: "text-generation-inference"` + - censys-litellm: `services.http.response.html_title: "LiteLLM" and services.port: 4000` + - censys-portkey: `services.http.response.html_title: "Portkey"` + - censys-langserve: `services.http.response.html_title: "LangServe"` + - censys-openai-azure-cert: `services.tls.certificates.leaf_data.subject.common_name: "openai.azure.com"` + - censys-bedrock-cert: `services.tls.certificates.leaf_data.subject.common_name: "bedrock-runtime"` + - censys-fastchat: `services.http.response.html_title: "FastChat"` + - censys-textgen-webui: `services.http.response.html_title: "text-generation-webui"` + - censys-openai-proxy: `services.http.response.body: "/v1/chat/completions" and services.http.response.body: "OPENAI_API_KEY"` + + Category mix: 12 infrastructure, 3 frontier (the azure/bedrock/openai-proxy entries). 
+ + **zoomeye/all.yaml — 10 dorks** using ZoomEye query syntax + (app:, title:, service:, port:): + - zoomeye-ollama: `port:11434 +app:"Ollama"` + - zoomeye-vllm: `title:"vLLM" +app:"nginx"` + - zoomeye-localai: `title:"LocalAI"` + - zoomeye-openwebui: `title:"Open WebUI"` + - zoomeye-litellm: `title:"LiteLLM" +port:4000` + - zoomeye-lmstudio: `title:"LM Studio"` + - zoomeye-triton-grpc: `port:8001 +service:"triton"` + - zoomeye-fastchat: `title:"FastChat"` + - zoomeye-langserve: `title:"LangServe"` + - zoomeye-openai-proxy: `title:"openai" +"/v1/chat/completions"` + + All category: infrastructure (with the last one arguably frontier; mark it frontier). + + + cd /home/salva/Documents/apikey && go test ./pkg/dorks/... && grep -c '^- id:' pkg/dorks/definitions/censys/*.yaml pkg/dorks/definitions/zoomeye/*.yaml | awk -F: '{s+=$NF}END{print "censys+zoomeye:",s; if(s<25) exit 1}' + + 25 dorks loaded across Censys + ZoomEye. + + + + Task 2: 25 FOFA + GitLab + Bing dorks + + pkg/dorks/definitions/fofa/all.yaml, + pkg/dorks/definitions/gitlab/all.yaml, + pkg/dorks/definitions/bing/all.yaml, + dorks/fofa/all.yaml, + dorks/gitlab/all.yaml, + dorks/bing/all.yaml + + + Dual location. List YAML format. + + **fofa/all.yaml — 10 dorks** using FOFA syntax (title=, body=, port=, cert=): + - fofa-ollama: `port="11434" && body="Ollama"` + - fofa-vllm: `title="vLLM"` + - fofa-localai: `title="LocalAI"` + - fofa-openwebui: `title="Open WebUI"` + - fofa-litellm: `title="LiteLLM" && port="4000"` + - fofa-openai-azure: `cert="openai.azure.com"` + - fofa-triton: `body="NVIDIA Triton" && body="/v2/models"` + - fofa-langserve: `title="LangServe"` + - fofa-tgi-hf: `body="text-generation-inference"` + - fofa-openai-proxy: `body="/v1/chat/completions" && body="api_key"` + + Mix: 8 infrastructure, 2 frontier (azure cert, openai-proxy). 
+ + **gitlab/all.yaml — 10 dorks** for GitLab code search API: + - gitlab-openai-envfile: `sk-proj- extension:env` + - gitlab-anthropic-envfile: `sk-ant-api03- extension:env` + - gitlab-google-ai-json: `AIzaSy extension:json "generativelanguage"` + - gitlab-groq-envfile: `gsk_ extension:env` + - gitlab-cohere-envfile: `COHERE_API_KEY extension:env` + - gitlab-hf-pyfile: `hf_ extension:py` + - gitlab-openrouter-env: `sk-or-v1- extension:env` + - gitlab-perplexity-env: `pplx- extension:env` + - gitlab-deepseek-env: `DEEPSEEK_API_KEY extension:env` + - gitlab-pinecone-env: `PINECONE_API_KEY extension:env` + + Categories spread across frontier/specialized/infrastructure/emerging. + + **bing/all.yaml — 5 dorks** using Bing operators (site:, filetype:, + intitle:, inbody:): + - bing-openai-pastebin: `site:pastebin.com "sk-proj-"` + - bing-anthropic-pastebin: `site:pastebin.com "sk-ant-api03-"` + - bing-ollama-exposed: `intitle:"Ollama" inbody:"/api/tags"` + - bing-openai-env: `filetype:env "OPENAI_API_KEY=sk-proj-"` + - bing-hf-pastebin: `site:pastebin.com "hf_"` + + Mix: 3 frontier, 1 specialized (hf), 1 infrastructure (ollama). + + + cd /home/salva/Documents/apikey && go test ./pkg/dorks/... && grep -rhc '^- id:' pkg/dorks/definitions/ | awk '{s+=$1}END{print "grand total:",s; if(s<150) exit 1}' + + 25 more dorks loaded; grand total across all sources >= 150. + + + + + +`go test ./pkg/dorks/...` passes; `grep -rhc '^- id:' pkg/dorks/definitions/ | paste -sd+ | bc` >= 150. 
+ + + +- Censys 15 + ZoomEye 10 + FOFA 10 + GitLab 10 + Bing 5 = 50 dorks added +- Cumulative phase total reaches 150+ (DORK-02 satisfied) +- All 8 sources present in registry stats + + + +After completion, create `.planning/phases/08-dork-engine/08-04-SUMMARY.md` + diff --git a/.planning/phases/08-dork-engine/08-05-PLAN.md b/.planning/phases/08-dork-engine/08-05-PLAN.md new file mode 100644 index 0000000..9371dfe --- /dev/null +++ b/.planning/phases/08-dork-engine/08-05-PLAN.md @@ -0,0 +1,264 @@ +--- +phase: 08-dork-engine +plan: 05 +type: execute +wave: 2 +depends_on: [08-01] +files_modified: + - pkg/dorks/github.go + - pkg/dorks/github_test.go +autonomous: true +requirements: + - DORK-02 +must_haves: + truths: + - "GitHubExecutor.Source() returns \"github\"" + - "GitHubExecutor.Execute runs GitHub Code Search against api.github.com and returns []Match" + - "Missing token returns ErrMissingAuth with setup instructions" + - "Retry-After header is honored (sleep + retry once) for 403/429" + - "Response items mapped to Match with URL, Path, Snippet (text_matches)" + artifacts: + - path: "pkg/dorks/github.go" + provides: "GitHubExecutor implementing Executor interface" + contains: "type GitHubExecutor struct" + - path: "pkg/dorks/github_test.go" + provides: "httptest server exercising success/auth/rate-limit paths" + contains: "httptest.NewServer" + key_links: + - from: "pkg/dorks/github.go" + to: "https://api.github.com/search/code" + via: "net/http client" + pattern: "api.github.com/search/code" + - from: "pkg/dorks/github.go" + to: "pkg/dorks/executor.go Executor interface" + via: "interface satisfaction" + pattern: "Execute\\(ctx" +--- + + +Implement the live GitHub Code Search executor — the only source that actually +runs in Phase 8 (all other executors stay stubbed with ErrSourceNotImplemented). +Hits `GET https://api.github.com/search/code?q={query}`, authenticated via +GITHUB_TOKEN env var / viper config. Honors rate-limit response codes. 
Maps +response items to pkg/dorks.Match entries consumable by the engine pipeline in +downstream phases. + +Purpose: Satisfies the "GitHub live" slice of DORK-02 and unblocks `keyhunter +dorks run --source=github` in Plan 08-06. +Output: Working pkg/dorks.GitHubExecutor + httptest-backed test suite. + + + +@$HOME/.claude/get-shit-done/workflows/execute-plan.md +@$HOME/.claude/get-shit-done/templates/summary.md + + + +@.planning/phases/08-dork-engine/08-CONTEXT.md +@.planning/phases/08-dork-engine/08-01-PLAN.md +@pkg/dorks/executor.go + + + +```go +type Executor interface { + Source() string + Execute(ctx context.Context, d Dork, limit int) ([]Match, error) +} + +type Match struct { + DorkID string + Source string + URL string + Snippet string + Path string +} + +var ErrMissingAuth = errors.New("dork source requires auth credentials") +``` + + + + + + + Task 1: GitHubExecutor with net/http + Retry-After handling + pkg/dorks/github.go, pkg/dorks/github_test.go + + - Test: Execute with empty token returns ErrMissingAuth (wrapped) without hitting HTTP + - Test: Execute with httptest server returning 200 + items parses response into []Match with URL/Path/Snippet + - Test: limit=5 caps returned Match count at 5 even if API returns 10 + - Test: 403 with X-RateLimit-Remaining=0 and Retry-After=1 sleeps and retries once, then succeeds + - Test: 401 returns ErrMissingAuth (token rejected) + - Test: 422 (invalid query) returns a descriptive error containing the status code + - Test: Source() returns "github" + + + Create pkg/dorks/github.go: + + ```go + package dorks + + import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "strconv" + "time" + ) + + type GitHubExecutor struct { + Token string + BaseURL string // default "https://api.github.com", overridable for tests + HTTPClient *http.Client + MaxRetries int // default 1 + } + + func NewGitHubExecutor(token string) *GitHubExecutor { + return &GitHubExecutor{ + Token: token, + BaseURL: 
"https://api.github.com", + HTTPClient: &http.Client{Timeout: 30 * time.Second}, + MaxRetries: 1, + } + } + + func (g *GitHubExecutor) Source() string { return "github" } + + type ghSearchResponse struct { + TotalCount int `json:"total_count"` + Items []struct { + Name string `json:"name"` + Path string `json:"path"` + HTMLURL string `json:"html_url"` + Repository struct { + FullName string `json:"full_name"` + } `json:"repository"` + TextMatches []struct { + Fragment string `json:"fragment"` + } `json:"text_matches"` + } `json:"items"` + } + + func (g *GitHubExecutor) Execute(ctx context.Context, d Dork, limit int) ([]Match, error) { + if g.Token == "" { + return nil, fmt.Errorf("%w: set GITHUB_TOKEN env var or `keyhunter config set dorks.github.token ` (needs public_repo scope)", ErrMissingAuth) + } + if limit <= 0 || limit > 100 { + limit = 30 + } + + url := fmt.Sprintf("%s/search/code?q=%s&per_page=%d", g.BaseURL, urlQueryEscape(d.Query), limit) + + var resp *http.Response + for attempt := 0; attempt <= g.MaxRetries; attempt++ { + req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + if err != nil { return nil, err } + req.Header.Set("Accept", "application/vnd.github.v3.text-match+json") + req.Header.Set("Authorization", "Bearer "+g.Token) + req.Header.Set("User-Agent", "keyhunter-dork-engine") + + r, err := g.HTTPClient.Do(req) + if err != nil { return nil, fmt.Errorf("github search: %w", err) } + + if r.StatusCode == http.StatusOK { + resp = r + break + } + + body, _ := io.ReadAll(r.Body) + r.Body.Close() + + switch r.StatusCode { + case http.StatusUnauthorized: + return nil, fmt.Errorf("%w: github token rejected (401)", ErrMissingAuth) + case http.StatusForbidden, http.StatusTooManyRequests: + if attempt < g.MaxRetries { + sleep := parseRetryAfter(r.Header.Get("Retry-After")) + select { + case <-time.After(sleep): + continue + case <-ctx.Done(): + return nil, ctx.Err() + } + } + return nil, fmt.Errorf("github rate limit: %d %s", 
r.StatusCode, string(body)) + default: + return nil, fmt.Errorf("github search failed: %d %s", r.StatusCode, string(body)) + } + } + defer resp.Body.Close() + + var parsed ghSearchResponse + if err := json.NewDecoder(resp.Body).Decode(&parsed); err != nil { + return nil, fmt.Errorf("decoding github response: %w", err) + } + + out := make([]Match, 0, len(parsed.Items)) + for _, it := range parsed.Items { + snippet := "" + if len(it.TextMatches) > 0 { + snippet = it.TextMatches[0].Fragment + } + out = append(out, Match{ + DorkID: d.ID, + Source: "github", + URL: it.HTMLURL, + Path: it.Repository.FullName + "/" + it.Path, + Snippet: snippet, + }) + if len(out) >= limit { break } + } + return out, nil + } + + func parseRetryAfter(v string) time.Duration { + if v == "" { return time.Second } + if secs, err := strconv.Atoi(v); err == nil { + return time.Duration(secs) * time.Second + } + return time.Second + } + + func urlQueryEscape(s string) string { + return (&url.URL{Path: s}).EscapedPath() // wrong — use url.QueryEscape + } + ``` + + Fix the helper: import "net/url" and use `url.QueryEscape(s)` — do NOT hand-roll. + + Create pkg/dorks/github_test.go using httptest.NewServer. Override + executor.BaseURL to the test server URL. One subtest per behavior case. + For Retry-After test: server returns 403 with Retry-After: 1 on first + request, 200 with fake items on second. + + Do NOT register GitHubExecutor into a global Runner here — Plan 08-06 does + the wiring inside cmd/dorks.go via NewGitHubExecutor(viper.GetString(...)). + + + cd /home/salva/Documents/apikey && go test ./pkg/dorks/... -run GitHub -v + + + All GitHub executor test cases pass; Execute honors token, rate limit, and + limit cap; Match fields populated from real response shape. + + + + + + +`go test ./pkg/dorks/...` passes including all new GitHub cases. 
+ + + +- pkg/dorks.GitHubExecutor implements Executor interface +- Live GitHub Code Search calls are testable via httptest (BaseURL override) +- ErrMissingAuth surfaces with actionable setup instructions +- Retry-After respected once before giving up + + + +After completion, create `.planning/phases/08-dork-engine/08-05-SUMMARY.md` + diff --git a/.planning/phases/08-dork-engine/08-06-PLAN.md b/.planning/phases/08-dork-engine/08-06-PLAN.md new file mode 100644 index 0000000..ae8755d --- /dev/null +++ b/.planning/phases/08-dork-engine/08-06-PLAN.md @@ -0,0 +1,258 @@ +--- +phase: 08-dork-engine +plan: 06 +type: execute +wave: 3 +depends_on: [08-01, 08-02, 08-03, 08-04, 08-05] +files_modified: + - cmd/dorks.go + - cmd/dorks_test.go + - cmd/stubs.go + - cmd/root.go +autonomous: true +requirements: + - DORK-03 + - DORK-04 +must_haves: + truths: + - "`keyhunter dorks list` prints a table of embedded + custom dorks" + - "`keyhunter dorks list --source=github --category=frontier` filters correctly" + - "`keyhunter dorks info ` prints full dork detail" + - "`keyhunter dorks run --source=github --id=` calls GitHubExecutor" + - "`keyhunter dorks run --source=shodan` returns ErrSourceNotImplemented with a helpful message" + - "`keyhunter dorks add --source=github --category=frontier --query=... 
--description=...` persists to custom_dorks table"
+    - "`keyhunter dorks delete <dork-id>` removes from custom_dorks and refuses embedded IDs"
+    - "`keyhunter dorks export --format=yaml` dumps embedded+custom"
+  artifacts:
+    - path: "cmd/dorks.go"
+      provides: "dorks Cobra command tree replacing stub"
+      contains: "dorksCmd.AddCommand"
+  key_links:
+    - from: "cmd/dorks.go"
+      to: "pkg/dorks.Registry"
+      via: "dorks.NewRegistry()"
+      pattern: "dorks.NewRegistry"
+    - from: "cmd/dorks.go run"
+      to: "pkg/dorks.GitHubExecutor"
+      via: "NewGitHubExecutor(viper.GetString(\"dorks.github.token\"))"
+      pattern: "NewGitHubExecutor"
+    - from: "cmd/dorks.go add/delete"
+      to: "storage.DB custom_dorks CRUD"
+      via: "db.SaveCustomDork / DeleteCustomDork"
+      pattern: "SaveCustomDork|DeleteCustomDork"
+---
+
+
+Replace the Phase-8 dorks stub in cmd/stubs.go with a full Cobra command tree:
+list / run / add / export / info / delete. Wires the Registry (embedded dorks)
+together with storage custom_dorks (user dorks) and the GitHubExecutor (live
+execution). Satisfies DORK-03 (the four required CLI verbs — list, run, add,
+export — plus the info and delete helpers) and DORK-04 (category filtering via
+the --category flag).
+
+Purpose: User-facing surface of the dork engine — everything built in 08-01
+through 08-05 becomes usable from the CLI.
+Output: Working `keyhunter dorks ...` subcommands.
+ + + +@$HOME/.claude/get-shit-done/workflows/execute-plan.md +@$HOME/.claude/get-shit-done/templates/summary.md + + + +@.planning/phases/08-dork-engine/08-CONTEXT.md +@.planning/phases/08-dork-engine/08-01-PLAN.md +@.planning/phases/08-dork-engine/08-05-PLAN.md +@cmd/stubs.go +@cmd/root.go +@cmd/keys.go +@pkg/dorks/schema.go +@pkg/dorks/registry.go +@pkg/dorks/executor.go +@pkg/storage/custom_dorks.go + + + +```go +// pkg/dorks +func NewRegistry() (*Registry, error) +func (r *Registry) List() []Dork +func (r *Registry) Get(id string) (Dork, bool) +func (r *Registry) ListBySource(src string) []Dork +func (r *Registry) ListByCategory(cat string) []Dork +func NewGitHubExecutor(token string) *GitHubExecutor + +// pkg/storage +func (db *DB) SaveCustomDork(d CustomDork) (int64, error) +func (db *DB) ListCustomDorks() ([]CustomDork, error) +func (db *DB) GetCustomDorkByDorkID(dorkID string) (CustomDork, error) +func (db *DB) DeleteCustomDork(id int64) (int64, error) +``` + + + + + + + Task 1: cmd/dorks.go command tree (list/info/export) + cmd/dorks.go, cmd/stubs.go, cmd/root.go + + 1. Remove `var dorksCmd` from cmd/stubs.go (keep the rest of the stubs file + untouched). + + 2. Create cmd/dorks.go with the full command tree. Wire into root.go by + leaving the existing `rootCmd.AddCommand(dorksCmd)` in place — only the + declaration moves. 
+ + Structure: + ```go + package cmd + + import ( + "context" + "encoding/json" + "errors" + "fmt" + "os" + "strings" + + "github.com/spf13/cobra" + "github.com/spf13/viper" + "gopkg.in/yaml.v3" + + "github.com///pkg/dorks" + "github.com///pkg/storage" + ) + + var ( + dorksFilterSource string + dorksFilterCategory string + dorksRunID string + dorksRunLimit int + dorksAddSource string + dorksAddCategory string + dorksAddID string + dorksAddName string + dorksAddQuery string + dorksAddDescription string + dorksExportFormat string + ) + + var dorksCmd = &cobra.Command{ + Use: "dorks", + Short: "Manage and run dork queries (DORK-01..DORK-04)", + } + + // list, info, run, add, delete, export subcommands declared here. + ``` + + Replace `/` by reading the existing `module` line in + /home/salva/Documents/apikey/go.mod before writing the file (use the same + import path that cmd/keys.go uses). + + **dorks list** — merges Registry.List() + db.ListCustomDorks(). Applies + --source and --category filters in memory. Prints a lipgloss table + (reuse pkg/output helpers if present, else a simple tab-aligned fmt). Mark + custom rows with a `*` prefix on the ID column. + + **dorks info ** — looks up by dork_id first in Registry.Get, falls back + to db.GetCustomDorkByDorkID. Prints all fields including tags. + + **dorks export [--format=yaml|json]** — emits a single combined list of + embedded + custom dorks in the requested format. Default yaml. + + Use `initDorksDB()` helper that opens storage.DB using the same path logic + cmd/keys.go uses (viper.GetString("database.path")) so tests can inject a + temp path. + + + cd /home/salva/Documents/apikey && go build ./... && go run ./ dorks list --source=github --category=frontier 2>&1 | grep -Ei '(openai|anthropic)' + + + list/info/export commands compile and run. Filtering by source/category + works. Stub removed from cmd/stubs.go. 
+
+
+
+ Task 2: dorks run/add/delete + command tests
+ cmd/dorks.go, cmd/dorks_test.go
+
+ - Test: `dorks add` persists a custom dork and `dorks list` shows it marked with `*`
+ - Test: `dorks delete <custom-dork-id>` removes it; `dorks delete <embedded-dork-id>` errors "embedded dorks cannot be deleted"
+ - Test: `dorks run --source=shodan --id=<shodan-dork-id>` returns error wrapping ErrSourceNotImplemented
+ - Test: `dorks run --source=github --id=<github-dork-id>` with empty GITHUB_TOKEN returns ErrMissingAuth setup message
+ - Test: `dorks run --source=github --id=<github-dork-id>` with injected fake GitHubExecutor returns mocked matches
+ - Use cobra.Command.SetArgs + bytes.Buffer to capture stdout in tests; inject
+   a tempdir DB path via viper.Set("database.path", tmp).
+
+
+ 1. Add **dorks add** subcommand to cmd/dorks.go. Builds a storage.CustomDork
+    from flags, validates source and category against dorks.ValidSources /
+    valid categories (frontier, specialized, infrastructure, emerging,
+    enterprise), calls db.SaveCustomDork. Fails if dork_id collides with an
+    embedded dork ID.
+
+ 2. Add **dorks delete <dork-id>** subcommand. If the ID matches an embedded
+    dork, print "embedded dorks cannot be deleted" and exit 2. Otherwise
+    call db.GetCustomDorkByDorkID -> db.DeleteCustomDork.
+
+ 3. Add **dorks run** subcommand:
+    - Required flags: --source; optional: --id, --category, --limit (default 10)
+    - If --id is set, run just that single dork (from registry or custom)
+    - Else, run all dorks matching --source and --category filters
+    - For each dork: dispatch via a Runner wired only with GitHub executor.
+      Non-github sources return fmt.Errorf wrapping ErrSourceNotImplemented
+      with message: "source %q not yet implemented (coming Phase 9-16)"
+    - Print matches as a simple list: `[dork-id] URL (path) snippet`
+    - Exit code 0 on success (even with zero matches), 2 on any executor
+      error other than ErrSourceNotImplemented (which should exit 2 also
+      but with the friendly message).
+
+ 4. 
Extract the executor factory into a package-level var so tests can inject: + ```go + var newGitHubExecutor = func() dorks.Executor { + return dorks.NewGitHubExecutor(viper.GetString("dorks.github.token")) + } + ``` + Tests override `newGitHubExecutor` to return a fake Executor. + + 5. Create cmd/dorks_test.go: + - Helper `setupDorksTest(t)` — creates tempdir, viper.Set("database.path", ...), + opens DB via initDorksDB, runs schema, returns cleanup func. + - Tests execute rootCmd with SetArgs([]string{"dorks", "add", "--source=github", ...}). + - Capture output via rootCmd.SetOut(&buf). + - FakeExecutor type implements dorks.Executor with a stubbed Execute() + returning canned Match slice. + + 6. **Config binding:** In cmd/root.go initConfig, ensure + `viper.BindEnv("dorks.github.token", "GITHUB_TOKEN")` is set so the + executor picks up the env var. + + + cd /home/salva/Documents/apikey && go test ./cmd/... -run Dorks -v + + + run/add/delete subcommands work end-to-end. All cmd/dorks_test.go tests pass. + GITHUB_TOKEN env var bound. Embedded dork deletion refused. + + + + + + +- `go build ./...` succeeds +- `go test ./cmd/... ./pkg/dorks/... 
./pkg/storage/...` passes +- `./keyhunter dorks list --source=github --category=frontier` shows real dorks +- `./keyhunter dorks run --source=shodan --id=shodan-ollama-default` emits ErrSourceNotImplemented message + + + +- DORK-03 satisfied: list, run, add, export, info, delete all functional +- DORK-04 satisfied: --source and --category filters applied in both list and run +- dorks stub removed from cmd/stubs.go + + + +After completion, create `.planning/phases/08-dork-engine/08-06-SUMMARY.md` + diff --git a/.planning/phases/08-dork-engine/08-07-PLAN.md b/.planning/phases/08-dork-engine/08-07-PLAN.md new file mode 100644 index 0000000..30c8e57 --- /dev/null +++ b/.planning/phases/08-dork-engine/08-07-PLAN.md @@ -0,0 +1,158 @@ +--- +phase: 08-dork-engine +plan: 07 +type: execute +wave: 3 +depends_on: [08-02, 08-03, 08-04, 08-06] +files_modified: + - pkg/dorks/count_test.go +autonomous: true +requirements: + - DORK-02 +must_haves: + truths: + - "A guardrail test asserts pkg/dorks.NewRegistry() loads >= 150 dorks" + - "Per-source minimums enforced: github>=50, google>=30, shodan>=20, censys>=15, zoomeye>=10, fofa>=10, gitlab>=10, bing>=5" + - "All 8 sources present in Stats().BySource" + - "All 5 categories present in Stats().ByCategory" + - "All dork IDs are unique (no collisions between source files)" + artifacts: + - path: "pkg/dorks/count_test.go" + provides: "Guardrail test preventing regression below 150-dork threshold" + contains: "TestDorkCountGuardrail" + key_links: + - from: "pkg/dorks/count_test.go" + to: "pkg/dorks.NewRegistry()" + via: "direct call to real embedded FS" + pattern: "NewRegistry\\(\\)" +--- + + +Lock in the DORK-02 "150+ built-in dorks" requirement with a guardrail test +that runs against the real embedded filesystem. If a future contributor removes +a dork file or breaks a source's YAML, CI fails loudly instead of silently +dropping coverage. Also asserts per-source minimums and ID uniqueness so partial +regressions are caught. 
+ +Purpose: Prevents silent regression of the requirement that took plans 02-04 +to satisfy. +Output: One test file with a few targeted assertions. + + + +@$HOME/.claude/get-shit-done/workflows/execute-plan.md +@$HOME/.claude/get-shit-done/templates/summary.md + + + +@.planning/phases/08-dork-engine/08-CONTEXT.md +@pkg/dorks/registry.go + + + + + + Task 1: Dork count + uniqueness guardrail test + pkg/dorks/count_test.go + + - TestDorkCountGuardrail: NewRegistry() returns >= 150 dorks total + - TestDorkCountPerSource: each of the 8 sources meets its minimum + - TestDorkCategoriesPresent: all 5 categories appear in Stats().ByCategory + - TestDorkIDsUnique: walking Registry.List(), no duplicate IDs + + + Create pkg/dorks/count_test.go: + + ```go + package dorks + + import "testing" + + func TestDorkCountGuardrail(t *testing.T) { + r, err := NewRegistry() + if err != nil { + t.Fatalf("NewRegistry: %v", err) + } + const minTotal = 150 + if got := len(r.List()); got < minTotal { + t.Fatalf("dork count regression: got %d, want >= %d (see DORK-02)", got, minTotal) + } + } + + func TestDorkCountPerSource(t *testing.T) { + r, err := NewRegistry() + if err != nil { t.Fatal(err) } + + minimums := map[string]int{ + "github": 50, + "google": 30, + "shodan": 20, + "censys": 15, + "zoomeye": 10, + "fofa": 10, + "gitlab": 10, + "bing": 5, + } + stats := r.Stats() + for src, min := range minimums { + if stats.BySource[src] < min { + t.Errorf("source %s: got %d, want >= %d", src, stats.BySource[src], min) + } + } + } + + func TestDorkCategoriesPresent(t *testing.T) { + r, err := NewRegistry() + if err != nil { t.Fatal(err) } + required := []string{"frontier", "specialized", "infrastructure", "emerging", "enterprise"} + stats := r.Stats() + for _, c := range required { + if stats.ByCategory[c] == 0 { + t.Errorf("category %q missing", c) + } + } + } + + func TestDorkIDsUnique(t *testing.T) { + r, err := NewRegistry() + if err != nil { t.Fatal(err) } + seen := map[string]string{} + for 
_, d := range r.List() { + if existing, ok := seen[d.ID]; ok { + t.Errorf("duplicate dork id %q (second occurrence: %s, first: %s)", d.ID, d.Source, existing) + } + seen[d.ID] = d.Source + } + } + ``` + + Note: Stats struct was defined in 08-01 with BySource and ByCategory + map[string]int fields — confirm the exact field names match. If the + implementation used different names, align test accordingly (do not + modify 08-01; adjust the test). + + + cd /home/salva/Documents/apikey && go test ./pkg/dorks/ -run 'TestDork(Count|Categories|IDs)' -v + + + All four guardrail tests pass against the real embedded filesystem. The + DORK-02 150+ floor is now CI-enforced. + + + + + + +`go test ./pkg/dorks/...` passes (full suite). +`go test ./...` for the repo passes. + + + +- DORK-02 enforced by test (not just counted by grep) +- Per-source minimums match the 50/30/20/15/10/10/10/5 distribution +- No duplicate dork IDs across the embedded corpus + + + +After completion, create `.planning/phases/08-dork-engine/08-07-SUMMARY.md` +