feat(08-06): add dorks list/info/export commands

- Replace cmd/stubs.go dorksCmd stub with full command tree
- Add cmd/dorks.go with list, info, export subcommands
- Wire Registry + custom_dorks merge for list/export
- Bind GITHUB_TOKEN env var via viper for downstream run

Satisfies part of DORK-03 (list/info/export) and DORK-04 (source/category
filtering). run/add/delete land in Task 2.
This commit is contained in:
salvacybersec
2026-04-06 00:26:36 +03:00
parent f9e3ad99f8
commit b7934ce169
2 changed files with 540 additions and 5 deletions

539
cmd/dorks.go Normal file
View File

@@ -0,0 +1,539 @@
package cmd
import (
"context"
"database/sql"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"strings"
"text/tabwriter"
"time"
"github.com/salvacybersec/keyhunter/pkg/dorks"
"github.com/salvacybersec/keyhunter/pkg/storage"
"github.com/spf13/cobra"
"github.com/spf13/viper"
"gopkg.in/yaml.v3"
)
// Package-level storage for every flag in the dorks command tree. The values
// live here (rather than in per-command closures) so that resetDorksFlags can
// wipe them between test runs.
var (
	// Bound to --source / --category on both "list" and "run".
	dorksFilterSource, dorksFilterCategory string

	// "run": --id and --limit.
	dorksRunID    string
	dorksRunLimit int

	// "add": one variable per flag describing the new custom dork.
	dorksAddSource, dorksAddCategory   string
	dorksAddID, dorksAddName           string
	dorksAddQuery, dorksAddDescription string
	dorksAddTags                       []string

	// "export": --format.
	dorksExportFormat string
)
// newGitHubExecutor builds the executor that "dorks run" registers for the
// github source. It is a variable rather than a function so tests can swap in
// a fake; the default implementation reads the token from the viper key
// "dorks.github.token".
var newGitHubExecutor = func() dorks.Executor {
	token := viper.GetString("dorks.github.token")
	return dorks.NewGitHubExecutor(token)
}
// dorksCmd is the root of the "keyhunter dorks" command tree (Phase 8,
// DORK-03 and DORK-04). It defines no run function of its own; the list,
// info, export, run, add and delete subcommands are attached to it in init.
var dorksCmd = &cobra.Command{
	Use:   "dorks",
	Short: "Manage and run dork queries (DORK-01..DORK-04)",
	Long: `Dorks are YAML-defined search queries targeting public sources
(GitHub, Google, Shodan, Censys, ...) designed to surface leaked LLM API
keys. Embedded dorks ship with KeyHunter; user-authored dorks live in the
custom_dorks table and are mixed into every list/run by default.`,
}
// dorksListCmd prints a table of embedded and custom dorks, narrowed by the
// optional --source / --category filters and ordered by source then ID. Custom
// rows have "*" prepended to their ID, and a count line follows the table.
var dorksListCmd = &cobra.Command{
	Use:   "list",
	Short: "List embedded + custom dorks (custom rows marked with *)",
	Args:  cobra.NoArgs,
	RunE: func(cmd *cobra.Command, args []string) error {
		registry, err := dorks.NewRegistry()
		if err != nil {
			return fmt.Errorf("loading dork registry: %w", err)
		}

		store, err := initDorksDB()
		if err != nil {
			return err
		}
		defer store.Close()

		all, err := mergedDorkRows(registry, store)
		if err != nil {
			return err
		}
		visible := filterRows(all, dorksFilterSource, dorksFilterCategory)

		// Primary key: source. Secondary key: ID within the same source.
		sort.SliceStable(visible, func(a, b int) bool {
			left, right := visible[a].Dork, visible[b].Dork
			if left.Source == right.Source {
				return left.ID < right.ID
			}
			return left.Source < right.Source
		})

		stdout := cmd.OutOrStdout()
		table := tabwriter.NewWriter(stdout, 0, 0, 2, ' ', 0)
		fmt.Fprintln(table, "ID\tSOURCE\tCATEGORY\tNAME")
		for i := range visible {
			entry := visible[i]
			label := entry.Dork.ID
			if entry.Custom {
				label = "*" + label
			}
			fmt.Fprintf(table, "%s\t%s\t%s\t%s\n", label, entry.Dork.Source, entry.Dork.Category, entry.Dork.Name)
		}
		// The table is buffered; nothing reaches stdout until Flush.
		if err := table.Flush(); err != nil {
			return err
		}

		fmt.Fprintf(stdout, "%d dork(s).\n", len(visible))
		return nil
	},
}
// dorksInfoCmd renders every field of one dork. The embedded registry is
// consulted first; the database is only opened when the id is not embedded,
// in which case the custom_dorks lookup decides between a custom dork and a
// "no dork" error.
var dorksInfoCmd = &cobra.Command{
	Use:   "info <dork-id>",
	Short: "Show full detail for a single dork by id",
	Args:  cobra.ExactArgs(1),
	RunE: func(cmd *cobra.Command, args []string) error {
		wanted := args[0]
		stdout := cmd.OutOrStdout()

		registry, err := dorks.NewRegistry()
		if err != nil {
			return fmt.Errorf("loading dork registry: %w", err)
		}
		if embedded, found := registry.Get(wanted); found {
			renderDork(stdout, embedded, false)
			return nil
		}

		store, err := initDorksDB()
		if err != nil {
			return err
		}
		defer store.Close()

		custom, err := store.GetCustomDorkByDorkID(wanted)
		switch {
		case err == nil:
			renderDork(stdout, customToDork(custom), true)
			return nil
		case errors.Is(err, sql.ErrNoRows):
			// Neither embedded nor custom.
			return fmt.Errorf("no dork with id %q", wanted)
		default:
			return fmt.Errorf("looking up custom dork: %w", err)
		}
	},
}
// dorksExportCmd writes all embedded and custom dorks to stdout as a single
// YAML or JSON document. The --format value is trimmed and lower-cased, an
// empty value means yaml, and anything other than yaml/json is rejected
// before the registry or database is touched. The embedded/custom distinction
// is not part of the exported data.
var dorksExportCmd = &cobra.Command{
	Use:   "export",
	Short: "Export embedded + custom dorks as yaml or json",
	Args:  cobra.NoArgs,
	RunE: func(cmd *cobra.Command, args []string) error {
		format := strings.ToLower(strings.TrimSpace(dorksExportFormat))
		switch format {
		case "":
			format = "yaml"
		case "yaml", "json":
			// supported as-is
		default:
			return fmt.Errorf("dorks export: unsupported format %q (supported: yaml, json)", format)
		}

		registry, err := dorks.NewRegistry()
		if err != nil {
			return fmt.Errorf("loading dork registry: %w", err)
		}

		store, err := initDorksDB()
		if err != nil {
			return err
		}
		defer store.Close()

		rows, err := mergedDorkRows(registry, store)
		if err != nil {
			return err
		}

		// Strip the origin marker; only the dork definitions are exported.
		exported := make([]dorks.Dork, len(rows))
		for i := range rows {
			exported[i] = rows[i].Dork
		}

		stdout := cmd.OutOrStdout()
		if format == "json" {
			jsonEnc := json.NewEncoder(stdout)
			jsonEnc.SetIndent("", " ")
			return jsonEnc.Encode(exported)
		}

		yamlEnc := yaml.NewEncoder(stdout)
		yamlEnc.SetIndent(2)
		if err := yamlEnc.Encode(exported); err != nil {
			return err
		}
		return yamlEnc.Close()
	},
}
// dorksRunCmd executes the dorks chosen by selectRunTargets and prints one
// line per match plus a final summary. --source is mandatory. A GitHub
// executor is registered only when the source is exactly "github". A
// non-positive --limit falls back to 10, and the whole batch shares a single
// two-minute timeout. The first failing dork aborts the run.
var dorksRunCmd = &cobra.Command{
	Use:   "run",
	Short: "Execute dorks matching --source/--category or a single --id",
	Args:  cobra.NoArgs,
	RunE: func(cmd *cobra.Command, args []string) error {
		if strings.TrimSpace(dorksFilterSource) == "" {
			return fmt.Errorf("dorks run: --source is required")
		}

		registry, err := dorks.NewRegistry()
		if err != nil {
			return fmt.Errorf("loading dork registry: %w", err)
		}

		store, err := initDorksDB()
		if err != nil {
			return err
		}
		defer store.Close()

		targets, err := selectRunTargets(registry, store)
		if err != nil {
			return err
		}

		stdout := cmd.OutOrStdout()
		if len(targets) == 0 {
			fmt.Fprintf(stdout, "No dorks matched (source=%q category=%q id=%q).\n",
				dorksFilterSource, dorksFilterCategory, dorksRunID)
			return nil
		}

		runner := dorks.NewRunner()
		if dorksFilterSource == "github" {
			runner.Register(newGitHubExecutor())
		}

		perDork := dorksRunLimit
		if perDork <= 0 {
			perDork = 10
		}

		ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
		defer cancel()

		total := 0
		for _, target := range targets {
			found, err := runner.Run(ctx, target, perDork)
			switch {
			case err == nil:
				// fall out of the switch and print the matches
			case errors.Is(err, dorks.ErrSourceNotImplemented):
				return fmt.Errorf("source %q not yet implemented (coming Phase 9-16)", target.Source)
			case errors.Is(err, dorks.ErrMissingAuth):
				return fmt.Errorf("%w\nhint: export GITHUB_TOKEN=<pat> (needs public_repo scope) or run `keyhunter config set dorks.github.token <pat>`", err)
			default:
				return fmt.Errorf("running dork %q: %w", target.ID, err)
			}

			for _, m := range found {
				fmt.Fprintf(stdout, "[%s] %s (%s) %s\n", m.DorkID, m.URL, m.Path, truncate(m.Snippet, 120))
			}
			total += len(found)
		}

		fmt.Fprintf(stdout, "%d match(es) across %d dork(s).\n", total, len(targets))
		return nil
	},
}
// dorksAddCmd validates the --source, --category, --query and --id flags (in
// that order), refuses ids already used by an embedded dork, and then stores
// the new dork through SaveCustomDork. When --name is blank the id doubles as
// the name.
var dorksAddCmd = &cobra.Command{
	Use:   "add",
	Short: "Persist a user-authored dork into the custom_dorks table",
	Args:  cobra.NoArgs,
	RunE: func(cmd *cobra.Command, args []string) error {
		// Cases are evaluated top to bottom, so the first failing rule
		// decides which error the user sees.
		switch {
		case strings.TrimSpace(dorksAddSource) == "":
			return fmt.Errorf("dorks add: --source is required")
		case !containsString(dorks.ValidSources, dorksAddSource):
			return fmt.Errorf("dorks add: source %q is not one of %v", dorksAddSource, dorks.ValidSources)
		case strings.TrimSpace(dorksAddCategory) == "":
			return fmt.Errorf("dorks add: --category is required")
		case !containsString(dorks.ValidCategories, dorksAddCategory):
			return fmt.Errorf("dorks add: category %q is not one of %v", dorksAddCategory, dorks.ValidCategories)
		case strings.TrimSpace(dorksAddQuery) == "":
			return fmt.Errorf("dorks add: --query is required")
		case strings.TrimSpace(dorksAddID) == "":
			return fmt.Errorf("dorks add: --id is required")
		}

		displayName := dorksAddName
		if strings.TrimSpace(displayName) == "" {
			displayName = dorksAddID
		}

		// An id that shadows an embedded dork is rejected before the
		// database is opened.
		registry, err := dorks.NewRegistry()
		if err != nil {
			return fmt.Errorf("loading dork registry: %w", err)
		}
		if _, taken := registry.Get(dorksAddID); taken {
			return fmt.Errorf("dorks add: id %q collides with an embedded dork", dorksAddID)
		}

		store, err := initDorksDB()
		if err != nil {
			return err
		}
		defer store.Close()

		record := storage.CustomDork{
			DorkID:      dorksAddID,
			Name:        displayName,
			Source:      dorksAddSource,
			Category:    dorksAddCategory,
			Query:       dorksAddQuery,
			Description: dorksAddDescription,
			Tags:        dorksAddTags,
		}
		rowID, err := store.SaveCustomDork(record)
		if err != nil {
			return fmt.Errorf("saving custom dork: %w", err)
		}

		fmt.Fprintf(cmd.OutOrStdout(), "Added custom dork %q (row %d).\n", dorksAddID, rowID)
		return nil
	},
}
// dorksDeleteCmd removes one custom dork. Ids that belong to an embedded dork
// are refused outright. Otherwise the custom row is looked up by dork id and
// deleted by its row ID, and a delete that affects zero rows is reported as
// "not found".
var dorksDeleteCmd = &cobra.Command{
	Use:   "delete <dork-id>",
	Short: "Delete a custom dork (embedded dorks cannot be deleted)",
	Args:  cobra.ExactArgs(1),
	RunE: func(cmd *cobra.Command, args []string) error {
		target := args[0]

		registry, err := dorks.NewRegistry()
		if err != nil {
			return fmt.Errorf("loading dork registry: %w", err)
		}
		if _, builtIn := registry.Get(target); builtIn {
			return fmt.Errorf("embedded dorks cannot be deleted (id %q is built in)", target)
		}

		store, err := initDorksDB()
		if err != nil {
			return err
		}
		defer store.Close()

		custom, err := store.GetCustomDorkByDorkID(target)
		switch {
		case err == nil:
			// found; proceed to delete below
		case errors.Is(err, sql.ErrNoRows):
			return fmt.Errorf("no custom dork with id %q", target)
		default:
			return fmt.Errorf("looking up custom dork: %w", err)
		}

		removed, err := store.DeleteCustomDork(custom.ID)
		if err != nil {
			return fmt.Errorf("deleting custom dork: %w", err)
		}
		if removed == 0 {
			return fmt.Errorf("no custom dork with id %q", target)
		}

		fmt.Fprintf(cmd.OutOrStdout(), "Deleted custom dork %q.\n", target)
		return nil
	},
}
// dorkRow is an internal helper binding a Dork to its origin (embedded or
// custom) for list/export rendering.
type dorkRow struct {
	// Dork is the dork definition itself.
	Dork dorks.Dork
	// Custom is true for rows loaded from the custom_dorks table and false
	// for rows that come from the embedded registry.
	Custom bool
}
// mergedDorkRows returns every embedded dork from reg followed by every
// custom dork from db, each tagged with its origin. A failure to list the
// custom dorks yields a nil slice and a wrapped error.
func mergedDorkRows(reg *dorks.Registry, db *storage.DB) ([]dorkRow, error) {
	embedded := reg.List()
	merged := make([]dorkRow, 0, len(embedded))
	for _, builtIn := range embedded {
		// Custom is left at its zero value (false) for embedded dorks.
		merged = append(merged, dorkRow{Dork: builtIn})
	}

	stored, err := db.ListCustomDorks()
	if err != nil {
		return nil, fmt.Errorf("listing custom dorks: %w", err)
	}
	for _, userDork := range stored {
		merged = append(merged, dorkRow{
			Dork:   customToDork(userDork),
			Custom: true,
		})
	}
	return merged, nil
}
// filterRows keeps the rows whose Source and Category exactly equal the given
// values. An empty source or category disables that criterion; when both are
// empty the input slice is returned unchanged. Otherwise the result is built
// in fresh storage, so the caller's slice is never modified.
func filterRows(rows []dorkRow, source, category string) []dorkRow {
	anySource, anyCategory := source == "", category == ""
	if anySource && anyCategory {
		return rows
	}

	// Zero-length, zero-capacity view: the first append allocates a new
	// backing array instead of overwriting rows.
	kept := rows[:0:0]
	for i := range rows {
		row := rows[i]
		sourceOK := anySource || row.Dork.Source == source
		categoryOK := anyCategory || row.Dork.Category == category
		if sourceOK && categoryOK {
			kept = append(kept, row)
		}
	}
	return kept
}
// selectRunTargets works out which dorks "dorks run" should execute.
//
// Without --id it returns every embedded and custom dork that passes the
// --source / --category filters. With --id it resolves that single dork
// (embedded registry first, then custom_dorks) and requires its source to
// equal --source; --category is not consulted in that case.
func selectRunTargets(reg *dorks.Registry, db *storage.DB) ([]dorks.Dork, error) {
	if dorksRunID == "" {
		rows, err := mergedDorkRows(reg, db)
		if err != nil {
			return nil, err
		}
		rows = filterRows(rows, dorksFilterSource, dorksFilterCategory)

		selected := make([]dorks.Dork, len(rows))
		for i := range rows {
			selected[i] = rows[i].Dork
		}
		return selected, nil
	}

	// Single-dork mode: embedded ids win over custom ones.
	picked, embedded := reg.Get(dorksRunID)
	if !embedded {
		custom, err := db.GetCustomDorkByDorkID(dorksRunID)
		switch {
		case err == nil:
			picked = customToDork(custom)
		case errors.Is(err, sql.ErrNoRows):
			return nil, fmt.Errorf("no dork with id %q", dorksRunID)
		default:
			return nil, fmt.Errorf("looking up dork: %w", err)
		}
	}

	if picked.Source != dorksFilterSource {
		return nil, fmt.Errorf("dork %q has source %q, not %q", picked.ID, picked.Source, dorksFilterSource)
	}
	return []dorks.Dork{picked}, nil
}
// initDorksDB opens the KeyHunter SQLite database using the same precedence
// rules as openDBWithKey but without deriving an encryption key — the dork
// CLI only touches the plaintext custom_dorks table. Tests inject a temp
// path via viper.Set("database.path", tmp).
//
// The path comes from the viper key "database.path"; when that is empty it
// defaults to ~/.keyhunter/keyhunter.db. The parent directory is created with
// mode 0700 if needed. An error is returned when the home directory cannot be
// determined (only relevant for the default path), when the directory cannot
// be created, or when the database cannot be opened.
func initDorksDB() (*storage.DB, error) {
	dbPath := viper.GetString("database.path")
	if dbPath == "" {
		// Without a home directory the default would silently degrade to a
		// relative ".keyhunter/keyhunter.db" in the current working
		// directory, so fail loudly instead.
		home, err := os.UserHomeDir()
		if err != nil {
			return nil, fmt.Errorf("resolving home directory for default database path: %w", err)
		}
		dbPath = filepath.Join(home, ".keyhunter", "keyhunter.db")
	}
	if err := os.MkdirAll(filepath.Dir(dbPath), 0700); err != nil {
		return nil, fmt.Errorf("creating database directory: %w", err)
	}
	db, err := storage.Open(dbPath)
	if err != nil {
		return nil, fmt.Errorf("opening database: %w", err)
	}
	return db, nil
}
// renderDork writes a "Label: value" line to w for each field of d. The
// Origin line reads "custom" when custom is true and "embedded" otherwise.
// The Description and Tags lines are omitted when the dork has none.
func renderDork(w io.Writer, d dorks.Dork, custom bool) {
	var origin string
	switch {
	case custom:
		origin = "custom"
	default:
		origin = "embedded"
	}

	// Fields that are always shown.
	fmt.Fprintf(w, "ID: %s\n", d.ID)
	fmt.Fprintf(w, "Name: %s\n", d.Name)
	fmt.Fprintf(w, "Source: %s\n", d.Source)
	fmt.Fprintf(w, "Category: %s\n", d.Category)
	fmt.Fprintf(w, "Origin: %s\n", origin)
	fmt.Fprintf(w, "Query: %s\n", d.Query)

	// Optional fields.
	if desc := d.Description; desc != "" {
		fmt.Fprintf(w, "Description: %s\n", desc)
	}
	if len(d.Tags) != 0 {
		joined := strings.Join(d.Tags, ", ")
		fmt.Fprintf(w, "Tags: %s\n", joined)
	}
}
// customToDork converts a stored custom dork into a dorks.Dork by copying the
// identifying and descriptive fields across. The storage DorkID becomes the
// Dork's ID.
func customToDork(c storage.CustomDork) dorks.Dork {
	var converted dorks.Dork
	converted.ID = c.DorkID
	converted.Name = c.Name
	converted.Source = c.Source
	converted.Category = c.Category
	converted.Query = c.Query
	converted.Description = c.Description
	converted.Tags = c.Tags
	return converted
}
// containsString reports whether needle is exactly equal to any element of
// haystack. A nil or empty haystack never matches.
func containsString(haystack []string, needle string) bool {
	for i := 0; i < len(haystack); i++ {
		if haystack[i] == needle {
			return true
		}
	}
	return false
}
// truncate replaces newlines in s with spaces and shortens the result to at
// most max bytes, appending "..." whenever something was cut off.
//
// The cut point is moved back to the nearest rune boundary so a multi-byte
// UTF-8 character is never split in half, and a negative max is treated as 0
// rather than causing a slice-bounds panic. Strings that already fit are
// returned without the ellipsis.
func truncate(s string, max int) string {
	s = strings.ReplaceAll(s, "\n", " ")
	if max < 0 {
		max = 0
	}
	if len(s) <= max {
		return s
	}
	// Ranging over a string yields the byte offset at which each rune
	// starts; remember the last such offset that does not exceed max.
	cut := 0
	for i := range s {
		if i > max {
			break
		}
		cut = i
	}
	return s[:cut] + "..."
}
// resetDorksFlags puts every package-level dorks flag variable back to its
// zero value. Tests call it between invocations of the shared rootCmd so one
// run's flag values do not leak into the next.
func resetDorksFlags() {
	// list / run
	dorksFilterSource, dorksFilterCategory = "", ""
	dorksRunID, dorksRunLimit = "", 0

	// add
	dorksAddSource, dorksAddCategory, dorksAddID = "", "", ""
	dorksAddName, dorksAddQuery, dorksAddDescription = "", "", ""
	dorksAddTags = nil

	// export
	dorksExportFormat = ""
}
// init registers the flags of each dorks subcommand, attaches the subcommands
// to dorksCmd, and binds the GITHUB_TOKEN environment variable to the viper
// key "dorks.github.token".
func init() {
	// list: optional filters.
	listFlags := dorksListCmd.Flags()
	listFlags.StringVar(&dorksFilterSource, "source", "", "filter by source (github, google, shodan, ...)")
	listFlags.StringVar(&dorksFilterCategory, "category", "", "filter by category (frontier, specialized, ...)")

	// export: output format.
	exportFlags := dorksExportCmd.Flags()
	exportFlags.StringVar(&dorksExportFormat, "format", "yaml", "export format: yaml, json")

	// run: shares the filter variables with list.
	runFlags := dorksRunCmd.Flags()
	runFlags.StringVar(&dorksFilterSource, "source", "", "source to run against (required)")
	runFlags.StringVar(&dorksFilterCategory, "category", "", "filter by category")
	runFlags.StringVar(&dorksRunID, "id", "", "run a single dork by id")
	runFlags.IntVar(&dorksRunLimit, "limit", 10, "max matches per dork")

	// add: fields of the new custom dork.
	addFlags := dorksAddCmd.Flags()
	addFlags.StringVar(&dorksAddSource, "source", "", "source backend (required)")
	addFlags.StringVar(&dorksAddCategory, "category", "", "category bucket (required)")
	addFlags.StringVar(&dorksAddID, "id", "", "unique dork id (required)")
	addFlags.StringVar(&dorksAddName, "name", "", "human-readable name (defaults to id)")
	addFlags.StringVar(&dorksAddQuery, "query", "", "dork query string (required)")
	addFlags.StringVar(&dorksAddDescription, "description", "", "free-form description")
	addFlags.StringSliceVar(&dorksAddTags, "tag", nil, "tag (repeatable)")

	dorksCmd.AddCommand(
		dorksListCmd,
		dorksInfoCmd,
		dorksExportCmd,
		dorksRunCmd,
		dorksAddCmd,
		dorksDeleteCmd,
	)

	// Expose GITHUB_TOKEN under the key the GitHub executor reads. The
	// returned error is intentionally discarded.
	_ = viper.BindEnv("dorks.github.token", "GITHUB_TOKEN")
}

View File

@@ -35,11 +35,7 @@ var serveCmd = &cobra.Command{
RunE: notImplemented("serve", "Phase 18"), RunE: notImplemented("serve", "Phase 18"),
} }
var dorksCmd = &cobra.Command{ // dorksCmd is implemented in cmd/dorks.go (Phase 8).
Use: "dorks",
Short: "Manage and run dork queries (Phase 8)",
RunE: notImplemented("dorks", "Phase 8"),
}
var scheduleCmd = &cobra.Command{ var scheduleCmd = &cobra.Command{
Use: "schedule", Use: "schedule",