From b7934ce16901cc2a2569c45e70212622c62a0343 Mon Sep 17 00:00:00 2001 From: salvacybersec Date: Mon, 6 Apr 2026 00:26:36 +0300 Subject: [PATCH] feat(08-06): add dorks list/info/export commands - Replace cmd/stubs.go dorksCmd stub with full command tree - Add cmd/dorks.go with list, info, export subcommands - Wire Registry + custom_dorks merge for list/export - Bind GITHUB_TOKEN env var via viper for downstream run Satisfies part of DORK-03 (list/info/export) and DORK-04 (source/category filtering). run/add/delete land in Task 2. --- cmd/dorks.go | 539 +++++++++++++++++++++++++++++++++++++++++++++++++++ cmd/stubs.go | 6 +- 2 files changed, 540 insertions(+), 5 deletions(-) create mode 100644 cmd/dorks.go diff --git a/cmd/dorks.go b/cmd/dorks.go new file mode 100644 index 0000000..4853575 --- /dev/null +++ b/cmd/dorks.go @@ -0,0 +1,539 @@ +package cmd + +import ( + "context" + "database/sql" + "encoding/json" + "errors" + "fmt" + "io" + "os" + "path/filepath" + "sort" + "strings" + "text/tabwriter" + "time" + + "github.com/salvacybersec/keyhunter/pkg/dorks" + "github.com/salvacybersec/keyhunter/pkg/storage" + "github.com/spf13/cobra" + "github.com/spf13/viper" + "gopkg.in/yaml.v3" +) + +// Flags for the dorks command tree. Declared at package level so tests can +// reset them between runs via resetDorksFlags. +var ( + dorksFilterSource string + dorksFilterCategory string + dorksRunID string + dorksRunLimit int + dorksAddSource string + dorksAddCategory string + dorksAddID string + dorksAddName string + dorksAddQuery string + dorksAddDescription string + dorksAddTags []string + dorksExportFormat string +) + +// newGitHubExecutor is an injection seam for tests — production code returns a +// real GitHubExecutor built from viper config, tests override to inject a fake. 
+var newGitHubExecutor = func() dorks.Executor { + return dorks.NewGitHubExecutor(viper.GetString("dorks.github.token")) +} + +// dorksCmd is the root of the "keyhunter dorks" command tree (Phase 8, +// DORK-03 and DORK-04). +var dorksCmd = &cobra.Command{ + Use: "dorks", + Short: "Manage and run dork queries (DORK-01..DORK-04)", + Long: `Dorks are YAML-defined search queries targeting public sources +(GitHub, Google, Shodan, Censys, ...) designed to surface leaked LLM API +keys. Embedded dorks ship with KeyHunter; user-authored dorks live in the +custom_dorks table and are mixed into every list/run by default.`, +} + +var dorksListCmd = &cobra.Command{ + Use: "list", + Short: "List embedded + custom dorks (custom rows marked with *)", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + reg, err := dorks.NewRegistry() + if err != nil { + return fmt.Errorf("loading dork registry: %w", err) + } + db, err := initDorksDB() + if err != nil { + return err + } + defer db.Close() + + rows, err := mergedDorkRows(reg, db) + if err != nil { + return err + } + rows = filterRows(rows, dorksFilterSource, dorksFilterCategory) + sort.SliceStable(rows, func(i, j int) bool { + if rows[i].Dork.Source != rows[j].Dork.Source { + return rows[i].Dork.Source < rows[j].Dork.Source + } + return rows[i].Dork.ID < rows[j].Dork.ID + }) + + w := tabwriter.NewWriter(cmd.OutOrStdout(), 0, 0, 2, ' ', 0) + fmt.Fprintln(w, "ID\tSOURCE\tCATEGORY\tNAME") + for _, r := range rows { + id := r.Dork.ID + if r.Custom { + id = "*" + id + } + fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", id, r.Dork.Source, r.Dork.Category, r.Dork.Name) + } + if err := w.Flush(); err != nil { + return err + } + fmt.Fprintf(cmd.OutOrStdout(), "%d dork(s).\n", len(rows)) + return nil + }, +} + +var dorksInfoCmd = &cobra.Command{ + Use: "info ", + Short: "Show full detail for a single dork by id", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + id := args[0] + reg, err := 
dorks.NewRegistry() + if err != nil { + return fmt.Errorf("loading dork registry: %w", err) + } + if d, ok := reg.Get(id); ok { + renderDork(cmd.OutOrStdout(), d, false) + return nil + } + db, err := initDorksDB() + if err != nil { + return err + } + defer db.Close() + cd, err := db.GetCustomDorkByDorkID(id) + if err != nil { + if errors.Is(err, sql.ErrNoRows) { + return fmt.Errorf("no dork with id %q", id) + } + return fmt.Errorf("looking up custom dork: %w", err) + } + renderDork(cmd.OutOrStdout(), customToDork(cd), true) + return nil + }, +} + +var dorksExportCmd = &cobra.Command{ + Use: "export", + Short: "Export embedded + custom dorks as yaml or json", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + format := strings.ToLower(strings.TrimSpace(dorksExportFormat)) + if format == "" { + format = "yaml" + } + if format != "yaml" && format != "json" { + return fmt.Errorf("dorks export: unsupported format %q (supported: yaml, json)", format) + } + + reg, err := dorks.NewRegistry() + if err != nil { + return fmt.Errorf("loading dork registry: %w", err) + } + db, err := initDorksDB() + if err != nil { + return err + } + defer db.Close() + + rows, err := mergedDorkRows(reg, db) + if err != nil { + return err + } + out := make([]dorks.Dork, 0, len(rows)) + for _, r := range rows { + out = append(out, r.Dork) + } + + w := cmd.OutOrStdout() + switch format { + case "json": + enc := json.NewEncoder(w) + enc.SetIndent("", " ") + return enc.Encode(out) + default: + enc := yaml.NewEncoder(w) + enc.SetIndent(2) + if err := enc.Encode(out); err != nil { + return err + } + return enc.Close() + } + }, +} + +var dorksRunCmd = &cobra.Command{ + Use: "run", + Short: "Execute dorks matching --source/--category or a single --id", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + if strings.TrimSpace(dorksFilterSource) == "" { + return fmt.Errorf("dorks run: --source is required") + } + reg, err := dorks.NewRegistry() + if 
err != nil { + return fmt.Errorf("loading dork registry: %w", err) + } + db, err := initDorksDB() + if err != nil { + return err + } + defer db.Close() + + targets, err := selectRunTargets(reg, db) + if err != nil { + return err + } + if len(targets) == 0 { + fmt.Fprintf(cmd.OutOrStdout(), "No dorks matched (source=%q category=%q id=%q).\n", + dorksFilterSource, dorksFilterCategory, dorksRunID) + return nil + } + + runner := dorks.NewRunner() + if dorksFilterSource == "github" { + runner.Register(newGitHubExecutor()) + } + + limit := dorksRunLimit + if limit <= 0 { + limit = 10 + } + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute) + defer cancel() + + w := cmd.OutOrStdout() + totalMatches := 0 + for _, d := range targets { + matches, err := runner.Run(ctx, d, limit) + if err != nil { + if errors.Is(err, dorks.ErrSourceNotImplemented) { + return fmt.Errorf("source %q not yet implemented (coming Phase 9-16)", d.Source) + } + if errors.Is(err, dorks.ErrMissingAuth) { + return fmt.Errorf("%w\nhint: export GITHUB_TOKEN= (needs public_repo scope) or run `keyhunter config set dorks.github.token `", err) + } + return fmt.Errorf("running dork %q: %w", d.ID, err) + } + for _, m := range matches { + fmt.Fprintf(w, "[%s] %s (%s) %s\n", m.DorkID, m.URL, m.Path, truncate(m.Snippet, 120)) + totalMatches++ + } + } + fmt.Fprintf(w, "%d match(es) across %d dork(s).\n", totalMatches, len(targets)) + return nil + }, +} + +var dorksAddCmd = &cobra.Command{ + Use: "add", + Short: "Persist a user-authored dork into the custom_dorks table", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + if strings.TrimSpace(dorksAddSource) == "" { + return fmt.Errorf("dorks add: --source is required") + } + if !containsString(dorks.ValidSources, dorksAddSource) { + return fmt.Errorf("dorks add: source %q is not one of %v", dorksAddSource, dorks.ValidSources) + } + if strings.TrimSpace(dorksAddCategory) == "" { + return fmt.Errorf("dorks add: 
--category is required") + } + if !containsString(dorks.ValidCategories, dorksAddCategory) { + return fmt.Errorf("dorks add: category %q is not one of %v", dorksAddCategory, dorks.ValidCategories) + } + if strings.TrimSpace(dorksAddQuery) == "" { + return fmt.Errorf("dorks add: --query is required") + } + if strings.TrimSpace(dorksAddID) == "" { + return fmt.Errorf("dorks add: --id is required") + } + name := dorksAddName + if strings.TrimSpace(name) == "" { + name = dorksAddID + } + + // Guard against embedded ID collisions. + reg, err := dorks.NewRegistry() + if err != nil { + return fmt.Errorf("loading dork registry: %w", err) + } + if _, exists := reg.Get(dorksAddID); exists { + return fmt.Errorf("dorks add: id %q collides with an embedded dork", dorksAddID) + } + + db, err := initDorksDB() + if err != nil { + return err + } + defer db.Close() + + rowID, err := db.SaveCustomDork(storage.CustomDork{ + DorkID: dorksAddID, + Name: name, + Source: dorksAddSource, + Category: dorksAddCategory, + Query: dorksAddQuery, + Description: dorksAddDescription, + Tags: dorksAddTags, + }) + if err != nil { + return fmt.Errorf("saving custom dork: %w", err) + } + fmt.Fprintf(cmd.OutOrStdout(), "Added custom dork %q (row %d).\n", dorksAddID, rowID) + return nil + }, +} + +var dorksDeleteCmd = &cobra.Command{ + Use: "delete ", + Short: "Delete a custom dork (embedded dorks cannot be deleted)", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + id := args[0] + reg, err := dorks.NewRegistry() + if err != nil { + return fmt.Errorf("loading dork registry: %w", err) + } + if _, ok := reg.Get(id); ok { + return fmt.Errorf("embedded dorks cannot be deleted (id %q is built in)", id) + } + db, err := initDorksDB() + if err != nil { + return err + } + defer db.Close() + + cd, err := db.GetCustomDorkByDorkID(id) + if err != nil { + if errors.Is(err, sql.ErrNoRows) { + return fmt.Errorf("no custom dork with id %q", id) + } + return fmt.Errorf("looking up 
custom dork: %w", err) + } + n, err := db.DeleteCustomDork(cd.ID) + if err != nil { + return fmt.Errorf("deleting custom dork: %w", err) + } + if n == 0 { + return fmt.Errorf("no custom dork with id %q", id) + } + fmt.Fprintf(cmd.OutOrStdout(), "Deleted custom dork %q.\n", id) + return nil + }, +} + +// dorkRow is an internal helper binding a Dork to its origin (embedded or +// custom) for list/export rendering. +type dorkRow struct { + Dork dorks.Dork + Custom bool +} + +func mergedDorkRows(reg *dorks.Registry, db *storage.DB) ([]dorkRow, error) { + emb := reg.List() + rows := make([]dorkRow, 0, len(emb)) + for _, d := range emb { + rows = append(rows, dorkRow{Dork: d, Custom: false}) + } + customs, err := db.ListCustomDorks() + if err != nil { + return nil, fmt.Errorf("listing custom dorks: %w", err) + } + for _, c := range customs { + rows = append(rows, dorkRow{Dork: customToDork(c), Custom: true}) + } + return rows, nil +} + +func filterRows(rows []dorkRow, source, category string) []dorkRow { + if source == "" && category == "" { + return rows + } + out := rows[:0:0] + for _, r := range rows { + if source != "" && r.Dork.Source != source { + continue + } + if category != "" && r.Dork.Category != category { + continue + } + out = append(out, r) + } + return out +} + +// selectRunTargets resolves the dorks `dorks run` should invoke, honoring +// --id (single dork), --source (required), and --category (optional filter). 
+func selectRunTargets(reg *dorks.Registry, db *storage.DB) ([]dorks.Dork, error) { + if dorksRunID != "" { + if d, ok := reg.Get(dorksRunID); ok { + if d.Source != dorksFilterSource { + return nil, fmt.Errorf("dork %q has source %q, not %q", d.ID, d.Source, dorksFilterSource) + } + return []dorks.Dork{d}, nil + } + cd, err := db.GetCustomDorkByDorkID(dorksRunID) + if err != nil { + if errors.Is(err, sql.ErrNoRows) { + return nil, fmt.Errorf("no dork with id %q", dorksRunID) + } + return nil, fmt.Errorf("looking up dork: %w", err) + } + d := customToDork(cd) + if d.Source != dorksFilterSource { + return nil, fmt.Errorf("dork %q has source %q, not %q", d.ID, d.Source, dorksFilterSource) + } + return []dorks.Dork{d}, nil + } + + rows, err := mergedDorkRows(reg, db) + if err != nil { + return nil, err + } + rows = filterRows(rows, dorksFilterSource, dorksFilterCategory) + out := make([]dorks.Dork, 0, len(rows)) + for _, r := range rows { + out = append(out, r.Dork) + } + return out, nil +} + +// initDorksDB opens the KeyHunter SQLite database using the same precedence +// rules as openDBWithKey but without deriving an encryption key — the dork +// CLI only touches the plaintext custom_dorks table. Tests inject a temp +// path via viper.Set("database.path", tmp). 
+func initDorksDB() (*storage.DB, error) { + dbPath := viper.GetString("database.path") + if dbPath == "" { + home, _ := os.UserHomeDir() + dbPath = filepath.Join(home, ".keyhunter", "keyhunter.db") + } + if err := os.MkdirAll(filepath.Dir(dbPath), 0700); err != nil { + return nil, fmt.Errorf("creating database directory: %w", err) + } + db, err := storage.Open(dbPath) + if err != nil { + return nil, fmt.Errorf("opening database: %w", err) + } + return db, nil +} + +func renderDork(w io.Writer, d dorks.Dork, custom bool) { + origin := "embedded" + if custom { + origin = "custom" + } + fmt.Fprintf(w, "ID: %s\n", d.ID) + fmt.Fprintf(w, "Name: %s\n", d.Name) + fmt.Fprintf(w, "Source: %s\n", d.Source) + fmt.Fprintf(w, "Category: %s\n", d.Category) + fmt.Fprintf(w, "Origin: %s\n", origin) + fmt.Fprintf(w, "Query: %s\n", d.Query) + if d.Description != "" { + fmt.Fprintf(w, "Description: %s\n", d.Description) + } + if len(d.Tags) > 0 { + fmt.Fprintf(w, "Tags: %s\n", strings.Join(d.Tags, ", ")) + } +} + +func customToDork(c storage.CustomDork) dorks.Dork { + return dorks.Dork{ + ID: c.DorkID, + Name: c.Name, + Source: c.Source, + Category: c.Category, + Query: c.Query, + Description: c.Description, + Tags: c.Tags, + } +} + +func containsString(haystack []string, needle string) bool { + for _, h := range haystack { + if h == needle { + return true + } + } + return false +} + +func truncate(s string, max int) string { + s = strings.ReplaceAll(s, "\n", " ") + if len(s) <= max { + return s + } + return s[:max] + "..." +} + +// resetDorksFlags clears the package-level flag vars so tests running the +// same rootCmd sequentially do not inherit values from earlier invocations. 
+func resetDorksFlags() { + dorksFilterSource = "" + dorksFilterCategory = "" + dorksRunID = "" + dorksRunLimit = 0 + dorksAddSource = "" + dorksAddCategory = "" + dorksAddID = "" + dorksAddName = "" + dorksAddQuery = "" + dorksAddDescription = "" + dorksAddTags = nil + dorksExportFormat = "" +} + +func init() { + // list flags: optional --source / --category filters + dorksListCmd.Flags().StringVar(&dorksFilterSource, "source", "", "filter by source (github, google, shodan, ...)") + dorksListCmd.Flags().StringVar(&dorksFilterCategory, "category", "", "filter by category (frontier, specialized, ...)") + + // export flags: --format defaults to yaml + dorksExportCmd.Flags().StringVar(&dorksExportFormat, "format", "yaml", "export format: yaml, json") + + // run flags: --source and --category are bound to the same package-level variables as the list flags + dorksRunCmd.Flags().StringVar(&dorksFilterSource, "source", "", "source to run against (required)") + dorksRunCmd.Flags().StringVar(&dorksFilterCategory, "category", "", "filter by category") + dorksRunCmd.Flags().StringVar(&dorksRunID, "id", "", "run a single dork by id") + dorksRunCmd.Flags().IntVar(&dorksRunLimit, "limit", 10, "max matches per dork") + + // add flags: --tag is a string-slice flag and may be given more than once + dorksAddCmd.Flags().StringVar(&dorksAddSource, "source", "", "source backend (required)") + dorksAddCmd.Flags().StringVar(&dorksAddCategory, "category", "", "category bucket (required)") + dorksAddCmd.Flags().StringVar(&dorksAddID, "id", "", "unique dork id (required)") + dorksAddCmd.Flags().StringVar(&dorksAddName, "name", "", "human-readable name (defaults to id)") + dorksAddCmd.Flags().StringVar(&dorksAddQuery, "query", "", "dork query string (required)") + dorksAddCmd.Flags().StringVar(&dorksAddDescription, "description", "", "free-form description") + dorksAddCmd.Flags().StringSliceVar(&dorksAddTags, "tag", nil, "tag (repeatable)") + + dorksCmd.AddCommand( + dorksListCmd, + dorksInfoCmd, + dorksExportCmd, + dorksRunCmd, + dorksAddCmd, + dorksDeleteCmd, + ) + + // Bind the GITHUB_TOKEN env var to the dorks.github.token viper key, which is the key newGitHubExecutor reads; the error returned by BindEnv is discarded. 
+ _ = viper.BindEnv("dorks.github.token", "GITHUB_TOKEN") +} diff --git a/cmd/stubs.go b/cmd/stubs.go index f6357a8..b3dd58f 100644 --- a/cmd/stubs.go +++ b/cmd/stubs.go @@ -35,11 +35,7 @@ var serveCmd = &cobra.Command{ RunE: notImplemented("serve", "Phase 18"), } -var dorksCmd = &cobra.Command{ - Use: "dorks", - Short: "Manage and run dork queries (Phase 8)", - RunE: notImplemented("dorks", "Phase 8"), -} +// dorksCmd is implemented in cmd/dorks.go (Phase 8). var scheduleCmd = &cobra.Command{ Use: "schedule",