Files
keyhunter/pkg/recon/sources/queries.go
salvacybersec 7272e65207 feat(11-01): add GoogleDorkSource and BingDorkSource with formatQuery updates
- GoogleDorkSource uses Google Custom Search JSON API (APIKey+CX required)
- BingDorkSource uses Bing Web Search API v7 (Ocp-Apim-Subscription-Key header)
- formatQuery now handles google/bing/duckduckgo/yandex/brave dork syntax
- Both sources follow established pattern: retry via Client, rate limit via LimiterRegistry
2026-04-06 11:54:36 +03:00

58 lines
1.5 KiB
Go

package sources
import (
"fmt"
"sort"
"github.com/salvacybersec/keyhunter/pkg/providers"
)
// BuildQueries returns the list of search strings a source should run for the
// providers held in reg.
//
// Every non-empty keyword from every provider is collected once (duplicates
// across providers collapse to a single entry), the unique keywords are sorted
// lexicographically, and each one is then passed through formatQuery with the
// given source name (e.g. GitHub's `"kw" in:file` qualifier). Because sorting
// happens on the raw keywords, the result order is deterministic and follows
// keyword order rather than formatted-query order.
//
// A nil registry yields nil. A non-nil registry with no usable keywords yields
// an empty, non-nil slice. Source names that formatQuery does not recognise
// produce the bare keyword unchanged.
func BuildQueries(reg *providers.Registry, source string) []string {
	if reg == nil {
		return nil
	}

	// Gather unique keywords in one pass; the set only tracks membership.
	unique := make(map[string]struct{})
	var keywords []string
	for _, prov := range reg.List() {
		for _, kw := range prov.Keywords {
			if kw == "" {
				continue
			}
			if _, dup := unique[kw]; dup {
				continue
			}
			unique[kw] = struct{}{}
			keywords = append(keywords, kw)
		}
	}

	// Collection order depends on the registry; sort for stable output.
	sort.Strings(keywords)

	queries := make([]string, len(keywords))
	for i, kw := range keywords {
		queries[i] = formatQuery(source, kw)
	}
	return queries
}
// formatQuery applies source-specific search syntax to a raw keyword and
// returns the resulting query string.
//
//   - "github", "gist": the keyword is quoted with %q (Go string-literal
//     quoting, so embedded quotes and backslashes are escaped) and suffixed
//     with the in:file qualifier.
//   - "bing": a dork restricted to pastebin.com or github.com, with the site:
//     alternatives wrapped in parentheses.
//   - "google", "duckduckgo", "yandex", "brave": the same dork without
//     parentheses.
//   - anything else: the keyword is returned unchanged.
//
// In the dork forms the keyword is inserted verbatim between double quotes; a
// keyword that itself contains a double quote is not escaped.
func formatQuery(source, keyword string) string {
	switch source {
	case "github", "gist":
		return fmt.Sprintf("%q in:file", keyword)
	case "bing":
		// Bing documents OR as its lowest-precedence operator, so without
		// grouping the query would parse as
		// site:pastebin.com OR (site:github.com AND "kw") and match every
		// pastebin.com page regardless of the keyword.
		return fmt.Sprintf(`(site:pastebin.com OR site:github.com) "%s"`, keyword)
	case "google", "duckduckgo", "yandex", "brave":
		return fmt.Sprintf(`site:pastebin.com OR site:github.com "%s"`, keyword)
	default:
		// GitLab, Bitbucket, Codeberg, HuggingFace, Kaggle, Replit,
		// CodeSandbox, sandboxes, and unknown sources use bare keywords.
		return keyword
	}
}