- GoogleDorkSource uses Google Custom Search JSON API (APIKey+CX required)
- BingDorkSource uses Bing Web Search API v7 (Ocp-Apim-Subscription-Key header)
- formatQuery now handles google/bing/duckduckgo/yandex/brave dork syntax
- Both sources follow the established pattern: retry via Client, rate limit via LimiterRegistry
58 lines
1.5 KiB
Go
58 lines
1.5 KiB
Go
package sources
|
|
|
|
import (
|
|
"fmt"
|
|
"sort"
|
|
|
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
|
)
|
|
|
|
// BuildQueries produces the search-string list a source should iterate for the
|
|
// given provider registry. Each keyword is formatted per source-specific syntax
|
|
// (e.g. GitHub's `"kw" in:file` qualifier). Keywords are deduped across
|
|
// providers and returned sorted for deterministic test output.
|
|
//
|
|
// A nil registry returns nil. An unknown source name falls back to the bare
|
|
// keyword form so new sources work safely until they register custom syntax.
|
|
func BuildQueries(reg *providers.Registry, source string) []string {
|
|
if reg == nil {
|
|
return nil
|
|
}
|
|
|
|
seen := make(map[string]struct{})
|
|
for _, p := range reg.List() {
|
|
for _, k := range p.Keywords {
|
|
if k == "" {
|
|
continue
|
|
}
|
|
seen[k] = struct{}{}
|
|
}
|
|
}
|
|
|
|
keywords := make([]string, 0, len(seen))
|
|
for k := range seen {
|
|
keywords = append(keywords, k)
|
|
}
|
|
sort.Strings(keywords)
|
|
|
|
out := make([]string, 0, len(keywords))
|
|
for _, k := range keywords {
|
|
out = append(out, formatQuery(source, k))
|
|
}
|
|
return out
|
|
}
|
|
|
|
// formatQuery applies source-specific search syntax to a raw keyword and
// returns the resulting query string.
//
//   - "github", "gist": the keyword is double-quoted (via %q) and suffixed
//     with the `in:file` qualifier.
//   - "bing": a dork restricted to pastebin.com / github.com with the site
//     alternatives grouped in parentheses.
//   - "google", "duckduckgo", "yandex", "brave": the same dork without
//     grouping.
//   - anything else: the keyword is returned unchanged.
//
// The keyword is interpolated verbatim into the dork forms; embedded double
// quotes are not escaped there.
func formatQuery(source, keyword string) string {
	switch source {
	case "github", "gist":
		// %q emits a double-quoted string with embedded quotes and
		// backslashes escaped.
		return fmt.Sprintf("%q in:file", keyword)
	case "bing":
		// Bing treats OR as its lowest-precedence operator, so without the
		// parentheses the query would parse as
		// `site:pastebin.com OR (site:github.com "kw")` and match every
		// pastebin.com page regardless of the keyword.
		return fmt.Sprintf(`(site:pastebin.com OR site:github.com) "%s"`, keyword)
	case "google", "duckduckgo", "yandex", "brave":
		return fmt.Sprintf(`site:pastebin.com OR site:github.com "%s"`, keyword)
	default:
		// GitLab, Bitbucket, Codeberg, HuggingFace, Kaggle, Replit,
		// CodeSandbox, sandboxes, and unknown sources use bare keywords.
		return keyword
	}
}
|