- BuildQueries(reg, source) dedups keywords and formats per-source syntax - github/gist use 'keyword' in:file; others use bare keyword - SourcesConfig placeholder struct for Wave 2 plans to depend on - RegisterAll no-op stub (Plan 10-09 will fill)
56 lines
1.4 KiB
Go
56 lines
1.4 KiB
Go
package sources
|
|
|
|
import (
|
|
"fmt"
|
|
"sort"
|
|
|
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
|
)
|
|
|
|
// BuildQueries produces the search-string list a source should iterate for the
|
|
// given provider registry. Each keyword is formatted per source-specific syntax
|
|
// (e.g. GitHub's `"kw" in:file` qualifier). Keywords are deduped across
|
|
// providers and returned sorted for deterministic test output.
|
|
//
|
|
// A nil registry returns nil. An unknown source name falls back to the bare
|
|
// keyword form so new sources work safely until they register custom syntax.
|
|
func BuildQueries(reg *providers.Registry, source string) []string {
|
|
if reg == nil {
|
|
return nil
|
|
}
|
|
|
|
seen := make(map[string]struct{})
|
|
for _, p := range reg.List() {
|
|
for _, k := range p.Keywords {
|
|
if k == "" {
|
|
continue
|
|
}
|
|
seen[k] = struct{}{}
|
|
}
|
|
}
|
|
|
|
keywords := make([]string, 0, len(seen))
|
|
for k := range seen {
|
|
keywords = append(keywords, k)
|
|
}
|
|
sort.Strings(keywords)
|
|
|
|
out := make([]string, 0, len(keywords))
|
|
for _, k := range keywords {
|
|
out = append(out, formatQuery(source, k))
|
|
}
|
|
return out
|
|
}
|
|
|
|
// formatQuery renders keyword in the search syntax used for source.
//
// For the "github" and "gist" sources the keyword is emitted as a
// double-quoted string (Go %q quoting) followed by the in:file qualifier.
// Every other source name, including unrecognized ones, receives the keyword
// unchanged.
func formatQuery(source, keyword string) string {
	if source == "github" || source == "gist" {
		return fmt.Sprintf("%q in:file", keyword)
	}

	// GitLab, Bitbucket, Codeberg, HuggingFace, Kaggle, Replit,
	// CodeSandbox, sandboxes, and unknown sources use bare keywords.
	return keyword
}
|