package sources

import (
	"fmt"
	"sort"

	"github.com/salvacybersec/keyhunter/pkg/providers"
)

// BuildQueries returns the list of search strings that the named source
// should iterate over for the providers held in reg.
//
// Every non-empty keyword from every provider in reg.List() is collected
// once (duplicates across providers are dropped), the unique keywords are
// sorted so the output order is deterministic, and each one is then passed
// through formatQuery to apply the syntax for source.
//
// A nil reg yields nil. A non-nil reg with no usable keywords yields an
// empty, non-nil slice. Source names without dedicated syntax fall through
// to the bare keyword form.
func BuildQueries(reg *providers.Registry, source string) []string {
	if reg == nil {
		return nil
	}

	// Dedupe while collecting so the keyword list never holds repeats.
	unique := make(map[string]struct{})
	var keywords []string
	for _, provider := range reg.List() {
		for _, kw := range provider.Keywords {
			if kw == "" {
				continue
			}
			if _, dup := unique[kw]; dup {
				continue
			}
			unique[kw] = struct{}{}
			keywords = append(keywords, kw)
		}
	}

	// Sorting the raw keywords (not the formatted queries) fixes the order
	// of the result.
	sort.Strings(keywords)

	queries := make([]string, len(keywords))
	for i, kw := range keywords {
		queries[i] = formatQuery(source, kw)
	}
	return queries
}

// formatQuery renders a raw keyword in the search syntax of the given source.
//
// For "github" and "gist" the keyword is double-quoted (Go %q quoting) and
// suffixed with the in:file qualifier. Every other source name gets the
// keyword back unchanged.
func formatQuery(source, keyword string) string {
	if source == "github" || source == "gist" {
		return fmt.Sprintf("%q in:file", keyword)
	}
	// GitLab, Bitbucket, Codeberg, HuggingFace, Kaggle, Replit,
	// CodeSandbox, sandboxes, and unknown sources use bare keywords.
	return keyword
}