Files
keyhunter/pkg/recon/sources/queries.go
salvacybersec 9273f356e6 feat(10-01): add provider-driven query generator and RegisterAll skeleton
- BuildQueries(reg, source) dedups keywords and formats per-source syntax
- github/gist use 'keyword' in:file; others use bare keyword
- SourcesConfig placeholder struct for Wave 2 plans to depend on
- RegisterAll no-op stub (Plan 10-09 will fill)
2026-04-06 01:09:57 +03:00

56 lines
1.4 KiB
Go

package sources
import (
"fmt"
"sort"
"github.com/salvacybersec/keyhunter/pkg/providers"
)
// BuildQueries returns the search strings that the named source should run
// for the providers held in reg.
//
// Every non-empty keyword of every provider returned by reg.List() is
// collected once (duplicates across providers collapse to a single entry),
// sorted lexicographically so the result is deterministic, and then rendered
// through formatQuery using the syntax associated with source (for example
// GitHub's `"kw" in:file` qualifier). Source names without dedicated syntax
// yield the bare keyword.
//
// A nil reg yields nil. A non-nil reg that contributes no keywords yields an
// empty, non-nil slice.
func BuildQueries(reg *providers.Registry, source string) []string {
	if reg == nil {
		return nil
	}

	// Set of distinct, non-empty keywords across all providers.
	unique := map[string]struct{}{}
	for _, provider := range reg.List() {
		for _, kw := range provider.Keywords {
			if kw != "" {
				unique[kw] = struct{}{}
			}
		}
	}

	// Map iteration order is unspecified, so flatten the set and sort it
	// before formatting to keep the output stable between calls.
	ordered := make([]string, 0, len(unique))
	for kw := range unique {
		ordered = append(ordered, kw)
	}
	sort.Strings(ordered)

	queries := make([]string, len(ordered))
	for i, kw := range ordered {
		queries[i] = formatQuery(source, kw)
	}
	return queries
}
// formatQuery renders a raw keyword in the search syntax used by source.
//
// For the "github" and "gist" sources the keyword is quoted with fmt's %q
// verb and followed by the " in:file" qualifier. Any other source name,
// including names this function does not know about, gets the keyword back
// unchanged.
func formatQuery(source, keyword string) string {
	if source == "github" || source == "gist" {
		return fmt.Sprintf("%q in:file", keyword)
	}
	// GitLab, Bitbucket, Codeberg, HuggingFace, Kaggle, Replit, CodeSandbox,
	// sandboxes, and unknown sources all search on the bare keyword.
	return keyword
}