feat(10-01): add provider-driven query generator and RegisterAll skeleton
- BuildQueries(reg, source) dedups keywords and formats per-source syntax
- github/gist use 'keyword' in:file; others use bare keyword
- SourcesConfig placeholder struct for Wave 2 plans to depend on
- RegisterAll no-op stub (Plan 10-09 will fill)
This commit is contained in:
55
pkg/recon/sources/queries.go
Normal file
55
pkg/recon/sources/queries.go
Normal file
@@ -0,0 +1,55 @@
|
||||
package sources
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sort"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||
)
|
||||
|
||||
// BuildQueries produces the search-string list a source should iterate for the
|
||||
// given provider registry. Each keyword is formatted per source-specific syntax
|
||||
// (e.g. GitHub's `"kw" in:file` qualifier). Keywords are deduped across
|
||||
// providers and returned sorted for deterministic test output.
|
||||
//
|
||||
// A nil registry returns nil. An unknown source name falls back to the bare
|
||||
// keyword form so new sources work safely until they register custom syntax.
|
||||
func BuildQueries(reg *providers.Registry, source string) []string {
|
||||
if reg == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
seen := make(map[string]struct{})
|
||||
for _, p := range reg.List() {
|
||||
for _, k := range p.Keywords {
|
||||
if k == "" {
|
||||
continue
|
||||
}
|
||||
seen[k] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
keywords := make([]string, 0, len(seen))
|
||||
for k := range seen {
|
||||
keywords = append(keywords, k)
|
||||
}
|
||||
sort.Strings(keywords)
|
||||
|
||||
out := make([]string, 0, len(keywords))
|
||||
for _, k := range keywords {
|
||||
out = append(out, formatQuery(source, k))
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// formatQuery renders keyword in the search syntax expected by source.
//
// For "github" and "gist" the keyword is quoted with fmt's %q verb and
// followed by the in:file qualifier. Every other source name, including
// unknown or empty ones (GitLab, Bitbucket, Codeberg, HuggingFace, Kaggle,
// Replit, CodeSandbox, sandboxes, ...), gets the keyword back unchanged.
func formatQuery(source, keyword string) string {
	if source == "github" || source == "gist" {
		return fmt.Sprintf("%q in:file", keyword)
	}
	return keyword
}
|
||||
Reference in New Issue
Block a user