- frontier.yaml: 6 dorks (OpenAI/Anthropic proxies, Azure OpenAI certs, AWS Bedrock, LiteLLM)
- infrastructure.yaml: 14 dorks (Ollama, vLLM, LocalAI, LM Studio, text-generation-webui, Open WebUI, Triton, TGI, LangServe, FastChat, OpenRouter/Portkey/Helicone gateways)
- Real Shodan query syntax: http.title, http.html, ssl.cert.subject.cn, product, port, http.component
- Dual-located: pkg/dorks/definitions/shodan/ + dorks/shodan/
183 lines
5.3 KiB
Go
183 lines
5.3 KiB
Go
package dorks
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"strconv"
|
|
"time"
|
|
)
|
|
|
|
// GitHubExecutor runs dorks against the GitHub Code Search REST API.
// It is the only live executor registered in Phase 8; every other source
// returns ErrSourceNotImplemented from the Runner until the OSINT phases
// (9-16) wire them up.
//
// Authentication is required: GitHub's code search endpoint rejects
// anonymous traffic. The caller is expected to populate Token from either
// the GITHUB_TOKEN env var or `dorks.github.token` viper config key
// (wiring lives in Plan 08-06's cmd/dorks.go).
//
// BaseURL is overridable so tests can point the executor at an
// httptest.Server. MaxRetries controls how many times a 403/429 response
// is retried after sleeping for the Retry-After duration;
// NewGitHubExecutor sets it to one retry per Execute call.
type GitHubExecutor struct {
	// Token is the credential sent as "Authorization: Bearer <Token>".
	// Execute refuses to run with ErrMissingAuth when it is empty.
	Token string
	// BaseURL is the API root that "/search/code" is appended to. Execute
	// substitutes https://api.github.com when it is empty.
	BaseURL string
	// HTTPClient performs the requests. Execute substitutes a client with
	// a 30 second timeout when it is nil.
	HTTPClient *http.Client
	// MaxRetries is the number of additional attempts made after a
	// 403/429 response. The zero value disables retries.
	MaxRetries int
}
|
|
|
|
// NewGitHubExecutor returns an executor pre-configured for api.github.com
|
|
// with a 30 second client timeout and a single retry on rate-limit
|
|
// responses.
|
|
func NewGitHubExecutor(token string) *GitHubExecutor {
|
|
return &GitHubExecutor{
|
|
Token: token,
|
|
BaseURL: "https://api.github.com",
|
|
HTTPClient: &http.Client{Timeout: 30 * time.Second},
|
|
MaxRetries: 1,
|
|
}
|
|
}
|
|
|
|
// Source satisfies the Executor interface.
|
|
func (g *GitHubExecutor) Source() string { return "github" }
|
|
|
|
// ghSearchResponse is the top-level JSON document that Execute decodes
// from a successful /search/code response. Only the fields listed here
// are decoded; everything else in the payload is ignored.
type ghSearchResponse struct {
	// TotalCount is decoded from "total_count" (presumably GitHub's total
	// hit count for the query, which may exceed len(Items) — confirm
	// against the API docs).
	TotalCount int `json:"total_count"`
	// Items holds the code hits returned on this page of results.
	Items []ghCodeItem `json:"items"`
}
|
|
|
|
// ghCodeItem is a single entry of the "items" array in a code search
// response. Execute converts each item into one Match.
type ghCodeItem struct {
	// Name is decoded from "name".
	Name string `json:"name"`
	// Path is decoded from "path"; Execute prefixes it with the
	// repository full name when that is present.
	Path string `json:"path"`
	// HTMLURL is decoded from "html_url" and becomes Match.URL.
	HTMLURL string `json:"html_url"`
	// Repository identifies the repository the item belongs to.
	Repository ghRepository `json:"repository"`
	// TextMatches is decoded from "text_matches"; Execute uses the first
	// entry's fragment as the match snippet.
	TextMatches []ghTextMatchEntry `json:"text_matches"`
}
|
|
|
|
// ghRepository is the subset of the "repository" object of a code search
// item that this package decodes.
type ghRepository struct {
	// FullName is decoded from "full_name" (presumably "owner/repo" —
	// confirm against the API docs).
	FullName string `json:"full_name"`
}
|
|
|
|
// ghTextMatchEntry is one element of the "text_matches" array attached to
// a code search item.
type ghTextMatchEntry struct {
	// Fragment is decoded from "fragment" and is used by Execute as the
	// snippet of the resulting Match.
	Fragment string `json:"fragment"`
}
|
|
|
|
// Execute runs the dork against GitHub Code Search and returns up to
|
|
// limit matches. limit <= 0 or > 100 is clamped to 30 (GitHub's default
|
|
// per_page). A missing token yields ErrMissingAuth with setup
|
|
// instructions; a 401 from the server is treated the same way (rejected
|
|
// token). Transient 403/429 rate-limit responses are retried once after
|
|
// honoring Retry-After.
|
|
func (g *GitHubExecutor) Execute(ctx context.Context, d Dork, limit int) ([]Match, error) {
|
|
if g.Token == "" {
|
|
return nil, fmt.Errorf("%w: set GITHUB_TOKEN env var or `keyhunter config set dorks.github.token <pat>` (needs public_repo scope)", ErrMissingAuth)
|
|
}
|
|
if limit <= 0 || limit > 100 {
|
|
limit = 30
|
|
}
|
|
|
|
base := g.BaseURL
|
|
if base == "" {
|
|
base = "https://api.github.com"
|
|
}
|
|
client := g.HTTPClient
|
|
if client == nil {
|
|
client = &http.Client{Timeout: 30 * time.Second}
|
|
}
|
|
|
|
endpoint := fmt.Sprintf("%s/search/code?q=%s&per_page=%d", base, url.QueryEscape(d.Query), limit)
|
|
|
|
var resp *http.Response
|
|
for attempt := 0; attempt <= g.MaxRetries; attempt++ {
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("github search: build request: %w", err)
|
|
}
|
|
req.Header.Set("Accept", "application/vnd.github.v3.text-match+json")
|
|
req.Header.Set("Authorization", "Bearer "+g.Token)
|
|
req.Header.Set("User-Agent", "keyhunter-dork-engine")
|
|
|
|
r, err := client.Do(req)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("github search: %w", err)
|
|
}
|
|
|
|
if r.StatusCode == http.StatusOK {
|
|
resp = r
|
|
break
|
|
}
|
|
|
|
body, _ := io.ReadAll(r.Body)
|
|
_ = r.Body.Close()
|
|
|
|
switch r.StatusCode {
|
|
case http.StatusUnauthorized:
|
|
return nil, fmt.Errorf("%w: github token rejected (401): %s", ErrMissingAuth, string(body))
|
|
case http.StatusForbidden, http.StatusTooManyRequests:
|
|
if attempt < g.MaxRetries {
|
|
sleep := parseRetryAfter(r.Header.Get("Retry-After"))
|
|
select {
|
|
case <-time.After(sleep):
|
|
continue
|
|
case <-ctx.Done():
|
|
return nil, ctx.Err()
|
|
}
|
|
}
|
|
return nil, fmt.Errorf("github rate limit: %d %s", r.StatusCode, string(body))
|
|
default:
|
|
return nil, fmt.Errorf("github search failed: %d %s", r.StatusCode, string(body))
|
|
}
|
|
}
|
|
if resp == nil {
|
|
return nil, fmt.Errorf("github search: exhausted retries without response")
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
var parsed ghSearchResponse
|
|
if err := json.NewDecoder(resp.Body).Decode(&parsed); err != nil {
|
|
return nil, fmt.Errorf("decoding github response: %w", err)
|
|
}
|
|
|
|
out := make([]Match, 0, len(parsed.Items))
|
|
for _, it := range parsed.Items {
|
|
snippet := ""
|
|
if len(it.TextMatches) > 0 {
|
|
snippet = it.TextMatches[0].Fragment
|
|
}
|
|
path := it.Path
|
|
if it.Repository.FullName != "" {
|
|
path = it.Repository.FullName + "/" + it.Path
|
|
}
|
|
out = append(out, Match{
|
|
DorkID: d.ID,
|
|
Source: "github",
|
|
URL: it.HTMLURL,
|
|
Path: path,
|
|
Snippet: snippet,
|
|
})
|
|
if len(out) >= limit {
|
|
break
|
|
}
|
|
}
|
|
return out, nil
|
|
}
|
|
|
|
// parseRetryAfter converts a Retry-After header value into a backoff
// duration. Only the integer-seconds form is supported (GitHub uses it for
// code search rate limits); an empty, non-numeric, zero or negative value
// falls back to a one second backoff.
//
// Very large values saturate at the maximum time.Duration instead of
// overflowing: multiplying by time.Second would otherwise wrap to a
// negative duration, which a timer treats as "fire immediately" and so
// turns the longest requested backoff into no backoff at all.
func parseRetryAfter(v string) time.Duration {
	const (
		fallback    = time.Second
		maxDuration = time.Duration(1<<63 - 1)
	)

	// Atoi rejects the empty string, so no separate emptiness check is
	// needed.
	secs, err := strconv.Atoi(v)
	if err != nil || secs <= 0 {
		return fallback
	}
	if time.Duration(secs) > maxDuration/time.Second {
		return maxDuration
	}
	return time.Duration(secs) * time.Second
}
|