package dorks import ( "context" "encoding/json" "fmt" "io" "net/http" "net/url" "strconv" "time" ) // GitHubExecutor runs dorks against the GitHub Code Search REST API. // It is the only live executor registered in Phase 8; every other source // returns ErrSourceNotImplemented from the Runner until the OSINT phases // (9-16) wire them up. // // Authentication is required: GitHub's code search endpoint rejects // anonymous traffic. The caller is expected to populate Token from either // the GITHUB_TOKEN env var or `dorks.github.token` viper config key // (wiring lives in Plan 08-06's cmd/dorks.go). // // BaseURL is overridable so tests can point the executor at an // httptest.Server. MaxRetries controls how many times a 403/429 response // is retried after sleeping for the Retry-After duration; it defaults to // one retry per Execute call. type GitHubExecutor struct { Token string BaseURL string HTTPClient *http.Client MaxRetries int } // NewGitHubExecutor returns an executor pre-configured for api.github.com // with a 30 second client timeout and a single retry on rate-limit // responses. func NewGitHubExecutor(token string) *GitHubExecutor { return &GitHubExecutor{ Token: token, BaseURL: "https://api.github.com", HTTPClient: &http.Client{Timeout: 30 * time.Second}, MaxRetries: 1, } } // Source satisfies the Executor interface. func (g *GitHubExecutor) Source() string { return "github" } type ghSearchResponse struct { TotalCount int `json:"total_count"` Items []ghCodeItem `json:"items"` } type ghCodeItem struct { Name string `json:"name"` Path string `json:"path"` HTMLURL string `json:"html_url"` Repository ghRepository `json:"repository"` TextMatches []ghTextMatchEntry `json:"text_matches"` } type ghRepository struct { FullName string `json:"full_name"` } type ghTextMatchEntry struct { Fragment string `json:"fragment"` } // Execute runs the dork against GitHub Code Search and returns up to // limit matches. limit <= 0 or > 100 is clamped to 30 (GitHub's default // per_page). A missing token yields ErrMissingAuth with setup // instructions; a 401 from the server is treated the same way (rejected // token). Transient 403/429 rate-limit responses are retried once after // honoring Retry-After. func (g *GitHubExecutor) Execute(ctx context.Context, d Dork, limit int) ([]Match, error) { if g.Token == "" { return nil, fmt.Errorf("%w: set GITHUB_TOKEN env var or `keyhunter config set dorks.github.token ` (needs public_repo scope)", ErrMissingAuth) } if limit <= 0 || limit > 100 { limit = 30 } base := g.BaseURL if base == "" { base = "https://api.github.com" } client := g.HTTPClient if client == nil { client = &http.Client{Timeout: 30 * time.Second} } endpoint := fmt.Sprintf("%s/search/code?q=%s&per_page=%d", base, url.QueryEscape(d.Query), limit) var resp *http.Response for attempt := 0; attempt <= g.MaxRetries; attempt++ { req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) if err != nil { return nil, fmt.Errorf("github search: build request: %w", err) } req.Header.Set("Accept", "application/vnd.github.v3.text-match+json") req.Header.Set("Authorization", "Bearer "+g.Token) req.Header.Set("User-Agent", "keyhunter-dork-engine") r, err := client.Do(req) if err != nil { return nil, fmt.Errorf("github search: %w", err) } if r.StatusCode == http.StatusOK { resp = r break } body, _ := io.ReadAll(r.Body) _ = r.Body.Close() switch r.StatusCode { case http.StatusUnauthorized: return nil, fmt.Errorf("%w: github token rejected (401): %s", ErrMissingAuth, string(body)) case http.StatusForbidden, http.StatusTooManyRequests: if attempt < g.MaxRetries { sleep := parseRetryAfter(r.Header.Get("Retry-After")) select { case <-time.After(sleep): continue case <-ctx.Done(): return nil, ctx.Err() } } return nil, fmt.Errorf("github rate limit: %d %s", r.StatusCode, string(body)) default: return nil, fmt.Errorf("github search failed: %d %s", r.StatusCode, string(body)) } } if resp == nil { return nil, fmt.Errorf("github search: exhausted retries without response") } defer resp.Body.Close() var parsed ghSearchResponse if err := json.NewDecoder(resp.Body).Decode(&parsed); err != nil { return nil, fmt.Errorf("decoding github response: %w", err) } out := make([]Match, 0, len(parsed.Items)) for _, it := range parsed.Items { snippet := "" if len(it.TextMatches) > 0 { snippet = it.TextMatches[0].Fragment } path := it.Path if it.Repository.FullName != "" { path = it.Repository.FullName + "/" + it.Path } out = append(out, Match{ DorkID: d.ID, Source: "github", URL: it.HTMLURL, Path: path, Snippet: snippet, }) if len(out) >= limit { break } } return out, nil } // parseRetryAfter interprets the Retry-After header value. Only the // integer-seconds form is supported (GitHub uses it for code search rate // limits); anything unparseable defaults to a one second backoff. func parseRetryAfter(v string) time.Duration { if v == "" { return time.Second } if secs, err := strconv.Atoi(v); err == nil && secs > 0 { return time.Duration(secs) * time.Second } return time.Second }