feat(10-01): add shared retry HTTP client for recon sources

- Client.Do retries 429/403/5xx honoring Retry-After
- 401 returns ErrUnauthorized immediately (no retry)
- Context cancellation honored during retry sleeps
- Default UA keyhunter-recon/1.0, 30s timeout, 2 retries
This commit is contained in:
salvacybersec
2026-04-06 01:09:02 +03:00
parent 191bdee3bc
commit 75024e4701
3 changed files with 312 additions and 0 deletions

4
pkg/recon/sources/doc.go Normal file
View File

@@ -0,0 +1,4 @@
// Package sources hosts per-OSINT-source ReconSource implementations for Phase 10
// code hosting (GitHub, GitLab, Bitbucket, Gist, Codeberg, HuggingFace, Kaggle,
// Replit, CodeSandbox, sandboxes). Each source implements pkg/recon.ReconSource.
package sources

View File

@@ -0,0 +1,108 @@
package sources
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"strconv"
"time"
)
// ErrUnauthorized is returned when an API rejects credentials (401).
var ErrUnauthorized = errors.New("sources: unauthorized (check credentials)")
// Client is the shared retry wrapper every Phase 10 source uses.
//
// It handles 429/403/5xx retries honoring Retry-After, context cancellation
// during backoff, and wraps 401 responses in ErrUnauthorized. Rate limiting
// is intentionally out of scope — callers call recon.LimiterRegistry.Wait
// before Do to keep this type single-purpose.
type Client struct {
HTTP *http.Client
MaxRetries int // default 2
UserAgent string // default "keyhunter-recon/1.0"
}
// NewClient returns a Client with a 30s timeout and 2 retries.
func NewClient() *Client {
return &Client{
HTTP: &http.Client{Timeout: 30 * time.Second},
MaxRetries: 2,
UserAgent: "keyhunter-recon/1.0",
}
}
// Do executes req with retries on 429/403/5xx honoring Retry-After.
// 401 returns ErrUnauthorized wrapped with the response body.
// Ctx cancellation is honored both during the request and during retry sleeps.
func (c *Client) Do(ctx context.Context, req *http.Request) (*http.Response, error) {
if req.Header.Get("User-Agent") == "" {
req.Header.Set("User-Agent", c.UserAgent)
}
var lastStatus int
var lastBody string
for attempt := 0; attempt <= c.MaxRetries; attempt++ {
r, err := c.HTTP.Do(req.WithContext(ctx))
if err != nil {
return nil, fmt.Errorf("sources http: %w", err)
}
if r.StatusCode == http.StatusOK {
return r, nil
}
if r.StatusCode == http.StatusUnauthorized {
body := readBody(r)
return nil, fmt.Errorf("%w: %s", ErrUnauthorized, body)
}
retriable := r.StatusCode == http.StatusTooManyRequests ||
r.StatusCode == http.StatusForbidden ||
r.StatusCode >= 500
if !retriable || attempt == c.MaxRetries {
body := readBody(r)
return nil, fmt.Errorf("sources http %d: %s", r.StatusCode, body)
}
sleep := ParseRetryAfter(r.Header.Get("Retry-After"))
lastStatus = r.StatusCode
lastBody = readBody(r)
select {
case <-time.After(sleep):
case <-ctx.Done():
return nil, ctx.Err()
}
}
return nil, fmt.Errorf("sources http: retries exhausted (last %d: %s)", lastStatus, lastBody)
}
// ParseRetryAfter decodes integer-seconds Retry-After, defaulting to 1s when
// the header is missing, unparseable, or zero.
func ParseRetryAfter(v string) time.Duration {
if v == "" {
return 1 * time.Second
}
n, err := strconv.Atoi(v)
if err != nil || n <= 0 {
return 1 * time.Second
}
return time.Duration(n) * time.Second
}
// readBody reads up to 4KB of the response body and closes it.
func readBody(r *http.Response) string {
if r.Body == nil {
return ""
}
defer r.Body.Close()
b, err := io.ReadAll(io.LimitReader(r.Body, 4096))
if err != nil {
return ""
}
return string(b)
}

View File

@@ -0,0 +1,200 @@
package sources
import (
"context"
"errors"
"io"
"net/http"
"net/http/httptest"
"strings"
"sync/atomic"
"testing"
"time"
)
func newTestClient() *Client {
c := NewClient()
c.MaxRetries = 2
return c
}
func TestClient_Do_OKPassThrough(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(200)
_, _ = w.Write([]byte("hello"))
}))
defer srv.Close()
req, _ := http.NewRequest("GET", srv.URL, nil)
resp, err := newTestClient().Do(context.Background(), req)
if err != nil {
t.Fatalf("expected nil err, got %v", err)
}
defer resp.Body.Close()
body, _ := io.ReadAll(resp.Body)
if string(body) != "hello" {
t.Fatalf("unexpected body: %q", body)
}
}
func TestClient_Do_RetryOn429(t *testing.T) {
var calls int32
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
n := atomic.AddInt32(&calls, 1)
if n == 1 {
w.Header().Set("Retry-After", "1")
w.WriteHeader(429)
_, _ = w.Write([]byte("slow down"))
return
}
w.WriteHeader(200)
_, _ = w.Write([]byte("ok"))
}))
defer srv.Close()
req, _ := http.NewRequest("GET", srv.URL, nil)
resp, err := newTestClient().Do(context.Background(), req)
if err != nil {
t.Fatalf("expected success after retry, got %v", err)
}
defer resp.Body.Close()
if atomic.LoadInt32(&calls) != 2 {
t.Fatalf("expected 2 calls, got %d", calls)
}
}
func TestClient_Do_RetryOn403(t *testing.T) {
var calls int32
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
n := atomic.AddInt32(&calls, 1)
if n == 1 {
w.Header().Set("Retry-After", "1")
w.WriteHeader(403)
return
}
w.WriteHeader(200)
}))
defer srv.Close()
req, _ := http.NewRequest("GET", srv.URL, nil)
resp, err := newTestClient().Do(context.Background(), req)
if err != nil {
t.Fatalf("expected success after 403 retry, got %v", err)
}
defer resp.Body.Close()
if atomic.LoadInt32(&calls) != 2 {
t.Fatalf("expected 2 calls, got %d", calls)
}
}
func TestClient_Do_UnauthorizedNoRetry(t *testing.T) {
var calls int32
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
atomic.AddInt32(&calls, 1)
w.WriteHeader(401)
_, _ = w.Write([]byte("bad token"))
}))
defer srv.Close()
req, _ := http.NewRequest("GET", srv.URL, nil)
_, err := newTestClient().Do(context.Background(), req)
if err == nil {
t.Fatal("expected error on 401")
}
if !errors.Is(err, ErrUnauthorized) {
t.Fatalf("expected ErrUnauthorized, got %v", err)
}
if atomic.LoadInt32(&calls) != 1 {
t.Fatalf("expected 1 call (no retry), got %d", calls)
}
if !strings.Contains(err.Error(), "bad token") {
t.Fatalf("expected body in error, got %v", err)
}
}
func TestClient_Do_CtxCancelDuringRetrySleep(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Retry-After", "10")
w.WriteHeader(429)
}))
defer srv.Close()
ctx, cancel := context.WithCancel(context.Background())
go func() {
time.Sleep(100 * time.Millisecond)
cancel()
}()
req, _ := http.NewRequest("GET", srv.URL, nil)
start := time.Now()
_, err := newTestClient().Do(ctx, req)
elapsed := time.Since(start)
if err == nil {
t.Fatal("expected ctx error")
}
if !errors.Is(err, context.Canceled) {
t.Fatalf("expected ctx.Canceled, got %v", err)
}
if elapsed > 500*time.Millisecond {
t.Fatalf("ctx cancellation too slow: %v", elapsed)
}
}
func TestClient_Do_RetriesExhausted(t *testing.T) {
var calls int32
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
atomic.AddInt32(&calls, 1)
w.Header().Set("Retry-After", "0")
w.WriteHeader(500)
_, _ = w.Write([]byte("boom"))
}))
defer srv.Close()
c := NewClient()
c.MaxRetries = 1
req, _ := http.NewRequest("GET", srv.URL, nil)
_, err := c.Do(context.Background(), req)
if err == nil {
t.Fatal("expected error after exhausted retries")
}
if atomic.LoadInt32(&calls) != 2 {
t.Fatalf("expected 2 calls (1 + 1 retry), got %d", calls)
}
if !strings.Contains(err.Error(), "500") {
t.Fatalf("expected 500 in error, got %v", err)
}
}
func TestClient_Do_DefaultUserAgent(t *testing.T) {
var gotUA string
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
gotUA = r.Header.Get("User-Agent")
w.WriteHeader(200)
}))
defer srv.Close()
req, _ := http.NewRequest("GET", srv.URL, nil)
resp, err := newTestClient().Do(context.Background(), req)
if err != nil {
t.Fatal(err)
}
resp.Body.Close()
if gotUA != "keyhunter-recon/1.0" {
t.Fatalf("expected default UA, got %q", gotUA)
}
}
func TestParseRetryAfter(t *testing.T) {
cases := map[string]time.Duration{
"": 1 * time.Second,
"0": 1 * time.Second,
"2": 2 * time.Second,
"60": 60 * time.Second,
"abc": 1 * time.Second,
}
for in, want := range cases {
if got := ParseRetryAfter(in); got != want {
t.Errorf("ParseRetryAfter(%q) = %v, want %v", in, got, want)
}
}
}