diff --git a/pkg/recon/sources/doc.go b/pkg/recon/sources/doc.go new file mode 100644 index 0000000..10f9156 --- /dev/null +++ b/pkg/recon/sources/doc.go @@ -0,0 +1,4 @@ +// Package sources hosts per-OSINT-source ReconSource implementations for Phase 10 +// code hosting (GitHub, GitLab, Bitbucket, Gist, Codeberg, HuggingFace, Kaggle, +// Replit, CodeSandbox, sandboxes). Each source implements pkg/recon.ReconSource. +package sources diff --git a/pkg/recon/sources/httpclient.go b/pkg/recon/sources/httpclient.go new file mode 100644 index 0000000..b40d83b --- /dev/null +++ b/pkg/recon/sources/httpclient.go @@ -0,0 +1,108 @@ +package sources + +import ( + "context" + "errors" + "fmt" + "io" + "net/http" + "strconv" + "time" +) + +// ErrUnauthorized is returned when an API rejects credentials (401). +var ErrUnauthorized = errors.New("sources: unauthorized (check credentials)") + +// Client is the shared retry wrapper every Phase 10 source uses. +// +// It handles 429/403/5xx retries honoring Retry-After, context cancellation +// during backoff, and wraps 401 responses in ErrUnauthorized. Rate limiting +// is intentionally out of scope — callers call recon.LimiterRegistry.Wait +// before Do to keep this type single-purpose. +type Client struct { + HTTP *http.Client + MaxRetries int // default 2 + UserAgent string // default "keyhunter-recon/1.0" +} + +// NewClient returns a Client with a 30s timeout and 2 retries. +func NewClient() *Client { + return &Client{ + HTTP: &http.Client{Timeout: 30 * time.Second}, + MaxRetries: 2, + UserAgent: "keyhunter-recon/1.0", + } +} + +// Do executes req with retries on 429/403/5xx honoring Retry-After. +// 401 returns ErrUnauthorized wrapped with the response body. +// Ctx cancellation is honored both during the request and during retry sleeps. +func (c *Client) Do(ctx context.Context, req *http.Request) (*http.Response, error) { + if req.Header.Get("User-Agent") == "" { + req.Header.Set("User-Agent", c.UserAgent) + } + + var lastStatus int + var lastBody string + for attempt := 0; attempt <= c.MaxRetries; attempt++ { + r, err := c.HTTP.Do(req.WithContext(ctx)) + if err != nil { + return nil, fmt.Errorf("sources http: %w", err) + } + + if r.StatusCode == http.StatusOK { + return r, nil + } + + if r.StatusCode == http.StatusUnauthorized { + body := readBody(r) + return nil, fmt.Errorf("%w: %s", ErrUnauthorized, body) + } + + retriable := r.StatusCode == http.StatusTooManyRequests || + r.StatusCode == http.StatusForbidden || + r.StatusCode >= 500 + if !retriable || attempt == c.MaxRetries { + body := readBody(r) + return nil, fmt.Errorf("sources http %d: %s", r.StatusCode, body) + } + + sleep := ParseRetryAfter(r.Header.Get("Retry-After")) + lastStatus = r.StatusCode + lastBody = readBody(r) + + select { + case <-time.After(sleep): + case <-ctx.Done(): + return nil, ctx.Err() + } + } + + return nil, fmt.Errorf("sources http: retries exhausted (last %d: %s)", lastStatus, lastBody) +} + +// ParseRetryAfter decodes integer-seconds Retry-After, defaulting to 1s when +// the header is missing, unparseable, or zero. +func ParseRetryAfter(v string) time.Duration { + if v == "" { + return 1 * time.Second + } + n, err := strconv.Atoi(v) + if err != nil || n <= 0 { + return 1 * time.Second + } + return time.Duration(n) * time.Second +} + +// readBody reads up to 4KB of the response body and closes it. +func readBody(r *http.Response) string { + if r.Body == nil { + return "" + } + defer r.Body.Close() + b, err := io.ReadAll(io.LimitReader(r.Body, 4096)) + if err != nil { + return "" + } + return string(b) +} diff --git a/pkg/recon/sources/httpclient_test.go b/pkg/recon/sources/httpclient_test.go new file mode 100644 index 0000000..d5292a5 --- /dev/null +++ b/pkg/recon/sources/httpclient_test.go @@ -0,0 +1,200 @@ +package sources + +import ( + "context" + "errors" + "io" + "net/http" + "net/http/httptest" + "strings" + "sync/atomic" + "testing" + "time" +) + +func newTestClient() *Client { + c := NewClient() + c.MaxRetries = 2 + return c +} + +func TestClient_Do_OKPassThrough(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(200) + _, _ = w.Write([]byte("hello")) + })) + defer srv.Close() + + req, _ := http.NewRequest("GET", srv.URL, nil) + resp, err := newTestClient().Do(context.Background(), req) + if err != nil { + t.Fatalf("expected nil err, got %v", err) + } + defer resp.Body.Close() + body, _ := io.ReadAll(resp.Body) + if string(body) != "hello" { + t.Fatalf("unexpected body: %q", body) + } +} + +func TestClient_Do_RetryOn429(t *testing.T) { + var calls int32 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + n := atomic.AddInt32(&calls, 1) + if n == 1 { + w.Header().Set("Retry-After", "1") + w.WriteHeader(429) + _, _ = w.Write([]byte("slow down")) + return + } + w.WriteHeader(200) + _, _ = w.Write([]byte("ok")) + })) + defer srv.Close() + + req, _ := http.NewRequest("GET", srv.URL, nil) + resp, err := newTestClient().Do(context.Background(), req) + if err != nil { + t.Fatalf("expected success after retry, got %v", err) + } + defer resp.Body.Close() + if atomic.LoadInt32(&calls) != 2 { + t.Fatalf("expected 2 calls, got %d", calls) + } +} + +func TestClient_Do_RetryOn403(t *testing.T) { + var calls int32 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + n := atomic.AddInt32(&calls, 1) + if n == 1 { + w.Header().Set("Retry-After", "1") + w.WriteHeader(403) + return + } + w.WriteHeader(200) + })) + defer srv.Close() + + req, _ := http.NewRequest("GET", srv.URL, nil) + resp, err := newTestClient().Do(context.Background(), req) + if err != nil { + t.Fatalf("expected success after 403 retry, got %v", err) + } + defer resp.Body.Close() + if atomic.LoadInt32(&calls) != 2 { + t.Fatalf("expected 2 calls, got %d", calls) + } +} + +func TestClient_Do_UnauthorizedNoRetry(t *testing.T) { + var calls int32 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(&calls, 1) + w.WriteHeader(401) + _, _ = w.Write([]byte("bad token")) + })) + defer srv.Close() + + req, _ := http.NewRequest("GET", srv.URL, nil) + _, err := newTestClient().Do(context.Background(), req) + if err == nil { + t.Fatal("expected error on 401") + } + if !errors.Is(err, ErrUnauthorized) { + t.Fatalf("expected ErrUnauthorized, got %v", err) + } + if atomic.LoadInt32(&calls) != 1 { + t.Fatalf("expected 1 call (no retry), got %d", calls) + } + if !strings.Contains(err.Error(), "bad token") { + t.Fatalf("expected body in error, got %v", err) + } +} + +func TestClient_Do_CtxCancelDuringRetrySleep(t *testing.T) { + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Retry-After", "10") + w.WriteHeader(429) + })) + defer srv.Close() + + ctx, cancel := context.WithCancel(context.Background()) + go func() { + time.Sleep(100 * time.Millisecond) + cancel() + }() + + req, _ := http.NewRequest("GET", srv.URL, nil) + start := time.Now() + _, err := newTestClient().Do(ctx, req) + elapsed := time.Since(start) + if err == nil { + t.Fatal("expected ctx error") + } + if !errors.Is(err, context.Canceled) { + t.Fatalf("expected ctx.Canceled, got %v", err) + } + if elapsed > 500*time.Millisecond { + t.Fatalf("ctx cancellation too slow: %v", elapsed) + } +} + +func TestClient_Do_RetriesExhausted(t *testing.T) { + var calls int32 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + atomic.AddInt32(&calls, 1) + w.Header().Set("Retry-After", "0") + w.WriteHeader(500) + _, _ = w.Write([]byte("boom")) + })) + defer srv.Close() + + c := NewClient() + c.MaxRetries = 1 + req, _ := http.NewRequest("GET", srv.URL, nil) + _, err := c.Do(context.Background(), req) + if err == nil { + t.Fatal("expected error after exhausted retries") + } + if atomic.LoadInt32(&calls) != 2 { + t.Fatalf("expected 2 calls (1 + 1 retry), got %d", calls) + } + if !strings.Contains(err.Error(), "500") { + t.Fatalf("expected 500 in error, got %v", err) + } +} + +func TestClient_Do_DefaultUserAgent(t *testing.T) { + var gotUA string + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + gotUA = r.Header.Get("User-Agent") + w.WriteHeader(200) + })) + defer srv.Close() + + req, _ := http.NewRequest("GET", srv.URL, nil) + resp, err := newTestClient().Do(context.Background(), req) + if err != nil { + t.Fatal(err) + } + resp.Body.Close() + if gotUA != "keyhunter-recon/1.0" { + t.Fatalf("expected default UA, got %q", gotUA) + } +} + +func TestParseRetryAfter(t *testing.T) { + cases := map[string]time.Duration{ + "": 1 * time.Second, + "0": 1 * time.Second, + "2": 2 * time.Second, + "60": 60 * time.Second, + "abc": 1 * time.Second, + } + for in, want := range cases { + if got := ParseRetryAfter(in); got != want { + t.Errorf("ParseRetryAfter(%q) = %v, want %v", in, got, want) + } + } +}