diff --git a/pkg/recon/robots_test.go b/pkg/recon/robots_test.go new file mode 100644 index 0000000..67875b9 --- /dev/null +++ b/pkg/recon/robots_test.go @@ -0,0 +1,118 @@ +package recon + +import ( + "context" + "net/http" + "net/http/httptest" + "sync/atomic" + "testing" + "time" +) + +func newRobotsServer(body string, status int, counter *atomic.Int32) *httptest.Server { + return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if counter != nil { + counter.Add(1) + } + if status >= 400 { + http.Error(w, "boom", status) + return + } + w.WriteHeader(http.StatusOK) + _, _ = w.Write([]byte(body)) + })) +} + +func TestRobotsAllowed(t *testing.T) { + srv := newRobotsServer("User-agent: *\nDisallow:\n", 200, nil) + defer srv.Close() + + rc := NewRobotsCache() + rc.Client = srv.Client() + + ok, err := rc.Allowed(context.Background(), srv.URL+"/public") + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if !ok { + t.Fatalf("expected allowed, got disallowed") + } +} + +func TestRobotsDisallowed(t *testing.T) { + srv := newRobotsServer("User-agent: *\nDisallow: /private\n", 200, nil) + defer srv.Close() + + rc := NewRobotsCache() + rc.Client = srv.Client() + + ok, err := rc.Allowed(context.Background(), srv.URL+"/private") + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if ok { + t.Fatalf("expected disallowed, got allowed") + } +} + +func TestRobotsCacheHit(t *testing.T) { + var counter atomic.Int32 + srv := newRobotsServer("User-agent: *\nDisallow:\n", 200, &counter) + defer srv.Close() + + rc := NewRobotsCache() + rc.Client = srv.Client() + + for i := 0; i < 3; i++ { + if _, err := rc.Allowed(context.Background(), srv.URL+"/any"); err != nil { + t.Fatalf("iter %d: %v", i, err) + } + } + if got := counter.Load(); got != 1 { + t.Fatalf("expected 1 http fetch for robots.txt, got %d", got) + } +} + +func TestRobotsNetworkError(t *testing.T) { + srv := newRobotsServer("", 500, nil) + defer srv.Close() + + rc := NewRobotsCache() + rc.Client = srv.Client() + + ok, err := rc.Allowed(context.Background(), srv.URL+"/anything") + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if !ok { + t.Fatalf("expected default-allow on 500, got disallowed") + } +} + +func TestRobotsUAKeyhunter(t *testing.T) { + body := "User-agent: keyhunter\nDisallow: /blocked\n\nUser-agent: *\nDisallow:\n" + srv := newRobotsServer(body, 200, nil) + defer srv.Close() + + rc := NewRobotsCache() + rc.Client = srv.Client() + + ok, err := rc.Allowed(context.Background(), srv.URL+"/blocked") + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if ok { + t.Fatalf("expected /blocked disallowed for keyhunter UA") + } + + ok, err = rc.Allowed(context.Background(), srv.URL+"/open") + if err != nil { + t.Fatalf("unexpected err: %v", err) + } + if !ok { + t.Fatalf("expected /open allowed for keyhunter UA") + } +} + +// Compile-time assertion that TTL constant is exported usage-ready +var _ = time.Hour