test(09-04): add failing tests for RobotsCache
- Allowed/Disallowed path matching
- Cache hit counter assertion
- Default-allow on 5xx network error
- keyhunter UA matching precedence
This commit is contained in:
118
pkg/recon/robots_test.go
Normal file
118
pkg/recon/robots_test.go
Normal file
@@ -0,0 +1,118 @@
|
||||
package recon
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// newRobotsServer starts an httptest.Server that answers every request with a
// canned response, for use as a fake robots.txt origin in tests.
//
// body is the payload written for non-error responses. status selects the
// reply: 400 and above is sent through http.Error with the text "boom";
// 200-399 is written as-is followed by body; anything below 200 falls back to
// 200 OK. If counter is non-nil it is incremented once per incoming request,
// regardless of path, so callers can assert how many fetches reached the
// server.
//
// The caller is responsible for calling Close on the returned server.
func newRobotsServer(body string, status int, counter *atomic.Int32) *httptest.Server {
	return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		if counter != nil {
			counter.Add(1)
		}
		if status >= http.StatusBadRequest {
			http.Error(w, "boom", status)
			return
		}
		code := status
		if code < http.StatusOK {
			// WriteHeader panics on codes below 100 and treats 1xx as
			// informational; keep the helper's historical 200 for those.
			code = http.StatusOK
		}
		w.WriteHeader(code)
		// Best-effort write: a failure here surfaces as a client-side error
		// in the test that issued the request.
		_, _ = w.Write([]byte(body))
	}))
}
|
||||
|
||||
func TestRobotsAllowed(t *testing.T) {
|
||||
srv := newRobotsServer("User-agent: *\nDisallow:\n", 200, nil)
|
||||
defer srv.Close()
|
||||
|
||||
rc := NewRobotsCache()
|
||||
rc.Client = srv.Client()
|
||||
|
||||
ok, err := rc.Allowed(context.Background(), srv.URL+"/public")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %v", err)
|
||||
}
|
||||
if !ok {
|
||||
t.Fatalf("expected allowed, got disallowed")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRobotsDisallowed(t *testing.T) {
|
||||
srv := newRobotsServer("User-agent: *\nDisallow: /private\n", 200, nil)
|
||||
defer srv.Close()
|
||||
|
||||
rc := NewRobotsCache()
|
||||
rc.Client = srv.Client()
|
||||
|
||||
ok, err := rc.Allowed(context.Background(), srv.URL+"/private")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %v", err)
|
||||
}
|
||||
if ok {
|
||||
t.Fatalf("expected disallowed, got allowed")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRobotsCacheHit(t *testing.T) {
|
||||
var counter atomic.Int32
|
||||
srv := newRobotsServer("User-agent: *\nDisallow:\n", 200, &counter)
|
||||
defer srv.Close()
|
||||
|
||||
rc := NewRobotsCache()
|
||||
rc.Client = srv.Client()
|
||||
|
||||
for i := 0; i < 3; i++ {
|
||||
if _, err := rc.Allowed(context.Background(), srv.URL+"/any"); err != nil {
|
||||
t.Fatalf("iter %d: %v", i, err)
|
||||
}
|
||||
}
|
||||
if got := counter.Load(); got != 1 {
|
||||
t.Fatalf("expected 1 http fetch for robots.txt, got %d", got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRobotsNetworkError(t *testing.T) {
|
||||
srv := newRobotsServer("", 500, nil)
|
||||
defer srv.Close()
|
||||
|
||||
rc := NewRobotsCache()
|
||||
rc.Client = srv.Client()
|
||||
|
||||
ok, err := rc.Allowed(context.Background(), srv.URL+"/anything")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %v", err)
|
||||
}
|
||||
if !ok {
|
||||
t.Fatalf("expected default-allow on 500, got disallowed")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRobotsUAKeyhunter(t *testing.T) {
|
||||
body := "User-agent: keyhunter\nDisallow: /blocked\n\nUser-agent: *\nDisallow:\n"
|
||||
srv := newRobotsServer(body, 200, nil)
|
||||
defer srv.Close()
|
||||
|
||||
rc := NewRobotsCache()
|
||||
rc.Client = srv.Client()
|
||||
|
||||
ok, err := rc.Allowed(context.Background(), srv.URL+"/blocked")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %v", err)
|
||||
}
|
||||
if ok {
|
||||
t.Fatalf("expected /blocked disallowed for keyhunter UA")
|
||||
}
|
||||
|
||||
ok, err = rc.Allowed(context.Background(), srv.URL+"/open")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected err: %v", err)
|
||||
}
|
||||
if !ok {
|
||||
t.Fatalf("expected /open allowed for keyhunter UA")
|
||||
}
|
||||
}
|
||||
|
||||
// Keep the "time" import referenced so the file compiles: no test visible in
// this file uses the package otherwise. This asserts nothing about any TTL
// constant; drop it once a test (e.g. for cache expiry) uses time directly.
var _ = time.Hour
|
||||
Reference in New Issue
Block a user