package sources

import (
	"context"
	"fmt"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"testing"
	"time"

	"github.com/salvacybersec/keyhunter/pkg/types"
	"github.com/stretchr/testify/require"
)

// drain runs src.Chunks in a goroutine under a 30-second timeout, collects
// every chunk sent on the output channel, and returns them in emission order.
// It fails the test if Chunks returns a non-nil error.
func drain(t *testing.T, src Source) []types.Chunk {
	t.Helper()

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	out := make(chan types.Chunk, 1024)
	// Buffered so the producer goroutine can always deliver its result and
	// exit, even if the collecting loop below is abandoned.
	errCh := make(chan error, 1)
	go func() {
		errCh <- src.Chunks(ctx, out)
		// Closing out is what terminates the range loop below.
		close(out)
	}()

	var got []types.Chunk
	for c := range out {
		got = append(got, c)
	}
	require.NoError(t, <-errCh)
	return got
}

// writeTestFile creates path (and any missing parent directories) with the
// given content, failing the test on any filesystem error.
func writeTestFile(t *testing.T, path, content string) {
	t.Helper()
	require.NoError(t, os.MkdirAll(filepath.Dir(path), 0o755))
	require.NoError(t, os.WriteFile(path, []byte(content), 0o644))
}

// isSortedStrings reports whether s is in non-decreasing order.
func isSortedStrings(s []string) bool {
	return sort.StringsAreSorted(s)
}

// TestDirSource_RecursiveWalk checks that files in nested directories are all
// emitted, one chunk each, and that the chunk sources arrive in sorted order.
func TestDirSource_RecursiveWalk(t *testing.T) {
	root := t.TempDir()
	writeTestFile(t, filepath.Join(root, "a.txt"), "alpha content")
	writeTestFile(t, filepath.Join(root, "sub", "b.txt"), "bravo content")
	writeTestFile(t, filepath.Join(root, "sub", "deep", "c.txt"), "charlie content")

	chunks := drain(t, NewDirSourceRaw(root, nil))
	require.Len(t, chunks, 3)

	sources := make([]string, 0, len(chunks))
	for _, c := range chunks {
		sources = append(sources, c.Source)
	}
	require.True(t, isSortedStrings(sources), "emission order must be sorted, got %v", sources)
}

// TestDirSource_DefaultExcludes checks that NewDirSource emits only keep.txt
// when the tree also contains .git, node_modules, vendor, *.min.js and *.map
// entries.
func TestDirSource_DefaultExcludes(t *testing.T) {
	root := t.TempDir()
	writeTestFile(t, filepath.Join(root, "keep.txt"), "keep me")
	writeTestFile(t, filepath.Join(root, ".git", "config"), "[core]")
	writeTestFile(t, filepath.Join(root, "node_modules", "foo.js"), "x")
	writeTestFile(t, filepath.Join(root, "vendor", "bar.go"), "package x")
	writeTestFile(t, filepath.Join(root, "app.min.js"), "y")
	writeTestFile(t, filepath.Join(root, "app.js.map"), "{}")

	chunks := drain(t, NewDirSource(root))
	require.Len(t, chunks, 1)
	require.Contains(t, chunks[0].Source, "keep.txt")
}

// TestDirSource_UserExclude checks that a caller-supplied "*.log" exclude
// pattern drops drop.log and leaves keep.txt.
func TestDirSource_UserExclude(t *testing.T) {
	root := t.TempDir()
	writeTestFile(t, filepath.Join(root, "keep.txt"), "keep")
	writeTestFile(t, filepath.Join(root, "drop.log"), "drop")

	chunks := drain(t, NewDirSourceRaw(root, []string{"*.log"}))
	require.Len(t, chunks, 1)
	require.Contains(t, chunks[0].Source, "keep.txt")
}

// TestDirSource_BinarySkipped checks that a file starting with an ELF magic
// header and containing a NUL byte produces no chunk, while the plain-text
// file next to it does.
func TestDirSource_BinarySkipped(t *testing.T) {
	root := t.TempDir()
	writeTestFile(t, filepath.Join(root, "text.txt"), "plain text content")
	binPath := filepath.Join(root, "blob.bin")
	require.NoError(t, os.WriteFile(binPath, []byte{0x7f, 'E', 'L', 'F', 0x00, 0x01, 0x02}, 0o644))

	chunks := drain(t, NewDirSourceRaw(root, nil))
	require.Len(t, chunks, 1)
	require.Contains(t, chunks[0].Source, "text.txt")
}

// TestDirSource_MmapLargeFile writes a file larger than MmapThreshold and
// checks that chunks are produced for it with the file path as their source.
// NOTE(review): presumably this exercises the mmap read path; confirm against
// the DirSource implementation.
func TestDirSource_MmapLargeFile(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping large file test in short mode")
	}

	root := t.TempDir()
	big := filepath.Join(root, "big.txt")

	const line = "API_KEY=xxxxxxxxxxxxxxxxxxxx\n"
	// Derive the repeat count from the real line length so the payload is
	// guaranteed to exceed MmapThreshold (by roughly ten lines) even if the
	// line text changes.
	repeats := int(MmapThreshold)/len(line) + 10
	payload := strings.Repeat(line, repeats)
	require.NoError(t, os.WriteFile(big, []byte(payload), 0o644))

	chunks := drain(t, NewDirSourceRaw(root, nil))
	require.NotEmpty(t, chunks)
	require.Equal(t, big, chunks[0].Source)
}

// TestDirSource_DeterministicOrdering checks that two independent walks of the
// same tree emit chunk sources in the same order.
func TestDirSource_DeterministicOrdering(t *testing.T) {
	root := t.TempDir()
	writeTestFile(t, filepath.Join(root, "zeta.txt"), "z")
	writeTestFile(t, filepath.Join(root, "alpha.txt"), "a")
	writeTestFile(t, filepath.Join(root, "mike.txt"), "m")

	run := func() []string {
		chunks := drain(t, NewDirSourceRaw(root, nil))
		srcs := make([]string, 0, len(chunks))
		for _, c := range chunks {
			srcs = append(srcs, c.Source)
		}
		return srcs
	}

	first := run()
	// Guard against a vacuous pass: two empty results would compare equal.
	require.NotEmpty(t, first)
	require.Equal(t, first, run())
}

// TestDirSource_MissingRoot checks that Chunks returns an error when the root
// directory does not exist.
func TestDirSource_MissingRoot(t *testing.T) {
	src := NewDirSourceRaw("/definitely/does/not/exist/keyhunter-xyz-zzz", nil)
	ctx := context.Background()
	out := make(chan types.Chunk, 1)

	err := src.Chunks(ctx, out)
	require.Error(t, err)
}

// TestDirSource_CtxCancellation checks that Chunks returns context.Canceled
// when invoked with an already-cancelled context.
func TestDirSource_CtxCancellation(t *testing.T) {
	root := t.TempDir()
	for i := 0; i < 50; i++ {
		// Index-based names keep all 50 files distinct.
		writeTestFile(t, filepath.Join(root, "f", fmt.Sprintf("file_%02d.txt", i)), "payload")
	}

	ctx, cancel := context.WithCancel(context.Background())
	cancel() // pre-cancelled

	out := make(chan types.Chunk, 1024)
	err := NewDirSourceRaw(root, nil).Chunks(ctx, out)
	require.ErrorIs(t, err, context.Canceled)
}