package sources

import (
	"context"
	"os"

	"github.com/salvacybersec/keyhunter/pkg/types"
)

const (
	// defaultChunkSize is the chunk length in bytes used when
	// FileSource.ChunkSize is zero or negative.
	defaultChunkSize = 4096

	// chunkOverlap is the number of bytes shared between consecutive chunks,
	// so that a key straddling a chunk boundary still appears whole in one
	// of them.
	chunkOverlap = 256
)

// FileSource reads a single file and emits overlapping chunks.
type FileSource struct {
	// Path is the file to read.
	Path string
	// ChunkSize is the maximum length of each emitted chunk in bytes.
	// Values <= 0 select defaultChunkSize.
	ChunkSize int
}

// NewFileSource creates a FileSource for the given path with the default chunk size.
func NewFileSource(path string) *FileSource {
	return &FileSource{Path: path, ChunkSize: defaultChunkSize}
}

// Chunks reads the file in overlapping segments and sends each chunk to out.
//
// Uses os.ReadFile for simplicity in Phase 1. mmap for files > 10MB is implemented
// in Phase 4 (Input Sources) alongside all other source adapter enhancements.
//
// A file that fits in a single chunk (including an empty file) is sent as one
// chunk with Offset 0. Larger files are cut into windows of at most ChunkSize
// bytes; each window starts (ChunkSize - overlap) bytes after the previous one,
// and Offset is the byte position of the window's first byte within the file.
// The overlap is chunkOverlap, reduced to ChunkSize/2 when ChunkSize is too
// small to hold it, so the window always advances.
//
// Every chunk's Data aliases the single buffer read from disk; receivers must
// not modify it. Chunks does not close out.
//
// It returns the error from os.ReadFile unchanged, or ctx.Err() if ctx is
// cancelled while a send is pending.
func (f *FileSource) Chunks(ctx context.Context, out chan<- types.Chunk) error {
	data, err := os.ReadFile(f.Path)
	if err != nil {
		return err
	}

	size := f.ChunkSize
	if size <= 0 {
		size = defaultChunkSize
	}

	// emit delivers one chunk, giving up as soon as ctx is cancelled.
	emit := func(c types.Chunk) error {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case out <- c:
			return nil
		}
	}

	if len(data) <= size {
		// File fits in one chunk.
		return emit(types.Chunk{Data: data, Source: f.Path, Offset: 0})
	}

	// A stride of size-chunkOverlap is zero or negative when the configured
	// chunk size does not exceed the overlap, which would either loop forever
	// or slice with a negative index. Shrink the overlap so stride >= 1.
	overlap := chunkOverlap
	if overlap >= size {
		overlap = size / 2
	}
	stride := size - overlap

	for start := 0; start < len(data); start += stride {
		end := start + size
		if end > len(data) {
			end = len(data)
		}

		// Offset is the window's true start position. Accumulating chunk
		// lengths would over-count by the overlap on every chunk after the
		// first.
		if err := emit(types.Chunk{
			Data:   data[start:end],
			Source: f.Path,
			Offset: int64(start),
		}); err != nil {
			return err
		}

		if end == len(data) {
			// The tail has been emitted; another pass would only resend a
			// suffix of it.
			break
		}
	}
	return nil
}