19 KiB
phase, plan, type, wave, depends_on, files_modified, autonomous, requirements, must_haves
| phase | plan | type | wave | depends_on | files_modified | autonomous | requirements | must_haves | |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 04-input-sources | 04 | execute | 1 |
|
|
true |
|
|
Purpose: These three adapters complete the Phase 4 input surface area. Bundling them into one plan keeps wave-1 parallelism healthy (04-02 + 04-03 + 04-04 run simultaneously) while respecting the ~50% context budget since each adapter is self-contained and small. Output: Six files total (three sources + three test files).
<execution_context> @$HOME/.claude/get-shit-done/workflows/execute-plan.md @$HOME/.claude/get-shit-done/templates/summary.md </execution_context>
@.planning/PROJECT.md @.planning/phases/04-input-sources/04-CONTEXT.md @pkg/engine/sources/source.go @pkg/types/chunk.go Source interface: ```go type Source interface { Chunks(ctx context.Context, out chan<- types.Chunk) error } ``` Shared helper (produced by plan 04-02 in pkg/engine/sources/dir.go):
func emitChunks(ctx context.Context, data []byte, source string, chunkSize int, out chan<- types.Chunk) error
atotto/clipboard API:
import "github.com/atotto/clipboard"
func ReadAll() (string, error)
var Unsupported bool // package variable; set on platforms without clipboard tooling
Create pkg/engine/sources/stdin.go:
package sources
import (
"context"
"io"
"os"
"github.com/salvacybersec/keyhunter/pkg/types"
)
// StdinSource is a Source that slurps an io.Reader (os.Stdin unless told
// otherwise) and emits its content as chunks labelled "stdin". It backs
// `keyhunter scan stdin` and `keyhunter scan -`.
type StdinSource struct {
	// Reader is the stream to consume; Chunks substitutes os.Stdin when nil.
	Reader io.Reader
	// ChunkSize is the chunk size handed to the shared chunk emitter.
	ChunkSize int
}
// NewStdinSource builds a StdinSource that reads the process's standard
// input and uses the package default chunk size.
func NewStdinSource() *StdinSource {
	src := StdinSource{ChunkSize: defaultChunkSize}
	src.Reader = os.Stdin
	return &src
}
// NewStdinSourceFrom builds a StdinSource that reads from r instead of the
// real standard input, using the package default chunk size. It exists
// mainly so tests can inject a fixture.
func NewStdinSourceFrom(r io.Reader) *StdinSource {
	src := new(StdinSource)
	src.Reader = r
	src.ChunkSize = defaultChunkSize
	return src
}
// Chunks drains the reader to EOF and passes the bytes to the shared chunk
// emitter under the source label "stdin".
//
// A nil Reader is replaced by os.Stdin (and stored back on the receiver).
// A read failure is returned unchanged; empty input emits nothing and
// returns nil.
func (s *StdinSource) Chunks(ctx context.Context, out chan<- types.Chunk) error {
	if s.Reader == nil {
		s.Reader = os.Stdin
	}

	payload, readErr := io.ReadAll(s.Reader)
	switch {
	case readErr != nil:
		return readErr
	case len(payload) == 0:
		// Nothing was piped in; there is nothing to scan.
		return nil
	default:
		return emitChunks(ctx, payload, "stdin", s.ChunkSize, out)
	}
}
Create pkg/engine/sources/stdin_test.go:
package sources
import (
"bytes"
"context"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/salvacybersec/keyhunter/pkg/types"
)
// TestStdinSource_Basic checks that a short input comes out as exactly one
// chunk labelled "stdin" carrying the original bytes.
func TestStdinSource_Basic(t *testing.T) {
	const input = "API_KEY=sk-test-xyz"
	src := NewStdinSourceFrom(bytes.NewBufferString(input))

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	out := make(chan types.Chunk, 8)
	done := make(chan error, 1)
	go func() {
		// Closing out after Chunks returns ends the range loop below.
		defer close(out)
		done <- src.Chunks(ctx, out)
	}()

	var chunks []types.Chunk
	for chunk := range out {
		chunks = append(chunks, chunk)
	}

	require.NoError(t, <-done)
	require.Len(t, chunks, 1)
	require.Equal(t, "stdin", chunks[0].Source)
	require.Equal(t, input, string(chunks[0].Data))
}
func TestStdinSource_Empty(t *testing.T) {
src := NewStdinSourceFrom(bytes.NewBuffer(nil))
out := make(chan types.Chunk, 1)
err := src.Chunks(context.Background(), out)
close(out)
require.NoError(t, err)
require.Len(t, out, 0)
}
func TestStdinSource_CtxCancel(t *testing.T) {
// Large buffer so emitChunks iterates and can observe cancellation.
data := make([]byte, 1<<20)
src := NewStdinSourceFrom(bytes.NewReader(data))
ctx, cancel := context.WithCancel(context.Background())
cancel()
out := make(chan types.Chunk) // unbuffered forces select on ctx
err := src.Chunks(ctx, out)
require.ErrorIs(t, err, context.Canceled)
}
Create pkg/engine/sources/url.go:
package sources
import (
	"context"
	"crypto/tls"
	"errors"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"

	"github.com/salvacybersec/keyhunter/pkg/types"
)
// Limits applied by URLSource to every fetch.
const (
	// MaxURLContentLength is the largest response body, in bytes, that
	// URLSource accepts (50 MB).
	MaxURLContentLength int64 = 50 << 20

	// DefaultURLTimeout bounds the whole request: connect, read and body.
	DefaultURLTimeout = 30 * time.Second
)
// allowedContentTypes lists the Content-Type prefixes URLSource is willing
// to scan. Anything that matches none of them (images, archives,
// executables, ...) is rejected before the body is read.
var allowedContentTypes = []string{
	"text/",
	"application/json",
	"application/javascript",
	"application/xml",
	"application/x-yaml",
	"application/yaml",
}
// URLSource is a Source that downloads one remote resource over HTTP(S) and
// emits the response body as chunks.
type URLSource struct {
	// URL is the address to fetch; only http and https are accepted.
	URL string
	// Client performs the request. When nil, Chunks builds a default client.
	Client *http.Client
	// UserAgent is the value sent in the User-Agent request header.
	UserAgent string
	// Insecure asks for TLS certificate verification to be skipped
	// (default false).
	Insecure bool
	// ChunkSize is the chunk size handed to the shared chunk emitter.
	ChunkSize int
}
// NewURLSource builds a URLSource for rawURL with the package defaults: the
// default HTTP client, the "keyhunter/dev" user agent and the default chunk
// size. The URL itself is not validated until Chunks is called.
func NewURLSource(rawURL string) *URLSource {
	src := new(URLSource)
	src.URL = rawURL
	src.Client = defaultHTTPClient()
	src.UserAgent = "keyhunter/dev"
	src.ChunkSize = defaultChunkSize
	return src
}
// defaultHTTPClient builds the client URLSource uses when none is supplied.
// It applies DefaultURLTimeout to the whole exchange and refuses to follow
// a redirect once five requests have already been made.
func defaultHTTPClient() *http.Client {
	limitRedirects := func(_ *http.Request, via []*http.Request) error {
		if len(via) < 5 {
			return nil
		}
		return errors.New("stopped after 5 redirects")
	}

	client := new(http.Client)
	client.Timeout = DefaultURLTimeout
	client.CheckRedirect = limitRedirects
	return client
}
// Chunks validates the URL, issues a GET, and emits the response body as
// chunks labelled "url:<URL>".
//
// The request is refused before any network I/O unless the scheme is http or
// https. The response is refused when the status is not 2xx, when its
// Content-Type is not accepted by isAllowedContentType, or when the body is
// larger than MaxURLContentLength. An empty body is not an error: nothing is
// emitted and nil is returned. All errors are prefixed with "URLSource:".
func (u *URLSource) Chunks(ctx context.Context, out chan<- types.Chunk) error {
	parsed, err := url.Parse(u.URL)
	if err != nil {
		return fmt.Errorf("URLSource: parse %q: %w", u.URL, err)
	}
	if parsed.Scheme != "http" && parsed.Scheme != "https" {
		return fmt.Errorf("URLSource: unsupported scheme %q (only http/https)", parsed.Scheme)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.URL, nil)
	if err != nil {
		return fmt.Errorf("URLSource: new request: %w", err)
	}
	// Setting an empty User-Agent makes net/http omit the header entirely,
	// so only override the library default when a value is configured.
	if u.UserAgent != "" {
		req.Header.Set("User-Agent", u.UserAgent)
	}

	client, err := u.httpClient()
	if err != nil {
		return err
	}
	resp, err := client.Do(req)
	if err != nil {
		return fmt.Errorf("URLSource: fetch: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return fmt.Errorf("URLSource: non-2xx status %d from %s", resp.StatusCode, u.URL)
	}
	ct := resp.Header.Get("Content-Type")
	if !isAllowedContentType(ct) {
		return fmt.Errorf("URLSource: disallowed Content-Type %q", ct)
	}
	// Cheap early exit when the server announces an oversize body.
	if resp.ContentLength > MaxURLContentLength {
		return fmt.Errorf("URLSource: Content-Length %d exceeds cap %d", resp.ContentLength, MaxURLContentLength)
	}

	// LimitReader cap + 1 to detect overflow even if ContentLength was missing/wrong.
	limited := io.LimitReader(resp.Body, MaxURLContentLength+1)
	data, err := io.ReadAll(limited)
	if err != nil {
		return fmt.Errorf("URLSource: read body: %w", err)
	}
	if int64(len(data)) > MaxURLContentLength {
		return fmt.Errorf("URLSource: body exceeds %d bytes", MaxURLContentLength)
	}
	if len(data) == 0 {
		return nil
	}

	source := "url:" + u.URL
	return emitChunks(ctx, data, source, u.ChunkSize, out)
}

// httpClient returns the client used for the fetch: u.Client, or the package
// default when it is nil. When u.Insecure is set it returns a copy whose
// transport skips TLS certificate verification; the caller's client and
// transport are left untouched. It fails if Insecure is requested on a client
// whose transport is not an *http.Transport, because the TLS settings of an
// arbitrary RoundTripper cannot be changed.
func (u *URLSource) httpClient() (*http.Client, error) {
	base := u.Client
	if base == nil {
		base = defaultHTTPClient()
	}
	if !u.Insecure {
		return base, nil
	}

	rt := base.Transport
	if rt == nil {
		rt = http.DefaultTransport
	}
	tr, ok := rt.(*http.Transport)
	if !ok || tr == nil {
		return nil, fmt.Errorf("URLSource: Insecure requires an *http.Transport, got %T", rt)
	}

	tr = tr.Clone()
	if tr.TLSClientConfig == nil {
		tr.TLSClientConfig = &tls.Config{}
	}
	// Explicit opt-in by the caller; never enabled by default.
	tr.TLSClientConfig.InsecureSkipVerify = true
	// The cloned transport serves this one fetch only, so do not leave
	// idle connections behind in a pool nobody will reuse.
	tr.DisableKeepAlives = true

	insecure := *base
	insecure.Transport = tr
	return &insecure, nil
}
// isAllowedContentType reports whether a Content-Type header value is one
// URLSource should scan. An empty value is accepted, since some servers omit
// the header. Otherwise any parameters (e.g. "; charset=utf-8") are dropped
// and the lower-cased, trimmed media type must start with one of the
// allowedContentTypes prefixes.
func isAllowedContentType(ct string) bool {
	if ct == "" {
		return true
	}

	// Everything from the first ';' onwards is parameters, not the type.
	mediaType := strings.SplitN(ct, ";", 2)[0]
	mediaType = strings.ToLower(mediaType)
	mediaType = strings.TrimSpace(mediaType)

	for i := range allowedContentTypes {
		if strings.HasPrefix(mediaType, allowedContentTypes[i]) {
			return true
		}
	}
	return false
}
Create pkg/engine/sources/url_test.go:
package sources
import (
"context"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/salvacybersec/keyhunter/pkg/types"
)
// drainURL runs src.Chunks under a 10-second timeout, collects every chunk
// it emits, and returns them together with the error Chunks reported.
func drainURL(t *testing.T, src Source) ([]types.Chunk, error) {
	t.Helper()

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	out := make(chan types.Chunk, 256)
	done := make(chan error, 1)
	go func() {
		// Closing out once Chunks returns ends the range loop below.
		defer close(out)
		done <- src.Chunks(ctx, out)
	}()

	var chunks []types.Chunk
	for chunk := range out {
		chunks = append(chunks, chunk)
	}
	return chunks, <-done
}
// TestURLSource_Fetches checks the happy path: a small text/plain body comes
// back as one chunk labelled with the requested URL.
func TestURLSource_Fetches(t *testing.T) {
	const body = "API_KEY=sk-live-xyz"
	handler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/plain")
		_, _ = w.Write([]byte(body))
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	chunks, err := drainURL(t, NewURLSource(srv.URL))

	require.NoError(t, err)
	require.Len(t, chunks, 1)
	require.Equal(t, "url:"+srv.URL, chunks[0].Source)
	require.Equal(t, body, string(chunks[0].Data))
}
// TestURLSource_RejectsBinaryContentType checks that a response declared as
// image/png is refused with an error that mentions the Content-Type.
func TestURLSource_RejectsBinaryContentType(t *testing.T) {
	pngMagic := []byte{0x89, 0x50, 0x4e, 0x47}
	handler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "image/png")
		_, _ = w.Write(pngMagic)
	}
	srv := httptest.NewServer(http.HandlerFunc(handler))
	defer srv.Close()

	_, err := drainURL(t, NewURLSource(srv.URL))

	require.Error(t, err)
	require.Contains(t, err.Error(), "Content-Type")
}
func TestURLSource_RejectsNonHTTPScheme(t *testing.T) {
_, err := drainURL(t, NewURLSource("file:///etc/passwd"))
require.Error(t, err)
require.Contains(t, err.Error(), "unsupported scheme")
}
// TestURLSource_Rejects500 checks that a 500 response is reported as an
// error that carries the status code.
func TestURLSource_Rejects500(t *testing.T) {
	failing := http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
		http.Error(w, "boom", http.StatusInternalServerError)
	})
	srv := httptest.NewServer(failing)
	defer srv.Close()

	_, err := drainURL(t, NewURLSource(srv.URL))

	require.Error(t, err)
	require.Contains(t, err.Error(), "500")
}
// TestURLSource_RejectsOversizeBody checks that a body larger than
// MaxURLContentLength is refused by one of URLSource's size checks.
func TestURLSource_RejectsOversizeBody(t *testing.T) {
	// MaxURLContentLength is a constant, so the cap cannot be lowered for the
	// test: the server really has to send a body a few bytes over the limit.
	// Build the payload once, outside the handler.
	big := []byte(strings.Repeat("a", int(MaxURLContentLength)+10))
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "text/plain")
		// The client stops reading once it is past the cap, so a write
		// error here is expected and deliberately ignored.
		_, _ = w.Write(big)
	}))
	defer srv.Close()

	_, err := drainURL(t, NewURLSource(srv.URL))
	require.Error(t, err)
	// A bare require.Error would also pass on an unrelated failure such as
	// the drainURL timeout. Both size-check messages ("Content-Length ...
	// exceeds cap ..." and "body exceeds ... bytes") contain "exceeds".
	require.NotErrorIs(t, err, context.DeadlineExceeded)
	require.Contains(t, err.Error(), "exceeds")
}
// TestURLSource_FollowsRedirect checks that a 301 to another server is
// followed and the final response body is what gets emitted.
func TestURLSource_FollowsRedirect(t *testing.T) {
	serveBody := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "text/plain")
		_, _ = w.Write([]byte("redirected body"))
	}
	target := httptest.NewServer(http.HandlerFunc(serveBody))
	defer target.Close()

	bounce := func(w http.ResponseWriter, r *http.Request) {
		http.Redirect(w, r, target.URL, http.StatusMovedPermanently)
	}
	redirector := httptest.NewServer(http.HandlerFunc(bounce))
	defer redirector.Close()

	chunks, err := drainURL(t, NewURLSource(redirector.URL))

	require.NoError(t, err)
	require.NotEmpty(t, chunks)
	require.Contains(t, string(chunks[0].Data), "redirected body")
}
Create pkg/engine/sources/clipboard.go:
package sources
import (
"context"
"errors"
"fmt"
"github.com/atotto/clipboard"
"github.com/salvacybersec/keyhunter/pkg/types"
)
// ClipboardSource is a Source that scans whatever text is currently on the
// OS clipboard, emitting it as chunks with Source="clipboard". Reading the
// real clipboard needs xclip/xsel/wl-clipboard on Linux, pbpaste on macOS,
// or the native API on Windows.
type ClipboardSource struct {
	// Reader supplies the clipboard text. When nil the real clipboard is
	// read; tests inject a func returning a fixture.
	Reader func() (string, error)
	// ChunkSize is the chunk size handed to the shared chunk emitter.
	ChunkSize int
}
// NewClipboardSource builds a ClipboardSource that reads the real OS
// clipboard and uses the package default chunk size.
func NewClipboardSource() *ClipboardSource {
	src := &ClipboardSource{ChunkSize: defaultChunkSize}
	src.Reader = clipboard.ReadAll
	return src
}
// Chunks reads the clipboard text and passes it to the shared chunk emitter
// under the source label "clipboard".
//
// With no injected Reader it falls back to the real clipboard, and fails up
// front when the clipboard package reports the platform as unsupported. A
// read failure is wrapped and returned; an empty clipboard emits nothing
// and returns nil.
func (c *ClipboardSource) Chunks(ctx context.Context, out chan<- types.Chunk) error {
	read := c.Reader
	if read == nil {
		// Only the real clipboard depends on platform tooling; an injected
		// reader is always usable.
		if clipboard.Unsupported {
			return errors.New("ClipboardSource: clipboard tooling unavailable (install xclip/xsel/wl-clipboard on Linux)")
		}
		read = clipboard.ReadAll
	}

	text, err := read()
	switch {
	case err != nil:
		return fmt.Errorf("ClipboardSource: read: %w", err)
	case text == "":
		return nil
	}
	return emitChunks(ctx, []byte(text), "clipboard", c.ChunkSize, out)
}
Create pkg/engine/sources/clipboard_test.go:
package sources
import (
"context"
"errors"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/salvacybersec/keyhunter/pkg/types"
)
// TestClipboardSource_FixtureReader checks that text from an injected reader
// comes out as exactly one chunk labelled "clipboard".
func TestClipboardSource_FixtureReader(t *testing.T) {
	const fixture = "sk-live-xxxxxx"
	src := &ClipboardSource{ChunkSize: defaultChunkSize}
	src.Reader = func() (string, error) { return fixture, nil }

	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	out := make(chan types.Chunk, 4)
	done := make(chan error, 1)
	go func() {
		// Closing out once Chunks returns ends the range loop below.
		defer close(out)
		done <- src.Chunks(ctx, out)
	}()

	var chunks []types.Chunk
	for chunk := range out {
		chunks = append(chunks, chunk)
	}

	require.NoError(t, <-done)
	require.Len(t, chunks, 1)
	require.Equal(t, "clipboard", chunks[0].Source)
	require.Equal(t, fixture, string(chunks[0].Data))
}
// TestClipboardSource_ReaderError checks that a failing reader surfaces as a
// wrapped, ClipboardSource-prefixed error and that nothing is emitted.
func TestClipboardSource_ReaderError(t *testing.T) {
	cause := errors.New("no xclip installed")
	src := &ClipboardSource{
		Reader: func() (string, error) { return "", cause },
	}
	out := make(chan types.Chunk, 1)
	err := src.Chunks(context.Background(), out)
	require.Error(t, err)
	// Chunks wraps the reader's error with %w, so the cause stays reachable.
	require.ErrorIs(t, err, cause)
	// The message is "ClipboardSource: read: ...". Contains is
	// case-sensitive, so a lowercase "clipboard" would never match it.
	require.Contains(t, err.Error(), "ClipboardSource")
	require.Len(t, out, 0)
}
// TestClipboardSource_EmptyClipboard checks that an empty clipboard is not
// an error and produces no chunks.
func TestClipboardSource_EmptyClipboard(t *testing.T) {
	emptyClipboard := func() (string, error) { return "", nil }
	src := &ClipboardSource{Reader: emptyClipboard}
	out := make(chan types.Chunk, 1)

	require.NoError(t, src.Chunks(context.Background(), out))
	require.Len(t, out, 0)
}
Do NOT modify cmd/scan.go in this plan. Do NOT create pkg/engine/sources/dir.go, git.go, or touch file.go — those are owned by plans 04-02 and 04-03.
go test ./pkg/engine/sources/... -run 'TestStdinSource|TestURLSource|TestClipboardSource' -race -count=1
<acceptance_criteria>
- go build ./pkg/engine/sources/... exits 0
- go test ./pkg/engine/sources/... -run 'TestStdinSource|TestURLSource|TestClipboardSource' -race passes all subtests
- grep -n "http.Client" pkg/engine/sources/url.go hits
- grep -n "LimitReader" pkg/engine/sources/url.go hits
- grep -n "clipboard.ReadAll" pkg/engine/sources/clipboard.go hits
- grep -n "\"stdin\"" pkg/engine/sources/stdin.go hits (source label)
- grep -n "\"url:\" + u.URL\\|\"url:\"+u.URL" pkg/engine/sources/url.go hits
</acceptance_criteria>
StdinSource, URLSource, and ClipboardSource all implement Source, enforce their respective safety limits (stdin read-to-EOF, url scheme/size/content-type whitelist, clipboard tooling check), and their tests pass under -race.
<success_criteria> Three new source adapters exist, each self-contained, each with test coverage, and none conflicting with file ownership of plans 04-02 (dir/file) or 04-03 (git). </success_criteria>
After completion, create `.planning/phases/04-input-sources/04-04-SUMMARY.md` listing the six files created, test names with pass status, and any platform-specific notes about clipboard tests on the executor's CI environment.