feat(09-01): add ReconSource interface and Config

- Define ReconSource interface: Name/RateLimit/Burst/RespectsRobots/Enabled/Sweep
- Alias recon.Finding = engine.Finding for shared storage path
- Config struct carries Stealth, RespectRobots, EnabledSources, Query
This commit is contained in:
salvacybersec
2026-04-06 00:40:46 +03:00
parent c3b9fb4043
commit 10af12d358

54
pkg/recon/source.go Normal file
View File

@@ -0,0 +1,54 @@
// Package recon implements the OSINT/recon framework for KeyHunter.
//
// Every OSINT source (Shodan, GitHub, Pastebin, ...) implements the
// ReconSource interface defined here. The Engine in engine.go orchestrates
// parallel fanout across all registered sources via an ants worker pool.
//
// Findings produced by recon sources reuse the canonical engine.Finding
// type with SourceType set to "recon:<source-name>" so downstream storage
// and verification paths are shared with file/git/stdin scanning.
package recon
import (
"context"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/engine"
)
// Finding is the recon package's alias for the canonical engine.Finding.
// Because this is a true alias (declared with "="), recon.Finding and
// engine.Finding are the same type, so values can be handed between the two
// packages without any conversion.
//
// Recon sources set SourceType = "recon:<source-name>".
type Finding = engine.Finding
// Config holds the settings for one recon sweep. A single Config applies to
// every registered source taking part in that sweep.
type Config struct {
	// Stealth turns on user-agent rotation and jitter delays (Plan 09-02).
	Stealth bool

	// RespectRobots switches robots.txt enforcement on or off. It only
	// matters for sources whose RespectsRobots() returns true (Plan 09-04).
	RespectRobots bool

	// EnabledSources restricts the sweep to the listed source names.
	// Empty = all.
	EnabledSources []string

	// Query is the search string handed to each source's Sweep method.
	Query string
}
// ReconSource is the contract that every OSINT source module (Phases 10-16)
// fulfils. A source owns its own rate.Limiter, which is constructed from the
// values reported by RateLimit() and Burst().
type ReconSource interface {
	// Name yields a stable, lowercase identifier for the source
	// (e.g. "shodan", "github").
	Name() string

	// RateLimit yields the token bucket rate for this source.
	RateLimit() rate.Limit

	// Burst yields the burst capacity for this source.
	Burst() int

	// RespectsRobots tells whether the source should honor robots.txt:
	// true for HTML scrapers, false for authenticated APIs.
	RespectsRobots() bool

	// Enabled tells whether the source should run under the given cfg.
	Enabled(cfg Config) bool

	// Sweep runs the source's search for query and emits Findings on out.
	// Implementations must return promptly when ctx is cancelled.
	Sweep(ctx context.Context, query string, out chan<- Finding) error
}