diff --git a/cmd/schedule.go b/cmd/schedule.go new file mode 100644 index 0000000..793797b --- /dev/null +++ b/cmd/schedule.go @@ -0,0 +1,279 @@ +package cmd + +import ( + "context" + "database/sql" + "errors" + "fmt" + "os" + "path/filepath" + "strconv" + "strings" + "time" + + "github.com/spf13/cobra" + "github.com/spf13/viper" + + "github.com/salvacybersec/keyhunter/pkg/config" + "github.com/salvacybersec/keyhunter/pkg/engine" + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/storage" +) + +var ( + schedCron string + schedScan string + schedName string + schedNotify bool +) + +var scheduleCmd = &cobra.Command{ + Use: "schedule", + Short: "Manage scheduled recurring scans", + Long: `Add, list, remove, or manually run scheduled scan jobs. +Jobs are stored in the database and executed by 'keyhunter serve'.`, +} + +var scheduleAddCmd = &cobra.Command{ + Use: "add", + Short: "Add a new scheduled scan job", + RunE: func(cmd *cobra.Command, args []string) error { + if schedCron == "" { + return fmt.Errorf("--cron is required (e.g. --cron=\"0 */6 * * *\")") + } + if schedScan == "" { + return fmt.Errorf("--scan is required (path to scan)") + } + + db, cleanup, err := openScheduleDB() + if err != nil { + return err + } + defer cleanup() + + name := schedName + if name == "" { + name = fmt.Sprintf("scan-%s", filepath.Base(schedScan)) + } + + job := storage.ScheduledJob{ + Name: name, + CronExpr: schedCron, + ScanPath: schedScan, + Enabled: true, + Notify: schedNotify, + } + + id, err := db.SaveScheduledJob(job) + if err != nil { + return fmt.Errorf("saving scheduled job: %w", err) + } + + fmt.Printf("Scheduled job #%d added:\n", id) + fmt.Printf(" Name: %s\n", name) + fmt.Printf(" Cron: %s\n", schedCron) + fmt.Printf(" Path: %s\n", schedScan) + fmt.Printf(" Notify: %t\n", schedNotify) + return nil + }, +} + +var scheduleListCmd = &cobra.Command{ + Use: "list", + Short: "List all scheduled scan jobs", + RunE: func(cmd *cobra.Command, args []string) error { + db, cleanup, err := openScheduleDB() + if err != nil { + return err + } + defer cleanup() + + jobs, err := db.ListScheduledJobs() + if err != nil { + return fmt.Errorf("listing scheduled jobs: %w", err) + } + + if len(jobs) == 0 { + fmt.Println("No scheduled jobs. Use 'keyhunter schedule add' to create one.") + return nil + } + + fmt.Printf("%-4s %-20s %-20s %-30s %-8s %-7s %s\n", + "ID", "Name", "Cron", "Path", "Enabled", "Notify", "Last Run") + fmt.Println(strings.Repeat("-", 120)) + + for _, j := range jobs { + lastRun := "never" + if j.LastRunAt != nil { + lastRun = j.LastRunAt.Format(time.RFC3339) + } + enabled := "yes" + if !j.Enabled { + enabled = "no" + } + notify := "yes" + if !j.Notify { + notify = "no" + } + fmt.Printf("%-4d %-20s %-20s %-30s %-8s %-7s %s\n", + j.ID, truncateStr(j.Name, 20), j.CronExpr, truncateStr(j.ScanPath, 30), + enabled, notify, lastRun) + } + return nil + }, +} + +var scheduleRemoveCmd = &cobra.Command{ + Use: "remove ", + Short: "Remove a scheduled scan job by ID", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + id, err := strconv.ParseInt(args[0], 10, 64) + if err != nil || id <= 0 { + return fmt.Errorf("invalid job ID: %s", args[0]) + } + + db, cleanup, err := openScheduleDB() + if err != nil { + return err + } + defer cleanup() + + affected, err := db.DeleteScheduledJob(id) + if err != nil { + return fmt.Errorf("removing job: %w", err) + } + if affected == 0 { + return fmt.Errorf("no job with ID %d", id) + } + fmt.Printf("Scheduled job #%d removed.\n", id) + return nil + }, +} + +var scheduleRunCmd = &cobra.Command{ + Use: "run ", + Short: "Manually run a scheduled scan job now", + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + id, err := strconv.ParseInt(args[0], 10, 64) + if err != nil || id <= 0 { + return fmt.Errorf("invalid job ID: %s", args[0]) + } + + db, cleanup, err := openScheduleDB() + if err != nil { + return err + } + defer cleanup() + + job, err := db.GetScheduledJob(id) + if err != nil { + if errors.Is(err, sql.ErrNoRows) { + return fmt.Errorf("no job with ID %d", id) + } + return fmt.Errorf("fetching job: %w", err) + } + + cfg := config.Load() + + // Derive encryption key. + encKey, err := loadOrCreateEncKey(db, cfg.Passphrase) + if err != nil { + return fmt.Errorf("preparing encryption key: %w", err) + } + + // Initialize engine. + reg, err := providers.NewRegistry() + if err != nil { + return fmt.Errorf("loading providers: %w", err) + } + eng := engine.NewEngine(reg) + + fmt.Printf("Running job #%d (%s) scanning %s...\n", job.ID, job.Name, job.ScanPath) + + // Select source and scan. + src, err := selectSource([]string{job.ScanPath}, sourceFlags{}) + if err != nil { + return fmt.Errorf("selecting source: %w", err) + } + + scanCfg := engine.ScanConfig{ + Workers: 0, // auto + Verify: false, + Unmask: false, + } + + ch, scanErr := eng.Scan(context.Background(), src, scanCfg) + if scanErr != nil { + return fmt.Errorf("starting scan: %w", scanErr) + } + + var findings []engine.Finding + for f := range ch { + findings = append(findings, f) + } + + // Persist findings. + for _, f := range findings { + sf := storage.Finding{ + ProviderName: f.ProviderName, + KeyValue: f.KeyValue, + KeyMasked: f.KeyMasked, + Confidence: f.Confidence, + SourcePath: f.Source, + SourceType: f.SourceType, + LineNumber: f.LineNumber, + } + if _, err := db.SaveFinding(sf, encKey); err != nil { + fmt.Fprintf(os.Stderr, "warning: failed to save finding: %v\n", err) + } + } + + // Update last run time. + if err := db.UpdateJobLastRun(job.ID, time.Now()); err != nil { + fmt.Fprintf(os.Stderr, "warning: failed to update last_run: %v\n", err) + } + + fmt.Printf("Scan complete. Found %d key(s).\n", len(findings)) + return nil + }, +} + +// openScheduleDB opens the database for schedule commands. +func openScheduleDB() (*storage.DB, func(), error) { + cfg := config.Load() + dbPath := viper.GetString("database.path") + if dbPath == "" { + dbPath = cfg.DBPath + } + if err := os.MkdirAll(filepath.Dir(dbPath), 0700); err != nil { + return nil, nil, fmt.Errorf("creating database directory: %w", err) + } + db, err := storage.Open(dbPath) + if err != nil { + return nil, nil, fmt.Errorf("opening database: %w", err) + } + return db, func() { db.Close() }, nil +} + +// truncateStr shortens a string to max length with ellipsis. +// Named differently from dorks.go truncate to avoid redeclaration. +func truncateStr(s string, max int) string { + if len(s) <= max { + return s + } + return s[:max-3] + "..." +} + +func init() { + scheduleAddCmd.Flags().StringVar(&schedCron, "cron", "", "cron expression (e.g. \"0 */6 * * *\")") + scheduleAddCmd.Flags().StringVar(&schedScan, "scan", "", "path to scan") + scheduleAddCmd.Flags().StringVar(&schedName, "name", "", "job name (default: auto-generated)") + scheduleAddCmd.Flags().BoolVar(&schedNotify, "notify", true, "send Telegram notification on findings") + + scheduleCmd.AddCommand(scheduleAddCmd) + scheduleCmd.AddCommand(scheduleListCmd) + scheduleCmd.AddCommand(scheduleRemoveCmd) + scheduleCmd.AddCommand(scheduleRunCmd) +} diff --git a/cmd/serve.go b/cmd/serve.go new file mode 100644 index 0000000..02b720a --- /dev/null +++ b/cmd/serve.go @@ -0,0 +1,197 @@ +package cmd + +import ( + "context" + "fmt" + "log" + "os" + "os/signal" + "path/filepath" + "syscall" + + "github.com/mymmrac/telego" + "github.com/spf13/cobra" + "github.com/spf13/viper" + + "github.com/salvacybersec/keyhunter/pkg/bot" + "github.com/salvacybersec/keyhunter/pkg/config" + "github.com/salvacybersec/keyhunter/pkg/engine" + "github.com/salvacybersec/keyhunter/pkg/providers" + "github.com/salvacybersec/keyhunter/pkg/recon" + reconSources "github.com/salvacybersec/keyhunter/pkg/recon/sources" + "github.com/salvacybersec/keyhunter/pkg/scheduler" + "github.com/salvacybersec/keyhunter/pkg/storage" + "github.com/salvacybersec/keyhunter/pkg/verify" +) + +var ( + serveTelegram bool + servePort int +) + +var serveCmd = &cobra.Command{ + Use: "serve", + Short: "Start the scheduler (and optionally Telegram bot)", + Long: `Start KeyHunter in server mode. The scheduler runs all enabled recurring scan +jobs defined via 'keyhunter schedule add'. If --telegram is specified, the +Telegram bot is also started for remote control.`, + RunE: func(cmd *cobra.Command, args []string) error { + cfg := config.Load() + + // Open database. + dbPath := viper.GetString("database.path") + if dbPath == "" { + dbPath = cfg.DBPath + } + if err := os.MkdirAll(filepath.Dir(dbPath), 0700); err != nil { + return fmt.Errorf("creating database directory: %w", err) + } + db, err := storage.Open(dbPath) + if err != nil { + return fmt.Errorf("opening database: %w", err) + } + defer db.Close() + + // Derive encryption key. + encKey, err := loadOrCreateEncKey(db, cfg.Passphrase) + if err != nil { + return fmt.Errorf("preparing encryption key: %w", err) + } + + // Initialize provider registry and engine. + reg, err := providers.NewRegistry() + if err != nil { + return fmt.Errorf("loading providers: %w", err) + } + eng := engine.NewEngine(reg) + + // Initialize scheduler. + sched, err := scheduler.New(scheduler.Deps{ + Engine: eng, + DB: db, + EncKey: encKey, + }) + if err != nil { + return fmt.Errorf("creating scheduler: %w", err) + } + + // Optionally start Telegram bot. + var telegramBot *bot.Bot + if serveTelegram { + token := viper.GetString("telegram.token") + if token == "" { + token = os.Getenv("KEYHUNTER_TELEGRAM_TOKEN") + } + if token == "" { + return fmt.Errorf("telegram token required: set telegram.token in config or KEYHUNTER_TELEGRAM_TOKEN env var") + } + + api, err := telego.NewBot(token) + if err != nil { + return fmt.Errorf("creating telegram bot: %w", err) + } + + verifier := verify.NewHTTPVerifier(0) // default timeout + reconEng := buildServeReconEngine() + + telegramBot = bot.New(api, bot.Deps{ + Engine: eng, + Verifier: verifier, + Recon: reconEng, + DB: db, + Registry: reg, + EncKey: encKey, + }) + + // Wire scheduler notifications to Telegram. + sched.OnFindings = func(jobName string, findings []engine.Finding) { + msg := fmt.Sprintf("Scheduled scan %q found %d key(s):\n", jobName, len(findings)) + for i, f := range findings { + if i >= 10 { + msg += fmt.Sprintf("\n... and %d more", len(findings)-10) + break + } + msg += fmt.Sprintf("[%s] %s %s:%d\n", f.ProviderName, f.KeyMasked, f.Source, f.LineNumber) + } + // Broadcast to all subscribed chats via bot status message. + // For now, log. Subscribe/broadcast will be wired in a future plan. + log.Printf("scheduler notification: %s", msg) + } + + updates, err := api.UpdatesViaLongPolling(context.Background(), nil) + if err != nil { + return fmt.Errorf("starting telegram long polling: %w", err) + } + + bh := telegramBot.RegisterHandlers(updates) + go bh.Start() + fmt.Println("Telegram bot started.") + } + + // Load and start scheduler. + if err := sched.LoadAndStart(); err != nil { + return fmt.Errorf("starting scheduler: %w", err) + } + fmt.Printf("Scheduler started (port %d placeholder for future web dashboard).\n", servePort) + fmt.Println("Press Ctrl+C to stop.") + + // Wait for signal. + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM) + <-sigCh + + fmt.Println("\nShutting down...") + if err := sched.Stop(); err != nil { + log.Printf("scheduler shutdown error: %v", err) + } + return nil + }, +} + +// buildServeReconEngine creates a recon engine with all registered sources +// for the serve command's Telegram bot integration. Reuses the same +// credential lookup pattern as cmd/recon.go buildReconEngine. +func buildServeReconEngine() *recon.Engine { + eng := recon.NewEngine() + reg, err := providers.NewRegistry() + if err != nil { + log.Printf("serve: failed to load providers for recon: %v", err) + return eng + } + reconSources.RegisterAll(eng, reconSources.SourcesConfig{ + Registry: reg, + Limiters: recon.NewLimiterRegistry(), + GitHubToken: firstNonEmpty(os.Getenv("GITHUB_TOKEN"), viper.GetString("recon.github.token")), + GitLabToken: firstNonEmpty(os.Getenv("GITLAB_TOKEN"), viper.GetString("recon.gitlab.token")), + BitbucketToken: firstNonEmpty(os.Getenv("BITBUCKET_TOKEN"), viper.GetString("recon.bitbucket.token")), + BitbucketWorkspace: firstNonEmpty(os.Getenv("BITBUCKET_WORKSPACE"), viper.GetString("recon.bitbucket.workspace")), + CodebergToken: firstNonEmpty(os.Getenv("CODEBERG_TOKEN"), viper.GetString("recon.codeberg.token")), + HuggingFaceToken: firstNonEmpty(os.Getenv("HUGGINGFACE_TOKEN"), viper.GetString("recon.huggingface.token")), + KaggleUser: firstNonEmpty(os.Getenv("KAGGLE_USERNAME"), viper.GetString("recon.kaggle.username")), + KaggleKey: firstNonEmpty(os.Getenv("KAGGLE_KEY"), viper.GetString("recon.kaggle.key")), + GoogleAPIKey: firstNonEmpty(os.Getenv("GOOGLE_API_KEY"), viper.GetString("recon.google.api_key")), + GoogleCX: firstNonEmpty(os.Getenv("GOOGLE_CX"), viper.GetString("recon.google.cx")), + BingAPIKey: firstNonEmpty(os.Getenv("BING_API_KEY"), viper.GetString("recon.bing.api_key")), + YandexUser: firstNonEmpty(os.Getenv("YANDEX_USER"), viper.GetString("recon.yandex.user")), + YandexAPIKey: firstNonEmpty(os.Getenv("YANDEX_API_KEY"), viper.GetString("recon.yandex.api_key")), + BraveAPIKey: firstNonEmpty(os.Getenv("BRAVE_API_KEY"), viper.GetString("recon.brave.api_key")), + ShodanAPIKey: firstNonEmpty(os.Getenv("SHODAN_API_KEY"), viper.GetString("recon.shodan.api_key")), + CensysAPIId: firstNonEmpty(os.Getenv("CENSYS_API_ID"), viper.GetString("recon.censys.api_id")), + CensysAPISecret: firstNonEmpty(os.Getenv("CENSYS_API_SECRET"), viper.GetString("recon.censys.api_secret")), + ZoomEyeAPIKey: firstNonEmpty(os.Getenv("ZOOMEYE_API_KEY"), viper.GetString("recon.zoomeye.api_key")), + FOFAEmail: firstNonEmpty(os.Getenv("FOFA_EMAIL"), viper.GetString("recon.fofa.email")), + FOFAAPIKey: firstNonEmpty(os.Getenv("FOFA_API_KEY"), viper.GetString("recon.fofa.api_key")), + NetlasAPIKey: firstNonEmpty(os.Getenv("NETLAS_API_KEY"), viper.GetString("recon.netlas.api_key")), + BinaryEdgeAPIKey: firstNonEmpty(os.Getenv("BINARYEDGE_API_KEY"), viper.GetString("recon.binaryedge.api_key")), + CircleCIToken: firstNonEmpty(os.Getenv("CIRCLECI_TOKEN"), viper.GetString("recon.circleci.token")), + VirusTotalAPIKey: firstNonEmpty(os.Getenv("VIRUSTOTAL_API_KEY"), viper.GetString("recon.virustotal.api_key")), + IntelligenceXAPIKey: firstNonEmpty(os.Getenv("INTELX_API_KEY"), viper.GetString("recon.intelx.api_key")), + SecurityTrailsAPIKey: firstNonEmpty(os.Getenv("SECURITYTRAILS_API_KEY"), viper.GetString("recon.securitytrails.api_key")), + }) + return eng +} + +func init() { + serveCmd.Flags().BoolVar(&serveTelegram, "telegram", false, "start Telegram bot alongside scheduler") + serveCmd.Flags().IntVar(&servePort, "port", 8080, "port for future web dashboard (reserved)") +} diff --git a/cmd/stubs.go b/cmd/stubs.go index e3b2253..2baf00f 100644 --- a/cmd/stubs.go +++ b/cmd/stubs.go @@ -25,16 +25,8 @@ var verifyCmd = &cobra.Command{ // keysCmd is implemented in cmd/keys.go (Phase 6). -var serveCmd = &cobra.Command{ - Use: "serve", - Short: "Start the web dashboard (Phase 18)", - RunE: notImplemented("serve", "Phase 18"), -} +// serveCmd is implemented in cmd/serve.go (Phase 17). // dorksCmd is implemented in cmd/dorks.go (Phase 8). -var scheduleCmd = &cobra.Command{ - Use: "schedule", - Short: "Manage scheduled recurring scans (Phase 17)", - RunE: notImplemented("schedule", "Phase 17"), -} +// scheduleCmd is implemented in cmd/schedule.go (Phase 17). diff --git a/go.mod b/go.mod index 04f8655..78707c1 100644 --- a/go.mod +++ b/go.mod @@ -5,16 +5,20 @@ go 1.26.1 require ( github.com/atotto/clipboard v0.1.4 github.com/charmbracelet/lipgloss v1.1.0 + github.com/go-co-op/gocron/v2 v2.19.1 github.com/go-git/go-git/v5 v5.17.2 github.com/mattn/go-isatty v0.0.20 + github.com/mymmrac/telego v1.8.0 github.com/panjf2000/ants/v2 v2.12.0 github.com/petar-dambovaliev/aho-corasick v0.0.0-20250424160509-463d218d4745 github.com/spf13/cobra v1.10.2 github.com/spf13/viper v1.21.0 github.com/stretchr/testify v1.11.1 + github.com/temoto/robotstxt v1.1.2 github.com/tidwall/gjson v1.18.0 golang.org/x/crypto v0.49.0 golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90 + golang.org/x/net v0.52.0 golang.org/x/time v0.15.0 gopkg.in/yaml.v3 v3.0.1 modernc.org/sqlite v1.48.1 @@ -48,19 +52,20 @@ require ( github.com/grbit/go-json v0.11.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect + github.com/jonboulle/clockwork v0.5.0 // indirect github.com/kevinburke/ssh_config v1.2.0 // indirect github.com/klauspost/compress v1.18.2 // indirect github.com/klauspost/cpuid/v2 v2.2.9 // indirect github.com/lucasb-eyer/go-colorful v1.2.0 // indirect github.com/mattn/go-runewidth v0.0.16 // indirect github.com/muesli/termenv v0.16.0 // indirect - github.com/mymmrac/telego v1.8.0 // indirect github.com/ncruces/go-strftime v1.0.0 // indirect github.com/pelletier/go-toml/v2 v2.2.4 // indirect github.com/pjbgf/sha1cd v0.3.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rivo/uniseg v0.4.7 // indirect + github.com/robfig/cron/v3 v3.0.1 // indirect github.com/sagikazarmark/locafero v0.11.0 // indirect github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect github.com/skeema/knownhosts v1.3.1 // indirect @@ -69,7 +74,6 @@ require ( github.com/spf13/cast v1.10.0 // indirect github.com/spf13/pflag v1.0.10 // indirect github.com/subosito/gotenv v1.6.0 // indirect - github.com/temoto/robotstxt v1.1.2 // indirect github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.0 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect @@ -80,7 +84,6 @@ require ( github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/arch v0.0.0-20210923205945-b76863e36670 // indirect - golang.org/x/net v0.52.0 // indirect golang.org/x/sync v0.20.0 // indirect golang.org/x/sys v0.42.0 // indirect golang.org/x/text v0.35.0 // indirect diff --git a/go.sum b/go.sum index ce408cd..9924c3c 100644 --- a/go.sum +++ b/go.sum @@ -53,6 +53,8 @@ github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/gliderlabs/ssh v0.3.8 h1:a4YXD1V7xMF9g5nTkdfnja3Sxy1PVDCj1Zg4Wb8vY6c= github.com/gliderlabs/ssh v0.3.8/go.mod h1:xYoytBv1sV0aL3CavoDuJIQNURXkkfPA/wxQ1pL1fAU= +github.com/go-co-op/gocron/v2 v2.19.1 h1:B4iLeA0NB/2iO3EKQ7NfKn5KsQgZfjb2fkvoZJU3yBI= +github.com/go-co-op/gocron/v2 v2.19.1/go.mod h1:5lEiCKk1oVJV39Zg7/YG10OnaVrDAV5GGR6O0663k6U= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmSxCcxctByoQdvwPiA7DTK7jaaFDBTtu0ic= github.com/go-git/go-billy/v5 v5.8.0 h1:I8hjc3LbBlXTtVuFNJuwYuMiHvQJDq1AT6u4DwDzZG0= @@ -79,6 +81,8 @@ github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2 github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A= github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo= +github.com/jonboulle/clockwork v0.5.0 h1:Hyh9A8u51kptdkR+cqRpT1EebBwTn1oK9YfGYbdFz6I= +github.com/jonboulle/clockwork v0.5.0/go.mod h1:3mZlmanh0g2NDKO5TWZVJAfofYk64M7XN3SzBPjZF60= github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM= github.com/klauspost/compress v1.18.2 h1:iiPHWW0YrcFgpBYhsA6D1+fqHssJscY/Tm/y2Uqnapk= @@ -123,6 +127,8 @@ github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qq github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= +github.com/robfig/cron/v3 v3.0.1 h1:WdRxkvbJztn8LMz/QEvLN5sBU+xKpSqwwUO1Pjr4qDs= +github.com/robfig/cron/v3 v3.0.1/go.mod h1:eQICP3HwyT7UooqI/z+Ov+PtYAWygg1TEWWzGIFLtro= github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= @@ -181,6 +187,12 @@ github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e h1:JVG44RsyaB9T2KIHavMF/ppJZNG9ZpyihvCd0w101no= github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e/go.mod h1:RbqR21r5mrJuqunuUZ/Dhy/avygyECGrLceyNeo4LiM= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= +github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/mock v0.6.0 h1:hyF9dfmbgIX5EfOdasqLsWD6xqpNZlXblLB/Dbnwv3Y= +go.uber.org/mock v0.6.0/go.mod h1:KiVJ4BqZJaMj4svdfmHM0AUx4NJYO8ZNpPnZn1Z+BBU= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/arch v0.0.0-20210923205945-b76863e36670 h1:18EFjUmQOcUvxNYSkA6jO9VAiXCnxFY6NyDX0bHDmkU= diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go new file mode 100644 index 0000000..265eb5c --- /dev/null +++ b/pkg/scheduler/scheduler.go @@ -0,0 +1,170 @@ +// Package scheduler implements cron-based recurring scan scheduling for KeyHunter. +// It uses gocron v2 for job management and delegates scan execution to the engine. +package scheduler + +import ( + "context" + "fmt" + "log" + "sync" + "time" + + "github.com/go-co-op/gocron/v2" + + "github.com/salvacybersec/keyhunter/pkg/engine" + "github.com/salvacybersec/keyhunter/pkg/storage" +) + +// Scheduler manages recurring scan jobs backed by the database. +type Scheduler struct { + cron gocron.Scheduler + engine *engine.Engine + db *storage.DB + encKey []byte + + mu sync.Mutex + jobs map[int64]gocron.Job // DB job ID -> gocron job + + // OnFindings is called when a scheduled scan produces findings. + // The caller can wire this to Telegram notifications. + OnFindings func(jobName string, findings []engine.Finding) +} + +// Deps bundles the dependencies for creating a Scheduler. +type Deps struct { + Engine *engine.Engine + DB *storage.DB + EncKey []byte +} + +// New creates a new Scheduler. Call Start() to begin processing jobs. +func New(deps Deps) (*Scheduler, error) { + cron, err := gocron.NewScheduler() + if err != nil { + return nil, fmt.Errorf("creating gocron scheduler: %w", err) + } + return &Scheduler{ + cron: cron, + engine: deps.Engine, + db: deps.DB, + encKey: deps.EncKey, + jobs: make(map[int64]gocron.Job), + }, nil +} + +// LoadAndStart loads all enabled jobs from the database, registers them +// with gocron, and starts the scheduler. +func (s *Scheduler) LoadAndStart() error { + jobs, err := s.db.ListEnabledScheduledJobs() + if err != nil { + return fmt.Errorf("loading scheduled jobs: %w", err) + } + for _, j := range jobs { + if err := s.registerJob(j); err != nil { + log.Printf("scheduler: failed to register job %d (%s): %v", j.ID, j.Name, err) + } + } + s.cron.Start() + return nil +} + +// Stop shuts down the scheduler gracefully. +func (s *Scheduler) Stop() error { + return s.cron.Shutdown() +} + +// AddJob registers a new job from a storage.ScheduledJob and adds it to the +// running scheduler. +func (s *Scheduler) AddJob(job storage.ScheduledJob) error { + return s.registerJob(job) +} + +// RemoveJob removes a job from the running scheduler by its DB ID. +func (s *Scheduler) RemoveJob(id int64) { + s.mu.Lock() + defer s.mu.Unlock() + if j, ok := s.jobs[id]; ok { + s.cron.RemoveJob(j.ID()) + delete(s.jobs, id) + } +} + +// RunNow executes a job immediately (outside of its cron schedule). +func (s *Scheduler) RunNow(job storage.ScheduledJob) ([]engine.Finding, error) { + return s.executeScan(job) +} + +// registerJob adds a single scheduled job to gocron. +func (s *Scheduler) registerJob(job storage.ScheduledJob) error { + jobCopy := job // capture for closure + cronJob, err := s.cron.NewJob( + gocron.CronJob(job.CronExpr, false), + gocron.NewTask(func() { + findings, err := s.executeScan(jobCopy) + if err != nil { + log.Printf("scheduler: job %d (%s) failed: %v", jobCopy.ID, jobCopy.Name, err) + return + } + // Update last run time in DB. + if err := s.db.UpdateJobLastRun(jobCopy.ID, time.Now()); err != nil { + log.Printf("scheduler: failed to update last_run for job %d: %v", jobCopy.ID, err) + } + if len(findings) > 0 && jobCopy.Notify && s.OnFindings != nil { + s.OnFindings(jobCopy.Name, findings) + } + }), + ) + if err != nil { + return fmt.Errorf("registering cron job %q (%s): %w", job.Name, job.CronExpr, err) + } + + s.mu.Lock() + s.jobs[job.ID] = cronJob + s.mu.Unlock() + return nil +} + +// executeScan runs a scan against the job's configured path and persists findings. +func (s *Scheduler) executeScan(job storage.ScheduledJob) ([]engine.Finding, error) { + src, err := selectSchedulerSource(job.ScanPath) + if err != nil { + return nil, fmt.Errorf("selecting source for %q: %w", job.ScanPath, err) + } + + cfg := engine.ScanConfig{ + Workers: 0, // auto + Verify: false, + Unmask: false, + } + + ctx, cancel := context.WithTimeout(context.Background(), 30*time.Minute) + defer cancel() + + ch, err := s.engine.Scan(ctx, src, cfg) + if err != nil { + return nil, fmt.Errorf("starting scan: %w", err) + } + + var findings []engine.Finding + for f := range ch { + findings = append(findings, f) + } + + // Persist findings. + for _, f := range findings { + sf := storage.Finding{ + ProviderName: f.ProviderName, + KeyValue: f.KeyValue, + KeyMasked: f.KeyMasked, + Confidence: f.Confidence, + SourcePath: f.Source, + SourceType: f.SourceType, + LineNumber: f.LineNumber, + } + if _, err := s.db.SaveFinding(sf, s.encKey); err != nil { + log.Printf("scheduler: failed to save finding: %v", err) + } + } + + return findings, nil +} diff --git a/pkg/scheduler/source.go b/pkg/scheduler/source.go new file mode 100644 index 0000000..31ee42c --- /dev/null +++ b/pkg/scheduler/source.go @@ -0,0 +1,21 @@ +package scheduler + +import ( + "fmt" + "os" + + "github.com/salvacybersec/keyhunter/pkg/engine/sources" +) + +// selectSchedulerSource returns the appropriate Source for a scheduled scan path. +// Only file and directory paths are supported (same as bot scans). +func selectSchedulerSource(path string) (sources.Source, error) { + info, err := os.Stat(path) + if err != nil { + return nil, fmt.Errorf("stat %q: %w", path, err) + } + if info.IsDir() { + return sources.NewDirSource(path), nil + } + return sources.NewFileSource(path), nil +} diff --git a/pkg/storage/scheduled_jobs.go b/pkg/storage/scheduled_jobs.go new file mode 100644 index 0000000..4620ef3 --- /dev/null +++ b/pkg/storage/scheduled_jobs.go @@ -0,0 +1,164 @@ +package storage + +import ( + "database/sql" + "fmt" + "time" +) + +// ScheduledJob represents a cron-based recurring scan job. +type ScheduledJob struct { + ID int64 + Name string + CronExpr string + ScanPath string + Enabled bool + Notify bool + LastRunAt *time.Time + NextRunAt *time.Time + CreatedAt time.Time +} + +// SaveScheduledJob inserts a new scheduled job and returns its ID. +func (db *DB) SaveScheduledJob(job ScheduledJob) (int64, error) { + enabledInt := 0 + if job.Enabled { + enabledInt = 1 + } + notifyInt := 0 + if job.Notify { + notifyInt = 1 + } + + res, err := db.sql.Exec( + `INSERT INTO scheduled_jobs (name, cron_expr, scan_path, enabled, notify) + VALUES (?, ?, ?, ?, ?)`, + job.Name, job.CronExpr, job.ScanPath, enabledInt, notifyInt, + ) + if err != nil { + return 0, fmt.Errorf("inserting scheduled job: %w", err) + } + return res.LastInsertId() +} + +// ListScheduledJobs returns all scheduled jobs ordered by creation time. +func (db *DB) ListScheduledJobs() ([]ScheduledJob, error) { + rows, err := db.sql.Query( + `SELECT id, name, cron_expr, scan_path, enabled, notify, last_run_at, next_run_at, created_at + FROM scheduled_jobs ORDER BY created_at ASC`, + ) + if err != nil { + return nil, fmt.Errorf("querying scheduled jobs: %w", err) + } + defer rows.Close() + + var jobs []ScheduledJob + for rows.Next() { + j, err := scanJobRow(rows) + if err != nil { + return nil, err + } + jobs = append(jobs, j) + } + return jobs, rows.Err() +} + +// GetScheduledJob returns a single job by ID. +func (db *DB) GetScheduledJob(id int64) (*ScheduledJob, error) { + row := db.sql.QueryRow( + `SELECT id, name, cron_expr, scan_path, enabled, notify, last_run_at, next_run_at, created_at + FROM scheduled_jobs WHERE id = ?`, id, + ) + var j ScheduledJob + var enabledInt, notifyInt int + var lastRun, nextRun, createdAt sql.NullString + if err := row.Scan(&j.ID, &j.Name, &j.CronExpr, &j.ScanPath, + &enabledInt, ¬ifyInt, &lastRun, &nextRun, &createdAt); err != nil { + return nil, err + } + j.Enabled = enabledInt != 0 + j.Notify = notifyInt != 0 + j.LastRunAt = parseNullTime(lastRun) + j.NextRunAt = parseNullTime(nextRun) + if createdAt.Valid { + t, _ := time.Parse("2006-01-02 15:04:05", createdAt.String) + j.CreatedAt = t + } + return &j, nil +} + +// DeleteScheduledJob removes a job by ID and returns rows affected. +func (db *DB) DeleteScheduledJob(id int64) (int64, error) { + res, err := db.sql.Exec(`DELETE FROM scheduled_jobs WHERE id = ?`, id) + if err != nil { + return 0, fmt.Errorf("deleting scheduled job %d: %w", id, err) + } + return res.RowsAffected() +} + +// UpdateJobLastRun updates the last_run_at timestamp for a job. +func (db *DB) UpdateJobLastRun(id int64, t time.Time) error { + _, err := db.sql.Exec( + `UPDATE scheduled_jobs SET last_run_at = ? WHERE id = ?`, + t.Format("2006-01-02 15:04:05"), id, + ) + return err +} + +// ListEnabledScheduledJobs returns only enabled jobs. +func (db *DB) ListEnabledScheduledJobs() ([]ScheduledJob, error) { + rows, err := db.sql.Query( + `SELECT id, name, cron_expr, scan_path, enabled, notify, last_run_at, next_run_at, created_at + FROM scheduled_jobs WHERE enabled = 1 ORDER BY created_at ASC`, + ) + if err != nil { + return nil, fmt.Errorf("querying enabled scheduled jobs: %w", err) + } + defer rows.Close() + + var jobs []ScheduledJob + for rows.Next() { + j, err := scanJobRow(rows) + if err != nil { + return nil, err + } + jobs = append(jobs, j) + } + return jobs, rows.Err() +} + +func scanJobRow(rows *sql.Rows) (ScheduledJob, error) { + var j ScheduledJob + var enabledInt, notifyInt int + var lastRun, nextRun, createdAt sql.NullString + if err := rows.Scan(&j.ID, &j.Name, &j.CronExpr, &j.ScanPath, + &enabledInt, ¬ifyInt, &lastRun, &nextRun, &createdAt); err != nil { + return j, fmt.Errorf("scanning scheduled job row: %w", err) + } + j.Enabled = enabledInt != 0 + j.Notify = notifyInt != 0 + j.LastRunAt = parseNullTime(lastRun) + j.NextRunAt = parseNullTime(nextRun) + if createdAt.Valid { + t, _ := time.Parse("2006-01-02 15:04:05", createdAt.String) + j.CreatedAt = t + } + return j, nil +} + +func parseNullTime(ns sql.NullString) *time.Time { + if !ns.Valid || ns.String == "" { + return nil + } + // Try multiple formats SQLite may return. + for _, layout := range []string{ + "2006-01-02 15:04:05", + "2006-01-02T15:04:05Z", + time.RFC3339, + } { + if t, err := time.Parse(layout, ns.String); err == nil { + return &t + } + } + return nil +} diff --git a/pkg/storage/scheduled_jobs_test.go b/pkg/storage/scheduled_jobs_test.go new file mode 100644 index 0000000..016583b --- /dev/null +++ b/pkg/storage/scheduled_jobs_test.go @@ -0,0 +1,104 @@ +package storage + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestScheduledJobCRUD(t *testing.T) { + db, err := Open(":memory:") + require.NoError(t, err) + defer db.Close() + + // Add a job. + job := ScheduledJob{ + Name: "nightly-scan", + CronExpr: "0 0 * * *", + ScanPath: "/tmp/repo", + Enabled: true, + Notify: true, + } + id, err := db.SaveScheduledJob(job) + require.NoError(t, err) + assert.True(t, id > 0) + + // Get the job by ID. + got, err := db.GetScheduledJob(id) + require.NoError(t, err) + assert.Equal(t, "nightly-scan", got.Name) + assert.Equal(t, "0 0 * * *", got.CronExpr) + assert.Equal(t, "/tmp/repo", got.ScanPath) + assert.True(t, got.Enabled) + assert.True(t, got.Notify) + assert.Nil(t, got.LastRunAt) + + // List all jobs. + jobs, err := db.ListScheduledJobs() + require.NoError(t, err) + assert.Len(t, jobs, 1) + assert.Equal(t, "nightly-scan", jobs[0].Name) + + // Add a second job (disabled). + job2 := ScheduledJob{ + Name: "weekly-scan", + CronExpr: "0 0 * * 0", + ScanPath: "/tmp/other", + Enabled: false, + Notify: false, + } + id2, err := db.SaveScheduledJob(job2) + require.NoError(t, err) + + // List enabled only. + enabled, err := db.ListEnabledScheduledJobs() + require.NoError(t, err) + assert.Len(t, enabled, 1) + assert.Equal(t, id, enabled[0].ID) + + // Delete the first job. + affected, err := db.DeleteScheduledJob(id) + require.NoError(t, err) + assert.Equal(t, int64(1), affected) + + // Only the second job should remain. + jobs, err = db.ListScheduledJobs() + require.NoError(t, err) + assert.Len(t, jobs, 1) + assert.Equal(t, id2, jobs[0].ID) + + // Delete non-existent returns 0 affected. + affected, err = db.DeleteScheduledJob(999) + require.NoError(t, err) + assert.Equal(t, int64(0), affected) +} + +func TestUpdateJobLastRun(t *testing.T) { + db, err := Open(":memory:") + require.NoError(t, err) + defer db.Close() + + id, err := db.SaveScheduledJob(ScheduledJob{ + Name: "test", + CronExpr: "*/5 * * * *", + ScanPath: "/tmp/test", + Enabled: true, + Notify: true, + }) + require.NoError(t, err) + + job, err := db.GetScheduledJob(id) + require.NoError(t, err) + assert.Nil(t, job.LastRunAt) + + // Update last run. + now := time.Now().Truncate(time.Second) + require.NoError(t, db.UpdateJobLastRun(id, now)) + + job, err = db.GetScheduledJob(id) + require.NoError(t, err) + require.NotNil(t, job.LastRunAt, "LastRunAt should be set after UpdateJobLastRun") + assert.Equal(t, now.Format("2006-01-02 15:04:05"), job.LastRunAt.Format("2006-01-02 15:04:05")) +} diff --git a/pkg/storage/schema.sql b/pkg/storage/schema.sql index 964dad9..698dde2 100644 --- a/pkg/storage/schema.sql +++ b/pkg/storage/schema.sql @@ -55,3 +55,18 @@ CREATE TABLE IF NOT EXISTS custom_dorks ( CREATE INDEX IF NOT EXISTS idx_custom_dorks_source ON custom_dorks(source); CREATE INDEX IF NOT EXISTS idx_custom_dorks_category ON custom_dorks(category); + +-- Phase 17: scheduled scan jobs for cron-based recurring scans. +CREATE TABLE IF NOT EXISTS scheduled_jobs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL, + cron_expr TEXT NOT NULL, + scan_path TEXT NOT NULL, + enabled INTEGER NOT NULL DEFAULT 1, + notify INTEGER NOT NULL DEFAULT 1, + last_run_at DATETIME, + next_run_at DATETIME, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP +); + +CREATE INDEX IF NOT EXISTS idx_scheduled_jobs_enabled ON scheduled_jobs(enabled);