diff --git a/pkg/recon/sources/devto.go b/pkg/recon/sources/devto.go
new file mode 100644
index 0000000..3e383e0
--- /dev/null
+++ b/pkg/recon/sources/devto.go
@@ -0,0 +1,181 @@
+package sources
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"time"
+
+	"golang.org/x/time/rate"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// DevToSource searches the dev.to public API for articles containing leaked
+// API keys. Developers write tutorials and guides on dev.to that sometimes
+// include real credentials in code examples.
+//
+// BaseURL and Client are optional: Sweep falls back to https://dev.to and
+// NewClient() when they are unset. A nil Limiters disables rate limiting.
+type DevToSource struct {
+	BaseURL  string
+	Registry *providers.Registry
+	Limiters *recon.LimiterRegistry
+	Client   *Client
+}
+
+var _ recon.ReconSource = (*DevToSource)(nil)
+
+// Name and the accessors below report this source's fixed settings.
+func (s *DevToSource) Name() string                { return "devto" }
+func (s *DevToSource) RateLimit() rate.Limit       { return rate.Every(1 * time.Second) }
+func (s *DevToSource) Burst() int                  { return 5 }
+func (s *DevToSource) RespectsRobots() bool        { return false }
+func (s *DevToSource) Enabled(_ recon.Config) bool { return true }
+
+// devtoArticleSummary represents an article in the dev.to /api/articles list response.
+type devtoArticleSummary struct {
+	ID  int    `json:"id"`
+	URL string `json:"url"`
+}
+
+// devtoArticleDetail represents the full article from /api/articles/{id}.
+type devtoArticleDetail struct {
+	BodyMarkdown string `json:"body_markdown"`
+	URL          string `json:"url"`
+}
+
+// Sweep queries the dev.to article list once per query from BuildQueries,
+// fetches the full body of at most five articles per query, and sends a
+// Finding on out for every article whose markdown matches ciLogKeyPattern.
+//
+// Per-request failures (transport errors, unreadable or undecodable bodies)
+// skip that request so one bad response does not abort the sweep. Sweep
+// returns a non-nil error only when ctx is done or the limiter wait fails.
+func (s *DevToSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
+	base := s.BaseURL
+	if base == "" {
+		base = "https://dev.to"
+	}
+	client := s.Client
+	if client == nil {
+		client = NewClient()
+	}
+
+	queries := BuildQueries(s.Registry, "devto")
+	if len(queries) == 0 {
+		return nil
+	}
+
+	for _, q := range queries {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		if s.Limiters != nil {
+			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+				return err
+			}
+		}
+
+		// Search for articles by tag keyword.
+		listURL := fmt.Sprintf("%s/api/articles?tag=%s&per_page=10&state=rising",
+			base, url.QueryEscape(q))
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, listURL, nil)
+		if err != nil {
+			continue
+		}
+		req.Header.Set("Accept", "application/json")
+
+		resp, err := client.Do(ctx, req)
+		if err != nil {
+			continue
+		}
+
+		body, err := io.ReadAll(io.LimitReader(resp.Body, 256*1024))
+		_ = resp.Body.Close()
+		if err != nil {
+			continue
+		}
+
+		var articles []devtoArticleSummary
+		if err := json.Unmarshal(body, &articles); err != nil {
+			continue
+		}
+
+		// Limit to first 5 articles to stay within rate limits.
+		limit := 5
+		if len(articles) < limit {
+			limit = len(articles)
+		}
+
+		for _, article := range articles[:limit] {
+			if err := ctx.Err(); err != nil {
+				return err
+			}
+
+			if s.Limiters != nil {
+				if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+					return err
+				}
+			}
+
+			// Fetch full article to get body_markdown.
+			detailURL := fmt.Sprintf("%s/api/articles/%d", base, article.ID)
+			detailReq, err := http.NewRequestWithContext(ctx, http.MethodGet, detailURL, nil)
+			if err != nil {
+				continue
+			}
+			detailReq.Header.Set("Accept", "application/json")
+
+			detailResp, err := client.Do(ctx, detailReq)
+			if err != nil {
+				continue
+			}
+
+			detailBody, err := io.ReadAll(io.LimitReader(detailResp.Body, 256*1024))
+			_ = detailResp.Body.Close()
+			if err != nil {
+				continue
+			}
+
+			var detail devtoArticleDetail
+			if err := json.Unmarshal(detailBody, &detail); err != nil {
+				continue
+			}
+
+			if !ciLogKeyPattern.MatchString(detail.BodyMarkdown) {
+				continue
+			}
+
+			// Prefer a public article URL (detail first, then the list
+			// entry); the API URL is only a last resort.
+			articleURL := detail.URL
+			if articleURL == "" {
+				articleURL = article.URL
+			}
+			if articleURL == "" {
+				articleURL = detailURL
+			}
+
+			finding := recon.Finding{
+				ProviderName: q,
+				Source:       articleURL,
+				SourceType:   "recon:devto",
+				Confidence:   "medium",
+				DetectedAt:   time.Now(),
+			}
+			// Give up on a stalled consumer once ctx is cancelled.
+			select {
+			case out <- finding:
+			case <-ctx.Done():
+				return ctx.Err()
+			}
+		}
+	}
+	return nil
+}
diff --git a/pkg/recon/sources/devto_test.go b/pkg/recon/sources/devto_test.go
new file mode 100644
index 0000000..fa2b3ee
--- /dev/null
+++ b/pkg/recon/sources/devto_test.go
@@ -0,0 +1,97 @@
+package sources
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+func TestDevTo_Name(t *testing.T) {
+	s := &DevToSource{}
+	if s.Name() != "devto" {
+		t.Fatalf("expected devto, got %s", s.Name())
+	}
+}
+
+func TestDevTo_Enabled(t *testing.T) {
+	s := &DevToSource{}
+	if !s.Enabled(recon.Config{}) {
+		t.Fatal("DevToSource should always be enabled (credentialless)")
+	}
+}
+
+// sweepDevToFixture serves one list entry (id 42) and detailJSON as that
+// article's detail response, runs Sweep against the stub server, and returns
+// every finding it emitted. It fails the test when Sweep emits nothing.
+func sweepDevToFixture(t *testing.T, detailJSON string) []recon.Finding {
+	t.Helper()
+
+	mux := http.NewServeMux()
+	// ServeMux patterns without a trailing slash match one exact path, so the
+	// list and detail endpoints need separate handlers.
+	mux.HandleFunc("/api/articles", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`[{"id":42,"url":"https://dev.to/user/tutorial-post"}]`))
+	})
+	mux.HandleFunc("/api/articles/42", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(detailJSON))
+	})
+
+	srv := httptest.NewServer(mux)
+	defer srv.Close()
+
+	reg := providers.NewRegistryFromProviders([]providers.Provider{
+		{Name: "openai", Keywords: []string{"sk-proj-"}},
+	})
+
+	s := &DevToSource{
+		BaseURL:  srv.URL,
+		Registry: reg,
+		Client:   NewClient(),
+	}
+
+	out := make(chan recon.Finding, 10)
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	err := s.Sweep(ctx, "", out)
+	close(out)
+	if err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) == 0 {
+		t.Fatal("expected at least one finding from dev.to article")
+	}
+	return findings
+}
+
+func TestDevTo_Sweep(t *testing.T) {
+	findings := sweepDevToFixture(t, `{
+		"body_markdown":"# Tutorial\nSet your api_key = \"sk-proj-ABCDEF1234567890abcdef\" in .env\n",
+		"url":"https://dev.to/user/tutorial-post"
+	}`)
+	if findings[0].SourceType != "recon:devto" {
+		t.Fatalf("expected recon:devto, got %s", findings[0].SourceType)
+	}
+}
+
+func TestDevTo_Sweep_FallsBackToListURL(t *testing.T) {
+	// The detail response carries no url, so Source must come from the list entry.
+	findings := sweepDevToFixture(t, `{
+		"body_markdown":"# Tutorial\nSet your api_key = \"sk-proj-ABCDEF1234567890abcdef\" in .env\n"
+	}`)
+	if got, want := findings[0].Source, "https://dev.to/user/tutorial-post"; got != want {
+		t.Fatalf("expected Source %s, got %s", want, got)
+	}
+}
diff --git a/pkg/recon/sources/discord.go b/pkg/recon/sources/discord.go
new file mode 100644
index 0000000..80e3bc2
--- /dev/null
+++ b/pkg/recon/sources/discord.go
@@ -0,0 +1,128 @@
+package sources
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"time"
+
+	"golang.org/x/time/rate"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// DiscordSource discovers Discord content indexed by search engines that may
+// contain leaked API keys. Discord has no public message search API, so this
+// source uses a dorking approach against a configurable search endpoint to
+// find Discord content cached by third-party indexers.
+type DiscordSource struct {
+	BaseURL  string
+	Registry *providers.Registry
+	Limiters *recon.LimiterRegistry
+	Client   *Client
+}
+
+var _ recon.ReconSource = (*DiscordSource)(nil)
+
+// Name and the accessors below report this source's fixed settings.
+func (s *DiscordSource) Name() string                { return "discord" }
+func (s *DiscordSource) RateLimit() rate.Limit       { return rate.Every(3 * time.Second) }
+func (s *DiscordSource) Burst() int                  { return 2 }
+func (s *DiscordSource) RespectsRobots() bool        { return false }
+func (s *DiscordSource) Enabled(_ recon.Config) bool { return true }
+
+// discordSearchResponse represents the search endpoint response for Discord dorking.
+type discordSearchResponse struct {
+	Results []discordSearchResult `json:"results"`
+}
+
+// discordSearchResult is one entry of the results array: its url and content fields.
+type discordSearchResult struct {
+	URL     string `json:"url"`
+	Content string `json:"content"`
+}
+
+// Sweep runs one "site:discord.com <query>" search per query from
+// BuildQueries against the search endpoint (BaseURL, or the built-in default
+// when empty) and sends a low-confidence Finding on out for every result
+// whose content matches ciLogKeyPattern.
+//
+// Per-query failures (transport errors, unreadable or undecodable bodies)
+// skip that query. Sweep returns a non-nil error only when ctx is done or
+// the limiter wait fails.
+func (s *DiscordSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
+	base := s.BaseURL
+	if base == "" {
+		base = "https://search.discobot.dev"
+	}
+	client := s.Client
+	if client == nil {
+		client = NewClient()
+	}
+
+	queries := BuildQueries(s.Registry, "discord")
+	if len(queries) == 0 {
+		return nil
+	}
+
+	for _, q := range queries {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		if s.Limiters != nil {
+			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+				return err
+			}
+		}
+
+		searchURL := fmt.Sprintf("%s/search?q=%s&format=json",
+			base, url.QueryEscape("site:discord.com "+q))
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
+		if err != nil {
+			continue
+		}
+		req.Header.Set("Accept", "application/json")
+
+		resp, err := client.Do(ctx, req)
+		if err != nil {
+			continue
+		}
+
+		body, err := io.ReadAll(io.LimitReader(resp.Body, 256*1024))
+		_ = resp.Body.Close()
+		if err != nil {
+			continue
+		}
+
+		var result discordSearchResponse
+		if err := json.Unmarshal(body, &result); err != nil {
+			continue
+		}
+
+		for _, item := range result.Results {
+			if !ciLogKeyPattern.MatchString(item.Content) {
+				continue
+			}
+
+			finding := recon.Finding{
+				ProviderName: q,
+				Source:       item.URL,
+				SourceType:   "recon:discord",
+				Confidence:   "low",
+				DetectedAt:   time.Now(),
+			}
+			// Give up on a stalled consumer once ctx is cancelled.
+			select {
+			case out <- finding:
+			case <-ctx.Done():
+				return ctx.Err()
+			}
+		}
+	}
+	return nil
+}
diff --git a/pkg/recon/sources/discord_test.go b/pkg/recon/sources/discord_test.go
new file mode 100644
index 0000000..c262612
--- /dev/null
+++ b/pkg/recon/sources/discord_test.go
@@ -0,0 +1,71 @@
+package sources
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+func TestDiscord_Name(t *testing.T) {
+	s := &DiscordSource{}
+	if s.Name() != "discord" {
+		t.Fatalf("expected discord, got %s", s.Name())
+	}
+}
+
+func TestDiscord_Enabled(t *testing.T) {
+	s := &DiscordSource{}
+	if !s.Enabled(recon.Config{}) {
+		t.Fatal("DiscordSource should always be enabled (credentialless)")
+	}
+}
+
+func TestDiscord_Sweep(t *testing.T) {
+	mux := http.NewServeMux()
+	mux.HandleFunc("/search", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"results":[{
+			"url":"https://discord.com/channels/123/456/789",
+			"content":"hey use this token: api_key = \"sk-proj-ABCDEF1234567890abcdef\""
+		}]}`))
+	})
+
+	srv := httptest.NewServer(mux)
+	defer srv.Close()
+
+	reg := providers.NewRegistryFromProviders([]providers.Provider{
+		{Name: "openai", Keywords: []string{"sk-proj-"}},
+	})
+
+	s := &DiscordSource{
+		BaseURL:  srv.URL,
+		Registry: reg,
+		Client:   NewClient(),
+	}
+
+	out := make(chan recon.Finding, 10)
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	err := s.Sweep(ctx, "", out)
+	close(out)
+	if err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) == 0 {
+		t.Fatal("expected at least one finding from Discord search")
+	}
+	if findings[0].SourceType != "recon:discord" {
+		t.Fatalf("expected recon:discord, got %s", findings[0].SourceType)
+	}
+}
diff --git a/pkg/recon/sources/register.go b/pkg/recon/sources/register.go
index 1f2e34d..304f7d3 100644
--- a/pkg/recon/sources/register.go
+++ b/pkg/recon/sources/register.go
@@ -61,7 +61,8 @@ type SourcesConfig struct {
 // RegisterAll registers every Phase 10 code-hosting, Phase 11 search engine /
 // paste site, Phase 12 IoT scanner / cloud storage, Phase 13 package
 // registry / container / IaC, Phase 14 CI/CD log / web archive / frontend
-// leak, and Phase 15 collaboration tool source on engine (56 sources total).
+// leak, and Phase 15 forum / collaboration tool / log aggregator source on
+// engine (67 sources total).
 //
 // All sources are registered unconditionally so that cmd/recon.go can surface
 // the full catalog via `keyhunter recon list` regardless of which credentials
@@ -261,9 +262,24 @@ func RegisterAll(engine *recon.Engine, cfg SourcesConfig) {
 	// Phase 14: JS bundle analysis (credentialless).
 	engine.Register(&JSBundleSource{Registry: reg, Limiters: lim})
 
+	// Phase 15: Forum and discussion sources (credentialless).
+	engine.Register(&StackOverflowSource{Registry: reg, Limiters: lim})
+	engine.Register(&RedditSource{Registry: reg, Limiters: lim})
+	engine.Register(&HackerNewsSource{Registry: reg, Limiters: lim})
+	engine.Register(&DiscordSource{Registry: reg, Limiters: lim})
+	engine.Register(&SlackSource{Registry: reg, Limiters: lim})
+	engine.Register(&DevToSource{Registry: reg, Limiters: lim})
+
 	// Phase 15: Collaboration tool sources (credentialless).
 	engine.Register(&TrelloSource{Registry: reg, Limiters: lim})
 	engine.Register(&NotionSource{Registry: reg, Limiters: lim})
 	engine.Register(&ConfluenceSource{Registry: reg, Limiters: lim})
 	engine.Register(&GoogleDocsSource{Registry: reg, Limiters: lim})
+
+	// Phase 15: Log aggregator sources (credentialless — target exposed instances).
+	engine.Register(&ElasticsearchSource{Registry: reg, Limiters: lim})
+	engine.Register(&KibanaSource{Registry: reg, Limiters: lim})
+	engine.Register(&SplunkSource{Registry: reg, Limiters: lim})
+	engine.Register(&GrafanaSource{Registry: reg, Limiters: lim})
+	engine.Register(&SentrySource{Registry: reg, Limiters: lim})
 }
diff --git a/pkg/recon/sources/slack.go b/pkg/recon/sources/slack.go
new file mode 100644
index 0000000..1b68361
--- /dev/null
+++ b/pkg/recon/sources/slack.go
@@ -0,0 +1,128 @@
+package sources
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"time"
+
+	"golang.org/x/time/rate"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// SlackSource discovers publicly indexed Slack messages that may contain
+// leaked API keys. Slack workspaces occasionally have public archives, and
+// search engines index shared Slack content. This source uses a dorking
+// approach against a configurable search endpoint.
+type SlackSource struct {
+	BaseURL  string
+	Registry *providers.Registry
+	Limiters *recon.LimiterRegistry
+	Client   *Client
+}
+
+var _ recon.ReconSource = (*SlackSource)(nil)
+
+// Name and the accessors below report this source's fixed settings.
+func (s *SlackSource) Name() string                { return "slack" }
+func (s *SlackSource) RateLimit() rate.Limit       { return rate.Every(3 * time.Second) }
+func (s *SlackSource) Burst() int                  { return 2 }
+func (s *SlackSource) RespectsRobots() bool        { return false }
+func (s *SlackSource) Enabled(_ recon.Config) bool { return true }
+
+// slackSearchResponse represents the search endpoint response for Slack dorking.
+type slackSearchResponse struct {
+	Results []slackSearchResult `json:"results"`
+}
+
+// slackSearchResult is one entry of the results array: its url and content fields.
+type slackSearchResult struct {
+	URL     string `json:"url"`
+	Content string `json:"content"`
+}
+
+// Sweep runs one "site:slack-archive.org OR site:slack-files.com <query>"
+// search per query from BuildQueries against the search endpoint (BaseURL,
+// or the built-in default when empty) and sends a low-confidence Finding on
+// out for every result whose content matches ciLogKeyPattern.
+//
+// Per-query failures (transport errors, unreadable or undecodable bodies)
+// skip that query. Sweep returns a non-nil error only when ctx is done or
+// the limiter wait fails.
+func (s *SlackSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
+	base := s.BaseURL
+	if base == "" {
+		base = "https://search.slackarchive.dev"
+	}
+	client := s.Client
+	if client == nil {
+		client = NewClient()
+	}
+
+	queries := BuildQueries(s.Registry, "slack")
+	if len(queries) == 0 {
+		return nil
+	}
+
+	for _, q := range queries {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		if s.Limiters != nil {
+			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+				return err
+			}
+		}
+
+		searchURL := fmt.Sprintf("%s/search?q=%s&format=json",
+			base, url.QueryEscape("site:slack-archive.org OR site:slack-files.com "+q))
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
+		if err != nil {
+			continue
+		}
+		req.Header.Set("Accept", "application/json")
+
+		resp, err := client.Do(ctx, req)
+		if err != nil {
+			continue
+		}
+
+		body, err := io.ReadAll(io.LimitReader(resp.Body, 256*1024))
+		_ = resp.Body.Close()
+		if err != nil {
+			continue
+		}
+
+		var result slackSearchResponse
+		if err := json.Unmarshal(body, &result); err != nil {
+			continue
+		}
+
+		for _, item := range result.Results {
+			if !ciLogKeyPattern.MatchString(item.Content) {
+				continue
+			}
+
+			finding := recon.Finding{
+				ProviderName: q,
+				Source:       item.URL,
+				SourceType:   "recon:slack",
+				Confidence:   "low",
+				DetectedAt:   time.Now(),
+			}
+			// Give up on a stalled consumer once ctx is cancelled.
+			select {
+			case out <- finding:
+			case <-ctx.Done():
+				return ctx.Err()
+			}
+		}
+	}
+	return nil
+}
diff --git a/pkg/recon/sources/slack_test.go b/pkg/recon/sources/slack_test.go
new file mode 100644
index 0000000..9a0437a
--- /dev/null
+++ b/pkg/recon/sources/slack_test.go
@@ -0,0 +1,71 @@
+package sources
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+func TestSlack_Name(t *testing.T) {
+	src := &SlackSource{}
+	if got := src.Name(); got != "slack" {
+		t.Fatalf("expected slack, got %s", got)
+	}
+}
+
+func TestSlack_Enabled(t *testing.T) {
+	src := &SlackSource{}
+	if enabled := src.Enabled(recon.Config{}); !enabled {
+		t.Fatal("SlackSource should always be enabled (credentialless)")
+	}
+}
+
+func TestSlack_Sweep(t *testing.T) {
+	// Stub search endpoint returning a single hit whose content holds a key.
+	const payload = `{"results":[{
+		"url":"https://slack-archive.org/workspace/channel/msg123",
+		"content":"config: secret_key = \"sk-proj-ABCDEF1234567890abcdef\""
+	}]}`
+	mux := http.NewServeMux()
+	mux.HandleFunc("/search", func(w http.ResponseWriter, _ *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(payload))
+	})
+	server := httptest.NewServer(mux)
+	defer server.Close()
+
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	src := &SlackSource{
+		BaseURL: server.URL,
+		Registry: providers.NewRegistryFromProviders([]providers.Provider{
+			{Name: "openai", Keywords: []string{"sk-proj-"}},
+		}),
+		Client: NewClient(),
+	}
+
+	// The buffer lets Sweep finish before anything drains the channel.
+	results := make(chan recon.Finding, 10)
+	sweepErr := src.Sweep(ctx, "", results)
+	close(results)
+	if sweepErr != nil {
+		t.Fatalf("Sweep error: %v", sweepErr)
+	}
+
+	var got []recon.Finding
+	for finding := range results {
+		got = append(got, finding)
+	}
+	switch {
+	case len(got) == 0:
+		t.Fatal("expected at least one finding from Slack archive search")
+	case got[0].SourceType != "recon:slack":
+		t.Fatalf("expected recon:slack, got %s", got[0].SourceType)
+	}
+}