feat(15-01): add Discord, Slack, DevTo recon sources and wire all six

- DiscordSource uses dorking approach against configurable search endpoint
- SlackSource uses dorking against slack-archive indexers
- DevToSource searches dev.to API articles list + detail for body_markdown
- RegisterAll extended to include all 6 Phase 15 forum sources
- All credentialless, use ciLogKeyPattern for key detection
This commit is contained in:
salvacybersec
2026-04-06 16:29:52 +03:00
parent 282c145a43
commit fcc1a769c5
7 changed files with 621 additions and 1 deletions

156
pkg/recon/sources/devto.go Normal file
View File

@@ -0,0 +1,156 @@
package sources
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// DevToSource searches the dev.to public API for articles containing leaked
// API keys. Developers write tutorials and guides on dev.to that sometimes
// include real credentials in code examples.
type DevToSource struct {
	BaseURL  string                 // override for the dev.to API root; empty means https://dev.to
	Registry *providers.Registry    // provider registry used to build search queries
	Limiters *recon.LimiterRegistry // optional shared rate limiters; nil disables limiting
	Client   *Client                // optional HTTP client; nil creates a default via NewClient
}

// Compile-time check that DevToSource implements recon.ReconSource.
var _ recon.ReconSource = (*DevToSource)(nil)

// Name returns the stable identifier for this source.
func (s *DevToSource) Name() string { return "devto" }

// RateLimit caps requests at one per second against the dev.to API.
func (s *DevToSource) RateLimit() rate.Limit { return rate.Every(1 * time.Second) }

// Burst allows short bursts of up to 5 requests.
func (s *DevToSource) Burst() int { return 5 }

// RespectsRobots reports that this source does not consult robots.txt
// (it talks to a public JSON API rather than crawled pages).
func (s *DevToSource) RespectsRobots() bool { return false }

// Enabled always returns true: the source needs no credentials.
func (s *DevToSource) Enabled(_ recon.Config) bool { return true }
// devtoArticleSummary represents an article in the dev.to /api/articles list response.
// Only the fields needed for the follow-up detail request are decoded.
type devtoArticleSummary struct {
	ID  int    `json:"id"`  // numeric article ID used to fetch the full body
	URL string `json:"url"` // canonical article URL
}

// devtoArticleDetail represents the full article from /api/articles/{id}.
type devtoArticleDetail struct {
	BodyMarkdown string `json:"body_markdown"` // raw markdown scanned for key patterns
	URL          string `json:"url"`           // canonical article URL reported in findings
}
// Sweep searches the dev.to articles API for each provider-derived query,
// fetches the full markdown body of up to five articles per query, and emits
// a medium-confidence finding for every article whose body matches
// ciLogKeyPattern. Failures of individual requests (transport errors,
// non-200 responses, decode errors) are skipped so one bad response does not
// abort the sweep; only context cancellation returns an error.
func (s *DevToSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
	base := s.BaseURL
	if base == "" {
		base = "https://dev.to"
	}
	client := s.Client
	if client == nil {
		client = NewClient()
	}
	queries := BuildQueries(s.Registry, "devto")
	if len(queries) == 0 {
		return nil
	}
	for _, q := range queries {
		if err := ctx.Err(); err != nil {
			return err
		}
		if s.Limiters != nil {
			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
				return err
			}
		}
		// Search for articles by tag keyword.
		listURL := fmt.Sprintf("%s/api/articles?tag=%s&per_page=10&state=rising",
			base, url.QueryEscape(q))
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, listURL, nil)
		if err != nil {
			continue
		}
		req.Header.Set("Accept", "application/json")
		resp, err := client.Do(ctx, req)
		if err != nil {
			continue
		}
		body, err := io.ReadAll(io.LimitReader(resp.Body, 256*1024))
		_ = resp.Body.Close()
		// Skip read failures and non-200 responses (rate limiting, API
		// errors) instead of trying to parse an error payload as a list.
		if err != nil || resp.StatusCode != http.StatusOK {
			continue
		}
		var articles []devtoArticleSummary
		if err := json.Unmarshal(body, &articles); err != nil {
			continue
		}
		// Limit to first 5 articles to stay within rate limits.
		limit := 5
		if len(articles) < limit {
			limit = len(articles)
		}
		for _, article := range articles[:limit] {
			if err := ctx.Err(); err != nil {
				return err
			}
			if s.Limiters != nil {
				if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
					return err
				}
			}
			// Fetch full article to get body_markdown.
			detailURL := fmt.Sprintf("%s/api/articles/%d", base, article.ID)
			detailReq, err := http.NewRequestWithContext(ctx, http.MethodGet, detailURL, nil)
			if err != nil {
				continue
			}
			detailReq.Header.Set("Accept", "application/json")
			detailResp, err := client.Do(ctx, detailReq)
			if err != nil {
				continue
			}
			detailBody, err := io.ReadAll(io.LimitReader(detailResp.Body, 256*1024))
			_ = detailResp.Body.Close()
			// Same guard as the list request: do not decode error payloads.
			if err != nil || detailResp.StatusCode != http.StatusOK {
				continue
			}
			var detail devtoArticleDetail
			if err := json.Unmarshal(detailBody, &detail); err != nil {
				continue
			}
			if ciLogKeyPattern.MatchString(detail.BodyMarkdown) {
				// Prefer the canonical article URL; fall back to the API
				// URL when the detail response omits it.
				articleURL := detail.URL
				if articleURL == "" {
					articleURL = fmt.Sprintf("%s/api/articles/%d", base, article.ID)
				}
				out <- recon.Finding{
					ProviderName: q,
					Source:       articleURL,
					SourceType:   "recon:devto",
					Confidence:   "medium",
					DetectedAt:   time.Now(),
				}
			}
		}
	}
	return nil
}

View File

@@ -0,0 +1,86 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// TestDevTo_Name verifies the source registers under the "devto" identifier.
func TestDevTo_Name(t *testing.T) {
	src := &DevToSource{}
	if got := src.Name(); got != "devto" {
		t.Fatalf("expected devto, got %s", got)
	}
}
// TestDevTo_Enabled verifies the source is active with no credentials configured.
func TestDevTo_Enabled(t *testing.T) {
	src := &DevToSource{}
	if enabled := src.Enabled(recon.Config{}); !enabled {
		t.Fatal("DevToSource should always be enabled (credentialless)")
	}
}
// TestDevTo_Sweep exercises the full list -> detail -> finding flow against
// a stub dev.to API and checks that a key-bearing article is reported.
func TestDevTo_Sweep(t *testing.T) {
	mux := http.NewServeMux()
	// List endpoint. The pattern "/api/articles" has no trailing slash, so
	// http.ServeMux matches it exactly; detail requests (/api/articles/42)
	// are routed to the handler registered below and never reach this one.
	// (A previous in-handler path check for the detail URL was unreachable
	// dead code and has been removed.)
	mux.HandleFunc("/api/articles", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`[{"id":42,"url":"https://dev.to/user/tutorial-post"}]`))
	})
	// Detail endpoint for the single article returned by the list.
	mux.HandleFunc("/api/articles/42", func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/json")
		_, _ = w.Write([]byte(`{
"body_markdown":"# Tutorial\nSet your api_key = \"sk-proj-ABCDEF1234567890abcdef\" in .env\n",
"url":"https://dev.to/user/tutorial-post"
}`))
	})
	srv := httptest.NewServer(mux)
	defer srv.Close()
	reg := providers.NewRegistryFromProviders([]providers.Provider{
		{Name: "openai", Keywords: []string{"sk-proj-"}},
	})
	s := &DevToSource{
		BaseURL:  srv.URL,
		Registry: reg,
		Client:   NewClient(),
	}
	out := make(chan recon.Finding, 10)
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()
	err := s.Sweep(ctx, "", out)
	close(out)
	if err != nil {
		t.Fatalf("Sweep error: %v", err)
	}
	var findings []recon.Finding
	for f := range out {
		findings = append(findings, f)
	}
	if len(findings) == 0 {
		t.Fatal("expected at least one finding from dev.to article")
	}
	if findings[0].SourceType != "recon:devto" {
		t.Fatalf("expected recon:devto, got %s", findings[0].SourceType)
	}
}

View File

@@ -0,0 +1,110 @@
package sources
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// DiscordSource discovers Discord content indexed by search engines that may
// contain leaked API keys. Discord has no public message search API, so this
// source uses a dorking approach against a configurable search endpoint to
// find Discord content cached by third-party indexers.
type DiscordSource struct {
	BaseURL  string                 // search endpoint root; empty means https://search.discobot.dev
	Registry *providers.Registry    // provider registry used to build dork queries
	Limiters *recon.LimiterRegistry // optional shared rate limiters; nil disables limiting
	Client   *Client                // optional HTTP client; nil creates a default via NewClient
}

// Compile-time check that DiscordSource implements recon.ReconSource.
var _ recon.ReconSource = (*DiscordSource)(nil)

// Name returns the stable identifier for this source.
func (s *DiscordSource) Name() string { return "discord" }

// RateLimit caps requests at one per three seconds against the search endpoint.
func (s *DiscordSource) RateLimit() rate.Limit { return rate.Every(3 * time.Second) }

// Burst allows short bursts of up to 2 requests.
func (s *DiscordSource) Burst() int { return 2 }

// RespectsRobots reports that this source does not consult robots.txt.
func (s *DiscordSource) RespectsRobots() bool { return false }

// Enabled always returns true: the source needs no credentials.
func (s *DiscordSource) Enabled(_ recon.Config) bool { return true }
// discordSearchResponse represents the search endpoint response for Discord dorking.
type discordSearchResponse struct {
	Results []discordSearchResult `json:"results"`
}

// discordSearchResult is a single search hit: the indexed Discord URL plus
// the cached text content that is scanned for key patterns.
type discordSearchResult struct {
	URL     string `json:"url"`
	Content string `json:"content"`
}
// Sweep issues a "site:discord.com" dork for each provider-derived query
// against the configured search endpoint and emits a low-confidence finding
// for every result whose cached content matches ciLogKeyPattern. Failures of
// individual requests (transport errors, non-200 responses, decode errors)
// are skipped; only context cancellation returns an error.
func (s *DiscordSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
	base := s.BaseURL
	if base == "" {
		base = "https://search.discobot.dev"
	}
	client := s.Client
	if client == nil {
		client = NewClient()
	}
	queries := BuildQueries(s.Registry, "discord")
	if len(queries) == 0 {
		return nil
	}
	for _, q := range queries {
		if err := ctx.Err(); err != nil {
			return err
		}
		if s.Limiters != nil {
			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
				return err
			}
		}
		searchURL := fmt.Sprintf("%s/search?q=%s&format=json",
			base, url.QueryEscape("site:discord.com "+q))
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
		if err != nil {
			continue
		}
		req.Header.Set("Accept", "application/json")
		resp, err := client.Do(ctx, req)
		if err != nil {
			continue
		}
		body, err := io.ReadAll(io.LimitReader(resp.Body, 256*1024))
		_ = resp.Body.Close()
		// Skip read failures and non-200 responses (rate limiting, endpoint
		// errors) rather than attempting to decode an error payload.
		if err != nil || resp.StatusCode != http.StatusOK {
			continue
		}
		var result discordSearchResponse
		if err := json.Unmarshal(body, &result); err != nil {
			continue
		}
		for _, item := range result.Results {
			if ciLogKeyPattern.MatchString(item.Content) {
				out <- recon.Finding{
					ProviderName: q,
					Source:       item.URL,
					SourceType:   "recon:discord",
					Confidence:   "low",
					DetectedAt:   time.Now(),
				}
			}
		}
	}
	return nil
}

View File

@@ -0,0 +1,71 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// TestDiscord_Name verifies the source registers under the "discord" identifier.
func TestDiscord_Name(t *testing.T) {
	src := &DiscordSource{}
	if got := src.Name(); got != "discord" {
		t.Fatalf("expected discord, got %s", got)
	}
}
// TestDiscord_Enabled verifies the source is active with no credentials configured.
func TestDiscord_Enabled(t *testing.T) {
	src := &DiscordSource{}
	if enabled := src.Enabled(recon.Config{}); !enabled {
		t.Fatal("DiscordSource should always be enabled (credentialless)")
	}
}
func TestDiscord_Sweep(t *testing.T) {
mux := http.NewServeMux()
mux.HandleFunc("/search", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{"results":[{
"url":"https://discord.com/channels/123/456/789",
"content":"hey use this token: api_key = \"sk-proj-ABCDEF1234567890abcdef\""
}]}`))
})
srv := httptest.NewServer(mux)
defer srv.Close()
reg := providers.NewRegistryFromProviders([]providers.Provider{
{Name: "openai", Keywords: []string{"sk-proj-"}},
})
s := &DiscordSource{
BaseURL: srv.URL,
Registry: reg,
Client: NewClient(),
}
out := make(chan recon.Finding, 10)
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
err := s.Sweep(ctx, "", out)
close(out)
if err != nil {
t.Fatalf("Sweep error: %v", err)
}
var findings []recon.Finding
for f := range out {
findings = append(findings, f)
}
if len(findings) == 0 {
t.Fatal("expected at least one finding from Discord search")
}
if findings[0].SourceType != "recon:discord" {
t.Fatalf("expected recon:discord, got %s", findings[0].SourceType)
}
}

View File

@@ -61,7 +61,8 @@ type SourcesConfig struct {
// RegisterAll registers every Phase 10 code-hosting, Phase 11 search engine /
// paste site, Phase 12 IoT scanner / cloud storage, Phase 13 package
// registry / container / IaC, Phase 14 CI/CD log / web archive / frontend
// leak, and Phase 15 collaboration tool source on engine (56 sources total).
// leak, and Phase 15 forum / collaboration tool / log aggregator source on
// engine (67 sources total).
//
// All sources are registered unconditionally so that cmd/recon.go can surface
// the full catalog via `keyhunter recon list` regardless of which credentials
@@ -261,9 +262,24 @@ func RegisterAll(engine *recon.Engine, cfg SourcesConfig) {
// Phase 14: JS bundle analysis (credentialless).
engine.Register(&JSBundleSource{Registry: reg, Limiters: lim})
// Phase 15: Forum and discussion sources (credentialless).
engine.Register(&StackOverflowSource{Registry: reg, Limiters: lim})
engine.Register(&RedditSource{Registry: reg, Limiters: lim})
engine.Register(&HackerNewsSource{Registry: reg, Limiters: lim})
engine.Register(&DiscordSource{Registry: reg, Limiters: lim})
engine.Register(&SlackSource{Registry: reg, Limiters: lim})
engine.Register(&DevToSource{Registry: reg, Limiters: lim})
// Phase 15: Collaboration tool sources (credentialless).
engine.Register(&TrelloSource{Registry: reg, Limiters: lim})
engine.Register(&NotionSource{Registry: reg, Limiters: lim})
engine.Register(&ConfluenceSource{Registry: reg, Limiters: lim})
engine.Register(&GoogleDocsSource{Registry: reg, Limiters: lim})
// Phase 15: Log aggregator sources (credentialless — target exposed instances).
engine.Register(&ElasticsearchSource{Registry: reg, Limiters: lim})
engine.Register(&KibanaSource{Registry: reg, Limiters: lim})
engine.Register(&SplunkSource{Registry: reg, Limiters: lim})
engine.Register(&GrafanaSource{Registry: reg, Limiters: lim})
engine.Register(&SentrySource{Registry: reg, Limiters: lim})
}

110
pkg/recon/sources/slack.go Normal file
View File

@@ -0,0 +1,110 @@
package sources
import (
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"time"
"golang.org/x/time/rate"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// SlackSource discovers publicly indexed Slack messages that may contain
// leaked API keys. Slack workspaces occasionally have public archives, and
// search engines index shared Slack content. This source uses a dorking
// approach against a configurable search endpoint.
type SlackSource struct {
	BaseURL  string                 // search endpoint root; empty means https://search.slackarchive.dev
	Registry *providers.Registry    // provider registry used to build dork queries
	Limiters *recon.LimiterRegistry // optional shared rate limiters; nil disables limiting
	Client   *Client                // optional HTTP client; nil creates a default via NewClient
}

// Compile-time check that SlackSource implements recon.ReconSource.
var _ recon.ReconSource = (*SlackSource)(nil)

// Name returns the stable identifier for this source.
func (s *SlackSource) Name() string { return "slack" }

// RateLimit caps requests at one per three seconds against the search endpoint.
func (s *SlackSource) RateLimit() rate.Limit { return rate.Every(3 * time.Second) }

// Burst allows short bursts of up to 2 requests.
func (s *SlackSource) Burst() int { return 2 }

// RespectsRobots reports that this source does not consult robots.txt.
func (s *SlackSource) RespectsRobots() bool { return false }

// Enabled always returns true: the source needs no credentials.
func (s *SlackSource) Enabled(_ recon.Config) bool { return true }
// slackSearchResponse represents the search endpoint response for Slack dorking.
type slackSearchResponse struct {
	Results []slackSearchResult `json:"results"`
}

// slackSearchResult is a single search hit: the indexed archive URL plus the
// cached text content that is scanned for key patterns.
type slackSearchResult struct {
	URL     string `json:"url"`
	Content string `json:"content"`
}
// Sweep issues a dork restricted to known Slack archive indexers for each
// provider-derived query and emits a low-confidence finding for every result
// whose cached content matches ciLogKeyPattern. Failures of individual
// requests (transport errors, non-200 responses, decode errors) are skipped;
// only context cancellation returns an error.
func (s *SlackSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
	base := s.BaseURL
	if base == "" {
		base = "https://search.slackarchive.dev"
	}
	client := s.Client
	if client == nil {
		client = NewClient()
	}
	queries := BuildQueries(s.Registry, "slack")
	if len(queries) == 0 {
		return nil
	}
	for _, q := range queries {
		if err := ctx.Err(); err != nil {
			return err
		}
		if s.Limiters != nil {
			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
				return err
			}
		}
		searchURL := fmt.Sprintf("%s/search?q=%s&format=json",
			base, url.QueryEscape("site:slack-archive.org OR site:slack-files.com "+q))
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
		if err != nil {
			continue
		}
		req.Header.Set("Accept", "application/json")
		resp, err := client.Do(ctx, req)
		if err != nil {
			continue
		}
		body, err := io.ReadAll(io.LimitReader(resp.Body, 256*1024))
		_ = resp.Body.Close()
		// Skip read failures and non-200 responses (rate limiting, endpoint
		// errors) rather than attempting to decode an error payload.
		if err != nil || resp.StatusCode != http.StatusOK {
			continue
		}
		var result slackSearchResponse
		if err := json.Unmarshal(body, &result); err != nil {
			continue
		}
		for _, item := range result.Results {
			if ciLogKeyPattern.MatchString(item.Content) {
				out <- recon.Finding{
					ProviderName: q,
					Source:       item.URL,
					SourceType:   "recon:slack",
					Confidence:   "low",
					DetectedAt:   time.Now(),
				}
			}
		}
	}
	return nil
}

View File

@@ -0,0 +1,71 @@
package sources
import (
"context"
"net/http"
"net/http/httptest"
"testing"
"time"
"github.com/salvacybersec/keyhunter/pkg/providers"
"github.com/salvacybersec/keyhunter/pkg/recon"
)
// TestSlack_Name verifies the source registers under the "slack" identifier.
func TestSlack_Name(t *testing.T) {
	src := &SlackSource{}
	if got := src.Name(); got != "slack" {
		t.Fatalf("expected slack, got %s", got)
	}
}
// TestSlack_Enabled verifies the source is active with no credentials configured.
func TestSlack_Enabled(t *testing.T) {
	src := &SlackSource{}
	if enabled := src.Enabled(recon.Config{}); !enabled {
		t.Fatal("SlackSource should always be enabled (credentialless)")
	}
}
func TestSlack_Sweep(t *testing.T) {
mux := http.NewServeMux()
mux.HandleFunc("/search", func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/json")
_, _ = w.Write([]byte(`{"results":[{
"url":"https://slack-archive.org/workspace/channel/msg123",
"content":"config: secret_key = \"sk-proj-ABCDEF1234567890abcdef\""
}]}`))
})
srv := httptest.NewServer(mux)
defer srv.Close()
reg := providers.NewRegistryFromProviders([]providers.Provider{
{Name: "openai", Keywords: []string{"sk-proj-"}},
})
s := &SlackSource{
BaseURL: srv.URL,
Registry: reg,
Client: NewClient(),
}
out := make(chan recon.Finding, 10)
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
err := s.Sweep(ctx, "", out)
close(out)
if err != nil {
t.Fatalf("Sweep error: %v", err)
}
var findings []recon.Finding
for f := range out {
findings = append(findings, f)
}
if len(findings) == 0 {
t.Fatal("expected at least one finding from Slack archive search")
}
if findings[0].SourceType != "recon:slack" {
t.Fatalf("expected recon:slack, got %s", findings[0].SourceType)
}
}