From 7bb614678da9ed7edf4a7203a4b6c8ed8a5cdca5 Mon Sep 17 00:00:00 2001
From: salvacybersec
Date: Mon, 6 Apr 2026 13:50:04 +0300
Subject: [PATCH] feat(15-02): add Trello and Notion ReconSource implementations

- TrelloSource searches public Trello boards via the /1/search API
- NotionSource uses dorking to discover and scrape public Notion pages
- Both are credentialless and follow the established Phase 10 pattern
- Sweep honors context cancellation while emitting findings
- Tests with httptest mocks confirm Sweep emits findings
---
 pkg/recon/sources/notion.go      | 161 +++++++++++++++++++++++++++++++
 pkg/recon/sources/notion_test.go | 108 +++++++++++++++++++++
 pkg/recon/sources/trello.go      | 127 ++++++++++++++++++++++++
 pkg/recon/sources/trello_test.go | 100 +++++++++++++++++++
 4 files changed, 496 insertions(+)
 create mode 100644 pkg/recon/sources/notion.go
 create mode 100644 pkg/recon/sources/notion_test.go
 create mode 100644 pkg/recon/sources/trello.go
 create mode 100644 pkg/recon/sources/trello_test.go

diff --git a/pkg/recon/sources/notion.go b/pkg/recon/sources/notion.go
new file mode 100644
index 0000000..1bc38eb
--- /dev/null
+++ b/pkg/recon/sources/notion.go
@@ -0,0 +1,161 @@
+package sources
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"time"
+
+	"golang.org/x/time/rate"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// NotionSource searches publicly shared Notion pages for leaked API keys.
+// Notion pages shared with "anyone with the link" are indexable by search
+// engines. This source uses a dorking approach to discover such pages and
+// then scrapes their content for credentials.
+type NotionSource struct {
+	BaseURL  string
+	Registry *providers.Registry
+	Limiters *recon.LimiterRegistry
+	Client   *Client
+}
+
+var _ recon.ReconSource = (*NotionSource)(nil)
+
+// Name and the accessors below implement recon.ReconSource. The source needs
+// no credentials, so Enabled always reports true.
+func (s *NotionSource) Name() string                { return "notion" }
+func (s *NotionSource) RateLimit() rate.Limit       { return rate.Every(3 * time.Second) }
+func (s *NotionSource) Burst() int                  { return 2 }
+func (s *NotionSource) RespectsRobots() bool        { return true }
+func (s *NotionSource) Enabled(_ recon.Config) bool { return true }
+
+// notionSearchResponse represents dork search results pointing to Notion pages.
+type notionSearchResponse struct {
+	Results []notionSearchResult `json:"results"`
+}
+
+// notionSearchResult is a single search hit: the page URL and its title.
+type notionSearchResult struct {
+	URL   string `json:"url"`
+	Title string `json:"title"`
+}
+
+// Sweep runs one dork search per query built from the registry, fetches every
+// page URL the search returns, and emits a finding on out for each page whose
+// body matches ciLogKeyPattern. Request, read, and decode failures skip the
+// affected query or page (best effort); the only errors returned are context
+// cancellation and limiter wait errors.
+func (s *NotionSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
+	base := s.BaseURL
+	if base == "" {
+		base = "https://search.notion.dev"
+	}
+	client := s.Client
+	if client == nil {
+		client = NewClient()
+	}
+
+	queries := BuildQueries(s.Registry, "notion")
+	if len(queries) == 0 {
+		return nil
+	}
+
+	for _, q := range queries {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		if s.Limiters != nil {
+			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+				return err
+			}
+		}
+
+		// Search for public Notion pages via dorking.
+		searchURL := fmt.Sprintf("%s/search?q=%s&format=json",
+			base, url.QueryEscape("site:notion.site OR site:notion.so "+q))
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
+		if err != nil {
+			continue
+		}
+		req.Header.Set("Accept", "application/json")
+
+		resp, err := client.Do(ctx, req)
+		if err != nil {
+			continue
+		}
+
+		body, err := io.ReadAll(io.LimitReader(resp.Body, 256*1024))
+		_ = resp.Body.Close()
+		if err != nil {
+			continue
+		}
+
+		var results notionSearchResponse
+		if err := json.Unmarshal(body, &results); err != nil {
+			continue
+		}
+
+		// Fetch each discovered Notion page and scan for keys.
+		for _, result := range results.Results {
+			if err := ctx.Err(); err != nil {
+				return err
+			}
+
+			// Skip results without a URL before waiting on the limiter so a
+			// malformed entry does not consume a rate-limit slot.
+			if result.URL == "" {
+				continue
+			}
+
+			if s.Limiters != nil {
+				if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+					return err
+				}
+			}
+
+			pageReq, err := http.NewRequestWithContext(ctx, http.MethodGet, result.URL, nil)
+			if err != nil {
+				continue
+			}
+
+			pageResp, err := client.Do(ctx, pageReq)
+			if err != nil {
+				continue
+			}
+
+			pageBody, err := io.ReadAll(io.LimitReader(pageResp.Body, 256*1024))
+			_ = pageResp.Body.Close()
+			if err != nil {
+				continue
+			}
+
+			if !ciLogKeyPattern.Match(pageBody) {
+				continue
+			}
+
+			finding := recon.Finding{
+				ProviderName: q,
+				Source:       result.URL,
+				SourceType:   "recon:notion",
+				Confidence:   "medium",
+				DetectedAt:   time.Now(),
+			}
+			// Honor cancellation while sending so a stalled consumer cannot
+			// block the sweep forever.
+			select {
+			case out <- finding:
+			case <-ctx.Done():
+				return ctx.Err()
+			}
+		}
+	}
+	return nil
+}
diff --git a/pkg/recon/sources/notion_test.go b/pkg/recon/sources/notion_test.go
new file mode 100644
index 0000000..6c3d264
--- /dev/null
+++ b/pkg/recon/sources/notion_test.go
@@ -0,0 +1,108 @@
+package sources
+
+import (
+	"context"
+	"errors"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+func TestNotion_Name(t *testing.T) {
+	s := &NotionSource{}
+	if s.Name() != "notion" {
+		t.Fatalf("expected notion, got %s", s.Name())
+	}
+}
+
+func TestNotion_Enabled(t *testing.T) {
+	s := &NotionSource{}
+	if !s.Enabled(recon.Config{}) {
+		t.Fatal("NotionSource should always be enabled (credentialless)")
+	}
+}
+
+func TestNotion_Sweep(t *testing.T) {
+	mux := http.NewServeMux()
+
+	// Mock search endpoint returning a Notion page URL.
+	mux.HandleFunc("/search", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"results":[{"url":"` + "http://" + r.Host + `/page/abc123","title":"API Keys"}]}`))
+	})
+
+	// Mock page content with a leaked key.
+	mux.HandleFunc("/page/abc123", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/html")
+		_, _ = w.Write([]byte(`Our API credentials: api_key = sk-proj-ABCDEF1234567890abcdef`))
+	})
+
+	srv := httptest.NewServer(mux)
+	defer srv.Close()
+
+	reg := providers.NewRegistryFromProviders([]providers.Provider{
+		{Name: "openai", Keywords: []string{"sk-proj-"}},
+	})
+
+	s := &NotionSource{
+		BaseURL:  srv.URL,
+		Registry: reg,
+		Client:   NewClient(),
+	}
+
+	out := make(chan recon.Finding, 10)
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	err := s.Sweep(ctx, "", out)
+	close(out)
+	if err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) == 0 {
+		t.Fatal("expected at least one finding from Notion page")
+	}
+	if findings[0].SourceType != "recon:notion" {
+		t.Fatalf("expected recon:notion, got %s", findings[0].SourceType)
+	}
+}
+
+// TestNotion_Sweep_StalledConsumer verifies that Sweep returns once the
+// context expires instead of blocking forever on an unread findings channel.
+func TestNotion_Sweep_StalledConsumer(t *testing.T) {
+	mux := http.NewServeMux()
+	mux.HandleFunc("/search", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"results":[{"url":"` + "http://" + r.Host + `/page/abc123","title":"API Keys"}]}`))
+	})
+	mux.HandleFunc("/page/abc123", func(w http.ResponseWriter, r *http.Request) {
+		_, _ = w.Write([]byte(`api_key = sk-proj-ABCDEF1234567890abcdef`))
+	})
+
+	srv := httptest.NewServer(mux)
+	defer srv.Close()
+
+	reg := providers.NewRegistryFromProviders([]providers.Provider{
+		{Name: "openai", Keywords: []string{"sk-proj-"}},
+	})
+	s := &NotionSource{BaseURL: srv.URL, Registry: reg, Client: NewClient()}
+
+	// Unbuffered and never read: the send can only end via cancellation.
+	out := make(chan recon.Finding)
+	ctx, cancel := context.WithTimeout(context.Background(), 300*time.Millisecond)
+	defer cancel()
+
+	err := s.Sweep(ctx, "", out)
+	if !errors.Is(err, context.DeadlineExceeded) {
+		t.Fatalf("expected context.DeadlineExceeded, got %v", err)
+	}
+}
diff --git a/pkg/recon/sources/trello.go b/pkg/recon/sources/trello.go
new file mode 100644
index 0000000..6b1f0cc
--- /dev/null
+++ b/pkg/recon/sources/trello.go
@@ -0,0 +1,127 @@
+package sources
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/url"
+	"time"
+
+	"golang.org/x/time/rate"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// TrelloSource searches public Trello boards for leaked API keys.
+// Trello public boards are searchable without authentication, and developers
+// often paste credentials into card descriptions or comments.
+type TrelloSource struct {
+	BaseURL  string
+	Registry *providers.Registry
+	Limiters *recon.LimiterRegistry
+	Client   *Client
+}
+
+var _ recon.ReconSource = (*TrelloSource)(nil)
+
+// Name and the accessors below implement recon.ReconSource. The source needs
+// no credentials, so Enabled always reports true.
+func (s *TrelloSource) Name() string                { return "trello" }
+func (s *TrelloSource) RateLimit() rate.Limit       { return rate.Every(2 * time.Second) }
+func (s *TrelloSource) Burst() int                  { return 3 }
+func (s *TrelloSource) RespectsRobots() bool        { return false }
+func (s *TrelloSource) Enabled(_ recon.Config) bool { return true }
+
+// trelloSearchResponse represents the Trello search API response.
+type trelloSearchResponse struct {
+	Cards []trelloCard `json:"cards"`
+}
+
+// trelloCard is a single card from the search response.
+type trelloCard struct {
+	ID   string `json:"id"`
+	Name string `json:"name"`
+	Desc string `json:"desc"`
+}
+
+// Sweep queries the Trello search endpoint once per query built from the
+// registry and emits a finding on out for each returned card whose description
+// matches ciLogKeyPattern. Request, read, and decode failures skip the affected
+// query (best effort); the only errors returned are context cancellation and
+// limiter wait errors.
+func (s *TrelloSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
+	base := s.BaseURL
+	if base == "" {
+		base = "https://api.trello.com"
+	}
+	client := s.Client
+	if client == nil {
+		client = NewClient()
+	}
+
+	queries := BuildQueries(s.Registry, "trello")
+	if len(queries) == 0 {
+		return nil
+	}
+
+	for _, q := range queries {
+		if err := ctx.Err(); err != nil {
+			return err
+		}
+
+		if s.Limiters != nil {
+			if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+				return err
+			}
+		}
+
+		searchURL := fmt.Sprintf("%s/1/search?query=%s&modelTypes=cards&card_fields=name,desc&cards_limit=10",
+			base, url.QueryEscape(q))
+		req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
+		if err != nil {
+			continue
+		}
+		req.Header.Set("Accept", "application/json")
+
+		resp, err := client.Do(ctx, req)
+		if err != nil {
+			continue
+		}
+
+		body, err := io.ReadAll(io.LimitReader(resp.Body, 256*1024))
+		_ = resp.Body.Close()
+		if err != nil {
+			continue
+		}
+
+		var result trelloSearchResponse
+		if err := json.Unmarshal(body, &result); err != nil {
+			continue
+		}
+
+		for _, card := range result.Cards {
+			if !ciLogKeyPattern.MatchString(card.Desc) {
+				continue
+			}
+
+			finding := recon.Finding{
+				ProviderName: q,
+				Source:       fmt.Sprintf("https://trello.com/c/%s", card.ID),
+				SourceType:   "recon:trello",
+				Confidence:   "medium",
+				DetectedAt:   time.Now(),
+			}
+			// Honor cancellation while sending so a stalled consumer cannot
+			// block the sweep forever.
+			select {
+			case out <- finding:
+			case <-ctx.Done():
+				return ctx.Err()
+			}
+		}
+	}
+	return nil
+}
diff --git a/pkg/recon/sources/trello_test.go b/pkg/recon/sources/trello_test.go
new file mode 100644
index 0000000..2b2d9d6
--- /dev/null
+++ b/pkg/recon/sources/trello_test.go
@@ -0,0 +1,100 @@
+package sources
+
+import (
+	"context"
+	"errors"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/salvacybersec/keyhunter/pkg/providers"
+	"github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+func TestTrello_Name(t *testing.T) {
+	s := &TrelloSource{}
+	if s.Name() != "trello" {
+		t.Fatalf("expected trello, got %s", s.Name())
+	}
+}
+
+func TestTrello_Enabled(t *testing.T) {
+	s := &TrelloSource{}
+	if !s.Enabled(recon.Config{}) {
+		t.Fatal("TrelloSource should always be enabled (credentialless)")
+	}
+}
+
+func TestTrello_Sweep(t *testing.T) {
+	mux := http.NewServeMux()
+	mux.HandleFunc("/1/search", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"cards":[{"id":"abc123","name":"Config","desc":"api_key = sk-proj-ABCDEF1234567890abcdef"}]}`))
+	})
+
+	srv := httptest.NewServer(mux)
+	defer srv.Close()
+
+	reg := providers.NewRegistryFromProviders([]providers.Provider{
+		{Name: "openai", Keywords: []string{"sk-proj-"}},
+	})
+
+	s := &TrelloSource{
+		BaseURL:  srv.URL,
+		Registry: reg,
+		Client:   NewClient(),
+	}
+
+	out := make(chan recon.Finding, 10)
+	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+	defer cancel()
+
+	err := s.Sweep(ctx, "", out)
+	close(out)
+	if err != nil {
+		t.Fatalf("Sweep error: %v", err)
+	}
+
+	var findings []recon.Finding
+	for f := range out {
+		findings = append(findings, f)
+	}
+	if len(findings) == 0 {
+		t.Fatal("expected at least one finding from Trello card")
+	}
+	if findings[0].SourceType != "recon:trello" {
+		t.Fatalf("expected recon:trello, got %s", findings[0].SourceType)
+	}
+	if findings[0].Source != "https://trello.com/c/abc123" {
+		t.Fatalf("expected trello card URL, got %s", findings[0].Source)
+	}
+}
+
+// TestTrello_Sweep_StalledConsumer verifies that Sweep returns once the
+// context expires instead of blocking forever on an unread findings channel.
+func TestTrello_Sweep_StalledConsumer(t *testing.T) {
+	mux := http.NewServeMux()
+	mux.HandleFunc("/1/search", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "application/json")
+		_, _ = w.Write([]byte(`{"cards":[{"id":"abc123","name":"Config","desc":"api_key = sk-proj-ABCDEF1234567890abcdef"}]}`))
+	})
+
+	srv := httptest.NewServer(mux)
+	defer srv.Close()
+
+	reg := providers.NewRegistryFromProviders([]providers.Provider{
+		{Name: "openai", Keywords: []string{"sk-proj-"}},
+	})
+	s := &TrelloSource{BaseURL: srv.URL, Registry: reg, Client: NewClient()}
+
+	// Unbuffered and never read: the send can only end via cancellation.
+	out := make(chan recon.Finding)
+	ctx, cancel := context.WithTimeout(context.Background(), 300*time.Millisecond)
+	defer cancel()
+
+	err := s.Sweep(ctx, "", out)
+	if !errors.Is(err, context.DeadlineExceeded) {
+		t.Fatalf("expected context.DeadlineExceeded, got %v", err)
+	}
+}