diff --git a/pkg/recon/sources/confluence.go b/pkg/recon/sources/confluence.go
new file mode 100644
index 0000000..6059403
--- /dev/null
+++ b/pkg/recon/sources/confluence.go
@@ -0,0 +1,133 @@
+package sources
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "io"
+ "net/http"
+ "net/url"
+ "regexp"
+ "time"
+
+ "golang.org/x/time/rate"
+
+ "github.com/salvacybersec/keyhunter/pkg/providers"
+ "github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// ConfluenceSource searches publicly exposed Confluence wikis for leaked API
+// keys. Many Confluence instances are misconfigured to allow anonymous access,
+// and their REST API then exposes page content, including credentials pasted
+// into documentation.
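+//
+// As a sketch (the exact query text comes from BuildQueries), each sweep issues
+// a CQL full-text search of the form:
+// GET <base>/rest/api/content/search?cql=text~"<keyword>"&limit=10&expand=body.storage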
+type ConfluenceSource struct {
+ BaseURL string
+ Registry *providers.Registry
+ Limiters *recon.LimiterRegistry
+ Client *Client
+}
+
+var _ recon.ReconSource = (*ConfluenceSource)(nil)
+
+func (s *ConfluenceSource) Name() string { return "confluence" }
+func (s *ConfluenceSource) RateLimit() rate.Limit { return rate.Every(3 * time.Second) }
+func (s *ConfluenceSource) Burst() int { return 2 }
+func (s *ConfluenceSource) RespectsRobots() bool { return true }
+func (s *ConfluenceSource) Enabled(_ recon.Config) bool { return true }
+
+// confluenceSearchResponse represents the Confluence REST API content search response.
+type confluenceSearchResponse struct {
+ Results []confluenceResult `json:"results"`
+}
+
+type confluenceResult struct {
+ ID string `json:"id"`
+ Title string `json:"title"`
+ Body confluenceBody `json:"body"`
+ Links confluenceLinks `json:"_links"`
+}
+
+type confluenceBody struct {
+ Storage confluenceStorage `json:"storage"`
+}
+
+type confluenceStorage struct {
+ Value string `json:"value"`
+}
+
+type confluenceLinks struct {
+ WebUI string `json:"webui"`
+}
+
+// htmlTagPattern strips HTML tags to extract text content from Confluence storage format.
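+// Confluence's storage representation is XHTML, so a simple tag strip is
+// normally enough to recover scannable plain text.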
+var htmlTagPattern = regexp.MustCompile(`<[^>]*>`)
+
+func (s *ConfluenceSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
+ base := s.BaseURL
+ if base == "" {
+ base = "https://confluence.example.com"
+ }
+ client := s.Client
+ if client == nil {
+ client = NewClient()
+ }
+
+ queries := BuildQueries(s.Registry, "confluence")
+ if len(queries) == 0 {
+ return nil
+ }
+
+ for _, q := range queries {
+ if err := ctx.Err(); err != nil {
+ return err
+ }
+
+ if s.Limiters != nil {
+ if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+ return err
+ }
+ }
+
+ // Search Confluence via CQL (Confluence Query Language).
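+ // limit=10 caps results per query; expand=body.storage asks the API to return
+ // the page body (storage-format XHTML) inline so it can be scanned directly.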
+ searchURL := fmt.Sprintf("%s/rest/api/content/search?cql=%s&limit=10&expand=body.storage",
+ base, url.QueryEscape(fmt.Sprintf(`text~"%s"`, q)))
+ req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
+ if err != nil {
+ continue
+ }
+ req.Header.Set("Accept", "application/json")
+
+ resp, err := client.Do(ctx, req)
+ if err != nil {
+ continue
+ }
+
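+ // Cap the read at 256 KiB so an oversized page body cannot exhaust memory.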
+ body, err := io.ReadAll(io.LimitReader(resp.Body, 256*1024))
+ _ = resp.Body.Close()
+ if err != nil {
+ continue
+ }
+
+ var result confluenceSearchResponse
+ if err := json.Unmarshal(body, &result); err != nil {
+ continue
+ }
+
+ for _, page := range result.Results {
+ // Strip HTML tags to get plain text for key matching.
+ plainText := htmlTagPattern.ReplaceAllString(page.Body.Storage.Value, " ")
+
+ if ciLogKeyPattern.MatchString(plainText) {
+ pageURL := fmt.Sprintf("%s%s", base, page.Links.WebUI)
+ out <- recon.Finding{
+ ProviderName: q,
+ Source: pageURL,
+ SourceType: "recon:confluence",
+ Confidence: "medium",
+ DetectedAt: time.Now(),
+ }
+ }
+ }
+ }
+ return nil
+}
diff --git a/pkg/recon/sources/confluence_test.go b/pkg/recon/sources/confluence_test.go
new file mode 100644
index 0000000..18bac86
--- /dev/null
+++ b/pkg/recon/sources/confluence_test.go
@@ -0,0 +1,77 @@
+package sources
+
+import (
+ "context"
+ "net/http"
+ "net/http/httptest"
+ "testing"
+ "time"
+
+ "github.com/salvacybersec/keyhunter/pkg/providers"
+ "github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+func TestConfluence_Name(t *testing.T) {
+ s := &ConfluenceSource{}
+ if s.Name() != "confluence" {
+ t.Fatalf("expected confluence, got %s", s.Name())
+ }
+}
+
+func TestConfluence_Enabled(t *testing.T) {
+ s := &ConfluenceSource{}
+ if !s.Enabled(recon.Config{}) {
+ t.Fatal("ConfluenceSource should always be enabled (credentialless)")
+ }
+}
+
+func TestConfluence_Sweep(t *testing.T) {
+ mux := http.NewServeMux()
+ mux.HandleFunc("/rest/api/content/search", func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "application/json")
+ _, _ = w.Write([]byte(`{"results":[{
+ "id":"12345",
+ "title":"API Configuration",
+ "body":{"storage":{"value":"
Production credentials: secret_key = sk-proj-ABCDEF1234567890abcdef
"}},
+ "_links":{"webui":"/display/TEAM/API+Configuration"}
+ }]}`))
+ })
+
+ srv := httptest.NewServer(mux)
+ defer srv.Close()
+
+ reg := providers.NewRegistryFromProviders([]providers.Provider{
+ {Name: "openai", Keywords: []string{"sk-proj-"}},
+ })
+
+ s := &ConfluenceSource{
+ BaseURL: srv.URL,
+ Registry: reg,
+ Client: NewClient(),
+ }
+
+ out := make(chan recon.Finding, 10)
+ ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+ defer cancel()
+
+ err := s.Sweep(ctx, "", out)
+ close(out)
+ if err != nil {
+ t.Fatalf("Sweep error: %v", err)
+ }
+
+ var findings []recon.Finding
+ for f := range out {
+ findings = append(findings, f)
+ }
+ if len(findings) == 0 {
+ t.Fatal("expected at least one finding from Confluence page")
+ }
+ if findings[0].SourceType != "recon:confluence" {
+ t.Fatalf("expected recon:confluence, got %s", findings[0].SourceType)
+ }
+ expected := srv.URL + "/display/TEAM/API+Configuration"
+ if findings[0].Source != expected {
+ t.Fatalf("expected %s, got %s", expected, findings[0].Source)
+ }
+}
diff --git a/pkg/recon/sources/googledocs.go b/pkg/recon/sources/googledocs.go
new file mode 100644
index 0000000..96251a3
--- /dev/null
+++ b/pkg/recon/sources/googledocs.go
@@ -0,0 +1,139 @@
+package sources
+
+import (
+ "context"
+ "encoding/json"
+ "fmt"
+ "io"
+ "net/http"
+ "net/url"
+ "time"
+
+ "golang.org/x/time/rate"
+
+ "github.com/salvacybersec/keyhunter/pkg/providers"
+ "github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+// GoogleDocsSource searches publicly shared Google Docs for leaked API keys.
+// Docs shared with "anyone with the link" can end up indexed by search engines
+// once the link is posted on a crawled page. This source uses a dorking
+// approach to discover such docs and then fetches their plain-text export for
+// credential scanning.
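+//
+// As a sketch, each sweep first queries the configured search endpoint, e.g.
+// GET <base>/search?q=site:docs.google.com+<keyword>&format=json
+// and then pulls each hit's plain-text export from <docURL>/export?format=txt.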
+type GoogleDocsSource struct {
+ BaseURL string
+ Registry *providers.Registry
+ Limiters *recon.LimiterRegistry
+ Client *Client
+}
+
+var _ recon.ReconSource = (*GoogleDocsSource)(nil)
+
+func (s *GoogleDocsSource) Name() string { return "googledocs" }
+func (s *GoogleDocsSource) RateLimit() rate.Limit { return rate.Every(3 * time.Second) }
+func (s *GoogleDocsSource) Burst() int { return 2 }
+func (s *GoogleDocsSource) RespectsRobots() bool { return true }
+func (s *GoogleDocsSource) Enabled(_ recon.Config) bool { return true }
+
+// googleDocsSearchResponse represents dork search results for Google Docs.
+type googleDocsSearchResponse struct {
+ Results []googleDocsSearchResult `json:"results"`
+}
+
+type googleDocsSearchResult struct {
+ URL string `json:"url"`
+ Title string `json:"title"`
+}
+
+func (s *GoogleDocsSource) Sweep(ctx context.Context, _ string, out chan<- recon.Finding) error {
+ base := s.BaseURL
+ if base == "" {
+ base = "https://search.googledocs.dev"
+ }
+ client := s.Client
+ if client == nil {
+ client = NewClient()
+ }
+
+ queries := BuildQueries(s.Registry, "googledocs")
+ if len(queries) == 0 {
+ return nil
+ }
+
+ for _, q := range queries {
+ if err := ctx.Err(); err != nil {
+ return err
+ }
+
+ if s.Limiters != nil {
+ if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+ return err
+ }
+ }
+
+ // Search for public Google Docs via dorking.
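+ // The site:docs.google.com dork narrows hits to Google Docs URLs; BaseURL is
+ // assumed to point at a search service that understands this query syntax.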
+ searchURL := fmt.Sprintf("%s/search?q=%s&format=json",
+ base, url.QueryEscape("site:docs.google.com "+q))
+ req, err := http.NewRequestWithContext(ctx, http.MethodGet, searchURL, nil)
+ if err != nil {
+ continue
+ }
+ req.Header.Set("Accept", "application/json")
+
+ resp, err := client.Do(ctx, req)
+ if err != nil {
+ continue
+ }
+
+ body, err := io.ReadAll(io.LimitReader(resp.Body, 256*1024))
+ _ = resp.Body.Close()
+ if err != nil {
+ continue
+ }
+
+ var results googleDocsSearchResponse
+ if err := json.Unmarshal(body, &results); err != nil {
+ continue
+ }
+
+ // Fetch each discovered doc's plain-text export.
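+ // Publicly readable docs expose a plain-text export at <doc>/export?format=txt,
+ // which avoids having to parse the HTML view.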
+ for _, result := range results.Results {
+ if err := ctx.Err(); err != nil {
+ return err
+ }
+
+ if s.Limiters != nil {
+ if err := s.Limiters.Wait(ctx, s.Name(), s.RateLimit(), s.Burst(), false); err != nil {
+ return err
+ }
+ }
+
+ exportURL := result.URL + "/export?format=txt"
+ docReq, err := http.NewRequestWithContext(ctx, http.MethodGet, exportURL, nil)
+ if err != nil {
+ continue
+ }
+
+ docResp, err := client.Do(ctx, docReq)
+ if err != nil {
+ continue
+ }
+
+ docBody, err := io.ReadAll(io.LimitReader(docResp.Body, 256*1024))
+ _ = docResp.Body.Close()
+ if err != nil {
+ continue
+ }
+
+ if ciLogKeyPattern.Match(docBody) {
+ out <- recon.Finding{
+ ProviderName: q,
+ Source: result.URL,
+ SourceType: "recon:googledocs",
+ Confidence: "medium",
+ DetectedAt: time.Now(),
+ }
+ }
+ }
+ }
+ return nil
+}
diff --git a/pkg/recon/sources/googledocs_test.go b/pkg/recon/sources/googledocs_test.go
new file mode 100644
index 0000000..4d5fac6
--- /dev/null
+++ b/pkg/recon/sources/googledocs_test.go
@@ -0,0 +1,79 @@
+package sources
+
+import (
+ "context"
+ "net/http"
+ "net/http/httptest"
+ "testing"
+ "time"
+
+ "github.com/salvacybersec/keyhunter/pkg/providers"
+ "github.com/salvacybersec/keyhunter/pkg/recon"
+)
+
+func TestGoogleDocs_Name(t *testing.T) {
+ s := &GoogleDocsSource{}
+ if s.Name() != "googledocs" {
+ t.Fatalf("expected googledocs, got %s", s.Name())
+ }
+}
+
+func TestGoogleDocs_Enabled(t *testing.T) {
+ s := &GoogleDocsSource{}
+ if !s.Enabled(recon.Config{}) {
+ t.Fatal("GoogleDocsSource should always be enabled (credentialless)")
+ }
+}
+
+func TestGoogleDocs_Sweep(t *testing.T) {
+ mux := http.NewServeMux()
+
+ // Mock search endpoint returning a doc URL.
+ mux.HandleFunc("/search", func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "application/json")
+ _, _ = w.Write([]byte(`{"results":[{"url":"` + "http://" + r.Host + `/doc/d/1a2b3c","title":"Setup Guide"}]}`))
+ })
+
+ // Mock plain-text export with a leaked key.
+ mux.HandleFunc("/doc/d/1a2b3c/export", func(w http.ResponseWriter, r *http.Request) {
+ w.Header().Set("Content-Type", "text/plain")
+ _, _ = w.Write([]byte(`Setup Instructions
+Step 1: Set your API key
+auth_token = sk-proj-ABCDEF1234567890abcdef
+Step 2: Run the service`))
+ })
+
+ srv := httptest.NewServer(mux)
+ defer srv.Close()
+
+ reg := providers.NewRegistryFromProviders([]providers.Provider{
+ {Name: "openai", Keywords: []string{"sk-proj-"}},
+ })
+
+ s := &GoogleDocsSource{
+ BaseURL: srv.URL,
+ Registry: reg,
+ Client: NewClient(),
+ }
+
+ out := make(chan recon.Finding, 10)
+ ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+ defer cancel()
+
+ err := s.Sweep(ctx, "", out)
+ close(out)
+ if err != nil {
+ t.Fatalf("Sweep error: %v", err)
+ }
+
+ var findings []recon.Finding
+ for f := range out {
+ findings = append(findings, f)
+ }
+ if len(findings) == 0 {
+ t.Fatal("expected at least one finding from Google Docs export")
+ }
+ if findings[0].SourceType != "recon:googledocs" {
+ t.Fatalf("expected recon:googledocs, got %s", findings[0].SourceType)
+ }
+}