feat(08-03): add 20 Shodan dorks for exposed LLM infrastructure
- frontier.yaml: 6 dorks (OpenAI/Anthropic proxies, Azure OpenAI certs, AWS Bedrock, LiteLLM)
- infrastructure.yaml: 14 dorks (Ollama, vLLM, LocalAI, LM Studio, text-generation-webui, Open WebUI, Triton, TGI, LangServe, FastChat, OpenRouter/Portkey/Helicone gateways)
- Real Shodan query syntax: http.title, http.html, ssl.cert.subject.cn, product, port, http.component
- Dual-located: pkg/dorks/definitions/shodan/ + dorks/shodan/
This commit is contained in:
42
dorks/shodan/frontier.yaml
Normal file
42
dorks/shodan/frontier.yaml
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# Shodan dorks targeting frontier-model (OpenAI / Anthropic / Azure / AWS)
# LLM infrastructure. Reconstructed from the diff-viewer rendering, which
# interleaved separator artifacts between every content line.
- id: shodan-openai-proxy
  name: "OpenAI proxy servers"
  source: shodan
  category: frontier
  query: 'http.title:"openai" http.html:"/v1/chat/completions"'
  description: "Exposed OpenAI-compatible proxy servers serving chat completions"
  tags: [openai, proxy, tier1]

- id: shodan-litellm-proxy
  name: "LiteLLM proxies on default port"
  source: shodan
  category: frontier
  query: 'http.title:"LiteLLM" port:4000'
  description: "LiteLLM gateway dashboards exposed on default port 4000"
  tags: [litellm, gateway, tier5]

- id: shodan-openai-nginx
  name: "Nginx front-ends leaking OPENAI_API_KEY"
  source: shodan
  category: frontier
  query: 'http.html:"OPENAI_API_KEY" http.component:nginx'
  description: "Nginx-fronted services exposing OPENAI_API_KEY in HTML"
  tags: [openai, nginx, tier1]

- id: shodan-azure-openai
  name: "Azure OpenAI certificate matches"
  source: shodan
  category: frontier
  query: 'ssl.cert.subject.cn:"openai.azure.com"'
  description: "TLS certificates referencing Azure OpenAI endpoints"
  tags: [azure, openai, tier1]

- id: shodan-bedrock-runtime
  name: "AWS Bedrock runtime certificates"
  source: shodan
  category: frontier
  query: 'ssl.cert.subject.cn:"bedrock-runtime"'
  description: "TLS certificates referencing AWS Bedrock runtime hosts"
  tags: [aws, bedrock, tier1]

- id: shodan-anthropic-proxy
  name: "Anthropic-compatible proxy servers"
  source: shodan
  category: frontier
  query: 'http.html:"anthropic" http.html:"messages"'
  description: "Proxy servers routing to Anthropic messages API"
  tags: [anthropic, proxy, tier1]
98
dorks/shodan/infrastructure.yaml
Normal file
98
dorks/shodan/infrastructure.yaml
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
# Shodan dorks targeting self-hosted LLM serving stacks and AI gateways.
# Reconstructed from the diff-viewer rendering, which interleaved separator
# artifacts between every content line.
- id: shodan-ollama-default
  name: "Ollama on default port 11434"
  source: shodan
  category: infrastructure
  query: 'product:"Ollama" port:11434'
  description: "Ollama servers banner-identified on the default port"
  tags: [ollama, self-hosted, tier8]

- id: shodan-ollama-tags
  name: "Ollama /api/tags endpoints"
  source: shodan
  category: infrastructure
  query: 'http.html:"/api/tags" http.title:"Ollama"'
  description: "Ollama servers exposing the model tags listing endpoint"
  tags: [ollama, self-hosted, tier8]

- id: shodan-vllm
  name: "vLLM /v1/models endpoints"
  source: shodan
  category: infrastructure
  query: 'http.html:"vLLM" http.html:"/v1/models"'
  description: "vLLM inference servers exposing the models endpoint"
  tags: [vllm, self-hosted, tier8]

- id: shodan-localai
  name: "LocalAI dashboards"
  source: shodan
  category: infrastructure
  query: 'http.title:"LocalAI"'
  description: "LocalAI self-hosted inference dashboards"
  tags: [localai, self-hosted, tier8]

- id: shodan-lmstudio
  name: "LM Studio servers"
  source: shodan
  category: infrastructure
  query: 'http.title:"LM Studio"'
  description: "Exposed LM Studio local inference servers"
  tags: [lmstudio, self-hosted, tier8]

- id: shodan-textgenwebui
  name: "text-generation-webui instances"
  source: shodan
  category: infrastructure
  query: 'http.title:"text-generation-webui"'
  description: "Exposed oobabooga text-generation-webui instances"
  tags: [textgen, self-hosted, tier8]

- id: shodan-openwebui
  name: "Open WebUI chat servers"
  source: shodan
  category: infrastructure
  query: 'http.title:"Open WebUI" http.html:"/api/chat"'
  description: "Exposed Open WebUI chat front-ends"
  tags: [openwebui, self-hosted, tier8]

- id: shodan-openrouter-proxy
  name: "OpenRouter-linked proxies"
  source: shodan
  category: infrastructure
  query: 'http.html:"openrouter.ai" port:443'
  description: "HTTPS hosts referencing openrouter.ai in page content"
  tags: [openrouter, gateway, tier5]

- id: shodan-portkey-gateway
  name: "Portkey gateway dashboards"
  source: shodan
  category: infrastructure
  query: 'http.title:"Portkey"'
  description: "Exposed Portkey AI gateway dashboards"
  tags: [portkey, gateway, tier5]

- id: shodan-helicone-gateway
  name: "Helicone gateway endpoints"
  source: shodan
  category: infrastructure
  query: 'http.html:"helicone" http.html:"/v1"'
  description: "Hosts referencing Helicone observability gateway endpoints"
  tags: [helicone, gateway, tier5]

- id: shodan-triton-server
  name: "NVIDIA Triton inference servers"
  source: shodan
  category: infrastructure
  query: 'http.html:"NVIDIA Triton" http.html:"/v2/models"'
  description: "Exposed NVIDIA Triton inference servers"
  tags: [triton, nvidia, tier8]

- id: shodan-tgi-hf
  name: "HF text-generation-inference servers"
  source: shodan
  category: infrastructure
  query: 'http.html:"text-generation-inference" "/generate"'
  description: "Hugging Face text-generation-inference servers exposing /generate"
  tags: [huggingface, tgi, tier8]

- id: shodan-langserve
  name: "LangServe endpoints"
  source: shodan
  category: infrastructure
  query: 'http.title:"LangServe"'
  description: "Exposed LangChain LangServe deployments"
  tags: [langserve, tier8]

- id: shodan-fastchat
  name: "FastChat servers"
  source: shodan
  category: infrastructure
  query: 'http.title:"FastChat"'
  description: "Exposed FastChat multi-model serving instances"
  tags: [fastchat, self-hosted, tier8]
42
pkg/dorks/definitions/shodan/frontier.yaml
Normal file
42
pkg/dorks/definitions/shodan/frontier.yaml
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# Shodan dorks targeting frontier-model (OpenAI / Anthropic / Azure / AWS)
# LLM infrastructure. Reconstructed from the diff-viewer rendering, which
# interleaved separator artifacts between every content line.
- id: shodan-openai-proxy
  name: "OpenAI proxy servers"
  source: shodan
  category: frontier
  query: 'http.title:"openai" http.html:"/v1/chat/completions"'
  description: "Exposed OpenAI-compatible proxy servers serving chat completions"
  tags: [openai, proxy, tier1]

- id: shodan-litellm-proxy
  name: "LiteLLM proxies on default port"
  source: shodan
  category: frontier
  query: 'http.title:"LiteLLM" port:4000'
  description: "LiteLLM gateway dashboards exposed on default port 4000"
  tags: [litellm, gateway, tier5]

- id: shodan-openai-nginx
  name: "Nginx front-ends leaking OPENAI_API_KEY"
  source: shodan
  category: frontier
  query: 'http.html:"OPENAI_API_KEY" http.component:nginx'
  description: "Nginx-fronted services exposing OPENAI_API_KEY in HTML"
  tags: [openai, nginx, tier1]

- id: shodan-azure-openai
  name: "Azure OpenAI certificate matches"
  source: shodan
  category: frontier
  query: 'ssl.cert.subject.cn:"openai.azure.com"'
  description: "TLS certificates referencing Azure OpenAI endpoints"
  tags: [azure, openai, tier1]

- id: shodan-bedrock-runtime
  name: "AWS Bedrock runtime certificates"
  source: shodan
  category: frontier
  query: 'ssl.cert.subject.cn:"bedrock-runtime"'
  description: "TLS certificates referencing AWS Bedrock runtime hosts"
  tags: [aws, bedrock, tier1]

- id: shodan-anthropic-proxy
  name: "Anthropic-compatible proxy servers"
  source: shodan
  category: frontier
  query: 'http.html:"anthropic" http.html:"messages"'
  description: "Proxy servers routing to Anthropic messages API"
  tags: [anthropic, proxy, tier1]
98
pkg/dorks/definitions/shodan/infrastructure.yaml
Normal file
98
pkg/dorks/definitions/shodan/infrastructure.yaml
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
# Shodan dorks targeting self-hosted LLM serving stacks and AI gateways.
# Reconstructed from the diff-viewer rendering, which interleaved separator
# artifacts between every content line.
- id: shodan-ollama-default
  name: "Ollama on default port 11434"
  source: shodan
  category: infrastructure
  query: 'product:"Ollama" port:11434'
  description: "Ollama servers banner-identified on the default port"
  tags: [ollama, self-hosted, tier8]

- id: shodan-ollama-tags
  name: "Ollama /api/tags endpoints"
  source: shodan
  category: infrastructure
  query: 'http.html:"/api/tags" http.title:"Ollama"'
  description: "Ollama servers exposing the model tags listing endpoint"
  tags: [ollama, self-hosted, tier8]

- id: shodan-vllm
  name: "vLLM /v1/models endpoints"
  source: shodan
  category: infrastructure
  query: 'http.html:"vLLM" http.html:"/v1/models"'
  description: "vLLM inference servers exposing the models endpoint"
  tags: [vllm, self-hosted, tier8]

- id: shodan-localai
  name: "LocalAI dashboards"
  source: shodan
  category: infrastructure
  query: 'http.title:"LocalAI"'
  description: "LocalAI self-hosted inference dashboards"
  tags: [localai, self-hosted, tier8]

- id: shodan-lmstudio
  name: "LM Studio servers"
  source: shodan
  category: infrastructure
  query: 'http.title:"LM Studio"'
  description: "Exposed LM Studio local inference servers"
  tags: [lmstudio, self-hosted, tier8]

- id: shodan-textgenwebui
  name: "text-generation-webui instances"
  source: shodan
  category: infrastructure
  query: 'http.title:"text-generation-webui"'
  description: "Exposed oobabooga text-generation-webui instances"
  tags: [textgen, self-hosted, tier8]

- id: shodan-openwebui
  name: "Open WebUI chat servers"
  source: shodan
  category: infrastructure
  query: 'http.title:"Open WebUI" http.html:"/api/chat"'
  description: "Exposed Open WebUI chat front-ends"
  tags: [openwebui, self-hosted, tier8]

- id: shodan-openrouter-proxy
  name: "OpenRouter-linked proxies"
  source: shodan
  category: infrastructure
  query: 'http.html:"openrouter.ai" port:443'
  description: "HTTPS hosts referencing openrouter.ai in page content"
  tags: [openrouter, gateway, tier5]

- id: shodan-portkey-gateway
  name: "Portkey gateway dashboards"
  source: shodan
  category: infrastructure
  query: 'http.title:"Portkey"'
  description: "Exposed Portkey AI gateway dashboards"
  tags: [portkey, gateway, tier5]

- id: shodan-helicone-gateway
  name: "Helicone gateway endpoints"
  source: shodan
  category: infrastructure
  query: 'http.html:"helicone" http.html:"/v1"'
  description: "Hosts referencing Helicone observability gateway endpoints"
  tags: [helicone, gateway, tier5]

- id: shodan-triton-server
  name: "NVIDIA Triton inference servers"
  source: shodan
  category: infrastructure
  query: 'http.html:"NVIDIA Triton" http.html:"/v2/models"'
  description: "Exposed NVIDIA Triton inference servers"
  tags: [triton, nvidia, tier8]

- id: shodan-tgi-hf
  name: "HF text-generation-inference servers"
  source: shodan
  category: infrastructure
  query: 'http.html:"text-generation-inference" "/generate"'
  description: "Hugging Face text-generation-inference servers exposing /generate"
  tags: [huggingface, tgi, tier8]

- id: shodan-langserve
  name: "LangServe endpoints"
  source: shodan
  category: infrastructure
  query: 'http.title:"LangServe"'
  description: "Exposed LangChain LangServe deployments"
  tags: [langserve, tier8]

- id: shodan-fastchat
  name: "FastChat servers"
  source: shodan
  category: infrastructure
  query: 'http.title:"FastChat"'
  description: "Exposed FastChat multi-model serving instances"
  tags: [fastchat, self-hosted, tier8]
182
pkg/dorks/github.go
Normal file
182
pkg/dorks/github.go
Normal file
@@ -0,0 +1,182 @@
|
|||||||
|
package dorks
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"strconv"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// GitHubExecutor runs dorks against the GitHub Code Search REST API.
// It is the only live executor registered in Phase 8; every other source
// returns ErrSourceNotImplemented from the Runner until the OSINT phases
// (9-16) wire them up.
//
// Authentication is required: GitHub's code search endpoint rejects
// anonymous traffic. The caller is expected to populate Token from either
// the GITHUB_TOKEN env var or `dorks.github.token` viper config key
// (wiring lives in Plan 08-06's cmd/dorks.go).
//
// BaseURL is overridable so tests can point the executor at an
// httptest.Server. MaxRetries controls how many times a 403/429 response
// is retried after sleeping for the Retry-After duration; it defaults to
// one retry per Execute call.
type GitHubExecutor struct {
	// Token is the personal access token sent as a Bearer credential.
	Token string
	// BaseURL is the API root; the empty string falls back to
	// https://api.github.com inside Execute.
	BaseURL string
	// HTTPClient performs the requests; nil falls back to a client with
	// a 30 second timeout inside Execute.
	HTTPClient *http.Client
	// MaxRetries bounds the extra attempts made after a 403/429 response.
	MaxRetries int
}
|
||||||
|
|
||||||
|
// NewGitHubExecutor returns an executor pre-configured for api.github.com
|
||||||
|
// with a 30 second client timeout and a single retry on rate-limit
|
||||||
|
// responses.
|
||||||
|
func NewGitHubExecutor(token string) *GitHubExecutor {
|
||||||
|
return &GitHubExecutor{
|
||||||
|
Token: token,
|
||||||
|
BaseURL: "https://api.github.com",
|
||||||
|
HTTPClient: &http.Client{Timeout: 30 * time.Second},
|
||||||
|
MaxRetries: 1,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Source satisfies the Executor interface.
|
||||||
|
func (g *GitHubExecutor) Source() string { return "github" }
|
||||||
|
|
||||||
|
// ghSearchResponse mirrors the subset of GitHub's code-search JSON
// payload this executor consumes.
type ghSearchResponse struct {
	TotalCount int `json:"total_count"`
	Items []ghCodeItem `json:"items"`
}

// ghCodeItem is a single code-search hit.
type ghCodeItem struct {
	Name string `json:"name"`
	Path string `json:"path"`
	HTMLURL string `json:"html_url"`
	Repository ghRepository `json:"repository"`
	// TextMatches is populated only when the text-match media type is
	// requested via the Accept header (Execute always requests it).
	TextMatches []ghTextMatchEntry `json:"text_matches"`
}

// ghRepository carries the owning repository's "owner/name" identifier.
type ghRepository struct {
	FullName string `json:"full_name"`
}

// ghTextMatchEntry holds one highlighted snippet fragment from a hit.
type ghTextMatchEntry struct {
	Fragment string `json:"fragment"`
}
|
||||||
|
|
||||||
|
// Execute runs the dork against GitHub Code Search and returns up to
// limit matches. limit <= 0 or > 100 is clamped to 30 (GitHub's default
// per_page). A missing token yields ErrMissingAuth with setup
// instructions; a 401 from the server is treated the same way (rejected
// token). Transient 403/429 rate-limit responses are retried once after
// honoring Retry-After.
func (g *GitHubExecutor) Execute(ctx context.Context, d Dork, limit int) ([]Match, error) {
	// Fail fast: code search rejects anonymous requests, so an empty
	// token is a configuration error, not a transient failure.
	if g.Token == "" {
		return nil, fmt.Errorf("%w: set GITHUB_TOKEN env var or `keyhunter config set dorks.github.token <pat>` (needs public_repo scope)", ErrMissingAuth)
	}
	if limit <= 0 || limit > 100 {
		limit = 30
	}

	// Fall back to production defaults when the struct was left
	// partially zero-valued (tests override BaseURL/HTTPClient).
	base := g.BaseURL
	if base == "" {
		base = "https://api.github.com"
	}
	client := g.HTTPClient
	if client == nil {
		client = &http.Client{Timeout: 30 * time.Second}
	}

	endpoint := fmt.Sprintf("%s/search/code?q=%s&per_page=%d", base, url.QueryEscape(d.Query), limit)

	// Retry loop: a fresh *http.Request is built per attempt because a
	// request must not be reused after client.Do.
	var resp *http.Response
	for attempt := 0; attempt <= g.MaxRetries; attempt++ {
		req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
		if err != nil {
			return nil, fmt.Errorf("github search: build request: %w", err)
		}
		// The text-match media type makes GitHub include highlighted
		// snippet fragments in each item.
		req.Header.Set("Accept", "application/vnd.github.v3.text-match+json")
		req.Header.Set("Authorization", "Bearer "+g.Token)
		req.Header.Set("User-Agent", "keyhunter-dork-engine")

		r, err := client.Do(req)
		if err != nil {
			return nil, fmt.Errorf("github search: %w", err)
		}

		if r.StatusCode == http.StatusOK {
			resp = r
			break
		}

		// Non-200: read and close the body so the connection can be
		// reused, and surface the server's message in the error.
		body, _ := io.ReadAll(r.Body)
		_ = r.Body.Close()

		switch r.StatusCode {
		case http.StatusUnauthorized:
			// Bad credentials — same remediation as a missing token.
			return nil, fmt.Errorf("%w: github token rejected (401): %s", ErrMissingAuth, string(body))
		case http.StatusForbidden, http.StatusTooManyRequests:
			// 403/429 cover GitHub's primary and secondary rate limits.
			// Sleep for the advertised Retry-After, but abandon the wait
			// immediately if the context is cancelled.
			if attempt < g.MaxRetries {
				sleep := parseRetryAfter(r.Header.Get("Retry-After"))
				select {
				case <-time.After(sleep):
					continue
				case <-ctx.Done():
					return nil, ctx.Err()
				}
			}
			return nil, fmt.Errorf("github rate limit: %d %s", r.StatusCode, string(body))
		default:
			return nil, fmt.Errorf("github search failed: %d %s", r.StatusCode, string(body))
		}
	}
	// Defensive: every loop path either breaks with resp set or returns,
	// but guard against a nil dereference regardless.
	if resp == nil {
		return nil, fmt.Errorf("github search: exhausted retries without response")
	}
	defer resp.Body.Close()

	var parsed ghSearchResponse
	if err := json.NewDecoder(resp.Body).Decode(&parsed); err != nil {
		return nil, fmt.Errorf("decoding github response: %w", err)
	}

	out := make([]Match, 0, len(parsed.Items))
	for _, it := range parsed.Items {
		// The first text-match fragment (when present) becomes the snippet.
		snippet := ""
		if len(it.TextMatches) > 0 {
			snippet = it.TextMatches[0].Fragment
		}
		// Prefix the repo full name so Path is unambiguous across repos.
		path := it.Path
		if it.Repository.FullName != "" {
			path = it.Repository.FullName + "/" + it.Path
		}
		out = append(out, Match{
			DorkID:  d.ID,
			Source:  "github",
			URL:     it.HTMLURL,
			Path:    path,
			Snippet: snippet,
		})
		// Enforce limit client-side as well — the server may return more
		// items than per_page requested (tests rely on this cap).
		if len(out) >= limit {
			break
		}
	}
	return out, nil
}
|
||||||
|
|
||||||
|
// parseRetryAfter interprets the Retry-After header value. Only the
|
||||||
|
// integer-seconds form is supported (GitHub uses it for code search rate
|
||||||
|
// limits); anything unparseable defaults to a one second backoff.
|
||||||
|
func parseRetryAfter(v string) time.Duration {
|
||||||
|
if v == "" {
|
||||||
|
return time.Second
|
||||||
|
}
|
||||||
|
if secs, err := strconv.Atoi(v); err == nil && secs > 0 {
|
||||||
|
return time.Duration(secs) * time.Second
|
||||||
|
}
|
||||||
|
return time.Second
|
||||||
|
}
|
||||||
270
pkg/dorks/github_test.go
Normal file
270
pkg/dorks/github_test.go
Normal file
@@ -0,0 +1,270 @@
|
|||||||
|
package dorks
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
|
"sync/atomic"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func sampleDork() Dork {
|
||||||
|
return Dork{
|
||||||
|
ID: "openai-github-envfile",
|
||||||
|
Name: "OpenAI key in .env",
|
||||||
|
Source: "github",
|
||||||
|
Query: "sk-proj- extension:env",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func newTestExecutor(token, baseURL string) *GitHubExecutor {
|
||||||
|
return &GitHubExecutor{
|
||||||
|
Token: token,
|
||||||
|
BaseURL: baseURL,
|
||||||
|
HTTPClient: &http.Client{Timeout: 5 * time.Second},
|
||||||
|
MaxRetries: 1,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitHubExecutor_Source(t *testing.T) {
|
||||||
|
g := NewGitHubExecutor("x")
|
||||||
|
if g.Source() != "github" {
|
||||||
|
t.Fatalf("expected source %q, got %q", "github", g.Source())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitHubExecutor_MissingTokenReturnsErrMissingAuth(t *testing.T) {
|
||||||
|
// Use a server that would fail the test if it were ever hit — the
|
||||||
|
// executor must short-circuit before issuing an HTTP request.
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
t.Fatalf("server should not be hit when token is empty; got %s %s", r.Method, r.URL.Path)
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
g := newTestExecutor("", srv.URL)
|
||||||
|
_, err := g.Execute(context.Background(), sampleDork(), 10)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error for empty token")
|
||||||
|
}
|
||||||
|
if !errors.Is(err, ErrMissingAuth) {
|
||||||
|
t.Fatalf("expected ErrMissingAuth, got %v", err)
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "GITHUB_TOKEN") {
|
||||||
|
t.Fatalf("expected setup instructions in error, got %q", err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestGitHubExecutor_SuccessfulSearchParsesMatches verifies request
// construction (Bearer auth, text-match Accept header, /search/code
// path, query encoding) and the mapping from GitHub's JSON payload into
// Match values: HTML URL, repo-prefixed path, and the first text-match
// fragment as the snippet.
func TestGitHubExecutor_SuccessfulSearchParsesMatches(t *testing.T) {
	body := `{
  "total_count": 2,
  "items": [
    {
      "name": ".env",
      "path": "backend/.env",
      "html_url": "https://github.com/acme/leaky/blob/main/backend/.env",
      "repository": {"full_name": "acme/leaky"},
      "text_matches": [{"fragment": "OPENAI_API_KEY=sk-proj-AAA"}]
    },
    {
      "name": "config.env",
      "path": "infra/config.env",
      "html_url": "https://github.com/acme/other/blob/main/infra/config.env",
      "repository": {"full_name": "acme/other"},
      "text_matches": [{"fragment": "SECRET=sk-proj-BBB"}]
    }
  ]
}`
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// t.Errorf (not Fatalf) — the handler runs on a server goroutine.
		if got := r.Header.Get("Authorization"); got != "Bearer test-token" {
			t.Errorf("expected Bearer auth, got %q", got)
		}
		if got := r.Header.Get("Accept"); !strings.Contains(got, "text-match") {
			t.Errorf("expected text-match Accept header, got %q", got)
		}
		if r.URL.Path != "/search/code" {
			t.Errorf("expected /search/code, got %s", r.URL.Path)
		}
		if q := r.URL.Query().Get("q"); q != "sk-proj- extension:env" {
			t.Errorf("expected query to be url-decoded to original, got %q", q)
		}
		w.Header().Set("Content-Type", "application/json")
		w.WriteHeader(http.StatusOK)
		_, _ = w.Write([]byte(body))
	}))
	defer srv.Close()

	g := newTestExecutor("test-token", srv.URL)
	matches, err := g.Execute(context.Background(), sampleDork(), 10)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(matches) != 2 {
		t.Fatalf("expected 2 matches, got %d", len(matches))
	}
	// Spot-check the first match field-by-field.
	m := matches[0]
	if m.DorkID != "openai-github-envfile" {
		t.Errorf("DorkID = %q", m.DorkID)
	}
	if m.Source != "github" {
		t.Errorf("Source = %q", m.Source)
	}
	if m.URL != "https://github.com/acme/leaky/blob/main/backend/.env" {
		t.Errorf("URL = %q", m.URL)
	}
	// Path is the repository full name joined with the in-repo path.
	if m.Path != "acme/leaky/backend/.env" {
		t.Errorf("Path = %q", m.Path)
	}
	if m.Snippet != "OPENAI_API_KEY=sk-proj-AAA" {
		t.Errorf("Snippet = %q", m.Snippet)
	}
}
|
||||||
|
|
||||||
|
// TestGitHubExecutor_LimitCapsResults verifies both sides of the limit
// contract: the requested limit is passed through as per_page, and even
// when the server returns more items than asked for, Execute truncates
// the result client-side.
func TestGitHubExecutor_LimitCapsResults(t *testing.T) {
	// Build a response with 10 items; executor must return only 5.
	var items []string
	for i := 0; i < 10; i++ {
		items = append(items, fmt.Sprintf(`{
  "name": "f%d.env",
  "path": "dir/f%d.env",
  "html_url": "https://github.com/acme/repo/blob/main/dir/f%d.env",
  "repository": {"full_name": "acme/repo"},
  "text_matches": [{"fragment": "sk-proj-%d"}]
}`, i, i, i, i))
	}
	body := fmt.Sprintf(`{"total_count": 10, "items": [%s]}`, strings.Join(items, ","))

	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// t.Errorf (not Fatalf) — the handler runs on a server goroutine.
		if got := r.URL.Query().Get("per_page"); got != "5" {
			t.Errorf("expected per_page=5, got %q", got)
		}
		w.WriteHeader(http.StatusOK)
		_, _ = w.Write([]byte(body))
	}))
	defer srv.Close()

	g := newTestExecutor("test-token", srv.URL)
	matches, err := g.Execute(context.Background(), sampleDork(), 5)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(matches) != 5 {
		t.Fatalf("expected 5 matches after cap, got %d", len(matches))
	}
}
|
||||||
|
|
||||||
|
// TestGitHubExecutor_RetryAfterSleepsAndRetries verifies that a 403
// rate-limit response carrying Retry-After: 1 causes Execute to sleep
// roughly one second and then retry, succeeding on the second attempt.
func TestGitHubExecutor_RetryAfterSleepsAndRetries(t *testing.T) {
	var hits int32
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// First hit simulates a rate limit; second hit returns a match.
		n := atomic.AddInt32(&hits, 1)
		if n == 1 {
			w.Header().Set("Retry-After", "1")
			w.Header().Set("X-RateLimit-Remaining", "0")
			w.WriteHeader(http.StatusForbidden)
			_, _ = w.Write([]byte(`{"message":"rate limit"}`))
			return
		}
		w.WriteHeader(http.StatusOK)
		_, _ = w.Write([]byte(`{"total_count":1,"items":[{
  "name":"a.env","path":"a.env","html_url":"https://github.com/x/y/blob/main/a.env",
  "repository":{"full_name":"x/y"},
  "text_matches":[{"fragment":"sk-proj-ZZ"}]}]}`))
	}))
	defer srv.Close()

	g := newTestExecutor("test-token", srv.URL)
	start := time.Now()
	matches, err := g.Execute(context.Background(), sampleDork(), 10)
	elapsed := time.Since(start)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if len(matches) != 1 {
		t.Fatalf("expected 1 match after retry, got %d", len(matches))
	}
	if atomic.LoadInt32(&hits) != 2 {
		t.Fatalf("expected 2 server hits, got %d", hits)
	}
	// 900ms threshold leaves slack for scheduler jitter while still
	// proving the Retry-After sleep actually happened.
	if elapsed < 900*time.Millisecond {
		t.Fatalf("expected to sleep ~1s from Retry-After, only waited %s", elapsed)
	}
}
|
||||||
|
|
||||||
|
// TestGitHubExecutor_RateLimitExhaustedReturnsError verifies that when
// every attempt (initial + MaxRetries) is rate-limited, Execute gives up
// with an error mentioning the rate limit rather than looping forever.
func TestGitHubExecutor_RateLimitExhaustedReturnsError(t *testing.T) {
	var hits int32
	srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		atomic.AddInt32(&hits, 1)
		// Retry-After: 0 is not a positive integer, so parseRetryAfter
		// falls back to its one-second default before the retry.
		w.Header().Set("Retry-After", "0")
		w.WriteHeader(http.StatusTooManyRequests)
		_, _ = w.Write([]byte(`{"message":"secondary rate limit"}`))
	}))
	defer srv.Close()

	g := newTestExecutor("test-token", srv.URL)
	g.MaxRetries = 1
	_, err := g.Execute(context.Background(), sampleDork(), 10)
	if err == nil {
		t.Fatal("expected rate limit error")
	}
	if !strings.Contains(err.Error(), "rate limit") {
		t.Fatalf("expected rate limit in error, got %q", err.Error())
	}
	if atomic.LoadInt32(&hits) != 2 {
		t.Fatalf("expected 2 hits (initial + 1 retry), got %d", hits)
	}
}
|
||||||
|
|
||||||
|
func TestGitHubExecutor_UnauthorizedMapsToMissingAuth(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusUnauthorized)
|
||||||
|
_, _ = w.Write([]byte(`{"message":"Bad credentials"}`))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
g := newTestExecutor("bad-token", srv.URL)
|
||||||
|
_, err := g.Execute(context.Background(), sampleDork(), 10)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error")
|
||||||
|
}
|
||||||
|
if !errors.Is(err, ErrMissingAuth) {
|
||||||
|
t.Fatalf("expected ErrMissingAuth wrap, got %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGitHubExecutor_UnprocessableEntityReturnsDescriptiveError(t *testing.T) {
|
||||||
|
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
w.WriteHeader(http.StatusUnprocessableEntity)
|
||||||
|
_, _ = w.Write([]byte(`{"message":"Validation Failed"}`))
|
||||||
|
}))
|
||||||
|
defer srv.Close()
|
||||||
|
|
||||||
|
g := newTestExecutor("test-token", srv.URL)
|
||||||
|
_, err := g.Execute(context.Background(), sampleDork(), 10)
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "422") {
|
||||||
|
t.Fatalf("expected status code in error, got %q", err.Error())
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "Validation Failed") {
|
||||||
|
t.Fatalf("expected server message in error, got %q", err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseRetryAfter(t *testing.T) {
|
||||||
|
cases := map[string]time.Duration{
|
||||||
|
"": time.Second,
|
||||||
|
"0": time.Second,
|
||||||
|
"1": time.Second,
|
||||||
|
"5": 5 * time.Second,
|
||||||
|
"huh?": time.Second,
|
||||||
|
}
|
||||||
|
for in, want := range cases {
|
||||||
|
if got := parseRetryAfter(in); got != want {
|
||||||
|
t.Errorf("parseRetryAfter(%q) = %s, want %s", in, got, want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user