package output import ( "encoding/json" "fmt" "io" "github.com/salvacybersec/keyhunter/pkg/engine" ) func init() { Register("sarif", SARIFFormatter{}) } // SARIFFormatter emits SARIF 2.1.0 JSON suitable for GitHub code scanning // uploads and other CI/CD ingestion pipelines. // // The implementation uses hand-rolled structs rather than an external library // per project convention (see CLAUDE.md). Only the subset of the SARIF 2.1.0 // schema required for code-scanning uploads is modeled; this keeps the // surface area small (~150 LoC) while producing documents that GitHub and // other SARIF consumers accept as valid. type SARIFFormatter struct{} // sarifDoc is the top-level SARIF 2.1.0 document. type sarifDoc struct { Schema string `json:"$schema"` Version string `json:"version"` Runs []sarifRun `json:"runs"` } type sarifRun struct { Tool sarifTool `json:"tool"` Results []sarifResult `json:"results"` } type sarifTool struct { Driver sarifDriver `json:"driver"` } type sarifDriver struct { Name string `json:"name"` Version string `json:"version"` Rules []sarifRule `json:"rules"` } type sarifRule struct { ID string `json:"id"` Name string `json:"name"` ShortDescription sarifText `json:"shortDescription"` } type sarifText struct { Text string `json:"text"` } type sarifResult struct { RuleID string `json:"ruleId"` Level string `json:"level"` Message sarifText `json:"message"` Locations []sarifLocation `json:"locations"` } type sarifLocation struct { PhysicalLocation sarifPhysicalLocation `json:"physicalLocation"` } type sarifPhysicalLocation struct { ArtifactLocation sarifArtifactLocation `json:"artifactLocation"` Region sarifRegion `json:"region"` } type sarifArtifactLocation struct { URI string `json:"uri"` } type sarifRegion struct { StartLine int `json:"startLine"` } // Format implements the Formatter interface. It produces a SARIF 2.1.0 // document with one rule per distinct provider and one result per finding. 
// The output is always a valid SARIF document, even when findings is empty. func (SARIFFormatter) Format(findings []engine.Finding, w io.Writer, opts Options) error { toolName := opts.ToolName if toolName == "" { toolName = "keyhunter" } toolVersion := opts.ToolVersion if toolVersion == "" { toolVersion = "dev" } // Dedup rules by provider name, preserving first-seen order so that the // output is deterministic for a given input slice. seen := map[string]bool{} rules := make([]sarifRule, 0) for _, f := range findings { if seen[f.ProviderName] { continue } seen[f.ProviderName] = true rules = append(rules, sarifRule{ ID: f.ProviderName, Name: f.ProviderName, ShortDescription: sarifText{Text: fmt.Sprintf("Leaked %s API key", f.ProviderName)}, }) } results := make([]sarifResult, 0, len(findings)) for _, f := range findings { key := f.KeyMasked if opts.Unmask { key = f.KeyValue } // SARIF requires startLine >= 1. Floor any non-positive line numbers // (which occur for stdin/URL sources or when line tracking is absent). startLine := f.LineNumber if startLine < 1 { startLine = 1 } results = append(results, sarifResult{ RuleID: f.ProviderName, Level: sarifLevel(f.Confidence), Message: sarifText{Text: fmt.Sprintf("Detected %s key (%s): %s", f.ProviderName, f.Confidence, key)}, Locations: []sarifLocation{{ PhysicalLocation: sarifPhysicalLocation{ ArtifactLocation: sarifArtifactLocation{URI: f.Source}, Region: sarifRegion{StartLine: startLine}, }, }}, }) } doc := sarifDoc{ Schema: "https://json.schemastore.org/sarif-2.1.0.json", Version: "2.1.0", Runs: []sarifRun{{ Tool: sarifTool{Driver: sarifDriver{ Name: toolName, Version: toolVersion, Rules: rules, }}, Results: results, }}, } enc := json.NewEncoder(w) enc.SetIndent("", " ") return enc.Encode(doc) } // sarifLevel maps KeyHunter confidence strings to SARIF result levels. // Unknown confidence values fall back to "warning" so that unexpected input // never produces an invalid SARIF document. 
func sarifLevel(confidence string) string {
	// Only the two extremes get a level of their own.
	if confidence == "high" {
		return "error"
	}
	if confidence == "low" {
		return "note"
	}
	// "medium" and every unrecognized value (including the empty string)
	// share the same level.
	return "warning"
}