From 2717aa319605ea99c5aac3a2a273bfd0f967cee8 Mon Sep 17 00:00:00 2001 From: salvacybersec Date: Sun, 5 Apr 2026 23:31:15 +0300 Subject: [PATCH] feat(06-03): implement SARIF 2.1.0 formatter with hand-rolled structs - SARIFFormatter emits schema-valid SARIF 2.1.0 JSON for CI ingestion - One rule per distinct provider, deduped in first-seen order - Confidence mapped high/medium/low to error/warning/note - startLine floored to 1 per SARIF spec requirement - Registered under name 'sarif' via init() --- pkg/output/sarif.go | 167 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 pkg/output/sarif.go diff --git a/pkg/output/sarif.go b/pkg/output/sarif.go new file mode 100644 index 0000000..f3424f7 --- /dev/null +++ b/pkg/output/sarif.go @@ -0,0 +1,167 @@ +package output + +import ( + "encoding/json" + "fmt" + "io" + + "github.com/salvacybersec/keyhunter/pkg/engine" +) + +func init() { + Register("sarif", SARIFFormatter{}) +} + +// SARIFFormatter emits SARIF 2.1.0 JSON suitable for GitHub code scanning +// uploads and other CI/CD ingestion pipelines. +// +// The implementation uses hand-rolled structs rather than an external library +// per project convention (see CLAUDE.md). Only the subset of the SARIF 2.1.0 +// schema required for code-scanning uploads is modeled; this keeps the +// surface area small (~150 LoC) while producing documents that GitHub and +// other SARIF consumers accept as valid. +type SARIFFormatter struct{} + +// sarifDoc is the top-level SARIF 2.1.0 document. +type sarifDoc struct { + Schema string `json:"$schema"` + Version string `json:"version"` + Runs []sarifRun `json:"runs"` +} + +type sarifRun struct { + Tool sarifTool `json:"tool"` + Results []sarifResult `json:"results"` +} + +type sarifTool struct { + Driver sarifDriver `json:"driver"` +} + +type sarifDriver struct { + Name string `json:"name"` + Version string `json:"version"` + Rules []sarifRule `json:"rules"` +} + +type sarifRule struct { + ID string `json:"id"` + Name string `json:"name"` + ShortDescription sarifText `json:"shortDescription"` +} + +type sarifText struct { + Text string `json:"text"` +} + +type sarifResult struct { + RuleID string `json:"ruleId"` + Level string `json:"level"` + Message sarifText `json:"message"` + Locations []sarifLocation `json:"locations"` +} + +type sarifLocation struct { + PhysicalLocation sarifPhysicalLocation `json:"physicalLocation"` +} + +type sarifPhysicalLocation struct { + ArtifactLocation sarifArtifactLocation `json:"artifactLocation"` + Region sarifRegion `json:"region"` +} + +type sarifArtifactLocation struct { + URI string `json:"uri"` +} + +type sarifRegion struct { + StartLine int `json:"startLine"` +} + +// Format implements the Formatter interface. It produces a SARIF 2.1.0 +// document with one rule per distinct provider and one result per finding. +// The output is always a valid SARIF document, even when findings is empty. +func (SARIFFormatter) Format(findings []engine.Finding, w io.Writer, opts Options) error { + toolName := opts.ToolName + if toolName == "" { + toolName = "keyhunter" + } + toolVersion := opts.ToolVersion + if toolVersion == "" { + toolVersion = "dev" + } + + // Dedup rules by provider name, preserving first-seen order so that the + // output is deterministic for a given input slice. + seen := map[string]bool{} + rules := make([]sarifRule, 0) + for _, f := range findings { + if seen[f.ProviderName] { + continue + } + seen[f.ProviderName] = true + rules = append(rules, sarifRule{ + ID: f.ProviderName, + Name: f.ProviderName, + ShortDescription: sarifText{Text: fmt.Sprintf("Leaked %s API key", f.ProviderName)}, + }) + } + + results := make([]sarifResult, 0, len(findings)) + for _, f := range findings { + key := f.KeyMasked + if opts.Unmask { + key = f.KeyValue + } + // SARIF requires startLine >= 1. Floor any non-positive line numbers + // (which occur for stdin/URL sources or when line tracking is absent). + startLine := f.LineNumber + if startLine < 1 { + startLine = 1 + } + results = append(results, sarifResult{ + RuleID: f.ProviderName, + Level: sarifLevel(f.Confidence), + Message: sarifText{Text: fmt.Sprintf("Detected %s key (%s): %s", f.ProviderName, f.Confidence, key)}, + Locations: []sarifLocation{{ + PhysicalLocation: sarifPhysicalLocation{ + ArtifactLocation: sarifArtifactLocation{URI: f.Source}, + Region: sarifRegion{StartLine: startLine}, + }, + }}, + }) + } + + doc := sarifDoc{ + Schema: "https://json.schemastore.org/sarif-2.1.0.json", + Version: "2.1.0", + Runs: []sarifRun{{ + Tool: sarifTool{Driver: sarifDriver{ + Name: toolName, + Version: toolVersion, + Rules: rules, + }}, + Results: results, + }}, + } + + enc := json.NewEncoder(w) + enc.SetIndent("", " ") + return enc.Encode(doc) +} + +// sarifLevel maps KeyHunter confidence strings to SARIF result levels. +// Unknown confidence values fall back to "warning" so that unexpected input +// never produces an invalid SARIF document. +func sarifLevel(confidence string) string { + switch confidence { + case "high": + return "error" + case "medium": + return "warning" + case "low": + return "note" + default: + return "warning" + } +}