feat(06-03): implement SARIF 2.1.0 formatter with hand-rolled structs

- SARIFFormatter emits schema-valid SARIF 2.1.0 JSON for CI ingestion
- One rule per distinct provider, deduped in first-seen order
- Confidence mapped high/medium/low to error/warning/note
- startLine floored to 1 per SARIF spec requirement
- Registered under name 'sarif' via init()
This commit is contained in:
salvacybersec
2026-04-05 23:31:15 +03:00
parent b1e4dea51c
commit 2717aa3196

167
pkg/output/sarif.go Normal file
View File

@@ -0,0 +1,167 @@
package output
import (
"encoding/json"
"fmt"
"io"
"github.com/salvacybersec/keyhunter/pkg/engine"
)
func init() {
Register("sarif", SARIFFormatter{})
}
// SARIFFormatter writes findings as a SARIF 2.1.0 JSON document, the format
// consumed by GitHub code scanning uploads and other CI/CD ingestion
// pipelines. It carries no state; init registers its zero value under the
// name "sarif".
//
// The document is modeled with hand-rolled structs rather than an external
// library, per project convention (see CLAUDE.md). Only the subset of the
// SARIF 2.1.0 schema required for code-scanning uploads is modeled; this
// keeps the surface area small (~150 LoC) while producing documents that
// GitHub and other SARIF consumers accept as valid.
type SARIFFormatter struct{}
// SARIF 2.1.0 document model. Only the properties this package emits are
// declared. Field order is significant: encoding/json writes object keys in
// declaration order, so reordering fields changes the serialized output.
type (
	// sarifDoc is the top-level SARIF 2.1.0 document.
	sarifDoc struct {
		Schema  string     `json:"$schema"`
		Version string     `json:"version"`
		Runs    []sarifRun `json:"runs"`
	}

	// sarifRun is one entry of the document's "runs" array: the tool that
	// produced the run plus the results it reported.
	sarifRun struct {
		Tool    sarifTool     `json:"tool"`
		Results []sarifResult `json:"results"`
	}

	// sarifTool wraps the driver component under the "tool" key.
	sarifTool struct {
		Driver sarifDriver `json:"driver"`
	}

	// sarifDriver identifies the tool by name and version and lists the
	// rules that results refer to.
	sarifDriver struct {
		Name    string      `json:"name"`
		Version string      `json:"version"`
		Rules   []sarifRule `json:"rules"`
	}

	// sarifRule is a single rule entry with an id, a name, and a short
	// description.
	sarifRule struct {
		ID               string    `json:"id"`
		Name             string    `json:"name"`
		ShortDescription sarifText `json:"shortDescription"`
	}

	// sarifText is an object carrying a single "text" string; it is used for
	// both rule descriptions and result messages.
	sarifText struct {
		Text string `json:"text"`
	}

	// sarifResult is one reported result: the rule it belongs to, its level,
	// a message, and the locations it was found at.
	sarifResult struct {
		RuleID    string          `json:"ruleId"`
		Level     string          `json:"level"`
		Message   sarifText       `json:"message"`
		Locations []sarifLocation `json:"locations"`
	}

	// sarifLocation wraps a physical location under "physicalLocation".
	sarifLocation struct {
		PhysicalLocation sarifPhysicalLocation `json:"physicalLocation"`
	}

	// sarifPhysicalLocation pairs an artifact with a region inside it.
	sarifPhysicalLocation struct {
		ArtifactLocation sarifArtifactLocation `json:"artifactLocation"`
		Region           sarifRegion           `json:"region"`
	}

	// sarifArtifactLocation names the artifact by URI.
	sarifArtifactLocation struct {
		URI string `json:"uri"`
	}

	// sarifRegion records the line at which a result starts.
	sarifRegion struct {
		StartLine int `json:"startLine"`
	}
)
// Format implements the Formatter interface by writing findings to w as one
// SARIF 2.1.0 document containing a single run.
//
// The run's driver lists one rule per distinct ProviderName, in first-seen
// order, and the run holds one result per finding, in input order. Both
// arrays are encoded as [] rather than null when findings is empty, so the
// output is a complete document even with no findings. opts.ToolName and
// opts.ToolVersion fall back to "keyhunter" and "dev" when empty, and
// opts.Unmask selects the raw key value instead of the masked one for the
// result message. The returned error is whatever the JSON encoder reports.
func (SARIFFormatter) Format(findings []engine.Finding, w io.Writer, opts Options) error {
	driver := sarifDriver{
		Name:    opts.ToolName,
		Version: opts.ToolVersion,
		// Non-nil so that an empty rule list serializes as [] and not null.
		Rules: []sarifRule{},
	}
	if driver.Name == "" {
		driver.Name = "keyhunter"
	}
	if driver.Version == "" {
		driver.Version = "dev"
	}

	run := sarifRun{Results: make([]sarifResult, 0, len(findings))}

	// Rules and results are collected in one pass. A provider gets a rule the
	// first time it is encountered, which keeps rule order deterministic for a
	// given input slice.
	known := make(map[string]struct{})
	for _, finding := range findings {
		provider := finding.ProviderName
		if _, dup := known[provider]; !dup {
			known[provider] = struct{}{}
			driver.Rules = append(driver.Rules, sarifRule{
				ID:               provider,
				Name:             provider,
				ShortDescription: sarifText{Text: fmt.Sprintf("Leaked %s API key", provider)},
			})
		}

		shown := finding.KeyValue
		if !opts.Unmask {
			shown = finding.KeyMasked
		}

		// SARIF requires startLine >= 1, so non-positive line numbers (stdin/URL
		// sources, or no line tracking) are reported as line 1.
		line := finding.LineNumber
		if line < 1 {
			line = 1
		}

		where := sarifPhysicalLocation{
			ArtifactLocation: sarifArtifactLocation{URI: finding.Source},
			Region:           sarifRegion{StartLine: line},
		}
		run.Results = append(run.Results, sarifResult{
			RuleID:    provider,
			Level:     sarifLevel(finding.Confidence),
			Message:   sarifText{Text: fmt.Sprintf("Detected %s key (%s): %s", provider, finding.Confidence, shown)},
			Locations: []sarifLocation{{PhysicalLocation: where}},
		})
	}
	run.Tool = sarifTool{Driver: driver}

	report := sarifDoc{
		Schema:  "https://json.schemastore.org/sarif-2.1.0.json",
		Version: "2.1.0",
		Runs:    []sarifRun{run},
	}

	encoder := json.NewEncoder(w)
	encoder.SetIndent("", " ")
	return encoder.Encode(report)
}
// sarifLevel translates a KeyHunter confidence string into a SARIF result
// level: "high" becomes "error", "low" becomes "note", and everything else,
// including "medium" and any unrecognized value, becomes "warning". Matching
// is exact and case-sensitive. Because there is always a fallback, unexpected
// input never yields a level outside the SARIF vocabulary.
func sarifLevel(confidence string) string {
	// Start from the fallback; only the two non-warning cases override it.
	level := "warning"
	switch confidence {
	case "high":
		level = "error"
	case "low":
		level = "note"
	}
	return level
}