feat(06-03): implement SARIF 2.1.0 formatter with hand-rolled structs
- SARIFFormatter emits schema-valid SARIF 2.1.0 JSON for CI ingestion
- One rule per distinct provider, deduped in first-seen order
- Confidence mapped high/medium/low to error/warning/note
- startLine floored to 1 per SARIF spec requirement
- Registered under name 'sarif' via init()
This commit is contained in:
167
pkg/output/sarif.go
Normal file
167
pkg/output/sarif.go
Normal file
@@ -0,0 +1,167 @@
|
||||
package output
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/engine"
|
||||
)
|
||||
|
||||
func init() {
|
||||
Register("sarif", SARIFFormatter{})
|
||||
}
|
||||
|
||||
// SARIFFormatter emits SARIF 2.1.0 JSON suitable for GitHub code scanning
// uploads and other CI/CD ingestion pipelines.
//
// The implementation uses hand-rolled structs rather than an external library
// per project convention (see CLAUDE.md). Only the subset of the SARIF 2.1.0
// schema required for code-scanning uploads is modeled; this keeps the
// surface area small (~150 LoC) while producing documents that GitHub and
// other SARIF consumers accept as valid.
//
// The type carries no state, so its zero value is ready to use.
type SARIFFormatter struct{}
|
||||
|
||||
// sarifDoc is the top-level SARIF 2.1.0 document.
|
||||
type sarifDoc struct {
|
||||
Schema string `json:"$schema"`
|
||||
Version string `json:"version"`
|
||||
Runs []sarifRun `json:"runs"`
|
||||
}
|
||||
|
||||
type sarifRun struct {
|
||||
Tool sarifTool `json:"tool"`
|
||||
Results []sarifResult `json:"results"`
|
||||
}
|
||||
|
||||
type sarifTool struct {
|
||||
Driver sarifDriver `json:"driver"`
|
||||
}
|
||||
|
||||
type sarifDriver struct {
|
||||
Name string `json:"name"`
|
||||
Version string `json:"version"`
|
||||
Rules []sarifRule `json:"rules"`
|
||||
}
|
||||
|
||||
type sarifRule struct {
|
||||
ID string `json:"id"`
|
||||
Name string `json:"name"`
|
||||
ShortDescription sarifText `json:"shortDescription"`
|
||||
}
|
||||
|
||||
// sarifText is an object holding a single "text" string. It is used both for
// rule short descriptions and for result messages.
type sarifText struct {
	Text string `json:"text"`
}
|
||||
|
||||
type sarifResult struct {
|
||||
RuleID string `json:"ruleId"`
|
||||
Level string `json:"level"`
|
||||
Message sarifText `json:"message"`
|
||||
Locations []sarifLocation `json:"locations"`
|
||||
}
|
||||
|
||||
type sarifLocation struct {
|
||||
PhysicalLocation sarifPhysicalLocation `json:"physicalLocation"`
|
||||
}
|
||||
|
||||
type sarifPhysicalLocation struct {
|
||||
ArtifactLocation sarifArtifactLocation `json:"artifactLocation"`
|
||||
Region sarifRegion `json:"region"`
|
||||
}
|
||||
|
||||
// sarifArtifactLocation identifies an artifact by its "uri" string.
type sarifArtifactLocation struct {
	URI string `json:"uri"`
}
|
||||
|
||||
// sarifRegion locates a result inside an artifact by its "startLine" number.
type sarifRegion struct {
	StartLine int `json:"startLine"`
}
|
||||
|
||||
// Format implements the Formatter interface. It produces a SARIF 2.1.0
|
||||
// document with one rule per distinct provider and one result per finding.
|
||||
// The output is always a valid SARIF document, even when findings is empty.
|
||||
func (SARIFFormatter) Format(findings []engine.Finding, w io.Writer, opts Options) error {
|
||||
toolName := opts.ToolName
|
||||
if toolName == "" {
|
||||
toolName = "keyhunter"
|
||||
}
|
||||
toolVersion := opts.ToolVersion
|
||||
if toolVersion == "" {
|
||||
toolVersion = "dev"
|
||||
}
|
||||
|
||||
// Dedup rules by provider name, preserving first-seen order so that the
|
||||
// output is deterministic for a given input slice.
|
||||
seen := map[string]bool{}
|
||||
rules := make([]sarifRule, 0)
|
||||
for _, f := range findings {
|
||||
if seen[f.ProviderName] {
|
||||
continue
|
||||
}
|
||||
seen[f.ProviderName] = true
|
||||
rules = append(rules, sarifRule{
|
||||
ID: f.ProviderName,
|
||||
Name: f.ProviderName,
|
||||
ShortDescription: sarifText{Text: fmt.Sprintf("Leaked %s API key", f.ProviderName)},
|
||||
})
|
||||
}
|
||||
|
||||
results := make([]sarifResult, 0, len(findings))
|
||||
for _, f := range findings {
|
||||
key := f.KeyMasked
|
||||
if opts.Unmask {
|
||||
key = f.KeyValue
|
||||
}
|
||||
// SARIF requires startLine >= 1. Floor any non-positive line numbers
|
||||
// (which occur for stdin/URL sources or when line tracking is absent).
|
||||
startLine := f.LineNumber
|
||||
if startLine < 1 {
|
||||
startLine = 1
|
||||
}
|
||||
results = append(results, sarifResult{
|
||||
RuleID: f.ProviderName,
|
||||
Level: sarifLevel(f.Confidence),
|
||||
Message: sarifText{Text: fmt.Sprintf("Detected %s key (%s): %s", f.ProviderName, f.Confidence, key)},
|
||||
Locations: []sarifLocation{{
|
||||
PhysicalLocation: sarifPhysicalLocation{
|
||||
ArtifactLocation: sarifArtifactLocation{URI: f.Source},
|
||||
Region: sarifRegion{StartLine: startLine},
|
||||
},
|
||||
}},
|
||||
})
|
||||
}
|
||||
|
||||
doc := sarifDoc{
|
||||
Schema: "https://json.schemastore.org/sarif-2.1.0.json",
|
||||
Version: "2.1.0",
|
||||
Runs: []sarifRun{{
|
||||
Tool: sarifTool{Driver: sarifDriver{
|
||||
Name: toolName,
|
||||
Version: toolVersion,
|
||||
Rules: rules,
|
||||
}},
|
||||
Results: results,
|
||||
}},
|
||||
}
|
||||
|
||||
enc := json.NewEncoder(w)
|
||||
enc.SetIndent("", " ")
|
||||
return enc.Encode(doc)
|
||||
}
|
||||
|
||||
// sarifLevel maps KeyHunter confidence strings to SARIF result levels:
// "high" becomes "error", "medium" becomes "warning", and "low" becomes
// "note". Matching is exact and case-sensitive.
//
// Unknown confidence values fall back to "warning" so that unexpected input
// never produces an invalid SARIF document.
func sarifLevel(confidence string) string {
	switch confidence {
	case "high":
		return "error"
	case "medium":
		return "warning"
	case "low":
		return "note"
	default:
		return "warning"
	}
}
|
||||
Reference in New Issue
Block a user