- SARIFFormatter emits schema-valid SARIF 2.1.0 JSON for CI ingestion.
- One rule is emitted per distinct provider, deduplicated in first-seen order.
- Confidence high/medium/low is mapped to SARIF level error/warning/note.
- startLine is floored to 1, as required by the SARIF spec.
- The formatter is registered under the name 'sarif' via init().
168 lines
4.3 KiB
Go
168 lines
4.3 KiB
Go
package output
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
|
|
"github.com/salvacybersec/keyhunter/pkg/engine"
|
|
)
|
|
|
|
func init() {
|
|
Register("sarif", SARIFFormatter{})
|
|
}
|
|
|
|
// SARIFFormatter writes findings as SARIF 2.1.0 JSON, the format consumed by
// GitHub code scanning uploads and other CI/CD ingestion pipelines.
//
// The document is built from hand-rolled structs instead of an external SARIF
// library, following project convention (see CLAUDE.md). Only the subset of
// the SARIF 2.1.0 schema needed for code-scanning uploads is modeled, which
// keeps the implementation small while still producing documents that GitHub
// and other SARIF consumers accept as valid.
//
// The type carries no state; all behavior lives in its Format method.
type SARIFFormatter struct{}
|
|
|
|
// The types in this group model the portion of a SARIF 2.1.0 document that
// Format populates. Every field is serialized under the JSON property name
// given in its struct tag, and fields are encoded in declaration order.
type (
	// sarifDoc is the top-level SARIF 2.1.0 document: the schema reference,
	// the SARIF version string, and the list of runs.
	sarifDoc struct {
		Schema  string     `json:"$schema"`
		Version string     `json:"version"`
		Runs    []sarifRun `json:"runs"`
	}

	// sarifRun is one entry of the document's "runs" array. It pairs the
	// description of the tool with the results it reported.
	sarifRun struct {
		Tool    sarifTool     `json:"tool"`
		Results []sarifResult `json:"results"`
	}

	// sarifTool is the "tool" object of a run; it only wraps the driver.
	sarifTool struct {
		Driver sarifDriver `json:"driver"`
	}

	// sarifDriver identifies the tool by name and version and lists the rules
	// that results refer to.
	sarifDriver struct {
		Name    string      `json:"name"`
		Version string      `json:"version"`
		Rules   []sarifRule `json:"rules"`
	}

	// sarifRule is one entry of the driver's "rules" array.
	sarifRule struct {
		ID               string    `json:"id"`
		Name             string    `json:"name"`
		ShortDescription sarifText `json:"shortDescription"`
	}

	// sarifText is an object with a single "text" property. It is used for
	// both rule short descriptions and result messages.
	sarifText struct {
		Text string `json:"text"`
	}

	// sarifResult is one entry of a run's "results" array: the rule it
	// belongs to, its level, a message, and where it was found.
	sarifResult struct {
		RuleID    string          `json:"ruleId"`
		Level     string          `json:"level"`
		Message   sarifText       `json:"message"`
		Locations []sarifLocation `json:"locations"`
	}

	// sarifLocation is one entry of a result's "locations" array; it only
	// wraps the physical location.
	sarifLocation struct {
		PhysicalLocation sarifPhysicalLocation `json:"physicalLocation"`
	}

	// sarifPhysicalLocation names the artifact a result was found in and the
	// region within that artifact.
	sarifPhysicalLocation struct {
		ArtifactLocation sarifArtifactLocation `json:"artifactLocation"`
		Region           sarifRegion           `json:"region"`
	}

	// sarifArtifactLocation holds the "uri" of the artifact.
	sarifArtifactLocation struct {
		URI string `json:"uri"`
	}

	// sarifRegion holds the "startLine" of a result within its artifact.
	sarifRegion struct {
		StartLine int `json:"startLine"`
	}
)
|
|
|
|
// Format implements the Formatter interface. It produces a SARIF 2.1.0
|
|
// document with one rule per distinct provider and one result per finding.
|
|
// The output is always a valid SARIF document, even when findings is empty.
|
|
func (SARIFFormatter) Format(findings []engine.Finding, w io.Writer, opts Options) error {
|
|
toolName := opts.ToolName
|
|
if toolName == "" {
|
|
toolName = "keyhunter"
|
|
}
|
|
toolVersion := opts.ToolVersion
|
|
if toolVersion == "" {
|
|
toolVersion = "dev"
|
|
}
|
|
|
|
// Dedup rules by provider name, preserving first-seen order so that the
|
|
// output is deterministic for a given input slice.
|
|
seen := map[string]bool{}
|
|
rules := make([]sarifRule, 0)
|
|
for _, f := range findings {
|
|
if seen[f.ProviderName] {
|
|
continue
|
|
}
|
|
seen[f.ProviderName] = true
|
|
rules = append(rules, sarifRule{
|
|
ID: f.ProviderName,
|
|
Name: f.ProviderName,
|
|
ShortDescription: sarifText{Text: fmt.Sprintf("Leaked %s API key", f.ProviderName)},
|
|
})
|
|
}
|
|
|
|
results := make([]sarifResult, 0, len(findings))
|
|
for _, f := range findings {
|
|
key := f.KeyMasked
|
|
if opts.Unmask {
|
|
key = f.KeyValue
|
|
}
|
|
// SARIF requires startLine >= 1. Floor any non-positive line numbers
|
|
// (which occur for stdin/URL sources or when line tracking is absent).
|
|
startLine := f.LineNumber
|
|
if startLine < 1 {
|
|
startLine = 1
|
|
}
|
|
results = append(results, sarifResult{
|
|
RuleID: f.ProviderName,
|
|
Level: sarifLevel(f.Confidence),
|
|
Message: sarifText{Text: fmt.Sprintf("Detected %s key (%s): %s", f.ProviderName, f.Confidence, key)},
|
|
Locations: []sarifLocation{{
|
|
PhysicalLocation: sarifPhysicalLocation{
|
|
ArtifactLocation: sarifArtifactLocation{URI: f.Source},
|
|
Region: sarifRegion{StartLine: startLine},
|
|
},
|
|
}},
|
|
})
|
|
}
|
|
|
|
doc := sarifDoc{
|
|
Schema: "https://json.schemastore.org/sarif-2.1.0.json",
|
|
Version: "2.1.0",
|
|
Runs: []sarifRun{{
|
|
Tool: sarifTool{Driver: sarifDriver{
|
|
Name: toolName,
|
|
Version: toolVersion,
|
|
Rules: rules,
|
|
}},
|
|
Results: results,
|
|
}},
|
|
}
|
|
|
|
enc := json.NewEncoder(w)
|
|
enc.SetIndent("", " ")
|
|
return enc.Encode(doc)
|
|
}
|
|
|
|
// sarifLevel translates a KeyHunter confidence string into a SARIF result
// level: "high" becomes "error" and "low" becomes "note". "medium" and every
// other value, including the empty string, become "warning", so unexpected
// input never produces an invalid SARIF level. Matching is exact and
// case-sensitive.
func sarifLevel(confidence string) string {
	switch confidence {
	case "high":
		return "error"
	case "low":
		return "note"
	}
	// "medium" and any unrecognized confidence share the same level.
	return "warning"
}
|