test(07-03): SARIF GitHub code scanning validation

- Minimal required-fields fixture for GitHub SARIF upload schema
- TestSARIFGitHubValidation: asserts $schema/version/runs, tool.driver.name,
  per-result ruleId/level/message/locations, physicalLocation.region.startLine >= 1
- Covers startLine floor for LineNumber=0 inputs
- TestSARIFGitHubValidation_EmptyFindings: empty input still yields a valid
  document with results: [] (not null)
This commit is contained in:
salvacybersec
2026-04-05 23:55:38 +03:00
parent 83640ac200
commit bd8eb9b611
2 changed files with 273 additions and 0 deletions

View File

@@ -0,0 +1,264 @@
package output
import (
"bytes"
"encoding/json"
"os"
"path/filepath"
"strings"
"testing"
"github.com/salvacybersec/keyhunter/pkg/engine"
)
// minimalSchema mirrors the JSON fixture at
// testdata/sarif/sarif-2.1.0-minimal-schema.json and documents the subset of
// SARIF 2.1.0 that GitHub Code Scanning enforces on upload.
// minimalSchema mirrors the JSON fixture at
// testdata/sarif/sarif-2.1.0-minimal-schema.json and documents the subset of
// SARIF 2.1.0 that GitHub Code Scanning enforces on upload. Each Required*
// slice names the keys that must be present at that level of the document;
// AllowedLevels enumerates the legal values for result.level.
type minimalSchema struct {
	RequiredTopLevel         []string `json:"required_top_level"`         // keys required on the SARIF root object
	RequiredRun              []string `json:"required_run"`               // keys required on each runs[] entry
	RequiredToolDriver       []string `json:"required_tool_driver"`       // keys required on tool.driver
	RequiredResult           []string `json:"required_result"`            // keys required on each results[] entry
	RequiredLocationPhysical []string `json:"required_location_physical"` // keys required on physicalLocation
	RequiredRegion           []string `json:"required_region"`            // keys required on region
	AllowedLevels            []string `json:"allowed_levels"`             // permitted result.level values
}
// loadMinimalSchema reads the JSON fixture
// testdata/sarif/sarif-2.1.0-minimal-schema.json and decodes it into a
// minimalSchema, failing the test on any I/O or decode error.
//
// go test runs with cwd == the package directory (pkg/output), while the
// fixture lives under the repository root. Rather than hard-coding a relative
// depth such as "../..", walk upward from the working directory until a
// testdata/sarif/ directory containing the fixture is found — this keeps the
// lookup correct even if the package is later moved to a different depth.
func loadMinimalSchema(t *testing.T) minimalSchema {
	t.Helper()
	wd, err := os.Getwd()
	if err != nil {
		t.Fatalf("os.Getwd: %v", err)
	}
	const fixture = "sarif-2.1.0-minimal-schema.json"
	var raw []byte
	for dir := wd; ; {
		path := filepath.Join(dir, "testdata", "sarif", fixture)
		raw, err = os.ReadFile(path)
		if err == nil {
			break
		}
		parent := filepath.Dir(dir)
		if parent == dir {
			// Reached the filesystem root without finding the fixture.
			t.Fatalf("read schema fixture: not found walking up from %s (last error: %v)", wd, err)
		}
		dir = parent
	}
	var s minimalSchema
	if err := json.Unmarshal(raw, &s); err != nil {
		t.Fatalf("unmarshal schema fixture: %v", err)
	}
	return s
}
// assertHasKey fails the test unless key is present in m, naming ctx in the
// failure message to identify where in the SARIF document the field belongs.
func assertHasKey(t *testing.T, m map[string]any, key, ctx string) {
	t.Helper()
	_, present := m[key]
	if present {
		return
	}
	t.Fatalf("missing required field %q in %s", key, ctx)
}
// asMap asserts that v is a JSON object (map[string]any) and returns it,
// failing the test with the ctx label otherwise.
func asMap(t *testing.T, v any, ctx string) map[string]any {
	t.Helper()
	if obj, isMap := v.(map[string]any); isMap {
		return obj
	}
	t.Fatalf("%s: expected object, got %T", ctx, v)
	return nil
}
// asSlice asserts that v is a JSON array ([]any) and returns it, failing the
// test with the ctx label otherwise.
func asSlice(t *testing.T, v any, ctx string) []any {
	t.Helper()
	if arr, isSlice := v.([]any); isSlice {
		return arr
	}
	t.Fatalf("%s: expected array, got %T", ctx, v)
	return nil
}
// containsStr reports whether needle is one of the strings in haystack.
func containsStr(haystack []string, needle string) bool {
	for i := range haystack {
		if needle == haystack[i] {
			return true
		}
	}
	return false
}
// TestSARIFGitHubValidation formats a representative set of findings as SARIF
// and validates the document against the minimal required-field schema that
// GitHub Code Scanning enforces on upload (loaded from the testdata fixture).
// Beyond field presence it pins the concrete values GitHub cares about:
// version 2.1.0, an https $schema URL, exactly one run, a named and versioned
// tool driver, one result per finding, allowed level values, non-empty
// ruleId/message/locations, and region.startLine >= 1 — including the floor
// applied when a finding carries LineNumber == 0.
func TestSARIFGitHubValidation(t *testing.T) {
	schema := loadMinimalSchema(t)
	// Three findings spanning high/medium/low confidence and file/stdin
	// sources; the last one deliberately has LineNumber 0 to exercise the
	// startLine floor (SARIF requires startLine >= 1).
	findings := []engine.Finding{
		{
			ProviderName: "openai",
			KeyValue:     "sk-proj-AAAAAAAAAAAAAAAAAAAAAAAA",
			KeyMasked:    "sk-proj-...AAAA",
			Confidence:   "high",
			Source:       "src/config/api.go",
			SourceType:   "file",
			LineNumber:   42,
		},
		{
			ProviderName: "anthropic",
			KeyValue:     "sk-ant-api03-BBBBBBBBBBBBBBBBBBBB",
			KeyMasked:    "sk-ant-a...BBBB",
			Confidence:   "medium",
			Source:       "scripts/deploy.sh",
			SourceType:   "file",
			LineNumber:   7,
		},
		{
			// LineNumber intentionally zero to exercise the startLine floor.
			ProviderName: "groq",
			KeyValue:     "gsk_CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC",
			KeyMasked:    "gsk_CCCC...CCCC",
			Confidence:   "low",
			Source:       "stdin",
			SourceType:   "stdin",
			LineNumber:   0,
		},
	}
	var buf bytes.Buffer
	if err := (SARIFFormatter{}).Format(findings, &buf, Options{ToolName: "keyhunter", ToolVersion: "test"}); err != nil {
		t.Fatalf("Format: %v", err)
	}
	// Decode into generic maps so the assertions reflect the raw JSON shape
	// rather than whatever Go structs the formatter happens to use.
	var doc map[string]any
	if err := json.Unmarshal(buf.Bytes(), &doc); err != nil {
		t.Fatalf("unmarshal SARIF: %v\nraw=%s", err, buf.String())
	}
	// Root object: required keys plus exact version and an https $schema URL.
	for _, k := range schema.RequiredTopLevel {
		assertHasKey(t, doc, k, "sarif root")
	}
	if v, _ := doc["version"].(string); v != "2.1.0" {
		t.Fatalf("version: expected 2.1.0, got %q", v)
	}
	schemaURL, _ := doc["$schema"].(string)
	if schemaURL == "" || !strings.HasPrefix(schemaURL, "https://") {
		t.Fatalf("$schema: expected non-empty https URL, got %q", schemaURL)
	}
	// Exactly one run with the required tool/driver metadata.
	runs := asSlice(t, doc["runs"], "runs")
	if len(runs) != 1 {
		t.Fatalf("expected exactly 1 run, got %d", len(runs))
	}
	run := asMap(t, runs[0], "runs[0]")
	for _, k := range schema.RequiredRun {
		assertHasKey(t, run, k, "runs[0]")
	}
	tool := asMap(t, run["tool"], "runs[0].tool")
	driver := asMap(t, tool["driver"], "runs[0].tool.driver")
	for _, k := range schema.RequiredToolDriver {
		assertHasKey(t, driver, k, "runs[0].tool.driver")
	}
	if name, _ := driver["name"].(string); name != "keyhunter" {
		t.Fatalf("tool.driver.name: expected keyhunter, got %q", name)
	}
	if ver, _ := driver["version"].(string); ver == "" {
		t.Fatalf("tool.driver.version must be non-empty")
	}
	// One result per input finding; results are checked positionally, which
	// assumes the formatter preserves input order (true for this formatter).
	results := asSlice(t, run["results"], "runs[0].results")
	if len(results) != len(findings) {
		t.Fatalf("expected %d results, got %d", len(findings), len(results))
	}
	// Track whether we saw a line-floor conversion.
	sawFlooredStartLine := false
	for i, r := range results {
		rm := asMap(t, r, "result")
		for _, k := range schema.RequiredResult {
			assertHasKey(t, rm, k, "result")
		}
		if ruleID, _ := rm["ruleId"].(string); ruleID == "" {
			t.Fatalf("result[%d].ruleId must be non-empty", i)
		}
		level, _ := rm["level"].(string)
		if !containsStr(schema.AllowedLevels, level) {
			t.Fatalf("result[%d].level=%q not in allowed_levels %v", i, level, schema.AllowedLevels)
		}
		msg := asMap(t, rm["message"], "result.message")
		if text, _ := msg["text"].(string); text == "" {
			t.Fatalf("result[%d].message.text must be non-empty", i)
		}
		locations := asSlice(t, rm["locations"], "result.locations")
		if len(locations) == 0 {
			t.Fatalf("result[%d].locations must be non-empty", i)
		}
		// Every location needs physicalLocation.artifactLocation.uri and a
		// region whose startLine is a number >= 1.
		for j, loc := range locations {
			lm := asMap(t, loc, "location")
			phys := asMap(t, lm["physicalLocation"], "location.physicalLocation")
			for _, k := range schema.RequiredLocationPhysical {
				assertHasKey(t, phys, k, "physicalLocation")
			}
			art := asMap(t, phys["artifactLocation"], "physicalLocation.artifactLocation")
			if uri, _ := art["uri"].(string); uri == "" {
				t.Fatalf("result[%d].location[%d].artifactLocation.uri must be non-empty", i, j)
			}
			region := asMap(t, phys["region"], "physicalLocation.region")
			for _, k := range schema.RequiredRegion {
				assertHasKey(t, region, k, "region")
			}
			// JSON numbers decode as float64.
			startLineF, ok := region["startLine"].(float64)
			if !ok {
				t.Fatalf("result[%d].location[%d].region.startLine must be a number", i, j)
			}
			startLine := int(startLineF)
			if startLine < 1 {
				t.Fatalf("result[%d].location[%d].region.startLine must be >= 1, got %d", i, j, startLine)
			}
			// The groq finding had LineNumber: 0; it should have been floored to 1.
			if findings[i].LineNumber == 0 && startLine == 1 {
				sawFlooredStartLine = true
			}
		}
	}
	// Fail if no LineNumber==0 finding was floored — guards against the
	// formatter emitting some other (>1) line for unknown positions.
	if !sawFlooredStartLine {
		t.Fatal("expected startLine to be floored to 1 for LineNumber==0 finding")
	}
}
// TestSARIFGitHubValidation_EmptyFindings verifies that formatting zero
// findings still produces a structurally valid SARIF document: the required
// top-level and tool.driver fields are present, there is exactly one run, and
// runs[0].results is an empty JSON array ([]) rather than null — GitHub's
// upload endpoint rejects a null results value.
func TestSARIFGitHubValidation_EmptyFindings(t *testing.T) {
	schema := loadMinimalSchema(t)
	var buf bytes.Buffer
	// nil findings is the empty-input case callers hit on a clean scan.
	if err := (SARIFFormatter{}).Format(nil, &buf, Options{ToolName: "keyhunter", ToolVersion: "test"}); err != nil {
		t.Fatalf("Format: %v", err)
	}
	var doc map[string]any
	if err := json.Unmarshal(buf.Bytes(), &doc); err != nil {
		t.Fatalf("unmarshal SARIF: %v", err)
	}
	for _, k := range schema.RequiredTopLevel {
		assertHasKey(t, doc, k, "sarif root")
	}
	runs := asSlice(t, doc["runs"], "runs")
	if len(runs) != 1 {
		t.Fatalf("expected exactly 1 run, got %d", len(runs))
	}
	run := asMap(t, runs[0], "runs[0]")
	// results must be present AND a non-nil array (not JSON null).
	// JSON null decodes to a nil any, while [] decodes to a non-nil []any,
	// so the nil check below distinguishes the two encodings.
	rawResults, ok := run["results"]
	if !ok {
		t.Fatal("runs[0].results missing")
	}
	if rawResults == nil {
		t.Fatal("runs[0].results must be [] not null")
	}
	results := asSlice(t, rawResults, "runs[0].results")
	if len(results) != 0 {
		t.Fatalf("expected empty results, got %d", len(results))
	}
	tool := asMap(t, run["tool"], "runs[0].tool")
	driver := asMap(t, tool["driver"], "runs[0].tool.driver")
	for _, k := range schema.RequiredToolDriver {
		assertHasKey(t, driver, k, "runs[0].tool.driver")
	}
}

View File

@@ -0,0 +1,9 @@
{
"required_top_level": ["$schema", "version", "runs"],
"required_run": ["tool", "results"],
"required_tool_driver": ["name", "version"],
"required_result": ["ruleId", "level", "message", "locations"],
"required_location_physical": ["artifactLocation", "region"],
"required_region": ["startLine"],
"allowed_levels": ["error", "warning", "note", "none"]
}