From bd8eb9b611fc475264aa4c9081338cc02db77d12 Mon Sep 17 00:00:00 2001
From: salvacybersec
Date: Sun, 5 Apr 2026 23:55:38 +0300
Subject: [PATCH] test(07-03): SARIF GitHub code scanning validation

- Minimal required-fields fixture for GitHub SARIF upload schema
- TestSARIFGitHubValidation: asserts $schema/version/runs, tool.driver.name,
  per-result ruleId/level/message/locations,
  physicalLocation.region.startLine >= 1
- Covers startLine floor for LineNumber=0 inputs
- TestSARIFGitHubValidation_EmptyFindings: empty input still yields a valid
  document with results: [] (not null)
---
 pkg/output/sarif_github_test.go                | 264 ++++++++++++++++++
 .../sarif/sarif-2.1.0-minimal-schema.json      |   9 +
 2 files changed, 273 insertions(+)
 create mode 100644 pkg/output/sarif_github_test.go
 create mode 100644 testdata/sarif/sarif-2.1.0-minimal-schema.json

diff --git a/pkg/output/sarif_github_test.go b/pkg/output/sarif_github_test.go
new file mode 100644
index 0000000..41e686d
--- /dev/null
+++ b/pkg/output/sarif_github_test.go
@@ -0,0 +1,264 @@
+package output
+
+import (
+	"bytes"
+	"encoding/json"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"github.com/salvacybersec/keyhunter/pkg/engine"
+)
+
+// minimalSchema mirrors the JSON fixture at
+// testdata/sarif/sarif-2.1.0-minimal-schema.json and documents the subset of
+// SARIF 2.1.0 that GitHub Code Scanning enforces on upload.
+type minimalSchema struct {
+	RequiredTopLevel         []string `json:"required_top_level"`
+	RequiredRun              []string `json:"required_run"`
+	RequiredToolDriver       []string `json:"required_tool_driver"`
+	RequiredResult           []string `json:"required_result"`
+	RequiredLocationPhysical []string `json:"required_location_physical"`
+	RequiredRegion           []string `json:"required_region"`
+	AllowedLevels            []string `json:"allowed_levels"`
+}
+
+func loadMinimalSchema(t *testing.T) minimalSchema {
+	t.Helper()
+	// The test file lives at pkg/output/; the fixture is at
+	// <repo root>/testdata/sarif/. The repo root is resolved as a fixed
+	// two levels up (go test always runs with cwd == package dir).
+	wd, err := os.Getwd()
+	if err != nil {
+		t.Fatalf("os.Getwd: %v", err)
+	}
+	// pkg/output -> pkg -> <repo root>
+	repoRoot := filepath.Clean(filepath.Join(wd, "..", ".."))
+	path := filepath.Join(repoRoot, "testdata", "sarif", "sarif-2.1.0-minimal-schema.json")
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		t.Fatalf("read schema fixture: %v", err)
+	}
+	var s minimalSchema
+	if err := json.Unmarshal(raw, &s); err != nil {
+		t.Fatalf("unmarshal schema fixture: %v", err)
+	}
+	return s
+}
+
+func assertHasKey(t *testing.T, m map[string]any, key, ctx string) {
+	t.Helper()
+	if _, ok := m[key]; !ok {
+		t.Fatalf("missing required field %q in %s", key, ctx)
+	}
+}
+
+func asMap(t *testing.T, v any, ctx string) map[string]any {
+	t.Helper()
+	m, ok := v.(map[string]any)
+	if !ok {
+		t.Fatalf("%s: expected object, got %T", ctx, v)
+	}
+	return m
+}
+
+func asSlice(t *testing.T, v any, ctx string) []any {
+	t.Helper()
+	s, ok := v.([]any)
+	if !ok {
+		t.Fatalf("%s: expected array, got %T", ctx, v)
+	}
+	return s
+}
+
+func containsStr(haystack []string, needle string) bool {
+	for _, s := range haystack {
+		if s == needle {
+			return true
+		}
+	}
+	return false
+}
+
+func TestSARIFGitHubValidation(t *testing.T) {
+	schema := loadMinimalSchema(t)
+
+	findings := []engine.Finding{
+		{
+			ProviderName: "openai",
+			KeyValue:     "sk-proj-AAAAAAAAAAAAAAAAAAAAAAAA",
+			KeyMasked:    "sk-proj-...AAAA",
+			Confidence:   "high",
+			Source:       "src/config/api.go",
+			SourceType:   "file",
+			LineNumber:   42,
+		},
+		{
+			ProviderName: "anthropic",
+			KeyValue:     "sk-ant-api03-BBBBBBBBBBBBBBBBBBBB",
+			KeyMasked:    "sk-ant-a...BBBB",
+			Confidence:   "medium",
+			Source:       "scripts/deploy.sh",
+			SourceType:   "file",
+			LineNumber:   7,
+		},
+		{
+			// LineNumber intentionally zero to exercise the startLine floor.
+			ProviderName: "groq",
+			KeyValue:     "gsk_CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC",
+			KeyMasked:    "gsk_CCCC...CCCC",
+			Confidence:   "low",
+			Source:       "stdin",
+			SourceType:   "stdin",
+			LineNumber:   0,
+		},
+	}
+
+	var buf bytes.Buffer
+	if err := (SARIFFormatter{}).Format(findings, &buf, Options{ToolName: "keyhunter", ToolVersion: "test"}); err != nil {
+		t.Fatalf("Format: %v", err)
+	}
+
+	var doc map[string]any
+	if err := json.Unmarshal(buf.Bytes(), &doc); err != nil {
+		t.Fatalf("unmarshal SARIF: %v\nraw=%s", err, buf.String())
+	}
+
+	for _, k := range schema.RequiredTopLevel {
+		assertHasKey(t, doc, k, "sarif root")
+	}
+
+	if v, _ := doc["version"].(string); v != "2.1.0" {
+		t.Fatalf("version: expected 2.1.0, got %q", v)
+	}
+	schemaURL, _ := doc["$schema"].(string)
+	if schemaURL == "" || !strings.HasPrefix(schemaURL, "https://") {
+		t.Fatalf("$schema: expected non-empty https URL, got %q", schemaURL)
+	}
+
+	runs := asSlice(t, doc["runs"], "runs")
+	if len(runs) != 1 {
+		t.Fatalf("expected exactly 1 run, got %d", len(runs))
+	}
+	run := asMap(t, runs[0], "runs[0]")
+	for _, k := range schema.RequiredRun {
+		assertHasKey(t, run, k, "runs[0]")
+	}
+
+	tool := asMap(t, run["tool"], "runs[0].tool")
+	driver := asMap(t, tool["driver"], "runs[0].tool.driver")
+	for _, k := range schema.RequiredToolDriver {
+		assertHasKey(t, driver, k, "runs[0].tool.driver")
+	}
+	if name, _ := driver["name"].(string); name != "keyhunter" {
+		t.Fatalf("tool.driver.name: expected keyhunter, got %q", name)
+	}
+	if ver, _ := driver["version"].(string); ver == "" {
+		t.Fatalf("tool.driver.version must be non-empty")
+	}
+
+	results := asSlice(t, run["results"], "runs[0].results")
+	if len(results) != len(findings) {
+		t.Fatalf("expected %d results, got %d", len(findings), len(results))
+	}
+
+	// Track whether we saw a line-floor conversion.
+	sawFlooredStartLine := false
+
+	for i, r := range results {
+		rm := asMap(t, r, "result")
+		for _, k := range schema.RequiredResult {
+			assertHasKey(t, rm, k, "result")
+		}
+		if ruleID, _ := rm["ruleId"].(string); ruleID == "" {
+			t.Fatalf("result[%d].ruleId must be non-empty", i)
+		}
+		level, _ := rm["level"].(string)
+		if !containsStr(schema.AllowedLevels, level) {
+			t.Fatalf("result[%d].level=%q not in allowed_levels %v", i, level, schema.AllowedLevels)
+		}
+		msg := asMap(t, rm["message"], "result.message")
+		if text, _ := msg["text"].(string); text == "" {
+			t.Fatalf("result[%d].message.text must be non-empty", i)
+		}
+		locations := asSlice(t, rm["locations"], "result.locations")
+		if len(locations) == 0 {
+			t.Fatalf("result[%d].locations must be non-empty", i)
+		}
+		for j, loc := range locations {
+			lm := asMap(t, loc, "location")
+			phys := asMap(t, lm["physicalLocation"], "location.physicalLocation")
+			for _, k := range schema.RequiredLocationPhysical {
+				assertHasKey(t, phys, k, "physicalLocation")
+			}
+			art := asMap(t, phys["artifactLocation"], "physicalLocation.artifactLocation")
+			if uri, _ := art["uri"].(string); uri == "" {
+				t.Fatalf("result[%d].location[%d].artifactLocation.uri must be non-empty", i, j)
+			}
+			region := asMap(t, phys["region"], "physicalLocation.region")
+			for _, k := range schema.RequiredRegion {
+				assertHasKey(t, region, k, "region")
+			}
+			// JSON numbers decode as float64.
+			startLineF, ok := region["startLine"].(float64)
+			if !ok {
+				t.Fatalf("result[%d].location[%d].region.startLine must be a number", i, j)
+			}
+			startLine := int(startLineF)
+			if startLine < 1 {
+				t.Fatalf("result[%d].location[%d].region.startLine must be >= 1, got %d", i, j, startLine)
+			}
+			// The groq finding had LineNumber: 0; it should have been floored to 1.
+			if findings[i].LineNumber == 0 && startLine == 1 {
+				sawFlooredStartLine = true
+			}
+		}
+	}
+
+	if !sawFlooredStartLine {
+		t.Fatal("expected startLine to be floored to 1 for LineNumber==0 finding")
+	}
+}
+
+func TestSARIFGitHubValidation_EmptyFindings(t *testing.T) {
+	schema := loadMinimalSchema(t)
+
+	var buf bytes.Buffer
+	if err := (SARIFFormatter{}).Format(nil, &buf, Options{ToolName: "keyhunter", ToolVersion: "test"}); err != nil {
+		t.Fatalf("Format: %v", err)
+	}
+
+	var doc map[string]any
+	if err := json.Unmarshal(buf.Bytes(), &doc); err != nil {
+		t.Fatalf("unmarshal SARIF: %v", err)
+	}
+
+	for _, k := range schema.RequiredTopLevel {
+		assertHasKey(t, doc, k, "sarif root")
+	}
+	runs := asSlice(t, doc["runs"], "runs")
+	if len(runs) != 1 {
+		t.Fatalf("expected exactly 1 run, got %d", len(runs))
+	}
+	run := asMap(t, runs[0], "runs[0]")
+
+	// results must be present AND a non-nil array (not JSON null).
+	rawResults, ok := run["results"]
+	if !ok {
+		t.Fatal("runs[0].results missing")
+	}
+	if rawResults == nil {
+		t.Fatal("runs[0].results must be [] not null")
+	}
+	results := asSlice(t, rawResults, "runs[0].results")
+	if len(results) != 0 {
+		t.Fatalf("expected empty results, got %d", len(results))
+	}
+
+	tool := asMap(t, run["tool"], "runs[0].tool")
+	driver := asMap(t, tool["driver"], "runs[0].tool.driver")
+	for _, k := range schema.RequiredToolDriver {
+		assertHasKey(t, driver, k, "runs[0].tool.driver")
+	}
+}
diff --git a/testdata/sarif/sarif-2.1.0-minimal-schema.json b/testdata/sarif/sarif-2.1.0-minimal-schema.json
new file mode 100644
index 0000000..b117606
--- /dev/null
+++ b/testdata/sarif/sarif-2.1.0-minimal-schema.json
@@ -0,0 +1,9 @@
+{
+  "required_top_level": ["$schema", "version", "runs"],
+  "required_run": ["tool", "results"],
+  "required_tool_driver": ["name", "version"],
+  "required_result": ["ruleId", "level", "message", "locations"],
+  "required_location_physical": ["artifactLocation", "region"],
+  "required_region": ["startLine"],
+  "allowed_levels": ["error", "warning", "note", "none"]
+}