package output

import (
	"bytes"
	"encoding/json"
	"os"
	"path/filepath"
	"strings"
	"testing"

	"github.com/salvacybersec/keyhunter/pkg/engine"
)

// minimalSchema mirrors the JSON fixture at
// testdata/sarif/sarif-2.1.0-minimal-schema.json and documents the subset of
// SARIF 2.1.0 that GitHub Code Scanning enforces on upload.
type minimalSchema struct {
	RequiredTopLevel         []string `json:"required_top_level"`
	RequiredRun              []string `json:"required_run"`
	RequiredToolDriver       []string `json:"required_tool_driver"`
	RequiredResult           []string `json:"required_result"`
	RequiredLocationPhysical []string `json:"required_location_physical"`
	RequiredRegion           []string `json:"required_region"`
	AllowedLevels            []string `json:"allowed_levels"`
}

// loadMinimalSchema reads and decodes the minimal-schema fixture, failing the
// test immediately if the working directory cannot be determined, the file
// cannot be read, or its contents are not valid JSON for minimalSchema.
func loadMinimalSchema(t *testing.T) minimalSchema {
	t.Helper()

	// The test file lives at pkg/output/; the fixture is at
	// <repo root>/testdata/sarif/. Walk upward to find the repo root
	// deterministically (go test always runs with cwd == package dir).
	wd, err := os.Getwd()
	if err != nil {
		t.Fatalf("os.Getwd: %v", err)
	}

	// pkg/output -> pkg -> <repo root>
	repoRoot := filepath.Clean(filepath.Join(wd, "..", ".."))
	path := filepath.Join(repoRoot, "testdata", "sarif", "sarif-2.1.0-minimal-schema.json")

	raw, err := os.ReadFile(path)
	if err != nil {
		t.Fatalf("read schema fixture: %v", err)
	}

	var s minimalSchema
	if err := json.Unmarshal(raw, &s); err != nil {
		t.Fatalf("unmarshal schema fixture: %v", err)
	}
	return s
}

// assertHasKey fails the test when key is absent from m. ctx names the JSON
// object being inspected and is only used in the failure message.
func assertHasKey(t *testing.T, m map[string]any, key, ctx string) {
	t.Helper()
	if _, ok := m[key]; !ok {
		t.Fatalf("missing required field %q in %s", key, ctx)
	}
}

// asMap asserts that v is a decoded JSON object and returns it. ctx names the
// value in the failure message.
func asMap(t *testing.T, v any, ctx string) map[string]any {
	t.Helper()
	m, ok := v.(map[string]any)
	if !ok {
		t.Fatalf("%s: expected object, got %T", ctx, v)
	}
	return m
}

// asSlice asserts that v is a decoded JSON array and returns it. ctx names the
// value in the failure message.
func asSlice(t *testing.T, v any, ctx string) []any {
	t.Helper()
	s, ok := v.([]any)
	if !ok {
		t.Fatalf("%s: expected array, got %T", ctx, v)
	}
	return s
}

// containsStr reports whether needle is an element of haystack.
func containsStr(haystack []string, needle string) bool {
	for _, s := range haystack {
		if s == needle {
			return true
		}
	}
	return false
}

// TestSARIFGitHubValidation formats three findings as SARIF, decodes the
// output into generic JSON, and checks every level of the document against the
// required-field lists and allowed levels loaded from the minimal schema
// fixture. It also verifies that a finding with LineNumber 0 is emitted with
// startLine 1.
func TestSARIFGitHubValidation(t *testing.T) {
	schema := loadMinimalSchema(t)

	findings := []engine.Finding{
		{
			ProviderName: "openai",
			KeyValue:     "sk-proj-AAAAAAAAAAAAAAAAAAAAAAAA",
			KeyMasked:    "sk-proj-...AAAA",
			Confidence:   "high",
			Source:       "src/config/api.go",
			SourceType:   "file",
			LineNumber:   42,
		},
		{
			ProviderName: "anthropic",
			KeyValue:     "sk-ant-api03-BBBBBBBBBBBBBBBBBBBB",
			KeyMasked:    "sk-ant-a...BBBB",
			Confidence:   "medium",
			Source:       "scripts/deploy.sh",
			SourceType:   "file",
			LineNumber:   7,
		},
		{
			// LineNumber intentionally zero to exercise the startLine floor.
			ProviderName: "groq",
			KeyValue:     "gsk_CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC",
			KeyMasked:    "gsk_CCCC...CCCC",
			Confidence:   "low",
			Source:       "stdin",
			SourceType:   "stdin",
			LineNumber:   0,
		},
	}

	var buf bytes.Buffer
	if err := (SARIFFormatter{}).Format(findings, &buf, Options{ToolName: "keyhunter", ToolVersion: "test"}); err != nil {
		t.Fatalf("Format: %v", err)
	}

	var doc map[string]any
	if err := json.Unmarshal(buf.Bytes(), &doc); err != nil {
		t.Fatalf("unmarshal SARIF: %v\nraw=%s", err, buf.String())
	}

	// Top-level document.
	for _, k := range schema.RequiredTopLevel {
		assertHasKey(t, doc, k, "sarif root")
	}
	if v, _ := doc["version"].(string); v != "2.1.0" {
		t.Fatalf("version: expected 2.1.0, got %q", v)
	}
	schemaURL, _ := doc["$schema"].(string)
	if schemaURL == "" || !strings.HasPrefix(schemaURL, "https://") {
		t.Fatalf("$schema: expected non-empty https URL, got %q", schemaURL)
	}

	// Single run and its tool driver.
	runs := asSlice(t, doc["runs"], "runs")
	if len(runs) != 1 {
		t.Fatalf("expected exactly 1 run, got %d", len(runs))
	}
	run := asMap(t, runs[0], "runs[0]")
	for _, k := range schema.RequiredRun {
		assertHasKey(t, run, k, "runs[0]")
	}

	tool := asMap(t, run["tool"], "runs[0].tool")
	driver := asMap(t, tool["driver"], "runs[0].tool.driver")
	for _, k := range schema.RequiredToolDriver {
		assertHasKey(t, driver, k, "runs[0].tool.driver")
	}
	if name, _ := driver["name"].(string); name != "keyhunter" {
		t.Fatalf("tool.driver.name: expected keyhunter, got %q", name)
	}
	if ver, _ := driver["version"].(string); ver == "" {
		t.Fatalf("tool.driver.version must be non-empty")
	}

	// Results: the length check below also makes findings[i] safe to index
	// inside the loop.
	results := asSlice(t, run["results"], "runs[0].results")
	if len(results) != len(findings) {
		t.Fatalf("expected %d results, got %d", len(findings), len(results))
	}

	// Track whether we saw a line-floor conversion.
	sawFlooredStartLine := false

	for i, r := range results {
		rm := asMap(t, r, "result")
		for _, k := range schema.RequiredResult {
			assertHasKey(t, rm, k, "result")
		}
		if ruleID, _ := rm["ruleId"].(string); ruleID == "" {
			t.Fatalf("result[%d].ruleId must be non-empty", i)
		}
		level, _ := rm["level"].(string)
		if !containsStr(schema.AllowedLevels, level) {
			t.Fatalf("result[%d].level=%q not in allowed_levels %v", i, level, schema.AllowedLevels)
		}
		msg := asMap(t, rm["message"], "result.message")
		if text, _ := msg["text"].(string); text == "" {
			t.Fatalf("result[%d].message.text must be non-empty", i)
		}

		locations := asSlice(t, rm["locations"], "result.locations")
		if len(locations) == 0 {
			t.Fatalf("result[%d].locations must be non-empty", i)
		}
		for j, loc := range locations {
			lm := asMap(t, loc, "location")
			phys := asMap(t, lm["physicalLocation"], "location.physicalLocation")
			for _, k := range schema.RequiredLocationPhysical {
				assertHasKey(t, phys, k, "physicalLocation")
			}

			art := asMap(t, phys["artifactLocation"], "physicalLocation.artifactLocation")
			if uri, _ := art["uri"].(string); uri == "" {
				t.Fatalf("result[%d].location[%d].artifactLocation.uri must be non-empty", i, j)
			}

			region := asMap(t, phys["region"], "physicalLocation.region")
			for _, k := range schema.RequiredRegion {
				assertHasKey(t, region, k, "region")
			}

			// JSON numbers decode as float64.
			startLineF, ok := region["startLine"].(float64)
			if !ok {
				t.Fatalf("result[%d].location[%d].region.startLine must be a number", i, j)
			}
			startLine := int(startLineF)
			// int() truncates, so without this round-trip check a fractional
			// value such as 1.5 would be silently accepted as 1.
			if float64(startLine) != startLineF {
				t.Fatalf("result[%d].location[%d].region.startLine must be an integer, got %v", i, j, startLineF)
			}
			if startLine < 1 {
				t.Fatalf("result[%d].location[%d].region.startLine must be >= 1, got %d", i, j, startLine)
			}

			// The groq finding had LineNumber: 0; it should have been floored to 1.
			if findings[i].LineNumber == 0 && startLine == 1 {
				sawFlooredStartLine = true
			}
		}
	}

	if !sawFlooredStartLine {
		t.Fatal("expected startLine to be floored to 1 for LineNumber==0 finding")
	}
}

// TestSARIFGitHubValidation_EmptyFindings formats a nil findings slice and
// checks that the output still has the required top-level and tool.driver
// fields, exactly one run, and a results value that is an empty JSON array
// rather than null or absent.
func TestSARIFGitHubValidation_EmptyFindings(t *testing.T) {
	schema := loadMinimalSchema(t)

	var buf bytes.Buffer
	if err := (SARIFFormatter{}).Format(nil, &buf, Options{ToolName: "keyhunter", ToolVersion: "test"}); err != nil {
		t.Fatalf("Format: %v", err)
	}

	var doc map[string]any
	if err := json.Unmarshal(buf.Bytes(), &doc); err != nil {
		t.Fatalf("unmarshal SARIF: %v", err)
	}

	for _, k := range schema.RequiredTopLevel {
		assertHasKey(t, doc, k, "sarif root")
	}

	runs := asSlice(t, doc["runs"], "runs")
	if len(runs) != 1 {
		t.Fatalf("expected exactly 1 run, got %d", len(runs))
	}
	run := asMap(t, runs[0], "runs[0]")

	// results must be present AND a non-nil array (not JSON null).
	rawResults, ok := run["results"]
	if !ok {
		t.Fatal("runs[0].results missing")
	}
	if rawResults == nil {
		t.Fatal("runs[0].results must be [] not null")
	}
	results := asSlice(t, rawResults, "runs[0].results")
	if len(results) != 0 {
		t.Fatalf("expected empty results, got %d", len(results))
	}

	tool := asMap(t, run["tool"], "runs[0].tool")
	driver := asMap(t, tool["driver"], "runs[0].tool.driver")
	for _, k := range schema.RequiredToolDriver {
		assertHasKey(t, driver, k, "runs[0].tool.driver")
	}
}