feat(01-04): add shared Chunk type, Finding struct, Shannon entropy, and MaskKey

- pkg/types/chunk.go: shared Chunk struct that breaks the engine<->sources circular import
- pkg/engine/finding.go: Finding struct with MaskKey for pipeline output
- pkg/engine/entropy.go: Shannon entropy function using math.Log2
- pkg/engine/entropy_test.go: TDD tests for Shannon and MaskKey
This commit is contained in:
salvacybersec
2026-04-05 12:18:26 +03:00
parent ef8717b9ab
commit 45cc676f55
4 changed files with 90 additions and 0 deletions

23
pkg/engine/entropy.go Normal file
View File

@@ -0,0 +1,23 @@
package engine
import "math"
// Shannon returns the Shannon entropy of s, expressed in bits per character,
// where a character is one rune as produced by ranging over the string.
// The empty string yields 0.0.
// Results of 3.5 or more indicate high randomness, consistent with real API keys.
func Shannon(s string) float64 {
	if s == "" {
		return 0.0
	}

	// Build the per-rune histogram and the total rune count in a single pass.
	counts := make(map[rune]float64)
	total := 0.0
	for _, r := range s {
		counts[r]++
		total++
	}

	// H = -sum(p * log2(p)) over every distinct rune.
	var bits float64
	for _, occurrences := range counts {
		prob := occurrences / total
		bits -= prob * math.Log2(prob)
	}
	return bits
}

View File

@@ -0,0 +1,31 @@
package engine
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestShannonAllSame(t *testing.T) {
assert.InDelta(t, 0.0, Shannon("aaaaaaa"), 0.01)
}
func TestShannonDistinct(t *testing.T) {
assert.InDelta(t, 3.0, Shannon("abcdefgh"), 0.1)
}
func TestShannonRealKey(t *testing.T) {
assert.GreaterOrEqual(t, Shannon("sk-proj-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqr"), 3.5)
}
func TestShannonEmpty(t *testing.T) {
assert.Equal(t, 0.0, Shannon(""))
}
func TestMaskKeyNormal(t *testing.T) {
assert.Equal(t, "sk-proj-...1234", MaskKey("sk-proj-abc1234"))
}
func TestMaskKeyShort(t *testing.T) {
assert.Equal(t, "****", MaskKey("abc"))
}

26
pkg/engine/finding.go Normal file
View File

@@ -0,0 +1,26 @@
package engine
import "time"
// Finding represents a detected API key from the scanning pipeline.
// KeyValue holds the plaintext key -- the storage layer encrypts it before persisting.
// NOTE(review): that encryption is not visible in this file; until it happens the
// struct carries the secret in the clear, so prefer KeyMasked for display.
type Finding struct {
ProviderName string // NOTE(review): presumably the provider whose pattern matched -- confirm against the producer
KeyValue string // full plaintext key
KeyMasked string // first8...last4 (the format MaskKey produces)
Confidence string // "high", "medium", "low"
Source string // file path or description
SourceType string // "file", "dir", "git", "stdin", "url"
LineNumber int // NOTE(review): 0- or 1-based is not established here -- confirm
Offset int64 // NOTE(review): presumably a byte offset within Source -- confirm
DetectedAt time.Time // NOTE(review): presumably when the key was detected; time zone not established here
}
// MaskKey returns a masked representation of key for display:
// the first 8 characters, then "...", then the last 4 characters.
// It returns "****" if the key is shorter than 12 characters.
//
// Characters are counted as Unicode code points (runes), not bytes, so a key
// containing multi-byte characters is never cut in the middle of a character
// and the 12-character threshold matches what the caller sees. For pure-ASCII
// keys the result is identical to byte-based slicing.
//
// NOTE(review): a key of exactly 12 characters is shown in full (8 + 4 = 12);
// raise the threshold if keys that short must stay hidden.
func MaskKey(key string) string {
	const (
		headLen = 8 // characters kept from the start
		tailLen = 4 // characters kept from the end
	)

	// Converting to runes makes the slicing below operate on whole characters.
	// Bytes that are not valid UTF-8 each become U+FFFD in the result.
	runes := []rune(key)
	if len(runes) < headLen+tailLen {
		return "****"
	}
	return string(runes[:headLen]) + "..." + string(runes[len(runes)-tailLen:])
}