feat(01-04): add shared Chunk type, Finding struct, Shannon entropy, and MaskKey
- pkg/types/chunk.go: shared Chunk struct that breaks the engine <-> sources circular import
- pkg/engine/finding.go: Finding struct with MaskKey for pipeline output
- pkg/engine/entropy.go: Shannon entropy function using math.Log2
- pkg/engine/entropy_test.go: TDD tests for Shannon and MaskKey
This commit is contained in:
23
pkg/engine/entropy.go
Normal file
23
pkg/engine/entropy.go
Normal file
@@ -0,0 +1,23 @@
|
||||
package engine
|
||||
|
||||
import "math"
|
||||
|
||||
// Shannon computes the Shannon entropy of s in bits per character, where a
// "character" is a Unicode code point (rune), not a byte.
//
// It returns 0.0 for the empty string and for any string made of a single
// repeated character. The maximum for a string with k distinct, equally
// frequent characters is log2(k).
//
// As a rule of thumb from the original author, a value >= 3.5 indicates high
// randomness, consistent with real API keys.
func Shannon(s string) float64 {
	if s == "" {
		return 0.0
	}

	// Tally occurrences and the total rune count in a single pass. Ranging
	// over a string decodes UTF-8 the same way a []rune conversion does, so
	// total equals len([]rune(s)) without allocating a copy of the string.
	counts := make(map[rune]int)
	total := 0
	for _, r := range s {
		counts[r]++
		total++
	}

	n := float64(total)
	var entropy float64
	for _, c := range counts {
		// p is the empirical probability of this character; it is always
		// > 0 here, so math.Log2(p) is finite.
		p := float64(c) / n
		entropy -= p * math.Log2(p)
	}
	return entropy
}
|
||||
31
pkg/engine/entropy_test.go
Normal file
31
pkg/engine/entropy_test.go
Normal file
@@ -0,0 +1,31 @@
|
||||
package engine
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestShannonAllSame(t *testing.T) {
|
||||
assert.InDelta(t, 0.0, Shannon("aaaaaaa"), 0.01)
|
||||
}
|
||||
|
||||
func TestShannonDistinct(t *testing.T) {
|
||||
assert.InDelta(t, 3.0, Shannon("abcdefgh"), 0.1)
|
||||
}
|
||||
|
||||
func TestShannonRealKey(t *testing.T) {
|
||||
assert.GreaterOrEqual(t, Shannon("sk-proj-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqr"), 3.5)
|
||||
}
|
||||
|
||||
func TestShannonEmpty(t *testing.T) {
|
||||
assert.Equal(t, 0.0, Shannon(""))
|
||||
}
|
||||
|
||||
func TestMaskKeyNormal(t *testing.T) {
|
||||
assert.Equal(t, "sk-proj-...1234", MaskKey("sk-proj-abc1234"))
|
||||
}
|
||||
|
||||
func TestMaskKeyShort(t *testing.T) {
|
||||
assert.Equal(t, "****", MaskKey("abc"))
|
||||
}
|
||||
26
pkg/engine/finding.go
Normal file
26
pkg/engine/finding.go
Normal file
@@ -0,0 +1,26 @@
|
||||
package engine
|
||||
|
||||
import "time"
|
||||
|
||||
// Finding is one detected API key emitted by the scanning pipeline.
//
// KeyValue carries the key in plaintext; per the original design note, the
// storage layer encrypts it before persisting.
type Finding struct {
	// ProviderName names the provider the key is attributed to.
	ProviderName string

	// KeyValue is the full plaintext key.
	KeyValue string

	// KeyMasked is the display form: first8...last4.
	KeyMasked string

	// Confidence is one of "high", "medium", "low".
	Confidence string

	// Source is a file path or other description of where the key was found.
	Source string

	// SourceType is one of "file", "dir", "git", "stdin", "url".
	SourceType string

	// LineNumber is the line of the finding within Source.
	// NOTE(review): 0- vs 1-based is not shown here; confirm with the producer.
	LineNumber int

	// Offset is presumably a byte offset within Source; confirm with the producer.
	Offset int64

	// DetectedAt is the time associated with the detection.
	DetectedAt time.Time
}
|
||||
|
||||
// MaskKey returns a masked representation of key suitable for display:
// the first 8 bytes + "..." + the last 4 bytes.
//
// It returns "****" when the key is 12 bytes or shorter, because the visible
// prefix and suffix would otherwise cover the entire key and the "mask" would
// disclose it in full. Lengths are measured in bytes, which equals characters
// for ASCII keys.
func MaskKey(key string) string {
	const (
		prefixLen = 8 // leading bytes left visible
		suffixLen = 4 // trailing bytes left visible
	)

	// Require at least one hidden byte between prefix and suffix.
	if len(key) <= prefixLen+suffixLen {
		return "****"
	}
	return key[:prefixLen] + "..." + key[len(key)-suffixLen:]
}
|
||||
10
pkg/types/chunk.go
Normal file
10
pkg/types/chunk.go
Normal file
@@ -0,0 +1,10 @@
|
||||
package types
|
||||
|
||||
// Chunk is one segment of content handed through the scanning pipeline.
//
// It lives in pkg/types rather than pkg/engine so that pkg/engine/sources can
// depend on it without forming an import cycle with pkg/engine.
type Chunk struct {
	// Data holds the raw bytes of this segment.
	Data []byte

	// Source is a file path, URL, or other description of the origin.
	Source string

	// Offset is the byte offset of this chunk within the source.
	Offset int64
}
|
||||
Reference in New Issue
Block a user