feat(01-04): add shared Chunk type, Finding struct, Shannon entropy, and MaskKey
- pkg/types/chunk.go: shared Chunk struct breaking the engine<->sources circular import
- pkg/engine/finding.go: Finding struct with MaskKey for pipeline output
- pkg/engine/entropy.go: Shannon entropy function using math.Log2
- pkg/engine/entropy_test.go: TDD tests for Shannon and MaskKey
This commit is contained in:
23
pkg/engine/entropy.go
Normal file
23
pkg/engine/entropy.go
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
package engine
|
||||||
|
|
||||||
|
import "math"
|
||||||
|
|
||||||
|
// Shannon computes the Shannon entropy of s in bits per character, where a
// character is a Unicode code point (rune), not a byte.
//
// It returns 0.0 for the empty string. A value >= 3.5 indicates high
// randomness, consistent with real API keys.
//
// The result is deterministic: the per-symbol terms are accumulated in the
// order in which each symbol first appears in s, not in Go's randomized map
// iteration order. Floating-point addition is not associative, so a fixed
// order is what makes repeated calls on the same input bit-identical, which
// matters when the result is compared against a threshold.
func Shannon(s string) float64 {
	if len(s) == 0 {
		return 0.0
	}

	// Count occurrences per rune and the total number of runes in a single
	// pass, without materializing a []rune copy of the input.
	freq := make(map[rune]int)
	total := 0
	for _, r := range s {
		freq[r]++
		total++
	}

	n := float64(total)
	var entropy float64
	for _, r := range s {
		count, ok := freq[r]
		if !ok {
			// This symbol's term was already added on its first occurrence.
			continue
		}
		delete(freq, r)

		p := float64(count) / n
		entropy -= p * math.Log2(p)
	}
	return entropy
}
|
||||||
31
pkg/engine/entropy_test.go
Normal file
31
pkg/engine/entropy_test.go
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
package engine
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/stretchr/testify/assert"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestShannonAllSame(t *testing.T) {
|
||||||
|
assert.InDelta(t, 0.0, Shannon("aaaaaaa"), 0.01)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestShannonDistinct(t *testing.T) {
|
||||||
|
assert.InDelta(t, 3.0, Shannon("abcdefgh"), 0.1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestShannonRealKey(t *testing.T) {
|
||||||
|
assert.GreaterOrEqual(t, Shannon("sk-proj-ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqr"), 3.5)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestShannonEmpty(t *testing.T) {
|
||||||
|
assert.Equal(t, 0.0, Shannon(""))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMaskKeyNormal(t *testing.T) {
|
||||||
|
assert.Equal(t, "sk-proj-...1234", MaskKey("sk-proj-abc1234"))
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestMaskKeyShort(t *testing.T) {
|
||||||
|
assert.Equal(t, "****", MaskKey("abc"))
|
||||||
|
}
|
||||||
26
pkg/engine/finding.go
Normal file
26
pkg/engine/finding.go
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
package engine
|
||||||
|
|
||||||
|
import "time"
|
||||||
|
|
||||||
|
// Finding is one detected API key produced by the scanning pipeline.
//
// KeyValue carries the key in plaintext; the storage layer is responsible for
// encrypting it before it is persisted.
type Finding struct {
	// ProviderName is the name of the provider associated with the key.
	ProviderName string

	// KeyValue is the full plaintext key.
	KeyValue string

	// KeyMasked is the display form of the key: first8...last4.
	KeyMasked string

	// Confidence is one of "high", "medium", or "low".
	Confidence string

	// Source is a file path or a description of where the key was found.
	Source string

	// SourceType is one of "file", "dir", "git", "stdin", or "url".
	SourceType string

	// LineNumber is the line of the finding within Source.
	// NOTE(review): whether this is 0- or 1-based is not visible here; confirm with the producer.
	LineNumber int

	// Offset is the position of the finding within Source.
	// NOTE(review): presumably a byte offset, matching Chunk.Offset; confirm.
	Offset int64

	// DetectedAt is the time the finding was recorded.
	DetectedAt time.Time
}
|
||||||
|
|
||||||
|
// MaskKey returns a masked representation of key: its first 8 characters,
// then "...", then its last 4 characters. It returns "****" if the key is
// shorter than 12 characters.
//
// Characters are Unicode code points (runes), not bytes, so a key containing
// multi-byte UTF-8 is never cut in the middle of a character. For ASCII keys
// this is identical to byte-based slicing. Bytes that are not valid UTF-8 are
// each treated as one character and appear as U+FFFD in the result.
//
// NOTE(review): a key of exactly 12 characters is shown in full (only split by
// the "..."), because the visible prefix and suffix together cover it.
func MaskKey(key string) string {
	runes := []rune(key)
	if len(runes) < 12 {
		return "****"
	}
	return string(runes[:8]) + "..." + string(runes[len(runes)-4:])
}
|
||||||
10
pkg/types/chunk.go
Normal file
10
pkg/types/chunk.go
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
package types
|
||||||
|
|
||||||
|
// Chunk is one segment of content handed through the scanning pipeline.
//
// It lives in pkg/types rather than pkg/engine so that pkg/engine/sources can
// depend on it without introducing a circular import with pkg/engine.
type Chunk struct {
	// Data holds the raw bytes of this segment.
	Data []byte

	// Source is the file path, URL, or description the data came from.
	Source string

	// Offset is the byte offset of this chunk within the source.
	Offset int64
}
|
||||||
Reference in New Issue
Block a user