fix(01-foundation): address all checker blockers and warnings in phase plans
This commit is contained in:
@@ -127,16 +127,15 @@ The embed directive must reference providers relative to loader.go location.
|
|||||||
loader.go is at pkg/providers/loader.go.
|
loader.go is at pkg/providers/loader.go.
|
||||||
providers/ directory is at project root.
|
providers/ directory is at project root.
|
||||||
Use: //go:embed ../../providers/*.yaml
|
Use: //go:embed ../../providers/*.yaml
|
||||||
and embed.FS path will be "../../providers/openai.yaml" etc.
|
|
||||||
|
|
||||||
Actually: Go embed paths must be relative and cannot use "..".
|
Actually: Go embed paths must be relative and cannot use "..".
|
||||||
Correct approach: place the embed in a file at project root level, or adjust.
|
Correct approach: place the embed in a file at project root level, or adjust.
|
||||||
Better approach from research: put loader in providers package, embed from pkg/providers,
|
Better approach from research: put loader in providers package, embed from pkg/providers,
|
||||||
but reference the providers/ dir which sits at root.
|
but reference the providers/ dir which sits at root.
|
||||||
|
|
||||||
Resolution: The go:embed directive path is relative to the SOURCE FILE, not the module root.
|
Resolution: The go:embed directive path is relative to the SOURCE FILE, not the module root.
|
||||||
Since loader.go is at pkg/providers/loader.go, to embed ../../providers/*.yaml would work
|
Since loader.go is at pkg/providers/loader.go, to embed ../../providers/*.yaml would work
|
||||||
syntactically but Go's embed restricts paths containing "..".
|
syntactically but Go's embed restricts paths containing "..".
|
||||||
|
|
||||||
Use this instead: place a providers_embed.go at the PROJECT ROOT (same dir as go.mod):
|
Use this instead: place a providers_embed.go at the PROJECT ROOT (same dir as go.mod):
|
||||||
package main -- NO, this breaks package separation
|
package main -- NO, this breaks package separation
|
||||||
@@ -408,6 +407,8 @@ Create **pkg/providers/registry.go**:
|
|||||||
package providers
|
package providers
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
ahocorasick "github.com/petar-dambovaliev/aho-corasick"
|
ahocorasick "github.com/petar-dambovaliev/aho-corasick"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -480,8 +481,6 @@ func (r *Registry) AC() ahocorasick.AhoCorasick {
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Note: registry.go needs `import "fmt"` added.
|
|
||||||
|
|
||||||
Then copy the three YAML files into the embed location:
|
Then copy the three YAML files into the embed location:
|
||||||
```bash
|
```bash
|
||||||
mkdir -p /home/salva/Documents/apikey/pkg/providers/definitions
|
mkdir -p /home/salva/Documents/apikey/pkg/providers/definitions
|
||||||
@@ -490,76 +489,8 @@ cp /home/salva/Documents/apikey/providers/anthropic.yaml /home/salva/Documents/a
|
|||||||
cp /home/salva/Documents/apikey/providers/huggingface.yaml /home/salva/Documents/apikey/pkg/providers/definitions/
|
cp /home/salva/Documents/apikey/providers/huggingface.yaml /home/salva/Documents/apikey/pkg/providers/definitions/
|
||||||
```
|
```
|
||||||
|
|
||||||
Finally, fill in **pkg/providers/registry_test.go** (replacing the stubs from Plan 01):
|
Finally, fill in **pkg/providers/registry_test.go** (replacing the stubs from Plan 01).
|
||||||
```go
|
Write ONLY the following content — do not include any earlier draft versions:
|
||||||
package providers_test
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
|
||||||
"github.com/stretchr/testify/assert"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestRegistryLoad(t *testing.T) {
|
|
||||||
reg, err := providers.NewRegistry()
|
|
||||||
require.NoError(t, err)
|
|
||||||
assert.GreaterOrEqual(t, len(reg.List()), 3, "expected at least 3 providers loaded")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestRegistryGet(t *testing.T) {
|
|
||||||
reg, err := providers.NewRegistry()
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
p, ok := reg.Get("openai")
|
|
||||||
assert.True(t, ok)
|
|
||||||
assert.Equal(t, "openai", p.Name)
|
|
||||||
assert.Equal(t, 1, p.Tier)
|
|
||||||
|
|
||||||
_, ok = reg.Get("nonexistent-provider")
|
|
||||||
assert.False(t, ok)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestRegistryStats(t *testing.T) {
|
|
||||||
reg, err := providers.NewRegistry()
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
stats := reg.Stats()
|
|
||||||
assert.GreaterOrEqual(t, stats.Total, 3)
|
|
||||||
assert.GreaterOrEqual(t, stats.ByTier[1], 2, "expected at least 2 tier-1 providers")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestAhoCorasickBuild(t *testing.T) {
|
|
||||||
reg, err := providers.NewRegistry()
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
ac := reg.AC()
|
|
||||||
|
|
||||||
// Should match OpenAI keyword
|
|
||||||
matches := ac.FindAll("OPENAI_API_KEY=sk-proj-abc")
|
|
||||||
assert.NotEmpty(t, matches, "expected AC to find keyword in string containing 'sk-proj-'")
|
|
||||||
|
|
||||||
// Should not match clean text
|
|
||||||
noMatches := ac.FindAll("hello world no secrets here")
|
|
||||||
assert.Empty(t, noMatches, "expected no AC matches in text with no provider keywords")
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestProviderSchemaValidation(t *testing.T) {
|
|
||||||
import_yaml := `
|
|
||||||
format_version: 0
|
|
||||||
name: invalid
|
|
||||||
last_verified: ""
|
|
||||||
`
|
|
||||||
// Directly test UnmarshalYAML via yaml.Unmarshal
|
|
||||||
var p providers.Provider
|
|
||||||
err := yaml.Unmarshal([]byte(import_yaml), &p) // NOTE: need import "gopkg.in/yaml.v3"
|
|
||||||
assert.Error(t, err, "expected validation error for format_version=0")
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Note: The TestProviderSchemaValidation test needs `import "gopkg.in/yaml.v3"` added.
|
|
||||||
Add it to the imports. Full corrected test file with proper imports:
|
|
||||||
|
|
||||||
```go
|
```go
|
||||||
package providers_test
|
package providers_test
|
||||||
@@ -632,7 +563,7 @@ func TestProviderSchemaValidation(t *testing.T) {
|
|||||||
- TestRegistryStats passes — Total >= 3
|
- TestRegistryStats passes — Total >= 3
|
||||||
- TestAhoCorasickBuild passes — "sk-proj-" match found, "hello world" empty
|
- TestAhoCorasickBuild passes — "sk-proj-" match found, "hello world" empty
|
||||||
- TestProviderSchemaValidation passes — error on format_version=0
|
- TestProviderSchemaValidation passes — error on format_version=0
|
||||||
- `grep -r 'go:embed' pkg/providers/loader.go` exits 0
|
- `grep -q 'go:embed' pkg/providers/loader.go` exits 0
|
||||||
- pkg/providers/definitions/ directory exists with 3 YAML files
|
- pkg/providers/definitions/ directory exists with 3 YAML files
|
||||||
</acceptance_criteria>
|
</acceptance_criteria>
|
||||||
<done>Registry loads providers from embedded YAML, builds Aho-Corasick automaton, exposes List/Get/Stats/AC. All 5 tests pass.</done>
|
<done>Registry loads providers from embedded YAML, builds Aho-Corasick automaton, exposes List/Get/Stats/AC. All 5 tests pass.</done>
|
||||||
|
|||||||
@@ -20,7 +20,7 @@ must_haves:
|
|||||||
- "AES-256-GCM Encrypt/Decrypt roundtrip produces the original plaintext"
|
- "AES-256-GCM Encrypt/Decrypt roundtrip produces the original plaintext"
|
||||||
- "Argon2id DeriveKey with the same passphrase and salt always returns the same 32-byte key"
|
- "Argon2id DeriveKey with the same passphrase and salt always returns the same 32-byte key"
|
||||||
- "A Finding can be saved to the database with the key_value stored encrypted and retrieved as plaintext"
|
- "A Finding can be saved to the database with the key_value stored encrypted and retrieved as plaintext"
|
||||||
- "The raw database file does NOT contain plaintext API key values"
|
- "The raw database file does NOT contain plaintext API key values — verified by querying raw bytes from the BLOB column"
|
||||||
artifacts:
|
artifacts:
|
||||||
- path: "pkg/storage/encrypt.go"
|
- path: "pkg/storage/encrypt.go"
|
||||||
provides: "Encrypt(plaintext, key) and Decrypt(ciphertext, key) using AES-256-GCM"
|
provides: "Encrypt(plaintext, key) and Decrypt(ciphertext, key) using AES-256-GCM"
|
||||||
@@ -271,7 +271,7 @@ func NewSalt() ([]byte, error) {
|
|||||||
- Test 3: DeriveKey(passphrase, salt) twice returns identical 32 bytes
|
- Test 3: DeriveKey(passphrase, salt) twice returns identical 32 bytes
|
||||||
- Test 4: NewSalt() twice returns different slices
|
- Test 4: NewSalt() twice returns different slices
|
||||||
- Test 5: SaveFinding stores finding → ListFindings decrypts and returns KeyValue == "sk-proj-test"
|
- Test 5: SaveFinding stores finding → ListFindings decrypts and returns KeyValue == "sk-proj-test"
|
||||||
- Test 6: Database file (when not :memory:) does NOT contain literal "sk-proj-test" in raw bytes
|
- Test 6: Raw BLOB bytes retrieved directly from the database do NOT contain the plaintext key string
|
||||||
</behavior>
|
</behavior>
|
||||||
<action>
|
<action>
|
||||||
Create **pkg/storage/schema.sql**:
|
Create **pkg/storage/schema.sql**:
|
||||||
@@ -473,6 +473,7 @@ Fill **pkg/storage/db_test.go** (replacing stubs from Plan 01):
|
|||||||
package storage_test
|
package storage_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/salvacybersec/keyhunter/pkg/storage"
|
"github.com/salvacybersec/keyhunter/pkg/storage"
|
||||||
@@ -567,9 +568,10 @@ func TestSaveFindingEncrypted(t *testing.T) {
|
|||||||
// Derive a test key
|
// Derive a test key
|
||||||
key := storage.DeriveKey([]byte("testpassphrase"), []byte("testsalt1234xxxx"))
|
key := storage.DeriveKey([]byte("testpassphrase"), []byte("testsalt1234xxxx"))
|
||||||
|
|
||||||
|
plainKey := "sk-proj-test1234567890abcdefghijklmnopqr"
|
||||||
f := storage.Finding{
|
f := storage.Finding{
|
||||||
ProviderName: "openai",
|
ProviderName: "openai",
|
||||||
KeyValue: "sk-proj-test1234567890abcdefghijklmnopqr",
|
KeyValue: plainKey,
|
||||||
Confidence: "high",
|
Confidence: "high",
|
||||||
SourcePath: "/test/file.env",
|
SourcePath: "/test/file.env",
|
||||||
SourceType: "file",
|
SourceType: "file",
|
||||||
@@ -583,10 +585,17 @@ func TestSaveFindingEncrypted(t *testing.T) {
|
|||||||
findings, err := db.ListFindings(key)
|
findings, err := db.ListFindings(key)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.Len(t, findings, 1)
|
require.Len(t, findings, 1)
|
||||||
assert.Equal(t, "sk-proj-test1234567890abcdefghijklmnopqr", findings[0].KeyValue)
|
assert.Equal(t, plainKey, findings[0].KeyValue)
|
||||||
assert.Equal(t, "openai", findings[0].ProviderName)
|
assert.Equal(t, "openai", findings[0].ProviderName)
|
||||||
// Verify masking
|
// Verify masking
|
||||||
assert.Equal(t, "sk-proj-...opqr", findings[0].KeyMasked)
|
assert.Equal(t, "sk-proj-...opqr", findings[0].KeyMasked)
|
||||||
|
|
||||||
|
// Verify encryption contract: raw BLOB bytes in the database must NOT contain the plaintext key.
|
||||||
|
// A verbatim match here would mean SaveFinding skipped Encrypt() before the INSERT.
|
||||||
|
var rawBlob []byte
|
||||||
|
require.NoError(t, db.SQL().QueryRow("SELECT key_value FROM findings WHERE id = ?", id).Scan(&rawBlob))
|
||||||
|
assert.False(t, bytes.Contains(rawBlob, []byte(plainKey)),
|
||||||
|
"raw database BLOB must not contain plaintext key — encryption was not applied")
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
</action>
|
</action>
|
||||||
@@ -601,12 +610,12 @@ func TestSaveFindingEncrypted(t *testing.T) {
|
|||||||
- TestDecryptWrongKey passes — wrong key causes error
|
- TestDecryptWrongKey passes — wrong key causes error
|
||||||
- TestArgon2KeyDerivation passes — 32 bytes, deterministic
|
- TestArgon2KeyDerivation passes — 32 bytes, deterministic
|
||||||
- TestNewSalt passes — 16 bytes, non-deterministic
|
- TestNewSalt passes — 16 bytes, non-deterministic
|
||||||
- TestSaveFindingEncrypted passes — stored and retrieved with correct KeyValue and KeyMasked
|
- TestSaveFindingEncrypted passes — stored and retrieved with correct KeyValue, KeyMasked, AND raw BLOB does not contain plaintext
|
||||||
- `grep -q 'go:embed.*schema' pkg/storage/db.go` exits 0
|
- `grep -q 'go:embed.*schema' pkg/storage/db.go` exits 0
|
||||||
- `grep -q 'modernc.org/sqlite' pkg/storage/db.go` exits 0
|
- `grep -q 'modernc.org/sqlite' pkg/storage/db.go` exits 0
|
||||||
- `grep -q 'journal_mode=WAL' pkg/storage/db.go` exits 0
|
- `grep -q 'journal_mode=WAL' pkg/storage/db.go` exits 0
|
||||||
</acceptance_criteria>
|
</acceptance_criteria>
|
||||||
<done>Storage layer complete — SQLite opens with schema, AES-256-GCM encrypt/decrypt works, Argon2id key derivation works, SaveFinding/ListFindings encrypt/decrypt transparently. All 7 tests pass.</done>
|
<done>Storage layer complete — SQLite opens with schema, AES-256-GCM encrypt/decrypt works, Argon2id key derivation works, SaveFinding/ListFindings encrypt/decrypt transparently. Raw BLOB bytes verified to not contain plaintext. All 7 tests pass.</done>
|
||||||
</task>
|
</task>
|
||||||
|
|
||||||
</tasks>
|
</tasks>
|
||||||
@@ -619,6 +628,7 @@ After both tasks:
|
|||||||
- `grep -q 'cipher\.NewGCM' pkg/storage/encrypt.go` exits 0
|
- `grep -q 'cipher\.NewGCM' pkg/storage/encrypt.go` exits 0
|
||||||
- `grep -q 'journal_mode=WAL' pkg/storage/db.go` exits 0
|
- `grep -q 'journal_mode=WAL' pkg/storage/db.go` exits 0
|
||||||
- schema.sql contains CREATE TABLE for findings, scans, settings
|
- schema.sql contains CREATE TABLE for findings, scans, settings
|
||||||
|
- TestSaveFindingEncrypted asserts raw BLOB does not contain plaintext key
|
||||||
</verification>
|
</verification>
|
||||||
|
|
||||||
<success_criteria>
|
<success_criteria>
|
||||||
@@ -626,6 +636,7 @@ After both tasks:
|
|||||||
- AES-256-GCM column encryption works: Encrypt + Decrypt roundtrip returns original (STOR-02)
|
- AES-256-GCM column encryption works: Encrypt + Decrypt roundtrip returns original (STOR-02)
|
||||||
- Argon2id key derivation: DeriveKey deterministic, 32 bytes, RFC 9106 params (STOR-03)
|
- Argon2id key derivation: DeriveKey deterministic, 32 bytes, RFC 9106 params (STOR-03)
|
||||||
- FindingCRUD: SaveFinding encrypts before INSERT, ListFindings decrypts after SELECT
|
- FindingCRUD: SaveFinding encrypts before INSERT, ListFindings decrypts after SELECT
|
||||||
|
- Raw BLOB in database does not contain plaintext key — verified by automated test
|
||||||
- All 7 storage tests pass
|
- All 7 storage tests pass
|
||||||
</success_criteria>
|
</success_criteria>
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ type: execute
|
|||||||
wave: 2
|
wave: 2
|
||||||
depends_on: [01-02]
|
depends_on: [01-02]
|
||||||
files_modified:
|
files_modified:
|
||||||
- pkg/engine/chunk.go
|
- pkg/types/chunk.go
|
||||||
- pkg/engine/finding.go
|
- pkg/engine/finding.go
|
||||||
- pkg/engine/entropy.go
|
- pkg/engine/entropy.go
|
||||||
- pkg/engine/filter.go
|
- pkg/engine/filter.go
|
||||||
@@ -15,7 +15,7 @@ files_modified:
|
|||||||
- pkg/engine/sources/file.go
|
- pkg/engine/sources/file.go
|
||||||
- pkg/engine/scanner_test.go
|
- pkg/engine/scanner_test.go
|
||||||
autonomous: true
|
autonomous: true
|
||||||
requirements: [CORE-01, CORE-04, CORE-05, CORE-06, CORE-07]
|
requirements: [CORE-01, CORE-04, CORE-05, CORE-06]
|
||||||
|
|
||||||
must_haves:
|
must_haves:
|
||||||
truths:
|
truths:
|
||||||
@@ -26,8 +26,8 @@ must_haves:
|
|||||||
- "Full scan pipeline: scan testdata/samples/no_keys.txt → zero findings"
|
- "Full scan pipeline: scan testdata/samples/no_keys.txt → zero findings"
|
||||||
- "Worker pool uses ants v2 with configurable worker count"
|
- "Worker pool uses ants v2 with configurable worker count"
|
||||||
artifacts:
|
artifacts:
|
||||||
- path: "pkg/engine/chunk.go"
|
- path: "pkg/types/chunk.go"
|
||||||
provides: "Chunk struct (Data []byte, Source string, Offset int64)"
|
provides: "Chunk struct (Data []byte, Source string, Offset int64) — shared by engine and sources packages"
|
||||||
exports: ["Chunk"]
|
exports: ["Chunk"]
|
||||||
- path: "pkg/engine/finding.go"
|
- path: "pkg/engine/finding.go"
|
||||||
provides: "Finding struct (provider, key value, masked, confidence, source, line)"
|
provides: "Finding struct (provider, key value, masked, confidence, source, line)"
|
||||||
@@ -40,12 +40,12 @@ must_haves:
|
|||||||
exports: ["KeywordFilter"]
|
exports: ["KeywordFilter"]
|
||||||
- path: "pkg/engine/detector.go"
|
- path: "pkg/engine/detector.go"
|
||||||
provides: "Detector stage — applies provider regexps and entropy check to chunks"
|
provides: "Detector stage — applies provider regexps and entropy check to chunks"
|
||||||
exports: ["Detector"]
|
exports: ["Detect"]
|
||||||
- path: "pkg/engine/engine.go"
|
- path: "pkg/engine/engine.go"
|
||||||
provides: "Engine struct with Scan(ctx, src, cfg) <-chan Finding"
|
provides: "Engine struct with Scan(ctx, src, cfg) <-chan Finding"
|
||||||
exports: ["Engine", "NewEngine", "ScanConfig"]
|
exports: ["Engine", "NewEngine", "ScanConfig"]
|
||||||
- path: "pkg/engine/sources/source.go"
|
- path: "pkg/engine/sources/source.go"
|
||||||
provides: "Source interface with Chunks(ctx, chan<- Chunk) error"
|
provides: "Source interface with Chunks(ctx, chan<- types.Chunk) error"
|
||||||
exports: ["Source"]
|
exports: ["Source"]
|
||||||
- path: "pkg/engine/sources/file.go"
|
- path: "pkg/engine/sources/file.go"
|
||||||
provides: "FileSource implementing Source for single-file scanning"
|
provides: "FileSource implementing Source for single-file scanning"
|
||||||
@@ -67,13 +67,19 @@ must_haves:
|
|||||||
to: "github.com/panjf2000/ants/v2"
|
to: "github.com/panjf2000/ants/v2"
|
||||||
via: "ants.NewPool for detector workers"
|
via: "ants.NewPool for detector workers"
|
||||||
pattern: "ants\\.NewPool"
|
pattern: "ants\\.NewPool"
|
||||||
|
- from: "pkg/engine/sources/source.go"
|
||||||
|
to: "pkg/types/chunk.go"
|
||||||
|
via: "Source interface uses types.Chunk — avoids circular import with pkg/engine"
|
||||||
|
pattern: "types\\.Chunk"
|
||||||
---
|
---
|
||||||
|
|
||||||
<objective>
|
<objective>
|
||||||
Build the three-stage scanning engine pipeline: Aho-Corasick keyword pre-filter, regex + entropy detector workers using ants goroutine pool, and a FileSource adapter. Wire them together in an Engine that emits Findings on a channel.
|
Build the three-stage scanning engine pipeline: Aho-Corasick keyword pre-filter, regex + entropy detector workers using ants goroutine pool, and a FileSource adapter. Wire them together in an Engine that emits Findings on a channel.
|
||||||
|
|
||||||
Purpose: The scan engine is the core differentiator. Plans 02 and 03 provide its dependencies (Registry for patterns + keywords, storage types for Finding). The CLI (Plan 05) calls Engine.Scan() to implement `keyhunter scan`.
|
Purpose: The scan engine is the core differentiator. Plans 02 and 03 provide its dependencies (Registry for patterns + keywords, storage types for Finding). The CLI (Plan 05) calls Engine.Scan() to implement `keyhunter scan`.
|
||||||
Output: pkg/engine/{chunk,finding,entropy,filter,detector,engine}.go and sources/{source,file}.go. scanner_test.go stubs filled.
|
Output: pkg/types/chunk.go, pkg/engine/{finding,entropy,filter,detector,engine}.go and sources/{source,file}.go. scanner_test.go stubs filled.
|
||||||
|
|
||||||
|
NOTE on CORE-07 (mmap large file reading): FileSource uses os.ReadFile() in Phase 1, which is sufficient for the test fixtures. mmap-based reading for files > 10MB is deferred to Phase 4 (Input Sources) where it belongs architecturally alongside all other source adapter work.
|
||||||
</objective>
|
</objective>
|
||||||
|
|
||||||
<execution_context>
|
<execution_context>
|
||||||
@@ -86,6 +92,16 @@ Output: pkg/engine/{chunk,finding,entropy,filter,detector,engine}.go and sources
|
|||||||
@.planning/phases/01-foundation/01-02-SUMMARY.md
|
@.planning/phases/01-foundation/01-02-SUMMARY.md
|
||||||
|
|
||||||
<interfaces>
|
<interfaces>
|
||||||
|
<!-- IMPORTANT: Circular import prevention -->
|
||||||
|
The sources sub-package (pkg/engine/sources) needs the Chunk type.
|
||||||
|
If Chunk were defined in pkg/engine, then sources would import engine, and engine imports
|
||||||
|
sources (for the Source interface) — a circular import. Go will refuse to compile.
|
||||||
|
|
||||||
|
Resolution: Define Chunk in pkg/types (a shared, import-free package):
|
||||||
|
pkg/types/chunk.go — defines types.Chunk
|
||||||
|
pkg/engine/sources — imports pkg/types (no circular dep)
|
||||||
|
pkg/engine — imports pkg/types and pkg/engine/sources (no circular dep)
|
||||||
|
|
||||||
<!-- Provider Registry types (from Plan 02) -->
|
<!-- Provider Registry types (from Plan 02) -->
|
||||||
package providers
|
package providers
|
||||||
|
|
||||||
@@ -107,9 +123,9 @@ func (r *Registry) List() []Provider
|
|||||||
func (r *Registry) AC() ahocorasick.AhoCorasick // pre-built Aho-Corasick
|
func (r *Registry) AC() ahocorasick.AhoCorasick // pre-built Aho-Corasick
|
||||||
|
|
||||||
<!-- Three-stage pipeline pattern from RESEARCH.md Pattern 2 -->
|
<!-- Three-stage pipeline pattern from RESEARCH.md Pattern 2 -->
|
||||||
chunksChan chan Chunk (buffer: 1000)
|
chunksChan chan types.Chunk (buffer: 1000)
|
||||||
detectableChan chan Chunk (buffer: 500)
|
detectableChan chan types.Chunk (buffer: 500)
|
||||||
resultsChan chan Finding (buffer: 100)
|
resultsChan chan Finding (buffer: 100)
|
||||||
|
|
||||||
Stage 1: Source.Chunks() → chunksChan (goroutine, closes chan on done)
|
Stage 1: Source.Chunks() → chunksChan (goroutine, closes chan on done)
|
||||||
Stage 2: KeywordFilter(chunksChan) → detectableChan (goroutine, AC.FindAll)
|
Stage 2: KeywordFilter(chunksChan) → detectableChan (goroutine, AC.FindAll)
|
||||||
@@ -124,7 +140,7 @@ type ScanConfig struct {
|
|||||||
|
|
||||||
<!-- Source interface -->
|
<!-- Source interface -->
|
||||||
type Source interface {
|
type Source interface {
|
||||||
Chunks(ctx context.Context, out chan<- Chunk) error
|
Chunks(ctx context.Context, out chan<- types.Chunk) error
|
||||||
}
|
}
|
||||||
|
|
||||||
<!-- FileSource -->
|
<!-- FileSource -->
|
||||||
@@ -151,8 +167,8 @@ import "github.com/panjf2000/ants/v2"
|
|||||||
<tasks>
|
<tasks>
|
||||||
|
|
||||||
<task type="auto" tdd="true">
|
<task type="auto" tdd="true">
|
||||||
<name>Task 1: Core types and Shannon entropy function</name>
|
<name>Task 1: Shared types package, Finding, and Shannon entropy function</name>
|
||||||
<files>pkg/engine/chunk.go, pkg/engine/finding.go, pkg/engine/entropy.go</files>
|
<files>pkg/types/chunk.go, pkg/engine/finding.go, pkg/engine/entropy.go</files>
|
||||||
<read_first>
|
<read_first>
|
||||||
- /home/salva/Documents/apikey/.planning/phases/01-foundation/01-RESEARCH.md (CORE-04 row: Shannon entropy, ~10-line stdlib function, threshold 3.5 bits/char)
|
- /home/salva/Documents/apikey/.planning/phases/01-foundation/01-RESEARCH.md (CORE-04 row: Shannon entropy, ~10-line stdlib function, threshold 3.5 bits/char)
|
||||||
- /home/salva/Documents/apikey/pkg/storage/findings.go (Finding and MaskKey defined there — engine.Finding is a separate type for the pipeline)
|
- /home/salva/Documents/apikey/pkg/storage/findings.go (Finding and MaskKey defined there — engine.Finding is a separate type for the pipeline)
|
||||||
@@ -166,11 +182,13 @@ import "github.com/panjf2000/ants/v2"
|
|||||||
- Test 6: MaskKey("abc") → "****" (too short to mask)
|
- Test 6: MaskKey("abc") → "****" (too short to mask)
|
||||||
</behavior>
|
</behavior>
|
||||||
<action>
|
<action>
|
||||||
Create **pkg/engine/chunk.go**:
|
Create **pkg/types/chunk.go** — the shared type that breaks the circular import:
|
||||||
```go
|
```go
|
||||||
package engine
|
package types
|
||||||
|
|
||||||
// Chunk is a segment of file content passed through the scanning pipeline.
|
// Chunk is a segment of file content passed through the scanning pipeline.
|
||||||
|
// Defined in pkg/types (not pkg/engine) so that pkg/engine/sources can use it
|
||||||
|
// without creating a circular import with pkg/engine.
|
||||||
type Chunk struct {
|
type Chunk struct {
|
||||||
Data []byte // raw bytes
|
Data []byte // raw bytes
|
||||||
Source string // file path, URL, or description
|
Source string // file path, URL, or description
|
||||||
@@ -236,18 +254,18 @@ func Shannon(s string) float64 {
|
|||||||
```
|
```
|
||||||
</action>
|
</action>
|
||||||
<verify>
|
<verify>
|
||||||
<automated>cd /home/salva/Documents/apikey && go build ./pkg/engine/... && echo "BUILD OK"</automated>
|
<automated>cd /home/salva/Documents/apikey && go build ./pkg/types/... && go build ./pkg/engine/... && echo "BUILD OK"</automated>
|
||||||
</verify>
|
</verify>
|
||||||
<acceptance_criteria>
|
<acceptance_criteria>
|
||||||
|
- `go build ./pkg/types/...` exits 0
|
||||||
- `go build ./pkg/engine/...` exits 0
|
- `go build ./pkg/engine/...` exits 0
|
||||||
- pkg/engine/chunk.go exports Chunk with fields Data, Source, Offset
|
- pkg/types/chunk.go exports Chunk with fields Data, Source, Offset
|
||||||
- pkg/engine/finding.go exports Finding and MaskKey
|
- pkg/engine/finding.go exports Finding and MaskKey
|
||||||
- pkg/engine/entropy.go exports Shannon using math.Log2
|
- pkg/engine/entropy.go exports Shannon using math.Log2
|
||||||
- `grep -q 'math\.Log2' pkg/engine/entropy.go` exits 0
|
- `grep -q 'math\.Log2' pkg/engine/entropy.go` exits 0
|
||||||
- Shannon("aaaaaaa") == 0.0 (manually verifiable from code)
|
|
||||||
- MaskKey("sk-proj-abc1234") produces "sk-proj-...1234"
|
- MaskKey("sk-proj-abc1234") produces "sk-proj-...1234"
|
||||||
</acceptance_criteria>
|
</acceptance_criteria>
|
||||||
<done>Chunk, Finding, MaskKey, and Shannon exist and compile. Shannon uses stdlib math only — no external library.</done>
|
<done>types.Chunk exists in pkg/types/chunk.go (no imports, no circular dependency risk); Finding, MaskKey, and Shannon exist and compile.</done>
|
||||||
</task>
|
</task>
|
||||||
|
|
||||||
<task type="auto" tdd="true">
|
<task type="auto" tdd="true">
|
||||||
@@ -262,7 +280,8 @@ func Shannon(s string) float64 {
|
|||||||
</files>
|
</files>
|
||||||
<read_first>
|
<read_first>
|
||||||
- /home/salva/Documents/apikey/.planning/phases/01-foundation/01-RESEARCH.md (Pattern 2: Three-Stage Scanning Pipeline — exact channel-based code example)
|
- /home/salva/Documents/apikey/.planning/phases/01-foundation/01-RESEARCH.md (Pattern 2: Three-Stage Scanning Pipeline — exact channel-based code example)
|
||||||
- /home/salva/Documents/apikey/pkg/engine/chunk.go
|
- /home/salva/Documents/apikey/pkg/types/chunk.go
|
||||||
|
- /home/salva/Documents/apikey/pkg/engine/chunk.go (only if it still exists — pkg/types/chunk.go is the authoritative definition; use that instead)
|
||||||
- /home/salva/Documents/apikey/pkg/engine/finding.go
|
- /home/salva/Documents/apikey/pkg/engine/finding.go
|
||||||
- /home/salva/Documents/apikey/pkg/engine/entropy.go
|
- /home/salva/Documents/apikey/pkg/engine/entropy.go
|
||||||
- /home/salva/Documents/apikey/pkg/providers/registry.go (Registry.AC() and Registry.List() signatures)
|
- /home/salva/Documents/apikey/pkg/providers/registry.go (Registry.AC() and Registry.List() signatures)
|
||||||
@@ -283,13 +302,15 @@ package sources
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
|
||||||
"github.com/salvacybersec/keyhunter/pkg/engine"
|
"github.com/salvacybersec/keyhunter/pkg/types"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Source is the interface all input adapters must implement.
|
// Source is the interface all input adapters must implement.
|
||||||
// Chunks writes content segments to the out channel until the source is exhausted or ctx is cancelled.
|
// Chunks writes content segments to the out channel until the source is exhausted or ctx is cancelled.
|
||||||
|
// NOTE: Source is defined in the sources sub-package (not pkg/engine) and uses pkg/types.Chunk
|
||||||
|
// to avoid a circular import: engine → sources → engine.
|
||||||
type Source interface {
|
type Source interface {
|
||||||
Chunks(ctx context.Context, out chan<- engine.Chunk) error
|
Chunks(ctx context.Context, out chan<- types.Chunk) error
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -301,7 +322,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"os"
|
"os"
|
||||||
|
|
||||||
"github.com/salvacybersec/keyhunter/pkg/engine"
|
"github.com/salvacybersec/keyhunter/pkg/types"
|
||||||
)
|
)
|
||||||
|
|
||||||
const defaultChunkSize = 4096
|
const defaultChunkSize = 4096
|
||||||
@@ -319,7 +340,9 @@ func NewFileSource(path string) *FileSource {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Chunks reads the file in overlapping segments and sends each chunk to out.
|
// Chunks reads the file in overlapping segments and sends each chunk to out.
|
||||||
func (f *FileSource) Chunks(ctx context.Context, out chan<- engine.Chunk) error {
|
// Uses os.ReadFile for simplicity in Phase 1. mmap for files > 10MB will be implemented
|
||||||
|
// in Phase 4 (Input Sources) alongside all other source adapter enhancements.
|
||||||
|
func (f *FileSource) Chunks(ctx context.Context, out chan<- types.Chunk) error {
|
||||||
data, err := os.ReadFile(f.Path)
|
data, err := os.ReadFile(f.Path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
@@ -333,7 +356,7 @@ func (f *FileSource) Chunks(ctx context.Context, out chan<- engine.Chunk) error
|
|||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return ctx.Err()
|
return ctx.Err()
|
||||||
case out <- engine.Chunk{Data: data, Source: f.Path, Offset: 0}:
|
case out <- types.Chunk{Data: data, Source: f.Path, Offset: 0}:
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -344,7 +367,7 @@ func (f *FileSource) Chunks(ctx context.Context, out chan<- engine.Chunk) error
|
|||||||
if end > len(data) {
|
if end > len(data) {
|
||||||
end = len(data)
|
end = len(data)
|
||||||
}
|
}
|
||||||
chunk := engine.Chunk{
|
chunk := types.Chunk{
|
||||||
Data: data[start:end],
|
Data: data[start:end],
|
||||||
Source: f.Path,
|
Source: f.Path,
|
||||||
Offset: offset,
|
Offset: offset,
|
||||||
@@ -369,12 +392,13 @@ package engine
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
ahocorasick "github.com/petar-dambovaliev/aho-corasick"
|
ahocorasick "github.com/petar-dambovaliev/aho-corasick"
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/types"
|
||||||
)
|
)
|
||||||
|
|
||||||
// KeywordFilter filters a stream of chunks using an Aho-Corasick automaton.
|
// KeywordFilter filters a stream of chunks using an Aho-Corasick automaton.
|
||||||
// Only chunks that contain at least one provider keyword are sent to out.
|
// Only chunks that contain at least one provider keyword are sent to out.
|
||||||
// This is Stage 2 of the pipeline (runs after Source, before Detector).
|
// This is Stage 2 of the pipeline (runs after Source, before Detector).
|
||||||
func KeywordFilter(ac ahocorasick.AhoCorasick, in <-chan Chunk, out chan<- Chunk) {
|
func KeywordFilter(ac ahocorasick.AhoCorasick, in <-chan types.Chunk, out chan<- types.Chunk) {
|
||||||
for chunk := range in {
|
for chunk := range in {
|
||||||
if len(ac.FindAll(string(chunk.Data))) > 0 {
|
if len(ac.FindAll(string(chunk.Data))) > 0 {
|
||||||
out <- chunk
|
out <- chunk
|
||||||
@@ -393,11 +417,12 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/types"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Detector applies provider regex patterns and optional entropy checks to a chunk.
|
// Detect applies provider regex patterns and optional entropy checks to a chunk.
|
||||||
// It returns all findings from the chunk.
|
// It returns all findings from the chunk.
|
||||||
func Detect(chunk Chunk, providerList []providers.Provider) []Finding {
|
func Detect(chunk types.Chunk, providerList []providers.Provider) []Finding {
|
||||||
var findings []Finding
|
var findings []Finding
|
||||||
content := string(chunk.Data)
|
content := string(chunk.Data)
|
||||||
|
|
||||||
@@ -452,8 +477,9 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/panjf2000/ants/v2"
|
"github.com/panjf2000/ants/v2"
|
||||||
"github.com/salvacybersec/keyhunter/pkg/providers"
|
|
||||||
"github.com/salvacybersec/keyhunter/pkg/engine/sources"
|
"github.com/salvacybersec/keyhunter/pkg/engine/sources"
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/providers"
|
||||||
|
"github.com/salvacybersec/keyhunter/pkg/types"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ScanConfig controls scan execution parameters.
|
// ScanConfig controls scan execution parameters.
|
||||||
@@ -482,9 +508,10 @@ func (e *Engine) Scan(ctx context.Context, src sources.Source, cfg ScanConfig) (
|
|||||||
workers = runtime.NumCPU() * 8
|
workers = runtime.NumCPU() * 8
|
||||||
}
|
}
|
||||||
|
|
||||||
chunksChan := make(chan Chunk, 1000)
|
// Declare channels on separate lines to ensure correct Go syntax.
|
||||||
detectableChan := make(chan Chunk, 500)
|
chunksChan := make(chan types.Chunk, 1000)
|
||||||
resultsChan := make(chan Finding, 100)
|
detectableChan := make(chan types.Chunk, 500)
|
||||||
|
resultsChan := make(chan Finding, 100)
|
||||||
|
|
||||||
// Stage 1: source → chunksChan
|
// Stage 1: source → chunksChan
|
||||||
go func() {
|
go func() {
|
||||||
@@ -517,7 +544,7 @@ func (e *Engine) Scan(ctx context.Context, src sources.Source, cfg ScanConfig) (
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
for chunk := range detectableChan {
|
for chunk := range detectableChan {
|
||||||
c := chunk // capture
|
c := chunk // capture loop variable
|
||||||
wg.Add(1)
|
wg.Add(1)
|
||||||
_ = pool.Submit(func() {
|
_ = pool.Submit(func() {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
@@ -645,6 +672,7 @@ func TestScannerPipelineMultipleKeys(t *testing.T) {
|
|||||||
</verify>
|
</verify>
|
||||||
<acceptance_criteria>
|
<acceptance_criteria>
|
||||||
- `go test ./pkg/engine/... -v -count=1` exits 0 with all tests PASS (no SKIP)
|
- `go test ./pkg/engine/... -v -count=1` exits 0 with all tests PASS (no SKIP)
|
||||||
|
- `go build ./...` exits 0 with no circular import errors
|
||||||
- TestShannonEntropy passes — 0.0 for "aaaaaaa", >= 3.5 for real key pattern
|
- TestShannonEntropy passes — 0.0 for "aaaaaaa", >= 3.5 for real key pattern
|
||||||
- TestKeywordPreFilter passes — AC matches sk-proj-, empty for "hello world"
|
- TestKeywordPreFilter passes — AC matches sk-proj-, empty for "hello world"
|
||||||
- TestScannerPipelineOpenAI passes — 1 finding with ProviderName=="openai"
|
- TestScannerPipelineOpenAI passes — 1 finding with ProviderName=="openai"
|
||||||
@@ -652,19 +680,20 @@ func TestScannerPipelineMultipleKeys(t *testing.T) {
|
|||||||
- TestScannerPipelineMultipleKeys passes — >= 2 findings with both provider names
|
- TestScannerPipelineMultipleKeys passes — >= 2 findings with both provider names
|
||||||
- `grep -q 'ants\.NewPool' pkg/engine/engine.go` exits 0
|
- `grep -q 'ants\.NewPool' pkg/engine/engine.go` exits 0
|
||||||
- `grep -q 'KeywordFilter' pkg/engine/engine.go` exits 0
|
- `grep -q 'KeywordFilter' pkg/engine/engine.go` exits 0
|
||||||
- `go build ./...` still exits 0
|
- pkg/types/chunk.go exists and pkg/engine/sources imports pkg/types (not pkg/engine)
|
||||||
</acceptance_criteria>
|
</acceptance_criteria>
|
||||||
<done>Three-stage scanning pipeline works end-to-end: FileSource → KeywordFilter (AC) → Detect (regex + entropy) → Finding channel. All engine tests pass.</done>
|
<done>Three-stage scanning pipeline works end-to-end: FileSource → KeywordFilter (AC) → Detect (regex + entropy) → Finding channel. Circular import resolved via pkg/types. All engine tests pass.</done>
|
||||||
</task>
|
</task>
|
||||||
|
|
||||||
</tasks>
|
</tasks>
|
||||||
|
|
||||||
<verification>
|
<verification>
|
||||||
After both tasks:
|
After both tasks:
|
||||||
- `go test ./pkg/engine/... -v -count=1` exits 0 with 6 tests PASS
|
- `go build ./...` exits 0 with zero circular import errors
|
||||||
- `go build ./...` exits 0
|
- `go test ./pkg/engine/... -v -count=1` exits 0 with all tests PASS
|
||||||
- `grep -q 'ants\.NewPool' pkg/engine/engine.go` exits 0
|
- `grep -q 'ants\.NewPool' pkg/engine/engine.go` exits 0
|
||||||
- `grep -q 'math\.Log2' pkg/engine/entropy.go` exits 0
|
- `grep -q 'math\.Log2' pkg/engine/entropy.go` exits 0
|
||||||
|
- `grep -rq 'pkg/types' pkg/engine/sources/source.go` exits 0 (sources imports types, not engine)
|
||||||
- Scanning testdata/samples/openai_key.txt returns 1 finding with provider "openai"
|
- Scanning testdata/samples/openai_key.txt returns 1 finding with provider "openai"
|
||||||
- Scanning testdata/samples/no_keys.txt returns 0 findings
|
- Scanning testdata/samples/no_keys.txt returns 0 findings
|
||||||
</verification>
|
</verification>
|
||||||
@@ -673,7 +702,8 @@ After both tasks:
|
|||||||
- Three-stage pipeline: AC pre-filter → regex + entropy detector → results channel (CORE-01, CORE-06)
|
- Three-stage pipeline: AC pre-filter → regex + entropy detector → results channel (CORE-01, CORE-06)
|
||||||
- Shannon entropy function using stdlib math (CORE-04)
|
- Shannon entropy function using stdlib math (CORE-04)
|
||||||
- ants v2 goroutine pool with configurable worker count (CORE-05)
|
- ants v2 goroutine pool with configurable worker count (CORE-05)
|
||||||
- FileSource adapter reading files in overlapping chunks (CORE-07 partial — full mmap in Phase 4)
|
- FileSource adapter reading files in overlapping chunks using os.ReadFile (mmap deferred to Phase 4)
|
||||||
|
- pkg/types/Chunk breaks the engine↔sources circular import
|
||||||
- All engine tests pass against real testdata fixtures
|
- All engine tests pass against real testdata fixtures
|
||||||
</success_criteria>
|
</success_criteria>
|
||||||
|
|
||||||
|
|||||||
@@ -9,9 +9,10 @@ files_modified:
|
|||||||
- cmd/scan.go
|
- cmd/scan.go
|
||||||
- cmd/providers.go
|
- cmd/providers.go
|
||||||
- cmd/config.go
|
- cmd/config.go
|
||||||
|
- cmd/stubs.go
|
||||||
- pkg/config/config.go
|
- pkg/config/config.go
|
||||||
- pkg/output/table.go
|
- pkg/output/table.go
|
||||||
autonomous: false
|
autonomous: true
|
||||||
requirements: [CLI-01, CLI-02, CLI-03, CLI-04, CLI-05]
|
requirements: [CLI-01, CLI-02, CLI-03, CLI-04, CLI-05]
|
||||||
|
|
||||||
must_haves:
|
must_haves:
|
||||||
@@ -21,14 +22,19 @@ must_haves:
|
|||||||
- "`keyhunter providers info openai` prints OpenAI provider details"
|
- "`keyhunter providers info openai` prints OpenAI provider details"
|
||||||
- "`keyhunter config init` creates ~/.keyhunter.yaml without error"
|
- "`keyhunter config init` creates ~/.keyhunter.yaml without error"
|
||||||
- "`keyhunter config set workers 16` persists the value to ~/.keyhunter.yaml"
|
- "`keyhunter config set workers 16` persists the value to ~/.keyhunter.yaml"
|
||||||
- "`keyhunter --help` shows all top-level commands: scan, providers, config"
|
- "`keyhunter --help` shows all top-level commands: scan, verify, import, recon, keys, serve, dorks, hook, schedule, providers, config"
|
||||||
|
- "Findings are stored with a per-installation salt loaded from the settings table — not a hardcoded salt"
|
||||||
|
- "Raw sqlite3 query on the database file does NOT return plaintext key values"
|
||||||
artifacts:
|
artifacts:
|
||||||
- path: "cmd/root.go"
|
- path: "cmd/root.go"
|
||||||
provides: "Cobra root command with PersistentPreRunE config loading"
|
provides: "Cobra root command with PersistentPreRunE config loading"
|
||||||
contains: "cobra.Command"
|
contains: "cobra.Command"
|
||||||
- path: "cmd/scan.go"
|
- path: "cmd/scan.go"
|
||||||
provides: "scan command wiring Engine + FileSource + output table"
|
provides: "scan command wiring Engine + FileSource + output table + salt from settings"
|
||||||
exports: ["scanCmd"]
|
exports: ["scanCmd"]
|
||||||
|
- path: "cmd/stubs.go"
|
||||||
|
provides: "stub commands for verify, import, recon, keys, serve, dorks, hook, schedule"
|
||||||
|
exports: ["verifyCmd", "importCmd", "reconCmd", "keysCmd", "serveCmd", "dorksCmd", "hookCmd", "scheduleCmd"]
|
||||||
- path: "cmd/providers.go"
|
- path: "cmd/providers.go"
|
||||||
provides: "providers list/info/stats subcommands using Registry"
|
provides: "providers list/info/stats subcommands using Registry"
|
||||||
exports: ["providersCmd"]
|
exports: ["providersCmd"]
|
||||||
@@ -50,6 +56,10 @@ must_haves:
|
|||||||
to: "pkg/storage/db.go"
|
to: "pkg/storage/db.go"
|
||||||
via: "storage.Open() called, SaveFinding for each result"
|
via: "storage.Open() called, SaveFinding for each result"
|
||||||
pattern: "storage\\.Open"
|
pattern: "storage\\.Open"
|
||||||
|
- from: "cmd/scan.go"
|
||||||
|
to: "pkg/storage/crypto.go"
|
||||||
|
via: "loadOrCreateEncKey() reads the salt from the settings table via storage.GetSetting/SetSetting, then calls storage.DeriveKey"
|
||||||
|
pattern: "DeriveKey|GetSetting|SetSetting"
|
||||||
- from: "cmd/root.go"
|
- from: "cmd/root.go"
|
||||||
to: "github.com/spf13/viper"
|
to: "github.com/spf13/viper"
|
||||||
via: "viper.SetConfigFile in PersistentPreRunE"
|
via: "viper.SetConfigFile in PersistentPreRunE"
|
||||||
@@ -61,10 +71,10 @@ must_haves:
|
|||||||
---
|
---
|
||||||
|
|
||||||
<objective>
|
<objective>
|
||||||
Wire all subsystems together through the Cobra CLI: scan command (engine + storage + output), providers list/info/stats commands, and config init/set/get commands. This is the integration layer — all business logic lives in pkg/, cmd/ only wires.
|
Wire all subsystems together through the Cobra CLI: scan command (engine + storage + output), providers list/info/stats commands, config init/set/get commands, and 8 stub commands for future phases. This is the integration layer — all business logic lives in pkg/, cmd/ only wires.
|
||||||
|
|
||||||
Purpose: Satisfies all Phase 1 CLI requirements and delivers the first working `keyhunter scan` command that completes the end-to-end success criteria.
|
Purpose: Satisfies all Phase 1 CLI requirements and delivers the first working `keyhunter scan` command that completes the end-to-end success criteria.
|
||||||
Output: cmd/{root,scan,providers,config}.go, pkg/config/config.go, pkg/output/table.go.
|
Output: cmd/{root,scan,providers,config,stubs}.go, pkg/config/config.go, pkg/output/table.go.
|
||||||
</objective>
|
</objective>
|
||||||
|
|
||||||
<execution_context>
|
<execution_context>
|
||||||
@@ -106,6 +116,10 @@ func (db *DB) SaveFinding(f Finding, encKey []byte) (int64, error)
|
|||||||
func DeriveKey(passphrase []byte, salt []byte) []byte
|
func DeriveKey(passphrase []byte, salt []byte) []byte
|
||||||
func NewSalt() ([]byte, error)
|
func NewSalt() ([]byte, error)
|
||||||
|
|
||||||
|
<!-- Storage settings CRUD — must be added in pkg/storage/settings.go in this plan -->
|
||||||
|
func (db *DB) GetSetting(key string) (string, bool, error)
|
||||||
|
func (db *DB) SetSetting(key string, value string) error
|
||||||
|
|
||||||
<!-- Registry (from Plan 02) -->
|
<!-- Registry (from Plan 02) -->
|
||||||
package providers
|
package providers
|
||||||
func NewRegistry() (*Registry, error)
|
func NewRegistry() (*Registry, error)
|
||||||
@@ -127,17 +141,24 @@ Passphrase: (prompt if not in env KEYHUNTER_PASSPHRASE — Phase 1: use empty s
|
|||||||
<!-- lipgloss table output -->
|
<!-- lipgloss table output -->
|
||||||
Columns: PROVIDER | MASKED KEY | CONFIDENCE | SOURCE | LINE
|
Columns: PROVIDER | MASKED KEY | CONFIDENCE | SOURCE | LINE
|
||||||
Colors: use lipgloss.NewStyle().Foreground() for confidence: high=green, medium=yellow, low=red
|
Colors: use lipgloss.NewStyle().Foreground() for confidence: high=green, medium=yellow, low=red
|
||||||
|
|
||||||
|
<!-- Salt wiring strategy (BLOCKER 4 fix) -->
|
||||||
|
On first scan, call storage.NewSalt(), hex-encode it, store in settings table with key "encryption.salt".
|
||||||
|
On subsequent scans, read the salt from the settings table.
|
||||||
|
This ensures all users have a unique per-installation salt instead of a shared hardcoded salt.
|
||||||
|
The helper function loadOrCreateEncKey(db *storage.DB, passphrase string) ([]byte, error) handles both cases and returns the encryption key derived from that salt.
|
||||||
</interfaces>
|
</interfaces>
|
||||||
</context>
|
</context>
|
||||||
|
|
||||||
<tasks>
|
<tasks>
|
||||||
|
|
||||||
<task type="auto" tdd="false">
|
<task type="auto" tdd="false">
|
||||||
<name>Task 1: Config package, output table, and root command</name>
|
<name>Task 1: Config package, output table, root command, and settings helpers</name>
|
||||||
<files>pkg/config/config.go, pkg/output/table.go, cmd/root.go</files>
|
<files>pkg/config/config.go, pkg/output/table.go, cmd/root.go, pkg/storage/settings.go</files>
|
||||||
<read_first>
|
<read_first>
|
||||||
- /home/salva/Documents/apikey/.planning/phases/01-foundation/01-RESEARCH.md (CLI-01, CLI-02, CLI-03 rows, Standard Stack: cobra v1.10.2 + viper v1.21.0)
|
- /home/salva/Documents/apikey/.planning/phases/01-foundation/01-RESEARCH.md (CLI-01, CLI-02, CLI-03 rows, Standard Stack: cobra v1.10.2 + viper v1.21.0)
|
||||||
- /home/salva/Documents/apikey/pkg/engine/finding.go (Finding struct fields for output)
|
- /home/salva/Documents/apikey/pkg/engine/finding.go (Finding struct fields for output)
|
||||||
|
- /home/salva/Documents/apikey/pkg/storage/db.go (DB struct, to add GetSetting/SetSetting)
|
||||||
</read_first>
|
</read_first>
|
||||||
<action>
|
<action>
|
||||||
Create **pkg/config/config.go**:
|
Create **pkg/config/config.go**:
|
||||||
@@ -172,6 +193,43 @@ func Load() Config {
|
|||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Create **pkg/storage/settings.go** — adds GetSetting/SetSetting to the storage package:
|
||||||
|
```go
|
||||||
|
package storage
|
||||||
|
|
||||||
|
import (
|
||||||
|
"database/sql"
|
||||||
|
"fmt"
|
||||||
|
)
|
||||||
|
|
||||||
|
// GetSetting retrieves a value from the settings table.
|
||||||
|
// Returns (value, true, nil) if found, ("", false, nil) if not found, ("", false, err) on error.
|
||||||
|
func (db *DB) GetSetting(key string) (string, bool, error) {
|
||||||
|
var value string
|
||||||
|
err := db.sql.QueryRow("SELECT value FROM settings WHERE key = ?", key).Scan(&value)
|
||||||
|
if err == sql.ErrNoRows {
|
||||||
|
return "", false, nil
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
return "", false, fmt.Errorf("getting setting %q: %w", key, err)
|
||||||
|
}
|
||||||
|
return value, true, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetSetting inserts or updates a key-value pair in the settings table.
|
||||||
|
func (db *DB) SetSetting(key, value string) error {
|
||||||
|
_, err := db.sql.Exec(
|
||||||
|
`INSERT INTO settings (key, value, updated_at) VALUES (?, ?, CURRENT_TIMESTAMP)
|
||||||
|
ON CONFLICT(key) DO UPDATE SET value = excluded.value, updated_at = CURRENT_TIMESTAMP`,
|
||||||
|
key, value,
|
||||||
|
)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("setting %q: %w", key, err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
Create **pkg/output/table.go**:
|
Create **pkg/output/table.go**:
|
||||||
```go
|
```go
|
||||||
package output
|
package output
|
||||||
@@ -281,6 +339,15 @@ func init() {
|
|||||||
rootCmd.AddCommand(scanCmd)
|
rootCmd.AddCommand(scanCmd)
|
||||||
rootCmd.AddCommand(providersCmd)
|
rootCmd.AddCommand(providersCmd)
|
||||||
rootCmd.AddCommand(configCmd)
|
rootCmd.AddCommand(configCmd)
|
||||||
|
// Stub commands for future phases (per CLI-01 requirement of 11 commands)
|
||||||
|
rootCmd.AddCommand(verifyCmd)
|
||||||
|
rootCmd.AddCommand(importCmd)
|
||||||
|
rootCmd.AddCommand(reconCmd)
|
||||||
|
rootCmd.AddCommand(keysCmd)
|
||||||
|
rootCmd.AddCommand(serveCmd)
|
||||||
|
rootCmd.AddCommand(dorksCmd)
|
||||||
|
rootCmd.AddCommand(hookCmd)
|
||||||
|
rootCmd.AddCommand(scheduleCmd)
|
||||||
}
|
}
|
||||||
|
|
||||||
func initConfig() {
|
func initConfig() {
|
||||||
@@ -316,28 +383,30 @@ func mustHomeDir() string {
|
|||||||
```
|
```
|
||||||
</action>
|
</action>
|
||||||
<verify>
|
<verify>
|
||||||
<automated>cd /home/salva/Documents/apikey && go build ./... && ./keyhunter --help 2>&1 | grep -E "scan|providers|config" && echo "HELP OK"</automated>
|
<automated>cd /home/salva/Documents/apikey && go build ./... && ./keyhunter --help 2>&1 | grep -E "scan|providers|config|verify|recon|keys|serve|dorks|hook|schedule" && echo "HELP OK"</automated>
|
||||||
</verify>
|
</verify>
|
||||||
<acceptance_criteria>
|
<acceptance_criteria>
|
||||||
- `go build ./...` exits 0
|
- `go build ./...` exits 0
|
||||||
- `./keyhunter --help` shows "scan", "providers", and "config" in command list
|
- `./keyhunter --help` shows scan, providers, config, verify, import, recon, keys, serve, dorks, hook, schedule in command list
|
||||||
- pkg/config/config.go exports Config and Load
|
- pkg/config/config.go exports Config and Load
|
||||||
- pkg/output/table.go exports PrintFindings
|
- pkg/output/table.go exports PrintFindings
|
||||||
- cmd/root.go declares rootCmd, Execute(), scanCmd, providersCmd, configCmd referenced
|
- pkg/storage/settings.go exports GetSetting and SetSetting
|
||||||
|
- cmd/root.go declares rootCmd, Execute(), and adds all 11 subcommands
|
||||||
- `grep -q 'viper\.SetConfigFile\|viper\.SetConfigName' cmd/root.go` exits 0
|
- `grep -q 'viper\.SetConfigFile\|viper\.SetConfigName' cmd/root.go` exits 0
|
||||||
- lipgloss used for header and confidence coloring
|
- lipgloss used for header and confidence coloring
|
||||||
</acceptance_criteria>
|
</acceptance_criteria>
|
||||||
<done>Root command, config package, and output table exist. `keyhunter --help` shows the three top-level commands.</done>
|
<done>Root command registers all 11 CLI commands. Config package, output table, and settings helpers exist. `keyhunter --help` shows all commands.</done>
|
||||||
</task>
|
</task>
|
||||||
|
|
||||||
<task type="auto" tdd="false">
|
<task type="auto" tdd="false">
|
||||||
<name>Task 2: scan, providers, and config subcommands</name>
|
<name>Task 2: scan, providers, config subcommands, and stub commands</name>
|
||||||
<files>cmd/scan.go, cmd/providers.go, cmd/config.go</files>
|
<files>cmd/scan.go, cmd/providers.go, cmd/config.go, cmd/stubs.go</files>
|
||||||
<read_first>
|
<read_first>
|
||||||
- /home/salva/Documents/apikey/.planning/phases/01-foundation/01-RESEARCH.md (CLI-04, CLI-05 rows, Pattern 2 pipeline usage)
|
- /home/salva/Documents/apikey/.planning/phases/01-foundation/01-RESEARCH.md (CLI-04, CLI-05 rows, Pattern 2 pipeline usage)
|
||||||
- /home/salva/Documents/apikey/cmd/root.go (rootCmd, viper setup)
|
- /home/salva/Documents/apikey/cmd/root.go (rootCmd, viper setup)
|
||||||
- /home/salva/Documents/apikey/pkg/engine/engine.go (Engine.Scan, ScanConfig)
|
- /home/salva/Documents/apikey/pkg/engine/engine.go (Engine.Scan, ScanConfig)
|
||||||
- /home/salva/Documents/apikey/pkg/storage/db.go (Open, SaveFinding)
|
- /home/salva/Documents/apikey/pkg/storage/db.go (Open, SaveFinding)
|
||||||
|
- /home/salva/Documents/apikey/pkg/storage/settings.go (GetSetting, SetSetting)
|
||||||
- /home/salva/Documents/apikey/pkg/providers/registry.go (NewRegistry, List, Get, Stats)
|
- /home/salva/Documents/apikey/pkg/providers/registry.go (NewRegistry, List, Get, Stats)
|
||||||
</read_first>
|
</read_first>
|
||||||
<action>
|
<action>
|
||||||
@@ -347,6 +416,8 @@ package cmd
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
|
"encoding/hex"
|
||||||
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@@ -422,9 +493,13 @@ var scanCmd = &cobra.Command{
|
|||||||
}
|
}
|
||||||
defer db.Close()
|
defer db.Close()
|
||||||
|
|
||||||
// Derive encryption key (Phase 1: empty passphrase with fixed dev salt)
|
// Derive encryption key using a per-installation salt stored in settings table.
|
||||||
salt := []byte("keyhunter-dev-s0") // Phase 1 placeholder — Phase 6 replaces with proper salt storage
|
// On first run, NewSalt() generates a random salt and stores it.
|
||||||
encKey := storage.DeriveKey([]byte(cfg.Passphrase), salt)
|
// On subsequent runs, the same salt is loaded — ensuring consistent encryption.
|
||||||
|
encKey, err := loadOrCreateEncKey(db, cfg.Passphrase)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("preparing encryption key: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
// Run scan
|
// Run scan
|
||||||
ch, err := eng.Scan(context.Background(), src, scanCfg)
|
ch, err := eng.Scan(context.Background(), src, scanCfg)
|
||||||
@@ -453,8 +528,29 @@ var scanCmd = &cobra.Command{
|
|||||||
// Output
|
// Output
|
||||||
switch flagOutput {
|
switch flagOutput {
|
||||||
case "json":
|
case "json":
|
||||||
// Phase 6 — basic JSON for now
|
// Emit the findings as a valid JSON array (prints [] when there are none); full JSON output arrives in Phase 6.
|
||||||
fmt.Printf("[] # JSON output: Phase 6\n")
|
enc := json.NewEncoder(os.Stdout)
|
||||||
|
enc.SetIndent("", " ")
|
||||||
|
type jsonFinding struct {
|
||||||
|
Provider string `json:"provider"`
|
||||||
|
KeyMasked string `json:"key_masked"`
|
||||||
|
Confidence string `json:"confidence"`
|
||||||
|
Source string `json:"source"`
|
||||||
|
Line int `json:"line"`
|
||||||
|
}
|
||||||
|
out := make([]jsonFinding, 0, len(findings))
|
||||||
|
for _, f := range findings {
|
||||||
|
out = append(out, jsonFinding{
|
||||||
|
Provider: f.ProviderName,
|
||||||
|
KeyMasked: f.KeyMasked,
|
||||||
|
Confidence: f.Confidence,
|
||||||
|
Source: f.Source,
|
||||||
|
Line: f.LineNumber,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
if err := enc.Encode(out); err != nil {
|
||||||
|
return fmt.Errorf("encoding JSON output: %w", err)
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
output.PrintFindings(findings, flagUnmask)
|
output.PrintFindings(findings, flagUnmask)
|
||||||
}
|
}
|
||||||
@@ -467,11 +563,39 @@ var scanCmd = &cobra.Command{
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// loadOrCreateEncKey derives the encryption key from the passphrase and the per-installation salt kept in the settings table.
|
||||||
|
// On first run it generates a new random salt with storage.NewSalt() and persists it.
|
||||||
|
// The salt is hex-encoded in the settings table under key "encryption.salt".
|
||||||
|
func loadOrCreateEncKey(db *storage.DB, passphrase string) ([]byte, error) {
|
||||||
|
const saltKey = "encryption.salt"
|
||||||
|
saltHex, found, err := db.GetSetting(saltKey)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("reading salt from settings: %w", err)
|
||||||
|
}
|
||||||
|
var salt []byte
|
||||||
|
if !found {
|
||||||
|
// First run: generate and persist a new random salt.
|
||||||
|
salt, err = storage.NewSalt()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("generating salt: %w", err)
|
||||||
|
}
|
||||||
|
if err := db.SetSetting(saltKey, hex.EncodeToString(salt)); err != nil {
|
||||||
|
return nil, fmt.Errorf("storing salt: %w", err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
salt, err = hex.DecodeString(saltHex)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("decoding stored salt: %w", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return storage.DeriveKey([]byte(passphrase), salt), nil
|
||||||
|
}
|
||||||
|
|
||||||
func init() {
|
func init() {
|
||||||
scanCmd.Flags().IntVar(&flagWorkers, "workers", 0, "number of worker goroutines (default: CPU*8)")
|
scanCmd.Flags().IntVar(&flagWorkers, "workers", 0, "number of worker goroutines (default: CPU*8)")
|
||||||
scanCmd.Flags().BoolVar(&flagVerify, "verify", false, "actively verify found keys (opt-in, Phase 5)")
|
scanCmd.Flags().BoolVar(&flagVerify, "verify", false, "actively verify found keys (opt-in, Phase 5)")
|
||||||
scanCmd.Flags().BoolVar(&flagUnmask, "unmask", false, "show full key values (default: masked)")
|
scanCmd.Flags().BoolVar(&flagUnmask, "unmask", false, "show full key values (default: masked)")
|
||||||
scanCmd.Flags().StringVar(&flagOutput, "output", "table", "output format: table, json (more in Phase 6)")
|
scanCmd.Flags().StringVar(&flagOutput, "output", "table", "output format: table, json (full JSON output in Phase 6)")
|
||||||
scanCmd.Flags().StringSliceVar(&flagExclude, "exclude", nil, "glob patterns to exclude (e.g. *.min.js)")
|
scanCmd.Flags().StringSliceVar(&flagExclude, "exclude", nil, "glob patterns to exclude (e.g. *.min.js)")
|
||||||
viper.BindPFlag("scan.workers", scanCmd.Flags().Lookup("workers"))
|
viper.BindPFlag("scan.workers", scanCmd.Flags().Lookup("workers"))
|
||||||
}
|
}
|
||||||
@@ -658,6 +782,77 @@ func init() {
|
|||||||
configCmd.AddCommand(configSetCmd)
|
configCmd.AddCommand(configSetCmd)
|
||||||
configCmd.AddCommand(configGetCmd)
|
configCmd.AddCommand(configGetCmd)
|
||||||
}
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Create **cmd/stubs.go** — stubs for the 8 commands that are deferred to future phases.
|
||||||
|
These satisfy CLI-01 (11 commands) and print a clear "not implemented" message so users
|
||||||
|
know the command exists but is pending a future phase.
|
||||||
|
|
||||||
|
```go
|
||||||
|
package cmd
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"github.com/spf13/cobra"
|
||||||
|
)
|
||||||
|
|
||||||
|
// notImplemented returns a RunE function that prints a "not implemented in this phase" message naming the phase that will deliver the command.
|
||||||
|
// Each stub command is registered in root.go and satisfies CLI-01's 11-command requirement.
|
||||||
|
func notImplemented(name, phase string) func(cmd *cobra.Command, args []string) error {
|
||||||
|
return func(cmd *cobra.Command, args []string) error {
|
||||||
|
fmt.Printf("%s: not implemented in this phase (coming in %s)\n", name, phase)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var verifyCmd = &cobra.Command{
|
||||||
|
Use: "verify",
|
||||||
|
Short: "Actively verify found API keys (Phase 5)",
|
||||||
|
RunE: notImplemented("verify", "Phase 5"),
|
||||||
|
}
|
||||||
|
|
||||||
|
var importCmd = &cobra.Command{
|
||||||
|
Use: "import",
|
||||||
|
Short: "Import findings from TruffleHog or Gitleaks output (Phase 7)",
|
||||||
|
RunE: notImplemented("import", "Phase 7"),
|
||||||
|
}
|
||||||
|
|
||||||
|
var reconCmd = &cobra.Command{
|
||||||
|
Use: "recon",
|
||||||
|
Short: "Run OSINT recon across internet sources (Phase 9+)",
|
||||||
|
RunE: notImplemented("recon", "Phase 9"),
|
||||||
|
}
|
||||||
|
|
||||||
|
var keysCmd = &cobra.Command{
|
||||||
|
Use: "keys",
|
||||||
|
Short: "Manage stored keys (list, export, delete) (Phase 6)",
|
||||||
|
RunE: notImplemented("keys", "Phase 6"),
|
||||||
|
}
|
||||||
|
|
||||||
|
var serveCmd = &cobra.Command{
|
||||||
|
Use: "serve",
|
||||||
|
Short: "Start the web dashboard (Phase 18)",
|
||||||
|
RunE: notImplemented("serve", "Phase 18"),
|
||||||
|
}
|
||||||
|
|
||||||
|
var dorksCmd = &cobra.Command{
|
||||||
|
Use: "dorks",
|
||||||
|
Short: "Manage and run dork queries (Phase 8)",
|
||||||
|
RunE: notImplemented("dorks", "Phase 8"),
|
||||||
|
}
|
||||||
|
|
||||||
|
var hookCmd = &cobra.Command{
|
||||||
|
Use: "hook",
|
||||||
|
Short: "Install or manage git pre-commit hooks (Phase 7)",
|
||||||
|
RunE: notImplemented("hook", "Phase 7"),
|
||||||
|
}
|
||||||
|
|
||||||
|
var scheduleCmd = &cobra.Command{
|
||||||
|
Use: "schedule",
|
||||||
|
Short: "Manage scheduled recurring scans (Phase 17)",
|
||||||
|
RunE: notImplemented("schedule", "Phase 17"),
|
||||||
|
}
|
||||||
```
|
```
|
||||||
</action>
|
</action>
|
||||||
<verify>
|
<verify>
|
||||||
@@ -665,26 +860,32 @@ func init() {
|
|||||||
</verify>
|
</verify>
|
||||||
<acceptance_criteria>
|
<acceptance_criteria>
|
||||||
- `go build -o keyhunter .` exits 0
|
- `go build -o keyhunter .` exits 0
|
||||||
- `./keyhunter --help` shows scan, providers, config commands
|
- `./keyhunter --help` shows all 11 commands: scan, verify, import, recon, keys, serve, dorks, hook, schedule, providers, config
|
||||||
- `./keyhunter providers list` prints table with >= 3 rows including "openai"
|
- `./keyhunter providers list` prints table with >= 3 rows including "openai"
|
||||||
- `./keyhunter providers info openai` prints Name, Tier, Keywords, Patterns, Verify URL
|
- `./keyhunter providers info openai` prints Name, Tier, Keywords, Patterns, Verify URL
|
||||||
- `./keyhunter providers stats` prints "Total providers: 3" or more
|
- `./keyhunter providers stats` prints "Total providers: 3" or more
|
||||||
- `./keyhunter config init` creates or updates ~/.keyhunter.yaml
|
- `./keyhunter config init` creates or updates ~/.keyhunter.yaml
|
||||||
- `./keyhunter config set scan.workers 16` exits 0
|
- `./keyhunter config set scan.workers 16` exits 0
|
||||||
|
- `./keyhunter verify` prints "not implemented in this phase"
|
||||||
|
- `./keyhunter recon` prints "not implemented in this phase"
|
||||||
- `./keyhunter scan testdata/samples/openai_key.txt` exits 1 (keys found) and prints a table row with "openai"
|
- `./keyhunter scan testdata/samples/openai_key.txt` exits 1 (keys found) and prints a table row with "openai"
|
||||||
- `./keyhunter scan testdata/samples/no_keys.txt` exits 0 and prints "No API keys found."
|
- `./keyhunter scan testdata/samples/no_keys.txt` exits 0 and prints "No API keys found."
|
||||||
|
- `./keyhunter scan --output json testdata/samples/no_keys.txt` exits 0 and prints `[]` (valid JSON)
|
||||||
|
- Second run of `./keyhunter scan testdata/samples/openai_key.txt` uses the SAME salt (loaded from settings table)
|
||||||
- `grep -q 'viper\.BindPFlag' cmd/scan.go` exits 0
|
- `grep -q 'viper\.BindPFlag' cmd/scan.go` exits 0
|
||||||
|
- `grep -q 'loadOrCreateEncKey' cmd/scan.go` exits 0
|
||||||
</acceptance_criteria>
|
</acceptance_criteria>
|
||||||
<done>Full CLI works: scan finds and persists keys, providers list/info/stats work, config init/set/get work. Phase 1 success criteria all met.</done>
|
<done>Full CLI works: scan finds and persists keys with per-installation salt, providers list/info/stats work, config init/set/get work, 8 stub commands registered and respond. Phase 1 success criteria all met.</done>
|
||||||
</task>
|
</task>
|
||||||
|
|
||||||
<task type="checkpoint:human-verify" gate="blocking">
|
<task type="checkpoint:human-verify" gate="blocking">
|
||||||
<what-built>
|
<what-built>
|
||||||
Complete Phase 1 implementation:
|
Complete Phase 1 implementation:
|
||||||
- Provider registry with 3 YAML definitions, Aho-Corasick automaton, schema validation
|
- Provider registry with 3 YAML definitions, Aho-Corasick automaton, schema validation
|
||||||
- Storage layer with AES-256-GCM encryption, Argon2id key derivation, SQLite WAL mode
|
- Storage layer with AES-256-GCM encryption, Argon2id key derivation, SQLite WAL mode, per-installation salt
|
||||||
- Three-stage scan engine: keyword pre-filter → regex + entropy detector → finding channel
|
- Three-stage scan engine: keyword pre-filter → regex + entropy detector → finding channel
|
||||||
- CLI: keyhunter scan, providers list/info/stats, config init/set/get
|
- CLI: keyhunter scan, providers list/info/stats, config init/set/get
|
||||||
|
- 8 stub commands for future phases (verify, import, recon, keys, serve, dorks, hook, schedule)
|
||||||
</what-built>
|
</what-built>
|
||||||
<how-to-verify>
|
<how-to-verify>
|
||||||
Run these commands from the project root and confirm each expected output:
|
Run these commands from the project root and confirm each expected output:
|
||||||
@@ -698,23 +899,29 @@ Run these commands from the project root and confirm each expected output:
|
|||||||
3. `./keyhunter scan testdata/samples/no_keys.txt`
|
3. `./keyhunter scan testdata/samples/no_keys.txt`
|
||||||
Expected: Exit code 0, "No API keys found." printed
|
Expected: Exit code 0, "No API keys found." printed
|
||||||
|
|
||||||
4. `./keyhunter providers list`
|
4. `./keyhunter scan --output json testdata/samples/no_keys.txt`
|
||||||
|
Expected: Exit code 0, valid JSON printed: `[]`
|
||||||
|
|
||||||
|
5. `./keyhunter providers list`
|
||||||
Expected: Table with openai, anthropic, huggingface rows
|
Expected: Table with openai, anthropic, huggingface rows
|
||||||
|
|
||||||
5. `./keyhunter providers info openai`
|
6. `./keyhunter providers info openai`
|
||||||
Expected: Name, Tier 1, Keywords including "sk-proj-", Pattern regex shown
|
Expected: Name, Tier 1, Keywords including "sk-proj-", Pattern regex shown
|
||||||
|
|
||||||
6. `./keyhunter config init`
|
7. `./keyhunter config init`
|
||||||
Expected: "Config initialized: ~/.keyhunter.yaml" and the file exists
|
Expected: "Config initialized: ~/.keyhunter.yaml" and the file exists
|
||||||
|
|
||||||
7. `./keyhunter config set scan.workers 16 && ./keyhunter config get scan.workers`
|
8. `./keyhunter config set scan.workers 16 && ./keyhunter config get scan.workers`
|
||||||
Expected: "Set scan.workers = 16" then "16"
|
Expected: "Set scan.workers = 16" then "16"
|
||||||
|
|
||||||
8. Build the binary with production flags:
|
9. `./keyhunter verify`
|
||||||
`CGO_ENABLED=0 go build -ldflags="-s -w" -o keyhunter-prod .`
|
Expected: "verify: not implemented in this phase (coming in Phase 5)"
|
||||||
Expected: Builds without error, binary produced
|
|
||||||
|
10. Build the binary with production flags:
|
||||||
|
`CGO_ENABLED=0 go build -ldflags="-s -w" -o keyhunter-prod .`
|
||||||
|
Expected: Builds without error, binary produced
|
||||||
</how-to-verify>
|
</how-to-verify>
|
||||||
<resume-signal>Type "approved" if all 8 checks pass, or describe which check failed and what output you saw.</resume-signal>
|
<resume-signal>Type "approved" if all 10 checks pass, or describe which check failed and what output you saw.</resume-signal>
|
||||||
</task>
|
</task>
|
||||||
|
|
||||||
</tasks>
|
</tasks>
|
||||||
@@ -724,20 +931,24 @@ Full Phase 1 integration check:
|
|||||||
- `go test ./... -count=1` exits 0
|
- `go test ./... -count=1` exits 0
|
||||||
- `./keyhunter scan testdata/samples/openai_key.txt` exits 1 with findings table
|
- `./keyhunter scan testdata/samples/openai_key.txt` exits 1 with findings table
|
||||||
- `./keyhunter scan testdata/samples/no_keys.txt` exits 0 with "No API keys found."
|
- `./keyhunter scan testdata/samples/no_keys.txt` exits 0 with "No API keys found."
|
||||||
|
- `./keyhunter scan --output json testdata/samples/no_keys.txt` prints valid JSON `[]`
|
||||||
- `./keyhunter providers list` shows 3+ providers
|
- `./keyhunter providers list` shows 3+ providers
|
||||||
- `./keyhunter config init` creates ~/.keyhunter.yaml
|
- `./keyhunter config init` creates ~/.keyhunter.yaml
|
||||||
|
- `./keyhunter verify` prints "verify: not implemented in this phase (coming in Phase 5)"
|
||||||
- `CGO_ENABLED=0 go build -ldflags="-s -w" -o keyhunter-prod .` exits 0
|
- `CGO_ENABLED=0 go build -ldflags="-s -w" -o keyhunter-prod .` exits 0
|
||||||
</verification>
|
</verification>
|
||||||
|
|
||||||
<success_criteria>
|
<success_criteria>
|
||||||
- Cobra CLI with scan, providers, config commands (CLI-01)
|
- Cobra CLI with all 11 commands: scan, verify, import, recon, keys, serve, dorks, hook, schedule, providers, config (CLI-01)
|
||||||
- `keyhunter config init` creates ~/.keyhunter.yaml (CLI-02)
|
- `keyhunter config init` creates ~/.keyhunter.yaml (CLI-02)
|
||||||
- `keyhunter config set key value` persists (CLI-03)
|
- `keyhunter config set key value` persists (CLI-03)
|
||||||
- `keyhunter providers list/info/stats` work (CLI-04)
|
- `keyhunter providers list/info/stats` work (CLI-04)
|
||||||
- scan flags: --workers, --verify, --unmask, --output, --exclude (CLI-05)
|
- scan flags: --workers, --verify, --unmask, --output, --exclude (CLI-05)
|
||||||
|
- Per-installation salt stored in settings table; no hardcoded salt in production code
|
||||||
|
- `scan --output json` prints valid JSON (`[]` when no keys are found), not a comment string
|
||||||
- All Phase 1 success criteria from ROADMAP.md satisfied:
|
- All Phase 1 success criteria from ROADMAP.md satisfied:
|
||||||
1. `keyhunter scan ./somefile` runs three-stage pipeline and returns findings with provider names
|
1. `keyhunter scan ./somefile` runs three-stage pipeline and returns findings with provider names
|
||||||
2. Findings persisted to SQLite with AES-256 encrypted key_value
|
2. Findings persisted to SQLite with AES-256-GCM encrypted key_value; the raw database does not contain plaintext keys
|
||||||
3. `keyhunter config init` and `config set` work
|
3. `keyhunter config init` and `config set` work
|
||||||
4. `keyhunter providers list/info` return provider metadata from YAML
|
4. `keyhunter providers list/info` return provider metadata from YAML
|
||||||
5. Provider YAML has format_version and last_verified, validated at load time
|
5. Provider YAML has format_version and last_verified, validated at load time
|
||||||
|
|||||||
Reference in New Issue
Block a user