docs(07): create phase 7 import & CI/CD plans
This commit is contained in:
263
.planning/phases/07-import-cicd/07-04-PLAN.md
Normal file
263
.planning/phases/07-import-cicd/07-04-PLAN.md
Normal file
@@ -0,0 +1,263 @@
|
||||
---
|
||||
phase: 07-import-cicd
|
||||
plan: 04
|
||||
type: execute
|
||||
wave: 2
|
||||
depends_on: ["07-01", "07-02", "07-03"]
|
||||
files_modified:
|
||||
- cmd/import.go
|
||||
- cmd/stubs.go
|
||||
- cmd/import_test.go
|
||||
autonomous: true
|
||||
requirements: [IMP-01, IMP-02, IMP-03]
|
||||
must_haves:
|
||||
truths:
|
||||
- "keyhunter import --format=trufflehog <file> inserts findings into the SQLite database"
|
||||
- "keyhunter import --format=gitleaks <file> inserts findings"
|
||||
- "keyhunter import --format=gitleaks-csv <file> inserts findings"
|
||||
- "Duplicate findings across repeated imports are skipped with reported count"
|
||||
- "Summary 'Imported N findings (M new, K duplicates)' is printed to stdout"
|
||||
artifacts:
|
||||
- path: cmd/import.go
|
||||
provides: "keyhunter import command implementation"
|
||||
contains: "var importCmd"
|
||||
key_links:
|
||||
- from: cmd/import.go
|
||||
to: pkg/importer
|
||||
via: "dispatches by format flag to Importer implementations"
|
||||
pattern: "importer\\.(TruffleHog|Gitleaks|GitleaksCSV)Importer"
|
||||
- from: cmd/import.go
|
||||
to: pkg/storage
|
||||
via: "calls db.SaveFinding for each deduped record"
|
||||
pattern: "SaveFinding"
|
||||
---
|
||||
|
||||
<objective>
|
||||
Replace the cmd/import stub with a fully wired command that parses external scanner output (via pkg/importer), deduplicates, and persists findings to the KeyHunter SQLite database.
|
||||
|
||||
Purpose: Delivers IMP-01/02/03 end-to-end from CLI. Users can consolidate TruffleHog and Gitleaks scans into the unified KeyHunter database.
|
||||
Output: Working `keyhunter import` command with tests.
|
||||
</objective>
|
||||
|
||||
<execution_context>
|
||||
@$HOME/.claude/get-shit-done/workflows/execute-plan.md
|
||||
@$HOME/.claude/get-shit-done/templates/summary.md
|
||||
</execution_context>
|
||||
|
||||
<context>
|
||||
@.planning/phases/07-import-cicd/07-CONTEXT.md
|
||||
@cmd/stubs.go
|
||||
@cmd/root.go
|
||||
@pkg/storage/findings.go
|
||||
|
||||
<interfaces>
|
||||
From pkg/importer (Plans 07-01, 07-02, 07-03):
|
||||
```go
|
||||
type Importer interface {
|
||||
Name() string
|
||||
Import(r io.Reader) ([]engine.Finding, error)
|
||||
}
|
||||
type TruffleHogImporter struct{}
|
||||
type GitleaksImporter struct{}
|
||||
type GitleaksCSVImporter struct{}
|
||||
func FindingKey(f engine.Finding) string
|
||||
func Dedup(in []engine.Finding) (unique []engine.Finding, duplicates int)
|
||||
```
|
||||
From pkg/storage/findings.go:
|
||||
```go
|
||||
func (db *DB) SaveFinding(f storage.Finding, encKey []byte) (int64, error)
|
||||
```
|
||||
storage.Finding fields: ProviderName, KeyValue, KeyMasked, Confidence, SourcePath, SourceType, LineNumber, Verified, VerifyStatus, VerifyHTTPCode, VerifyMetadata, ScanID.
|
||||
Note field name difference: storage uses SourcePath; engine uses Source. Conversion required.
|
||||
</interfaces>
|
||||
</context>
|
||||
|
||||
<tasks>
|
||||
|
||||
<task type="auto">
|
||||
<name>Task 1: Implement cmd/import.go with format dispatch and dedup</name>
|
||||
<files>cmd/import.go, cmd/stubs.go, cmd/import_test.go</files>
|
||||
<action>
|
||||
Remove the `importCmd` stub from cmd/stubs.go (delete the `var importCmd = &cobra.Command{...}` block). Leave all other stubs intact.
|
||||
|
||||
Create cmd/import.go:
|
||||
```go
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
"github.com/salvacybersec/keyhunter/pkg/engine"
|
||||
"github.com/salvacybersec/keyhunter/pkg/importer"
|
||||
"github.com/salvacybersec/keyhunter/pkg/storage"
|
||||
)
|
||||
|
||||
// importFormat receives the value of the --format flag registered on
// importCmd; runImport passes it to selectImporter.
var importFormat string
|
||||
|
||||
var importCmd = &cobra.Command{
|
||||
Use: "import <file>",
|
||||
Short: "Import findings from TruffleHog or Gitleaks output",
|
||||
Long: `Import scan output from external secret scanners into the KeyHunter database. Supported formats: trufflehog (v3 JSON), gitleaks (JSON), gitleaks-csv.`,
|
||||
Args: cobra.ExactArgs(1),
|
||||
RunE: runImport,
|
||||
}
|
||||
|
||||
func init() {
|
||||
importCmd.Flags().StringVar(&importFormat, "format", "", "input format: trufflehog | gitleaks | gitleaks-csv (required)")
|
||||
_ = importCmd.MarkFlagRequired("format")
|
||||
}
|
||||
|
||||
func runImport(cmd *cobra.Command, args []string) error {
|
||||
path := args[0]
|
||||
imp, err := selectImporter(importFormat)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("opening %s: %w", path, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
findings, err := imp.Import(f)
|
||||
if err != nil {
|
||||
return fmt.Errorf("parsing %s output: %w", imp.Name(), err)
|
||||
}
|
||||
|
||||
unique, dupes := importer.Dedup(findings)
|
||||
|
||||
db, encKey, err := openDBForImport()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
newCount := 0
|
||||
dbDupes := 0
|
||||
for _, finding := range unique {
|
||||
sf := engineToStorage(finding)
|
||||
// Defense against cross-import duplicates already in DB:
|
||||
exists, err := findingExistsInDB(db, finding)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if exists {
|
||||
dbDupes++
|
||||
continue
|
||||
}
|
||||
if _, err := db.SaveFinding(sf, encKey); err != nil {
|
||||
return fmt.Errorf("saving finding: %w", err)
|
||||
}
|
||||
newCount++
|
||||
}
|
||||
|
||||
totalDupes := dupes + dbDupes
|
||||
fmt.Fprintf(cmd.OutOrStdout(), "Imported %d findings (%d new, %d duplicates)\n", len(findings), newCount, totalDupes)
|
||||
return nil
|
||||
}
|
||||
|
||||
func selectImporter(format string) (importer.Importer, error) {
|
||||
switch format {
|
||||
case "trufflehog":
|
||||
return importer.TruffleHogImporter{}, nil
|
||||
case "gitleaks":
|
||||
return importer.GitleaksImporter{}, nil
|
||||
case "gitleaks-csv":
|
||||
return importer.GitleaksCSVImporter{}, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown format %q (want trufflehog | gitleaks | gitleaks-csv)", format)
|
||||
}
|
||||
}
|
||||
|
||||
func engineToStorage(f engine.Finding) storage.Finding {
|
||||
if f.DetectedAt.IsZero() {
|
||||
f.DetectedAt = time.Now()
|
||||
}
|
||||
return storage.Finding{
|
||||
ProviderName: f.ProviderName,
|
||||
KeyValue: f.KeyValue,
|
||||
KeyMasked: f.KeyMasked,
|
||||
Confidence: f.Confidence,
|
||||
SourcePath: f.Source,
|
||||
SourceType: f.SourceType,
|
||||
LineNumber: f.LineNumber,
|
||||
Verified: f.Verified,
|
||||
VerifyStatus: f.VerifyStatus,
|
||||
VerifyHTTPCode: f.VerifyHTTPCode,
|
||||
VerifyMetadata: f.VerifyMetadata,
|
||||
}
|
||||
}
|
||||
|
||||
// openDBForImport opens the configured DB using the same helpers as scan/keys.
|
||||
// Reuse whatever helper already exists in cmd/ (e.g., openDBWithKey from keys.go).
|
||||
// If no shared helper exists, extract one from cmd/scan.go.
|
||||
func openDBForImport() (*storage.DB, []byte, error) {
|
||||
// TODO-executor: reuse existing DB-open helper from cmd/scan.go or cmd/keys.go.
|
||||
// Do NOT duplicate encryption key derivation — call into the existing helper.
|
||||
return nil, nil, fmt.Errorf("not yet wired")
|
||||
}
|
||||
|
||||
// findingExistsInDB checks if a finding with the same provider + masked key + source + line
|
||||
// already exists. Uses importer.FindingKey-style logic via a DB query against findings table.
|
||||
func findingExistsInDB(db *storage.DB, f engine.Finding) (bool, error) {
|
||||
// Executor: add a storage helper or use db.SQL() with:
|
||||
// SELECT 1 FROM findings WHERE provider_name=? AND key_masked=? AND source_path=? AND line_number=? LIMIT 1
|
||||
return false, nil
|
||||
}
|
||||
```
|
||||
|
||||
CRITICAL executor notes:
|
||||
1. Inspect cmd/scan.go and cmd/keys.go to find the existing DB-open + passphrase helper (e.g., `openDBWithPassphrase` or similar). Use that helper — do not reimplement encryption key derivation. Replace the `openDBForImport` body accordingly.
|
||||
2. Inspect pkg/storage for an existing "find by key" helper. If none exists, add a thin method `func (db *DB) FindingExistsByKey(provider, masked, sourcePath string, line int) (bool, error)` to pkg/storage/queries.go that runs the SELECT above, and add a test for it in pkg/storage (simple in-memory roundtrip).
|
||||
3. Register importCmd: it's already added in cmd/root.go via `rootCmd.AddCommand(importCmd)`. Since you removed the stub, your new `var importCmd` declaration takes over the identifier — no root.go change needed.
|
||||
|
||||
Create cmd/import_test.go:
|
||||
- TestSelectImporter: table — {"trufflehog", TruffleHogImporter}, {"gitleaks", GitleaksImporter}, {"gitleaks-csv", GitleaksCSVImporter}, {"bogus", error}.
|
||||
- TestEngineToStorage: converts engine.Finding (with Source="a.yml", LineNumber=5, Verified=true) to storage.Finding (SourcePath="a.yml", LineNumber=5, Verified=true).
|
||||
- TestRunImport_EndToEnd (integration-style):
|
||||
* Create a temp DB via existing test helpers (look for one in cmd/*_test.go or pkg/storage/*_test.go).
|
||||
* Write a tiny TruffleHog JSON file to a temp path.
|
||||
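* Invoke the command through the root command: call `rootCmd.SetArgs([]string{"import", "--format=trufflehog", tmpPath})`, capture output with `rootCmd.SetOut(&buf)`, then call `rootCmd.Execute()`. (Cobra always executes from the root command, so calling `importCmd.Execute()` with args set only on importCmd would not use them.)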
|
||||
* Assert stdout contains "Imported" and "new".
|
||||
* Assert db.ListFindings returns at least 1 finding with ProviderName set.
|
||||
* Re-run the same command → assert output reports "0 new" and dupe count equals prior insert count.
|
||||
* If a shared test DB helper is not discoverable, mark this subtest with t.Skip("needs shared test DB helper") but still ship TestSelectImporter and TestEngineToStorage.
|
||||
</action>
|
||||
<verify>
|
||||
<automated>cd /home/salva/Documents/apikey && go build ./... && go test ./cmd/... -run Import -v</automated>
|
||||
</verify>
|
||||
<done>
|
||||
- cmd/import.go replaces the stub; stub removed from cmd/stubs.go
|
||||
- `keyhunter import --format=trufflehog sample.json` inserts findings
|
||||
- Re-running the same import reports all as duplicates
|
||||
- Unit tests pass; build succeeds
|
||||
</done>
|
||||
</task>
|
||||
|
||||
</tasks>
|
||||
|
||||
<verification>
|
||||
Manual smoke test:
|
||||
```
|
||||
go run ./cmd/keyhunter import --format=trufflehog pkg/importer/testdata/trufflehog-sample.json
|
||||
# Expect: "Imported 3 findings (3 new, 0 duplicates)"
|
||||
go run ./cmd/keyhunter import --format=trufflehog pkg/importer/testdata/trufflehog-sample.json
|
||||
# Expect: "Imported 3 findings (0 new, 3 duplicates)"
|
||||
```
|
||||
</verification>
|
||||
|
||||
<success_criteria>
|
||||
IMP-01, IMP-02, IMP-03 delivered end-to-end: external scanner output can be imported, deduped, and persisted; repeat imports are idempotent.
|
||||
</success_criteria>
|
||||
|
||||
<output>
|
||||
After completion, create `.planning/phases/07-import-cicd/07-04-SUMMARY.md`.
|
||||
</output>
|
||||
Reference in New Issue
Block a user