feat(07-04): wire keyhunter import command with dedup and DB persist

- Replace import stub with cmd/import.go dispatching to pkg/importer
  (trufflehog, gitleaks, gitleaks-csv) via --format flag
- Reuse openDBWithKey helper so encryption + path resolution match scan/keys
- engineToStorage converts engine.Finding -> storage.Finding (Source -> SourcePath)
- Add pkg/storage.FindingExistsByKey for idempotent cross-import dedup
  keyed on (provider, masked key, source path, line number)
- cmd/import_test.go: selector table, field conversion, end-to-end trufflehog
  import with re-run duplicate assertion, unknown-format + missing-file errors
- pkg/storage queries_test: FindingExistsByKey hit and four miss cases

Delivers IMP-01/02/03 end-to-end.
This commit is contained in:
salvacybersec
2026-04-05 23:59:39 +03:00
parent b3db22ac93
commit 9dbb0b87d4
4 changed files with 349 additions and 0 deletions

View File

@@ -84,6 +84,28 @@ func (db *DB) GetFinding(id int64, encKey []byte) (*Finding, error) {
return &f, nil
}
// FindingExistsByKey tells the caller whether the findings table already
// holds at least one row matching the given provider name, masked key,
// source path, and line number.
//
// The import pipeline treats this four-field tuple as a finding's identity so
// that re-importing the same scanner output does not create duplicates. The
// lookup compares only the masked key column, so no encryption key is needed.
//
// It returns (true, nil) when a matching row is found, (false, nil) when the
// query yields no rows, and (false, err) with a wrapped error for any other
// failure reported by the database.
func (db *DB) FindingExistsByKey(provider, masked, sourcePath string, line int) (bool, error) {
	const existsQuery = `SELECT 1 FROM findings
WHERE provider_name = ? AND key_masked = ? AND source_path = ? AND line_number = ?
LIMIT 1`

	// The selected value is irrelevant; Scan only needs a destination so the
	// presence or absence of a row can be observed through its error.
	var sentinel int
	err := db.sql.QueryRow(existsQuery, provider, masked, sourcePath, line).Scan(&sentinel)
	switch {
	case err == nil:
		return true, nil
	case err == sql.ErrNoRows:
		// No matching row is a normal "not found" answer, not a failure.
		return false, nil
	default:
		return false, fmt.Errorf("querying finding existence: %w", err)
	}
}
// DeleteFinding removes the finding with the given id.
// Returns the number of rows affected (0 if no such id). A missing id is not
// an error — the caller decides whether to surface it.

View File

@@ -147,3 +147,42 @@ func TestDeleteFinding_Miss(t *testing.T) {
require.NoError(t, err)
assert.Equal(t, int64(0), n)
}
// TestFindingExistsByKey saves one finding with a known identity tuple and
// checks that FindingExistsByKey reports a hit for the exact tuple and a miss
// whenever any single field of the tuple differs.
func TestFindingExistsByKey(t *testing.T) {
	db, encKey, _ := seedQueryFindings(t)

	// Identity tuple of the finding inserted below; the masked key is fixed
	// so the lookups can match it exactly.
	const (
		wantProvider = "openai"
		wantMasked   = "sk-exact...1234"
		wantPath     = "/tmp/exact.env"
		wantLine     = 42
	)

	_, err := db.SaveFinding(storage.Finding{
		ProviderName: wantProvider,
		KeyValue:     "sk-exact-key-value-1234",
		KeyMasked:    wantMasked,
		Confidence:   "high",
		SourcePath:   wantPath,
		SourceType:   "import:trufflehog",
		LineNumber:   wantLine,
	}, encKey)
	require.NoError(t, err)

	// The exact tuple must be reported as present.
	hit, err := db.FindingExistsByKey(wantProvider, wantMasked, wantPath, wantLine)
	require.NoError(t, err)
	assert.True(t, hit, "exact tuple should be found")

	// Each row changes exactly one field of the tuple and must miss.
	misses := []struct {
		provider   string
		masked     string
		sourcePath string
		line       int
	}{
		{"anthropic", wantMasked, wantPath, wantLine},
		{wantProvider, "sk-other...9999", wantPath, wantLine},
		{wantProvider, wantMasked, "/tmp/other.env", wantLine},
		{wantProvider, wantMasked, wantPath, 7},
	}
	for _, m := range misses {
		found, err := db.FindingExistsByKey(m.provider, m.masked, m.sourcePath, m.line)
		require.NoError(t, err)
		assert.False(t, found)
	}
}