Files
keyhunter/pkg/storage/queries.go
salvacybersec 9dbb0b87d4 feat(07-04): wire keyhunter import command with dedup and DB persist
- Replace import stub with cmd/import.go dispatching to pkg/importer
  (trufflehog, gitleaks, gitleaks-csv) via --format flag
- Reuse openDBWithKey helper so encryption + path resolution match scan/keys
- engineToStorage converts engine.Finding -> storage.Finding (Source -> SourcePath)
- Add pkg/storage.FindingExistsByKey for idempotent cross-import dedup
  keyed on (provider, masked key, source path, line number)
- cmd/import_test.go: selector table, field conversion, end-to-end trufflehog
  import with re-run duplicate assertion, unknown-format + missing-file errors
- pkg/storage queries_test: FindingExistsByKey hit and four miss cases

Delivers IMP-01/02/03 end-to-end.
2026-04-05 23:59:39 +03:00

180 lines
5.6 KiB
Go

package storage
import (
	"database/sql"
	"encoding/json"
	"errors"
	"fmt"
	"strings"
	"time"
)
// Filters narrows the set of findings returned by ListFindingsFiltered.
// The zero value matches every finding and applies no pagination.
type Filters struct {
	// Provider restricts results to one provider name; "" matches any provider.
	Provider string
	// Verified restricts results by verified state; nil matches either state.
	Verified *bool
	// Limit caps the number of rows returned. Values <= 0 disable pagination.
	Limit int
	// Offset skips this many rows. It is only applied when Limit > 0.
	Offset int
}
// ListFindingsFiltered loads the findings selected by f, ordered by
// created_at descending and then id descending.
//
// Every returned finding has its key value decrypted with encKey, which must
// be the key that was used when the finding was saved. The result is a nil
// slice when no rows match. If hydrating any row fails, the error is returned
// with a nil slice.
func (db *DB) ListFindingsFiltered(encKey []byte, f Filters) ([]Finding, error) {
	const selectFindings = `SELECT id, scan_id, provider_name, key_value, key_masked, confidence,
source_path, source_type, line_number,
verified, verify_status, verify_http_code, verify_metadata_json,
created_at
FROM findings`

	// Each condition is paired with exactly one positional argument, appended
	// in the same order so placeholders and values stay aligned.
	var (
		conds  []string
		params []interface{}
	)
	if f.Provider != "" {
		conds = append(conds, "provider_name = ?")
		params = append(params, f.Provider)
	}
	if f.Verified != nil {
		// The verified column is compared against an integer flag.
		flag := 0
		if *f.Verified {
			flag = 1
		}
		conds = append(conds, "verified = ?")
		params = append(params, flag)
	}

	var query strings.Builder
	query.WriteString(selectFindings)
	if len(conds) > 0 {
		query.WriteString(" WHERE ")
		query.WriteString(strings.Join(conds, " AND "))
	}
	query.WriteString(" ORDER BY created_at DESC, id DESC")
	if f.Limit > 0 {
		// Offset only takes effect together with a positive Limit.
		query.WriteString(" LIMIT ? OFFSET ?")
		params = append(params, f.Limit, f.Offset)
	}

	rows, err := db.sql.Query(query.String(), params...)
	if err != nil {
		return nil, fmt.Errorf("querying findings: %w", err)
	}
	defer rows.Close()

	var findings []Finding
	for rows.Next() {
		item, scanErr := scanFindingRow(rows, encKey)
		if scanErr != nil {
			return nil, scanErr
		}
		findings = append(findings, item)
	}
	// rows.Err surfaces any iteration error that ended the loop early.
	return findings, rows.Err()
}
// GetFinding looks up one finding by its id and decrypts its key value with
// encKey.
//
// The error from the row scan is returned as-is, so a missing id yields
// sql.ErrNoRows, which callers can detect with errors.Is.
func (db *DB) GetFinding(id int64, encKey []byte) (*Finding, error) {
	const selectByID = `SELECT id, scan_id, provider_name, key_value, key_masked, confidence,
source_path, source_type, line_number,
verified, verify_status, verify_http_code, verify_metadata_json,
created_at
FROM findings WHERE id = ?`

	found, err := scanFindingRowFromRow(db.sql.QueryRow(selectByID, id), encKey)
	if err != nil {
		return nil, err
	}
	return &found, nil
}
// FindingExistsByKey reports whether a finding with the same provider name,
// masked key, source path, and line number already exists in the database.
// This is the identity tuple used by the import pipeline to make repeated
// imports of the same scanner output idempotent without decrypting stored
// key values.
//
// It returns (false, nil) when no matching row exists. Any other query or
// scan failure is returned wrapped, with false.
func (db *DB) FindingExistsByKey(provider, masked, sourcePath string, line int) (bool, error) {
	row := db.sql.QueryRow(
		`SELECT 1 FROM findings
WHERE provider_name = ? AND key_masked = ? AND source_path = ? AND line_number = ?
LIMIT 1`,
		provider, masked, sourcePath, line,
	)

	var one int
	err := row.Scan(&one)
	switch {
	case err == nil:
		return true, nil
	case errors.Is(err, sql.ErrNoRows):
		// errors.Is (rather than ==) keeps the miss detection working even if
		// the sentinel arrives wrapped.
		return false, nil
	default:
		return false, fmt.Errorf("querying finding existence: %w", err)
	}
}
// DeleteFinding deletes the finding whose id matches.
//
// It returns the affected-row count reported by the database, which is 0 when
// no finding has that id; an unknown id is not treated as an error, leaving
// the caller to decide whether to report it. A failure of the DELETE itself
// is wrapped with the id; the result of RowsAffected is passed through as-is.
func (db *DB) DeleteFinding(id int64) (int64, error) {
	const deleteByID = `DELETE FROM findings WHERE id = ?`

	result, execErr := db.sql.Exec(deleteByID, id)
	if execErr != nil {
		return 0, fmt.Errorf("deleting finding %d: %w", id, execErr)
	}
	return result.RowsAffected()
}
// scanFindingRow scans the current row of rows into a Finding and hands the
// raw column values to hydrateFinding, which decrypts the key with encKey.
// Scan failures are wrapped with a "scanning finding row" prefix.
func scanFindingRow(rows *sql.Rows, encKey []byte) (Finding, error) {
	var (
		finding    Finding
		ciphertext []byte
		scanID     sql.NullInt64
		verified   int
		meta       sql.NullString
		created    string
	)

	// Destination order must match the SELECT column order used by callers.
	dest := []interface{}{
		&finding.ID, &scanID, &finding.ProviderName, &ciphertext, &finding.KeyMasked,
		&finding.Confidence, &finding.SourcePath, &finding.SourceType, &finding.LineNumber,
		&verified, &finding.VerifyStatus, &finding.VerifyHTTPCode, &meta,
		&created,
	}
	if err := rows.Scan(dest...); err != nil {
		return finding, fmt.Errorf("scanning finding row: %w", err)
	}
	return hydrateFinding(finding, ciphertext, scanID, verified, meta, created, encKey)
}
// scanFindingRowFromRow scans a single-row result into a Finding and hands
// the raw column values to hydrateFinding, which decrypts the key with encKey.
//
// The Scan error is returned unwrapped, so sql.ErrNoRows reaches the caller
// unchanged and can be detected with errors.Is.
func scanFindingRowFromRow(row *sql.Row, encKey []byte) (Finding, error) {
	var (
		finding    Finding
		ciphertext []byte
		scanID     sql.NullInt64
		verified   int
		meta       sql.NullString
		created    string
	)

	// Destination order must match the SELECT column order used by callers.
	dest := []interface{}{
		&finding.ID, &scanID, &finding.ProviderName, &ciphertext, &finding.KeyMasked,
		&finding.Confidence, &finding.SourcePath, &finding.SourceType, &finding.LineNumber,
		&verified, &finding.VerifyStatus, &finding.VerifyHTTPCode, &meta,
		&created,
	}
	if err := row.Scan(dest...); err != nil {
		// Deliberately not wrapped: this path also carries sql.ErrNoRows.
		return finding, err
	}
	return hydrateFinding(finding, ciphertext, scanID, verified, meta, created, encKey)
}
// hydrateFinding completes a partially scanned Finding from its raw column
// values: it copies a non-NULL scan id, converts the integer verified flag to
// a bool, decodes non-empty verify metadata JSON into a string map, decrypts
// the stored key with encKey, and parses createdAt.
//
// Metadata decoding is attempted before decryption; the first failure is
// returned wrapped with the finding id.
func hydrateFinding(f Finding, encrypted []byte, scanID sql.NullInt64, verifiedInt int, metaJSON sql.NullString, createdAt string, encKey []byte) (Finding, error) {
	const createdAtLayout = "2006-01-02 15:04:05"

	f.Verified = verifiedInt != 0
	if scanID.Valid {
		f.ScanID = scanID.Int64
	}

	// NULL or empty metadata leaves VerifyMetadata untouched.
	if raw := metaJSON.String; metaJSON.Valid && raw != "" {
		meta := make(map[string]string)
		if err := json.Unmarshal([]byte(raw), &meta); err != nil {
			return f, fmt.Errorf("unmarshaling verify metadata for finding %d: %w", f.ID, err)
		}
		f.VerifyMetadata = meta
	}

	plaintext, err := Decrypt(encrypted, encKey)
	if err != nil {
		return f, fmt.Errorf("decrypting finding %d: %w", f.ID, err)
	}
	f.KeyValue = string(plaintext)

	// NOTE(review): the parse error is discarded, so a created_at value that
	// does not match the layout is not reported to the caller — confirm this
	// best-effort behaviour is intended.
	parsed, _ := time.Parse(createdAtLayout, createdAt)
	f.CreatedAt = parsed
	return f, nil
}