feat(04-03): implement GitSource with full-history traversal

- Walks every commit across branches, tags, remote-tracking refs, and stash
- Deduplicates blob scans by OID (seenBlobs map) so identical content
  across commits/files is scanned exactly once
- Emits chunks with source format git:<short-sha>:<path>
- Honors --since filter via GitSource.Since (commit author date)
- Resolves annotated tag objects down to their commit hash
- Skips binary blobs via go-git IsBinary plus null-byte sniff
- 8 tests cover history walk, dedup, modified-file, multi-branch,
  tag reachability, since filter, source format, missing repo
This commit is contained in:
salvacybersec
2026-04-05 15:18:05 +03:00
parent ce6298f304
commit e48a7a489e
4 changed files with 420 additions and 8 deletions

10
go.mod
View File

@@ -3,13 +3,16 @@ module github.com/salvacybersec/keyhunter
go 1.26.1 go 1.26.1
require ( require (
github.com/atotto/clipboard v0.1.4
github.com/charmbracelet/lipgloss v1.1.0 github.com/charmbracelet/lipgloss v1.1.0
github.com/go-git/go-git/v5 v5.17.2
github.com/panjf2000/ants/v2 v2.12.0 github.com/panjf2000/ants/v2 v2.12.0
github.com/petar-dambovaliev/aho-corasick v0.0.0-20250424160509-463d218d4745 github.com/petar-dambovaliev/aho-corasick v0.0.0-20250424160509-463d218d4745
github.com/spf13/cobra v1.10.2 github.com/spf13/cobra v1.10.2
github.com/spf13/viper v1.21.0 github.com/spf13/viper v1.21.0
github.com/stretchr/testify v1.11.1 github.com/stretchr/testify v1.11.1
golang.org/x/crypto v0.49.0 golang.org/x/crypto v0.49.0
golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90
golang.org/x/time v0.15.0 golang.org/x/time v0.15.0
gopkg.in/yaml.v3 v3.0.1 gopkg.in/yaml.v3 v3.0.1
modernc.org/sqlite v1.48.1 modernc.org/sqlite v1.48.1
@@ -19,7 +22,6 @@ require (
dario.cat/mergo v1.0.0 // indirect dario.cat/mergo v1.0.0 // indirect
github.com/Microsoft/go-winio v0.6.2 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect
github.com/ProtonMail/go-crypto v1.1.6 // indirect github.com/ProtonMail/go-crypto v1.1.6 // indirect
github.com/atotto/clipboard v0.1.4 // indirect
github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect
github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc // indirect github.com/charmbracelet/colorprofile v0.2.3-0.20250311203215-f60798e515dc // indirect
github.com/charmbracelet/x/ansi v0.8.0 // indirect github.com/charmbracelet/x/ansi v0.8.0 // indirect
@@ -33,10 +35,8 @@ require (
github.com/fsnotify/fsnotify v1.9.0 // indirect github.com/fsnotify/fsnotify v1.9.0 // indirect
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect
github.com/go-git/go-billy/v5 v5.8.0 // indirect github.com/go-git/go-billy/v5 v5.8.0 // indirect
github.com/go-git/go-git/v5 v5.17.2 // indirect
github.com/go-viper/mapstructure/v2 v2.4.0 // indirect github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/google/uuid v1.6.0 // indirect github.com/google/uuid v1.6.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect
@@ -51,7 +51,6 @@ require (
github.com/pmezard/go-difflib v1.0.0 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
github.com/rivo/uniseg v0.4.7 // indirect github.com/rivo/uniseg v0.4.7 // indirect
github.com/rogpeppe/go-internal v1.14.1 // indirect
github.com/sagikazarmark/locafero v0.11.0 // indirect github.com/sagikazarmark/locafero v0.11.0 // indirect
github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect
github.com/skeema/knownhosts v1.3.1 // indirect github.com/skeema/knownhosts v1.3.1 // indirect
@@ -63,13 +62,10 @@ require (
github.com/xanzy/ssh-agent v0.3.3 // indirect github.com/xanzy/ssh-agent v0.3.3 // indirect
github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect github.com/xo/terminfo v0.0.0-20220910002029-abceb7e1c41e // indirect
go.yaml.in/yaml/v3 v3.0.4 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect
golang.org/x/exp v0.0.0-20260312153236-7ab1446f8b90 // indirect
golang.org/x/net v0.52.0 // indirect golang.org/x/net v0.52.0 // indirect
golang.org/x/sync v0.20.0 // indirect golang.org/x/sync v0.20.0 // indirect
golang.org/x/sys v0.42.0 // indirect golang.org/x/sys v0.42.0 // indirect
golang.org/x/text v0.35.0 // indirect golang.org/x/text v0.35.0 // indirect
golang.org/x/tools v0.43.0 // indirect
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
gopkg.in/warnings.v0 v0.1.2 // indirect gopkg.in/warnings.v0 v0.1.2 // indirect
modernc.org/libc v1.70.0 // indirect modernc.org/libc v1.70.0 // indirect
modernc.org/mathutil v1.7.1 // indirect modernc.org/mathutil v1.7.1 // indirect

16
go.sum
View File

@@ -5,6 +5,10 @@ github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERo
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/ProtonMail/go-crypto v1.1.6 h1:ZcV+Ropw6Qn0AX9brlQLAUXfqLBc7Bl+f/DmNxpLfdw= github.com/ProtonMail/go-crypto v1.1.6 h1:ZcV+Ropw6Qn0AX9brlQLAUXfqLBc7Bl+f/DmNxpLfdw=
github.com/ProtonMail/go-crypto v1.1.6/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE= github.com/ProtonMail/go-crypto v1.1.6/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE=
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8=
github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4= github.com/atotto/clipboard v0.1.4 h1:EH0zSVneZPSuFR11BlR9YppQTVDbh5+16AmcJi4g1z4=
github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI= github.com/atotto/clipboard v0.1.4/go.mod h1:ZY9tmq7sm5xIbd9bOK4onWV4S6X0u6GY7Vn0Yu86PYI=
github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k= github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiErDT4WkJ2k=
@@ -29,16 +33,22 @@ github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/elazarl/goproxy v1.7.2 h1:Y2o6urb7Eule09PjlhQRGNsqRfPmYI3KKQLFpCAV3+o=
github.com/elazarl/goproxy v1.7.2/go.mod h1:82vkLNir0ALaW14Rc399OTTjyNREgmdL2cVoIbS6XaE=
github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ=
github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8=
github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0=
github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
github.com/gliderlabs/ssh v0.3.8 h1:a4YXD1V7xMF9g5nTkdfnja3Sxy1PVDCj1Zg4Wb8vY6c=
github.com/gliderlabs/ssh v0.3.8/go.mod h1:xYoytBv1sV0aL3CavoDuJIQNURXkkfPA/wxQ1pL1fAU=
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI=
github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmSxCcxctByoQdvwPiA7DTK7jaaFDBTtu0ic= github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmSxCcxctByoQdvwPiA7DTK7jaaFDBTtu0ic=
github.com/go-git/go-billy/v5 v5.8.0 h1:I8hjc3LbBlXTtVuFNJuwYuMiHvQJDq1AT6u4DwDzZG0= github.com/go-git/go-billy/v5 v5.8.0 h1:I8hjc3LbBlXTtVuFNJuwYuMiHvQJDq1AT6u4DwDzZG0=
github.com/go-git/go-billy/v5 v5.8.0/go.mod h1:RpvI/rw4Vr5QA+Z60c6d6LXH0rYJo0uD5SqfmrrheCY= github.com/go-git/go-billy/v5 v5.8.0/go.mod h1:RpvI/rw4Vr5QA+Z60c6d6LXH0rYJo0uD5SqfmrrheCY=
github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399 h1:eMje31YglSBqCdIqdhKBW8lokaMrL3uTkpGYlE2OOT4=
github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399/go.mod h1:1OCfN199q1Jm3HZlxleg+Dw/mwps2Wbk9frAWm+4FII=
github.com/go-git/go-git/v5 v5.17.2 h1:B+nkdlxdYrvyFK4GPXVU8w1U+YkbsgciIR7f2sZJ104= github.com/go-git/go-git/v5 v5.17.2 h1:B+nkdlxdYrvyFK4GPXVU8w1U+YkbsgciIR7f2sZJ104=
github.com/go-git/go-git/v5 v5.17.2/go.mod h1:pW/VmeqkanRFqR6AljLcs7EA7FbZaN5MQqO7oZADXpo= github.com/go-git/go-git/v5 v5.17.2/go.mod h1:pW/VmeqkanRFqR6AljLcs7EA7FbZaN5MQqO7oZADXpo=
github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs= github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs=
@@ -60,7 +70,6 @@ github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i
github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4=
github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM= github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
@@ -77,6 +86,8 @@ github.com/muesli/termenv v0.16.0 h1:S5AlUN9dENB57rsbnkPyfdGuWIlkmzJjbFf0Tf5FWUc
github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk= github.com/muesli/termenv v0.16.0/go.mod h1:ZRfOIKPFDYQoDFF4Olj7/QJbW60Ol/kL1pU3VfY/Cnk=
github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w= github.com/ncruces/go-strftime v1.0.0 h1:HMFp8mLCTPp341M/ZnA4qaf7ZlsbTc+miZjCLOFAw7w=
github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/ncruces/go-strftime v1.0.0/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls=
github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k=
github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY=
github.com/panjf2000/ants/v2 v2.12.0 h1:u9JhESo83i/GkZnhfTNuFMMWcNt7mnV1bGJ6FT4wXH8= github.com/panjf2000/ants/v2 v2.12.0 h1:u9JhESo83i/GkZnhfTNuFMMWcNt7mnV1bGJ6FT4wXH8=
github.com/panjf2000/ants/v2 v2.12.0/go.mod h1:tSQuaNQ6r6NRhPt+IZVUevvDyFMTs+eS4ztZc52uJTY= github.com/panjf2000/ants/v2 v2.12.0/go.mod h1:tSQuaNQ6r6NRhPt+IZVUevvDyFMTs+eS4ztZc52uJTY=
github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4=
@@ -85,6 +96,7 @@ github.com/petar-dambovaliev/aho-corasick v0.0.0-20250424160509-463d218d4745 h1:
github.com/petar-dambovaliev/aho-corasick v0.0.0-20250424160509-463d218d4745/go.mod h1:EHPiTAKtiFmrMldLUNswFwfZ2eJIYBHktdaUTZxYWRw= github.com/petar-dambovaliev/aho-corasick v0.0.0-20250424160509-463d218d4745/go.mod h1:EHPiTAKtiFmrMldLUNswFwfZ2eJIYBHktdaUTZxYWRw=
github.com/pjbgf/sha1cd v0.3.2 h1:a9wb0bp1oC2TGwStyn0Umc/IGKQnEgF0vVaZ8QF8eo4= github.com/pjbgf/sha1cd v0.3.2 h1:a9wb0bp1oC2TGwStyn0Umc/IGKQnEgF0vVaZ8QF8eo4=
github.com/pjbgf/sha1cd v0.3.2/go.mod h1:zQWigSxVmsHEZow5qaLtPYxpcKMMQpa09ixqBxuCS6A= github.com/pjbgf/sha1cd v0.3.2/go.mod h1:zQWigSxVmsHEZow5qaLtPYxpcKMMQpa09ixqBxuCS6A=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
@@ -151,6 +163,8 @@ golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo= golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU=
golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8= golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA= golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=

216
pkg/engine/sources/git.go Normal file
View File

@@ -0,0 +1,216 @@
package sources
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"time"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/object"
"github.com/salvacybersec/keyhunter/pkg/types"
)
// gitBinarySniffSize is the number of leading bytes of a blob that are
// inspected for a NUL (0x00) byte when deciding whether the blob looks
// binary. Blobs shorter than this are inspected in full. It is local to this
// file until plan 04-02 introduces a package-wide constant.
const gitBinarySniffSize = 512
// GitSource scans the full history of a local git repository: every commit
// reachable from local branches, tags, remote-tracking branches, and the
// stash ref. Blob content is deduplicated by object ID, so a blob that appears
// in several commits or under several paths is emitted only for the first
// commit/path at which it is encountered.
type GitSource struct {
// RepoPath is the path to the local git repo (working tree or bare).
// An empty RepoPath makes Chunks return an error.
RepoPath string
// Since, if non-zero, excludes commits older than this timestamp
// (using commit author date). Excluded commits are still traversed so
// that newer ancestors reached through them are not lost; only their
// blobs are skipped.
Since time.Time
// ChunkSize is the overlap-chunker size; zero (or a negative value) uses
// defaultChunkSize.
ChunkSize int
}
// NewGitSource returns a GitSource rooted at repoPath that chunks blobs with
// defaultChunkSize. Since is left at its zero value, so no commit is filtered
// out by date unless the caller sets it afterwards.
func NewGitSource(repoPath string) *GitSource {
	src := new(GitSource)
	src.RepoPath = repoPath
	src.ChunkSize = defaultChunkSize
	return src
}
// Chunks opens the repository at g.RepoPath and walks the history reachable
// from every seed returned by collectSeedCommits (local branches, tags,
// remote-tracking branches, and the stash ref), sending the chunked content of
// each not-yet-seen blob to out.
//
// It returns an error when RepoPath is empty, when the repository cannot be
// opened, when the references cannot be listed, or when ctx is cancelled or
// its deadline passes. Any other error raised while walking a single seed is
// ignored so the remaining seeds are still scanned. Chunks does not close out.
func (g *GitSource) Chunks(ctx context.Context, out chan<- types.Chunk) error {
	if g.RepoPath == "" {
		return errors.New("GitSource: RepoPath is empty")
	}

	repo, err := git.PlainOpen(g.RepoPath)
	if err != nil {
		return fmt.Errorf("GitSource: open %q: %w", g.RepoPath, err)
	}

	seeds, err := collectSeedCommits(repo)
	if err != nil {
		return fmt.Errorf("GitSource: collect refs: %w", err)
	}

	// Both sets are shared across all seeds so that history common to several
	// refs is processed only once.
	visitedCommits := map[plumbing.Hash]struct{}{}
	visitedBlobs := map[plumbing.Hash]struct{}{}

	// visit handles one commit of a log walk: it skips commits already
	// processed via another ref and commits authored before g.Since, and
	// emits the blobs of everything else.
	visit := func(c *object.Commit) error {
		if cerr := ctx.Err(); cerr != nil {
			return cerr
		}
		if _, dup := visitedCommits[c.Hash]; dup {
			return nil
		}
		visitedCommits[c.Hash] = struct{}{}
		if g.Since.IsZero() || !c.Author.When.Before(g.Since) {
			return g.emitCommitBlobs(ctx, c, visitedBlobs, out)
		}
		return nil
	}

	// An empty repository yields no seeds; the loop is then a no-op and nil
	// is returned, i.e. an empty repo is not an error.
	for i := range seeds {
		if cerr := ctx.Err(); cerr != nil {
			return cerr
		}
		commits, logErr := repo.Log(&git.LogOptions{From: seeds[i], All: false})
		if logErr != nil {
			// The seed could not be walked; move on to the next ref.
			continue
		}
		walkErr := commits.ForEach(visit)
		commits.Close()
		// Only cancellation/deadline aborts the scan; other per-seed
		// iterator errors are swallowed.
		if errors.Is(walkErr, context.Canceled) || errors.Is(walkErr, context.DeadlineExceeded) {
			return walkErr
		}
	}
	return nil
}
// collectSeedCommits lists the starting points for the history walk: the
// target hash of every direct (non-symbolic) reference that is a local
// branch, a tag, a remote-tracking branch, or refs/stash.
//
// For tag references it tries to load an annotated tag object and, when that
// tag resolves to a commit, uses the commit's hash instead of the tag
// object's. If either lookup fails the reference's own hash is kept.
// The returned slice may contain the same hash more than once.
func collectSeedCommits(repo *git.Repository) ([]plumbing.Hash, error) {
	iter, err := repo.References()
	if err != nil {
		return nil, err
	}

	const stashRef = plumbing.ReferenceName("refs/stash")

	var hashes []plumbing.Hash
	walkErr := iter.ForEach(func(r *plumbing.Reference) error {
		// Symbolic refs such as HEAD only point at other refs; skip them.
		if r.Type() != plumbing.HashReference {
			return nil
		}

		refName := r.Name()
		switch {
		case refName.IsBranch(), refName.IsTag(), refName.IsRemote(), refName == stashRef:
			// A ref namespace we want to scan.
		default:
			return nil
		}

		target := r.Hash()
		if refName.IsTag() {
			tagObj, tagErr := repo.TagObject(target)
			if tagErr == nil {
				if commit, commitErr := tagObj.Commit(); commitErr == nil {
					target = commit.Hash
				}
			}
		}

		hashes = append(hashes, target)
		return nil
	})
	return hashes, walkErr
}
// emitCommitBlobs walks the tree of commit c and sends the chunked content of
// every file blob whose object ID is not yet in seenBlobs. Each visited blob
// ID is recorded in seenBlobs before it is read, so a blob is attempted at
// most once per scan even if reading it fails.
//
// Blobs are skipped when go-git reports them as binary, when they cannot be
// read, when they are empty, or when a NUL byte occurs within their first
// gitBinarySniffSize bytes. Emitted chunks carry the source label
// "git:<7-char commit sha>:<path>".
//
// The only errors returned are context cancellation or deadline errors.
// An unreadable tree, or an object that fails to load part-way through the
// tree walk, ends processing of this commit only. Returning such an error
// would stop the caller's commit iterator and silently drop the rest of the
// ref's history.
func (g *GitSource) emitCommitBlobs(ctx context.Context, c *object.Commit, seenBlobs map[plumbing.Hash]struct{}, out chan<- types.Chunk) error {
	tree, err := c.Tree()
	if err != nil {
		return nil // skip unreadable tree
	}
	shortSHA := c.Hash.String()[:7]

	err = tree.Files().ForEach(func(f *object.File) error {
		if err := ctx.Err(); err != nil {
			return err
		}
		if _, ok := seenBlobs[f.Hash]; ok {
			return nil
		}
		seenBlobs[f.Hash] = struct{}{}

		// Skip obviously-binary blobs via go-git's helper, then via our sniff.
		// An error from IsBinary is ignored; the sniff below still applies.
		if isBin, _ := f.IsBinary(); isBin {
			return nil
		}
		reader, err := f.Reader()
		if err != nil {
			return nil
		}
		data, err := io.ReadAll(reader)
		_ = reader.Close() // read-only stream; a close error carries no data loss
		if err != nil {
			return nil
		}
		if len(data) == 0 {
			return nil
		}
		sniff := data
		if len(sniff) > gitBinarySniffSize {
			sniff = sniff[:gitBinarySniffSize]
		}
		if bytes.IndexByte(sniff, 0x00) >= 0 {
			return nil
		}
		source := fmt.Sprintf("git:%s:%s", shortSHA, f.Name)
		return emitGitChunks(ctx, data, source, g.ChunkSize, out)
	})
	if err == nil {
		return nil
	}
	// Cancellation must reach the caller so the whole scan stops.
	if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
		return err
	}
	// Anything else is a per-commit object problem (for example a missing
	// blob or subtree). Treat it like an unreadable tree: give up on this
	// commit but let the history walk continue.
	return nil
}
// emitGitChunks sends data to out as one or more chunks labelled with source.
// It mirrors the chunking logic used by FileSource so that git blobs are
// scanned with the same boundary-overlap guarantees, and will be replaced by
// the shared emitChunks helper once plan 04-02 lands.
//
// chunkSize is the window size; a value <= 0 selects defaultChunkSize. Data
// that fits in a single window (including empty data) is sent as one chunk
// with Offset 0. Larger data is sent as windows of at most chunkSize bytes
// whose starts are chunkSize-chunkOverlap bytes apart. If chunkSize is not
// larger than chunkOverlap, the windows are emitted back-to-back without
// overlap, because a non-positive step would never terminate or would index
// before the start of data.
//
// Chunk.Data aliases data; no copy is made. The function returns ctx.Err()
// if ctx is done before a chunk can be delivered, and nil otherwise.
func emitGitChunks(ctx context.Context, data []byte, source string, chunkSize int, out chan<- types.Chunk) error {
	size := chunkSize
	if size <= 0 {
		size = defaultChunkSize
	}

	if len(data) <= size {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case out <- types.Chunk{Data: data, Source: source, Offset: 0}:
		}
		return nil
	}

	// Distance between the starts of consecutive windows. Guard against a
	// caller-supplied chunk size that does not exceed the overlap.
	step := size - chunkOverlap
	if step <= 0 {
		step = size
	}

	// NOTE(review): Offset advances by the emitted chunk length rather than by
	// step, so with a non-zero overlap it runs ahead of the chunk's actual
	// start position in data. Left unchanged to stay aligned with FileSource;
	// confirm what consumers of Chunk.Offset expect.
	var offset int64
	for start := 0; start < len(data); start += step {
		end := min(start+size, len(data))
		chunk := types.Chunk{
			Data:   data[start:end],
			Source: source,
			Offset: offset,
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case out <- chunk:
		}
		offset += int64(end - start)
		if end == len(data) {
			// The final window reached the end of data; stop so the tail is
			// not re-emitted as an overlap-only chunk.
			break
		}
	}
	return nil
}

View File

@@ -0,0 +1,186 @@
package sources
import (
"context"
"os"
"path/filepath"
"regexp"
"testing"
"time"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing"
"github.com/go-git/go-git/v5/plumbing/object"
"github.com/stretchr/testify/require"
"github.com/salvacybersec/keyhunter/pkg/types"
)
// initRepo creates a new non-bare git repository inside a per-test temporary
// directory and returns that directory together with the repository handle.
// It fails the test immediately if initialisation fails.
func initRepo(t *testing.T) (string, *git.Repository) {
	t.Helper()

	root := t.TempDir()
	r, initErr := git.PlainInit(root, false)
	require.NoError(t, initErr)
	return root, r
}
// commitFile writes content to dir/name (creating parent directories as
// needed), stages the file, and commits it with the message "add <name>" and
// an author timestamp of the current time. It returns the new commit's hash
// and fails the test on any error.
func commitFile(t *testing.T, dir string, repo *git.Repository, name, content string) plumbing.Hash {
	t.Helper()

	target := filepath.Join(dir, name)
	require.NoError(t, os.MkdirAll(filepath.Dir(target), 0o755))
	require.NoError(t, os.WriteFile(target, []byte(content), 0o644))

	worktree, err := repo.Worktree()
	require.NoError(t, err)

	_, err = worktree.Add(name)
	require.NoError(t, err)

	author := &object.Signature{Name: "test", Email: "t@x", When: time.Now()}
	commitHash, err := worktree.Commit("add "+name, &git.CommitOptions{Author: author})
	require.NoError(t, err)
	return commitHash
}
// drainGit runs src.Chunks in a goroutine under a 30-second timeout, collects
// every chunk it emits, and returns them once the source has finished.
// The test fails if Chunks returns a non-nil error.
func drainGit(t *testing.T, src Source) []types.Chunk {
	t.Helper()

	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()

	chunkCh := make(chan types.Chunk, 1024)
	done := make(chan error, 1) // buffered so the producer never blocks on its result
	go func() {
		done <- src.Chunks(ctx, chunkCh)
		close(chunkCh)
	}()

	var collected []types.Chunk
	for {
		c, open := <-chunkCh
		if !open {
			break
		}
		collected = append(collected, c)
	}

	require.NoError(t, <-done)
	return collected
}
// TestGitSource_HistoryWalk commits three distinct files in three separate
// commits and checks that at least three chunks come back, each labelled
// "git:<7 hex chars>:<path>".
func TestGitSource_HistoryWalk(t *testing.T) {
	dir, repo := initRepo(t)

	fixtures := [][2]string{
		{"a.txt", "contents alpha"},
		{"b.txt", "contents bravo"},
		{"c.txt", "contents charlie"},
	}
	for _, fx := range fixtures {
		commitFile(t, dir, repo, fx[0], fx[1])
	}

	chunks := drainGit(t, NewGitSource(dir))
	require.GreaterOrEqual(t, len(chunks), 3)

	sourcePattern := regexp.MustCompile(`^git:[0-9a-f]{7}:.+$`)
	for i := range chunks {
		require.Regexp(t, sourcePattern, chunks[i].Source)
	}
}
// TestGitSource_BlobDeduplication verifies that a blob is emitted once no
// matter how many commits or paths reference it.
//
// Three commits add a.txt, b.txt (byte-identical to a.txt, hence the same
// blob OID) and c.txt. Across the three commit trees there are six file
// entries but only two distinct blobs, and each fixture is far smaller than
// a chunk, so exactly two chunks are expected.
func TestGitSource_BlobDeduplication(t *testing.T) {
	dir, repo := initRepo(t)
	commitFile(t, dir, repo, "a.txt", "same exact content everywhere")
	commitFile(t, dir, repo, "b.txt", "same exact content everywhere") // identical blob -> same OID
	commitFile(t, dir, repo, "c.txt", "different content here")

	chunks := drainGit(t, NewGitSource(dir))

	// Count raw emissions first. Collapsing into a set keyed by content (as
	// done below) would hide repeated emissions of the same blob, so the set
	// size alone cannot detect a broken dedup.
	require.Len(t, chunks, 2, "each unique blob must be emitted exactly once")

	// The two emitted chunks must also be the two distinct contents.
	unique := make(map[string]bool)
	for _, c := range chunks {
		unique[string(c.Data)] = true
	}
	require.Len(t, unique, 2, "duplicate blobs must be deduped by OID")
}
// TestGitSource_ModifiedFileKeepsBothVersions commits a file and then
// overwrites it in a second commit, and checks that the content of both
// revisions is emitted.
func TestGitSource_ModifiedFileKeepsBothVersions(t *testing.T) {
	dir, repo := initRepo(t)
	commitFile(t, dir, repo, "a.txt", "version one")
	// Rewriting the same path yields a second, different blob.
	commitFile(t, dir, repo, "a.txt", "version two")

	chunks := drainGit(t, NewGitSource(dir))

	seen := make(map[string]struct{}, len(chunks))
	for i := range chunks {
		seen[string(chunks[i].Data)] = struct{}{}
	}

	_, hasOld := seen["version one"]
	_, hasNew := seen["version two"]
	require.True(t, hasOld, "old version must still be scanned")
	require.True(t, hasNew, "new version must be scanned")
}
// TestGitSource_MultiBranch builds a history that forks after a shared base
// commit — one commit only on a "feature" branch and one only on the
// original branch — and checks that content from the base and from both
// branch tips is emitted.
func TestGitSource_MultiBranch(t *testing.T) {
	dir, repo := initRepo(t)
	commitFile(t, dir, repo, "base.txt", "base content")

	// Remember which branch we started on so we can return to it.
	headRef, err := repo.Head()
	require.NoError(t, err)
	originalBranch := headRef.Name()

	worktree, err := repo.Worktree()
	require.NoError(t, err)

	// Fork "feature" off the base commit and add a file that exists only there.
	createFeature := &git.CheckoutOptions{
		Branch: plumbing.NewBranchReferenceName("feature"),
		Create: true,
	}
	require.NoError(t, worktree.Checkout(createFeature))
	commitFile(t, dir, repo, "feature_only.txt", "feature branch content")

	// Return to the original branch and diverge from "feature".
	require.NoError(t, worktree.Checkout(&git.CheckoutOptions{Branch: originalBranch}))
	commitFile(t, dir, repo, "main_only.txt", "main branch content")

	chunks := drainGit(t, NewGitSource(dir))

	seen := make(map[string]struct{}, len(chunks))
	for i := range chunks {
		seen[string(chunks[i].Data)] = struct{}{}
	}

	_, hasBase := seen["base content"]
	_, hasFeature := seen["feature branch content"]
	_, hasMain := seen["main branch content"]
	require.True(t, hasBase, "base blob must be scanned")
	require.True(t, hasFeature, "feature branch blob must be scanned")
	require.True(t, hasMain, "main branch blob must be scanned")
}
// TestGitSource_TagReachesOldCommit tags the first commit, adds a later
// commit on the same branch, and checks that the content of both commits is
// emitted.
//
// NOTE(review): the tagged commit is also an ancestor of the branch head, so
// this passes even if tag refs were not used as walk seeds. The tag is also
// created with nil options, which presumably yields a lightweight tag, so
// annotated-tag resolution is not exercised — confirm and consider a tag on a
// commit that no branch reaches.
func TestGitSource_TagReachesOldCommit(t *testing.T) {
	dir, repo := initRepo(t)

	taggedCommit := commitFile(t, dir, repo, "old.txt", "old tagged content")
	_, tagErr := repo.CreateTag("v0.1", taggedCommit, nil)
	require.NoError(t, tagErr)

	// Move the branch head past the tagged commit.
	commitFile(t, dir, repo, "new.txt", "later content")

	chunks := drainGit(t, NewGitSource(dir))

	seen := make(map[string]struct{}, len(chunks))
	for i := range chunks {
		seen[string(chunks[i].Data)] = struct{}{}
	}

	_, hasTagged := seen["old tagged content"]
	_, hasLater := seen["later content"]
	require.True(t, hasTagged, "tagged commit's blobs must be reachable")
	require.True(t, hasLater)
}
// TestGitSource_SinceFilterExcludesAll sets Since one hour into the future,
// so the single commit (authored "now") is older than the cutoff, and checks
// that no chunks are emitted.
func TestGitSource_SinceFilterExcludesAll(t *testing.T) {
	dir, repo := initRepo(t)
	commitFile(t, dir, repo, "a.txt", "alpha")

	source := NewGitSource(dir)
	cutoff := time.Now().Add(1 * time.Hour)
	source.Since = cutoff

	require.Empty(t, drainGit(t, source))
}
// TestGitSource_SourceFormat commits a file in a nested directory and checks
// that at least one chunk is labelled "git:<7 hex chars>:path/to/file.txt",
// i.e. that the repository-relative path is kept in the source label.
func TestGitSource_SourceFormat(t *testing.T) {
	dir, repo := initRepo(t)
	commitFile(t, dir, repo, "path/to/file.txt", "some content")

	chunks := drainGit(t, NewGitSource(dir))
	require.NotEmpty(t, chunks)

	pattern := regexp.MustCompile(`^git:[0-9a-f]{7}:path/to/file\.txt$`)
	found := false
	for i := 0; i < len(chunks) && !found; i++ {
		found = pattern.MatchString(chunks[i].Source)
	}
	require.True(t, found, "expected a chunk with source matching git:<sha>:path/to/file.txt")
}
func TestGitSource_MissingRepo(t *testing.T) {
src := NewGitSource(filepath.Join(t.TempDir(), "not-a-repo"))
ctx := context.Background()
out := make(chan types.Chunk, 1)
err := src.Chunks(ctx, out)
require.Error(t, err)
}