Skip to content

Commit 08c5ae6

Browse files
committed
lfs: teach gitscanner how to emit lockable files that aren't lfs objects
1 parent 7cf53a7 commit 08c5ae6

7 files changed

+140
-69
lines changed

lfs/gitscanner.go

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,17 +20,24 @@ func IsCallbackMissing(err error) bool {
2020

2121
// GitScanner scans objects in a Git repository for LFS pointers.
2222
type GitScanner struct {
23-
Filter *filepathfilter.Filter
24-
FoundPointer GitScannerFoundPointer
25-
remote string
26-
skippedRefs []string
23+
Filter *filepathfilter.Filter
24+
FoundPointer GitScannerFoundPointer
25+
FoundLockable GitScannerFoundLockable
26+
PotentialLockables GitScannerSet
27+
remote string
28+
skippedRefs []string
2729

2830
closed bool
2931
started time.Time
3032
mu sync.Mutex
3133
}
3234

3335
type GitScannerFoundPointer func(*WrappedPointer, error)
36+
type GitScannerFoundLockable func(filename string)
37+
38+
type GitScannerSet interface {
39+
Contains(string) bool
40+
}
3441

3542
// NewGitScanner initializes a *GitScanner for a Git repository in the current
3643
// working directory.
@@ -82,7 +89,7 @@ func (s *GitScanner) ScanLeftToRemote(left string, cb GitScannerFoundPointer) er
8289
}
8390
s.mu.Unlock()
8491

85-
return scanRefsToChan(callback, left, "", s.opts(ScanLeftToRemoteMode))
92+
return scanRefsToChan(s, callback, left, "", s.opts(ScanLeftToRemoteMode))
8693
}
8794

8895
// ScanRefRange scans through all commits from the given left and right refs,
@@ -95,7 +102,7 @@ func (s *GitScanner) ScanRefRange(left, right string, cb GitScannerFoundPointer)
95102

96103
opts := s.opts(ScanRefsMode)
97104
opts.SkipDeletedBlobs = false
98-
return scanRefsToChan(callback, left, right, opts)
105+
return scanRefsToChan(s, callback, left, right, opts)
99106
}
100107

101108
// ScanRefWithDeleted scans through all objects in the given ref, including
@@ -114,7 +121,7 @@ func (s *GitScanner) ScanRef(ref string, cb GitScannerFoundPointer) error {
114121

115122
opts := s.opts(ScanRefsMode)
116123
opts.SkipDeletedBlobs = true
117-
return scanRefsToChan(callback, ref, "", opts)
124+
return scanRefsToChan(s, callback, ref, "", opts)
118125
}
119126

120127
// ScanAll scans through all objects in the git repository.
@@ -126,7 +133,7 @@ func (s *GitScanner) ScanAll(cb GitScannerFoundPointer) error {
126133

127134
opts := s.opts(ScanAllMode)
128135
opts.SkipDeletedBlobs = false
129-
return scanRefsToChan(callback, "", "", opts)
136+
return scanRefsToChan(s, callback, "", "", opts)
130137
}
131138

132139
// ScanTree takes a ref and returns WrappedPointer objects in the tree at that

lfs/gitscanner_catfilebatch.go

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,11 @@ import (
1414
// runCatFileBatch uses 'git cat-file --batch' to get the object contents of a
1515
// git object, given its sha1. The contents will be decoded into a Git LFS
1616
// pointer. Git Blob SHA1s are read from the sha1Ch channel and fed to STDIN.
17-
// Results are parsed from STDOUT, and any elegible LFS pointers are sent to
18-
// pointerCh. Any errors are sent to errCh. An error is returned if the 'git
19-
// cat-file' command fails to start.
20-
func runCatFileBatch(pointerCh chan *WrappedPointer, revs *StringChannelWrapper, errCh chan error) error {
17+
// Results are parsed from STDOUT, and any eligible LFS pointers are sent to
18+
// pointerCh. If a Git Blob is not an LFS pointer, check the lockableSet to see
19+
// if that blob is for a locked file. Any errors are sent to errCh. An error is
20+
// returned if the 'git cat-file' command fails to start.
21+
func runCatFileBatch(pointerCh chan *WrappedPointer, lockableCh chan string, lockableSet *lockableNameSet, revs *StringChannelWrapper, errCh chan error) error {
2122
cmd, err := startCommand("git", "cat-file", "--batch")
2223
if err != nil {
2324
return err
@@ -28,12 +29,15 @@ func runCatFileBatch(pointerCh chan *WrappedPointer, revs *StringChannelWrapper,
2829
for r := range revs.Results {
2930
cmd.Stdin.Write([]byte(r + "\n"))
3031
canScan := scanner.Scan()
31-
if p := scanner.Pointer(); p != nil {
32-
pointerCh <- p
33-
}
3432

3533
if err := scanner.Err(); err != nil {
3634
errCh <- err
35+
} else if p := scanner.Pointer(); p != nil {
36+
pointerCh <- p
37+
} else if b := scanner.BlobSHA(); len(b) == 40 {
38+
if name, ok := lockableSet.Check(b); ok {
39+
lockableCh <- name
40+
}
3741
}
3842

3943
if !canScan {
@@ -55,17 +59,23 @@ func runCatFileBatch(pointerCh chan *WrappedPointer, revs *StringChannelWrapper,
5559

5660
close(pointerCh)
5761
close(errCh)
62+
close(lockableCh)
5863
}()
5964

6065
return nil
6166
}
6267

6368
type catFileBatchScanner struct {
6469
r *bufio.Reader
70+
blobSha string
6571
pointer *WrappedPointer
6672
err error
6773
}
6874

75+
func (s *catFileBatchScanner) BlobSHA() string {
76+
return s.blobSha
77+
}
78+
6979
func (s *catFileBatchScanner) Pointer() *WrappedPointer {
7080
return s.pointer
7181
}
@@ -76,7 +86,8 @@ func (s *catFileBatchScanner) Err() error {
7686

7787
func (s *catFileBatchScanner) Scan() bool {
7888
s.pointer, s.err = nil, nil
79-
p, err := s.next()
89+
b, p, err := s.next()
90+
s.blobSha = b
8091
s.pointer = p
8192

8293
if err != nil {
@@ -89,39 +100,40 @@ func (s *catFileBatchScanner) Scan() bool {
89100
return true
90101
}
91102

92-
func (s *catFileBatchScanner) next() (*WrappedPointer, error) {
103+
func (s *catFileBatchScanner) next() (string, *WrappedPointer, error) {
93104
l, err := s.r.ReadBytes('\n')
94105
if err != nil {
95-
return nil, err
106+
return "", nil, err
96107
}
97108

98109
// Line is formatted:
99110
// <sha1> <type> <size>
100111
fields := bytes.Fields(l)
101112
if len(fields) < 3 {
102-
return nil, errors.Wrap(fmt.Errorf("Invalid: %q", string(l)), "git cat-file --batch")
113+
return "", nil, errors.Wrap(fmt.Errorf("Invalid: %q", string(l)), "git cat-file --batch")
103114
}
104115

116+
blobSha := string(fields[0])
105117
size, _ := strconv.Atoi(string(fields[2]))
106118
buf := make([]byte, size)
107119
read, err := io.ReadFull(s.r, buf)
108120
if err != nil {
109-
return nil, err
121+
return blobSha, nil, err
110122
}
111123

112124
if size != read {
113-
return nil, fmt.Errorf("expected %d bytes, read %d bytes", size, read)
125+
return blobSha, nil, fmt.Errorf("expected %d bytes, read %d bytes", size, read)
114126
}
115127

116128
p, err := DecodePointer(bytes.NewBuffer(buf[:read]))
117129
var pointer *WrappedPointer
118130
if err == nil {
119131
pointer = &WrappedPointer{
120-
Sha1: string(fields[0]),
132+
Sha1: blobSha,
121133
Pointer: p,
122134
}
123135
}
124136

125137
_, err = s.r.ReadBytes('\n') // Extra \n inserted by cat-file
126-
return pointer, err
138+
return blobSha, pointer, err
127139
}

lfs/gitscanner_catfilebatchcheck.go

Lines changed: 30 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,10 @@ import (
99

1010
// runCatFileBatchCheck uses 'git cat-file --batch-check' to get the type and
1111
// size of a git object. Any object that isn't of type blob and under the
12-
// blobSizeCutoff will be ignored. revs is a channel over which strings
13-
// containing git sha1s will be sent. It returns a channel from which sha1
14-
// strings can be read.
15-
func runCatFileBatchCheck(smallRevCh chan string, revs *StringChannelWrapper, errCh chan error) error {
12+
// blobSizeCutoff will be ignored, unless it's a locked file. revs is a channel
13+
// over which strings containing git sha1s will be sent. It returns a channel
14+
// from which sha1 strings can be read.
15+
func runCatFileBatchCheck(smallRevCh chan string, lockableCh chan string, lockableSet *lockableNameSet, revs *StringChannelWrapper, errCh chan error) error {
1616
cmd, err := startCommand("git", "cat-file", "--batch-check")
1717
if err != nil {
1818
return err
@@ -23,12 +23,14 @@ func runCatFileBatchCheck(smallRevCh chan string, revs *StringChannelWrapper, er
2323
for r := range revs.Results {
2424
cmd.Stdin.Write([]byte(r + "\n"))
2525
hasNext := scanner.Scan()
26-
if b := scanner.BlobOID(); len(b) > 0 {
27-
smallRevCh <- b
28-
}
29-
3026
if err := scanner.Err(); err != nil {
3127
errCh <- err
28+
} else if b := scanner.LFSBlobOID(); len(b) > 0 {
29+
smallRevCh <- b
30+
} else if b := scanner.GitBlobOID(); len(b) > 0 {
31+
if name, ok := lockableSet.Check(b); ok {
32+
lockableCh <- name
33+
}
3234
}
3335

3436
if !hasNext {
@@ -54,27 +56,32 @@ func runCatFileBatchCheck(smallRevCh chan string, revs *StringChannelWrapper, er
5456
}
5557

5658
type catFileBatchCheckScanner struct {
57-
s *bufio.Scanner
58-
limit int
59-
blobOID string
59+
s *bufio.Scanner
60+
limit int
61+
lfsBlobOID string
62+
gitBlobOID string
63+
}
64+
65+
func (s *catFileBatchCheckScanner) LFSBlobOID() string {
66+
return s.lfsBlobOID
6067
}
6168

62-
func (s *catFileBatchCheckScanner) BlobOID() string {
63-
return s.blobOID
69+
func (s *catFileBatchCheckScanner) GitBlobOID() string {
70+
return s.gitBlobOID
6471
}
6572

6673
func (s *catFileBatchCheckScanner) Err() error {
6774
return s.s.Err()
6875
}
6976

7077
func (s *catFileBatchCheckScanner) Scan() bool {
71-
s.blobOID = ""
72-
b, hasNext := s.next()
73-
s.blobOID = b
78+
lfsBlobSha, gitBlobSha, hasNext := s.next()
79+
s.lfsBlobOID = lfsBlobSha
80+
s.gitBlobOID = gitBlobSha
7481
return hasNext
7582
}
7683

77-
func (s *catFileBatchCheckScanner) next() (string, bool) {
84+
func (s *catFileBatchCheckScanner) next() (string, string, bool) {
7885
hasNext := s.s.Scan()
7986
line := s.s.Text()
8087
lineLen := len(line)
@@ -84,21 +91,22 @@ func (s *catFileBatchCheckScanner) next() (string, bool) {
8491
// type is at a fixed spot, if we see that it's "blob", we can avoid
8592
// splitting the line just to get the size.
8693
if lineLen < 46 {
87-
return "", hasNext
94+
return "", "", hasNext
8895
}
8996

9097
if line[41:45] != "blob" {
91-
return "", hasNext
98+
return "", "", hasNext
9299
}
93100

94101
size, err := strconv.Atoi(line[46:lineLen])
95102
if err != nil {
96-
return "", hasNext
103+
return "", "", hasNext
97104
}
98105

106+
blobSha := line[0:40]
99107
if size >= s.limit {
100-
return "", hasNext
108+
return "", blobSha, hasNext
101109
}
102110

103-
return line[0:40], hasNext
111+
return blobSha, "", hasNext
104112
}

lfs/gitscanner_catfilebatchcheckscanner_test.go

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,15 @@ func TestCatFileBatchCheckScannerWithValidOutput(t *testing.T) {
2323
limit: 1024,
2424
}
2525

26-
assertNextOID(t, s, "")
27-
assertNextOID(t, s, "")
28-
assertNextOID(t, s, "")
29-
assertNextOID(t, s, "0000000000000000000000000000000000000002")
30-
assertNextOID(t, s, "")
31-
assertNextOID(t, s, "")
26+
assertNextOID(t, s, "", "")
27+
assertNextOID(t, s, "", "")
28+
assertNextOID(t, s, "", "")
29+
assertNextOID(t, s, "0000000000000000000000000000000000000002", "")
30+
assertNextOID(t, s, "", "")
31+
assertNextOID(t, s, "", "0000000000000000000000000000000000000004")
3232
assertScannerDone(t, s)
33-
assert.Equal(t, "", s.BlobOID())
33+
assert.Equal(t, "", s.LFSBlobOID())
34+
assert.Equal(t, "", s.GitBlobOID())
3435
}
3536

3637
type stringScanner interface {
@@ -49,9 +50,10 @@ func assertNextScan(t *testing.T, scanner genericScanner) {
4950
assert.Nil(t, scanner.Err())
5051
}
5152

52-
func assertNextOID(t *testing.T, scanner *catFileBatchCheckScanner, oid string) {
53+
func assertNextOID(t *testing.T, scanner *catFileBatchCheckScanner, lfsBlobOID, gitBlobOID string) {
5354
assertNextScan(t, scanner)
54-
assert.Equal(t, oid, scanner.BlobOID())
55+
assert.Equal(t, lfsBlobOID, scanner.LFSBlobOID())
56+
assert.Equal(t, gitBlobOID, scanner.GitBlobOID())
5557
}
5658

5759
func assertScannerDone(t *testing.T, scanner genericScanner) {

lfs/gitscanner_index.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,14 @@ func scanIndex(cb GitScannerFoundPointer, ref string) error {
5959
close(allRevsErr)
6060
}()
6161

62-
smallShas, err := catFileBatchCheck(allRevs)
62+
smallShas, _, err := catFileBatchCheck(allRevs, nil)
6363
if err != nil {
6464
return err
6565
}
6666

6767
ch := make(chan gitscannerResult, chanBufSize)
6868

69-
barePointerCh, err := catFileBatch(smallShas)
69+
barePointerCh, _, err := catFileBatch(smallShas, nil)
7070
if err != nil {
7171
return err
7272
}

0 commit comments

Comments
 (0)