Skip to content

Commit bbe552a

Browse files
committed
extract ScanTree
1 parent d3a7caf commit bbe552a

File tree

3 files changed

+191
-173
lines changed

3 files changed

+191
-173
lines changed

lfs/gitscanner.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,13 @@ func (s *GitScanner) ScanAll() (*PointerChannelWrapper, error) {
6262
return scanRefsToChan("", "", opts)
6363
}
6464

65+
// ScanTree takes a ref and returns WrappedPointer objects in the tree at that
66+
// ref. Differs from ScanRefs in that multiple files in the tree with the same
67+
// content are all reported.
68+
func (s *GitScanner) ScanTree(ref string) (*PointerChannelWrapper, error) {
69+
return runScanTree(ref)
70+
}
71+
6572
// ScanUnpushed scans history for all LFS pointers which have been added but not
6673
// pushed to the named remote. remote can be left blank to mean 'any remote'.
6774
func (s *GitScanner) ScanUnpushed(remote string) (*PointerChannelWrapper, error) {

lfs/gitscanner_tree.go

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
package lfs
2+
3+
import (
4+
"bufio"
5+
"bytes"
6+
"fmt"
7+
"io"
8+
"io/ioutil"
9+
"strconv"
10+
"strings"
11+
)
12+
13+
// TreeBlob is a single entry taken from ls-tree or rev-list output: a blob's
// sha paired with the path at which that blob appears in the tree.
type TreeBlob struct {
	// Sha1 is the blob's object sha.
	Sha1 string
	// Filename is the blob's path within the tree.
	Filename string
}
18+
19+
func runScanTree(ref string) (*PointerChannelWrapper, error) {
20+
// We don't use the nameMap approach here since that's imprecise when >1 file
21+
// can be using the same content
22+
treeShas, err := lsTreeBlobs(ref)
23+
if err != nil {
24+
return nil, err
25+
}
26+
27+
return catFileBatchTree(treeShas)
28+
}
29+
30+
// catFileBatchTree uses git cat-file --batch to get the object contents
31+
// of a git object, given its sha1. The contents will be decoded into
32+
// a Git LFS pointer. treeblobs is a channel over which blob entries
33+
// will be sent. It returns a channel from which point.Pointers can be read.
34+
func catFileBatchTree(treeblobs *TreeBlobChannelWrapper) (*PointerChannelWrapper, error) {
35+
cmd, err := startCommand("git", "cat-file", "--batch")
36+
if err != nil {
37+
return nil, err
38+
}
39+
40+
pointers := make(chan *WrappedPointer, chanBufSize)
41+
errchan := make(chan error, 10) // Multiple errors possible
42+
43+
go func() {
44+
for t := range treeblobs.Results {
45+
cmd.Stdin.Write([]byte(t.Sha1 + "\n"))
46+
l, err := cmd.Stdout.ReadBytes('\n')
47+
if err != nil {
48+
break
49+
}
50+
51+
// Line is formatted:
52+
// <sha1> <type> <size>
53+
fields := bytes.Fields(l)
54+
s, _ := strconv.Atoi(string(fields[2]))
55+
56+
nbuf := make([]byte, s)
57+
_, err = io.ReadFull(cmd.Stdout, nbuf)
58+
if err != nil {
59+
break // Legit errors
60+
}
61+
62+
p, err := DecodePointer(bytes.NewBuffer(nbuf))
63+
if err == nil {
64+
pointers <- &WrappedPointer{
65+
Sha1: string(fields[0]),
66+
Size: p.Size,
67+
Pointer: p,
68+
Name: t.Filename,
69+
}
70+
}
71+
72+
_, err = cmd.Stdout.ReadBytes('\n') // Extra \n inserted by cat-file
73+
if err != nil {
74+
break
75+
}
76+
}
77+
// Deal with nested error from incoming treeblobs
78+
err := treeblobs.Wait()
79+
if err != nil {
80+
errchan <- err
81+
}
82+
83+
cmd.Stdin.Close()
84+
85+
// also errors from our command
86+
stderr, _ := ioutil.ReadAll(cmd.Stderr)
87+
err = cmd.Wait()
88+
if err != nil {
89+
errchan <- fmt.Errorf("Error in git cat-file: %v %v", err, string(stderr))
90+
}
91+
close(pointers)
92+
close(errchan)
93+
}()
94+
95+
return NewPointerChannelWrapper(pointers, errchan), nil
96+
}
97+
98+
// Use ls-tree at ref to find a list of candidate tree blobs which might be lfs files
99+
// The returned channel will be sent these blobs which should be sent to catFileBatchTree
100+
// for final check & conversion to Pointer
101+
func lsTreeBlobs(ref string) (*TreeBlobChannelWrapper, error) {
102+
cmd, err := startCommand("git", "ls-tree",
103+
"-r", // recurse
104+
"-l", // report object size (we'll need this)
105+
"-z", // null line termination
106+
"--full-tree", // start at the root regardless of where we are in it
107+
ref,
108+
)
109+
110+
if err != nil {
111+
return nil, err
112+
}
113+
114+
cmd.Stdin.Close()
115+
116+
blobs := make(chan TreeBlob, chanBufSize)
117+
errchan := make(chan error, 1)
118+
119+
go func() {
120+
parseLsTree(cmd.Stdout, blobs)
121+
stderr, _ := ioutil.ReadAll(cmd.Stderr)
122+
err := cmd.Wait()
123+
if err != nil {
124+
errchan <- fmt.Errorf("Error in git ls-tree: %v %v", err, string(stderr))
125+
}
126+
close(blobs)
127+
close(errchan)
128+
}()
129+
130+
return NewTreeBlobChannelWrapper(blobs, errchan), nil
131+
}
132+
133+
func parseLsTree(reader io.Reader, output chan TreeBlob) {
134+
scanner := bufio.NewScanner(reader)
135+
scanner.Split(scanNullLines)
136+
for scanner.Scan() {
137+
line := scanner.Text()
138+
parts := strings.SplitN(line, "\t", 2)
139+
if len(parts) < 2 {
140+
continue
141+
}
142+
143+
attrs := strings.SplitN(parts[0], " ", 4)
144+
if len(attrs) < 4 {
145+
continue
146+
}
147+
148+
if attrs[1] != "blob" {
149+
continue
150+
}
151+
152+
sz, err := strconv.ParseInt(strings.TrimSpace(attrs[3]), 10, 64)
153+
if err != nil {
154+
continue
155+
}
156+
157+
if sz < blobSizeCutoff {
158+
sha1 := attrs[2]
159+
filename := parts[1]
160+
output <- TreeBlob{sha1, filename}
161+
}
162+
}
163+
}
164+
165+
// scanNullLines is a bufio.SplitFunc that yields NUL-terminated tokens.
//
// A token is everything up to, but not including, the next NUL byte. At EOF
// any trailing bytes without a terminator are returned as a final token. When
// no complete token is available yet, it returns (0, nil, nil) to request
// more data.
func scanNullLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
	end := bytes.IndexByte(data, '\000')

	switch {
	case end >= 0:
		// A complete entry: consume it together with its terminator.
		return end + 1, data[:end], nil
	case atEOF && len(data) > 0:
		// Final entry with no terminator.
		return len(data), data, nil
	default:
		// Either nothing is left at EOF, or more input is needed.
		return 0, nil, nil
	}
}

0 commit comments

Comments
 (0)