Skip to content

Commit 270aab4

Browse files
authored
On open repository open common cat file batch and batch-check (#15667)
Use common git cat-file --batch and git cat-file --batch-check to significantly reduce calls to git. Signed-off-by: Andrew Thornton <[email protected]>
1 parent 038e1db commit 270aab4

26 files changed

+463
-161
lines changed

modules/context/repo.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -905,12 +905,18 @@ func (ctx *Context) IssueTemplatesFromDefaultBranch() []api.IssueTemplate {
905905
log.Debug("DataAsync: %v", err)
906906
continue
907907
}
908-
defer r.Close()
908+
closed := false
909+
defer func() {
910+
if !closed {
911+
_ = r.Close()
912+
}
913+
}()
909914
data, err := ioutil.ReadAll(r)
910915
if err != nil {
911916
log.Debug("ReadAll: %v", err)
912917
continue
913918
}
919+
_ = r.Close()
914920
var it api.IssueTemplate
915921
content, err := markdown.ExtractMetadata(string(data), &it)
916922
if err != nil {

modules/git/batch_reader.go

Lines changed: 46 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,44 @@ import (
1313
"strings"
1414
)
1515

16+
// WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function
17+
type WriteCloserError interface {
18+
io.WriteCloser
19+
CloseWithError(err error) error
20+
}
21+
22+
// CatFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function
23+
func CatFileBatchCheck(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
24+
batchStdinReader, batchStdinWriter := io.Pipe()
25+
batchStdoutReader, batchStdoutWriter := io.Pipe()
26+
cancel := func() {
27+
_ = batchStdinReader.Close()
28+
_ = batchStdinWriter.Close()
29+
_ = batchStdoutReader.Close()
30+
_ = batchStdoutWriter.Close()
31+
}
32+
33+
go func() {
34+
stderr := strings.Builder{}
35+
err := NewCommand("cat-file", "--batch-check").RunInDirFullPipeline(repoPath, batchStdoutWriter, &stderr, batchStdinReader)
36+
if err != nil {
37+
_ = batchStdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
38+
_ = batchStdinReader.CloseWithError(ConcatenateError(err, (&stderr).String()))
39+
} else {
40+
_ = batchStdoutWriter.Close()
41+
_ = batchStdinReader.Close()
42+
}
43+
}()
44+
45+
// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
46+
batchReader := bufio.NewReader(batchStdoutReader)
47+
48+
return batchStdinWriter, batchReader, cancel
49+
}
50+
1651
// CatFileBatch opens git cat-file --batch in the provided repo and returns a stdin pipe, a stdout reader and cancel function
17-
func CatFileBatch(repoPath string) (*io.PipeWriter, *bufio.Reader, func()) {
18-
// Next feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
52+
func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
53+
// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
1954
// so let's create a batch stdin and stdout
2055
batchStdinReader, batchStdinWriter := io.Pipe()
2156
batchStdoutReader, batchStdoutWriter := io.Pipe()
@@ -47,26 +82,28 @@ func CatFileBatch(repoPath string) (*io.PipeWriter, *bufio.Reader, func()) {
4782
// ReadBatchLine reads the header line from cat-file --batch
4883
// We expect:
4984
// <sha> SP <type> SP <size> LF
85+
// sha is a 40byte not 20byte here
5086
func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) {
5187
sha, err = rd.ReadBytes(' ')
5288
if err != nil {
5389
return
5490
}
5591
sha = sha[:len(sha)-1]
5692

57-
typ, err = rd.ReadString(' ')
93+
typ, err = rd.ReadString('\n')
5894
if err != nil {
5995
return
6096
}
61-
typ = typ[:len(typ)-1]
6297

63-
var sizeStr string
64-
sizeStr, err = rd.ReadString('\n')
65-
if err != nil {
98+
idx := strings.Index(typ, " ")
99+
if idx < 0 {
100+
err = ErrNotExist{ID: string(sha)}
66101
return
67102
}
103+
sizeStr := typ[idx+1 : len(typ)-1]
104+
typ = typ[:idx]
68105

69-
size, err = strconv.ParseInt(sizeStr[:len(sizeStr)-1], 10, 64)
106+
size, err = strconv.ParseInt(sizeStr, 10, 64)
70107
return
71108
}
72109

@@ -128,7 +165,7 @@ headerLoop:
128165
}
129166

130167
// Discard the rest of the commit
131-
discard := size - n
168+
discard := size - n + 1
132169
for discard > math.MaxInt32 {
133170
_, err := rd.Discard(math.MaxInt32)
134171
if err != nil {

modules/git/blob_nogogit.go

Lines changed: 84 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -8,48 +8,54 @@ package git
88

99
import (
1010
"bufio"
11+
"bytes"
1112
"io"
12-
"strconv"
13-
"strings"
13+
"io/ioutil"
14+
"math"
1415
)
1516

1617
// Blob represents a Git object.
1718
type Blob struct {
1819
ID SHA1
1920

20-
gotSize bool
21-
size int64
22-
repoPath string
23-
name string
21+
gotSize bool
22+
size int64
23+
name string
24+
repo *Repository
2425
}
2526

2627
// DataAsync gets a ReadCloser for the contents of a blob without reading it all.
2728
// Calling the Close function on the result will discard all unread output.
2829
func (b *Blob) DataAsync() (io.ReadCloser, error) {
29-
stdoutReader, stdoutWriter := io.Pipe()
30+
wr, rd, cancel := b.repo.CatFileBatch()
3031

31-
go func() {
32-
stderr := &strings.Builder{}
33-
err := NewCommand("cat-file", "--batch").RunInDirFullPipeline(b.repoPath, stdoutWriter, stderr, strings.NewReader(b.ID.String()+"\n"))
34-
if err != nil {
35-
err = ConcatenateError(err, stderr.String())
36-
_ = stdoutWriter.CloseWithError(err)
37-
} else {
38-
_ = stdoutWriter.Close()
39-
}
40-
}()
41-
42-
bufReader := bufio.NewReader(stdoutReader)
43-
_, _, size, err := ReadBatchLine(bufReader)
32+
_, err := wr.Write([]byte(b.ID.String() + "\n"))
4433
if err != nil {
45-
stdoutReader.Close()
34+
cancel()
4635
return nil, err
4736
}
37+
_, _, size, err := ReadBatchLine(rd)
38+
if err != nil {
39+
cancel()
40+
return nil, err
41+
}
42+
b.gotSize = true
43+
b.size = size
4844

49-
return &LimitedReaderCloser{
50-
R: bufReader,
51-
C: stdoutReader,
52-
N: size,
45+
if size < 4096 {
46+
bs, err := ioutil.ReadAll(io.LimitReader(rd, size))
47+
if err != nil {
48+
cancel()
49+
return nil, err
50+
}
51+
_, err = rd.Discard(1)
52+
return ioutil.NopCloser(bytes.NewReader(bs)), err
53+
}
54+
55+
return &blobReader{
56+
rd: rd,
57+
n: size,
58+
cancel: cancel,
5359
}, nil
5460
}
5561

@@ -59,18 +65,66 @@ func (b *Blob) Size() int64 {
5965
return b.size
6066
}
6167

62-
size, err := NewCommand("cat-file", "-s", b.ID.String()).RunInDir(b.repoPath)
68+
wr, rd, cancel := b.repo.CatFileBatchCheck()
69+
defer cancel()
70+
_, err := wr.Write([]byte(b.ID.String() + "\n"))
6371
if err != nil {
64-
log("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repoPath, err)
72+
log("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
6573
return 0
6674
}
67-
68-
b.size, err = strconv.ParseInt(size[:len(size)-1], 10, 64)
75+
_, _, b.size, err = ReadBatchLine(rd)
6976
if err != nil {
70-
log("error whilst parsing size %s for %s in %s. Error: %v", size, b.ID.String(), b.repoPath, err)
77+
log("error whilst reading size for %s in %s. Error: %v", b.ID.String(), b.repo.Path, err)
7178
return 0
7279
}
80+
7381
b.gotSize = true
7482

7583
return b.size
7684
}
85+
86+
type blobReader struct {
87+
rd *bufio.Reader
88+
n int64
89+
cancel func()
90+
}
91+
92+
func (b *blobReader) Read(p []byte) (n int, err error) {
93+
if b.n <= 0 {
94+
return 0, io.EOF
95+
}
96+
if int64(len(p)) > b.n {
97+
p = p[0:b.n]
98+
}
99+
n, err = b.rd.Read(p)
100+
b.n -= int64(n)
101+
return
102+
}
103+
104+
// Close implements io.Closer
105+
func (b *blobReader) Close() error {
106+
if b.n > 0 {
107+
for b.n > math.MaxInt32 {
108+
n, err := b.rd.Discard(math.MaxInt32)
109+
b.n -= int64(n)
110+
if err != nil {
111+
b.cancel()
112+
return err
113+
}
114+
b.n -= math.MaxInt32
115+
}
116+
n, err := b.rd.Discard(int(b.n))
117+
b.n -= int64(n)
118+
if err != nil {
119+
b.cancel()
120+
return err
121+
}
122+
}
123+
if b.n == 0 {
124+
_, err := b.rd.Discard(1)
125+
b.n--
126+
b.cancel()
127+
return err
128+
}
129+
return nil
130+
}

modules/git/blob_test.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,10 @@ func TestBlob_Data(t *testing.T) {
2929
r, err := testBlob.DataAsync()
3030
assert.NoError(t, err)
3131
require.NotNil(t, r)
32-
defer r.Close()
3332

3433
data, err := ioutil.ReadAll(r)
34+
assert.NoError(t, r.Close())
35+
3536
assert.NoError(t, err)
3637
assert.Equal(t, output, string(data))
3738
}
@@ -54,7 +55,7 @@ func Benchmark_Blob_Data(b *testing.B) {
5455
if err != nil {
5556
b.Fatal(err)
5657
}
57-
defer r.Close()
5858
ioutil.ReadAll(r)
59+
_ = r.Close()
5960
}
6061
}

modules/git/commit_info_nogogit.go

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache *LastCo
102102
}
103103

104104
func getLastCommitForPathsByCache(commitID, treePath string, paths []string, cache *LastCommitCache) (map[string]*Commit, []string, error) {
105-
wr, rd, cancel := CatFileBatch(cache.repo.Path)
105+
wr, rd, cancel := cache.repo.CatFileBatch()
106106
defer cancel()
107107

108108
var unHitEntryPaths []string
@@ -144,7 +144,7 @@ func GetLastCommitForPaths(commit *Commit, treePath string, paths []string) ([]*
144144
}
145145
}()
146146

147-
batchStdinWriter, batchReader, cancel := CatFileBatch(commit.repo.Path)
147+
batchStdinWriter, batchReader, cancel := commit.repo.CatFileBatch()
148148
defer cancel()
149149

150150
mapsize := 4096
@@ -237,6 +237,10 @@ revListLoop:
237237
// FIXME: is there any order to the way strings are emitted from cat-file?
238238
// if there is - then we could skip once we've passed all of our data
239239
}
240+
if _, err := batchReader.Discard(1); err != nil {
241+
return nil, err
242+
}
243+
240244
break treeReadingLoop
241245
}
242246

@@ -281,6 +285,9 @@ revListLoop:
281285
return nil, err
282286
}
283287
}
288+
if _, err := batchReader.Discard(1); err != nil {
289+
return nil, err
290+
}
284291

285292
// if we haven't found a treeID for the target directory our search is over
286293
if len(treeID) == 0 {
@@ -345,6 +352,9 @@ revListLoop:
345352
if err != nil {
346353
return nil, err
347354
}
355+
if _, err := batchReader.Discard(1); err != nil {
356+
return nil, err
357+
}
348358
commitCommits[i] = c
349359
}
350360

modules/git/last_commit_cache_nogogit.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ package git
88

99
import (
1010
"bufio"
11-
"io"
1211
"path"
1312
)
1413

@@ -36,7 +35,7 @@ func NewLastCommitCache(repoPath string, gitRepo *Repository, ttl func() int64,
3635
}
3736

3837
// Get get the last commit information by commit id and entry path
39-
func (c *LastCommitCache) Get(ref, entryPath string, wr *io.PipeWriter, rd *bufio.Reader) (interface{}, error) {
38+
func (c *LastCommitCache) Get(ref, entryPath string, wr WriteCloserError, rd *bufio.Reader) (interface{}, error) {
4039
v := c.cache.Get(c.getCacheKey(c.repoPath, ref, entryPath))
4140
if vs, ok := v.(string); ok {
4241
log("LastCommitCache hit level 1: [%s:%s:%s]", ref, entryPath, vs)

modules/git/notes_nogogit.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,18 @@ func GetNote(repo *Repository, commitID string, note *Note) error {
4343
if err != nil {
4444
return err
4545
}
46-
defer dataRc.Close()
46+
closed := false
47+
defer func() {
48+
if !closed {
49+
_ = dataRc.Close()
50+
}
51+
}()
4752
d, err := ioutil.ReadAll(dataRc)
4853
if err != nil {
4954
return err
5055
}
56+
_ = dataRc.Close()
57+
closed = true
5158
note.Message = d
5259

5360
treePath := ""

0 commit comments

Comments
 (0)