Skip to content

Commit bf38602

Browse files
committed
Improve get last commit using git log --name-status
`git log --name-status -c` provides information about the diff between a commit and its parents. Using this, and adjusting the algorithm to use the first change to a path, allows for much faster generation of commit info. There is a subtle change in the results generated, but this will cause the results to more closely match those from elsewhere. Signed-off-by: Andrew Thornton <[email protected]>
1 parent 5d113bd commit bf38602

40 files changed

+2619
-252
lines changed

go.mod

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ require (
3030
github.com/couchbase/goutils v0.0.0-20210118111533-e33d3ffb5401 // indirect
3131
github.com/denisenkom/go-mssqldb v0.10.0
3232
github.com/dgrijalva/jwt-go v3.2.0+incompatible
33+
github.com/djherbis/buffer v1.2.0
34+
github.com/djherbis/nio/v3 v3.0.1
3335
github.com/dustin/go-humanize v1.0.0
3436
github.com/editorconfig/editorconfig-core-go/v2 v2.4.2
3537
github.com/emirpasic/gods v1.12.0

go.sum

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,11 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm
244244
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
245245
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
246246
github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
247+
github.com/djherbis/buffer v1.1.0/go.mod h1:VwN8VdFkMY0DCALdY8o00d3IZ6Amz/UNVMWcSaJT44o=
248+
github.com/djherbis/buffer v1.2.0 h1:PH5Dd2ss0C7CRRhQCZ2u7MssF+No9ide8Ye71nPHcrQ=
249+
github.com/djherbis/buffer v1.2.0/go.mod h1:fjnebbZjCUpPinBRD+TDwXSOeNQ7fPQWLfGQqiAiUyE=
250+
github.com/djherbis/nio/v3 v3.0.1 h1:6wxhnuppteMa6RHA4L81Dq7ThkZH8SwnDzXDYy95vB4=
251+
github.com/djherbis/nio/v3 v3.0.1/go.mod h1:Ng4h80pbZFMla1yKzm61cF0tqqilXZYrogmWgZxOcmg=
247252
github.com/dlclark/regexp2 v1.1.6/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
248253
github.com/dlclark/regexp2 v1.2.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
249254
github.com/dlclark/regexp2 v1.4.0 h1:F1rxgk7p4uKjwIQxBs9oAXe5CqrXlCduYEJvrF4u93E=

modules/git/batch_reader.go

Lines changed: 120 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ import (
1111
"math"
1212
"strconv"
1313
"strings"
14+
15+
"github.com/djherbis/buffer"
16+
"github.com/djherbis/nio/v3"
1417
)
1518

1619
// WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function
@@ -42,7 +45,7 @@ func CatFileBatchCheck(repoPath string) (WriteCloserError, *bufio.Reader, func()
4245
}
4346
}()
4447

45-
// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
48+
// For simplicity's sake we'll use a buffered reader to read from the cat-file --batch-check
4649
batchReader := bufio.NewReader(batchStdoutReader)
4750

4851
return batchStdinWriter, batchReader, cancel
@@ -53,7 +56,7 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
5356
// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
5457
// so let's create a batch stdin and stdout
5558
batchStdinReader, batchStdinWriter := io.Pipe()
56-
batchStdoutReader, batchStdoutWriter := io.Pipe()
59+
batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024))
5760
cancel := func() {
5861
_ = batchStdinReader.Close()
5962
_ = batchStdinWriter.Close()
@@ -74,7 +77,7 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
7477
}()
7578

7679
// For simplicity's sake we'll use a buffered reader to read from the cat-file --batch
77-
batchReader := bufio.NewReader(batchStdoutReader)
80+
batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024)
7881

7982
return batchStdinWriter, batchReader, cancel
8083
}
@@ -84,22 +87,31 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
8487
// <sha> SP <type> SP <size> LF
8588
// sha is 40 bytes here, not 20 bytes
8689
func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) {
87-
sha, err = rd.ReadBytes(' ')
90+
typ, err = rd.ReadString('\n')
8891
if err != nil {
8992
return
9093
}
91-
sha = sha[:len(sha)-1]
92-
93-
typ, err = rd.ReadString('\n')
94-
if err != nil {
94+
if len(typ) == 1 {
95+
typ, err = rd.ReadString('\n')
96+
if err != nil {
97+
return
98+
}
99+
}
100+
idx := strings.IndexByte(typ, ' ')
101+
if idx < 0 {
102+
log("missing space typ: %s", typ)
103+
err = ErrNotExist{ID: string(sha)}
95104
return
96105
}
106+
sha = []byte(typ[:idx])
107+
typ = typ[idx+1:]
97108

98-
idx := strings.Index(typ, " ")
109+
idx = strings.IndexByte(typ, ' ')
99110
if idx < 0 {
100111
err = ErrNotExist{ID: string(sha)}
101112
return
102113
}
114+
103115
sizeStr := typ[idx+1 : len(typ)-1]
104116
typ = typ[:idx]
105117

@@ -130,7 +142,7 @@ headerLoop:
130142
}
131143

132144
// Discard the rest of the tag
133-
discard := size - n
145+
discard := size - n + 1
134146
for discard > math.MaxInt32 {
135147
_, err := rd.Discard(math.MaxInt32)
136148
if err != nil {
@@ -211,14 +223,20 @@ func To40ByteSHA(sha, out []byte) []byte {
211223
func ParseTreeLineSkipMode(rd *bufio.Reader, fnameBuf, shaBuf []byte) (fname, sha []byte, n int, err error) {
212224
var readBytes []byte
213225
// Skip the Mode
214-
readBytes, err = rd.ReadSlice(' ') // NB: DOES NOT ALLOCATE SIMPLY RETURNS SLICE WITHIN READER BUFFER
226+
readBytes, err = rd.ReadSlice('\x00') // NB: DOES NOT ALLOCATE SIMPLY RETURNS SLICE WITHIN READER BUFFER
215227
if err != nil {
216228
return
217229
}
218-
n += len(readBytes)
230+
idx := bytes.IndexByte(readBytes, ' ')
231+
if idx < 0 {
232+
log("missing space in readBytes: %s", readBytes)
233+
err = &ErrNotExist{}
234+
return
235+
}
236+
n += idx + 1
237+
readBytes = readBytes[idx+1:]
219238

220239
// Deal with the fname
221-
readBytes, err = rd.ReadSlice('\x00')
222240
copy(fnameBuf, readBytes)
223241
if len(fnameBuf) > len(readBytes) {
224242
fnameBuf = fnameBuf[:len(readBytes)] // cut the buf the correct size
@@ -237,7 +255,7 @@ func ParseTreeLineSkipMode(rd *bufio.Reader, fnameBuf, shaBuf []byte) (fname, sh
237255
fname = fnameBuf // set the returnable fname to the slice
238256

239257
// Now deal with the 20-byte SHA
240-
idx := 0
258+
idx = 0
241259
for idx < 20 {
242260
read := 0
243261
read, err = rd.Read(shaBuf[idx:20])
@@ -262,23 +280,102 @@ func ParseTreeLineSkipMode(rd *bufio.Reader, fnameBuf, shaBuf []byte) (fname, sh
262280
func ParseTreeLine(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) {
263281
var readBytes []byte
264282

265-
// Read the Mode
266-
readBytes, err = rd.ReadSlice(' ')
283+
// Read the Mode & fname
284+
readBytes, err = rd.ReadSlice('\x00')
267285
if err != nil {
268286
return
269287
}
270-
n += len(readBytes)
271-
copy(modeBuf, readBytes)
272-
if len(modeBuf) > len(readBytes) {
273-
modeBuf = modeBuf[:len(readBytes)]
274-
} else {
275-
modeBuf = append(modeBuf, readBytes[len(modeBuf):]...)
288+
idx := bytes.IndexByte(readBytes, ' ')
289+
if idx < 0 {
290+
log("missing space in readBytes ParseTreeLine: %s", readBytes)
291+
292+
err = &ErrNotExist{}
293+
return
294+
}
276295

296+
n += idx + 1
297+
copy(modeBuf, readBytes[:idx])
298+
if len(modeBuf) >= idx {
299+
modeBuf = modeBuf[:idx]
300+
} else {
301+
modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...)
277302
}
278-
mode = modeBuf[:len(modeBuf)-1] // Drop the SP
303+
mode = modeBuf
304+
305+
readBytes = readBytes[idx+1:]
279306

280307
// Deal with the fname
308+
copy(fnameBuf, readBytes)
309+
if len(fnameBuf) > len(readBytes) {
310+
fnameBuf = fnameBuf[:len(readBytes)]
311+
} else {
312+
fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...)
313+
}
314+
for err == bufio.ErrBufferFull {
315+
readBytes, err = rd.ReadSlice('\x00')
316+
fnameBuf = append(fnameBuf, readBytes...)
317+
}
318+
n += len(fnameBuf)
319+
if err != nil {
320+
return
321+
}
322+
fnameBuf = fnameBuf[:len(fnameBuf)-1]
323+
fname = fnameBuf
324+
325+
// Deal with the 20-byte SHA
326+
idx = 0
327+
for idx < 20 {
328+
read := 0
329+
read, err = rd.Read(shaBuf[idx:20])
330+
n += read
331+
if err != nil {
332+
return
333+
}
334+
idx += read
335+
}
336+
sha = shaBuf
337+
return
338+
}
339+
340+
// ParseTreeLineTree reads a tree entry from a tree in a cat-file --batch stream
341+
//
342+
// This carefully avoids allocations - except where fnameBuf is too small.
343+
// It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
344+
//
345+
// Each line is composed of:
346+
// <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <20-byte SHA>
347+
//
348+
// We don't attempt to convert the 20-byte SHA to 40-byte SHA to save a lot of time
349+
func ParseTreeLineTree(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (isTree bool, fname, sha []byte, n int, err error) {
350+
var readBytes []byte
351+
352+
// Read the Mode & fname
281353
readBytes, err = rd.ReadSlice('\x00')
354+
if err != nil {
355+
return
356+
}
357+
if len(readBytes) < 6 {
358+
log("missing space in readBytes ParseTreeLineTree: %v", readBytes)
359+
err = &ErrNotExist{}
360+
return
361+
}
362+
if !bytes.Equal(readBytes[:6], []byte("40000 ")) {
363+
n += len(readBytes)
364+
for err == bufio.ErrBufferFull {
365+
readBytes, err = rd.ReadSlice('\x00')
366+
n += len(readBytes)
367+
}
368+
d := 0
369+
d, err = rd.Discard(20)
370+
n += d
371+
return
372+
}
373+
isTree = true
374+
375+
n += 6
376+
readBytes = readBytes[6:]
377+
378+
// Deal with the fname
282379
copy(fnameBuf, readBytes)
283380
if len(fnameBuf) > len(readBytes) {
284381
fnameBuf = fnameBuf[:len(readBytes)]

0 commit comments

Comments
 (0)