Skip to content

Commit e7c5977

Browse files
committed
refactor parseTreeEntries, speed up tree list
1 parent 81d7270 commit e7c5977

File tree

7 files changed

+92
-50
lines changed

7 files changed

+92
-50
lines changed

modules/git/parse_nogogit.go

+40-38
Original file line numberDiff line numberDiff line change
@@ -22,70 +22,72 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
2222
return parseTreeEntries(data, nil)
2323
}
2424

25+
var sepSpace = []byte{' '}
26+
2527
func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
26-
entries := make([]*TreeEntry, 0, 10)
28+
var err error
29+
entries := make([]*TreeEntry, 0, bytes.Count(data, []byte{'\n'})+1)
2730
for pos := 0; pos < len(data); {
28-
// expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>"
31+
// expect line to be of the form:
32+
// <mode> <type> <sha> <space-padded-size>\t<filename>
33+
// <mode> <type> <sha>\t<filename>
34+
posEnd := bytes.IndexByte(data[pos:], '\n')
35+
if posEnd == -1 {
36+
posEnd = len(data)
37+
} else {
38+
posEnd += pos
39+
}
40+
line := data[pos:posEnd]
41+
posTab := bytes.IndexByte(line, '\t')
42+
if posTab == -1 {
43+
return nil, fmt.Errorf("invalid ls-tree output (no tab): %q", line)
44+
}
45+
2946
entry := new(TreeEntry)
3047
entry.ptree = ptree
31-
if pos+6 > len(data) {
32-
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
48+
49+
entryAttrs := line[:posTab]
50+
entryName := line[posTab+1:]
51+
52+
entryMode, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace)
53+
_ /* entryType */, entryAttrs, _ = bytes.Cut(entryAttrs, sepSpace) // the type is not used, the mode is enough to determine the type
54+
entryObjectID, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace)
55+
if len(entryAttrs) > 0 {
56+
entrySize := entryAttrs // the last field is the space-padded-size
57+
entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(entrySize)), 10, 64)
58+
entry.sized = true
3359
}
34-
switch string(data[pos : pos+6]) {
60+
61+
switch string(entryMode) {
3562
case "100644":
3663
entry.entryMode = EntryModeBlob
37-
pos += 12 // skip over "100644 blob "
3864
case "100755":
3965
entry.entryMode = EntryModeExec
40-
pos += 12 // skip over "100755 blob "
4166
case "120000":
4267
entry.entryMode = EntryModeSymlink
43-
pos += 12 // skip over "120000 blob "
4468
case "160000":
4569
entry.entryMode = EntryModeCommit
46-
pos += 14 // skip over "160000 object "
4770
case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
4871
entry.entryMode = EntryModeTree
49-
pos += 12 // skip over "040000 tree "
5072
default:
51-
return nil, fmt.Errorf("unknown type: %v", string(data[pos:pos+6]))
73+
return nil, fmt.Errorf("unknown type: %v", string(entryMode))
5274
}
5375

54-
if pos+40 > len(data) {
55-
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
56-
}
57-
id, err := NewIDFromString(string(data[pos : pos+40]))
76+
entry.ID, err = NewIDFromString(string(entryObjectID))
5877
if err != nil {
59-
return nil, fmt.Errorf("Invalid ls-tree output: %v", err)
60-
}
61-
entry.ID = id
62-
pos += 41 // skip over sha and trailing space
63-
64-
end := pos + bytes.IndexByte(data[pos:], '\t')
65-
if end < pos {
66-
return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data))
67-
}
68-
entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64)
69-
entry.sized = true
70-
71-
pos = end + 1
72-
73-
end = pos + bytes.IndexByte(data[pos:], '\n')
74-
if end < pos {
75-
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
78+
return nil, fmt.Errorf("invalid ls-tree output (invalid object id): %q, err: %w", line, err)
7679
}
7780

78-
// In case entry name is surrounded by double quotes(it happens only in git-shell).
79-
if data[pos] == '"' {
80-
entry.name, err = strconv.Unquote(string(data[pos:end]))
81+
if len(entryName) > 0 && entryName[0] == '"' {
82+
entry.name, err = strconv.Unquote(string(entryName))
8183
if err != nil {
82-
return nil, fmt.Errorf("Invalid ls-tree output: %v", err)
84+
return nil, fmt.Errorf("invalid ls-tree output (invalid name): %q, err: %w", line, err)
8385
}
8486
} else {
85-
entry.name = string(data[pos:end])
87+
entry.name = string(entryName)
8688
}
8789

88-
pos = end + 1
90+
pos = posEnd + 1
8991
entries = append(entries, entry)
9092
}
9193
return entries, nil

modules/git/parse_nogogit_test.go

+42-6
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import (
1212
"github.com/stretchr/testify/assert"
1313
)
1414

15-
func TestParseTreeEntries(t *testing.T) {
15+
func TestParseTreeEntriesLong(t *testing.T) {
1616
testCases := []struct {
1717
Input string
1818
Expected []*TreeEntry
@@ -59,11 +59,47 @@ func TestParseTreeEntries(t *testing.T) {
5959
assert.NoError(t, err)
6060
assert.Len(t, entries, len(testCase.Expected))
6161
for i, entry := range entries {
62-
assert.EqualValues(t, testCase.Expected[i].ID, entry.ID)
63-
assert.EqualValues(t, testCase.Expected[i].name, entry.name)
64-
assert.EqualValues(t, testCase.Expected[i].entryMode, entry.entryMode)
65-
assert.EqualValues(t, testCase.Expected[i].sized, entry.sized)
66-
assert.EqualValues(t, testCase.Expected[i].size, entry.size)
62+
assert.EqualValues(t, testCase.Expected[i], entry)
6763
}
6864
}
6965
}
66+
67+
func TestParseTreeEntriesShort(t *testing.T) {
68+
testCases := []struct {
69+
Input string
70+
Expected []*TreeEntry
71+
}{
72+
{
73+
Input: `100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af README.md
74+
040000 tree 84b90550547016f73c5dd3f50dea662389e67b6d assets
75+
`,
76+
Expected: []*TreeEntry{
77+
{
78+
ID: MustIDFromString("ea0d83c9081af9500ac9f804101b3fd0a5c293af"),
79+
name: "README.md",
80+
entryMode: EntryModeBlob,
81+
},
82+
{
83+
ID: MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"),
84+
name: "assets",
85+
entryMode: EntryModeTree,
86+
},
87+
},
88+
},
89+
}
90+
for _, testCase := range testCases {
91+
entries, err := ParseTreeEntries([]byte(testCase.Input))
92+
assert.NoError(t, err)
93+
assert.Len(t, entries, len(testCase.Expected))
94+
for i, entry := range entries {
95+
assert.EqualValues(t, testCase.Expected[i], entry)
96+
}
97+
}
98+
}
99+
100+
func TestParseTreeEntriesInvalid(t *testing.T) {
101+
// there was a panic: "runtime error: slice bounds out of range" when the input was invalid: #20315
102+
entries, err := ParseTreeEntries([]byte("100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af"))
103+
assert.Error(t, err)
104+
assert.Len(t, entries, 0)
105+
}

modules/git/repo_language_stats_nogogit.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
5757

5858
tree := commit.Tree
5959

60-
entries, err := tree.ListEntriesRecursive()
60+
entries, err := tree.ListEntriesRecursive("--long") // get blob sizes
6161
if err != nil {
6262
return nil, err
6363
}

modules/git/tree_gogit.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,8 @@ func (t *Tree) ListEntries() (Entries, error) {
5858
}
5959

6060
// ListEntriesRecursive returns all entries of current tree recursively including all subtrees
61-
func (t *Tree) ListEntriesRecursive() (Entries, error) {
61+
// extraArgs takes no effect (for nogogit version, it controls whether getting the blob size)
62+
func (t *Tree) ListEntriesRecursive(extraArgs ...string) (Entries, error) {
6263
if t.gogitTree == nil {
6364
err := t.loadTreeObject()
6465
if err != nil {

modules/git/tree_nogogit.go

+5-2
Original file line numberDiff line numberDiff line change
@@ -100,12 +100,15 @@ func (t *Tree) ListEntries() (Entries, error) {
100100
}
101101

102102
// ListEntriesRecursive returns all entries of current tree recursively including all subtrees
103-
func (t *Tree) ListEntriesRecursive() (Entries, error) {
103+
// extraArgs could be "-l" to get the size, which is slower
104+
func (t *Tree) ListEntriesRecursive(extraArgs ...string) (Entries, error) {
104105
if t.entriesRecursiveParsed {
105106
return t.entriesRecursive, nil
106107
}
107108

108-
stdout, _, runErr := NewCommand(t.repo.Ctx, "ls-tree", "-t", "-l", "-r", t.ID.String()).RunStdBytes(&RunOpts{Dir: t.repo.Path})
109+
args := append([]string{"ls-tree", "-t", "-r"}, extraArgs...)
110+
args = append(args, t.ID.String())
111+
stdout, _, runErr := NewCommand(t.repo.Ctx, args...).RunStdBytes(&RunOpts{Dir: t.repo.Path})
109112
if runErr != nil {
110113
return nil, runErr
111114
}

routers/web/repo/treelist.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ func TreeList(ctx *context.Context) {
2222
return
2323
}
2424

25-
entries, err := tree.ListEntriesRecursive()
25+
entries, err := tree.ListEntriesRecursive() // do not need the size, so use the faster arguments
2626
if err != nil {
2727
ctx.ServerError("ListEntriesRecursive", err)
2828
return

services/repository/files/tree.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ func GetTreeBySHA(ctx context.Context, repo *repo_model.Repository, gitRepo *git
2929
tree.URL = repo.APIURL() + "/git/trees/" + url.PathEscape(tree.SHA)
3030
var entries git.Entries
3131
if recursive {
32-
entries, err = gitTree.ListEntriesRecursive()
32+
entries, err = gitTree.ListEntriesRecursive("--long") // get blob sizes
3333
} else {
3434
entries, err = gitTree.ListEntries()
3535
}

0 commit comments

Comments
 (0)