Skip to content

Commit b83114f

Browse files
filipnavaralunny
authored andcommitted
Fix one performance/correctness regression in #6478 found on Rails repository. (#6686)
* Fix flaw in the commit history lookup that caused unnecessary traversal when the repository contains a lot of merge commits. Also return the merge commit as the changed one if the file or directory was changed as part of the merge, eg. through conflict resolution. Signed-off-by: Filip Navara <[email protected]> * Perform history simplification. If a file is present on multiple parents in a merge commit follow only the first parent.
1 parent 04ff3dd commit b83114f

File tree

1 file changed

+33
-38
lines changed

1 file changed

+33
-38
lines changed

modules/git/commit_info.go

Lines changed: 33 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -147,12 +147,6 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m
147147
break
148148
}
149149
current := cIn.(*commitAndPaths)
150-
currentID := current.commit.ID()
151-
152-
if seen[currentID] {
153-
continue
154-
}
155-
seen[currentID] = true
156150

157151
// Load the parent commits for the one we are currently examining
158152
numParents := current.commit.NumParents()
@@ -166,8 +160,7 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m
166160
}
167161

168162
// Examine the current commit and set of interesting paths
169-
numOfParentsWithPath := make([]int, len(current.paths))
170-
pathChanged := make([]bool, len(current.paths))
163+
pathUnchanged := make([]bool, len(current.paths))
171164
parentHashes := make([]map[string]plumbing.Hash, len(parents))
172165
for j, parent := range parents {
173166
parentHashes[j], err = getFileHashes(parent, treePath, current.paths)
@@ -176,42 +169,32 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m
176169
}
177170

178171
for i, path := range current.paths {
179-
if parentHashes[j][path] != plumbing.ZeroHash {
180-
numOfParentsWithPath[i]++
181-
if parentHashes[j][path] != current.hashes[path] {
182-
pathChanged[i] = true
183-
}
172+
if parentHashes[j][path] == current.hashes[path] {
173+
pathUnchanged[i] = true
184174
}
185175
}
186176
}
187177

188178
var remainingPaths []string
189179
for i, path := range current.paths {
190-
switch numOfParentsWithPath[i] {
191-
case 0:
192-
// The path didn't exist in any parent, so it must have been created by
193-
// this commit. The results could already contain some newer change from
194-
// different path, so don't override that.
195-
if result[path] == nil {
196-
result[path] = current.commit
197-
}
198-
case 1:
199-
// The file is present on exactly one parent, so check if it was changed
200-
// and save the revision if it did.
201-
if pathChanged[i] {
202-
if result[path] == nil {
203-
result[path] = current.commit
204-
}
205-
} else {
180+
// The results could already contain some newer change for the same path,
181+
// so don't override that and bail out on the file early.
182+
if result[path] == nil {
183+
if pathUnchanged[i] {
184+
// The path existed with the same hash in at least one parent so it could
185+
// not have been changed in this commit directly.
206186
remainingPaths = append(remainingPaths, path)
187+
} else {
188+
// There are few possible cases how can we get here:
189+
// - The path didn't exist in any parent, so it must have been created by
190+
// this commit.
191+
// - The path did exist in the parent commit, but the hash of the file has
192+
// changed.
193+
// - We are looking at a merge commit and the hash of the file doesn't
194+
// match any of the hashes being merged. This is more common for directories,
195+
// but it can also happen if a file is changed through conflict resolution.
196+
result[path] = current.commit
207197
}
208-
default:
209-
// The file is present on more than one of the parent paths, so this is
210-
// a merge. We have to examine all the parent trees to find out where
211-
// the change occurred. pathChanged[i] would tell us that the file was
212-
// changed during the merge, but it wouldn't tell us the relevant commit
213-
// that introduced it.
214-
remainingPaths = append(remainingPaths, path)
215198
}
216199
}
217200

@@ -222,17 +205,29 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m
222205
if seen[parent.ID()] {
223206
continue
224207
}
208+
seen[parent.ID()] = true
225209

226210
// Combine remainingPath with paths available on the parent branch
227211
// and make union of them
228212
var remainingPathsForParent []string
213+
var newRemainingPaths []string
229214
for _, path := range remainingPaths {
230-
if parentHashes[j][path] != plumbing.ZeroHash {
215+
if parentHashes[j][path] == current.hashes[path] {
231216
remainingPathsForParent = append(remainingPathsForParent, path)
217+
} else {
218+
newRemainingPaths = append(newRemainingPaths, path)
232219
}
233220
}
234221

235-
heap.Push(&commitAndPaths{parent, remainingPathsForParent, parentHashes[j]})
222+
if remainingPathsForParent != nil {
223+
heap.Push(&commitAndPaths{parent, remainingPathsForParent, parentHashes[j]})
224+
}
225+
226+
if len(newRemainingPaths) == 0 {
227+
break
228+
} else {
229+
remainingPaths = newRemainingPaths
230+
}
236231
}
237232
}
238233
}

0 commit comments

Comments
 (0)