@@ -7,22 +7,23 @@ use gix_object::{
7
7
bstr:: { BStr , BString } ,
8
8
FindExt ,
9
9
} ;
10
+ use gix_traverse:: commit:: find;
11
+ use smallvec:: SmallVec ;
10
12
use std:: num:: NonZeroU32 ;
11
13
use std:: ops:: Range ;
12
14
13
15
/// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file
14
- /// at `traverse[0] :<file_path>` originated in.
16
+ /// at `suspect :<file_path>` originated in.
15
17
///
16
18
/// ## Parameters
17
19
///
18
20
/// * `odb`
19
21
/// - Access to database objects, also used for diffing.
20
22
/// - Should have an object cache for good diff performance.
21
- /// * `traverse`
22
- /// - The list of commits from the most recent to prior ones, following all parents sorted
23
- /// by time.
24
- /// - It's paramount that older commits are returned after newer ones.
25
- /// - The first commit returned here is the first eligible commit to be responsible for parts of `file_path`.
23
+ /// * `suspect`
24
+ /// - The first commit to be responsible for parts of `file_path`.
25
+ /// * `cache`
26
+ /// - Optionally, the commitgraph cache.
26
27
/// * `file_path`
27
28
/// - A *slash-separated* worktree-relative path to the file to blame.
28
29
/// * `range`
@@ -60,20 +61,14 @@ use std::ops::Range;
60
61
// <---><----------><-------><-----><------->
61
62
// <---><---><-----><-------><-----><------->
62
63
// <---><---><-----><-------><-----><-><-><->
63
- pub fn file < E > (
64
+ pub fn file (
64
65
odb : impl gix_object:: Find + gix_object:: FindHeader ,
65
- traverse : impl IntoIterator < Item = Result < gix_traverse:: commit:: Info , E > > ,
66
+ suspect : ObjectId ,
67
+ cache : Option < gix_commitgraph:: Graph > ,
66
68
resource_cache : & mut gix_diff:: blob:: Platform ,
67
69
file_path : & BStr ,
68
70
range : Option < Range < u32 > > ,
69
- ) -> Result < Outcome , Error >
70
- where
71
- E : Into < Box < dyn std:: error:: Error + Send + Sync + ' static > > ,
72
- {
73
- let mut traverse = traverse. into_iter ( ) . peekable ( ) ;
74
- let Some ( Ok ( suspect) ) = traverse. peek ( ) . map ( |res| res. as_ref ( ) . map ( |item| item. id ) ) else {
75
- return Err ( Error :: EmptyTraversal ) ;
76
- } ;
71
+ ) -> Result < Outcome , Error > {
77
72
let _span = gix_trace:: coarse!( "gix_blame::file()" , ?file_path, ?suspect) ;
78
73
79
74
let mut stats = Statistics :: default ( ) ;
@@ -103,25 +98,43 @@ where
103
98
suspects: [ ( suspect, range_in_blamed_file) ] . into( ) ,
104
99
} ] ;
105
100
101
+ let mut buf = Vec :: new ( ) ;
102
+ let commit = find ( cache. as_ref ( ) , & odb, & suspect, & mut buf) ?;
103
+
104
+ let mut queue: gix_revwalk:: PriorityQueue < CommitTime , ObjectId > = gix_revwalk:: PriorityQueue :: new ( ) ;
105
+
106
+ let commit_time = commit_time ( commit) ;
107
+ queue. insert ( commit_time, suspect) ;
108
+
106
109
let mut out = Vec :: new ( ) ;
107
110
let mut diff_state = gix_diff:: tree:: State :: default ( ) ;
108
111
let mut previous_entry: Option < ( ObjectId , ObjectId ) > = None ;
109
- ' outer: while let Some ( item ) = traverse . next ( ) {
112
+ ' outer: while let Some ( suspect ) = queue . pop_value ( ) {
110
113
if hunks_to_blame. is_empty ( ) {
111
114
break ;
112
115
}
113
- let commit = item. map_err ( |err| Error :: Traverse ( err. into ( ) ) ) ?;
114
- let suspect = commit. id ;
116
+
117
+ let is_still_suspect = hunks_to_blame. iter ( ) . any ( |hunk| hunk. suspects . contains_key ( & suspect) ) ;
118
+
119
+ if !is_still_suspect {
120
+ // There are no `UnblamedHunk`s associated with this `suspect`, so we can continue with
121
+ // the next one.
122
+ continue ' outer;
123
+ }
124
+
115
125
stats. commits_traversed += 1 ;
116
126
117
- let parent_ids = commit. parent_ids ;
127
+ let commit = find ( cache. as_ref ( ) , & odb, & suspect, & mut buf) ?;
128
+
129
+ let parent_ids: ParentIds = collect_parents ( commit, & odb, cache. as_ref ( ) ) ;
130
+
118
131
if parent_ids. is_empty ( ) {
119
- if traverse . peek ( ) . is_none ( ) {
120
- // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the `id` of
121
- // the last `item` that was yielded by `traverse `, so it makes sense to assign the
122
- // remaining lines to it, even though we don’t explicitly check whether that is true
123
- // here. We could perhaps use diff-tree-to-tree to compare `suspect`
124
- // against an empty tree to validate this assumption.
132
+ if queue . is_empty ( ) {
133
+ // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the
134
+ // `id` of the last `item` that was yielded by `queue `, so it makes sense to assign
135
+ // the remaining lines to it, even though we don’t explicitly check whether that is
136
+ // true here. We could perhaps use diff-tree-to-tree to compare `suspect` against
137
+ // an empty tree to validate this assumption.
125
138
if unblamed_to_out_is_done ( & mut hunks_to_blame, & mut out, suspect) {
126
139
break ' outer;
127
140
}
@@ -143,7 +156,41 @@ where
143
156
continue ;
144
157
} ;
145
158
146
- for ( pid, parent_id) in parent_ids. iter ( ) . enumerate ( ) {
159
+ // This block asserts that, for every `UnblamedHunk`, all lines in the *Blamed File* are
160
+ // identical to the corresponding lines in the *Source File*.
161
+ #[ cfg( debug_assertions) ]
162
+ {
163
+ let source_blob = odb. find_blob ( & entry_id, & mut buf) ?. data . to_vec ( ) ;
164
+ let mut source_interner = gix_diff:: blob:: intern:: Interner :: new ( source_blob. len ( ) / 100 ) ;
165
+ let source_lines_as_tokens: Vec < _ > = tokens_for_diffing ( & source_blob)
166
+ . tokenize ( )
167
+ . map ( |token| source_interner. intern ( token) )
168
+ . collect ( ) ;
169
+
170
+ let mut blamed_interner = gix_diff:: blob:: intern:: Interner :: new ( blamed_file_blob. len ( ) / 100 ) ;
171
+ let blamed_lines_as_tokens: Vec < _ > = tokens_for_diffing ( & blamed_file_blob)
172
+ . tokenize ( )
173
+ . map ( |token| blamed_interner. intern ( token) )
174
+ . collect ( ) ;
175
+
176
+ for hunk in hunks_to_blame. iter ( ) {
177
+ if let Some ( range_in_suspect) = hunk. suspects . get ( & suspect) {
178
+ let range_in_blamed_file = hunk. range_in_blamed_file . clone ( ) ;
179
+
180
+ for ( blamed_line_number, source_line_number) in range_in_blamed_file. zip ( range_in_suspect. clone ( ) ) {
181
+ let source_token = source_lines_as_tokens[ source_line_number as usize ] ;
182
+ let blame_token = blamed_lines_as_tokens[ blamed_line_number as usize ] ;
183
+
184
+ let source_line = BString :: new ( source_interner[ source_token] . into ( ) ) ;
185
+ let blamed_line = BString :: new ( blamed_interner[ blame_token] . into ( ) ) ;
186
+
187
+ assert_eq ! ( source_line, blamed_line) ;
188
+ }
189
+ }
190
+ }
191
+ }
192
+
193
+ for ( pid, ( parent_id, parent_commit_time) ) in parent_ids. iter ( ) . enumerate ( ) {
147
194
if let Some ( parent_entry_id) =
148
195
find_path_entry_in_commit ( & odb, parent_id, file_path, & mut buf, & mut buf2, & mut stats) ?
149
196
{
@@ -153,17 +200,19 @@ where
153
200
}
154
201
if no_change_in_entry {
155
202
pass_blame_from_to ( suspect, * parent_id, & mut hunks_to_blame) ;
203
+ queue. insert ( * parent_commit_time, * parent_id) ;
156
204
continue ' outer;
157
205
}
158
206
}
159
207
}
160
208
161
209
let more_than_one_parent = parent_ids. len ( ) > 1 ;
162
- for parent_id in parent_ids {
210
+ for ( parent_id, parent_commit_time) in parent_ids {
211
+ queue. insert ( parent_commit_time, parent_id) ;
163
212
let changes_for_file_path = tree_diff_at_file_path (
164
213
& odb,
165
214
file_path,
166
- commit . id ,
215
+ suspect ,
167
216
parent_id,
168
217
& mut stats,
169
218
& mut diff_state,
@@ -588,8 +637,82 @@ fn find_path_entry_in_commit(
588
637
Ok ( res. map ( |e| e. oid ) )
589
638
}
590
639
591
- /// Return an iterator over tokens for use in diffing. These usually lines, but iit's important to unify them
592
- /// so the later access shows the right thing.
640
+ type CommitTime = i64 ;
641
+
642
+ fn commit_time ( commit : gix_traverse:: commit:: Either < ' _ , ' _ > ) -> CommitTime {
643
+ match commit {
644
+ gix_traverse:: commit:: Either :: CommitRefIter ( commit_ref_iter) => {
645
+ let mut commit_time = 0 ;
646
+ for token in commit_ref_iter {
647
+ use gix_object:: commit:: ref_iter:: Token as T ;
648
+ match token {
649
+ Ok ( T :: Tree { .. } ) => continue ,
650
+ Ok ( T :: Parent { .. } ) => continue ,
651
+ Ok ( T :: Author { .. } ) => continue ,
652
+ Ok ( T :: Committer { signature } ) => {
653
+ commit_time = signature. time . seconds ;
654
+ break ;
655
+ }
656
+ Ok ( _unused_token) => break ,
657
+ Err ( _err) => todo ! ( ) ,
658
+ }
659
+ }
660
+ commit_time
661
+ }
662
+ gix_traverse:: commit:: Either :: CachedCommit ( commit) => commit. committer_timestamp ( ) as i64 ,
663
+ }
664
+ }
665
+
666
+ type ParentIds = SmallVec < [ ( gix_hash:: ObjectId , i64 ) ; 2 ] > ;
667
+
668
+ fn collect_parents (
669
+ commit : gix_traverse:: commit:: Either < ' _ , ' _ > ,
670
+ odb : & impl gix_object:: Find ,
671
+ cache : Option < & gix_commitgraph:: Graph > ,
672
+ ) -> ParentIds {
673
+ let mut parent_ids: ParentIds = Default :: default ( ) ;
674
+
675
+ match commit {
676
+ gix_traverse:: commit:: Either :: CachedCommit ( commit) => {
677
+ let cache = cache
678
+ . as_ref ( )
679
+ . expect ( "find returned a cached commit, so we expect cache to be present" ) ;
680
+ for parent_id in commit. iter_parents ( ) {
681
+ match parent_id {
682
+ Ok ( pos) => {
683
+ let parent = cache. commit_at ( pos) ;
684
+
685
+ parent_ids. push ( ( parent. id ( ) . to_owned ( ) , parent. committer_timestamp ( ) as i64 ) ) ;
686
+ }
687
+ Err ( _) => todo ! ( ) ,
688
+ }
689
+ }
690
+ }
691
+ gix_traverse:: commit:: Either :: CommitRefIter ( commit_ref_iter) => {
692
+ for token in commit_ref_iter {
693
+ match token {
694
+ Ok ( gix_object:: commit:: ref_iter:: Token :: Tree { .. } ) => continue ,
695
+ Ok ( gix_object:: commit:: ref_iter:: Token :: Parent { id } ) => {
696
+ let mut buf = Vec :: new ( ) ;
697
+ let parent = odb. find_commit_iter ( id. as_ref ( ) , & mut buf) . ok ( ) ;
698
+ let parent_commit_time = parent
699
+ . and_then ( |parent| parent. committer ( ) . ok ( ) . map ( |committer| committer. time . seconds ) )
700
+ . unwrap_or_default ( ) ;
701
+
702
+ parent_ids. push ( ( id, parent_commit_time) ) ;
703
+ }
704
+ Ok ( _unused_token) => break ,
705
+ Err ( _err) => todo ! ( ) ,
706
+ }
707
+ }
708
+ }
709
+ } ;
710
+
711
+ parent_ids
712
+ }
713
+
714
+ /// Return an iterator over tokens for use in diffing. These are usually lines, but it's important
715
+ /// to unify them so the later access shows the right thing.
593
716
pub ( crate ) fn tokens_for_diffing ( data : & [ u8 ] ) -> impl TokenSource < Token = & [ u8 ] > {
594
717
gix_diff:: blob:: sources:: byte_lines_with_terminator ( data)
595
718
}
0 commit comments