Skip to content

Commit 1dbb94a

Browse files
nrdxp authored and Byron committed
feat!: add option to traverse commits from oldest to newest
This change introduces an enum to control commit traversal order. Users can now choose between newest-first or oldest-first traversal. The default behavior remains newest-first, but it can be toggled by passing a CommitTimeOrder to a Sorting::ByCommitTime* variant. This feature is particularly useful for searching early repository history. The implementation remains largely agnostic to this change, with only minor logic adjustments in key areas as necessary. The reversed order is achieved by inverting the PriorityQueue key when an oldest-first traversal is requested.
1 parent 7bf2f4f commit 1dbb94a

File tree

2 files changed

+90
-35
lines changed

2 files changed

+90
-35
lines changed

gix-traverse/src/commit/simple.rs

Lines changed: 81 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,19 @@ use gix_date::SecondsSinceUnixEpoch;
22
use gix_hash::ObjectId;
33
use gix_hashtable::HashSet;
44
use smallvec::SmallVec;
5+
use std::cmp::Reverse;
56
use std::collections::VecDeque;
67

8+
#[derive(Default, Debug, Copy, Clone)]
9+
/// The order with which to prioritize the search
10+
pub enum CommitTimeOrder {
11+
#[default]
12+
/// sort commits by newest first
13+
NewestFirst,
14+
/// sort commits by oldest first
15+
OldestFirst,
16+
}
17+
718
/// Specify how to sort commits during a [simple](super::Simple) traversal.
819
///
920
/// ### Sample History
@@ -28,24 +39,27 @@ pub enum Sorting {
2839
/// as it avoids overlapping branches.
2940
#[default]
3041
BreadthFirst,
31-
/// Commits are sorted by their commit time in descending order, that is newest first.
42+
/// Commits are sorted by their commit time in the order specified, either newest or oldest first.
3243
///
3344
/// The sorting applies to all currently queued commit ids and thus is full.
3445
///
35-
/// In the *sample history* the order would be `8, 7, 6, 5, 4, 3, 2, 1`
46+
/// In the *sample history* the order would be `8, 7, 6, 5, 4, 3, 2, 1` for NewestFirst
47+
/// Or `1, 2, 3, 4, 5, 6, 7, 8` for OldestFirst
3648
///
3749
/// # Performance
3850
///
3951
/// This mode benefits greatly from having an object_cache in `find()`
4052
/// to avoid having to lookup each commit twice.
41-
ByCommitTimeNewestFirst,
42-
/// This sorting is similar to `ByCommitTimeNewestFirst`, but adds a cutoff to not return commits older than
53+
ByCommitTime(CommitTimeOrder),
54+
/// This sorting is similar to `ByCommitTime`, but adds a cutoff to not return commits older than
4355
/// a given time, stopping the iteration once no younger commit is queued to be traversed.
4456
///
4557
/// As the query is usually repeated with different cutoff dates, this search mode benefits greatly from an object cache.
4658
///
4759
/// In the *sample history* and a cut-off date of 4, the returned list of commits would be `8, 7, 6, 4`
48-
ByCommitTimeNewestFirstCutoffOlderThan {
60+
ByCommitTimeCutoff {
61+
/// The order in which to prioritize lookups
62+
order: CommitTimeOrder,
4963
/// The amount of seconds since unix epoch, the same value obtained by any `gix_date::Time` structure and the way git counts time.
5064
seconds: gix_date::SecondsSinceUnixEpoch,
5165
},
@@ -61,11 +75,14 @@ pub enum Error {
6175
ObjectDecode(#[from] gix_object::decode::Error),
6276
}
6377

78+
use Result as Either;
79+
type QueueKey<T> = Either<T, Reverse<T>>;
80+
6481
/// The state used and potentially shared by multiple graph traversals.
6582
#[derive(Clone)]
6683
pub(super) struct State {
6784
next: VecDeque<ObjectId>,
68-
queue: gix_revwalk::PriorityQueue<SecondsSinceUnixEpoch, ObjectId>,
85+
queue: gix_revwalk::PriorityQueue<QueueKey<SecondsSinceUnixEpoch>, ObjectId>,
6986
buf: Vec<u8>,
7087
seen: HashSet<ObjectId>,
7188
parents_buf: Vec<u8>,
@@ -77,10 +94,13 @@ mod init {
7794
use gix_date::SecondsSinceUnixEpoch;
7895
use gix_hash::{oid, ObjectId};
7996
use gix_object::{CommitRefIter, FindExt};
97+
use std::cmp::Reverse;
98+
use Err as Oldest;
99+
use Ok as Newest;
80100

81101
use super::{
82102
super::{simple::Sorting, Either, Info, ParentIds, Parents, Simple},
83-
collect_parents, Error, State,
103+
collect_parents, CommitTimeOrder, Error, State,
84104
};
85105

86106
impl Default for State {
@@ -105,6 +125,14 @@ mod init {
105125
}
106126
}
107127

128+
fn order_time(i: i64, order: CommitTimeOrder) -> super::QueueKey<i64> {
129+
if let CommitTimeOrder::NewestFirst = order {
130+
Newest(i)
131+
} else {
132+
Oldest(Reverse(i))
133+
}
134+
}
135+
108136
/// Builder
109137
impl<Find, Predicate> Simple<Find, Predicate>
110138
where
@@ -117,19 +145,23 @@ mod init {
117145
Sorting::BreadthFirst => {
118146
self.queue_to_vecdeque();
119147
}
120-
Sorting::ByCommitTimeNewestFirst | Sorting::ByCommitTimeNewestFirstCutoffOlderThan { .. } => {
148+
Sorting::ByCommitTime(order) | Sorting::ByCommitTimeCutoff { order, .. } => {
121149
let cutoff_time = self.sorting.cutoff_time();
122150
let state = &mut self.state;
123151
for commit_id in state.next.drain(..) {
124152
let commit_iter = self.objects.find_commit_iter(&commit_id, &mut state.buf)?;
125153
let time = commit_iter.committer()?.time.seconds;
126-
match cutoff_time {
127-
Some(cutoff_time) if time >= cutoff_time => {
128-
state.queue.insert(time, commit_id);
154+
let ordered_time = order_time(time, order);
155+
match (cutoff_time, order) {
156+
(Some(cutoff_time), CommitTimeOrder::NewestFirst) if time >= cutoff_time => {
157+
state.queue.insert(ordered_time, commit_id);
158+
}
159+
(Some(cutoff_time), CommitTimeOrder::OldestFirst) if time <= cutoff_time => {
160+
state.queue.insert(ordered_time, commit_id);
129161
}
130-
Some(_) => {}
131-
None => {
132-
state.queue.insert(time, commit_id);
162+
(Some(_), _) => {}
163+
(None, _) => {
164+
state.queue.insert(ordered_time, commit_id);
133165
}
134166
}
135167
}
@@ -254,10 +286,8 @@ mod init {
254286
} else {
255287
match self.sorting {
256288
Sorting::BreadthFirst => self.next_by_topology(),
257-
Sorting::ByCommitTimeNewestFirst => self.next_by_commit_date(None),
258-
Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => {
259-
self.next_by_commit_date(seconds.into())
260-
}
289+
Sorting::ByCommitTime(order) => self.next_by_commit_date(order, None),
290+
Sorting::ByCommitTimeCutoff { seconds, order } => self.next_by_commit_date(order, seconds.into()),
261291
}
262292
}
263293
}
@@ -267,7 +297,7 @@ mod init {
267297
/// If not topo sort, provide the cutoff date if present.
268298
fn cutoff_time(&self) -> Option<SecondsSinceUnixEpoch> {
269299
match self {
270-
Sorting::ByCommitTimeNewestFirstCutoffOlderThan { seconds } => Some(*seconds),
300+
Sorting::ByCommitTimeCutoff { seconds, .. } => Some(*seconds),
271301
_ => None,
272302
}
273303
}
@@ -281,18 +311,21 @@ mod init {
281311
{
282312
fn next_by_commit_date(
283313
&mut self,
284-
cutoff_older_than: Option<SecondsSinceUnixEpoch>,
314+
order: CommitTimeOrder,
315+
cutoff: Option<SecondsSinceUnixEpoch>,
285316
) -> Option<Result<Info, Error>> {
286317
let state = &mut self.state;
287318

288-
let (commit_time, oid) = state.queue.pop()?;
319+
let (commit_time, oid) = match state.queue.pop()? {
320+
(Newest(t) | Oldest(Reverse(t)), o) => (t, o),
321+
};
289322
let mut parents: ParentIds = Default::default();
290323
match super::super::find(self.cache.as_ref(), &self.objects, &oid, &mut state.buf) {
291324
Ok(Either::CachedCommit(commit)) => {
292325
if !collect_parents(&mut state.parent_ids, self.cache.as_ref(), commit.iter_parents()) {
293326
// drop corrupt caches and try again with ODB
294327
self.cache = None;
295-
return self.next_by_commit_date(cutoff_older_than);
328+
return self.next_by_commit_date(order, cutoff);
296329
}
297330
for (id, parent_commit_time) in state.parent_ids.drain(..) {
298331
parents.push(id);
@@ -301,9 +334,19 @@ mod init {
301334
continue;
302335
}
303336

304-
match cutoff_older_than {
305-
Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue,
306-
Some(_) | None => state.queue.insert(parent_commit_time, id),
337+
let time = order_time(parent_commit_time, order);
338+
match (cutoff, order) {
339+
(Some(cutoff_older_than), CommitTimeOrder::NewestFirst)
340+
if parent_commit_time < cutoff_older_than =>
341+
{
342+
continue
343+
}
344+
(Some(cutoff_newer_than), CommitTimeOrder::OldestFirst)
345+
if parent_commit_time > cutoff_newer_than =>
346+
{
347+
continue
348+
}
349+
(Some(_) | None, _) => state.queue.insert(time, id),
307350
}
308351
}
309352
}
@@ -323,9 +366,19 @@ mod init {
323366
.and_then(|parent| parent.committer().ok().map(|committer| committer.time.seconds))
324367
.unwrap_or_default();
325368

326-
match cutoff_older_than {
327-
Some(cutoff_older_than) if parent_commit_time < cutoff_older_than => continue,
328-
Some(_) | None => state.queue.insert(parent_commit_time, id),
369+
let time = order_time(parent_commit_time, order);
370+
match (cutoff, order) {
371+
(Some(cutoff_older_than), CommitTimeOrder::NewestFirst)
372+
if parent_commit_time < cutoff_older_than =>
373+
{
374+
continue
375+
}
376+
(Some(cutoff_newer_than), CommitTimeOrder::OldestFirst)
377+
if parent_commit_time > cutoff_newer_than =>
378+
{
379+
continue
380+
}
381+
(Some(_) | None, _) => state.queue.insert(time, id),
329382
}
330383
}
331384
Ok(_unused_token) => break,

gix-traverse/tests/commit/simple.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ mod different_date_intermixed {
134134
"65d6af66f60b8e39fd1ba6a1423178831e764ec5", /* c1 */
135135
],
136136
)
137-
.with_sorting(Sorting::ByCommitTimeNewestFirst)
137+
.with_sorting(Sorting::ByCommitTime(Default::default()))
138138
.check()
139139
}
140140
}
@@ -186,7 +186,7 @@ mod different_date {
186186
"65d6af66f60b8e39fd1ba6a1423178831e764ec5", /* c1 */
187187
],
188188
)
189-
.with_sorting(Sorting::ByCommitTimeNewestFirst)
189+
.with_sorting(Sorting::ByCommitTime(Default::default()))
190190
.check()
191191
}
192192
}
@@ -247,7 +247,7 @@ mod same_date {
247247
"134385f6d781b7e97062102c6a483440bfda2a03", /* c1 */
248248
],
249249
)
250-
.with_sorting(Sorting::ByCommitTimeNewestFirst)
250+
.with_sorting(Sorting::ByCommitTime(Default::default()))
251251
.check()
252252
}
253253

@@ -368,7 +368,7 @@ mod adjusted_dates {
368368
"134385f6d781b7e97062102c6a483440bfda2a03", /* c1 */
369369
],
370370
)
371-
.with_sorting(Sorting::ByCommitTimeNewestFirst)
371+
.with_sorting(Sorting::ByCommitTime(Default::default()))
372372
.check()
373373
}
374374

@@ -379,7 +379,8 @@ mod adjusted_dates {
379379
&["288e509293165cb5630d08f4185bdf2445bf6170"], /* m1b1 */
380380
&["bcb05040a6925f2ff5e10d3ae1f9264f2e8c43ac"], /* b1c1 */
381381
)
382-
.with_sorting(Sorting::ByCommitTimeNewestFirstCutoffOlderThan {
382+
.with_sorting(Sorting::ByCommitTimeCutoff {
383+
order: Default::default(),
383384
seconds: 978393600, // =2001-01-02 00:00:00 +0000
384385
})
385386
.check()
@@ -394,7 +395,8 @@ mod adjusted_dates {
394395
Some(hex_to_id("9902e3c3e8f0c569b4ab295ddf473e6de763e1e7" /* c2 */)),
395396
&store,
396397
)
397-
.sorting(Sorting::ByCommitTimeNewestFirstCutoffOlderThan {
398+
.sorting(Sorting::ByCommitTimeCutoff {
399+
order: Default::default(),
398400
seconds: 978393600, // =2001-01-02 00:00:00 +0000
399401
})?;
400402
assert_eq!(
@@ -415,7 +417,7 @@ mod adjusted_dates {
415417
"134385f6d781b7e97062102c6a483440bfda2a03", /* c1 */
416418
],
417419
)
418-
.with_sorting(Sorting::ByCommitTimeNewestFirst)
420+
.with_sorting(Sorting::ByCommitTime(Default::default()))
419421
.with_parents(Parents::First)
420422
.check()
421423
}

0 commit comments

Comments
 (0)