Skip to content

Commit e4f9ddb

Browse files
committed
Auto merge of #24180 - huonw:optimise-max-etc, r=alexcrichton
The main change in this patch is removing the use of `Option` inside the inner loops of those functions to avoid comparisons where one branch will only trigger on the first pass through the loop. The included benchmarks go from: test bench_max ... bench: 372 ns/iter (+/- 118) test bench_max_by ... bench: 428 ns/iter (+/- 33) test bench_max_by2 ... bench: 7128 ns/iter (+/- 326) to: test bench_max ... bench: 317 ns/iter (+/- 64) test bench_max_by ... bench: 356 ns/iter (+/- 270) test bench_max_by2 ... bench: 1387 ns/iter (+/- 183) Problem noticed in http://www.reddit.com/r/rust/comments/31syce/using_iterators_to_find_the_index_of_the_min_or/
2 parents 6b95d8b + c2258d6 commit e4f9ddb

File tree

2 files changed

+88
-36
lines changed

2 files changed

+88
-36
lines changed

src/libcore/iter.rs

+57-36
Original file line numberDiff line numberDiff line change
@@ -744,12 +744,12 @@ pub trait Iterator {
744744
#[stable(feature = "rust1", since = "1.0.0")]
745745
fn max(self) -> Option<Self::Item> where Self: Sized, Self::Item: Ord
746746
{
747-
self.fold(None, |max, y| {
748-
match max {
749-
None => Some(y),
750-
Some(x) => Some(cmp::max(x, y))
751-
}
752-
})
747+
select_fold1(self,
748+
|_| (),
749+
// switch to y even if it is only equal, to preserve
750+
// stability.
751+
|_, x, _, y| *x <= *y)
752+
.map(|(_, x)| x)
753753
}
754754

755755
/// Consumes the entire iterator to return the minimum element.
@@ -767,12 +767,12 @@ pub trait Iterator {
767767
#[stable(feature = "rust1", since = "1.0.0")]
768768
fn min(self) -> Option<Self::Item> where Self: Sized, Self::Item: Ord
769769
{
770-
self.fold(None, |min, y| {
771-
match min {
772-
None => Some(y),
773-
Some(x) => Some(cmp::min(x, y))
774-
}
775-
})
770+
select_fold1(self,
771+
|_| (),
772+
// only switch to y if it is strictly smaller, to
773+
// preserve stability.
774+
|_, x, _, y| *x > *y)
775+
.map(|(_, x)| x)
776776
}
777777

778778
/// `min_max` finds the minimum and maximum elements in the iterator.
@@ -870,21 +870,16 @@ pub trait Iterator {
870870
#[inline]
871871
#[unstable(feature = "core",
872872
reason = "may want to produce an Ordering directly; see #15311")]
873-
fn max_by<B: Ord, F>(self, mut f: F) -> Option<Self::Item> where
873+
fn max_by<B: Ord, F>(self, f: F) -> Option<Self::Item> where
874874
Self: Sized,
875875
F: FnMut(&Self::Item) -> B,
876876
{
877-
self.fold(None, |max: Option<(Self::Item, B)>, y| {
878-
let y_val = f(&y);
879-
match max {
880-
None => Some((y, y_val)),
881-
Some((x, x_val)) => if y_val >= x_val {
882-
Some((y, y_val))
883-
} else {
884-
Some((x, x_val))
885-
}
886-
}
887-
}).map(|(x, _)| x)
877+
select_fold1(self,
878+
f,
879+
// switch to y even if it is only equal, to preserve
880+
// stability.
881+
|x_p, _, y_p, _| x_p <= y_p)
882+
.map(|(_, x)| x)
888883
}
889884

890885
/// Return the element that gives the minimum value from the
@@ -904,21 +899,16 @@ pub trait Iterator {
904899
#[inline]
905900
#[unstable(feature = "core",
906901
reason = "may want to produce an Ordering directly; see #15311")]
907-
fn min_by<B: Ord, F>(self, mut f: F) -> Option<Self::Item> where
902+
fn min_by<B: Ord, F>(self, f: F) -> Option<Self::Item> where
908903
Self: Sized,
909904
F: FnMut(&Self::Item) -> B,
910905
{
911-
self.fold(None, |min: Option<(Self::Item, B)>, y| {
912-
let y_val = f(&y);
913-
match min {
914-
None => Some((y, y_val)),
915-
Some((x, x_val)) => if x_val <= y_val {
916-
Some((x, x_val))
917-
} else {
918-
Some((y, y_val))
919-
}
920-
}
921-
}).map(|(x, _)| x)
906+
select_fold1(self,
907+
f,
908+
// only switch to y if it is strictly smaller, to
909+
// preserve stability.
910+
|x_p, _, y_p, _| x_p > y_p)
911+
.map(|(_, x)| x)
922912
}
923913

924914
/// Change the direction of the iterator
@@ -1066,6 +1056,37 @@ pub trait Iterator {
10661056
}
10671057
}
10681058

1059+
/// Select an element from an iterator based on the given projection
1060+
/// and "comparison" function.
1061+
///
1062+
/// This is an idiosyncratic helper to try to factor out the
1063+
/// commonalities of {max,min}{,_by}. In particular, this avoids
1064+
/// having to implement optimisations several times.
1065+
#[inline]
1066+
fn select_fold1<I,B, FProj, FCmp>(mut it: I,
1067+
mut f_proj: FProj,
1068+
mut f_cmp: FCmp) -> Option<(B, I::Item)>
1069+
where I: Iterator,
1070+
FProj: FnMut(&I::Item) -> B,
1071+
FCmp: FnMut(&B, &I::Item, &B, &I::Item) -> bool
1072+
{
1073+
// start with the first element as our selection. This avoids
1074+
// having to use `Option`s inside the loop, translating to a
1075+
// sizeable performance gain (6x in one case).
1076+
it.next().map(|mut sel| {
1077+
let mut sel_p = f_proj(&sel);
1078+
1079+
for x in it {
1080+
let x_p = f_proj(&x);
1081+
if f_cmp(&sel_p, &sel, &x_p, &x) {
1082+
sel = x;
1083+
sel_p = x_p;
1084+
}
1085+
}
1086+
(sel_p, sel)
1087+
})
1088+
}
1089+
10691090
#[stable(feature = "rust1", since = "1.0.0")]
10701091
impl<'a, I: Iterator + ?Sized> Iterator for &'a mut I {
10711092
type Item = I::Item;

src/libcoretest/iter.rs

+31
Original file line numberDiff line numberDiff line change
@@ -901,3 +901,34 @@ fn bench_multiple_take(b: &mut Bencher) {
901901
}
902902
});
903903
}
904+
905+
fn scatter(x: i32) -> i32 { (x * 31) % 127 }
906+
907+
#[bench]
908+
fn bench_max_by(b: &mut Bencher) {
909+
b.iter(|| {
910+
let it = 0..100;
911+
it.max_by(|&x| scatter(x))
912+
})
913+
}
914+
915+
// http://www.reddit.com/r/rust/comments/31syce/using_iterators_to_find_the_index_of_the_min_or/
916+
#[bench]
917+
fn bench_max_by2(b: &mut Bencher) {
918+
fn max_index_iter(array: &[i32]) -> usize {
919+
array.iter().enumerate().max_by(|&(_, item)| item).unwrap().0
920+
}
921+
922+
let mut data = vec![0i32; 1638];
923+
data[514] = 9999;
924+
925+
b.iter(|| max_index_iter(&data));
926+
}
927+
928+
#[bench]
929+
fn bench_max(b: &mut Bencher) {
930+
b.iter(|| {
931+
let it = 0..100;
932+
it.map(scatter).max()
933+
})
934+
}

0 commit comments

Comments
 (0)