Skip to content

Commit a884a6c

Browse files
author
Stjepan Glavina
committed
Slightly optimize slice::sort
First, get rid of some bound checks. Second, instead of comparing by ternary `compare` function, use a binary function testing whether an element is less than some other element. This apparently makes it easier for the compiler to reason about the code. Benchmark: ``` name before ns/iter after ns/iter diff ns/iter diff % slice::bench::sort_large_ascending 8,969 (8919 MB/s) 7,410 (10796 MB/s) -1,559 -17.38% slice::bench::sort_large_big_ascending 355,640 (3599 MB/s) 359,137 (3564 MB/s) 3,497 0.98% slice::bench::sort_large_big_descending 427,112 (2996 MB/s) 424,721 (3013 MB/s) -2,391 -0.56% slice::bench::sort_large_big_random 2,207,799 (579 MB/s) 2,138,804 (598 MB/s) -68,995 -3.13% slice::bench::sort_large_descending 13,694 (5841 MB/s) 13,514 (5919 MB/s) -180 -1.31% slice::bench::sort_large_mostly_ascending 239,697 (333 MB/s) 203,542 (393 MB/s) -36,155 -15.08% slice::bench::sort_large_mostly_descending 270,102 (296 MB/s) 234,263 (341 MB/s) -35,839 -13.27% slice::bench::sort_large_random 513,406 (155 MB/s) 470,084 (170 MB/s) -43,322 -8.44% slice::bench::sort_large_random_expensive 23,650,321 (3 MB/s) 23,675,098 (3 MB/s) 24,777 0.10% slice::bench::sort_medium_ascending 143 (5594 MB/s) 132 (6060 MB/s) -11 -7.69% slice::bench::sort_medium_descending 197 (4060 MB/s) 188 (4255 MB/s) -9 -4.57% slice::bench::sort_medium_random 3,358 (238 MB/s) 3,271 (244 MB/s) -87 -2.59% slice::bench::sort_small_ascending 32 (2500 MB/s) 32 (2500 MB/s) 0 0.00% slice::bench::sort_small_big_ascending 97 (13195 MB/s) 97 (13195 MB/s) 0 0.00% slice::bench::sort_small_big_descending 247 (5182 MB/s) 249 (5140 MB/s) 2 0.81% slice::bench::sort_small_big_random 502 (2549 MB/s) 498 (2570 MB/s) -4 -0.80% slice::bench::sort_small_descending 55 (1454 MB/s) 61 (1311 MB/s) 6 10.91% slice::bench::sort_small_random 358 (223 MB/s) 356 (224 MB/s) -2 -0.56% ```
1 parent 7df5c0f commit a884a6c

File tree

1 file changed

+36
-32
lines changed

1 file changed

+36
-32
lines changed

src/libcollections/slice.rs

+36-32
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@
9898
#![cfg_attr(test, allow(unused_imports, dead_code))]
9999

100100
use alloc::boxed::Box;
101-
use core::cmp::Ordering::{self, Greater};
101+
use core::cmp::Ordering::{self, Less};
102102
use core::mem::size_of;
103103
use core::mem;
104104
use core::ptr;
@@ -1089,7 +1089,7 @@ impl<T> [T] {
10891089
pub fn sort(&mut self)
10901090
where T: Ord
10911091
{
1092-
self.sort_by(|a, b| a.cmp(b))
1092+
merge_sort(self, |a, b| a.lt(b));
10931093
}
10941094

10951095
/// Sorts the slice using `f` to extract a key to compare elements by.
@@ -1119,7 +1119,7 @@ impl<T> [T] {
11191119
pub fn sort_by_key<B, F>(&mut self, mut f: F)
11201120
where F: FnMut(&T) -> B, B: Ord
11211121
{
1122-
self.sort_by(|a, b| f(a).cmp(&f(b)))
1122+
merge_sort(self, |a, b| f(a).lt(&f(b)));
11231123
}
11241124

11251125
/// Sorts the slice using `compare` to compare elements.
@@ -1149,10 +1149,10 @@ impl<T> [T] {
11491149
/// ```
11501150
#[stable(feature = "rust1", since = "1.0.0")]
11511151
#[inline]
1152-
pub fn sort_by<F>(&mut self, compare: F)
1152+
pub fn sort_by<F>(&mut self, mut compare: F)
11531153
where F: FnMut(&T, &T) -> Ordering
11541154
{
1155-
merge_sort(self, compare)
1155+
merge_sort(self, |a, b| compare(a, b) == Less);
11561156
}
11571157

11581158
/// Copies the elements from `src` into `self`.
@@ -1355,10 +1355,10 @@ impl<T: Clone> ToOwned for [T] {
13551355
/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted.
13561356
///
13571357
/// This is the integral subroutine of insertion sort.
1358-
fn insert_head<T, F>(v: &mut [T], compare: &mut F)
1359-
where F: FnMut(&T, &T) -> Ordering
1358+
fn insert_head<T, F>(v: &mut [T], is_less: &mut F)
1359+
where F: FnMut(&T, &T) -> bool
13601360
{
1361-
if v.len() >= 2 && compare(&v[0], &v[1]) == Greater {
1361+
if v.len() >= 2 && is_less(&v[1], &v[0]) {
13621362
unsafe {
13631363
// There are three ways to implement insertion here:
13641364
//
@@ -1381,12 +1381,12 @@ fn insert_head<T, F>(v: &mut [T], compare: &mut F)
13811381

13821382
// Intermediate state of the insertion process is always tracked by `hole`, which
13831383
// serves two purposes:
1384-
// 1. Protects integrity of `v` from panics in `compare`.
1384+
// 1. Protects integrity of `v` from panics in `is_less`.
13851385
// 2. Fills the remaining hole in `v` in the end.
13861386
//
13871387
// Panic safety:
13881388
//
1389-
// If `compare` panics at any point during the process, `hole` will get dropped and
1389+
// If `is_less` panics at any point during the process, `hole` will get dropped and
13901390
// fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
13911391
// initially held exactly once.
13921392
let mut hole = InsertionHole {
@@ -1396,7 +1396,7 @@ fn insert_head<T, F>(v: &mut [T], compare: &mut F)
13961396
ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);
13971397

13981398
for i in 2..v.len() {
1399-
if compare(&tmp.value, &v[i]) != Greater {
1399+
if !is_less(&v[i], &tmp.value) {
14001400
break;
14011401
}
14021402
ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
@@ -1432,8 +1432,8 @@ fn insert_head<T, F>(v: &mut [T], compare: &mut F)
14321432
///
14331433
/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough
14341434
/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type.
1435-
unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, compare: &mut F)
1436-
where F: FnMut(&T, &T) -> Ordering
1435+
unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &mut F)
1436+
where F: FnMut(&T, &T) -> bool
14371437
{
14381438
let len = v.len();
14391439
let v = v.as_mut_ptr();
@@ -1449,12 +1449,12 @@ unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, compare: &mut F)
14491449
// hole in `v`.
14501450
//
14511451
// Intermediate state of the process is always tracked by `hole`, which serves two purposes:
1452-
// 1. Protects integrity of `v` from panics in `compare`.
1452+
// 1. Protects integrity of `v` from panics in `is_less`.
14531453
// 2. Fills the remaining hole in `v` if the longer run gets consumed first.
14541454
//
14551455
// Panic safety:
14561456
//
1457-
// If `compare` panics at any point during the process, `hole` will get dropped and fill the
1457+
// If `is_less` panics at any point during the process, `hole` will get dropped and fill the
14581458
// hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every
14591459
// object it initially held exactly once.
14601460
let mut hole;
@@ -1476,7 +1476,7 @@ unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, compare: &mut F)
14761476
while *left < hole.end && right < v_end {
14771477
// Consume the lesser side.
14781478
// If equal, prefer the left run to maintain stability.
1479-
let to_copy = if compare(&**left, &*right) == Greater {
1479+
let to_copy = if is_less(&*right, &**left) {
14801480
get_and_increment(&mut right)
14811481
} else {
14821482
get_and_increment(left)
@@ -1500,7 +1500,7 @@ unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, compare: &mut F)
15001500
while v < *left && buf < *right {
15011501
// Consume the greater side.
15021502
// If equal, prefer the right run to maintain stability.
1503-
let to_copy = if compare(&*left.offset(-1), &*right.offset(-1)) == Greater {
1503+
let to_copy = if is_less(&*right.offset(-1), &*left.offset(-1)) {
15041504
decrement_and_get(left)
15051505
} else {
15061506
decrement_and_get(right)
@@ -1550,8 +1550,8 @@ unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, compare: &mut F)
15501550
/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
15511551
///
15521552
/// The invariants ensure that the total running time is `O(n log n)` worst-case.
1553-
fn merge_sort<T, F>(v: &mut [T], mut compare: F)
1554-
where F: FnMut(&T, &T) -> Ordering
1553+
fn merge_sort<T, F>(v: &mut [T], mut is_less: F)
1554+
where F: FnMut(&T, &T) -> bool
15551555
{
15561556
// Sorting has no meaningful behavior on zero-sized types.
15571557
if size_of::<T>() == 0 {
@@ -1565,7 +1565,7 @@ fn merge_sort<T, F>(v: &mut [T], mut compare: F)
15651565
//
15661566
// Short runs are extended using insertion sort to span at least `min_run` elements, in order
15671567
// to improve performance.
1568-
let (max_insertion, min_run) = if size_of::<T>() <= 16 {
1568+
let (max_insertion, min_run) = if size_of::<T>() <= 2 * mem::size_of::<usize>() {
15691569
(64, 32)
15701570
} else {
15711571
(32, 16)
@@ -1577,15 +1577,15 @@ fn merge_sort<T, F>(v: &mut [T], mut compare: F)
15771577
if len <= max_insertion {
15781578
if len >= 2 {
15791579
for i in (0..len-1).rev() {
1580-
insert_head(&mut v[i..], &mut compare);
1580+
insert_head(&mut v[i..], &mut is_less);
15811581
}
15821582
}
15831583
return;
15841584
}
15851585

15861586
// Allocate a buffer to use as scratch memory. We keep the length 0 so we can keep in it
15871587
// shallow copies of the contents of `v` without risking the dtors running on copies if
1588-
// `compare` panics. When merging two sorted runs, this buffer holds a copy of the shorter run,
1588+
// `is_less` panics. When merging two sorted runs, this buffer holds a copy of the shorter run,
15891589
// which will always have length at most `len / 2`.
15901590
let mut buf = Vec::with_capacity(len / 2);
15911591

@@ -1600,14 +1600,18 @@ fn merge_sort<T, F>(v: &mut [T], mut compare: F)
16001600
let mut start = end - 1;
16011601
if start > 0 {
16021602
start -= 1;
1603-
if compare(&v[start], &v[start + 1]) == Greater {
1604-
while start > 0 && compare(&v[start - 1], &v[start]) == Greater {
1605-
start -= 1;
1606-
}
1607-
v[start..end].reverse();
1608-
} else {
1609-
while start > 0 && compare(&v[start - 1], &v[start]) != Greater {
1610-
start -= 1;
1603+
unsafe {
1604+
if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) {
1605+
while start > 0 && is_less(v.get_unchecked(start),
1606+
v.get_unchecked(start - 1)) {
1607+
start -= 1;
1608+
}
1609+
v[start..end].reverse();
1610+
} else {
1611+
while start > 0 && !is_less(v.get_unchecked(start),
1612+
v.get_unchecked(start - 1)) {
1613+
start -= 1;
1614+
}
16111615
}
16121616
}
16131617
}
@@ -1616,7 +1620,7 @@ fn merge_sort<T, F>(v: &mut [T], mut compare: F)
16161620
// merge sort on short sequences, so this significantly improves performance.
16171621
while start > 0 && end - start < min_run {
16181622
start -= 1;
1619-
insert_head(&mut v[start..end], &mut compare);
1623+
insert_head(&mut v[start..end], &mut is_less);
16201624
}
16211625

16221626
// Push this run onto the stack.
@@ -1632,7 +1636,7 @@ fn merge_sort<T, F>(v: &mut [T], mut compare: F)
16321636
let right = runs[r];
16331637
unsafe {
16341638
merge(&mut v[left.start .. right.start + right.len], left.len, buf.as_mut_ptr(),
1635-
&mut compare);
1639+
&mut is_less);
16361640
}
16371641
runs[r] = Run {
16381642
start: left.start,

0 commit comments

Comments
 (0)