Skip to content

Commit c24c906

Browse files
committed
Auto merge of #88060 - TennyZhuang:optimize-vec-retain, r=dtolnay
Optimize unnecessary check in Vec::retain The function `vec::Vec::retain` only have two stages: 1. Nothing was deleted. 2. Some elements were deleted. Here is an unnecessary check `if g.deleted_cnt > 0` in the loop, and it's difficult for compiler to optimize it. I split the loop into two stages manully and keep the code clean using const generics. I write a special but common bench case for this optimization. I call retain on vec but keep all elements. Before and after this optimization: ``` test vec::bench_retain_whole_100000 ... bench: 84,803 ns/iter (+/- 17,314) ``` ``` test vec::bench_retain_whole_100000 ... bench: 42,638 ns/iter (+/- 16,910) ``` The result is expected, there are two `if`s before the optimization and one `if` after.
2 parents 77f1e50 + 3839ca9 commit c24c906

File tree

2 files changed

+39
-3
lines changed

2 files changed

+39
-3
lines changed

library/alloc/benches/vec.rs

+15
Original file line numberDiff line numberDiff line change
@@ -732,3 +732,18 @@ fn bench_flat_map_collect(b: &mut Bencher) {
732732
let v = vec![777u32; 500000];
733733
b.iter(|| v.iter().flat_map(|color| color.rotate_left(8).to_be_bytes()).collect::<Vec<_>>());
734734
}
735+
736+
#[bench]
737+
fn bench_retain_100000(b: &mut Bencher) {
738+
let v = (1..=100000).collect::<Vec<u32>>();
739+
b.iter(|| {
740+
let mut v = v.clone();
741+
v.retain(|x| x & 1 == 0)
742+
});
743+
}
744+
745+
#[bench]
746+
fn bench_retain_whole_100000(b: &mut Bencher) {
747+
let mut v = black_box(vec![826u32; 100000]);
748+
b.iter(|| v.retain(|x| *x == 826u32));
749+
}

library/alloc/src/vec/mod.rs

+24-3
Original file line numberDiff line numberDiff line change
@@ -1488,7 +1488,15 @@ impl<T, A: Allocator> Vec<T, A> {
14881488

14891489
let mut g = BackshiftOnDrop { v: self, processed_len: 0, deleted_cnt: 0, original_len };
14901490

1491-
while g.processed_len < original_len {
1491+
// process_one return a bool indicates whether the processing element should be retained.
1492+
#[inline(always)]
1493+
fn process_one<F, T, A: Allocator, const DELETED: bool>(
1494+
f: &mut F,
1495+
g: &mut BackshiftOnDrop<'_, T, A>,
1496+
) -> bool
1497+
where
1498+
F: FnMut(&T) -> bool,
1499+
{
14921500
// SAFETY: Unchecked element must be valid.
14931501
let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) };
14941502
if !f(cur) {
@@ -1498,9 +1506,9 @@ impl<T, A: Allocator> Vec<T, A> {
14981506
// SAFETY: We never touch this element again after dropped.
14991507
unsafe { ptr::drop_in_place(cur) };
15001508
// We already advanced the counter.
1501-
continue;
1509+
return false;
15021510
}
1503-
if g.deleted_cnt > 0 {
1511+
if DELETED {
15041512
// SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element.
15051513
// We use copy for move, and never touch this element again.
15061514
unsafe {
@@ -1509,6 +1517,19 @@ impl<T, A: Allocator> Vec<T, A> {
15091517
}
15101518
}
15111519
g.processed_len += 1;
1520+
return true;
1521+
}
1522+
1523+
// Stage 1: Nothing was deleted.
1524+
while g.processed_len != original_len {
1525+
if !process_one::<F, T, A, false>(&mut f, &mut g) {
1526+
break;
1527+
}
1528+
}
1529+
1530+
// Stage 2: Some elements were deleted.
1531+
while g.processed_len != original_len {
1532+
process_one::<F, T, A, true>(&mut f, &mut g);
15121533
}
15131534

15141535
// All item are processed. This can be optimized to `set_len` by LLVM.

0 commit comments

Comments
 (0)