Skip to content

Commit f1ea540

Browse files
committed
collections: Optimize Vec when cloning from a slice
LLVM is currently not able to convert `Vec::extend` into a memcpy for `Copy` types, which causes methods like `Vec::push_all` to run twice as slow as they should. This patch reuses the unsafe `Vec::clone` optimization to speed up all the operations that clone a slice into a `Vec`. before: test vec::tests::bench_clone_from_0000_0000 ... bench: 12 ns/iter (+/- 2) test vec::tests::bench_clone_from_0000_0010 ... bench: 125 ns/iter (+/- 4) = 80 MB/s test vec::tests::bench_clone_from_0000_0100 ... bench: 360 ns/iter (+/- 33) = 277 MB/s test vec::tests::bench_clone_from_0000_1000 ... bench: 2601 ns/iter (+/- 175) = 384 MB/s test vec::tests::bench_clone_from_0010_0000 ... bench: 12 ns/iter (+/- 2) test vec::tests::bench_clone_from_0010_0010 ... bench: 125 ns/iter (+/- 10) = 80 MB/s test vec::tests::bench_clone_from_0010_0100 ... bench: 361 ns/iter (+/- 28) = 277 MB/s test vec::tests::bench_clone_from_0100_0010 ... bench: 131 ns/iter (+/- 13) = 76 MB/s test vec::tests::bench_clone_from_0100_0100 ... bench: 360 ns/iter (+/- 9) = 277 MB/s test vec::tests::bench_clone_from_0100_1000 ... bench: 2575 ns/iter (+/- 168) = 388 MB/s test vec::tests::bench_clone_from_1000_0100 ... bench: 356 ns/iter (+/- 20) = 280 MB/s test vec::tests::bench_clone_from_1000_1000 ... bench: 2605 ns/iter (+/- 167) = 383 MB/s test vec::tests::bench_from_slice_0000 ... bench: 11 ns/iter (+/- 0) test vec::tests::bench_from_slice_0010 ... bench: 115 ns/iter (+/- 5) = 86 MB/s test vec::tests::bench_from_slice_0100 ... bench: 309 ns/iter (+/- 170) = 323 MB/s test vec::tests::bench_from_slice_1000 ... bench: 2065 ns/iter (+/- 198) = 484 MB/s test vec::tests::bench_push_all_0000_0000 ... bench: 7 ns/iter (+/- 0) test vec::tests::bench_push_all_0000_0010 ... bench: 79 ns/iter (+/- 7) = 126 MB/s test vec::tests::bench_push_all_0000_0100 ... bench: 342 ns/iter (+/- 18) = 292 MB/s test vec::tests::bench_push_all_0000_1000 ... 
bench: 2873 ns/iter (+/- 75) = 348 MB/s test vec::tests::bench_push_all_0010_0010 ... bench: 154 ns/iter (+/- 8) = 64 MB/s test vec::tests::bench_push_all_0100_0100 ... bench: 518 ns/iter (+/- 18) = 193 MB/s test vec::tests::bench_push_all_1000_1000 ... bench: 4490 ns/iter (+/- 223) = 222 MB/s after: test vec::tests::bench_clone_from_0000_0000 ... bench: 12 ns/iter (+/- 1) test vec::tests::bench_clone_from_0000_0010 ... bench: 123 ns/iter (+/- 5) = 81 MB/s test vec::tests::bench_clone_from_0000_0100 ... bench: 367 ns/iter (+/- 23) = 272 MB/s test vec::tests::bench_clone_from_0000_1000 ... bench: 2618 ns/iter (+/- 252) = 381 MB/s test vec::tests::bench_clone_from_0010_0000 ... bench: 12 ns/iter (+/- 1) test vec::tests::bench_clone_from_0010_0010 ... bench: 124 ns/iter (+/- 7) = 80 MB/s test vec::tests::bench_clone_from_0010_0100 ... bench: 369 ns/iter (+/- 34) = 271 MB/s test vec::tests::bench_clone_from_0100_0010 ... bench: 123 ns/iter (+/- 6) = 81 MB/s test vec::tests::bench_clone_from_0100_0100 ... bench: 371 ns/iter (+/- 25) = 269 MB/s test vec::tests::bench_clone_from_0100_1000 ... bench: 2713 ns/iter (+/- 532) = 368 MB/s test vec::tests::bench_clone_from_1000_0100 ... bench: 369 ns/iter (+/- 14) = 271 MB/s test vec::tests::bench_clone_from_1000_1000 ... bench: 2611 ns/iter (+/- 194) = 382 MB/s test vec::tests::bench_from_slice_0000 ... bench: 7 ns/iter (+/- 0) test vec::tests::bench_from_slice_0010 ... bench: 108 ns/iter (+/- 4) = 92 MB/s test vec::tests::bench_from_slice_0100 ... bench: 235 ns/iter (+/- 24) = 425 MB/s test vec::tests::bench_from_slice_1000 ... bench: 1318 ns/iter (+/- 96) = 758 MB/s test vec::tests::bench_push_all_0000_0000 ... bench: 7 ns/iter (+/- 0) test vec::tests::bench_push_all_0000_0010 ... bench: 70 ns/iter (+/- 4) = 142 MB/s test vec::tests::bench_push_all_0000_0100 ... bench: 176 ns/iter (+/- 16) = 568 MB/s test vec::tests::bench_push_all_0000_1000 ... 
bench: 1125 ns/iter (+/- 94) = 888 MB/s test vec::tests::bench_push_all_0010_0010 ... bench: 159 ns/iter (+/- 15) = 62 MB/s test vec::tests::bench_push_all_0100_0100 ... bench: 363 ns/iter (+/- 12) = 275 MB/s test vec::tests::bench_push_all_1000_1000 ... bench: 2860 ns/iter (+/- 415) = 349 MB/s
1 parent 065b98d commit f1ea540

File tree

1 file changed

+44
-31
lines changed

1 file changed

+44
-31
lines changed

src/libcollections/vec.rs

+44-31
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,9 @@ impl<T: Clone> Vec<T> {
197197
/// ```
198198
#[inline]
199199
pub fn from_slice(values: &[T]) -> Vec<T> {
200-
values.iter().map(|x| x.clone()).collect()
200+
let mut vector = Vec::with_capacity(values.len());
201+
vector.push_all(values);
202+
vector
201203
}
202204

203205
/// Constructs a `Vec` with copies of a value.
@@ -238,7 +240,10 @@ impl<T: Clone> Vec<T> {
238240
/// ```
239241
#[inline]
240242
pub fn push_all(&mut self, other: &[T]) {
241-
self.extend(other.iter().map(|e| e.clone()));
243+
unsafe {
244+
self.reserve_additional(other.len());
245+
unsafe_push_all_clone(self, other)
246+
}
242247
}
243248

244249
/// Grows the `Vec` in-place.
@@ -318,41 +323,31 @@ impl<T: Clone> Vec<T> {
318323
#[unstable]
319324
impl<T:Clone> Clone for Vec<T> {
320325
fn clone(&self) -> Vec<T> {
321-
let len = self.len;
322-
let mut vector = Vec::with_capacity(len);
323-
// Unsafe code so this can be optimised to a memcpy (or something
324-
// similarly fast) when T is Copy. LLVM is easily confused, so any
325-
// extra operations during the loop can prevent this optimisation
326-
{
327-
let this_slice = self.as_slice();
328-
while vector.len < len {
329-
unsafe {
330-
let len = vector.len;
331-
ptr::write(
332-
vector.as_mut_slice().unsafe_mut_ref(len),
333-
this_slice.unsafe_ref(len).clone());
334-
}
335-
vector.len += 1;
336-
}
326+
unsafe {
327+
let mut vector = Vec::with_capacity(self.len);
328+
unsafe_push_all_clone(&mut vector, self.as_slice());
329+
vector
337330
}
338-
vector
339331
}
340332

341333
fn clone_from(&mut self, other: &Vec<T>) {
342-
// drop anything in self that will not be overwritten
343-
if self.len() > other.len() {
344-
self.truncate(other.len())
345-
}
334+
unsafe {
335+
// drop anything in self that will not be overwritten
336+
if self.len() > other.len() {
337+
self.truncate(other.len())
338+
}
346339

347-
// reuse the contained values' allocations/resources.
348-
for (place, thing) in self.mut_iter().zip(other.iter()) {
349-
place.clone_from(thing)
350-
}
340+
// reuse the contained values' allocations/resources.
341+
for (place, thing) in self.mut_iter().zip(other.iter()) {
342+
place.clone_from(thing)
343+
}
351344

352-
// self.len <= other.len due to the truncate above, so the
353-
// slice here is always in-bounds.
354-
let len = self.len();
355-
self.extend(other.slice_from(len).iter().map(|x| x.clone()));
345+
// self.len <= other.len due to the truncate above, so the
346+
// slice here is always in-bounds.
347+
let slice = other.slice_from(self.len());
348+
self.reserve_additional(slice.len());
349+
unsafe_push_all_clone(self, slice)
350+
}
356351
}
357352
}
358353

@@ -1555,6 +1550,24 @@ pub mod raw {
15551550
}
15561551
}
15571552

1553+
// Unsafe code so this can be optimised to a memcpy (or something similarly
1554+
// fast) when T is Copy. LLVM is easily confused, so any extra operations
1555+
// during the loop can prevent this optimisation.
1556+
//
1557+
// WARNING: You must preallocate space on the vector before you call this
1558+
// method.
1559+
#[inline(always)]
1560+
unsafe fn unsafe_push_all_clone<T: Clone>(dst: &mut Vec<T>, src: &[T]) {
1561+
let mut dst_len = dst.len();
1562+
1563+
for i in range(0, src.len()) {
1564+
ptr::write(
1565+
dst.as_mut_slice().unsafe_mut_ref(dst_len),
1566+
src.unsafe_ref(i).clone());
1567+
dst_len += 1;
1568+
dst.set_len(dst_len);
1569+
}
1570+
}
15581571

15591572
#[cfg(test)]
15601573
mod tests {

0 commit comments

Comments
 (0)