Open
Description
In the process of trying to stabilize get_many_mut()
, and while trying to check what will be the costs of a more detailed error type, I came across the following perf regression from stable to beta.
Code
I tried this code:
#![feature(get_many_mut)]
#[inline]
fn get_many_check_valid(indices: &[usize; 2], len: usize) -> Result<(), GetManyMutError> {
for (i, &idx) in indices.iter().enumerate() {
if idx >= len {
return Err(GetManyMutError::OutOfBounds);
}
for &idx2 in &indices[..i] {
if idx == idx2 {
return Err(GetManyMutError::Overlapping);
}
}
}
Ok(())
}
#[inline]
pub fn after_all<'a>(
slice: &'a mut [i32],
indices: [usize; 2],
) -> Result<[&mut i32; 2], GetManyMutError> {
get_many_check_valid(&indices, slice.len())?;
unsafe { Ok(slice.get_many_unchecked_mut(indices)) }
}
#[derive(Debug)]
pub enum GetManyMutError {
OutOfBounds,
Overlapping,
}
#[no_mangle]
pub fn after_all_unwrap(data: &mut [i32], indices: [usize; 2]) -> [&mut i32; 2] {
after_all(data, indices).unwrap()
}
On stable (with RUSTC_BOOTSTRAP=1
) this optimizes nicely to the following code (https://godbolt.org/z/zq8EG7df7):
after_all_unwrap:
mov r8, qword ptr [rcx]
xor eax, eax
cmp r8, rdx
jae .LBB0_4
mov rcx, qword ptr [rcx + 8]
cmp rcx, rdx
jae .LBB0_4
cmp rcx, r8
je .LBB0_3
lea rax, [rsi + 4*r8]
lea rcx, [rsi + 4*rcx]
mov qword ptr [rdi], rax
mov qword ptr [rdi + 8], rcx
mov rax, rdi
ret
.LBB0_3:
mov al, 1
.LBB0_4:
push rax
mov byte ptr [rsp + 7], al
lea rdi, [rip + .L__unnamed_1]
lea rcx, [rip + .L__unnamed_2]
lea r8, [rip + .L__unnamed_3]
lea rdx, [rsp + 7]
mov esi, 43
call qword ptr [rip + core::result::unwrap_failed::haccb9aaa604e1e21@GOTPCREL]
However, on beta (and nightly) this adds a whole bunch of unnecessary instructions:
after_all_unwrap:
mov r8, qword ptr [rcx]
xor eax, eax
cmp r8, rdx
jae .LBB0_4
mov rcx, qword ptr [rcx + 8]
cmp rcx, rdx
jae .LBB0_4
cmp rcx, r8
je .LBB0_3
lea rax, [rsi + 4*r8]
lea rcx, [rsi + 4*rcx]
mov rdx, rcx
shr rdx, 8
mov qword ptr [rdi], rax
mov byte ptr [rdi + 8], cl
mov rax, rcx
shr rax, 56
mov byte ptr [rdi + 15], al
shr rcx, 40
mov word ptr [rdi + 13], cx
mov dword ptr [rdi + 9], edx
mov rax, rdi
ret
.LBB0_3:
mov al, 1
.LBB0_4:
push rax
mov byte ptr [rsp + 7], al
lea rdi, [rip + .L__unnamed_1]
lea rcx, [rip + .L__unnamed_2]
lea r8, [rip + .L__unnamed_3]
lea rdx, [rsp + 7]
mov esi, 43
call qword ptr [rip + core::result::unwrap_failed::h803c538fb296a320@GOTPCREL]
With cargo-bisect-rustc
I bisected it to #126578. CC @scottmcm.
@rustbot label +regression-from-stable-to-beta -regression-untriaged -C-bug +perf-regression +I-heavy +I-slow
Metadata
Metadata
Assignees
Labels
Category: An issue highlighting optimization opportunities or PRs implementing suchIssue: Problems and improvements with respect to binary size of generated code.Issue: Problems and improvements with respect to performance of generated code.Medium priorityPerformance regression.Performance or correctness regression from one stable version to another.