Description
Possibly an LLVM issue instead of rustc. Godbolt: https://godbolt.org/z/WqMqEn13v
The following two functions should compile to identical, small machine code:
/// Calls `lt()`, `eq()`, or `gt()` depending on how `x` orders relative to `y`.
///
/// This variant dispatches with a `match` on the `Ordering` returned by `x.cmp(&y)`.
pub fn switch_a(x: i32, y: i32) {
match x.cmp(&y) {
Ordering::Less => lt(),
Ordering::Equal => eq(),
Ordering::Greater => gt(),
}
}
/// Calls `lt()`, `eq()`, or `gt()` depending on how `x` orders relative to `y`.
///
/// This variant stores the `Ordering` returned by `x.cmp(&y)` and tests it with an
/// `if` / `else if` chain of `==` comparisons instead of a `match`.
pub fn switch_b(x: i32, y: i32) {
let o = x.cmp(&y);
if o == Ordering::Less {
lt()
} else if o == Ordering::Equal {
eq()
} else {
// Reached when `o` is neither `Less` nor `Equal`.
gt()
}
}
(where `fn lt()`, `fn eq()`, and `fn gt()` are defined elsewhere and are not inlined)
However, on x86-64 the `if` version generates much better assembly than the one using `match`:
$crate::switch_a:
xor ecx, ecx
cmp edi, esi
setne cl
mov eax, 255
cmovge eax, ecx
cmp al, -1
je .LBB0_3
test al, al
jne .LBB0_2
jmp $crate::eq
.LBB0_3:
jmp $crate::lt
.LBB0_2:
jmp $crate::gt
$crate::switch_b:
cmp edi, esi
jge .LBB1_1
jmp $crate::lt
.LBB1_1:
jne .LBB1_4
jmp $crate::eq
.LBB1_4:
jmp $crate::gt
I also tried manually inlining the `Ord::cmp` implementation into `switch_a`, but that had no effect on the generated assembly; AFAICT it's just a `match` vs `if` issue.
I wasn't sure if the LLVM IR would also be helpful, so I've attached that here just in case:
LLVM IR for switch_a
(slow)
; playground::switch_a
; Function Attrs: noinline nonlazybind uwtable
define internal void @_ZN10playground8switch_a17h9d37c1318c71046dE(i32 %0, i32 %1) unnamed_addr #0 !dbg !383 {
start:
  %_3 = alloca i8, align 1
  %y = alloca i32, align 4
  %x = alloca i32, align 4
  store i32 %0, i32* %x, align 4
  store i32 %1, i32* %y, align 4
  call void @llvm.dbg.declare(metadata i32* %x, metadata !387, metadata !DIExpression()), !dbg !389
  call void @llvm.dbg.declare(metadata i32* %y, metadata !388, metadata !DIExpression()), !dbg !390
; call core::cmp::impls::<impl core::cmp::Ord for i32>::cmp
  %2 = call i8 @"_ZN4core3cmp5impls48_$LT$impl$u20$core..cmp..Ord$u20$for$u20$i32$GT$3cmp17h798939cb199721aeE"(i32* align 4 %x, i32* align 4 %y), !dbg !391, !range !154
  store i8 %2, i8* %_3, align 1, !dbg !391
  br label %bb1, !dbg !391

bb1:                                              ; preds = %start
  %_7 = load i8, i8* %_3, align 1, !dbg !391, !range !154, !noundef !23
  switch i8 %_7, label %bb3 [
    i8 -1, label %bb4
    i8 0, label %bb5
    i8 1, label %bb2
  ], !dbg !392

bb3:                                              ; preds = %bb1
  unreachable, !dbg !391

bb4:                                              ; preds = %bb1
; call playground::lt
  call void @_ZN10playground2lt17h10d091da8f223c9fE(), !dbg !393
  br label %bb6, !dbg !393

bb5:                                              ; preds = %bb1
; call playground::eq
  call void @_ZN10playground2eq17hea0ed2e3f5d44bfbE(), !dbg !394
  br label %bb6, !dbg !394

bb2:                                              ; preds = %bb1
; call playground::gt
  call void @_ZN10playground2gt17hb40089beada6c920E(), !dbg !395
  br label %bb6, !dbg !395

bb6:                                              ; preds = %bb4, %bb5, %bb2
  ret void, !dbg !396
}
LLVM IR for switch_b
(fast)
; playground::switch_b
; Function Attrs: noinline nonlazybind uwtable
define internal void @_ZN10playground8switch_b17h0a86fcae9cdc33d1E(i32 %0, i32 %1) unnamed_addr #0 !dbg !397 {
start:
  %o = alloca i8, align 1
  %y = alloca i32, align 4
  %x = alloca i32, align 4
  store i32 %0, i32* %x, align 4
  store i32 %1, i32* %y, align 4
  call void @llvm.dbg.declare(metadata i32* %x, metadata !399, metadata !DIExpression()), !dbg !403
  call void @llvm.dbg.declare(metadata i32* %y, metadata !400, metadata !DIExpression()), !dbg !404
  call void @llvm.dbg.declare(metadata i8* %o, metadata !401, metadata !DIExpression()), !dbg !405
; call core::cmp::impls::<impl core::cmp::Ord for i32>::cmp
  %2 = call i8 @"_ZN4core3cmp5impls48_$LT$impl$u20$core..cmp..Ord$u20$for$u20$i32$GT$3cmp17h798939cb199721aeE"(i32* align 4 %x, i32* align 4 %y), !dbg !406, !range !154
  store i8 %2, i8* %o, align 1, !dbg !406
  br label %bb1, !dbg !406

bb1:                                              ; preds = %start
; call <core::cmp::Ordering as core::cmp::PartialEq>::eq
  %_7 = call zeroext i1 @"_ZN60_$LT$core..cmp..Ordering$u20$as$u20$core..cmp..PartialEq$GT$2eq17h3c0a27209002f6f2E"(i8* align 1 %o, i8* align 1 getelementptr inbounds (<{ [1 x i8] }>, <{ [1 x i8] }>* @alloc36, i32 0, i32 0, i32 0)), !dbg !407
  br label %bb2, !dbg !407

bb2:                                              ; preds = %bb1
  br i1 %_7, label %bb3, label %bb4, !dbg !407

bb4:                                              ; preds = %bb2
; call <core::cmp::Ordering as core::cmp::PartialEq>::eq
  %_10 = call zeroext i1 @"_ZN60_$LT$core..cmp..Ordering$u20$as$u20$core..cmp..PartialEq$GT$2eq17h3c0a27209002f6f2E"(i8* align 1 %o, i8* align 1 getelementptr inbounds (<{ [1 x i8] }>, <{ [1 x i8] }>* @alloc38, i32 0, i32 0, i32 0)), !dbg !408
  br label %bb5, !dbg !408

bb3:                                              ; preds = %bb2
; call playground::lt
  call void @_ZN10playground2lt17h10d091da8f223c9fE(), !dbg !409
  br label %bb8, !dbg !409

bb8:                                              ; preds = %bb7, %bb6, %bb3
  ret void, !dbg !410

bb5:                                              ; preds = %bb4
  br i1 %_10, label %bb6, label %bb7, !dbg !408

bb7:                                              ; preds = %bb5
; call playground::gt
  call void @_ZN10playground2gt17hb40089beada6c920E(), !dbg !411
  br label %bb8, !dbg !411

bb6:                                              ; preds = %bb5
; call playground::eq
  call void @_ZN10playground2eq17hea0ed2e3f5d44bfbE(), !dbg !412
  br label %bb8, !dbg !412
}