Skip to content

Different (suboptimal) assembly generated for match expr vs if-else ifs #100562

Open
@sharnoff

Description

@sharnoff

Possibly an LLVM issue instead of rustc. Godbolt: https://godbolt.org/z/WqMqEn13v

The following two functions should be identical, and small:

pub fn switch_a(x: i32, y: i32) {
    match x.cmp(&y) {
        Ordering::Less => lt(),
        Ordering::Equal => eq(),
        Ordering::Greater => gt(),
    }
}

pub fn switch_b(x: i32, y: i32) {
    let o = x.cmp(&y);
    if o == Ordering::Less {
        lt()
    } else if o == Ordering::Equal {
        eq()
    } else {
        gt()
    }
}

(where fn lt(), fn eq(), and fn gt() are defined elsewhere and are not inlined)

However, on x86-64 the if version generates much better assembly than the one using match:

$crate::switch_a:
        xor     ecx, ecx
        cmp     edi, esi
        setne   cl
        mov     eax, 255
        cmovge  eax, ecx
        cmp     al, -1
        je      .LBB0_3
        test    al, al
        jne     .LBB0_2
        jmp     $crate::eq
.LBB0_3:
        jmp     $crate::lt
.LBB0_2:
        jmp     $crate::gt

$crate::switch_b:
        cmp     edi, esi
        jge     .LBB1_1
        jmp     $crate::lt
.LBB1_1:
        jne     .LBB1_4
        jmp     $crate::eq
.LBB1_4:
        jmp     $crate::gt

I also tried manually inlining the Ord::cmp implementation into switch_a, but that had no effect on the generated assembly; AFAICT it's just a match vs if issue.

I wasn't sure if the LLVM IR would also be helpful, so I've attached that here just in case:

LLVM IR for switch_a (slow)
; playground::switch_a
; Function Attrs: noinline nonlazybind uwtable
define internal void @_ZN10playground8switch_a17h9d37c1318c71046dE(i32 %0, i32 %1) unnamed_addr #0 !dbg !383 {
start:
  %_3 = alloca i8, align 1
  %y = alloca i32, align 4
  %x = alloca i32, align 4
  store i32 %0, i32* %x, align 4
  store i32 %1, i32* %y, align 4
  call void @llvm.dbg.declare(metadata i32* %x, metadata !387, metadata !DIExpression()), !dbg !389
  call void @llvm.dbg.declare(metadata i32* %y, metadata !388, metadata !DIExpression()), !dbg !390
; call core::cmp::impls::<impl core::cmp::Ord for i32>::cmp
  %2 = call i8 @"_ZN4core3cmp5impls48_$LT$impl$u20$core..cmp..Ord$u20$for$u20$i32$GT$3cmp17h798939cb199721aeE"(i32* align 4 %x, i32* align 4 %y), !dbg !391, !range !154
  store i8 %2, i8* %_3, align 1, !dbg !391
  br label %bb1, !dbg !391

bb1: ; preds = %start
%_7 = load i8, i8* %_3, align 1, !dbg !391, !range !154, !noundef !23
switch i8 %_7, label %bb3 [
i8 -1, label %bb4
i8 0, label %bb5
i8 1, label %bb2
], !dbg !392

bb3: ; preds = %bb1
unreachable, !dbg !391

bb4: ; preds = %bb1
; call playground::lt
call void @_ZN10playground2lt17h10d091da8f223c9fE(), !dbg !393
br label %bb6, !dbg !393

bb5: ; preds = %bb1
; call playground::eq
call void @_ZN10playground2eq17hea0ed2e3f5d44bfbE(), !dbg !394
br label %bb6, !dbg !394

bb2: ; preds = %bb1
; call playground::gt
call void @_ZN10playground2gt17hb40089beada6c920E(), !dbg !395
br label %bb6, !dbg !395

bb6: ; preds = %bb4, %bb5, %bb2
ret void, !dbg !396
}

LLVM IR for switch_b (fast)
; playground::switch_b
; Function Attrs: noinline nonlazybind uwtable
define internal void @_ZN10playground8switch_b17h0a86fcae9cdc33d1E(i32 %0, i32 %1) unnamed_addr #0 !dbg !397 {
start:
  %o = alloca i8, align 1
  %y = alloca i32, align 4
  %x = alloca i32, align 4
  store i32 %0, i32* %x, align 4
  store i32 %1, i32* %y, align 4
  call void @llvm.dbg.declare(metadata i32* %x, metadata !399, metadata !DIExpression()), !dbg !403
  call void @llvm.dbg.declare(metadata i32* %y, metadata !400, metadata !DIExpression()), !dbg !404
  call void @llvm.dbg.declare(metadata i8* %o, metadata !401, metadata !DIExpression()), !dbg !405
; call core::cmp::impls::<impl core::cmp::Ord for i32>::cmp
  %2 = call i8 @"_ZN4core3cmp5impls48_$LT$impl$u20$core..cmp..Ord$u20$for$u20$i32$GT$3cmp17h798939cb199721aeE"(i32* align 4 %x, i32* align 4 %y), !dbg !406, !range !154
  store i8 %2, i8* %o, align 1, !dbg !406
  br label %bb1, !dbg !406

bb1: ; preds = %start
; call <core::cmp::Ordering as core::cmp::PartialEq>::eq
%_7 = call zeroext i1 @"ZN60$LT$core..cmp..Ordering$u20$as$u20$core..cmp..PartialEq$GT$2eq17h3c0a27209002f6f2E"(i8* align 1 %o, i8* align 1 getelementptr inbounds (<{ [1 x i8] }>, <{ [1 x i8] }>* @alloc36, i32 0, i32 0, i32 0)), !dbg !407
br label %bb2, !dbg !407

bb2: ; preds = %bb1
br i1 %_7, label %bb3, label %bb4, !dbg !407

bb4: ; preds = %bb2
; call <core::cmp::Ordering as core::cmp::PartialEq>::eq
%_10 = call zeroext i1 @"ZN60$LT$core..cmp..Ordering$u20$as$u20$core..cmp..PartialEq$GT$2eq17h3c0a27209002f6f2E"(i8* align 1 %o, i8* align 1 getelementptr inbounds (<{ [1 x i8] }>, <{ [1 x i8] }>* @alloc38, i32 0, i32 0, i32 0)), !dbg !408
br label %bb5, !dbg !408

bb3: ; preds = %bb2
; call playground::lt
call void @_ZN10playground2lt17h10d091da8f223c9fE(), !dbg !409
br label %bb8, !dbg !409

bb8: ; preds = %bb7, %bb6, %bb3
ret void, !dbg !410

bb5: ; preds = %bb4
br i1 %_10, label %bb6, label %bb7, !dbg !408

bb7: ; preds = %bb5
; call playground::gt
call void @_ZN10playground2gt17hb40089beada6c920E(), !dbg !411
br label %bb8, !dbg !411

bb6: ; preds = %bb5
; call playground::eq
call void @_ZN10playground2eq17hea0ed2e3f5d44bfbE(), !dbg !412
br label %bb8, !dbg !412
}

Metadata

Metadata

Assignees

Labels

A-LLVM — Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.
C-bug — Category: This is a bug.
I-slow — Issue: Problems and improvements with respect to performance of generated code.
T-compiler — Relevant to the compiler team, which will review and decide on the PR/issue.

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions