Skip to content

Missed optimization #67342

Open
Open
@ojeda

Description

@ojeda

From: rust-lang/rust#116150

With LLVM 17.0.1:

%"core::result::Result<u16, i32>" = type { i16, [3 x i16] }
%"core::result::Result<u16, i32>::Ok" = type { [1 x i16], i16 }
%"core::result::Result<u16, i32>::Err" = type { [1 x i32], i32 }

; Reproducer, variant 1: the `trunc` lives in bb2, right next to the store that uses it.
; Fills the stack slot %r through one of two struct views and returns the slot as one i64.
; The entry block is unlabeled; the `%start` named in the `preds` comments below refers to it.
define i64 @f(i32 %err) unnamed_addr #0 {
  %r = alloca %"core::result::Result<u16, i32>", align 4
  ; Negative input takes the bb1 (Err view) path, everything else takes bb2 (Ok view).
  %_3 = icmp slt i32 %err, 0
  br i1 %_3, label %bb1, label %bb2

bb2:                                              ; preds = %start
  %ok = trunc i32 %err to i16
  ; Field 1 of the Ok view: the i16 that follows the leading [1 x i16].
  %1 = getelementptr inbounds %"core::result::Result<u16, i32>::Ok", ptr %r, i32 0, i32 1
  store i16 %ok, ptr %1, align 2
  ; The leading i16 of %r is set to 0 on this path.
  store i16 0, ptr %r, align 4
  br label %bb3

bb1:                                              ; preds = %start
  ; Field 1 of the Err view: the i32 that follows the leading [1 x i32].
  %2 = getelementptr inbounds %"core::result::Result<u16, i32>::Err", ptr %r, i32 0, i32 1
  store i32 %err, ptr %2, align 4
  ; The leading i16 of %r is set to 1 on this path; the Ok view's i16 field is not written here.
  store i16 1, ptr %r, align 4
  br label %bb3

bb3:                                              ; preds = %bb1, %bb2
  ; Read the whole slot back as a single i64, including whatever was not stored on the taken path.
  %3 = load i64, ptr %r, align 4
  ret i64 %3
}

optimizes to (optimized IR, followed by the x86-64 assembly generated for it):

; Optimized form of variant 1: the alloca is gone and the i64 is assembled with shifts and ors.
; Note the `select` on %_3 that zeroes bits 16..31 when %err is negative.
define i64 @f(i32 %err) unnamed_addr #0 {
  %_3 = icmp slt i32 %err, 0
  ; Sign bit of %err moved to bit 0: 1 for negative input, 0 otherwise.
  %err.lobit = lshr i32 %err, 31
  ; %err placed in bits 32..63 of the result.
  %r.sroa.4.0.insert.ext = zext i32 %err to i64
  %r.sroa.4.0.insert.shift = shl nuw i64 %r.sroa.4.0.insert.ext, 32
  ; Low 16 bits of %err moved to bits 16..31 ...
  %0 = shl i32 %err, 16
  ; ... but replaced by 0 when %err is negative. This is the select the report is about.
  %1 = select i1 %_3, i32 0, i32 %0
  %r.sroa.3.0.insert.shift = zext i32 %1 to i64
  %r.sroa.3.0.insert.insert = or i64 %r.sroa.4.0.insert.shift, %r.sroa.3.0.insert.shift
  ; Finally or in the 0/1 value derived from the sign bit.
  %r.sroa.0.0.insert.ext = zext i32 %err.lobit to i64
  %r.sroa.0.0.insert.insert = or i64 %r.sroa.3.0.insert.insert, %r.sroa.0.0.insert.ext
  ret i64 %r.sroa.0.0.insert.insert
}
# x86-64 code (Intel syntax) for variant 1. The input is read from edi/rdi and the
# packed 64-bit value is left in rax at the `ret`.
f:                                      # @f
        # eax = sign bit of the input (0 or 1)
        mov     eax, edi
        shr     eax, 31
        # rcx = input shifted into bits 32..63
        mov     rcx, rdi
        shl     rcx, 32
        # edx = low 16 bits of the input moved to bits 16..31
        mov     edx, edi
        shl     edx, 16
        # esi = edx when the input is non-negative, otherwise 0 (the IR's `select`)
        xor     esi, esi
        test    edi, edi
        cmovns  esi, edx
        # merge the three pieces into rax
        or      rsi, rcx
        or      rax, rsi
        ret

But if the `trunc` is moved up into the entry block, away from the store that uses it:

%"core::result::Result<u16, i32>" = type { i16, [3 x i16] }
%"core::result::Result<u16, i32>::Ok" = type { [1 x i16], i16 }
%"core::result::Result<u16, i32>::Err" = type { [1 x i32], i32 }

; Reproducer, variant 2: same stores and branches as variant 1, but the `trunc` is computed
; in the entry block, before the branch, instead of in bb2 next to its store.
; The entry block is unlabeled; the `%start` named in the `preds` comments below refers to it.
define i64 @f(i32 %err) unnamed_addr #0 {
  %r = alloca %"core::result::Result<u16, i32>", align 4
  ; The only difference from variant 1: %ok is computed here, on both paths.
  %ok = trunc i32 %err to i16
  %_3 = icmp slt i32 %err, 0
  br i1 %_3, label %bb1, label %bb2

bb2:                                              ; preds = %start
  ; Field 1 of the Ok view: the i16 that follows the leading [1 x i16].
  %1 = getelementptr inbounds %"core::result::Result<u16, i32>::Ok", ptr %r, i32 0, i32 1
  store i16 %ok, ptr %1, align 2
  ; The leading i16 of %r is set to 0 on this path.
  store i16 0, ptr %r, align 4
  br label %bb3

bb1:                                              ; preds = %start
  ; Field 1 of the Err view: the i32 that follows the leading [1 x i32].
  %2 = getelementptr inbounds %"core::result::Result<u16, i32>::Err", ptr %r, i32 0, i32 1
  store i32 %err, ptr %2, align 4
  ; The leading i16 of %r is set to 1 on this path; the Ok view's i16 field is not written here.
  store i16 1, ptr %r, align 4
  br label %bb3

bb3:                                              ; preds = %bb1, %bb2
  ; Read the whole slot back as a single i64.
  %3 = load i64, ptr %r, align 4
  ret i64 %3
}

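it ends up without a `select` (and without the corresponding `cmovns` in the assembly). The `select` is not needed in the first version either: on the `bb1` path the `Ok` payload bytes of `%r` are never stored to, so their value in the returned `i64` does not matter: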

; Optimized form of variant 2: identical to the variant 1 output except that there is no
; icmp/select; the shifted low 16 bits of %err are used unconditionally.
define i64 @f(i32 %err) unnamed_addr #0 {
  ; Sign bit of %err moved to bit 0: 1 for negative input, 0 otherwise.
  %err.lobit = lshr i32 %err, 31
  ; %err placed in bits 32..63 of the result.
  %r.sroa.4.0.insert.ext = zext i32 %err to i64
  %r.sroa.4.0.insert.shift = shl nuw i64 %r.sroa.4.0.insert.ext, 32
  ; Low 16 bits of %err moved to bits 16..31, with no select guarding them.
  %0 = shl i32 %err, 16
  %r.sroa.3.0.insert.shift = zext i32 %0 to i64
  %r.sroa.3.0.insert.insert = or i64 %r.sroa.4.0.insert.shift, %r.sroa.3.0.insert.shift
  ; Finally or in the 0/1 value derived from the sign bit.
  %r.sroa.0.0.insert.ext = zext i32 %err.lobit to i64
  %r.sroa.0.0.insert.insert = or i64 %r.sroa.3.0.insert.insert, %r.sroa.0.0.insert.ext
  ret i64 %r.sroa.0.0.insert.insert
}
# x86-64 code (Intel syntax) for variant 2. Same structure as the variant 1 listing, minus
# the xor/test/cmovns sequence. The input is read from edi/rdi and the result is left in rax.
f:                                      # @f
        # eax = sign bit of the input (0 or 1)
        mov     eax, edi
        shr     eax, 31
        # rcx = input shifted into bits 32..63
        mov     rcx, rdi
        shl     rcx, 32
        # edi = low 16 bits of the input moved to bits 16..31, used unconditionally
        shl     edi, 16
        # merge the three pieces into rax
        or      rcx, rdi
        or      rax, rcx
        ret

Metadata

Metadata

Assignees

Labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions