Open
Description
Take the following:
; Masks %a with one constant and then ORs in a second constant.
;   -1044483 == 0xfff00ffd  (bits 1 and 12-19 cleared)
;   471040   == 0x00073000  (bits 12, 13, 16, 17, 18 set)
; In the expected AArch64 output below, both constants are built in registers
; with a mov+movk pair each (w8 = mask, w9 = OR value) before the and/orr.
define i32 @test1_known_not_zero_mul(i32 %a) {
; CHECK-LABEL: test1_known_not_zero_mul:
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, #4093 // =0xffd
; CHECK-NEXT: mov w9, #12288 // =0x3000
; CHECK-NEXT: movk w8, #65520, lsl #16
; CHECK-NEXT: movk w9, #7, lsl #16
; CHECK-NEXT: and w8, w0, w8
; CHECK-NEXT: orr w0, w8, w9
; CHECK-NEXT: ret
; Clear bits 1 and 12-19 of %a (mask 0xfff00ffd).
%and = and i32 %a, -1044483
; `disjoint` asserts the operands share no set bits: every bit of 0x73000
; lies inside bits 12-19, which the mask above has already cleared.
%or = or disjoint i32 %and, 471040
ret i32 %or
}
Now, here is a case where adding an instruction to the IR actually improves the generated code (five instructions before `ret` instead of six):
; Same and/or sequence as the previous example, but applied to %a * %a.
; In the expected AArch64 output below, the `and` uses the immediate
; #0xfff03fff directly instead of a mask built with mov+movk.
; That immediate differs from the IR mask 0xfff00ffd only in bits 1, 12 and 13:
;   - bits 12-13 are set by the following `or` (0x73000) regardless, and
;   - bit 1 of a square is always 0, so keeping it cannot change the result.
; NOTE(review): presumably this known-zero bit is what lets the backend pick
; the encodable immediate; confirm against the AArch64 lowering code.
define i32 @test1_known_not_zero_mul(i32 %a) {
; CHECK-LABEL: test1_known_not_zero_mul:
; CHECK: // %bb.0:
; CHECK-NEXT: mul w9, w0, w0
; CHECK-NEXT: mov w8, #12288 // =0x3000
; CHECK-NEXT: movk w8, #7, lsl #16
; CHECK-NEXT: and w9, w9, #0xfff03fff
; CHECK-NEXT: orr w0, w9, w8
; CHECK-NEXT: ret
%mul = mul i32 %a, %a ; Bit 1 of a square is always 0 (x*x mod 4 is 0 or 1).
; IR mask is 0xfff00ffd (bits 1 and 12-19 cleared).
%and = and i32 %mul, -1044483
; `disjoint` asserts the operands share no set bits: every bit of 0x73000
; lies inside bits 12-19, which the mask above has already cleared.
%or = or disjoint i32 %and, 471040
ret i32 %or
}
So this means we can rewrite the output for the first one to, at minimum:
; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: mov w8, #12288 // =0x3000
; CHECK-NEXT: movk w8, #7, lsl #16
; CHECK-NEXT: and w9, w9, #0xfff03fff
; CHECK-NEXT: orr w0, w9, w8
; CHECK-NEXT: ret
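instead of materializing the `and` mask in a register with `mov`/`movk`. How can we do this? (Note: as written, `#0xfff03fff` preserves bit 1 of `%a`, whereas the IR mask `-1044483` (`0xfff00ffd`) clears it. The second example can use that immediate only because bit 1 of `%a * %a` is already known to be zero, so for the first function the rewrite would additionally need bit 1 cleared.)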