[ARM][RISCV] Partially revert #101786 #137120
Conversation
The change as-is breaks the Linux kernel build, as pointed out in the comments.
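For background (not stated verbatim in the patch, but consistent with the TODO comments in the diff below): with LibCall, SelectionDAG lowers ISD::CTPOP to a call to the libgcc/compiler-rt helpers __popcountsi2 / __popcountdi2, which a freestanding build such as the Linux kernel may not provide, whereas Expand emits an inline bit-manipulation sequence. Below is a minimal C++ sketch of that kind of inline expansion, using the same magic constants that appear in the updated tests; it illustrates the technique only and is not the actual legalization code.

#include <cstdint>

// Hedged sketch: an inline popcount of the shape the Expand action produces
// for ISD::CTPOP on i32 (the 0x55555555 / 0x33333333 / 0x0f0f0f0f / 0x01010101
// constants below also appear in the updated tests).
static uint32_t popcount32_inline(uint32_t x) {
  x = x - ((x >> 1) & 0x55555555u);                  // 2-bit partial sums
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);  // 4-bit partial sums
  x = (x + (x >> 4)) & 0x0F0F0F0Fu;                  // 8-bit partial sums
  return (x * 0x01010101u) >> 24;                    // sum the bytes into the top byte
}

// With LibCall, the same operation would instead become a call to the runtime
// helper, e.g. extern "C" int __popcountsi2(int);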
@llvm/pr-subscribers-backend-risc-v @llvm/pr-subscribers-backend-arm

Author: Sergei Barannikov (s-barannikov)

Changes

The change as-is breaks the Linux kernel build, as pointed out in the comments.

Patch is 83.81 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/137120.diff

13 Files Affected:
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index bdebd842b011c..03364d9025208 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1221,8 +1221,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ROTR, VT, Expand);
}
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
- setOperationAction(ISD::CTPOP, MVT::i32, LibCall);
- setOperationAction(ISD::CTPOP, MVT::i64, LibCall);
+ // TODO: These two should be set to LibCall, but this currently breaks
+ // the Linux kernel build. See #101786.
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 099ba5c9943ac..02451ee716865 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -396,11 +396,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
} else {
setOperationAction(ISD::CTTZ, XLenVT, Expand);
+ // TODO: These should be set to LibCall, but this currently breaks
+ // the Linux kernel build. See #101786. Lacks i128 tests, too.
if (Subtarget.is64Bit())
- setOperationAction(ISD::CTPOP, MVT::i128, LibCall);
+ setOperationAction(ISD::CTPOP, MVT::i128, Expand);
else
- setOperationAction(ISD::CTPOP, MVT::i32, LibCall);
- setOperationAction(ISD::CTPOP, MVT::i64, LibCall);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i64, Expand);
}
if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
diff --git a/llvm/test/CodeGen/ARM/popcnt.ll b/llvm/test/CodeGen/ARM/popcnt.ll
index fc4387320ef77..a70fdc580ca9b 100644
--- a/llvm/test/CodeGen/ARM/popcnt.ll
+++ b/llvm/test/CodeGen/ARM/popcnt.ll
@@ -324,7 +324,30 @@ define i32 @ctpop16(i16 %x) nounwind readnone {
define i32 @ctpop32(i32 %x) nounwind readnone {
; CHECK-LABEL: ctpop32:
; CHECK: @ %bb.0:
-; CHECK-NEXT: b __popcountsi2
+; CHECK-NEXT: ldr r1, .LCPI22_0
+; CHECK-NEXT: ldr r2, .LCPI22_3
+; CHECK-NEXT: and r1, r1, r0, lsr #1
+; CHECK-NEXT: ldr r12, .LCPI22_1
+; CHECK-NEXT: sub r0, r0, r1
+; CHECK-NEXT: ldr r3, .LCPI22_2
+; CHECK-NEXT: and r1, r0, r2
+; CHECK-NEXT: and r0, r2, r0, lsr #2
+; CHECK-NEXT: add r0, r1, r0
+; CHECK-NEXT: add r0, r0, r0, lsr #4
+; CHECK-NEXT: and r0, r0, r12
+; CHECK-NEXT: mul r1, r0, r3
+; CHECK-NEXT: lsr r0, r1, #24
+; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI22_0:
+; CHECK-NEXT: .long 1431655765 @ 0x55555555
+; CHECK-NEXT: .LCPI22_1:
+; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
+; CHECK-NEXT: .LCPI22_2:
+; CHECK-NEXT: .long 16843009 @ 0x1010101
+; CHECK-NEXT: .LCPI22_3:
+; CHECK-NEXT: .long 858993459 @ 0x33333333
%count = tail call i32 @llvm.ctpop.i32(i32 %x)
ret i32 %count
}
@@ -332,12 +355,43 @@ define i32 @ctpop32(i32 %x) nounwind readnone {
define i64 @ctpop64(i64 %x) nounwind readnone {
; CHECK-LABEL: ctpop64:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r11, lr}
-; CHECK-NEXT: push {r11, lr}
-; CHECK-NEXT: bl __popcountdi2
-; CHECK-NEXT: asr r1, r0, #31
-; CHECK-NEXT: pop {r11, lr}
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: ldr r2, .LCPI23_0
+; CHECK-NEXT: ldr r3, .LCPI23_3
+; CHECK-NEXT: and r4, r2, r0, lsr #1
+; CHECK-NEXT: and r2, r2, r1, lsr #1
+; CHECK-NEXT: sub r0, r0, r4
+; CHECK-NEXT: sub r1, r1, r2
+; CHECK-NEXT: and r4, r0, r3
+; CHECK-NEXT: and r2, r1, r3
+; CHECK-NEXT: and r0, r3, r0, lsr #2
+; CHECK-NEXT: and r1, r3, r1, lsr #2
+; CHECK-NEXT: add r0, r4, r0
+; CHECK-NEXT: ldr lr, .LCPI23_1
+; CHECK-NEXT: add r1, r2, r1
+; CHECK-NEXT: ldr r12, .LCPI23_2
+; CHECK-NEXT: add r0, r0, r0, lsr #4
+; CHECK-NEXT: and r0, r0, lr
+; CHECK-NEXT: add r1, r1, r1, lsr #4
+; CHECK-NEXT: mul r2, r0, r12
+; CHECK-NEXT: and r0, r1, lr
+; CHECK-NEXT: mul r1, r0, r12
+; CHECK-NEXT: lsr r0, r2, #24
+; CHECK-NEXT: add r0, r0, r1, lsr #24
+; CHECK-NEXT: mov r1, #0
+; CHECK-NEXT: pop {r4, lr}
; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: .p2align 2
+; CHECK-NEXT: @ %bb.1:
+; CHECK-NEXT: .LCPI23_0:
+; CHECK-NEXT: .long 1431655765 @ 0x55555555
+; CHECK-NEXT: .LCPI23_1:
+; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
+; CHECK-NEXT: .LCPI23_2:
+; CHECK-NEXT: .long 16843009 @ 0x1010101
+; CHECK-NEXT: .LCPI23_3:
+; CHECK-NEXT: .long 858993459 @ 0x33333333
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
ret i64 %count
}
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index f8c3a75f844db..a46168f114bb9 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -1156,30 +1156,46 @@ define i16 @test_ctlz_i16(i16 %a) nounwind {
}
define i32 @test_ctlz_i32(i32 %a) nounwind {
-; RV32_NOZBB-LABEL: test_ctlz_i32:
-; RV32_NOZBB: # %bb.0:
-; RV32_NOZBB-NEXT: beqz a0, .LBB10_2
-; RV32_NOZBB-NEXT: # %bb.1: # %cond.false
-; RV32_NOZBB-NEXT: addi sp, sp, -16
-; RV32_NOZBB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32_NOZBB-NEXT: srli a1, a0, 1
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 2
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 4
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 8
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 16
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: not a0, a0
-; RV32_NOZBB-NEXT: call __popcountsi2
-; RV32_NOZBB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32_NOZBB-NEXT: addi sp, sp, 16
-; RV32_NOZBB-NEXT: ret
-; RV32_NOZBB-NEXT: .LBB10_2:
-; RV32_NOZBB-NEXT: li a0, 32
-; RV32_NOZBB-NEXT: ret
+; RV32I-LABEL: test_ctlz_i32:
+; RV32I: # %bb.0:
+; RV32I-NEXT: beqz a0, .LBB10_2
+; RV32I-NEXT: # %bb.1: # %cond.false
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: addi a1, a2, 1365
+; RV32I-NEXT: srli a2, a0, 2
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: srli a2, a0, 4
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: srli a2, a0, 8
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: srli a2, a0, 16
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: srli a2, a0, 1
+; RV32I-NEXT: and a1, a2, a1
+; RV32I-NEXT: lui a2, 209715
+; RV32I-NEXT: addi a2, a2, 819
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: and a1, a0, a2
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: lui a2, 61681
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: addi a1, a2, -241
+; RV32I-NEXT: and a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB10_2:
+; RV32I-NEXT: li a0, 32
+; RV32I-NEXT: ret
;
; RV64I-LABEL: test_ctlz_i32:
; RV64I: # %bb.0:
@@ -1223,6 +1239,46 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
; RV64I-NEXT: li a0, 32
; RV64I-NEXT: ret
;
+; RV32M-LABEL: test_ctlz_i32:
+; RV32M: # %bb.0:
+; RV32M-NEXT: beqz a0, .LBB10_2
+; RV32M-NEXT: # %bb.1: # %cond.false
+; RV32M-NEXT: srli a1, a0, 1
+; RV32M-NEXT: lui a2, 349525
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: addi a1, a2, 1365
+; RV32M-NEXT: srli a2, a0, 2
+; RV32M-NEXT: or a0, a0, a2
+; RV32M-NEXT: srli a2, a0, 4
+; RV32M-NEXT: or a0, a0, a2
+; RV32M-NEXT: srli a2, a0, 8
+; RV32M-NEXT: or a0, a0, a2
+; RV32M-NEXT: srli a2, a0, 16
+; RV32M-NEXT: or a0, a0, a2
+; RV32M-NEXT: not a0, a0
+; RV32M-NEXT: srli a2, a0, 1
+; RV32M-NEXT: and a1, a2, a1
+; RV32M-NEXT: lui a2, 209715
+; RV32M-NEXT: addi a2, a2, 819
+; RV32M-NEXT: sub a0, a0, a1
+; RV32M-NEXT: and a1, a0, a2
+; RV32M-NEXT: srli a0, a0, 2
+; RV32M-NEXT: and a0, a0, a2
+; RV32M-NEXT: lui a2, 61681
+; RV32M-NEXT: add a0, a1, a0
+; RV32M-NEXT: srli a1, a0, 4
+; RV32M-NEXT: add a0, a0, a1
+; RV32M-NEXT: lui a1, 4112
+; RV32M-NEXT: addi a2, a2, -241
+; RV32M-NEXT: and a0, a0, a2
+; RV32M-NEXT: addi a1, a1, 257
+; RV32M-NEXT: mul a0, a0, a1
+; RV32M-NEXT: srli a0, a0, 24
+; RV32M-NEXT: ret
+; RV32M-NEXT: .LBB10_2:
+; RV32M-NEXT: li a0, 32
+; RV32M-NEXT: ret
+;
; RV64M-LABEL: test_ctlz_i32:
; RV64M: # %bb.0:
; RV64M-NEXT: sext.w a1, a0
@@ -1290,75 +1346,240 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
}
define i64 @test_ctlz_i64(i64 %a) nounwind {
-; RV32_NOZBB-LABEL: test_ctlz_i64:
-; RV32_NOZBB: # %bb.0:
-; RV32_NOZBB-NEXT: addi sp, sp, -16
-; RV32_NOZBB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32_NOZBB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
-; RV32_NOZBB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32_NOZBB-NEXT: mv s1, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 1
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 2
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 4
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 8
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 16
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: not a0, a0
-; RV32_NOZBB-NEXT: call __popcountsi2
-; RV32_NOZBB-NEXT: mv s0, a0
-; RV32_NOZBB-NEXT: srli a0, s1, 1
-; RV32_NOZBB-NEXT: or a0, s1, a0
-; RV32_NOZBB-NEXT: srli a1, a0, 2
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 4
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 8
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 16
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: not a0, a0
-; RV32_NOZBB-NEXT: call __popcountsi2
-; RV32_NOZBB-NEXT: bnez s1, .LBB11_2
-; RV32_NOZBB-NEXT: # %bb.1:
-; RV32_NOZBB-NEXT: addi a0, s0, 32
-; RV32_NOZBB-NEXT: .LBB11_2:
-; RV32_NOZBB-NEXT: li a1, 0
-; RV32_NOZBB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32_NOZBB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
-; RV32_NOZBB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
-; RV32_NOZBB-NEXT: addi sp, sp, 16
-; RV32_NOZBB-NEXT: ret
+; RV32I-LABEL: test_ctlz_i64:
+; RV32I: # %bb.0:
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: lui a3, 209715
+; RV32I-NEXT: lui a5, 61681
+; RV32I-NEXT: addi a4, a2, 1365
+; RV32I-NEXT: addi a3, a3, 819
+; RV32I-NEXT: addi a2, a5, -241
+; RV32I-NEXT: bnez a1, .LBB11_2
+; RV32I-NEXT: # %bb.1:
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: and a1, a1, a4
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: and a1, a0, a3
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: addi a0, a0, 32
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: ret
+; RV32I-NEXT: .LBB11_2:
+; RV32I-NEXT: srli a0, a1, 1
+; RV32I-NEXT: or a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 2
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 8
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: srli a1, a0, 16
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: not a0, a0
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: and a1, a1, a4
+; RV32I-NEXT: sub a0, a0, a1
+; RV32I-NEXT: and a1, a0, a3
+; RV32I-NEXT: srli a0, a0, 2
+; RV32I-NEXT: and a0, a0, a3
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: srli a1, a0, 4
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: and a0, a0, a2
+; RV32I-NEXT: slli a1, a0, 8
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: slli a1, a0, 16
+; RV32I-NEXT: add a0, a0, a1
+; RV32I-NEXT: srli a0, a0, 24
+; RV32I-NEXT: li a1, 0
+; RV32I-NEXT: ret
;
-; RV64NOZBB-LABEL: test_ctlz_i64:
-; RV64NOZBB: # %bb.0:
-; RV64NOZBB-NEXT: beqz a0, .LBB11_2
-; RV64NOZBB-NEXT: # %bb.1: # %cond.false
-; RV64NOZBB-NEXT: addi sp, sp, -16
-; RV64NOZBB-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64NOZBB-NEXT: srli a1, a0, 1
-; RV64NOZBB-NEXT: or a0, a0, a1
-; RV64NOZBB-NEXT: srli a1, a0, 2
-; RV64NOZBB-NEXT: or a0, a0, a1
-; RV64NOZBB-NEXT: srli a1, a0, 4
-; RV64NOZBB-NEXT: or a0, a0, a1
-; RV64NOZBB-NEXT: srli a1, a0, 8
-; RV64NOZBB-NEXT: or a0, a0, a1
-; RV64NOZBB-NEXT: srli a1, a0, 16
-; RV64NOZBB-NEXT: or a0, a0, a1
-; RV64NOZBB-NEXT: srli a1, a0, 32
-; RV64NOZBB-NEXT: or a0, a0, a1
-; RV64NOZBB-NEXT: not a0, a0
-; RV64NOZBB-NEXT: call __popcountdi2
-; RV64NOZBB-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64NOZBB-NEXT: addi sp, sp, 16
-; RV64NOZBB-NEXT: ret
-; RV64NOZBB-NEXT: .LBB11_2:
-; RV64NOZBB-NEXT: li a0, 64
-; RV64NOZBB-NEXT: ret
+; RV64I-LABEL: test_ctlz_i64:
+; RV64I: # %bb.0:
+; RV64I-NEXT: beqz a0, .LBB11_2
+; RV64I-NEXT: # %bb.1: # %cond.false
+; RV64I-NEXT: srli a1, a0, 1
+; RV64I-NEXT: lui a2, 349525
+; RV64I-NEXT: lui a3, 209715
+; RV64I-NEXT: or a0, a0, a1
+; RV64I-NEXT: addiw a1, a2, 1365
+; RV64I-NEXT: addiw a2, a3, 819
+; RV64I-NEXT: srli a3, a0, 2
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: slli a3, a1, 32
+; RV64I-NEXT: add a1, a1, a3
+; RV64I-NEXT: slli a3, a2, 32
+; RV64I-NEXT: add a2, a2, a3
+; RV64I-NEXT: srli a3, a0, 4
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: srli a3, a0, 8
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: srli a3, a0, 16
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: srli a3, a0, 32
+; RV64I-NEXT: or a0, a0, a3
+; RV64I-NEXT: not a0, a0
+; RV64I-NEXT: srli a3, a0, 1
+; RV64I-NEXT: and a1, a3, a1
+; RV64I-NEXT: lui a3, 61681
+; RV64I-NEXT: addiw a3, a3, -241
+; RV64I-NEXT: sub a0, a0, a1
+; RV64I-NEXT: and a1, a0, a2
+; RV64I-NEXT: srli a0, a0, 2
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: slli a2, a3, 32
+; RV64I-NEXT: add a0, a1, a0
+; RV64I-NEXT: srli a1, a0, 4
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: add a2, a3, a2
+; RV64I-NEXT: and a0, a0, a2
+; RV64I-NEXT: slli a1, a0, 8
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 16
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: slli a1, a0, 32
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: srli a0, a0, 56
+; RV64I-NEXT: ret
+; RV64I-NEXT: .LBB11_2:
+; RV64I-NEXT: li a0, 64
+; RV64I-NEXT: ret
+;
+; RV32M-LABEL: test_ctlz_i64:
+; RV32M: # %bb.0:
+; RV32M-NEXT: lui a2, 349525
+; RV32M-NEXT: lui a3, 209715
+; RV32M-NEXT: lui a6, 61681
+; RV32M-NEXT: lui a7, 4112
+; RV32M-NEXT: addi a5, a2, 1365
+; RV32M-NEXT: addi a4, a3, 819
+; RV32M-NEXT: addi a3, a6, -241
+; RV32M-NEXT: addi a2, a7, 257
+; RV32M-NEXT: bnez a1, .LBB11_2
+; RV32M-NEXT: # %bb.1:
+; RV32M-NEXT: srli a1, a0, 1
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: srli a1, a0, 2
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: srli a1, a0, 4
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: srli a1, a0, 8
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: srli a1, a0, 16
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: not a0, a0
+; RV32M-NEXT: srli a1, a0, 1
+; RV32M-NEXT: and a1, a1, a5
+; RV32M-NEXT: sub a0, a0, a1
+; RV32M-NEXT: and a1, a0, a4
+; RV32M-NEXT: srli a0, a0, 2
+; RV32M-NEXT: and a0, a0, a4
+; RV32M-NEXT: add a0, a1, a0
+; RV32M-NEXT: srli a1, a0, 4
+; RV32M-NEXT: add a0, a0, a1
+; RV32M-NEXT: and a0, a0, a3
+; RV32M-NEXT: mul a0, a0, a2
+; RV32M-NEXT: srli a0, a0, 24
+; RV32M-NEXT: addi a0, a0, 32
+; RV32M-NEXT: li a1, 0
+; RV32M-NEXT: ret
+; RV32M-NEXT: .LBB11_2:
+; RV32M-NEXT: srli a0, a1, 1
+; RV32M-NEXT: or a0, a1, a0
+; RV32M-NEXT: srli a1, a0, 2
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: srli a1, a0, 4
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: srli a1, a0, 8
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: srli a1, a0, 16
+; RV32M-NEXT: or a0, a0, a1
+; RV32M-NEXT: not a0, a0
+; RV32M-NEXT: srli a1, a0, 1
+; RV32M-NEXT: and a1, a1, a5
+; RV32M-NEXT: sub a0, a0, a1
+; RV32M-NEXT: and a1, a0, a4
+; RV32M-NEXT: srli a0, a0, 2
+; RV32M-NEXT: and a0, a0, a4
+; RV32M-NEXT: add a0, a1, a0
+; RV32M-NEXT: srli a1, a0, 4
+; RV32M-NEXT: add a0, a0, a1
+; RV32M-NEXT: and a0, a0, a3
+; RV32M-NEXT: mul a0, a0, a2
+; RV32M-NEXT: srli a0, a0, 24
+; RV32M-NEXT: li a1, 0
+; RV32M-NEXT: ret
+;
+; RV64M-LABEL: test_ctlz_i64:
+; RV64M: # %bb.0:
+; RV64M-NEXT: beqz a0, .LBB11_2
+; RV64M-NEXT: # %bb.1: # %cond.false
+; RV64M-NEXT: srli a1, a0, 1
+; RV64M-NEXT: lui a2, 349525
+; RV64M-NEXT: lui a3, 209715
+; RV64M-NEXT: lui a4, 61681
+; RV64M-NEXT: or a0, a0, a1
+; RV64M-NEXT: addiw a1, a2, 1365
+; RV64M-NEXT: addiw a2, a3, 819
+; RV64M-NEXT: addiw a3, a4, -241
+; RV64M-NEXT: srli a4, a0, 2
+; RV64M-NEXT: or a0, a0, a4
+; RV64M-NEXT: slli a4, a1, 32
+; RV64M-NEXT: add a1, a1, a4
+; RV64M-NEXT: slli a4, a2, 32
+; RV64M-NEXT: add a2, a2, a4
+; RV64M-NEXT: slli a4, a3, 32
+; RV64M-NEXT: add a3, a3, a4
+; RV64M-NEXT: srli a4, a0, 4
+; RV64M-NEXT: or a0, a0, a4
+; RV64M-NEXT: srli a4, a0, 8
+; RV64M-NEXT: or a0, a0, a4
+; RV64M-NEXT: srli a4, a0, 16
+; RV64M-NEXT: or a0, a0, a4
+; RV64M-NEXT: srli a4, a0, 32
+; RV64M-NEXT: or a0, a0, a4
+; RV64M-NEXT: not a0, a0
+; RV64M-NEXT: srli a4, a0, 1
+; RV64M-NEXT: and a1, a4, a1
+; RV64M-NEXT: sub a0, a0, a1
+; RV64M-NEXT: and a1, a0, a2
+; RV64M-NEXT: srli a0, a0, 2
+; RV64M-NEXT: and a0, a0, a2
+; RV64M-NEXT: lui a2, 4112
+; RV64M-NEXT: addiw a2, a2, 257
+; RV64M-NEXT: add a0, a1, a0
+; RV64M-NEXT: srli a1, a0, 4
+; RV64M-NEXT: add a0, a0, a1
+; RV64M-NEXT: slli a1, a2, 32
+; RV64M-NEXT: and a0, a0, a3
+; RV64M-NEXT: add a1, a2, a1
+; RV64M-NEXT: mul a0, a0, a1
+; RV64M-NEXT: srli a0, a0, 56
+; RV64M-NEXT: ret
+; RV64M-NEXT: .LBB11_2:
+; RV64M-NEXT: li a0, 64
+; RV64M-NEXT: ret
;
; RV32ZBB-LABEL: test_ctlz_i64:
; RV32ZBB: # %bb.0:
@@ -1572,20 +1793,41 @@ define i16 @test_ctlz_i16_zero_undef(i16 %a) nounwind {
}
define i32 @test_ctlz_i32_zero_undef(i32 %a) nounwind {
-; RV32_NOZBB-LABEL: test_ctlz_i32_zero_undef:
-; RV32_NOZBB: # %bb.0:
-; RV32_NOZBB-NEXT: srli a1, a0, 1
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 2
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 4
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 8
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: srli a1, a0, 16
-; RV32_NOZBB-NEXT: or a0, a0, a1
-; RV32_NOZBB-NEXT: not a0, a0
-; RV32_NOZBB-NEXT: tail __popcountsi2
+; RV32I-LABEL: test_ctlz_i32_zero_undef:
+; RV32I: # %bb.0:
+; RV32I-NEXT: srli a1, a0, 1
+; RV32I-NEXT: lui a2, 349525
+; RV32I-NEXT: or a0, a0, a1
+; RV32I-NEXT: addi a1, a2, 1365
+; RV32I-NEXT: srli a2, a0, 2
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: srli a2, a0, 4
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEXT: srli a2, a0, 8
+; RV32I-NEXT: or a0, a0, a2
+; RV32I-NEX...
[truncated]
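The test updates above follow directly from the change of lowering action: for example, test_ctlz_i32 smears the leading set bit into every lower position, inverts, and then counts the remaining ones, so the previous tail call to __popcountsi2 is replaced by the inline count. Below is a rough, self-contained C++ sketch of that ctlz-via-ctpop shape, illustrative only; the multiply at the end mirrors the RV32M and ARM output, while the plain RV32I variant uses shifts and adds instead.

#include <cstdint>

// Hedged sketch of the lowering seen in test_ctlz_i32: smear the highest
// set bit rightwards, invert, then popcount the result.
static uint32_t ctlz32_via_ctpop(uint32_t x) {
  if (x == 0)
    return 32;                                       // matches the "li a0, 32" early exit
  x |= x >> 1;                                       // smear the leading one bit
  x |= x >> 2;
  x |= x >> 4;
  x |= x >> 8;
  x |= x >> 16;
  x = ~x;                                            // set bits now mark the leading zeros
  x = x - ((x >> 1) & 0x55555555u);                  // inline popcount, as in the tests
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);
  x = (x + (x >> 4)) & 0x0F0F0F0Fu;
  return (x * 0x01010101u) >> 24;                    // multiply trick (RV32M / ARM variants)
}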
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/16551. Here is the relevant piece of the build log for reference.
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/169/builds/10792. Here is the relevant piece of the build log for reference.