Skip to content

Partially revert 92e18ffd803365c64910760ba20278f875d93681 #101673

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into llvm:main from revert-99752
Aug 2, 2024

Conversation

s-barannikov
Copy link
Contributor

Commit 92e18ffd803365c64910760ba20278f875d93681 is the likely cause of these stage2 build failures:

https://lab.llvm.org/buildbot/#/builders/122/builds/389
https://lab.llvm.org/buildbot/#/builders/79/builds/552

I don't have an ARM machine to investigate, so I'm just reverting the ARM changes to see if that helps make the bots green again.

Commit 92e18ffd803365c64910760ba20278f875d93681 is the likely cause of these stage2 build failures:

https://lab.llvm.org/buildbot/#/builders/122/builds/389
https://lab.llvm.org/buildbot/#/builders/79/builds/552

I don't have an ARM machine to investigate, so I'm just reverting
ARM changes to see if it helps make the bots green again.
@llvmbot
Copy link
Member

llvmbot commented Aug 2, 2024

@llvm/pr-subscribers-backend-arm

Author: Sergei Barannikov (s-barannikov)

Changes

Commit 92e18ffd803365c64910760ba20278f875d93681 is the likely cause of these stage2 build failures:

https://lab.llvm.org/buildbot/#/builders/122/builds/389
https://lab.llvm.org/buildbot/#/builders/79/builds/552

I don't have an ARM machine to investigate, so I'm just reverting the ARM changes to see if that helps make the bots green again.


Full diff: https://github.com/llvm/llvm-project/pull/101673.diff

3 Files Affected:

  • (modified) llvm/lib/Target/ARM/ARMISelLowering.cpp (+1-2)
  • (modified) llvm/test/CodeGen/ARM/popcnt.ll (+59-5)
  • (modified) llvm/test/CodeGen/Thumb2/mve-ctpop.ll (+50-12)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 1d1ea22f6aac4..75d16a42d0205 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1204,8 +1204,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::ROTR, VT, Expand);
   }
   setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
-  setOperationAction(ISD::CTPOP, MVT::i32, LibCall);
-  setOperationAction(ISD::CTPOP, MVT::i64, LibCall);
+  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
   if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
     setOperationAction(ISD::CTLZ, MVT::i32, Expand);
     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);
diff --git a/llvm/test/CodeGen/ARM/popcnt.ll b/llvm/test/CodeGen/ARM/popcnt.ll
index 6c01c516be9d2..edcae5e141e73 100644
--- a/llvm/test/CodeGen/ARM/popcnt.ll
+++ b/llvm/test/CodeGen/ARM/popcnt.ll
@@ -324,7 +324,30 @@ define i32 @ctpop16(i16 %x) nounwind readnone {
 define i32 @ctpop32(i32 %x) nounwind readnone {
 ; CHECK-LABEL: ctpop32:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    b __popcountsi2
+; CHECK-NEXT:    ldr r1, .LCPI22_0
+; CHECK-NEXT:    ldr r2, .LCPI22_3
+; CHECK-NEXT:    and r1, r1, r0, lsr #1
+; CHECK-NEXT:    ldr r12, .LCPI22_1
+; CHECK-NEXT:    sub r0, r0, r1
+; CHECK-NEXT:    ldr r3, .LCPI22_2
+; CHECK-NEXT:    and r1, r0, r2
+; CHECK-NEXT:    and r0, r2, r0, lsr #2
+; CHECK-NEXT:    add r0, r1, r0
+; CHECK-NEXT:    add r0, r0, r0, lsr #4
+; CHECK-NEXT:    and r0, r0, r12
+; CHECK-NEXT:    mul r1, r0, r3
+; CHECK-NEXT:    lsr r0, r1, #24
+; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI22_0:
+; CHECK-NEXT:    .long 1431655765 @ 0x55555555
+; CHECK-NEXT:  .LCPI22_1:
+; CHECK-NEXT:    .long 252645135 @ 0xf0f0f0f
+; CHECK-NEXT:  .LCPI22_2:
+; CHECK-NEXT:    .long 16843009 @ 0x1010101
+; CHECK-NEXT:  .LCPI22_3:
+; CHECK-NEXT:    .long 858993459 @ 0x33333333
   %count = tail call i32 @llvm.ctpop.i32(i32 %x)
   ret i32 %count
 }
@@ -332,11 +355,42 @@ define i32 @ctpop32(i32 %x) nounwind readnone {
 define i32 @ctpop64(i64 %x) nounwind readnone {
 ; CHECK-LABEL: ctpop64:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    .save {r11, lr}
-; CHECK-NEXT:    push {r11, lr}
-; CHECK-NEXT:    bl __popcountdi2
-; CHECK-NEXT:    pop {r11, lr}
+; CHECK-NEXT:    .save {r4, lr}
+; CHECK-NEXT:    push {r4, lr}
+; CHECK-NEXT:    ldr r2, .LCPI23_0
+; CHECK-NEXT:    ldr r3, .LCPI23_3
+; CHECK-NEXT:    and r4, r2, r0, lsr #1
+; CHECK-NEXT:    and r2, r2, r1, lsr #1
+; CHECK-NEXT:    sub r0, r0, r4
+; CHECK-NEXT:    sub r1, r1, r2
+; CHECK-NEXT:    and r4, r0, r3
+; CHECK-NEXT:    and r2, r1, r3
+; CHECK-NEXT:    and r0, r3, r0, lsr #2
+; CHECK-NEXT:    and r1, r3, r1, lsr #2
+; CHECK-NEXT:    add r0, r4, r0
+; CHECK-NEXT:    ldr lr, .LCPI23_1
+; CHECK-NEXT:    add r1, r2, r1
+; CHECK-NEXT:    ldr r12, .LCPI23_2
+; CHECK-NEXT:    add r0, r0, r0, lsr #4
+; CHECK-NEXT:    and r0, r0, lr
+; CHECK-NEXT:    add r1, r1, r1, lsr #4
+; CHECK-NEXT:    mul r2, r0, r12
+; CHECK-NEXT:    and r0, r1, lr
+; CHECK-NEXT:    mul r1, r0, r12
+; CHECK-NEXT:    lsr r0, r2, #24
+; CHECK-NEXT:    add r0, r0, r1, lsr #24
+; CHECK-NEXT:    pop {r4, lr}
 ; CHECK-NEXT:    mov pc, lr
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI23_0:
+; CHECK-NEXT:    .long 1431655765 @ 0x55555555
+; CHECK-NEXT:  .LCPI23_1:
+; CHECK-NEXT:    .long 252645135 @ 0xf0f0f0f
+; CHECK-NEXT:  .LCPI23_2:
+; CHECK-NEXT:    .long 16843009 @ 0x1010101
+; CHECK-NEXT:  .LCPI23_3:
+; CHECK-NEXT:    .long 858993459 @ 0x33333333
   %count = tail call i64 @llvm.ctpop.i64(i64 %x)
   %conv = trunc i64 %count to i32
   ret i32 %conv
diff --git a/llvm/test/CodeGen/Thumb2/mve-ctpop.ll b/llvm/test/CodeGen/Thumb2/mve-ctpop.ll
index 670568f50a6fe..724bd4f7963b8 100644
--- a/llvm/test/CodeGen/Thumb2/mve-ctpop.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-ctpop.ll
@@ -1,4 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autoenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK
 
 define arm_aapcs_vfpcc <2 x i64> @ctpop_2i64_t(<2 x i64> %src){
@@ -6,19 +7,56 @@ define arm_aapcs_vfpcc <2 x i64> @ctpop_2i64_t(<2 x i64> %src){
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    .save {r4, r5, r7, lr}
 ; CHECK-NEXT:    push {r4, r5, r7, lr}
-; CHECK-NEXT:    .vsave {d8, d9}
-; CHECK-NEXT:    vpush {d8, d9}
-; CHECK-NEXT:    vmov q4, q0
-; CHECK-NEXT:    vmov r0, r1, d9
-; CHECK-NEXT:    bl __popcountdi2
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    mov r5, r1
-; CHECK-NEXT:    vmov r0, r1, d8
-; CHECK-NEXT:    bl __popcountdi2
-; CHECK-NEXT:    vmov q0[2], q0[0], r0, r4
-; CHECK-NEXT:    vmov q0[3], q0[1], r1, r5
-; CHECK-NEXT:    vpop {d8, d9}
+; CHECK-NEXT:    vmov r1, r2, d1
+; CHECK-NEXT:    mov.w lr, #1431655765
+; CHECK-NEXT:    vmov r3, r4, d0
+; CHECK-NEXT:    mov.w r12, #858993459
+; CHECK-NEXT:    vldr s1, .LCPI0_0
+; CHECK-NEXT:    vmov.f32 s3, s1
+; CHECK-NEXT:    and.w r0, lr, r2, lsr #1
+; CHECK-NEXT:    subs r0, r2, r0
+; CHECK-NEXT:    and.w r2, r12, r0, lsr #2
+; CHECK-NEXT:    bic r0, r0, #-858993460
+; CHECK-NEXT:    add r0, r2
+; CHECK-NEXT:    and.w r2, lr, r1, lsr #1
+; CHECK-NEXT:    subs r1, r1, r2
+; CHECK-NEXT:    add.w r0, r0, r0, lsr #4
+; CHECK-NEXT:    and.w r2, r12, r1, lsr #2
+; CHECK-NEXT:    bic r1, r1, #-858993460
+; CHECK-NEXT:    add r1, r2
+; CHECK-NEXT:    and.w r2, lr, r3, lsr #1
+; CHECK-NEXT:    subs r2, r3, r2
+; CHECK-NEXT:    bic r5, r0, #-252645136
+; CHECK-NEXT:    add.w r1, r1, r1, lsr #4
+; CHECK-NEXT:    mov.w r0, #16843009
+; CHECK-NEXT:    and.w r3, r12, r2, lsr #2
+; CHECK-NEXT:    bic r2, r2, #-858993460
+; CHECK-NEXT:    add r2, r3
+; CHECK-NEXT:    and.w r3, lr, r4, lsr #1
+; CHECK-NEXT:    subs r3, r4, r3
+; CHECK-NEXT:    bic r1, r1, #-252645136
+; CHECK-NEXT:    add.w r2, r2, r2, lsr #4
+; CHECK-NEXT:    muls r5, r0, r5
+; CHECK-NEXT:    and.w r4, r12, r3, lsr #2
+; CHECK-NEXT:    bic r3, r3, #-858993460
+; CHECK-NEXT:    bic r2, r2, #-252645136
+; CHECK-NEXT:    add r3, r4
+; CHECK-NEXT:    muls r1, r0, r1
+; CHECK-NEXT:    add.w r3, r3, r3, lsr #4
+; CHECK-NEXT:    muls r2, r0, r2
+; CHECK-NEXT:    bic r3, r3, #-252645136
+; CHECK-NEXT:    muls r0, r3, r0
+; CHECK-NEXT:    lsrs r1, r1, #24
+; CHECK-NEXT:    add.w r1, r1, r5, lsr #24
+; CHECK-NEXT:    lsrs r2, r2, #24
+; CHECK-NEXT:    vmov s2, r1
+; CHECK-NEXT:    add.w r0, r2, r0, lsr #24
+; CHECK-NEXT:    vmov s0, r0
 ; CHECK-NEXT:    pop {r4, r5, r7, pc}
+; CHECK-NEXT:    .p2align 2
+; CHECK-NEXT:  @ %bb.1:
+; CHECK-NEXT:  .LCPI0_0:
+; CHECK-NEXT:    .long 0x00000000 @ float 0
 entry:
   %0 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %src)
   ret <2 x i64> %0

@s-barannikov s-barannikov merged commit 411d31a into llvm:main Aug 2, 2024
6 of 8 checks passed
@s-barannikov s-barannikov deleted the revert-99752 branch August 2, 2024 13:38
@s-barannikov s-barannikov requested a review from petrhosek August 2, 2024 13:42
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

2 participants