Skip to content

Partially revert 92e18ffd803365c64910760ba20278f875d93681 #101673

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1204,8 +1204,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ROTR, VT, Expand);
}
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i32, LibCall);
setOperationAction(ISD::CTPOP, MVT::i64, LibCall);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);
Expand Down
64 changes: 59 additions & 5 deletions llvm/test/CodeGen/ARM/popcnt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -324,19 +324,73 @@ define i32 @ctpop16(i16 %x) nounwind readnone {
; ctpop32: returns the number of set bits in the i32 argument by calling the
; @llvm.ctpop.i32 intrinsic. The CHECK lines pin the assembly FileCheck expects
; for this function: a branch-free bit-counting sequence (mask with 0x55555555,
; 0x33333333 and 0x0f0f0f0f, multiply by 0x01010101, shift right by 24) whose
; four constants are loaded from the literal pool emitted after the return.
; NOTE(review): the "b __popcountsi2" line and the inline sequence that follows
; it look like the removed and added sides of one diff hunk (this view carries
; no +/- markers); presumably only one of them belongs in the real test file --
; confirm against the checked-in popcnt.ll before relying on these lines.
define i32 @ctpop32(i32 %x) nounwind readnone {
; CHECK-LABEL: ctpop32:
; CHECK: @ %bb.0:
; CHECK-NEXT: b __popcountsi2
; CHECK-NEXT: ldr r1, .LCPI22_0
; CHECK-NEXT: ldr r2, .LCPI22_3
; CHECK-NEXT: and r1, r1, r0, lsr #1
; CHECK-NEXT: ldr r12, .LCPI22_1
; CHECK-NEXT: sub r0, r0, r1
; CHECK-NEXT: ldr r3, .LCPI22_2
; CHECK-NEXT: and r1, r0, r2
; CHECK-NEXT: and r0, r2, r0, lsr #2
; CHECK-NEXT: add r0, r1, r0
; CHECK-NEXT: add r0, r0, r0, lsr #4
; CHECK-NEXT: and r0, r0, r12
; CHECK-NEXT: mul r1, r0, r3
; CHECK-NEXT: lsr r0, r1, #24
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI22_0:
; CHECK-NEXT: .long 1431655765 @ 0x55555555
; CHECK-NEXT: .LCPI22_1:
; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
; CHECK-NEXT: .LCPI22_2:
; CHECK-NEXT: .long 16843009 @ 0x1010101
; CHECK-NEXT: .LCPI22_3:
; CHECK-NEXT: .long 858993459 @ 0x33333333
%count = tail call i32 @llvm.ctpop.i32(i32 %x)
ret i32 %count
}

define i32 @ctpop64(i64 %x) nounwind readnone {
; CHECK-LABEL: ctpop64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: bl __popcountdi2
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: ldr r2, .LCPI23_0
; CHECK-NEXT: ldr r3, .LCPI23_3
; CHECK-NEXT: and r4, r2, r0, lsr #1
; CHECK-NEXT: and r2, r2, r1, lsr #1
; CHECK-NEXT: sub r0, r0, r4
; CHECK-NEXT: sub r1, r1, r2
; CHECK-NEXT: and r4, r0, r3
; CHECK-NEXT: and r2, r1, r3
; CHECK-NEXT: and r0, r3, r0, lsr #2
; CHECK-NEXT: and r1, r3, r1, lsr #2
; CHECK-NEXT: add r0, r4, r0
; CHECK-NEXT: ldr lr, .LCPI23_1
; CHECK-NEXT: add r1, r2, r1
; CHECK-NEXT: ldr r12, .LCPI23_2
; CHECK-NEXT: add r0, r0, r0, lsr #4
; CHECK-NEXT: and r0, r0, lr
; CHECK-NEXT: add r1, r1, r1, lsr #4
; CHECK-NEXT: mul r2, r0, r12
; CHECK-NEXT: and r0, r1, lr
; CHECK-NEXT: mul r1, r0, r12
; CHECK-NEXT: lsr r0, r2, #24
; CHECK-NEXT: add r0, r0, r1, lsr #24
; CHECK-NEXT: pop {r4, lr}
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI23_0:
; CHECK-NEXT: .long 1431655765 @ 0x55555555
; CHECK-NEXT: .LCPI23_1:
; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
; CHECK-NEXT: .LCPI23_2:
; CHECK-NEXT: .long 16843009 @ 0x1010101
; CHECK-NEXT: .LCPI23_3:
; CHECK-NEXT: .long 858993459 @ 0x33333333
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
%conv = trunc i64 %count to i32
ret i32 %conv
Expand Down
62 changes: 50 additions & 12 deletions llvm/test/CodeGen/Thumb2/mve-ctpop.ll
Original file line number Diff line number Diff line change
@@ -1,24 +1,62 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECK

define arm_aapcs_vfpcc <2 x i64> @ctpop_2i64_t(<2 x i64> %src){
; CHECK-LABEL: ctpop_2i64_t:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov r0, r1, d9
; CHECK-NEXT: bl __popcountdi2
; CHECK-NEXT: mov r4, r0
; CHECK-NEXT: mov r5, r1
; CHECK-NEXT: vmov r0, r1, d8
; CHECK-NEXT: bl __popcountdi2
; CHECK-NEXT: vmov q0[2], q0[0], r0, r4
; CHECK-NEXT: vmov q0[3], q0[1], r1, r5
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: vmov r1, r2, d1
; CHECK-NEXT: mov.w lr, #1431655765
; CHECK-NEXT: vmov r3, r4, d0
; CHECK-NEXT: mov.w r12, #858993459
; CHECK-NEXT: vldr s1, .LCPI0_0
; CHECK-NEXT: vmov.f32 s3, s1
; CHECK-NEXT: and.w r0, lr, r2, lsr #1
; CHECK-NEXT: subs r0, r2, r0
; CHECK-NEXT: and.w r2, r12, r0, lsr #2
; CHECK-NEXT: bic r0, r0, #-858993460
; CHECK-NEXT: add r0, r2
; CHECK-NEXT: and.w r2, lr, r1, lsr #1
; CHECK-NEXT: subs r1, r1, r2
; CHECK-NEXT: add.w r0, r0, r0, lsr #4
; CHECK-NEXT: and.w r2, r12, r1, lsr #2
; CHECK-NEXT: bic r1, r1, #-858993460
; CHECK-NEXT: add r1, r2
; CHECK-NEXT: and.w r2, lr, r3, lsr #1
; CHECK-NEXT: subs r2, r3, r2
; CHECK-NEXT: bic r5, r0, #-252645136
; CHECK-NEXT: add.w r1, r1, r1, lsr #4
; CHECK-NEXT: mov.w r0, #16843009
; CHECK-NEXT: and.w r3, r12, r2, lsr #2
; CHECK-NEXT: bic r2, r2, #-858993460
; CHECK-NEXT: add r2, r3
; CHECK-NEXT: and.w r3, lr, r4, lsr #1
; CHECK-NEXT: subs r3, r4, r3
; CHECK-NEXT: bic r1, r1, #-252645136
; CHECK-NEXT: add.w r2, r2, r2, lsr #4
; CHECK-NEXT: muls r5, r0, r5
; CHECK-NEXT: and.w r4, r12, r3, lsr #2
; CHECK-NEXT: bic r3, r3, #-858993460
; CHECK-NEXT: bic r2, r2, #-252645136
; CHECK-NEXT: add r3, r4
; CHECK-NEXT: muls r1, r0, r1
; CHECK-NEXT: add.w r3, r3, r3, lsr #4
; CHECK-NEXT: muls r2, r0, r2
; CHECK-NEXT: bic r3, r3, #-252645136
; CHECK-NEXT: muls r0, r3, r0
; CHECK-NEXT: lsrs r1, r1, #24
; CHECK-NEXT: add.w r1, r1, r5, lsr #24
; CHECK-NEXT: lsrs r2, r2, #24
; CHECK-NEXT: vmov s2, r1
; CHECK-NEXT: add.w r0, r2, r0, lsr #24
; CHECK-NEXT: vmov s0, r0
; CHECK-NEXT: pop {r4, r5, r7, pc}
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI0_0:
; CHECK-NEXT: .long 0x00000000 @ float 0
entry:
%0 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %src)
ret <2 x i64> %0
Expand Down
Loading