Skip to content

[SDag][ARM][RISCV] Allow lowering CTPOP into a libcall #99752

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Aug 2, 2024
3 changes: 3 additions & 0 deletions llvm/include/llvm/IR/RuntimeLibcalls.def
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ HANDLE_LIBCALL(NEG_I64, "__negdi2")
HANDLE_LIBCALL(CTLZ_I32, "__clzsi2")
HANDLE_LIBCALL(CTLZ_I64, "__clzdi2")
HANDLE_LIBCALL(CTLZ_I128, "__clzti2")
HANDLE_LIBCALL(CTPOP_I32, "__popcountsi2")
HANDLE_LIBCALL(CTPOP_I64, "__popcountdi2")
HANDLE_LIBCALL(CTPOP_I128, "__popcountti2")

// Floating-point
HANDLE_LIBCALL(ADD_F32, "__addsf3")
Expand Down
38 changes: 17 additions & 21 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,12 +140,9 @@ class SelectionDAGLegalize {
RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128,
SmallVectorImpl<SDValue> &Results);
SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
RTLIB::Libcall Call_I8,
RTLIB::Libcall Call_I16,
RTLIB::Libcall Call_I32,
RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128);
SDValue ExpandIntLibCall(SDNode *Node, bool IsSigned, RTLIB::Libcall Call_I8,
RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128);
Comment on lines +143 to +145
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like a lot of unrelated format changes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I renamed isSigned -> IsSigned while I was here, clang-format insisted that I reformat this code fragment.
Do I need to revert these changes?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, didn't notice the capital letter change. I don't think it's necessary to change then.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can precommit this unrelated change

void ExpandArgFPLibCall(SDNode *Node,
RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
Expand Down Expand Up @@ -2209,7 +2206,7 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
ExpandFPLibCall(Node, LC, Results);
}

SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode *Node, bool IsSigned,
RTLIB::Libcall Call_I8,
RTLIB::Libcall Call_I16,
RTLIB::Libcall Call_I32,
Expand All @@ -2224,7 +2221,9 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
case MVT::i64: LC = Call_I64; break;
case MVT::i128: LC = Call_I128; break;
}
return ExpandLibCall(LC, Node, isSigned).first;
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"LibCall explicitly requested, but not available");
return ExpandLibCall(LC, Node, IsSigned).first;
}

/// Expand the node to a libcall based on first argument type (for instance
Expand Down Expand Up @@ -5000,19 +4999,16 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::MUL_I64, RTLIB::MUL_I128));
break;
case ISD::CTLZ_ZERO_UNDEF:
switch (Node->getSimpleValueType(0).SimpleTy) {
default:
llvm_unreachable("LibCall explicitly requested, but not available");
case MVT::i32:
Results.push_back(ExpandLibCall(RTLIB::CTLZ_I32, Node, false).first);
break;
case MVT::i64:
Results.push_back(ExpandLibCall(RTLIB::CTLZ_I64, Node, false).first);
break;
case MVT::i128:
Results.push_back(ExpandLibCall(RTLIB::CTLZ_I128, Node, false).first);
break;
}
Results.push_back(ExpandIntLibCall(Node, /*IsSigned=*/false,
RTLIB::UNKNOWN_LIBCALL,
RTLIB::UNKNOWN_LIBCALL, RTLIB::CTLZ_I32,
RTLIB::CTLZ_I64, RTLIB::CTLZ_I128));
break;
case ISD::CTPOP:
Results.push_back(ExpandIntLibCall(Node, /*IsSigned=*/false,
RTLIB::UNKNOWN_LIBCALL,
RTLIB::UNKNOWN_LIBCALL, RTLIB::CTPOP_I32,
RTLIB::CTPOP_I64, RTLIB::CTPOP_I128));
break;
case ISD::RESET_FPENV: {
// It is legalized to call 'fesetenv(FE_DFL_ENV)'. On most targets
Expand Down
32 changes: 25 additions & 7 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3850,15 +3850,33 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
Hi = DAG.getConstant(0, dl, NVT);
}

void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Op = N->getOperand(0);
EVT VT = N->getValueType(0);
SDLoc DL(N);

if (TLI.getOperationAction(ISD::CTPOP, VT) == TargetLoweringBase::LibCall) {
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i32)
LC = RTLIB::CTPOP_I32;
else if (VT == MVT::i64)
LC = RTLIB::CTPOP_I64;
else if (VT == MVT::i128)
LC = RTLIB::CTPOP_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC) &&
"LibCall explicitly requested, but not available");
TargetLowering::MakeLibCallOptions CallOptions;
SDValue Res = TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, DL).first;
SplitInteger(Res, Lo, Hi);
return;
}

// ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
GetExpandedInteger(N->getOperand(0), Lo, Hi);
GetExpandedInteger(Op, Lo, Hi);
EVT NVT = Lo.getValueType();
Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
Hi = DAG.getConstant(0, dl, NVT);
Lo = DAG.getNode(ISD::ADD, DL, NVT, DAG.getNode(ISD::CTPOP, DL, NVT, Lo),
DAG.getNode(ISD::CTPOP, DL, NVT, Hi));
Hi = DAG.getConstant(0, DL, NVT);
}

void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9161,8 +9161,9 @@ SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
return SDValue();

// Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
// Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
// to be expanded or converted to a libcall.
if (!VT.isVector() && !isOperationLegalOrCustomOrPromote(ISD::CTPOP, VT) &&
!isOperationLegal(ISD::CTLZ, VT))
if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
return V;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1204,7 +1204,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ROTR, VT, Expand);
}
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, LibCall);
setOperationAction(ISD::CTPOP, MVT::i64, LibCall);
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);
Expand Down
5 changes: 4 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
}
} else {
setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
setOperationAction(ISD::CTTZ, XLenVT, Expand);
if (!Subtarget.is64Bit())
setOperationAction(ISD::CTPOP, MVT::i32, LibCall);
setOperationAction(ISD::CTPOP, MVT::i64, LibCall);
if (RV64LegalI32 && Subtarget.is64Bit())
setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);
}
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ define void @bitreverse() {

define void @ctpop() {
; NOZVBB-LABEL: 'ctpop'
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %3 = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %4 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
Expand All @@ -169,7 +169,7 @@ define void @ctpop() {
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %8 = call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %9 = call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %10 = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %12 = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %13 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %14 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
Expand All @@ -179,7 +179,7 @@ define void @ctpop() {
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %18 = call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %19 = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %20 = call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %22 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %23 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %24 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
Expand All @@ -189,7 +189,7 @@ define void @ctpop() {
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %28 = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %29 = call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %30 = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %32 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %33 = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %34 = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)
Expand All @@ -202,7 +202,7 @@ define void @ctpop() {
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; ZVBB-LABEL: 'ctpop'
; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
Expand All @@ -212,7 +212,7 @@ define void @ctpop() {
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
Expand All @@ -222,7 +222,7 @@ define void @ctpop() {
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
Expand All @@ -232,7 +232,7 @@ define void @ctpop() {
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)
Expand Down
64 changes: 5 additions & 59 deletions llvm/test/CodeGen/ARM/popcnt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -324,73 +324,19 @@ define i32 @ctpop16(i16 %x) nounwind readnone {
define i32 @ctpop32(i32 %x) nounwind readnone {
; CHECK-LABEL: ctpop32:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldr r1, .LCPI22_0
; CHECK-NEXT: ldr r2, .LCPI22_3
; CHECK-NEXT: and r1, r1, r0, lsr #1
; CHECK-NEXT: ldr r12, .LCPI22_1
; CHECK-NEXT: sub r0, r0, r1
; CHECK-NEXT: ldr r3, .LCPI22_2
; CHECK-NEXT: and r1, r0, r2
; CHECK-NEXT: and r0, r2, r0, lsr #2
; CHECK-NEXT: add r0, r1, r0
; CHECK-NEXT: add r0, r0, r0, lsr #4
; CHECK-NEXT: and r0, r0, r12
; CHECK-NEXT: mul r1, r0, r3
; CHECK-NEXT: lsr r0, r1, #24
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI22_0:
; CHECK-NEXT: .long 1431655765 @ 0x55555555
; CHECK-NEXT: .LCPI22_1:
; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
; CHECK-NEXT: .LCPI22_2:
; CHECK-NEXT: .long 16843009 @ 0x1010101
; CHECK-NEXT: .LCPI22_3:
; CHECK-NEXT: .long 858993459 @ 0x33333333
; CHECK-NEXT: b __popcountsi2
%count = tail call i32 @llvm.ctpop.i32(i32 %x)
ret i32 %count
}

define i32 @ctpop64(i64 %x) nounwind readnone {
; CHECK-LABEL: ctpop64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: ldr r2, .LCPI23_0
; CHECK-NEXT: ldr r3, .LCPI23_3
; CHECK-NEXT: and r4, r2, r0, lsr #1
; CHECK-NEXT: and r2, r2, r1, lsr #1
; CHECK-NEXT: sub r0, r0, r4
; CHECK-NEXT: sub r1, r1, r2
; CHECK-NEXT: and r4, r0, r3
; CHECK-NEXT: and r2, r1, r3
; CHECK-NEXT: and r0, r3, r0, lsr #2
; CHECK-NEXT: and r1, r3, r1, lsr #2
; CHECK-NEXT: add r0, r4, r0
; CHECK-NEXT: ldr lr, .LCPI23_1
; CHECK-NEXT: add r1, r2, r1
; CHECK-NEXT: ldr r12, .LCPI23_2
; CHECK-NEXT: add r0, r0, r0, lsr #4
; CHECK-NEXT: and r0, r0, lr
; CHECK-NEXT: add r1, r1, r1, lsr #4
; CHECK-NEXT: mul r2, r0, r12
; CHECK-NEXT: and r0, r1, lr
; CHECK-NEXT: mul r1, r0, r12
; CHECK-NEXT: lsr r0, r2, #24
; CHECK-NEXT: add r0, r0, r1, lsr #24
; CHECK-NEXT: pop {r4, lr}
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: bl __popcountdi2
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI23_0:
; CHECK-NEXT: .long 1431655765 @ 0x55555555
; CHECK-NEXT: .LCPI23_1:
; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
; CHECK-NEXT: .LCPI23_2:
; CHECK-NEXT: .long 16843009 @ 0x1010101
; CHECK-NEXT: .LCPI23_3:
; CHECK-NEXT: .long 858993459 @ 0x33333333
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
%conv = trunc i64 %count to i32
ret i32 %conv
Expand Down
Loading
Loading