Skip to content

Commit e6b2fa6

Browse files
committed
[SDag][ARM][RISCV] Allow lowering CTPOP into a libcall (llvm#99752)
The main change is adding CTPOP to `RuntimeLibcalls.def` so that targets can use the LibCall action for CTPOP. The DAG legalizers are updated accordingly.
1 parent 53e8790 commit e6b2fa6

20 files changed

+434
-1722
lines changed

llvm/include/llvm/IR/RuntimeLibcalls.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ HANDLE_LIBCALL(NEG_I64, "__negdi2")
8585
HANDLE_LIBCALL(CTLZ_I32, "__clzsi2")
8686
HANDLE_LIBCALL(CTLZ_I64, "__clzdi2")
8787
HANDLE_LIBCALL(CTLZ_I128, "__clzti2")
88+
HANDLE_LIBCALL(CTPOP_I32, "__popcountsi2")
89+
HANDLE_LIBCALL(CTPOP_I64, "__popcountdi2")
90+
HANDLE_LIBCALL(CTPOP_I128, "__popcountti2")
8891

8992
// Floating-point
9093
HANDLE_LIBCALL(ADD_F32, "__addsf3")

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 17 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -140,12 +140,9 @@ class SelectionDAGLegalize {
140140
RTLIB::Libcall Call_F128,
141141
RTLIB::Libcall Call_PPCF128,
142142
SmallVectorImpl<SDValue> &Results);
143-
SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
144-
RTLIB::Libcall Call_I8,
145-
RTLIB::Libcall Call_I16,
146-
RTLIB::Libcall Call_I32,
147-
RTLIB::Libcall Call_I64,
148-
RTLIB::Libcall Call_I128);
143+
SDValue ExpandIntLibCall(SDNode *Node, bool IsSigned, RTLIB::Libcall Call_I8,
144+
RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
145+
RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128);
149146
void ExpandArgFPLibCall(SDNode *Node,
150147
RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
151148
RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
@@ -2209,7 +2206,7 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
22092206
ExpandFPLibCall(Node, LC, Results);
22102207
}
22112208

2212-
SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
2209+
SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode *Node, bool IsSigned,
22132210
RTLIB::Libcall Call_I8,
22142211
RTLIB::Libcall Call_I16,
22152212
RTLIB::Libcall Call_I32,
@@ -2224,7 +2221,9 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
22242221
case MVT::i64: LC = Call_I64; break;
22252222
case MVT::i128: LC = Call_I128; break;
22262223
}
2227-
return ExpandLibCall(LC, Node, isSigned).first;
2224+
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
2225+
"LibCall explicitly requested, but not available");
2226+
return ExpandLibCall(LC, Node, IsSigned).first;
22282227
}
22292228

22302229
/// Expand the node to a libcall based on first argument type (for instance
@@ -5000,19 +4999,16 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
50004999
RTLIB::MUL_I64, RTLIB::MUL_I128));
50015000
break;
50025001
case ISD::CTLZ_ZERO_UNDEF:
5003-
switch (Node->getSimpleValueType(0).SimpleTy) {
5004-
default:
5005-
llvm_unreachable("LibCall explicitly requested, but not available");
5006-
case MVT::i32:
5007-
Results.push_back(ExpandLibCall(RTLIB::CTLZ_I32, Node, false).first);
5008-
break;
5009-
case MVT::i64:
5010-
Results.push_back(ExpandLibCall(RTLIB::CTLZ_I64, Node, false).first);
5011-
break;
5012-
case MVT::i128:
5013-
Results.push_back(ExpandLibCall(RTLIB::CTLZ_I128, Node, false).first);
5014-
break;
5015-
}
5002+
Results.push_back(ExpandIntLibCall(Node, /*IsSigned=*/false,
5003+
RTLIB::UNKNOWN_LIBCALL,
5004+
RTLIB::UNKNOWN_LIBCALL, RTLIB::CTLZ_I32,
5005+
RTLIB::CTLZ_I64, RTLIB::CTLZ_I128));
5006+
break;
5007+
case ISD::CTPOP:
5008+
Results.push_back(ExpandIntLibCall(Node, /*IsSigned=*/false,
5009+
RTLIB::UNKNOWN_LIBCALL,
5010+
RTLIB::UNKNOWN_LIBCALL, RTLIB::CTPOP_I32,
5011+
RTLIB::CTPOP_I64, RTLIB::CTPOP_I128));
50165012
break;
50175013
case ISD::RESET_FPENV: {
50185014
// It is legalized to call 'fesetenv(FE_DFL_ENV)'. On most targets

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3850,15 +3850,33 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
38503850
Hi = DAG.getConstant(0, dl, NVT);
38513851
}
38523852

3853-
void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
3854-
SDValue &Lo, SDValue &Hi) {
3855-
SDLoc dl(N);
3853+
void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, SDValue &Lo, SDValue &Hi) {
3854+
SDValue Op = N->getOperand(0);
3855+
EVT VT = N->getValueType(0);
3856+
SDLoc DL(N);
3857+
3858+
if (TLI.getOperationAction(ISD::CTPOP, VT) == TargetLoweringBase::LibCall) {
3859+
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
3860+
if (VT == MVT::i32)
3861+
LC = RTLIB::CTPOP_I32;
3862+
else if (VT == MVT::i64)
3863+
LC = RTLIB::CTPOP_I64;
3864+
else if (VT == MVT::i128)
3865+
LC = RTLIB::CTPOP_I128;
3866+
assert(LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC) &&
3867+
"LibCall explicitly requested, but not available");
3868+
TargetLowering::MakeLibCallOptions CallOptions;
3869+
SDValue Res = TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, DL).first;
3870+
SplitInteger(Res, Lo, Hi);
3871+
return;
3872+
}
3873+
38563874
// ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
3857-
GetExpandedInteger(N->getOperand(0), Lo, Hi);
3875+
GetExpandedInteger(Op, Lo, Hi);
38583876
EVT NVT = Lo.getValueType();
3859-
Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
3860-
DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
3861-
Hi = DAG.getConstant(0, dl, NVT);
3877+
Lo = DAG.getNode(ISD::ADD, DL, NVT, DAG.getNode(ISD::CTPOP, DL, NVT, Lo),
3878+
DAG.getNode(ISD::CTPOP, DL, NVT, Hi));
3879+
Hi = DAG.getConstant(0, DL, NVT);
38623880
}
38633881

38643882
void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9161,8 +9161,9 @@ SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
91619161
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
91629162
return SDValue();
91639163

9164-
// Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
9165-
if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
9164+
// Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
9165+
// to be expanded or converted to a libcall.
9166+
if (!VT.isVector() && !isOperationLegalOrCustomOrPromote(ISD::CTPOP, VT) &&
91669167
!isOperationLegal(ISD::CTLZ, VT))
91679168
if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
91689169
return V;

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1204,7 +1204,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
12041204
setOperationAction(ISD::ROTR, VT, Expand);
12051205
}
12061206
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
1207-
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
1207+
setOperationAction(ISD::CTPOP, MVT::i32, LibCall);
1208+
setOperationAction(ISD::CTPOP, MVT::i64, LibCall);
12081209
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
12091210
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
12101211
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
393393
setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
394394
}
395395
} else {
396-
setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
396+
setOperationAction(ISD::CTTZ, XLenVT, Expand);
397+
if (!Subtarget.is64Bit())
398+
setOperationAction(ISD::CTPOP, MVT::i32, LibCall);
399+
setOperationAction(ISD::CTPOP, MVT::i64, LibCall);
397400
if (RV64LegalI32 && Subtarget.is64Bit())
398401
setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);
399402
}

llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ define void @bitreverse() {
159159

160160
define void @ctpop() {
161161
; NOZVBB-LABEL: 'ctpop'
162-
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
162+
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
163163
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %2 = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> undef)
164164
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %3 = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
165165
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %4 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
@@ -169,7 +169,7 @@ define void @ctpop() {
169169
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %8 = call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> undef)
170170
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %9 = call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> undef)
171171
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %10 = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> undef)
172-
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
172+
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
173173
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %12 = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
174174
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %13 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
175175
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %14 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
@@ -179,7 +179,7 @@ define void @ctpop() {
179179
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %18 = call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> undef)
180180
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %19 = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> undef)
181181
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %20 = call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> undef)
182-
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
182+
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
183183
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %22 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
184184
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %23 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
185185
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %24 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
@@ -189,7 +189,7 @@ define void @ctpop() {
189189
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %28 = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> undef)
190190
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %29 = call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> undef)
191191
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %30 = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> undef)
192-
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
192+
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
193193
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %32 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
194194
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %33 = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
195195
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %34 = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)
@@ -202,7 +202,7 @@ define void @ctpop() {
202202
; NOZVBB-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
203203
;
204204
; ZVBB-LABEL: 'ctpop'
205-
; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
205+
; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
206206
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> undef)
207207
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
208208
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
@@ -212,7 +212,7 @@ define void @ctpop() {
212212
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> undef)
213213
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> undef)
214214
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> undef)
215-
; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
215+
; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
216216
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
217217
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
218218
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
@@ -222,7 +222,7 @@ define void @ctpop() {
222222
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> undef)
223223
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> undef)
224224
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> undef)
225-
; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
225+
; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
226226
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
227227
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
228228
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
@@ -232,7 +232,7 @@ define void @ctpop() {
232232
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> undef)
233233
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> undef)
234234
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> undef)
235-
; ZVBB-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
235+
; ZVBB-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
236236
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
237237
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
238238
; ZVBB-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)

llvm/test/CodeGen/ARM/popcnt.ll

Lines changed: 5 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -324,73 +324,19 @@ define i32 @ctpop16(i16 %x) nounwind readnone {
324324
define i32 @ctpop32(i32 %x) nounwind readnone {
325325
; CHECK-LABEL: ctpop32:
326326
; CHECK: @ %bb.0:
327-
; CHECK-NEXT: ldr r1, .LCPI22_0
328-
; CHECK-NEXT: ldr r2, .LCPI22_3
329-
; CHECK-NEXT: and r1, r1, r0, lsr #1
330-
; CHECK-NEXT: ldr r12, .LCPI22_1
331-
; CHECK-NEXT: sub r0, r0, r1
332-
; CHECK-NEXT: ldr r3, .LCPI22_2
333-
; CHECK-NEXT: and r1, r0, r2
334-
; CHECK-NEXT: and r0, r2, r0, lsr #2
335-
; CHECK-NEXT: add r0, r1, r0
336-
; CHECK-NEXT: add r0, r0, r0, lsr #4
337-
; CHECK-NEXT: and r0, r0, r12
338-
; CHECK-NEXT: mul r1, r0, r3
339-
; CHECK-NEXT: lsr r0, r1, #24
340-
; CHECK-NEXT: mov pc, lr
341-
; CHECK-NEXT: .p2align 2
342-
; CHECK-NEXT: @ %bb.1:
343-
; CHECK-NEXT: .LCPI22_0:
344-
; CHECK-NEXT: .long 1431655765 @ 0x55555555
345-
; CHECK-NEXT: .LCPI22_1:
346-
; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
347-
; CHECK-NEXT: .LCPI22_2:
348-
; CHECK-NEXT: .long 16843009 @ 0x1010101
349-
; CHECK-NEXT: .LCPI22_3:
350-
; CHECK-NEXT: .long 858993459 @ 0x33333333
327+
; CHECK-NEXT: b __popcountsi2
351328
%count = tail call i32 @llvm.ctpop.i32(i32 %x)
352329
ret i32 %count
353330
}
354331

355332
define i32 @ctpop64(i64 %x) nounwind readnone {
356333
; CHECK-LABEL: ctpop64:
357334
; CHECK: @ %bb.0:
358-
; CHECK-NEXT: .save {r4, lr}
359-
; CHECK-NEXT: push {r4, lr}
360-
; CHECK-NEXT: ldr r2, .LCPI23_0
361-
; CHECK-NEXT: ldr r3, .LCPI23_3
362-
; CHECK-NEXT: and r4, r2, r0, lsr #1
363-
; CHECK-NEXT: and r2, r2, r1, lsr #1
364-
; CHECK-NEXT: sub r0, r0, r4
365-
; CHECK-NEXT: sub r1, r1, r2
366-
; CHECK-NEXT: and r4, r0, r3
367-
; CHECK-NEXT: and r2, r1, r3
368-
; CHECK-NEXT: and r0, r3, r0, lsr #2
369-
; CHECK-NEXT: and r1, r3, r1, lsr #2
370-
; CHECK-NEXT: add r0, r4, r0
371-
; CHECK-NEXT: ldr lr, .LCPI23_1
372-
; CHECK-NEXT: add r1, r2, r1
373-
; CHECK-NEXT: ldr r12, .LCPI23_2
374-
; CHECK-NEXT: add r0, r0, r0, lsr #4
375-
; CHECK-NEXT: and r0, r0, lr
376-
; CHECK-NEXT: add r1, r1, r1, lsr #4
377-
; CHECK-NEXT: mul r2, r0, r12
378-
; CHECK-NEXT: and r0, r1, lr
379-
; CHECK-NEXT: mul r1, r0, r12
380-
; CHECK-NEXT: lsr r0, r2, #24
381-
; CHECK-NEXT: add r0, r0, r1, lsr #24
382-
; CHECK-NEXT: pop {r4, lr}
335+
; CHECK-NEXT: .save {r11, lr}
336+
; CHECK-NEXT: push {r11, lr}
337+
; CHECK-NEXT: bl __popcountdi2
338+
; CHECK-NEXT: pop {r11, lr}
383339
; CHECK-NEXT: mov pc, lr
384-
; CHECK-NEXT: .p2align 2
385-
; CHECK-NEXT: @ %bb.1:
386-
; CHECK-NEXT: .LCPI23_0:
387-
; CHECK-NEXT: .long 1431655765 @ 0x55555555
388-
; CHECK-NEXT: .LCPI23_1:
389-
; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
390-
; CHECK-NEXT: .LCPI23_2:
391-
; CHECK-NEXT: .long 16843009 @ 0x1010101
392-
; CHECK-NEXT: .LCPI23_3:
393-
; CHECK-NEXT: .long 858993459 @ 0x33333333
394340
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
395341
%conv = trunc i64 %count to i32
396342
ret i32 %conv

0 commit comments

Comments
 (0)