Skip to content

Commit 11a3de7

Browse files
authored
[SDag][ARM][RISCV] Allow lowering CTPOP into a libcall (#101786)
This is a reland of #99752 with the bug fixed (see the test diff in the third commit of this PR). All `popcount` libcalls return `int`, but `ISD::CTPOP` returns the type of its argument, which can be wider than `int`. The fix is to make the DAG legalizer pass the correct return type to `makeLibCall` and sign-extend the result afterwards. Original commit message: The main change is adding CTPOP to `RuntimeLibcalls.def` to allow targets to use the LibCall action for CTPOP. The DAG legalizers are changed accordingly. Pull Request: #101786
1 parent 0de2f64 commit 11a3de7

18 files changed

+547
-1682
lines changed

llvm/include/llvm/CodeGen/BasicTTIImpl.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2503,7 +2503,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
25032503
return (LT.first * 2);
25042504
else
25052505
return (LT.first * 1);
2506-
} else if (!TLI->isOperationExpand(ISD, LT.second)) {
2506+
} else if (TLI->isOperationCustom(ISD, LT.second)) {
25072507
// If the operation is custom lowered then assume
25082508
// that the code is twice as expensive.
25092509
return (LT.first * 2);

llvm/include/llvm/IR/RuntimeLibcalls.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ HANDLE_LIBCALL(NEG_I64, "__negdi2")
8585
HANDLE_LIBCALL(CTLZ_I32, "__clzsi2")
8686
HANDLE_LIBCALL(CTLZ_I64, "__clzdi2")
8787
HANDLE_LIBCALL(CTLZ_I128, "__clzti2")
88+
HANDLE_LIBCALL(CTPOP_I32, "__popcountsi2")
89+
HANDLE_LIBCALL(CTPOP_I64, "__popcountdi2")
90+
HANDLE_LIBCALL(CTPOP_I128, "__popcountti2")
8891

8992
// Floating-point
9093
HANDLE_LIBCALL(ADD_F32, "__addsf3")

llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

Lines changed: 62 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,8 @@ class SelectionDAGLegalize {
129129
ArrayRef<int> Mask) const;
130130

131131
std::pair<SDValue, SDValue> ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
132-
TargetLowering::ArgListTy &&Args, bool isSigned);
132+
TargetLowering::ArgListTy &&Args,
133+
bool IsSigned, EVT RetVT);
133134
std::pair<SDValue, SDValue> ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
134135

135136
void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall LC,
@@ -150,6 +151,9 @@ class SelectionDAGLegalize {
150151
RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
151152
RTLIB::Libcall Call_PPCF128,
152153
SmallVectorImpl<SDValue> &Results);
154+
SDValue ExpandBitCountingLibCall(SDNode *Node, RTLIB::Libcall CallI32,
155+
RTLIB::Libcall CallI64,
156+
RTLIB::Libcall CallI128);
153157
void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
154158
void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
155159

@@ -2114,9 +2118,10 @@ SDValue SelectionDAGLegalize::ExpandSPLAT_VECTOR(SDNode *Node) {
21142118
// register, return the lo part and set the hi part to the by-reg argument in
21152119
// the first. If it does fit into a single register, return the result and
21162120
// leave the Hi part unset.
2117-
std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
2118-
TargetLowering::ArgListTy &&Args,
2119-
bool isSigned) {
2121+
std::pair<SDValue, SDValue>
2122+
SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
2123+
TargetLowering::ArgListTy &&Args,
2124+
bool IsSigned, EVT RetVT) {
21202125
EVT CodePtrTy = TLI.getPointerTy(DAG.getDataLayout());
21212126
SDValue Callee;
21222127
if (const char *LibcallName = TLI.getLibcallName(LC))
@@ -2127,7 +2132,6 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall L
21272132
Node->getOperationName(&DAG));
21282133
}
21292134

2130-
EVT RetVT = Node->getValueType(0);
21312135
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
21322136

21332137
// By default, the input chain to this libcall is the entry node of the
@@ -2147,7 +2151,7 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall L
21472151
InChain = TCChain;
21482152

21492153
TargetLowering::CallLoweringInfo CLI(DAG);
2150-
bool signExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, isSigned);
2154+
bool signExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, IsSigned);
21512155
CLI.setDebugLoc(SDLoc(Node))
21522156
.setChain(InChain)
21532157
.setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
@@ -2183,7 +2187,8 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall L
21832187
Args.push_back(Entry);
21842188
}
21852189

2186-
return ExpandLibCall(LC, Node, std::move(Args), isSigned);
2190+
return ExpandLibCall(LC, Node, std::move(Args), isSigned,
2191+
Node->getValueType(0));
21872192
}
21882193

21892194
void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
@@ -2259,6 +2264,50 @@ void SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
22592264
ExpandFPLibCall(Node, LC, Results);
22602265
}
22612266

2267+
SDValue SelectionDAGLegalize::ExpandBitCountingLibCall(
2268+
SDNode *Node, RTLIB::Libcall CallI32, RTLIB::Libcall CallI64,
2269+
RTLIB::Libcall CallI128) {
2270+
RTLIB::Libcall LC;
2271+
switch (Node->getSimpleValueType(0).SimpleTy) {
2272+
default:
2273+
llvm_unreachable("Unexpected request for libcall!");
2274+
case MVT::i32:
2275+
LC = CallI32;
2276+
break;
2277+
case MVT::i64:
2278+
LC = CallI64;
2279+
break;
2280+
case MVT::i128:
2281+
LC = CallI128;
2282+
break;
2283+
}
2284+
2285+
// Bit-counting libcalls have one unsigned argument and return `int`.
2286+
// Note that `int` may be illegal on this target; ExpandLibCall will
2287+
// take care of promoting it to a legal type.
2288+
SDValue Op = Node->getOperand(0);
2289+
EVT IntVT =
2290+
EVT::getIntegerVT(*DAG.getContext(), DAG.getLibInfo().getIntSize());
2291+
2292+
TargetLowering::ArgListEntry Arg;
2293+
EVT ArgVT = Op.getValueType();
2294+
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
2295+
Arg.Node = Op;
2296+
Arg.Ty = ArgTy;
2297+
Arg.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, /*IsSigned=*/false);
2298+
Arg.IsZExt = !Arg.IsSExt;
2299+
2300+
SDValue Res = ExpandLibCall(LC, Node, TargetLowering::ArgListTy{Arg},
2301+
/*IsSigned=*/true, IntVT)
2302+
.first;
2303+
2304+
// If ExpandLibCall created a tail call, the result was already
2305+
// of the correct type. Otherwise, we need to sign extend it.
2306+
if (Res.getValueType() != MVT::Other)
2307+
Res = DAG.getSExtOrTrunc(Res, SDLoc(Node), Node->getValueType(0));
2308+
return Res;
2309+
}
2310+
22622311
/// Issue libcalls to __{u}divmod to compute div / rem pairs.
22632312
void
22642313
SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
@@ -4993,19 +5042,12 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
49935042
RTLIB::MUL_I64, RTLIB::MUL_I128));
49945043
break;
49955044
case ISD::CTLZ_ZERO_UNDEF:
4996-
switch (Node->getSimpleValueType(0).SimpleTy) {
4997-
default:
4998-
llvm_unreachable("LibCall explicitly requested, but not available");
4999-
case MVT::i32:
5000-
Results.push_back(ExpandLibCall(RTLIB::CTLZ_I32, Node, false).first);
5001-
break;
5002-
case MVT::i64:
5003-
Results.push_back(ExpandLibCall(RTLIB::CTLZ_I64, Node, false).first);
5004-
break;
5005-
case MVT::i128:
5006-
Results.push_back(ExpandLibCall(RTLIB::CTLZ_I128, Node, false).first);
5007-
break;
5008-
}
5045+
Results.push_back(ExpandBitCountingLibCall(
5046+
Node, RTLIB::CTLZ_I32, RTLIB::CTLZ_I64, RTLIB::CTLZ_I128));
5047+
break;
5048+
case ISD::CTPOP:
5049+
Results.push_back(ExpandBitCountingLibCall(
5050+
Node, RTLIB::CTPOP_I32, RTLIB::CTPOP_I64, RTLIB::CTPOP_I128));
50095051
break;
50105052
case ISD::RESET_FPENV: {
50115053
// It is legalized to call 'fesetenv(FE_DFL_ENV)'. On most targets

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4012,15 +4012,35 @@ void DAGTypeLegalizer::ExpandIntRes_ABD(SDNode *N, SDValue &Lo, SDValue &Hi) {
40124012
SplitInteger(Result, Lo, Hi);
40134013
}
40144014

4015-
void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
4016-
SDValue &Lo, SDValue &Hi) {
4017-
SDLoc dl(N);
4015+
void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, SDValue &Lo, SDValue &Hi) {
4016+
SDValue Op = N->getOperand(0);
4017+
EVT VT = N->getValueType(0);
4018+
SDLoc DL(N);
4019+
4020+
if (TLI.getOperationAction(ISD::CTPOP, VT) == TargetLoweringBase::LibCall) {
4021+
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
4022+
if (VT == MVT::i32)
4023+
LC = RTLIB::CTPOP_I32;
4024+
else if (VT == MVT::i64)
4025+
LC = RTLIB::CTPOP_I64;
4026+
else if (VT == MVT::i128)
4027+
LC = RTLIB::CTPOP_I128;
4028+
assert(LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC) &&
4029+
"LibCall explicitly requested, but not available");
4030+
TargetLowering::MakeLibCallOptions CallOptions;
4031+
EVT IntVT =
4032+
EVT::getIntegerVT(*DAG.getContext(), DAG.getLibInfo().getIntSize());
4033+
SDValue Res = TLI.makeLibCall(DAG, LC, IntVT, Op, CallOptions, DL).first;
4034+
SplitInteger(DAG.getSExtOrTrunc(Res, DL, VT), Lo, Hi);
4035+
return;
4036+
}
4037+
40184038
// ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
4019-
GetExpandedInteger(N->getOperand(0), Lo, Hi);
4039+
GetExpandedInteger(Op, Lo, Hi);
40204040
EVT NVT = Lo.getValueType();
4021-
Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
4022-
DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
4023-
Hi = DAG.getConstant(0, dl, NVT);
4041+
Lo = DAG.getNode(ISD::ADD, DL, NVT, DAG.getNode(ISD::CTPOP, DL, NVT, Lo),
4042+
DAG.getNode(ISD::CTPOP, DL, NVT, Hi));
4043+
Hi = DAG.getConstant(0, DL, NVT);
40244044
}
40254045

40264046
void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9390,8 +9390,9 @@ SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
93909390
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
93919391
return SDValue();
93929392

9393-
// Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
9394-
if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
9393+
// Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
9394+
// to be expanded or converted to a libcall.
9395+
if (!VT.isVector() && !isOperationLegalOrCustomOrPromote(ISD::CTPOP, VT) &&
93959396
!isOperationLegal(ISD::CTLZ, VT))
93969397
if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
93979398
return V;

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1221,7 +1221,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
12211221
setOperationAction(ISD::ROTR, VT, Expand);
12221222
}
12231223
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
1224-
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
1224+
setOperationAction(ISD::CTPOP, MVT::i32, LibCall);
1225+
setOperationAction(ISD::CTPOP, MVT::i64, LibCall);
12251226
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
12261227
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
12271228
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
395395
if (Subtarget.is64Bit())
396396
setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
397397
} else {
398-
setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
398+
setOperationAction(ISD::CTTZ, XLenVT, Expand);
399+
if (Subtarget.is64Bit())
400+
setOperationAction(ISD::CTPOP, MVT::i128, LibCall);
401+
else
402+
setOperationAction(ISD::CTPOP, MVT::i32, LibCall);
403+
setOperationAction(ISD::CTPOP, MVT::i64, LibCall);
399404
}
400405

401406
if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||

llvm/test/CodeGen/ARM/popcnt.ll

Lines changed: 8 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -324,76 +324,22 @@ define i32 @ctpop16(i16 %x) nounwind readnone {
324324
define i32 @ctpop32(i32 %x) nounwind readnone {
325325
; CHECK-LABEL: ctpop32:
326326
; CHECK: @ %bb.0:
327-
; CHECK-NEXT: ldr r1, .LCPI22_0
328-
; CHECK-NEXT: ldr r2, .LCPI22_3
329-
; CHECK-NEXT: and r1, r1, r0, lsr #1
330-
; CHECK-NEXT: ldr r12, .LCPI22_1
331-
; CHECK-NEXT: sub r0, r0, r1
332-
; CHECK-NEXT: ldr r3, .LCPI22_2
333-
; CHECK-NEXT: and r1, r0, r2
334-
; CHECK-NEXT: and r0, r2, r0, lsr #2
335-
; CHECK-NEXT: add r0, r1, r0
336-
; CHECK-NEXT: add r0, r0, r0, lsr #4
337-
; CHECK-NEXT: and r0, r0, r12
338-
; CHECK-NEXT: mul r1, r0, r3
339-
; CHECK-NEXT: lsr r0, r1, #24
340-
; CHECK-NEXT: mov pc, lr
341-
; CHECK-NEXT: .p2align 2
342-
; CHECK-NEXT: @ %bb.1:
343-
; CHECK-NEXT: .LCPI22_0:
344-
; CHECK-NEXT: .long 1431655765 @ 0x55555555
345-
; CHECK-NEXT: .LCPI22_1:
346-
; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
347-
; CHECK-NEXT: .LCPI22_2:
348-
; CHECK-NEXT: .long 16843009 @ 0x1010101
349-
; CHECK-NEXT: .LCPI22_3:
350-
; CHECK-NEXT: .long 858993459 @ 0x33333333
327+
; CHECK-NEXT: b __popcountsi2
351328
%count = tail call i32 @llvm.ctpop.i32(i32 %x)
352329
ret i32 %count
353330
}
354331

355-
define i32 @ctpop64(i64 %x) nounwind readnone {
332+
define i64 @ctpop64(i64 %x) nounwind readnone {
356333
; CHECK-LABEL: ctpop64:
357334
; CHECK: @ %bb.0:
358-
; CHECK-NEXT: .save {r4, lr}
359-
; CHECK-NEXT: push {r4, lr}
360-
; CHECK-NEXT: ldr r2, .LCPI23_0
361-
; CHECK-NEXT: ldr r3, .LCPI23_3
362-
; CHECK-NEXT: and r4, r2, r0, lsr #1
363-
; CHECK-NEXT: and r2, r2, r1, lsr #1
364-
; CHECK-NEXT: sub r0, r0, r4
365-
; CHECK-NEXT: sub r1, r1, r2
366-
; CHECK-NEXT: and r4, r0, r3
367-
; CHECK-NEXT: and r2, r1, r3
368-
; CHECK-NEXT: and r0, r3, r0, lsr #2
369-
; CHECK-NEXT: and r1, r3, r1, lsr #2
370-
; CHECK-NEXT: add r0, r4, r0
371-
; CHECK-NEXT: ldr lr, .LCPI23_1
372-
; CHECK-NEXT: add r1, r2, r1
373-
; CHECK-NEXT: ldr r12, .LCPI23_2
374-
; CHECK-NEXT: add r0, r0, r0, lsr #4
375-
; CHECK-NEXT: and r0, r0, lr
376-
; CHECK-NEXT: add r1, r1, r1, lsr #4
377-
; CHECK-NEXT: mul r2, r0, r12
378-
; CHECK-NEXT: and r0, r1, lr
379-
; CHECK-NEXT: mul r1, r0, r12
380-
; CHECK-NEXT: lsr r0, r2, #24
381-
; CHECK-NEXT: add r0, r0, r1, lsr #24
382-
; CHECK-NEXT: pop {r4, lr}
335+
; CHECK-NEXT: .save {r11, lr}
336+
; CHECK-NEXT: push {r11, lr}
337+
; CHECK-NEXT: bl __popcountdi2
338+
; CHECK-NEXT: asr r1, r0, #31
339+
; CHECK-NEXT: pop {r11, lr}
383340
; CHECK-NEXT: mov pc, lr
384-
; CHECK-NEXT: .p2align 2
385-
; CHECK-NEXT: @ %bb.1:
386-
; CHECK-NEXT: .LCPI23_0:
387-
; CHECK-NEXT: .long 1431655765 @ 0x55555555
388-
; CHECK-NEXT: .LCPI23_1:
389-
; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
390-
; CHECK-NEXT: .LCPI23_2:
391-
; CHECK-NEXT: .long 16843009 @ 0x1010101
392-
; CHECK-NEXT: .LCPI23_3:
393-
; CHECK-NEXT: .long 858993459 @ 0x33333333
394341
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
395-
%conv = trunc i64 %count to i32
396-
ret i32 %conv
342+
ret i64 %count
397343
}
398344

399345
define i32 @ctpop_eq_one(i64 %x) nounwind readnone {

0 commit comments

Comments
 (0)