Skip to content

[SDag][ARM][RISCV] Allow lowering CTPOP into a libcall #101786

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 23, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion llvm/include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -2503,7 +2503,7 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
return (LT.first * 2);
else
return (LT.first * 1);
} else if (!TLI->isOperationExpand(ISD, LT.second)) {
} else if (TLI->isOperationCustom(ISD, LT.second)) {
// If the operation is custom lowered then assume
// that the code is twice as expensive.
return (LT.first * 2);
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/IR/RuntimeLibcalls.def
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ HANDLE_LIBCALL(NEG_I64, "__negdi2")
HANDLE_LIBCALL(CTLZ_I32, "__clzsi2")
HANDLE_LIBCALL(CTLZ_I64, "__clzdi2")
HANDLE_LIBCALL(CTLZ_I128, "__clzti2")
HANDLE_LIBCALL(CTPOP_I32, "__popcountsi2")
HANDLE_LIBCALL(CTPOP_I64, "__popcountdi2")
HANDLE_LIBCALL(CTPOP_I128, "__popcountti2")

// Floating-point
HANDLE_LIBCALL(ADD_F32, "__addsf3")
Expand Down
82 changes: 62 additions & 20 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,8 @@ class SelectionDAGLegalize {
ArrayRef<int> Mask) const;

std::pair<SDValue, SDValue> ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
TargetLowering::ArgListTy &&Args, bool isSigned);
TargetLowering::ArgListTy &&Args,
bool IsSigned, EVT RetVT);
std::pair<SDValue, SDValue> ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);

void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall LC,
Expand All @@ -150,6 +151,9 @@ class SelectionDAGLegalize {
RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128,
SmallVectorImpl<SDValue> &Results);
SDValue ExpandBitCountingLibCall(SDNode *Node, RTLIB::Libcall CallI32,
RTLIB::Libcall CallI64,
RTLIB::Libcall CallI128);
void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);

Expand Down Expand Up @@ -2114,9 +2118,10 @@ SDValue SelectionDAGLegalize::ExpandSPLAT_VECTOR(SDNode *Node) {
// register, return the lo part and set the hi part to the by-reg argument in
// the first. If it does fit into a single register, return the result and
// leave the Hi part unset.
std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
TargetLowering::ArgListTy &&Args,
bool isSigned) {
std::pair<SDValue, SDValue>
SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
TargetLowering::ArgListTy &&Args,
bool IsSigned, EVT RetVT) {
EVT CodePtrTy = TLI.getPointerTy(DAG.getDataLayout());
SDValue Callee;
if (const char *LibcallName = TLI.getLibcallName(LC))
Expand All @@ -2127,7 +2132,6 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall L
Node->getOperationName(&DAG));
}

EVT RetVT = Node->getValueType(0);
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());

// By default, the input chain to this libcall is the entry node of the
Expand All @@ -2147,7 +2151,7 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall L
InChain = TCChain;

TargetLowering::CallLoweringInfo CLI(DAG);
bool signExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, isSigned);
bool signExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, IsSigned);
CLI.setDebugLoc(SDLoc(Node))
.setChain(InChain)
.setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
Expand Down Expand Up @@ -2183,7 +2187,8 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall L
Args.push_back(Entry);
}

return ExpandLibCall(LC, Node, std::move(Args), isSigned);
return ExpandLibCall(LC, Node, std::move(Args), isSigned,
Node->getValueType(0));
}

void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
Expand Down Expand Up @@ -2259,6 +2264,50 @@ void SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
ExpandFPLibCall(Node, LC, Results);
}

/// Lower a bit-counting node to a runtime library call.
///
/// \param Node     The node being lowered; its operand 0 becomes the single
///                 call argument and its value type 0 selects the routine.
/// \param CallI32  Libcall used when the node's result type is i32.
/// \param CallI64  Libcall used when the node's result type is i64.
/// \param CallI128 Libcall used when the node's result type is i128.
/// \returns The value produced by ExpandLibCall, sign-extended or truncated
///          to the node's result type unless that value has type MVT::Other.
SDValue SelectionDAGLegalize::ExpandBitCountingLibCall(
    SDNode *Node, RTLIB::Libcall CallI32, RTLIB::Libcall CallI64,
    RTLIB::Libcall CallI128) {
  // Choose the routine that matches the width of the node's result; any
  // other width is a caller error.
  const auto ResultTy = Node->getSimpleValueType(0).SimpleTy;
  RTLIB::Libcall Selected;
  if (ResultTy == MVT::i32)
    Selected = CallI32;
  else if (ResultTy == MVT::i64)
    Selected = CallI64;
  else if (ResultTy == MVT::i128)
    Selected = CallI128;
  else
    llvm_unreachable("Unexpected request for libcall!");

  // These routines take one unsigned argument and return a C `int`. That
  // `int` type may be illegal on this target; ExpandLibCall will take care
  // of promoting it to a legal type.
  auto &Ctx = *DAG.getContext();
  SDValue Operand = Node->getOperand(0);
  EVT CIntVT = EVT::getIntegerVT(Ctx, DAG.getLibInfo().getIntSize());

  // Describe the single argument. It is unsigned, so it is flagged as
  // zero-extended unless the target reports that it wants sign extension.
  Type *OperandTy = Operand.getValueType().getTypeForEVT(Ctx);
  TargetLowering::ArgListEntry Entry;
  Entry.Node = Operand;
  Entry.Ty = OperandTy;
  const bool WantsSExt =
      TLI.shouldSignExtendTypeInLibCall(OperandTy, /*IsSigned=*/false);
  Entry.IsSExt = WantsSExt;
  Entry.IsZExt = !WantsSExt;

  std::pair<SDValue, SDValue> CallResult =
      ExpandLibCall(Selected, Node, TargetLowering::ArgListTy{Entry},
                    /*IsSigned=*/true, CIntVT);
  SDValue Value = CallResult.first;

  // If ExpandLibCall created a tail call, the result was already of the
  // correct type and is handed back untouched.
  if (Value.getValueType() == MVT::Other)
    return Value;

  // Otherwise convert the signed `int` result to the node's own type.
  return DAG.getSExtOrTrunc(Value, SDLoc(Node), Node->getValueType(0));
}

/// Issue libcalls to __{u}divmod to compute div / rem pairs.
void
SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
Expand Down Expand Up @@ -4993,19 +5042,12 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::MUL_I64, RTLIB::MUL_I128));
break;
case ISD::CTLZ_ZERO_UNDEF:
switch (Node->getSimpleValueType(0).SimpleTy) {
default:
llvm_unreachable("LibCall explicitly requested, but not available");
case MVT::i32:
Results.push_back(ExpandLibCall(RTLIB::CTLZ_I32, Node, false).first);
break;
case MVT::i64:
Results.push_back(ExpandLibCall(RTLIB::CTLZ_I64, Node, false).first);
break;
case MVT::i128:
Results.push_back(ExpandLibCall(RTLIB::CTLZ_I128, Node, false).first);
break;
}
Results.push_back(ExpandBitCountingLibCall(
Node, RTLIB::CTLZ_I32, RTLIB::CTLZ_I64, RTLIB::CTLZ_I128));
break;
case ISD::CTPOP:
Results.push_back(ExpandBitCountingLibCall(
Node, RTLIB::CTPOP_I32, RTLIB::CTPOP_I64, RTLIB::CTPOP_I128));
break;
case ISD::RESET_FPENV: {
// It is legalized to call 'fesetenv(FE_DFL_ENV)'. On most targets
Expand Down
34 changes: 27 additions & 7 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4012,15 +4012,35 @@ void DAGTypeLegalizer::ExpandIntRes_ABD(SDNode *N, SDValue &Lo, SDValue &Hi) {
SplitInteger(Result, Lo, Hi);
}

void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Op = N->getOperand(0);
EVT VT = N->getValueType(0);
SDLoc DL(N);

if (TLI.getOperationAction(ISD::CTPOP, VT) == TargetLoweringBase::LibCall) {
RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
if (VT == MVT::i32)
LC = RTLIB::CTPOP_I32;
else if (VT == MVT::i64)
LC = RTLIB::CTPOP_I64;
else if (VT == MVT::i128)
LC = RTLIB::CTPOP_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC) &&
"LibCall explicitly requested, but not available");
TargetLowering::MakeLibCallOptions CallOptions;
EVT IntVT =
EVT::getIntegerVT(*DAG.getContext(), DAG.getLibInfo().getIntSize());
SDValue Res = TLI.makeLibCall(DAG, LC, IntVT, Op, CallOptions, DL).first;
SplitInteger(DAG.getSExtOrTrunc(Res, DL, VT), Lo, Hi);
return;
}

// ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
GetExpandedInteger(N->getOperand(0), Lo, Hi);
GetExpandedInteger(Op, Lo, Hi);
EVT NVT = Lo.getValueType();
Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
Hi = DAG.getConstant(0, dl, NVT);
Lo = DAG.getNode(ISD::ADD, DL, NVT, DAG.getNode(ISD::CTPOP, DL, NVT, Lo),
DAG.getNode(ISD::CTPOP, DL, NVT, Hi));
Hi = DAG.getConstant(0, DL, NVT);
}

void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
Expand Down
5 changes: 3 additions & 2 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9390,8 +9390,9 @@ SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
!isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
return SDValue();

// Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
// Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
// to be expanded or converted to a libcall.
if (!VT.isVector() && !isOperationLegalOrCustomOrPromote(ISD::CTPOP, VT) &&
!isOperationLegal(ISD::CTLZ, VT))
if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
return V;
Expand Down
3 changes: 2 additions & 1 deletion llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1221,7 +1221,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ROTR, VT, Expand);
}
setOperationAction(ISD::CTTZ, MVT::i32, Custom);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, LibCall);
setOperationAction(ISD::CTPOP, MVT::i64, LibCall);
if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
setOperationAction(ISD::CTLZ, MVT::i32, Expand);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, LibCall);
Expand Down
7 changes: 6 additions & 1 deletion llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,12 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
if (Subtarget.is64Bit())
setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
} else {
setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
setOperationAction(ISD::CTTZ, XLenVT, Expand);
if (Subtarget.is64Bit())
setOperationAction(ISD::CTPOP, MVT::i128, LibCall);
else
setOperationAction(ISD::CTPOP, MVT::i32, LibCall);
setOperationAction(ISD::CTPOP, MVT::i64, LibCall);
}

if (Subtarget.hasStdExtZbb() || Subtarget.hasVendorXTHeadBb() ||
Expand Down
70 changes: 8 additions & 62 deletions llvm/test/CodeGen/ARM/popcnt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -324,76 +324,22 @@ define i32 @ctpop16(i16 %x) nounwind readnone {
define i32 @ctpop32(i32 %x) nounwind readnone {
; CHECK-LABEL: ctpop32:
; CHECK: @ %bb.0:
; CHECK-NEXT: ldr r1, .LCPI22_0
; CHECK-NEXT: ldr r2, .LCPI22_3
; CHECK-NEXT: and r1, r1, r0, lsr #1
; CHECK-NEXT: ldr r12, .LCPI22_1
; CHECK-NEXT: sub r0, r0, r1
; CHECK-NEXT: ldr r3, .LCPI22_2
; CHECK-NEXT: and r1, r0, r2
; CHECK-NEXT: and r0, r2, r0, lsr #2
; CHECK-NEXT: add r0, r1, r0
; CHECK-NEXT: add r0, r0, r0, lsr #4
; CHECK-NEXT: and r0, r0, r12
; CHECK-NEXT: mul r1, r0, r3
; CHECK-NEXT: lsr r0, r1, #24
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI22_0:
; CHECK-NEXT: .long 1431655765 @ 0x55555555
; CHECK-NEXT: .LCPI22_1:
; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
; CHECK-NEXT: .LCPI22_2:
; CHECK-NEXT: .long 16843009 @ 0x1010101
; CHECK-NEXT: .LCPI22_3:
; CHECK-NEXT: .long 858993459 @ 0x33333333
; CHECK-NEXT: b __popcountsi2
%count = tail call i32 @llvm.ctpop.i32(i32 %x)
ret i32 %count
}

define i32 @ctpop64(i64 %x) nounwind readnone {
define i64 @ctpop64(i64 %x) nounwind readnone {
; CHECK-LABEL: ctpop64:
; CHECK: @ %bb.0:
; CHECK-NEXT: .save {r4, lr}
; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: ldr r2, .LCPI23_0
; CHECK-NEXT: ldr r3, .LCPI23_3
; CHECK-NEXT: and r4, r2, r0, lsr #1
; CHECK-NEXT: and r2, r2, r1, lsr #1
; CHECK-NEXT: sub r0, r0, r4
; CHECK-NEXT: sub r1, r1, r2
; CHECK-NEXT: and r4, r0, r3
; CHECK-NEXT: and r2, r1, r3
; CHECK-NEXT: and r0, r3, r0, lsr #2
; CHECK-NEXT: and r1, r3, r1, lsr #2
; CHECK-NEXT: add r0, r4, r0
; CHECK-NEXT: ldr lr, .LCPI23_1
; CHECK-NEXT: add r1, r2, r1
; CHECK-NEXT: ldr r12, .LCPI23_2
; CHECK-NEXT: add r0, r0, r0, lsr #4
; CHECK-NEXT: and r0, r0, lr
; CHECK-NEXT: add r1, r1, r1, lsr #4
; CHECK-NEXT: mul r2, r0, r12
; CHECK-NEXT: and r0, r1, lr
; CHECK-NEXT: mul r1, r0, r12
; CHECK-NEXT: lsr r0, r2, #24
; CHECK-NEXT: add r0, r0, r1, lsr #24
; CHECK-NEXT: pop {r4, lr}
; CHECK-NEXT: .save {r11, lr}
; CHECK-NEXT: push {r11, lr}
; CHECK-NEXT: bl __popcountdi2
; CHECK-NEXT: asr r1, r0, #31
; CHECK-NEXT: pop {r11, lr}
; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 2
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI23_0:
; CHECK-NEXT: .long 1431655765 @ 0x55555555
; CHECK-NEXT: .LCPI23_1:
; CHECK-NEXT: .long 252645135 @ 0xf0f0f0f
; CHECK-NEXT: .LCPI23_2:
; CHECK-NEXT: .long 16843009 @ 0x1010101
; CHECK-NEXT: .LCPI23_3:
; CHECK-NEXT: .long 858993459 @ 0x33333333
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
%conv = trunc i64 %count to i32
ret i32 %conv
ret i64 %count
}

define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
Expand Down
Loading