Skip to content

Revert "[SDag][ARM][RISCV] Allow lowering CTPOP into a libcall" #101740

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 2, 2024

Conversation

s-barannikov
Copy link
Contributor

Reverts the rest of #99752

@llvmbot
Copy link
Member

llvmbot commented Aug 2, 2024

@llvm/pr-subscribers-llvm-analysis
@llvm/pr-subscribers-llvm-ir

@llvm/pr-subscribers-llvm-selectiondag

Author: Sergei Barannikov (s-barannikov)

Changes

Reverts the rest of llvm/llvm-project#99752


Patch is 93.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101740.diff

17 Files Affected:

  • (modified) llvm/include/llvm/IR/RuntimeLibcalls.def (-3)
  • (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp (+21-17)
  • (modified) llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp (+7-25)
  • (modified) llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp (+2-3)
  • (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+1-4)
  • (modified) llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll (+8-8)
  • (modified) llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll (+887-185)
  • (modified) llvm/test/CodeGen/RISCV/ctz_zero_return_test.ll (+74-24)
  • (modified) llvm/test/CodeGen/RISCV/pr56457.ll (+26-5)
  • (modified) llvm/test/CodeGen/RISCV/pr95271.ll (+22-1)
  • (modified) llvm/test/CodeGen/RISCV/rv32xtheadbb.ll (+65-23)
  • (modified) llvm/test/CodeGen/RISCV/rv32zbb.ll (+237-74)
  • (modified) llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64xtheadbb.ll (+29-5)
  • (modified) llvm/test/CodeGen/RISCV/rv64-legal-i32/rv64zbb.ll (+59-6)
  • (modified) llvm/test/CodeGen/RISCV/rv64xtheadbb.ll (+29-5)
  • (modified) llvm/test/CodeGen/RISCV/rv64zbb.ll (+105-21)
  • (modified) llvm/test/CodeGen/RISCV/sextw-removal.ll (+40-6)
diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 3dd75622b8e43..89aaf6d1ad83f 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -85,9 +85,6 @@ HANDLE_LIBCALL(NEG_I64, "__negdi2")
 HANDLE_LIBCALL(CTLZ_I32, "__clzsi2")
 HANDLE_LIBCALL(CTLZ_I64, "__clzdi2")
 HANDLE_LIBCALL(CTLZ_I128, "__clzti2")
-HANDLE_LIBCALL(CTPOP_I32, "__popcountsi2")
-HANDLE_LIBCALL(CTPOP_I64, "__popcountdi2")
-HANDLE_LIBCALL(CTPOP_I128, "__popcountti2")
 
 // Floating-point
 HANDLE_LIBCALL(ADD_F32, "__addsf3")
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index c91a2360d1599..bdb7917073020 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -140,9 +140,12 @@ class SelectionDAGLegalize {
                        RTLIB::Libcall Call_F128,
                        RTLIB::Libcall Call_PPCF128,
                        SmallVectorImpl<SDValue> &Results);
-  SDValue ExpandIntLibCall(SDNode *Node, bool IsSigned, RTLIB::Libcall Call_I8,
-                           RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
-                           RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128);
+  SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
+                           RTLIB::Libcall Call_I8,
+                           RTLIB::Libcall Call_I16,
+                           RTLIB::Libcall Call_I32,
+                           RTLIB::Libcall Call_I64,
+                           RTLIB::Libcall Call_I128);
   void ExpandArgFPLibCall(SDNode *Node,
                           RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
                           RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
@@ -2206,7 +2209,7 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
   ExpandFPLibCall(Node, LC, Results);
 }
 
-SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode *Node, bool IsSigned,
+SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
                                                RTLIB::Libcall Call_I8,
                                                RTLIB::Libcall Call_I16,
                                                RTLIB::Libcall Call_I32,
@@ -2221,9 +2224,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode *Node, bool IsSigned,
   case MVT::i64:  LC = Call_I64; break;
   case MVT::i128: LC = Call_I128; break;
   }
-  assert(LC != RTLIB::UNKNOWN_LIBCALL &&
-         "LibCall explicitly requested, but not available");
-  return ExpandLibCall(LC, Node, IsSigned).first;
+  return ExpandLibCall(LC, Node, isSigned).first;
 }
 
 /// Expand the node to a libcall based on first argument type (for instance
@@ -4999,16 +5000,19 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
                                        RTLIB::MUL_I64, RTLIB::MUL_I128));
     break;
   case ISD::CTLZ_ZERO_UNDEF:
-    Results.push_back(ExpandIntLibCall(Node, /*IsSigned=*/false,
-                                       RTLIB::UNKNOWN_LIBCALL,
-                                       RTLIB::UNKNOWN_LIBCALL, RTLIB::CTLZ_I32,
-                                       RTLIB::CTLZ_I64, RTLIB::CTLZ_I128));
-    break;
-  case ISD::CTPOP:
-    Results.push_back(ExpandIntLibCall(Node, /*IsSigned=*/false,
-                                       RTLIB::UNKNOWN_LIBCALL,
-                                       RTLIB::UNKNOWN_LIBCALL, RTLIB::CTPOP_I32,
-                                       RTLIB::CTPOP_I64, RTLIB::CTPOP_I128));
+    switch (Node->getSimpleValueType(0).SimpleTy) {
+    default:
+      llvm_unreachable("LibCall explicitly requested, but not available");
+    case MVT::i32:
+      Results.push_back(ExpandLibCall(RTLIB::CTLZ_I32, Node, false).first);
+      break;
+    case MVT::i64:
+      Results.push_back(ExpandLibCall(RTLIB::CTLZ_I64, Node, false).first);
+      break;
+    case MVT::i128:
+      Results.push_back(ExpandLibCall(RTLIB::CTLZ_I128, Node, false).first);
+      break;
+    }
     break;
   case ISD::RESET_FPENV: {
     // It is legalized to call 'fesetenv(FE_DFL_ENV)'. On most targets
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index d00785025bac5..b1ada66aa9aeb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -3850,33 +3850,15 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
   Hi = DAG.getConstant(0, dl, NVT);
 }
 
-void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, SDValue &Lo, SDValue &Hi) {
-  SDValue Op = N->getOperand(0);
-  EVT VT = N->getValueType(0);
-  SDLoc DL(N);
-
-  if (TLI.getOperationAction(ISD::CTPOP, VT) == TargetLoweringBase::LibCall) {
-    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
-    if (VT == MVT::i32)
-      LC = RTLIB::CTPOP_I32;
-    else if (VT == MVT::i64)
-      LC = RTLIB::CTPOP_I64;
-    else if (VT == MVT::i128)
-      LC = RTLIB::CTPOP_I128;
-    assert(LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC) &&
-           "LibCall explicitly requested, but not available");
-    TargetLowering::MakeLibCallOptions CallOptions;
-    SDValue Res = TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, DL).first;
-    SplitInteger(Res, Lo, Hi);
-    return;
-  }
-
+void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
+                                          SDValue &Lo, SDValue &Hi) {
+  SDLoc dl(N);
   // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
-  GetExpandedInteger(Op, Lo, Hi);
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
   EVT NVT = Lo.getValueType();
-  Lo = DAG.getNode(ISD::ADD, DL, NVT, DAG.getNode(ISD::CTPOP, DL, NVT, Lo),
-                   DAG.getNode(ISD::CTPOP, DL, NVT, Hi));
-  Hi = DAG.getConstant(0, DL, NVT);
+  Lo = DAG.getNode(ISD::ADD, dl, NVT, DAG.getNode(ISD::CTPOP, dl, NVT, Lo),
+                   DAG.getNode(ISD::CTPOP, dl, NVT, Hi));
+  Hi = DAG.getConstant(0, dl, NVT);
 }
 
 void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8ab3103fda23f..83aadcfd241e9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -9171,9 +9171,8 @@ SDValue TargetLowering::expandCTTZ(SDNode *Node, SelectionDAG &DAG) const {
                         !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
     return SDValue();
 
-  // Emit Table Lookup if ISD::CTPOP used in the fallback path below is going
-  // to be expanded or converted to a libcall.
-  if (!VT.isVector() && !isOperationLegalOrCustomOrPromote(ISD::CTPOP, VT) &&
+  // Emit Table Lookup if ISD::CTLZ and ISD::CTPOP are not legal.
+  if (!VT.isVector() && isOperationExpand(ISD::CTPOP, VT) &&
       !isOperationLegal(ISD::CTLZ, VT))
     if (SDValue V = CTTZTableLookup(Node, DAG, dl, VT, Op, NumBitsPerElt))
       return V;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4a2193c8d5328..9ee60b9db2837 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -393,10 +393,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
         setOperationAction({ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF}, MVT::i32, Custom);
     }
   } else {
-    setOperationAction(ISD::CTTZ, XLenVT, Expand);
-    if (!Subtarget.is64Bit())
-      setOperationAction(ISD::CTPOP, MVT::i32, LibCall);
-    setOperationAction(ISD::CTPOP, MVT::i64, LibCall);
+    setOperationAction({ISD::CTTZ, ISD::CTPOP}, XLenVT, Expand);
     if (RV64LegalI32 && Subtarget.is64Bit())
       setOperationAction({ISD::CTTZ, ISD::CTPOP}, MVT::i32, Expand);
   }
diff --git a/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll b/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll
index c0ecc63b82dca..380f65b19b8fa 100644
--- a/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll
+++ b/llvm/test/Analysis/CostModel/RISCV/int-bit-manip.ll
@@ -159,7 +159,7 @@ define void @bitreverse() {
 
 define void @ctpop() {
 ; NOZVBB-LABEL: 'ctpop'
-; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
+; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %2 = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> undef)
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %3 = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %4 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
@@ -169,7 +169,7 @@ define void @ctpop() {
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %8 = call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> undef)
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %9 = call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> undef)
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %10 = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> undef)
-; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
+; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %12 = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %13 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %14 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
@@ -179,7 +179,7 @@ define void @ctpop() {
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %18 = call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> undef)
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %19 = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> undef)
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 19 for instruction: %20 = call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> undef)
-; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
+; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %22 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %23 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %24 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
@@ -189,7 +189,7 @@ define void @ctpop() {
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %28 = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> undef)
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %29 = call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> undef)
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %30 = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> undef)
-; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
+; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %32 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %33 = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 21 for instruction: %34 = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)
@@ -202,7 +202,7 @@ define void @ctpop() {
 ; NOZVBB-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; ZVBB-LABEL: 'ctpop'
-; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
+; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %1 = call i8 @llvm.ctpop.i8(i8 undef)
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %2 = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> undef)
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %3 = call <4 x i8> @llvm.ctpop.v4i8(<4 x i8> undef)
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %4 = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> undef)
@@ -212,7 +212,7 @@ define void @ctpop() {
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %8 = call <vscale x 4 x i8> @llvm.ctpop.nxv4i8(<vscale x 4 x i8> undef)
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %9 = call <vscale x 8 x i8> @llvm.ctpop.nxv8i8(<vscale x 8 x i8> undef)
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %10 = call <vscale x 16 x i8> @llvm.ctpop.nxv16i8(<vscale x 16 x i8> undef)
-; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
+; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %11 = call i16 @llvm.ctpop.i16(i16 undef)
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %12 = call <2 x i16> @llvm.ctpop.v2i16(<2 x i16> undef)
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %13 = call <4 x i16> @llvm.ctpop.v4i16(<4 x i16> undef)
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %14 = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> undef)
@@ -222,7 +222,7 @@ define void @ctpop() {
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %18 = call <vscale x 4 x i16> @llvm.ctpop.nxv4i16(<vscale x 4 x i16> undef)
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %19 = call <vscale x 8 x i16> @llvm.ctpop.nxv8i16(<vscale x 8 x i16> undef)
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %20 = call <vscale x 16 x i16> @llvm.ctpop.nxv16i16(<vscale x 16 x i16> undef)
-; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
+; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %21 = call i32 @llvm.ctpop.i32(i32 undef)
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %22 = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> undef)
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %23 = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> undef)
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %24 = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> undef)
@@ -232,7 +232,7 @@ define void @ctpop() {
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %28 = call <vscale x 4 x i32> @llvm.ctpop.nxv4i32(<vscale x 4 x i32> undef)
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %29 = call <vscale x 8 x i32> @llvm.ctpop.nxv8i32(<vscale x 8 x i32> undef)
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %30 = call <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32> undef)
-; ZVBB-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
+; ZVBB-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %31 = call i64 @llvm.ctpop.i64(i64 undef)
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %32 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> undef)
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %33 = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> undef)
 ; ZVBB-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %34 = call <8 x i64> @llvm.ctpop.v8i64(<8 x i64> undef)
diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
index 9123017918094..8caa64c9572ce 100644
--- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll
@@ -1156,30 +1156,46 @@ define i16 @test_ctlz_i16(i16 %a) nounwind {
 }
 
 define i32 @test_ctlz_i32(i32 %a) nounwind {
-; RV32_NOZBB-LABEL: test_ctlz_i32:
-; RV32_NOZBB:       # %bb.0:
-; RV32_NOZBB-NEXT:    beqz a0, .LBB10_2
-; RV32_NOZBB-NEXT:  # %bb.1: # %cond.false
-; RV32_NOZBB-NEXT:    addi sp, sp, -16
-; RV32_NOZBB-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; RV32_NOZBB-NEXT:    srli a1, a0, 1
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 2
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 4
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 8
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    srli a1, a0, 16
-; RV32_NOZBB-NEXT:    or a0, a0, a1
-; RV32_NOZBB-NEXT:    not a0, a0
-; RV32_NOZBB-NEXT:    call __popcountsi2
-; RV32_NOZBB-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; RV32_NOZBB-NEXT:    addi sp, sp, 16
-; RV32_NOZBB-NEXT:    ret
-; RV32_NOZBB-NEXT:  .LBB10_2:
-; RV32_NOZBB-NEXT:    li a0, 32
-; RV32_NOZBB-NEXT:    ret
+; RV32I-LABEL: test_ctlz_i32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    beqz a0, .LBB10_2
+; RV32I-NEXT:  # %bb.1: # %cond.false
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 2
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 8
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    srli a1, a0, 16
+; RV32I-NEXT:    or a0, a0, a1
+; RV32I-NEXT:    not a0, a0
+; RV32I-NEXT:    srli a1, a0, 1
+; RV32I-NEXT:    lui a2, 349525
+; RV32I-NEXT:    addi a2, a2, 1365
+; RV32I-NEXT:    and a1, a1, a2
+; RV32I-NEXT:    sub a0, a0, a1
+; RV32I-NEXT:    lui a1, 209715
+; RV32I-NEXT:    addi a1, a1, 819
+; RV32I-NEXT:    and a2, a0, a1
+; RV32I-NEXT:    srli a0, a0, 2
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    add a0, a2, a0
+; RV32I-NEXT:    srli a1, a0, 4
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    lui a1, 61681
+; RV32I-NEXT:    addi a1, a1, -241
+; RV32I-NEXT:    and a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 8
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    slli a1, a0, 16
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    srli a0, a0, 24
+; RV32I-NEXT:    ret
+; RV32I-NEXT:  .LBB10_2:
+; RV32I-NEXT:    li a0, 32
+; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: test_ctlz_i32:
 ; RV64I:       # %bb.0:
@@ -1223,6 +1239,46 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
 ; RV64I-NEXT:    li a0, 32
 ; RV64I-NEXT:    ret
 ;
+; RV32M-LABEL: test_ctlz_i32:
+; RV32M:       # %bb.0:
+; RV32M-NEXT:    beqz a0, .LBB10_2
+; RV32M-NEXT:  # %bb.1: # %cond.false
+; RV32M-NEXT:    srli a1, a0, 1
+; RV32M-NEXT:    or a0, a0, a1
+; RV32M-NEXT:    srli a1, a0, 2
+; RV32M-NEXT:    or a0, a0, a1
+; RV32M-NEXT:    srli a1, a0, 4
+; RV32M-NEXT:    or a0, a0, a1
+; RV32M-NEXT:    srli a1, a0, 8
+; RV32M-NEXT:    or a0, a0, a1
+; RV32M-NEXT:    srli a1, a0, 16
+; RV32M-NEXT:    or a0, a0, a1
+; RV32M-NEXT:    not a0, a0
+; RV32M-NEXT:    srli a1, a0, 1
+; RV32M-NEXT:    lui a2, 349525
+; RV32M-NEXT:    addi a2, a2, 1365
+; RV32M-NEXT:    and a1, a1, a2
+; RV32M-NEXT:    sub a0, a0, a1
+; RV32M-NEXT:    lui a1, 209715
+; RV32M-NEXT:    addi a1, a1, 819
+; RV32M-NEXT:    and a2, a0, a1
+; RV32M-NEXT:    srli a0, a0, 2
+; RV32M-NEXT:    and a0, a0, a1
+; RV32M-NEXT:    add a0, a2, a0
+; RV32M-NEXT:    srli a1, a0, 4
+; RV32M-NEXT:    add a0, a0, a1
+; RV32M-NEXT:    lui a1, 61681
+; RV32M-NEXT:    addi a1, a1, -241
+; RV32M-NEXT:    and a0, a0, a1
+; RV32M-NEXT:    lui a1, 4112
+; RV32M-NEXT:    addi a1, a1, 257
+; RV32M-NEXT:    mul a0, a0, a1
+; RV32M-NEXT:    srli a0, a0, 24
+; RV32M-NEXT:    ret
+; RV32M-NEXT:  .LBB10_2:
+; RV32M-NEXT:    li a0, 32
+; RV32M-NEXT:    ret
+;
 ; RV64M-LABEL: test_ctlz_i32:
 ; RV64M:       # %bb.0:
 ; RV64M-NEXT:    sext.w a1, a0
@@ -1290,75 +1346,240 @@ define i32 @test_ctlz_i32(i32 %a) nounwind {
 }
 
 define i64 @test_ctlz_i64(i64 ...
[truncated]

Copy link

github-actions bot commented Aug 2, 2024

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff 90617e99bb17303b351351681a70394c312e0e58 dcf1296a5e76ef0a0a297284f03a8d4b470b8376 --extensions cpp -- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp llvm/lib/Target/RISCV/RISCVISelLowering.cpp
View the diff from clang-format here.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bdb7917073..4670f7397c 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -140,12 +140,9 @@ private:
                        RTLIB::Libcall Call_F128,
                        RTLIB::Libcall Call_PPCF128,
                        SmallVectorImpl<SDValue> &Results);
-  SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
-                           RTLIB::Libcall Call_I8,
-                           RTLIB::Libcall Call_I16,
-                           RTLIB::Libcall Call_I32,
-                           RTLIB::Libcall Call_I64,
-                           RTLIB::Libcall Call_I128);
+  SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
+                           RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
+                           RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128);
   void ExpandArgFPLibCall(SDNode *Node,
                           RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
                           RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
@@ -2209,7 +2206,7 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
   ExpandFPLibCall(Node, LC, Results);
 }
 
-SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode *Node, bool isSigned,
                                                RTLIB::Libcall Call_I8,
                                                RTLIB::Libcall Call_I16,
                                                RTLIB::Libcall Call_I32,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index b1ada66aa9..78b5d45967 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -3850,8 +3850,7 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
   Hi = DAG.getConstant(0, dl, NVT);
 }
 
-void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
-                                          SDValue &Lo, SDValue &Hi) {
+void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, SDValue &Lo, SDValue &Hi) {
   SDLoc dl(N);
   // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo)
   GetExpandedInteger(N->getOperand(0), Lo, Hi);

@s-barannikov s-barannikov merged commit 4527fba into main Aug 2, 2024
11 of 12 checks passed
@s-barannikov s-barannikov deleted the revert-99752-popcount-libcall branch August 2, 2024 22:51
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants