[LV] Support binary and unary operations with EVL-vectorization #93854
Conversation
@llvm/pr-subscribers-llvm-transforms

Author: Kolya Panchenko (nikolaypanchenko)

Changes: The patch adds `VPWidenEVLRecipe`, which represents `VPWidenRecipe` + EVL argument. The new recipe replaces `VPWidenRecipe` in `tryAddExplicitVectorLength` for each binary and unary operation.

Patch is 138.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93854.diff

8 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e75a1de548f7d..f1381b99ed68f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -855,6 +855,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenCastSC:
case VPRecipeBase::VPWidenGEPSC:
case VPRecipeBase::VPWidenSC:
+ case VPRecipeBase::VPWidenEVLSC:
case VPRecipeBase::VPWidenSelectSC:
case VPRecipeBase::VPBlendSC:
case VPRecipeBase::VPPredInstPHISC:
@@ -1039,6 +1040,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
R->getVPDefID() == VPRecipeBase::VPWidenSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
@@ -1333,13 +1335,18 @@ class VPInstruction : public VPRecipeWithIRFlags {
/// ingredient. This recipe covers most of the traditional vectorization cases
/// where each ingredient transforms into a vectorized version of itself.
class VPWidenRecipe : public VPRecipeWithIRFlags {
+protected:
unsigned Opcode;
+ template <typename IterT>
+ VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
+ iterator_range<IterT> Operands)
+ : VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}
+
public:
template <typename IterT>
VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
- : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
- Opcode(I.getOpcode()) {}
+ : VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
~VPWidenRecipe() override = default;
@@ -1363,6 +1370,49 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
#endif
};
+class VPWidenEVLRecipe : public VPWidenRecipe {
+private:
+ using VPRecipeWithIRFlags::transferFlags;
+
+public:
+ template <typename IterT>
+ VPWidenEVLRecipe(Instruction &I, iterator_range<IterT> Operands, VPValue &EVL)
+ : VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
+ addOperand(&EVL);
+ }
+
+ ~VPWidenEVLRecipe() override = default;
+
+ VPWidenRecipe *clone() override final {
+ SmallVector<VPValue *> Ops(operands());
+ VPValue *EVL = Ops.pop_back_val();
+ auto *R = new VPWidenEVLRecipe(*getUnderlyingInstr(),
+ make_range(Ops.begin(), Ops.end()), *EVL);
+ R->transferFlags(*this);
+ return R;
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);
+
+ VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
+ const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
+
+ /// A helper function to create widen EVL recipe from regular widen recipe.
+ static VPWidenEVLRecipe *create(VPWidenRecipe *W, VPValue &EVL);
+
+ /// Produce widened copies of all Ingredients.
+ void execute(VPTransformState &State) override final;
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override final;
+#endif
+};
+
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
class VPWidenCastRecipe : public VPRecipeWithIRFlags {
/// Cast instruction opcode.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 5eb99ffd1e10e..a6fd26b666501 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/VectorBuilder.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -71,6 +72,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPWidenLoadSC:
case VPWidenPHISC:
case VPWidenSC:
+ case VPWidenEVLSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -110,6 +112,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
case VPWidenSC:
+ case VPWidenEVLSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -157,6 +160,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPWidenPHISC:
case VPWidenPointerInductionSC:
case VPWidenSC:
+ case VPWidenEVLSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -993,6 +997,64 @@ void VPWidenRecipe::execute(VPTransformState &State) {
#endif
}
+VPWidenEVLRecipe *VPWidenEVLRecipe::create(VPWidenRecipe *W, VPValue &EVL) {
+ auto *R = new VPWidenEVLRecipe(*W->getUnderlyingInstr(), W->operands(), EVL);
+ R->transferFlags(*W);
+ return R;
+}
+
+void VPWidenEVLRecipe::execute(VPTransformState &State) {
+ assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+ "explicit vector length.");
+ VPValue *Op0 = getOperand(0);
+
+ // If it's scalar operation, hand translation over to VPWidenRecipe
+ if (!State.get(Op0, 0)->getType()->isVectorTy())
+ return VPWidenRecipe::execute(State);
+
+ VPValue *EVL = getEVL();
+ Value *EVLArg = State.get(EVL, 0, /*NeedsScalar=*/true);
+ unsigned Opcode = getOpcode();
+ Instruction *I = getUnderlyingInstr();
+ IRBuilderBase &BuilderIR = State.Builder;
+ VectorBuilder Builder(BuilderIR);
+ Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
+ Value *VPInst = nullptr;
+
+ //===------------------- Binary and Unary Ops ---------------------===//
+ if (Instruction::isBinaryOp(Opcode) || Instruction::isUnaryOp(Opcode)) {
+ // Just widen unops and binops.
+
+ SmallVector<Value *, 4> Ops;
+ for (unsigned I = 0, E = getNumOperands() - 1; I < E; ++I) {
+ VPValue *VPOp = getOperand(I);
+ Ops.push_back(State.get(VPOp, 0));
+ }
+
+ Builder.setMask(Mask).setEVL(EVLArg);
+ VPInst = Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops,
+ "vp.op");
+
+ if (I)
+ if (auto *VecOp = dyn_cast<Instruction>(VPInst))
+ VecOp->copyIRFlags(I);
+ } else {
+ llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
+ }
+ State.set(this, VPInst, 0);
+ State.addMetadata(VPInst, I);
+}
+
+bool VPWidenEVLRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
+ assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
+ // EVL in that recipe is always the last operand, thus any use before means
+ // the VPValue should be vectorized.
+ for (unsigned I = 0, E = getNumOperands() - 1; I != E; ++I)
+ if (getOperand(I) == Op)
+ return false;
+ return true;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
@@ -1002,6 +1064,15 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
printFlags(O);
printOperands(O, SlotTracker);
}
+
+void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN vp ";
+ printAsOperand(O, SlotTracker);
+ O << " = " << Instruction::getOpcodeName(Opcode);
+ printFlags(O);
+ printOperands(O, SlotTracker);
+}
#endif
void VPWidenCastRecipe::execute(VPTransformState &State) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 422579ea8b84f..39ebd44909ea6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Intrinsics.h"
@@ -1219,7 +1220,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
/// WideCanonicalIV, backedge-taken-count) pattern.
/// TODO: Introduce explicit recipe for header-mask instead of searching
/// for the header-mask pattern manually.
-static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
+static DenseSet<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
SmallVector<VPValue *> WideCanonicalIVs;
auto *FoundWidenCanonicalIVUser =
find_if(Plan.getCanonicalIV()->users(),
@@ -1245,7 +1246,7 @@ static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
// Walk users of wide canonical IVs and collect to all compares of the form
// (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
- SmallVector<VPValue *> HeaderMasks;
+ DenseSet<VPValue *> HeaderMasks;
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
for (auto *Wide : WideCanonicalIVs) {
for (VPUser *U : SmallVector<VPUser *>(Wide->users())) {
@@ -1257,7 +1258,7 @@ static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
assert(HeaderMask->getOperand(0) == Wide &&
"WidenCanonicalIV must be the first operand of the compare");
- HeaderMasks.push_back(HeaderMask);
+ HeaderMasks.insert(HeaderMask);
}
}
return HeaderMasks;
@@ -1296,6 +1297,55 @@ void VPlanTransforms::addActiveLaneMask(
HeaderMask->replaceAllUsesWith(LaneMask);
}
+/// Replace recipes with their EVL variants.
+static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
+ DenseSet<VPRecipeBase *> ToRemove;
+
+ ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
+ Plan.getEntry());
+ DenseSet<VPValue *> HeaderMasks = collectAllHeaderMasks(Plan);
+ for (VPBasicBlock *VPBB : reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) {
+ // The recipes in the block are processed in reverse order, to catch chains
+ // of dead recipes.
+ for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
+ TypeSwitch<VPRecipeBase *>(&R)
+ .Case<VPWidenLoadRecipe>([&](VPWidenLoadRecipe *L) {
+ VPValue *NewMask =
+ HeaderMasks.contains(L->getMask()) ? nullptr : L->getMask();
+ auto *N = new VPWidenLoadEVLRecipe(L, &EVL, NewMask);
+ N->insertBefore(L);
+ L->replaceAllUsesWith(N);
+ ToRemove.insert(L);
+ })
+ .Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) {
+ VPValue *NewMask =
+ HeaderMasks.contains(S->getMask()) ? nullptr : S->getMask();
+ auto *N = new VPWidenStoreEVLRecipe(S, &EVL, NewMask);
+ N->insertBefore(S);
+ ToRemove.insert(S);
+ })
+ .Case<VPWidenRecipe>([&](VPWidenRecipe *W) {
+ unsigned Opcode = W->getOpcode();
+ if (!Instruction::isBinaryOp(Opcode) &&
+ !Instruction::isUnaryOp(Opcode))
+ return;
+ auto *N = VPWidenEVLRecipe::create(W, EVL);
+ N->insertBefore(W);
+ W->replaceAllUsesWith(N);
+ ToRemove.insert(W);
+ });
+ }
+ }
+
+ for (VPRecipeBase *R : ToRemove)
+ R->eraseFromParent();
+
+ for (VPValue *HeaderMask : HeaderMasks)
+ recursivelyDeleteDeadRecipes(HeaderMask);
+}
+
+
+
/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
/// replaces all uses except the canonical IV increment of
/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
@@ -1356,29 +1406,8 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
NextEVLIV->insertBefore(CanonicalIVIncrement);
EVLPhi->addOperand(NextEVLIV);
- for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
- for (VPUser *U : collectUsersRecursively(HeaderMask)) {
- auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U);
- if (!MemR)
- continue;
- VPValue *OrigMask = MemR->getMask();
- assert(OrigMask && "Unmasked widen memory recipe when folding tail");
- VPValue *NewMask = HeaderMask == OrigMask ? nullptr : OrigMask;
- if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR)) {
- auto *N = new VPWidenLoadEVLRecipe(L, VPEVL, NewMask);
- N->insertBefore(L);
- L->replaceAllUsesWith(N);
- L->eraseFromParent();
- } else if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR)) {
- auto *N = new VPWidenStoreEVLRecipe(S, VPEVL, NewMask);
- N->insertBefore(S);
- S->eraseFromParent();
- } else {
- llvm_unreachable("unsupported recipe");
- }
- }
- recursivelyDeleteDeadRecipes(HeaderMask);
- }
+ transformRecipestoEVLRecipes(Plan, *VPEVL);
+
// Replace all uses of VPCanonicalIVPHIRecipe by
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.
CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 8d945f6f2b8ea..a90d42289e5f1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -356,6 +356,7 @@ class VPDef {
VPWidenStoreEVLSC,
VPWidenStoreSC,
VPWidenSC,
+ VPWidenEVLSC,
VPWidenSelectSC,
VPBlendSC,
// START: Phi-like recipes. Need to be kept together.
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll
new file mode 100644
index 0000000000000..77689fc8a76ee
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll
@@ -0,0 +1,1799 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=IF-EVL
+
+; RUN: opt -passes=loop-vectorize \
+; RUN: -force-tail-folding-style=none \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=NO-VP
+
+
+define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) {
+; IF-EVL-LABEL: define void @test_and(
+; IF-EVL-SAME: ptr nocapture [[A:%.*]], ptr nocapture readonly [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; IF-EVL-NEXT: [[LOOP_PREHEADER:.*]]:
+; IF-EVL-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64
+; IF-EVL-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
+; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
+; IF-EVL: [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
+; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
+; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
+; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; IF-EVL: [[VECTOR_PH]]:
+; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 16
+; IF-EVL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16
+; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1
+; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
+; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
+; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 16
+; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
+; IF-EVL: [[VECTOR_BODY]]:
+; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
+; IF-EVL-NEXT: [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
+; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
+; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
+; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
+; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.and.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
+; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
+; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
+; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
+; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
+; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL: [[MIDDLE_BLOCK]]:
+; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
+; IF-EVL: [[SCALAR_PH]]:
+; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
+; IF-EVL-NEXT: br label %[[LOOP:.*]]
+; IF-EVL: [[LOOP]]:
+; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; IF-EVL-NEXT: [[DEC]] = add nsw i64 [[LEN]], 1
+; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
+; IF-EVL-NEXT: [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; IF-EVL-NEXT: [[TMP:%.*]] = and i8 [[TMP20]], 1
+; IF-EVL-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
+; IF-EVL-NEXT: store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
+; IF-EVL-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
+; IF-EVL-NEXT: br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; IF-EVL: [[FINISH_LOOPEXIT]]:
+; IF-EVL-NEXT: ret void
+;
+; NO-VP-LABEL: define void @test_and(
+; NO-VP-SAME: ptr nocapture [[A:%.*]], ptr nocapture readonly [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]:
+; NO-VP-NEXT: br label %[[LOOP:.*]]
+; NO-VP: [[LOOP]]:
+; NO-VP-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; NO-VP-NEXT: [[DEC]] = add nsw i64 [[LEN]], 1
+; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
+; NO-VP-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; NO-VP-NEXT: [[TMP:%.*]] = and i8 [[TMP0]], 1
+; NO-VP-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
+; NO-VP-NEXT: store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1...
[truncated]
✅ With the latest revision this PR passed the C/C++ code formatter.
Force-pushed fa0d938 to a42f974
CC: @ppenzin
for (unsigned I = 0, E = getNumOperands() - 1; I != E; ++I)
  if (getOperand(I) == Op)
    return false;
return true;
Can this be simplified to `return getEVL() == Op;`?
It's technically possible to use `EVL` as the first or second operand in the operation itself:

%evl = ...
%0 = add %evl, %0

i.e. that check makes sure `onlyFirstLaneUsed` returns false if `Op == EVL` for the example above.
Yes, looks like this corresponds to fhahn's comment about `if (!State.get(Op0, 0)->getType()->isVectorTy())`, regarding whether operands can be scalar.
If EVL would be the first/second operand, it would implicitly be splatted. In any case, no such plans can be constructed at the moment I think. Better to verify EVL is only used in supported places?
I will add verification
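To make the intent concrete, here is a minimal sketch of what such a verification could look like. This is a hypothetical helper under assumed VPlan APIs, not the code added by this PR; the actual `verifyEVLRecipe` declaration is quoted further down in the review.

```cpp
// Hypothetical sketch: every user of the EVL value is expected to be either
// an EVL-aware recipe consuming EVL as its last operand, or the
// VPInstruction::Add feeding the EVL-based IV increment.
static bool verifyEVLUsers(VPValue &EVL) {
  return all_of(EVL.users(), [&EVL](VPUser *U) {
    if (auto *VPI = dyn_cast<VPInstruction>(U))
      return VPI->getOpcode() == Instruction::Add;
    unsigned NumOps = U->getNumOperands();
    return NumOps > 0 && U->getOperand(NumOps - 1) == &EVL;
  });
}
```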
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
    Plan.getEntry());
Consider utilizing `Plan.getVectorLoopRegion()->getEntryBasicBlock()` to replace recipes that are within the loop.
That's a good point. We really need to traverse the entire VPlan, but replace instructions in the postexit with InitVL, like for a reduction. That said, I need to add a special check that EVL is reachable before replacing.
You're right; it reminds me that we must explicitly express InitVL in VPlan once we support reduction. This is because EVL from the last two iterations cannot be used in the post-exit for correctness. For this PR, can we temporarily skip the preheader and post-exit?
Just to double check, this isn't needed for correctness, right?
That's correct.
> Just to double check, this isn't needed for correctness, right?
> That's correct.

Great, could you add more detail about the motivation of the change to the PR description, i.e. why it is preferable to use the EVL version instead of regular wide ops?
Force-pushed b47b6a3 to 5311f92
Force-pushed 5311f92 to 7ad2abe
// Verify that \p EVL is used correctly. The user must be either in EVL-based
// recipes as a last operand or VPInstruction::Add which is incoming value
// into EVL's recipe.
bool verifyEVLRecipe(const VPInstruction &EVL) const;
Could this be split off, and submitted first?
VPlanTransforms::truncateToMinimalBitwidths(
    *Plan, CM.getMinimalBitwidths(), PSE.getSE()->getContext());
VPlanTransforms::optimize(*Plan, *PSE.getSE());
// TODO: try to put it close to addActiveLaneMask().
// Discard the plan if it is not EVL-compatible
if (CM.foldTailWithEVL() &&
This is independent of the unary/binary support, right? Can be split off + a test that forces VF = 1 and IC > 1?
It does relate to that change. Without it we create EVL recipes for a scalar VPlan. For load/store that is not a problem, but it is a problem for `WidenEVL` since it cannot handle VF=1.
Hm, does it make sense to create VPlans with EVL in general? IIUC it doesn't and this could be done independent of the patch.
widen loads and stores shouldn't be created in a scalar plan, I guess at the moment this just works as there are no recipes that can be in scalar plans and get converted to an EVL version without this patch.
> Hm, does it make sense to create VPlans with EVL in general? IIUC it doesn't and this could be done independent of the patch.

Not sure I follow.

> widen loads and stores shouldn't be created in a scalar plan, I guess at the moment this just works as there are no recipes that can be in scalar plans and get converted to an EVL version without this patch.

I double checked the code and it looks like I got confused with `VPInstruction`. So I agree `VPWidenRecipe` won't be a problem. Will move it to a separate patch.
Sounds great!
Force-pushed 52b9548 to e0c7e7c
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
  VPDominatorTree VPDT;
  VPDT.recalculate(Plan);
  DenseSet<VPRecipeBase *> ToRemove;
SmallVector should be sufficient?
for (VPBasicBlock *VPBB :
     reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) {
  for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
    if (!properlyDominates(EVL.getDefiningRecipe(), &R, VPDT))
Can this be an assert? We just earlier inserted EVL at a place that should dominate all recipes handled below?
It's related to the comment above: there are some cases where the preheader or postexit may technically contain EVL-based recipes; they do require EVL, but it should be constructed before them.
will remove it for now as it does assert work for VF=1.
    Plan.getEntry());
DenseSet<VPValue *> HeaderMasks = collectAllHeaderMasks(Plan);
for (VPBasicBlock *VPBB :
     reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) {
Is an RPOT & reverse needed?
Yes, otherwise recipes won't be removed correctly. BTW, the comment I removed was copied, along with the loop, from `removeDeadRecipes`.
I am not sure; I think the reason removeDeadRecipes does it in that order is to remove later dead recipes before their users, but the current patch removes all replaced recipes, then removes the masks. The only operand that isn't used any longer when removing the original recipes is the mask, so I am not sure in what cases the order matters?
agree. removed
Force-pushed e0c7e7c to dd1f74f
Force-pushed dd1f74f to 5098471
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
    Plan.getEntry());
DenseSet<VPValue *> HeaderMasks = collectAllHeaderMasks(Plan);
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT)) {
RPOT still needed?
@@ -31,7 +31,7 @@ define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = add nsw ir<[[LD2]]>, ir<[[LD1]]>
; IF-EVL-NEXT: WIDEN-VP ir<[[ADD:%.+]]> = add nsw ir<[[LD2]]>, ir<[[LD1]]>
Should this also match the EVL operand?
agree. Fixed.
Force-pushed 5098471 to 7312b20
@fhahn ping
public:
  template <typename IterT>
  VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
      : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
        Opcode(I.getOpcode()) {}
      : VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
This should probably have a custom `classof` so `isa<VPWidenRecipe>` returns true for both VPWidenRecipe and VPWidenEVLRecipe?
Why? Was the goal of having dedicated EVL-recipes to prevent treating them as non-EVL? Should `VPWidenLoad` also return true for `VPWidenLoadEVL`?
The main benefit of having a shared base-class is that analyses don't have to handle all recipes when it makes sense. I think analyses that apply to VPWidenRecipe should also conservatively apply to VPWidenEVLRecipe, as the latter only possibly operates on fewer values. If that's not sound, we probably shouldn't inherit from `VPWidenRecipe` without also implementing the corresponding `isa` relationship.

VPWidenLoad/VPWidenLoadEVL only share `VPWidenMemoryRecipe` as common base-class, for which all VPWidenLoad|Store? return true.
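For reference, the memory-recipe pattern mentioned above roughly corresponds to a `classof` that accepts all four load/store recipe IDs. The snippet below is a sketch reconstructed from the recipe IDs visible in this patch, not a verbatim copy of the upstream code:

```cpp
// Sketch: isa<VPWidenMemoryRecipe> holds for every widened load/store,
// whether or not it carries an EVL operand.
static inline bool classof(const VPRecipeBase *R) {
  return R->getVPDefID() == VPRecipeBase::VPWidenLoadSC ||
         R->getVPDefID() == VPRecipeBase::VPWidenLoadEVLSC ||
         R->getVPDefID() == VPRecipeBase::VPWidenStoreSC ||
         R->getVPDefID() == VPRecipeBase::VPWidenStoreEVLSC;
}
```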
I assume then you really meant to introduce a base class for `VPWidenRecipe` and `VPWidenEVLRecipe` that will return true for both of them, right? In this case the class hierarchy will be similar to `VPWiden[Load|Store][|EVL]`. If so, the same should go for all future EVL-recipes:

        VPSomeRecipeBase
         /            \
VPSomeRecipe    VPSomeEVLRecipe

VPSomeRecipeBase::classof(...) { return Opcode == VPSomeRecipeSC || Opcode == VPSomeEVLRecipeSC; }
Ideally VPWidenRecipe could serve as such a base class, as mentioned above, unless there's a compelling reason not to. That way we automatically benefit from all folds and analyses already implemented for `VPWidenRecipe`.
Not sure I understand the added hierarchy then. If the goal is to allow reuse of existing code, then what is different about `VPWidenStoreEVL` or `VPWidenLoadEVL`, as they won't be treated as `VPWidenStore` or `VPWidenLoad` respectively:

isa<VPWidenMemoryRecipe>(EVLStore)   => true
isa<VPWidenStoreEVLRecipe>(EVLStore) => true
isa<VPWidenStoreRecipe>(EVLStore)    => false

Anyway, I'm ok to extend `classof`.
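A minimal sketch of what extending `classof` could look like for the widen recipes discussed here, mirroring the recipe-ID checks elsewhere in this patch; the exact form that eventually landed may differ:

```cpp
// Sketch: make isa<VPWidenRecipe> also accept the EVL variant, so analyses
// and folds written against VPWidenRecipe conservatively cover
// VPWidenEVLRecipe as well.
static inline bool classof(const VPRecipeBase *R) {
  return R->getVPDefID() == VPRecipeBase::VPWidenSC ||
         R->getVPDefID() == VPRecipeBase::VPWidenEVLSC;
}
```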
Thanks for the update. Adjusting `VPWidenStoreEVLRecipe`/`VPWidenLoadEVLRecipe` would probably make sense separately, although the reasoning may be a bit more complicated.
@@ -2558,14 +2605,19 @@ struct VPWidenLoadRecipe final : public VPWidenMemoryRecipe, public VPValue {
/// using the address to load from, the explicit vector length and an optional
/// mask.
struct VPWidenLoadEVLRecipe final : public VPWidenMemoryRecipe, public VPValue {
  VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue &EVL, VPValue *Mask)
  VPWidenLoadEVLRecipe(VPWidenLoadRecipe &L, VPValue *Mask, VPValue &EVL)
are those changes intentional? If so, would be good to split off, but as the mask is the optional last operand, the current order seems to be intentional (Mask could have nullptr as default arg)
revert
Force-pushed 80c43db to 661356c
Thanks for the latest updates, a few final comments inline
@@ -16,46 +16,46 @@ define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %pt
; IF-EVL: vector.ph:
; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP1]], 1
; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]]
; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
Are the changes here caused by the patch? After a quick spot check, it seems only related to CHECK variable renames? If so would be good to remove them from the patch
Most likely it's an artifact from some of my previous commits.
Force-pushed c8afdb5 to 6af8c2e
bool onlyFirstLaneUsed(const VPValue *Op) const override {
  assert(is_contained(operands(), Op) &&
         "Op must be an operand of the recipe");
  // EVL in that recipe is always the last operand, thus any use before means
Would be good to post a follow-up patch to enforce EVL only used as last operand of various recipes in the verifier
Sure. I will create a PR with the verification I added and removed previously.
Great thanks!
Force-pushed 71c9936 to da6c1f2
LGTM, thanks! A few small additional suggestions inline
The patch adds `VPWidenEVLRecipe` which represents `VPWidenRecipe` + EVL argument. The new recipe replaces `VPWidenRecipe` in `tryAddExplicitVectorLength` for each binary and unary operation. Follow-up patches will extend support for remaining cases, like `FCmp` and `ICmp`
supported by vp intrinsics
VP intrinsics can only accept FMFs at this moment, thus trying to set other flags will lead to ICE
Force-pushed 234479d to 1f8726e
@nikolaypanchenko can merge this patch?
I've fixed an unused variable warning from this PR with ce192b8.
Thanks. I'll double check why I missed it in the first place.
The patch adds `VPWidenEVLRecipe` which represents `VPWidenRecipe` + EVL argument. The new recipe replaces `VPWidenRecipe` in `tryAddExplicitVectorLength` for each binary and unary operation. Follow-up patches will extend support for remaining cases, like `FCmp` and `ICmp`.