!fixup, address comments.

ElvisWang123 · ElvisWang123 · commit 92163e9f3add · 2025-05-07T17:20:39.000-07:00
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1265,8 +1265,8 @@ class VPWidenRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
         Opcode(I.getOpcode()) {}
 
   template <typename IterT>
-  VPWidenRecipe(unsigned VPDefOpcode, unsigned Opcode,
-                iterator_range<IterT> Operands, bool NUW, bool NSW, DebugLoc DL)
+  VPWidenRecipe(unsigned VPDefOpcode, unsigned Opcode, ArrayRef<IterT> Operands,
+                bool NUW, bool NSW, DebugLoc DL)
       : VPRecipeWithIRFlags(VPDefOpcode, Operands, WrapFlagsTy(NUW, NSW), DL),
         Opcode(Opcode) {}
 
@@ -1275,8 +1275,8 @@ class VPWidenRecipe : public VPRecipeWithIRFlags, public VPIRMetadata {
       : VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
 
   template <typename IterT>
-  VPWidenRecipe(unsigned Opcode, iterator_range<IterT> Operands, bool NUW,
-                bool NSW, DebugLoc DL)
+  VPWidenRecipe(unsigned Opcode, ArrayRef<IterT> Operands, bool NUW, bool NSW,
+                DebugLoc DL)
       : VPWidenRecipe(VPDef::VPWidenSC, Opcode, Operands, NUW, NSW, DL) {}
 
   ~VPWidenRecipe() override = default;
@@ -2558,9 +2558,10 @@ class VPReductionEVLRecipe : public VPReductionRecipe {
 /// concrete recipes before codegen. The operands are {ChainOp, VecOp,
 /// [Condition]}.
 class VPExtendedReductionRecipe : public VPReductionRecipe {
-  /// Opcode of the extend recipe will be lowered to.
+  /// Opcode of the extend for VecOp.
   Instruction::CastOps ExtOp;
 
+  /// The scalar type after extending.
   Type *ResultTy;
 
   /// For cloning VPExtendedReductionRecipe.
@@ -2581,7 +2582,7 @@ class VPExtendedReductionRecipe : public VPReductionRecipe {
         ExtOp(Ext->getOpcode()), ResultTy(Ext->getResultType()) {
     assert((ExtOp == Instruction::CastOps::ZExt ||
             ExtOp == Instruction::CastOps::SExt) &&
-           "VPExtendedReductionRecipe only support zext and sext.");
+           "VPExtendedReductionRecipe only supports zext and sext.");
 
     // Not all WidenCastRecipes contain nneg flag. Need to transfer flags from
     // the original recipe to prevent setting wrong flags.
@@ -2614,7 +2615,7 @@ class VPExtendedReductionRecipe : public VPReductionRecipe {
   /// Is the extend ZExt?
   bool isZExt() const { return getExtOpcode() == Instruction::ZExt; }
 
-  /// The opcode of extend recipe.
+  /// Get the opcode of the extend for VecOp.
   Instruction::CastOps getExtOpcode() const { return ExtOp; }
 };
 
@@ -2624,12 +2625,13 @@ class VPExtendedReductionRecipe : public VPReductionRecipe {
 /// recipe is abstract and needs to be lowered to concrete recipes before
 /// codegen. The operands are {ChainOp, VecOp1, VecOp2, [Condition]}.
 class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
-  /// Opcode of the extend recipe.
+  /// Opcode of the extend for VecOp1 and VecOp2.
   Instruction::CastOps ExtOp;
 
   /// Non-neg flag of the extend recipe.
   bool IsNonNeg = false;
 
+  /// The scalar type after extending.
   Type *ResultTy;
 
   /// For cloning VPMulAccumulateReductionRecipe.
@@ -2660,7 +2662,7 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
            "be Add");
     assert((ExtOp == Instruction::CastOps::ZExt ||
             ExtOp == Instruction::CastOps::SExt) &&
-           "VPMulAccumulateReductionRecipe only support zext and sext.");
+           "VPMulAccumulateReductionRecipe only supports zext and sext.");
     setUnderlyingValue(R->getUnderlyingValue());
     // Only set the non-negative flag if the original recipe contains.
     if (Ext0->hasNonNegFlag())
@@ -2706,24 +2708,26 @@ class VPMulAccumulateReductionRecipe : public VPReductionRecipe {
 
   Type *getResultType() const {
     assert(isExtended() && "Only support getResultType when this recipe "
-                           "contains implicit extend.");
+                           "is implicitly extended.");
     return ResultTy;
   }
 
-  /// The VPValue of the vector value to be extended and reduced.
+  /// The first vector value to be extended and reduced.
   VPValue *getVecOp0() const { return getOperand(1); }
+
+  /// The second vector value to be extended and reduced.
   VPValue *getVecOp1() const { return getOperand(2); }
 
-  /// Return if this MulAcc recipe contains extended operands.
+  /// Return true if this recipe contains extended operands.
   bool isExtended() const { return ExtOp != Instruction::CastOps::CastOpsEnd; }
 
   /// Return the opcode of the extends for the operands.
   Instruction::CastOps getExtOpcode() const { return ExtOp; }
 
-  /// Return if the operands are zero extended.
+  /// Return true if the operands are zero-extended.
   bool isZExt() const { return ExtOp == Instruction::CastOps::ZExt; }
 
-  /// Return the non negative flag of the ext recipe.
+  /// Return true if the operand extends have the non-negative flag.
   bool isNonNeg() const { return IsNonNeg; }
 };
 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -2563,7 +2563,8 @@ void VPExtendedReductionRecipe::print(raw_ostream &O, const Twine &Indent,
            RecurrenceDescriptor::getOpcode(getRecurrenceKind()))
     << " (";
   getVecOp()->printAsOperand(O, SlotTracker);
-  O << " extended to " << *getResultType();
+  printFlags(O);
+  O << Instruction::getOpcodeName(ExtOp) << " to " << *getResultType();
   if (isConditional()) {
     O << ", ";
     getCondOp()->printAsOperand(O, SlotTracker);
@@ -2588,12 +2589,14 @@ void VPMulAccumulateReductionRecipe::print(raw_ostream &O, const Twine &Indent,
     O << "(";
   getVecOp0()->printAsOperand(O, SlotTracker);
   if (isExtended())
-    O << " extended to " << *getResultType() << "), (";
+    O << " " << Instruction::getOpcodeName(ExtOp) << " to " << *getResultType()
+      << "), (";
   else
     O << ", ";
   getVecOp1()->printAsOperand(O, SlotTracker);
   if (isExtended())
-    O << " extended to " << *getResultType() << ")";
+    O << " " << Instruction::getOpcodeName(ExtOp) << " to " << *getResultType()
+      << ")";
   if (isConditional()) {
     O << ", ";
     getCondOp()->printAsOperand(O, SlotTracker);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2446,7 +2446,7 @@ static void expandVPExtendedReduction(VPExtendedReductionRecipe *ExtRed) {
 static void
 expandVPMulAccumulateReduction(VPMulAccumulateReductionRecipe *MulAcc) {
   // Generate inner VPWidenCastRecipes if necessary.
-  // Note that we will drop the extend after mul which transform
+  // Note that we will drop the extend after mul which transforms
   // reduce.add(ext(mul(ext, ext))) to reduce.add(mul(ext, ext)).
   VPValue *Op0, *Op1;
   if (MulAcc->isExtended()) {
@@ -2481,9 +2481,8 @@ expandVPMulAccumulateReduction(VPMulAccumulateReductionRecipe *MulAcc) {
 
   std::array<VPValue *, 2> MulOps = {Op0, Op1};
   auto *Mul = new VPWidenRecipe(
-      Instruction::Mul, make_range(MulOps.begin(), MulOps.end()),
-      MulAcc->hasNoUnsignedWrap(), MulAcc->hasNoSignedWrap(),
-      MulAcc->getDebugLoc());
+      Instruction::Mul, ArrayRef(MulOps), MulAcc->hasNoUnsignedWrap(),
+      MulAcc->hasNoSignedWrap(), MulAcc->getDebugLoc());
   Mul->insertBefore(MulAcc);
 
   auto *Red = new VPReductionRecipe(
@@ -2722,6 +2721,10 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
                                           VPCostContext &Ctx, VFRange &Range) {
   using namespace VPlanPatternMatch;
 
+  unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind());
+  if (Opcode != Instruction::Add)
+    return nullptr;
+
   Type *RedTy = Ctx.Types.inferScalarType(Red);
 
   // Clamp the range if using multiply-accumulate-reduction is profitable.
@@ -2752,21 +2755,17 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
         Range);
   };
 
-  unsigned Opcode = RecurrenceDescriptor::getOpcode(Red->getRecurrenceKind());
-  if (Opcode != Instruction::Add)
-    return nullptr;
-
   VPValue *VecOp = Red->getVecOp();
   VPValue *A, *B;
-  // Try to match reduce.add(mul(...))
+  // Try to match reduce.add(mul(...)).
   if (match(VecOp, m_Mul(m_VPValue(A), m_VPValue(B)))) {
     auto *RecipeA =
         dyn_cast_if_present<VPWidenCastRecipe>(A->getDefiningRecipe());
     auto *RecipeB =
         dyn_cast_if_present<VPWidenCastRecipe>(B->getDefiningRecipe());
     auto *Mul = cast<VPWidenRecipe>(VecOp->getDefiningRecipe());
 
-    // Match reduce.add(mul(ext, ext))
+    // Match reduce.add(mul(ext, ext)).
     if (RecipeA && RecipeB &&
         (RecipeA->getOpcode() == RecipeB->getOpcode() || A == B) &&
         match(RecipeA, m_ZExtOrSExt(m_VPValue())) &&
@@ -2776,11 +2775,11 @@ tryToMatchAndCreateMulAccumulateReduction(VPReductionRecipe *Red,
                                    Mul, RecipeA, RecipeB, nullptr))
       return new VPMulAccumulateReductionRecipe(Red, Mul, RecipeA, RecipeB,
                                                 RecipeA->getResultType());
-    // Match reduce.add(mul)
+    // Match reduce.add(mul).
     if (IsMulAccValidAndClampRange(true, Mul, nullptr, nullptr, nullptr))
       return new VPMulAccumulateReductionRecipe(Red, Mul);
   }
-  // Match reduce.add(ext(mul(ext(A), ext(B))))
+  // Match reduce.add(ext(mul(ext(A), ext(B)))).
   // All extend recipes must have same opcode or A == B
   // which can be transform to reduce.add(zext(mul(sext(A), sext(B)))).
   if (match(VecOp, m_ZExtOrSExt(m_Mul(m_ZExtOrSExt(m_VPValue()),
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
@@ -288,7 +288,7 @@ define i64 @print_extended_reduction(ptr nocapture readonly %x, ptr nocapture re
 ; CHECK-NEXT:     CLONE ir<%arrayidx> = getelementptr inbounds ir<%x>, vp<[[STEPS]]>
 ; CHECK-NEXT:     vp<[[ADDR:%.+]]> = vector-pointer ir<%arrayidx>
 ; CHECK-NEXT:     WIDEN ir<[[LOAD:%.+]]> = load vp<[[ADDR]]>
-; CHECK-NEXT:     EXTENDED-REDUCE ir<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.add (ir<[[LOAD]]> extended to i64)
+; CHECK-NEXT:     EXTENDED-REDUCE ir<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.add (ir<[[LOAD]]> zext to i64)
 ; CHECK-NEXT:     EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT:     EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT:   No successors
@@ -384,7 +384,7 @@ define i64 @print_mulacc_extended(ptr nocapture readonly %x, ptr nocapture reado
 ; CHECK-NEXT:     CLONE ir<[[ARRAYIDX1:%.+]]> = getelementptr inbounds ir<%y>, vp<[[STEPS]]>
 ; CHECK-NEXT:     vp<[[ADDR1:%.+]]> = vector-pointer ir<[[ARRAYIDX1]]>
 ; CHECK-NEXT:     WIDEN ir<[[LOAD1:%.+]]> = load vp<[[ADDR1]]>
-; CHECK-NEXT:     MULACC-REDUCE ir<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.add (mul nsw (ir<[[LOAD0]]> extended to i64), (ir<[[LOAD1]]> extended to i64))
+; CHECK-NEXT:     MULACC-REDUCE ir<[[RDX_NEXT:%.+]]> = ir<[[RDX]]> + reduce.add (mul nsw (ir<[[LOAD0]]> sext to i64), (ir<[[LOAD1]]> sext to i64))
 ; CHECK-NEXT:     EMIT vp<[[IV_NEXT]]> = add nuw vp<[[IV]]>, vp<[[VFxUF]]>
 ; CHECK-NEXT:     EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]>
 ; CHECK-NEXT:   No successors