@@ -1659,10 +1659,13 @@ void VPlanTransforms::addActiveLaneMask(
1659
1659
/// \p AllOneMask The vector mask parameter of vector-predication intrinsics.
1660
1660
/// \p EVL The explicit vector length parameter of vector-predication
1661
1661
/// intrinsics.
1662
+ /// \p PrevEVL The explicit vector length of the previous iteration. Only
1663
+ /// required if \p CurRecipe is a VPInstruction::FirstOrderRecurrenceSplice.
1662
1664
static VPRecipeBase *createEVLRecipe (VPValue *HeaderMask,
1663
1665
VPRecipeBase &CurRecipe,
1664
1666
VPTypeAnalysis &TypeInfo,
1665
- VPValue &AllOneMask, VPValue &EVL) {
1667
+ VPValue &AllOneMask, VPValue &EVL,
1668
+ VPValue *PrevEVL) {
1666
1669
using namespace llvm ::VPlanPatternMatch;
1667
1670
auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
1668
1671
assert (OrigMask && " Unmasked recipe when folding tail" );
@@ -1690,6 +1693,18 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
1690
1693
Sel->getDebugLoc ());
1691
1694
})
1692
1695
.Case <VPInstruction>([&](VPInstruction *VPI) -> VPRecipeBase * {
1696
+ if (VPI->getOpcode () == VPInstruction::FirstOrderRecurrenceSplice) {
1697
+ assert (PrevEVL && " Fixed-order recurrences require previous EVL" );
1698
+ VPValue *MinusOneVPV = VPI->getParent ()->getPlan ()->getOrAddLiveIn (
1699
+ ConstantInt::getSigned (Type::getInt32Ty (TypeInfo.getContext ()),
1700
+ -1 ));
1701
+ SmallVector<VPValue *> Ops (VPI->operands ());
1702
+ Ops.append ({MinusOneVPV, &AllOneMask, PrevEVL, &EVL});
1703
+ return new VPWidenIntrinsicRecipe (Intrinsic::experimental_vp_splice,
1704
+ Ops, TypeInfo.inferScalarType (VPI),
1705
+ VPI->getDebugLoc ());
1706
+ }
1707
+
1693
1708
VPValue *LHS, *RHS;
1694
1709
// Transform select with a header mask condition
1695
1710
// select(header_mask, LHS, RHS)
@@ -1713,6 +1728,30 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
1713
1728
VPTypeAnalysis TypeInfo (CanonicalIVType);
1714
1729
LLVMContext &Ctx = CanonicalIVType->getContext ();
1715
1730
VPValue *AllOneMask = Plan.getOrAddLiveIn (ConstantInt::getTrue (Ctx));
1731
+ VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion ();
1732
+ VPBasicBlock *Header = LoopRegion->getEntryBasicBlock ();
1733
+
1734
+ // Create a scalar phi to track the previous EVL if the loop header contains
1735
+ // a fixed-order recurrence.
1736
+ VPScalarPHIRecipe *PrevEVL = nullptr ;
1737
+ bool ContainsFORs =
1738
+ any_of (Header->phis (), IsaPred<VPFirstOrderRecurrencePHIRecipe>);
1739
+ if (ContainsFORs) {
1740
+ // TODO: Use VPInstruction::ExplicitVectorLength to get maximum EVL.
1741
+ VPValue *MaxEVL = &Plan.getVF ();
1742
+ // Emit a VPScalarCastRecipe in the preheader if VF is not a 32-bit integer.
1743
+ if (unsigned VFSize =
1744
+ TypeInfo.inferScalarType (MaxEVL)->getScalarSizeInBits ();
1745
+ VFSize != 32 ) {
1746
+ MaxEVL = new VPScalarCastRecipe (
1747
+ VFSize > 32 ? Instruction::Trunc : Instruction::ZExt, MaxEVL,
1748
+ Type::getInt32Ty (Ctx), DebugLoc ());
1749
+ VPBasicBlock *Preheader = LoopRegion->getPreheaderVPBB ();
1750
+ Preheader->appendRecipe (cast<VPScalarCastRecipe>(MaxEVL));
1751
+ }
1752
+ PrevEVL = new VPScalarPHIRecipe (MaxEVL, &EVL, DebugLoc (), " prev.evl" );
1753
+ PrevEVL->insertBefore (*Header, Header->getFirstNonPhi ());
1754
+ }
1716
1755
1717
1756
for (VPUser *U : to_vector (Plan.getVF ().users ())) {
1718
1757
if (auto *R = dyn_cast<VPReverseVectorPointerRecipe>(U))
@@ -1724,8 +1763,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
1724
1763
for (VPValue *HeaderMask : collectAllHeaderMasks (Plan)) {
1725
1764
for (VPUser *U : collectUsersRecursively (HeaderMask)) {
1726
1765
auto *CurRecipe = cast<VPRecipeBase>(U);
1727
- VPRecipeBase *EVLRecipe =
1728
- createEVLRecipe ( HeaderMask, *CurRecipe, TypeInfo, *AllOneMask, EVL);
1766
+ VPRecipeBase *EVLRecipe = createEVLRecipe (
1767
+ HeaderMask, *CurRecipe, TypeInfo, *AllOneMask, EVL, PrevEVL );
1729
1768
if (!EVLRecipe)
1730
1769
continue ;
1731
1770
0 commit comments