Skip to content

Commit 9b4ad2f

Browse files
authored
[LV][EVL] Support fixed-order recurrence idiom with EVL tail folding. (#124093)
This patch converts the llvm.vector.splice intrinsic to llvm.experimental.vp.splice, ensuring that fixed-order recurrences execute correctly when tail folding by EVL is enabled. Due to the non-VFxUF penultimate EVL issue, the EVL from the previous iteration will be preserved and used in llvm.experimental.vp.splice.
1 parent 3c9429f commit 9b4ad2f

File tree

7 files changed

+172
-67
lines changed

7 files changed

+172
-67
lines changed

llvm/lib/Analysis/VectorUtils.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,8 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
163163
case Intrinsic::umul_fix:
164164
case Intrinsic::umul_fix_sat:
165165
return (ScalarOpdIdx == 2);
166+
case Intrinsic::experimental_vp_splice:
167+
return ScalarOpdIdx == 2 || ScalarOpdIdx == 4 || ScalarOpdIdx == 5;
166168
default:
167169
return false;
168170
}

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1454,12 +1454,9 @@ class LoopVectorizationCostModel {
14541454
// Override forced styles if needed.
14551455
// FIXME: use actual opcode/data type for analysis here.
14561456
// FIXME: Investigate opportunity for fixed vector factor.
1457-
// FIXME: support fixed-order recurrences by fixing splice of non VFxUF
1458-
// penultimate EVL.
14591457
bool EVLIsLegal = UserIC <= 1 && IsScalableVF &&
14601458
TTI.hasActiveVectorLength(0, nullptr, Align()) &&
1461-
!EnableVPlanNativePath &&
1462-
Legal->getFixedOrderRecurrences().empty();
1459+
!EnableVPlanNativePath;
14631460
if (!EVLIsLegal) {
14641461
// If for some reason EVL mode is unsupported, fallback to
14651462
// DataWithoutLaneMask to try to vectorize the loop with folded tail

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -534,6 +534,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
534534
case VPRecipeBase::VPWidenPointerInductionSC:
535535
case VPRecipeBase::VPReductionPHISC:
536536
case VPRecipeBase::VPScalarCastSC:
537+
case VPRecipeBase::VPScalarPHISC:
537538
case VPRecipeBase::VPPartialReductionSC:
538539
return true;
539540
case VPRecipeBase::VPBranchOnMaskSC:

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1659,10 +1659,13 @@ void VPlanTransforms::addActiveLaneMask(
16591659
/// \p AllOneMask The vector mask parameter of vector-predication intrinsics.
16601660
/// \p EVL The explicit vector length parameter of vector-predication
16611661
/// intrinsics.
1662+
/// \p PrevEVL The explicit vector length of the previous iteration. Only
1663+
/// required if \p CurRecipe is a VPInstruction::FirstOrderRecurrenceSplice.
16621664
static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
16631665
VPRecipeBase &CurRecipe,
16641666
VPTypeAnalysis &TypeInfo,
1665-
VPValue &AllOneMask, VPValue &EVL) {
1667+
VPValue &AllOneMask, VPValue &EVL,
1668+
VPValue *PrevEVL) {
16661669
using namespace llvm::VPlanPatternMatch;
16671670
auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
16681671
assert(OrigMask && "Unmasked recipe when folding tail");
@@ -1690,6 +1693,18 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
16901693
Sel->getDebugLoc());
16911694
})
16921695
.Case<VPInstruction>([&](VPInstruction *VPI) -> VPRecipeBase * {
1696+
if (VPI->getOpcode() == VPInstruction::FirstOrderRecurrenceSplice) {
1697+
assert(PrevEVL && "Fixed-order recurrences require previous EVL");
1698+
VPValue *MinusOneVPV = VPI->getParent()->getPlan()->getOrAddLiveIn(
1699+
ConstantInt::getSigned(Type::getInt32Ty(TypeInfo.getContext()),
1700+
-1));
1701+
SmallVector<VPValue *> Ops(VPI->operands());
1702+
Ops.append({MinusOneVPV, &AllOneMask, PrevEVL, &EVL});
1703+
return new VPWidenIntrinsicRecipe(Intrinsic::experimental_vp_splice,
1704+
Ops, TypeInfo.inferScalarType(VPI),
1705+
VPI->getDebugLoc());
1706+
}
1707+
16931708
VPValue *LHS, *RHS;
16941709
// Transform select with a header mask condition
16951710
// select(header_mask, LHS, RHS)
@@ -1713,6 +1728,30 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
17131728
VPTypeAnalysis TypeInfo(CanonicalIVType);
17141729
LLVMContext &Ctx = CanonicalIVType->getContext();
17151730
VPValue *AllOneMask = Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx));
1731+
VPRegionBlock *LoopRegion = Plan.getVectorLoopRegion();
1732+
VPBasicBlock *Header = LoopRegion->getEntryBasicBlock();
1733+
1734+
// Create a scalar phi to track the previous EVL if fixed-order recurrence is
1735+
// contained.
1736+
VPScalarPHIRecipe *PrevEVL = nullptr;
1737+
bool ContainsFORs =
1738+
any_of(Header->phis(), IsaPred<VPFirstOrderRecurrencePHIRecipe>);
1739+
if (ContainsFORs) {
1740+
// TODO: Use VPInstruction::ExplicitVectorLength to get maximum EVL.
1741+
VPValue *MaxEVL = &Plan.getVF();
1742+
// Emit VPScalarCastRecipe in preheader if VF is not a 32 bits integer.
1743+
if (unsigned VFSize =
1744+
TypeInfo.inferScalarType(MaxEVL)->getScalarSizeInBits();
1745+
VFSize != 32) {
1746+
MaxEVL = new VPScalarCastRecipe(
1747+
VFSize > 32 ? Instruction::Trunc : Instruction::ZExt, MaxEVL,
1748+
Type::getInt32Ty(Ctx), DebugLoc());
1749+
VPBasicBlock *Preheader = LoopRegion->getPreheaderVPBB();
1750+
Preheader->appendRecipe(cast<VPScalarCastRecipe>(MaxEVL));
1751+
}
1752+
PrevEVL = new VPScalarPHIRecipe(MaxEVL, &EVL, DebugLoc(), "prev.evl");
1753+
PrevEVL->insertBefore(*Header, Header->getFirstNonPhi());
1754+
}
17161755

17171756
for (VPUser *U : to_vector(Plan.getVF().users())) {
17181757
if (auto *R = dyn_cast<VPReverseVectorPointerRecipe>(U))
@@ -1724,8 +1763,8 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
17241763
for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
17251764
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
17261765
auto *CurRecipe = cast<VPRecipeBase>(U);
1727-
VPRecipeBase *EVLRecipe =
1728-
createEVLRecipe(HeaderMask, *CurRecipe, TypeInfo, *AllOneMask, EVL);
1766+
VPRecipeBase *EVLRecipe = createEVLRecipe(
1767+
HeaderMask, *CurRecipe, TypeInfo, *AllOneMask, EVL, PrevEVL);
17291768
if (!EVLRecipe)
17301769
continue;
17311770

llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
143143
})
144144
.Case<VPWidenStoreEVLRecipe, VPReductionEVLRecipe>(
145145
[&](const VPRecipeBase *S) { return VerifyEVLUse(*S, 2); })
146-
.Case<VPWidenLoadEVLRecipe, VPReverseVectorPointerRecipe>(
146+
.Case<VPWidenLoadEVLRecipe, VPReverseVectorPointerRecipe,
147+
VPScalarPHIRecipe>(
147148
[&](const VPRecipeBase *R) { return VerifyEVLUse(*R, 1); })
148149
.Case<VPScalarCastRecipe>(
149150
[&](const VPScalarCastRecipe *S) { return VerifyEVLUse(*S, 0); })

0 commit comments

Comments
 (0)