@@ -1523,6 +1523,126 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
1523
1523
}
1524
1524
}
1525
1525
1526
+ // / This function adds (0 * Step, 1 * Step, 2 * Step, ...) to StartValue of
1527
+ // / an induction variable at the preheader.
1528
+ static VPSingleDefRecipe *createStepVector (VPValue *StartValue, VPValue *Step,
1529
+ Type *InductionTy,
1530
+ const InductionDescriptor &ID,
1531
+ VPBasicBlock *VectorPHVPBB,
1532
+ DebugLoc DL) {
1533
+ Type *IntTy = InductionTy->isIntegerTy ()
1534
+ ? InductionTy
1535
+ : IntegerType::get (InductionTy->getContext (),
1536
+ InductionTy->getScalarSizeInBits ());
1537
+ // Create a vector of consecutive numbers from zero to VF.
1538
+ VPSingleDefRecipe *InitVec =
1539
+ new VPWidenIntrinsicRecipe (Intrinsic::stepvector, {}, IntTy, DL);
1540
+ VectorPHVPBB->appendRecipe (InitVec);
1541
+
1542
+ if (InductionTy->isIntegerTy ()) {
1543
+ auto *Mul = new VPInstruction (Instruction::Mul, {InitVec, Step}, DL);
1544
+ VectorPHVPBB->appendRecipe (Mul);
1545
+ auto *SteppedStart =
1546
+ new VPInstruction (Instruction::Add, {StartValue, Mul}, {}, " induction" );
1547
+ VectorPHVPBB->appendRecipe (SteppedStart);
1548
+ return SteppedStart;
1549
+ } else {
1550
+ FastMathFlags FMF = ID.getInductionBinOp ()->getFastMathFlags ();
1551
+ InitVec = new VPWidenCastRecipe (Instruction::UIToFP, InitVec, InductionTy);
1552
+ VectorPHVPBB->appendRecipe (InitVec);
1553
+ auto *Mul = new VPInstruction (Instruction::FMul, {InitVec, Step}, FMF, DL);
1554
+ VectorPHVPBB->appendRecipe (Mul);
1555
+ Instruction::BinaryOps BinOp = ID.getInductionOpcode ();
1556
+ auto *SteppedStart =
1557
+ new VPInstruction (BinOp, {StartValue, Mul}, FMF, DL, " induction" );
1558
+ VectorPHVPBB->appendRecipe (SteppedStart);
1559
+ return SteppedStart;
1560
+ }
1561
+ }
1562
+
1563
+ // / Lower widen iv recipes into recipes with EVL.
1564
+ static void
1565
+ transformWidenIVRecipestoEVLRecipes (VPWidenIntOrFpInductionRecipe *WidenIV,
1566
+ VPlan &Plan, VPValue *EVL) {
1567
+ DebugLoc DL = WidenIV->getDebugLoc ();
1568
+ const InductionDescriptor &ID = WidenIV->getInductionDescriptor ();
1569
+ auto *CanonicalIVIncrement =
1570
+ cast<VPInstruction>(Plan.getCanonicalIV ()->getBackedgeValue ());
1571
+ VPBasicBlock *VectorPHVPBB = Plan.getVectorLoopRegion ()->getPreheaderVPBB ();
1572
+ VPBasicBlock *ExitingVPBB =
1573
+ Plan.getVectorLoopRegion ()->getExitingBasicBlock ();
1574
+ VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
1575
+ VPValue *StartValue = WidenIV->getStartValue ();
1576
+ VPValue *Step = WidenIV->getStepValue ();
1577
+ if (TruncInst *I = WidenIV->getTruncInst ()) {
1578
+ Type *TruncTy = I->getType ();
1579
+ auto *R = new VPScalarCastRecipe (Instruction::Trunc, StartValue, TruncTy);
1580
+ VectorPHVPBB->appendRecipe (R);
1581
+ StartValue = R;
1582
+ R = new VPScalarCastRecipe (Instruction::Trunc, Step, TruncTy);
1583
+ VectorPHVPBB->appendRecipe (R);
1584
+ Step = R;
1585
+ }
1586
+ Type *InductionTy = TypeInfo.inferScalarType (StartValue);
1587
+ LLVMContext &Ctx = InductionTy->getContext ();
1588
+ VPValue *TrueMask = Plan.getOrAddLiveIn (ConstantInt::getTrue (Ctx));
1589
+
1590
+ // Construct the initial value of the vector IV in the vector loop preheader
1591
+ VPSingleDefRecipe *SteppedStart =
1592
+ createStepVector (StartValue, Step, InductionTy, ID, VectorPHVPBB, DL);
1593
+
1594
+ // Create the vector phi node for both int. and fp. induction variables
1595
+ // and determine the kind of arithmetic we will perform
1596
+ auto *VecInd = new VPWidenPHIRecipe (WidenIV->getPHINode ());
1597
+ VecInd->insertBefore (WidenIV);
1598
+ WidenIV->replaceAllUsesWith (VecInd);
1599
+ Intrinsic::ID VPArithOp;
1600
+ Instruction::BinaryOps MulOp;
1601
+ if (InductionTy->isIntegerTy ()) {
1602
+ VPArithOp = Intrinsic::vp_add;
1603
+ MulOp = Instruction::Mul;
1604
+ } else {
1605
+ VPArithOp = ID.getInductionOpcode () == Instruction::FAdd
1606
+ ? Intrinsic::vp_fadd
1607
+ : Intrinsic::vp_fsub;
1608
+ MulOp = Instruction::FMul;
1609
+ }
1610
+
1611
+ // Multiply the runtime VF by the step
1612
+ VPSingleDefRecipe *ScalarMul;
1613
+ if (InductionTy->isFloatingPointTy ()) {
1614
+ FastMathFlags FMF = ID.getInductionBinOp ()->getFastMathFlags ();
1615
+ auto *CastEVL =
1616
+ new VPScalarCastRecipe (Instruction::UIToFP, EVL, InductionTy);
1617
+ CastEVL->insertBefore (CanonicalIVIncrement);
1618
+ ScalarMul = new VPInstruction (MulOp, {Step, CastEVL}, FMF, DL);
1619
+ } else {
1620
+ unsigned InductionSz = InductionTy->getScalarSizeInBits ();
1621
+ unsigned EVLSz = TypeInfo.inferScalarType (EVL)->getScalarSizeInBits ();
1622
+ VPValue *CastEVL = EVL;
1623
+ if (InductionSz != EVLSz) {
1624
+ auto *R = new VPScalarCastRecipe (EVLSz > InductionSz ? Instruction::Trunc
1625
+ : Instruction::ZExt,
1626
+ EVL, InductionTy);
1627
+ R->insertBefore (CanonicalIVIncrement);
1628
+ CastEVL = R;
1629
+ }
1630
+ ScalarMul = new VPInstruction (MulOp, {Step, CastEVL}, DL);
1631
+ }
1632
+ ScalarMul->insertBefore (CanonicalIVIncrement);
1633
+ // Create a vector splat to use in the induction update.
1634
+ auto *SplatVF =
1635
+ new VPWidenIntrinsicRecipe (Intrinsic::experimental_vp_splat,
1636
+ {ScalarMul, TrueMask, EVL}, InductionTy, DL);
1637
+ SplatVF->insertBefore (CanonicalIVIncrement);
1638
+ // TODO: We may need to add the step a number of times if UF > 1
1639
+ auto *LastInduction = new VPWidenIntrinsicRecipe (
1640
+ VPArithOp, {VecInd, SplatVF, TrueMask, EVL}, InductionTy, DL);
1641
+ LastInduction->insertBefore (CanonicalIVIncrement);
1642
+ VecInd->addIncoming (SteppedStart, VectorPHVPBB);
1643
+ VecInd->addIncoming (LastInduction, ExitingVPBB);
1644
+ }
1645
+
1526
1646
// / Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
1527
1647
// / replaces all uses except the canonical IV increment of
1528
1648
// / VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
@@ -1569,8 +1689,7 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
1569
1689
// of the VF directly. At the moment, widened inductions cannot be updated, so
1570
1690
// bail out if the plan contains any.
1571
1691
bool ContainsWidenInductions = any_of (Header->phis (), [](VPRecipeBase &Phi) {
1572
- return isa<VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe>(
1573
- &Phi);
1692
+ return isa<VPWidenPointerInductionRecipe>(&Phi);
1574
1693
});
1575
1694
if (ContainsWidenInductions)
1576
1695
return false ;
@@ -1615,6 +1734,16 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
1615
1734
1616
1735
transformRecipestoEVLRecipes (Plan, *VPEVL);
1617
1736
1737
+ VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
1738
+ SmallVector<VPRecipeBase *> ToRemove;
1739
+ for (VPRecipeBase &Phi : HeaderVPBB->phis ())
1740
+ if (auto *WidenIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi)) {
1741
+ transformWidenIVRecipestoEVLRecipes (WidenIV, Plan, VPEVL);
1742
+ ToRemove.push_back (WidenIV);
1743
+ }
1744
+ for (VPRecipeBase *R : ToRemove)
1745
+ R->eraseFromParent ();
1746
+
1618
1747
// Replace all uses of VPCanonicalIVPHIRecipe by
1619
1748
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.
1620
1749
CanonicalIVPHI->replaceAllUsesWith (EVLPhi);
0 commit comments