arcbbb
diff --git a/‎llvm/lib/Analysis/VectorUtils.cpp
Lines changed: 8 additions & 0 deletions b/‎llvm/lib/Analysis/VectorUtils.cpp
Lines changed: 8 additions & 0 deletions
diff --git a/‎llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Lines changed: 10 additions & 0 deletions b/‎llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Lines changed: 10 additions & 0 deletions
diff --git a/‎llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Lines changed: 1 addition & 2 deletions b/‎llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Lines changed: 1 addition & 2 deletions
diff --git a/‎llvm/lib/Transforms/Vectorize/VPlan.cpp
Lines changed: 5 additions & 5 deletions b/‎llvm/lib/Transforms/Vectorize/VPlan.cpp
Lines changed: 5 additions & 5 deletions
diff --git a/‎llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Lines changed: 5 additions & 6 deletions b/‎llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Lines changed: 5 additions & 6 deletions
diff --git a/‎llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Lines changed: 131 additions & 2 deletions b/‎llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Lines changed: 131 additions & 2 deletions
diff --git a/‎llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
Lines changed: 2 additions & 6 deletions b/‎llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp
Lines changed: 2 additions & 6 deletions
@@ -115,6 +115,10 @@ bool llvm::isTriviallyVectorizable(Intrinsic::ID ID) {
 /// Identifies if the vector form of the intrinsic has a scalar operand.
 bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
                                               unsigned ScalarOpdIdx) {
+  if (VPIntrinsic::isVPIntrinsic(ID) &&
+      (ScalarOpdIdx == VPIntrinsic::getVectorLengthParamPos(ID)))
+    return true;
+
   switch (ID) {
   case Intrinsic::abs:
   case Intrinsic::ctlz:
@@ -127,6 +131,8 @@ bool llvm::isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID,
   case Intrinsic::umul_fix:
   case Intrinsic::umul_fix_sat:
     return (ScalarOpdIdx == 2);
+  case Intrinsic::experimental_vp_splat:
+    return (ScalarOpdIdx == 0);
   default:
     return false;
   }
@@ -148,6 +154,8 @@ bool llvm::isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
     return OpdIdx == 0;
   case Intrinsic::powi:
     return OpdIdx == -1 || OpdIdx == 1;
+  case Intrinsic::experimental_vp_splat:
+    return OpdIdx == -1;
   default:
     return OpdIdx == -1;
   }
 
@@ -1191,6 +1191,16 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
     return getCmpSelInstrCost(Instruction::Select, ICA.getReturnType(),
                               ICA.getArgTypes()[0], CmpInst::BAD_ICMP_PREDICATE,
                               CostKind);
+  case Intrinsic::experimental_vp_splat: {
+    auto LT = getTypeLegalizationCost(RetTy);
+    if (RetTy->getScalarSizeInBits() == 1) {
+      return LT.first *
+             (1 + getRISCVInstructionCost({RISCV::VMV_V_X, RISCV::VMSNE_VI},
+                                          LT.second, CostKind));
+    }
+    return LT.first *
+           getRISCVInstructionCost(RISCV::VMV_V_X, LT.second, CostKind);
+  }
   }
 
   if (ST->hasVInstructions() && RetTy->isVectorTy()) {
 
@@ -2939,8 +2939,7 @@ LoopVectorizationCostModel::getVectorIntrinsicCost(CallInst *CI,
 
 void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State) {
   // Fix widened non-induction PHIs by setting up the PHI operands.
-  if (EnableVPlanNativePath)
-    fixNonInductionPHIs(State);
+  fixNonInductionPHIs(State);
 
   // Forget the original basic block.
   PSE.getSE()->forgetLoop(OrigLoop);
 
@@ -285,15 +285,15 @@ Value *VPTransformState::get(VPValue *Def, bool NeedsScalar) {
     return Shuf;
   };
 
-  if (!hasScalarValue(Def, {0})) {
-    assert(Def->isLiveIn() && "expected a live-in");
-    Value *IRV = Def->getLiveInIRValue();
-    Value *B = GetBroadcastInstrs(IRV);
+  Value *ScalarValue = hasScalarValue(Def, {0}) ? get(Def, VPLane(0)) : nullptr;
+  if (!ScalarValue || isa<Constant>(ScalarValue)) {
+    assert((ScalarValue || Def->isLiveIn()) && "expected a live-in");
+    Value *B = ScalarValue ? GetBroadcastInstrs(ScalarValue)
+                           : GetBroadcastInstrs(Def->getLiveInIRValue());
     set(Def, B);
     return B;
   }
 
-  Value *ScalarValue = get(Def, VPLane(0));
   // If we aren't vectorizing, we can just copy the scalar map values over
   // to the vector map.
   if (VF.isScalar()) {
 
@@ -648,7 +648,8 @@ bool VPInstruction::isVectorToScalar() const {
 }
 
 bool VPInstruction::isSingleScalar() const {
-  return getOpcode() == VPInstruction::ResumePhi;
+  const unsigned Opc = getOpcode(); // Only these opcodes are single-scalar.
+  return Opc == ResumePhi || Opc == ExplicitVectorLength;
 }
 
 #if !defined(NDEBUG)
@@ -1022,6 +1023,8 @@ bool VPWidenIntrinsicRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
   assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
-  // Vector predication intrinsics only demand the the first lane the last
+  // Vector predication intrinsics only demand the first lane of the last
   // operand (the EVL operand).
+  if (VectorIntrinsicID == Intrinsic::experimental_vp_splat)
+    return Op == getOperand(0);
   return VPIntrinsic::isVPIntrinsic(VectorIntrinsicID) &&
          Op == getOperand(getNumOperands() - 1);
 }
@@ -2309,9 +2312,8 @@ void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
 #endif
 
 Value *VPScalarCastRecipe ::generate(VPTransformState &State) {
-  assert(vputils::onlyFirstLaneUsed(this) &&
-         "Codegen only implemented for first lane.");
   switch (Opcode) {
+  case Instruction::UIToFP:
   case Instruction::SExt:
   case Instruction::ZExt:
   case Instruction::Trunc: {
@@ -3414,9 +3416,6 @@ void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
 #endif
 
 void VPWidenPHIRecipe::execute(VPTransformState &State) {
-  assert(EnableVPlanNativePath &&
-         "Non-native vplans are not expected to have VPWidenPHIRecipes.");
-
   Value *Op0 = State.get(getOperand(0));
   Type *VecTy = Op0->getType();
   Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi");
 
@@ -1523,6 +1523,126 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
   }
 }
 
+/// Emit StartValue <op> (0 * Step, 1 * Step, 2 * Step, ...) into the vector
+/// preheader \p VectorPHVPBB and return the recipe producing it. <op> is an
+/// integer add; FP inductions use the opcode and fast-math flags of \p ID.
+static VPSingleDefRecipe *createStepVector(VPValue *StartValue, VPValue *Step,
+                                           Type *InductionTy,
+                                           const InductionDescriptor &ID,
+                                           VPBasicBlock *VectorPHVPBB,
+                                           DebugLoc DL) {
+  Type *IntTy = InductionTy->isIntegerTy()
+                    ? InductionTy
+                    : IntegerType::get(InductionTy->getContext(),
+                                       InductionTy->getScalarSizeInBits());
+  VPSingleDefRecipe *InitVec =
+      new VPWidenIntrinsicRecipe(Intrinsic::stepvector, {}, IntTy, DL);
+  VectorPHVPBB->appendRecipe(InitVec);
+  if (InductionTy->isIntegerTy()) {
+    auto *Mul = new VPInstruction(Instruction::Mul, {InitVec, Step}, DL);
+    VectorPHVPBB->appendRecipe(Mul);
+    // Keep the debug location on the final add, as the FP path below does.
+    auto *SteppedStart =
+        new VPInstruction(Instruction::Add, {StartValue, Mul}, DL, "induction");
+    VectorPHVPBB->appendRecipe(SteppedStart);
+    return SteppedStart;
+  }
+  // FP inductions convert the integer step vector before scaling it.
+  FastMathFlags FMF = ID.getInductionBinOp()->getFastMathFlags();
+  InitVec = new VPWidenCastRecipe(Instruction::UIToFP, InitVec, InductionTy);
+  VectorPHVPBB->appendRecipe(InitVec);
+  auto *Mul = new VPInstruction(Instruction::FMul, {InitVec, Step}, FMF, DL);
+  VectorPHVPBB->appendRecipe(Mul);
+  Instruction::BinaryOps BinOp = ID.getInductionOpcode();
+  auto *SteppedStart =
+      new VPInstruction(BinOp, {StartValue, Mul}, FMF, DL, "induction");
+  VectorPHVPBB->appendRecipe(SteppedStart);
+  return SteppedStart;
+}
+
+/// Rewrite the widened int/FP induction \p WidenIV in \p Plan as a widened
+/// phi that is advanced by Step * \p EVL through a VP intrinsic.
+static void
+transformWidenIVRecipestoEVLRecipes(VPWidenIntOrFpInductionRecipe *WidenIV,
+                                    VPlan &Plan, VPValue *EVL) {
+  const InductionDescriptor &IndDesc = WidenIV->getInductionDescriptor();
+  DebugLoc Loc = WidenIV->getDebugLoc();
+  auto *LoopRegion = Plan.getVectorLoopRegion();
+  VPBasicBlock *Preheader = LoopRegion->getPreheaderVPBB();
+  VPBasicBlock *Latch = LoopRegion->getExitingBasicBlock();
+  auto *IVIncrement =
+      cast<VPInstruction>(Plan.getCanonicalIV()->getBackedgeValue());
+  VPTypeAnalysis TypeInfo(Plan.getCanonicalIV()->getScalarType());
+  // For a truncated induction, narrow start and step in the preheader first.
+  VPValue *Start = WidenIV->getStartValue();
+  VPValue *Step = WidenIV->getStepValue();
+  if (TruncInst *Trunc = WidenIV->getTruncInst()) {
+    Type *NarrowTy = Trunc->getType();
+    auto *NarrowStart =
+        new VPScalarCastRecipe(Instruction::Trunc, Start, NarrowTy);
+    Preheader->appendRecipe(NarrowStart);
+    auto *NarrowStep =
+        new VPScalarCastRecipe(Instruction::Trunc, Step, NarrowTy);
+    Preheader->appendRecipe(NarrowStep);
+    Start = NarrowStart;
+    Step = NarrowStep;
+  }
+  Type *IVTy = TypeInfo.inferScalarType(Start);
+  VPValue *AllTrue =
+      Plan.getOrAddLiveIn(ConstantInt::getTrue(IVTy->getContext()));
+
+  // The stepped start value is computed once, in the vector preheader.
+  VPSingleDefRecipe *InitialIV =
+      createStepVector(Start, Step, IVTy, IndDesc, Preheader, Loc);
+  // A widened phi takes over all uses of the induction recipe.
+  auto *VecPhi = new VPWidenPHIRecipe(WidenIV->getPHINode());
+  VecPhi->insertBefore(WidenIV);
+  WidenIV->replaceAllUsesWith(VecPhi);
+
+  // Integer IVs use vp.add/mul; FP IVs use vp.fadd or vp.fsub with fmul.
+  Intrinsic::ID UpdateID = Intrinsic::vp_add;
+  Instruction::BinaryOps ScaleOpc = Instruction::Mul;
+  if (!IVTy->isIntegerTy()) {
+    const bool IsFAdd = IndDesc.getInductionOpcode() == Instruction::FAdd;
+    UpdateID = IsFAdd ? Intrinsic::vp_fadd : Intrinsic::vp_fsub;
+    ScaleOpc = Instruction::FMul;
+  }
+
+  // Scalar increment Step * EVL, with EVL first converted to the IV type.
+  VPSingleDefRecipe *ScaledStep;
+  if (!IVTy->isFloatingPointTy()) {
+    const unsigned IVBits = IVTy->getScalarSizeInBits();
+    const unsigned EVLBits =
+        TypeInfo.inferScalarType(EVL)->getScalarSizeInBits();
+    VPValue *EVLInIVTy = EVL;
+    if (IVBits != EVLBits) {
+      const Instruction::CastOps Resize =
+          EVLBits > IVBits ? Instruction::Trunc : Instruction::ZExt;
+      auto *Resized = new VPScalarCastRecipe(Resize, EVL, IVTy);
+      Resized->insertBefore(IVIncrement);
+      EVLInIVTy = Resized;
+    }
+    ScaledStep = new VPInstruction(ScaleOpc, {Step, EVLInIVTy}, Loc);
+  } else {
+    FastMathFlags FMF = IndDesc.getInductionBinOp()->getFastMathFlags();
+    auto *EVLAsFP = new VPScalarCastRecipe(Instruction::UIToFP, EVL, IVTy);
+    EVLAsFP->insertBefore(IVIncrement);
+    ScaledStep = new VPInstruction(ScaleOpc, {Step, EVLAsFP}, FMF, Loc);
+  }
+  ScaledStep->insertBefore(IVIncrement);
+
+  // Splat the increment and apply it to the phi under an all-true mask.
+  auto *SplatStep = new VPWidenIntrinsicRecipe(
+      Intrinsic::experimental_vp_splat, {ScaledStep, AllTrue, EVL}, IVTy, Loc);
+  SplatStep->insertBefore(IVIncrement);
+  // TODO: We may need to add the step a number of times if UF > 1
+  auto *NextIV = new VPWidenIntrinsicRecipe(
+      UpdateID, {VecPhi, SplatStep, AllTrue, EVL}, IVTy, Loc);
+  NextIV->insertBefore(IVIncrement);
+  VecPhi->addIncoming(InitialIV, Preheader);
+  VecPhi->addIncoming(NextIV, Latch);
+}
+
 /// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
 /// replaces all uses except the canonical IV increment of
 /// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
@@ -1569,8 +1689,7 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
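-  // of the VF directly. At the moment, widened inductions cannot be updated, so
-  // bail out if the plan contains any.
+  // of the VF directly. At the moment, widened pointer inductions cannot be
+  // updated, so bail out if the plan contains any.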
   bool ContainsWidenInductions = any_of(Header->phis(), [](VPRecipeBase &Phi) {
-    return isa<VPWidenIntOrFpInductionRecipe, VPWidenPointerInductionRecipe>(
-        &Phi);
+    return isa<VPWidenPointerInductionRecipe>(&Phi);
   });
   if (ContainsWidenInductions)
     return false;
@@ -1615,6 +1734,16 @@ bool VPlanTransforms::tryAddExplicitVectorLength(
 
   transformRecipestoEVLRecipes(Plan, *VPEVL);
 
+  VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
+  SmallVector<VPRecipeBase *> ToRemove;
+  for (VPRecipeBase &Phi : HeaderVPBB->phis())
+    if (auto *WidenIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi)) {
+      transformWidenIVRecipestoEVLRecipes(WidenIV, Plan, VPEVL);
+      ToRemove.push_back(WidenIV);
+    }
+  for (VPRecipeBase *R : ToRemove)
+    R->eraseFromParent();
+
   // Replace all uses of VPCanonicalIVPHIRecipe by
   // VPEVLBasedIVPHIRecipe except for the canonical IV increment.
   CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
 
@@ -156,7 +156,8 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
              .Case<VPScalarCastRecipe>(
                  [&](const VPScalarCastRecipe *S) { return true; })
              .Case<VPInstruction>([&](const VPInstruction *I) {
-               if (I->getOpcode() != Instruction::Add) {
+               if ((I->getOpcode() != Instruction::Add) &&
+                   (I->getOpcode() != Instruction::Mul)) {
                  errs()
                      << "EVL is used as an operand in non-VPInstruction::Add\n";
                  return false;
@@ -166,11 +167,6 @@ bool VPlanVerifier::verifyEVLRecipe(const VPInstruction &EVL) const {
                            "users\n";
                  return false;
                }
-               if (!isa<VPEVLBasedIVPHIRecipe>(*I->users().begin())) {
-                 errs() << "Result of VPInstruction::Add with EVL operand is "
-                           "not used by VPEVLBasedIVPHIRecipe\n";
-                 return false;
-               }
                return true;
              })
              .Default([&](const VPUser *U) {