@@ -8114,34 +8114,6 @@ VPHeaderPHIRecipe *VPRecipeBuilder::tryToOptimizeInductionPHI(
8114
8114
return nullptr ;
8115
8115
}
8116
8116
8117
- VPWidenIntOrFpInductionRecipe *VPRecipeBuilder::tryToOptimizeInductionTruncate (
8118
- TruncInst *I, ArrayRef<VPValue *> Operands, VFRange &Range, VPlan &Plan) {
8119
- // Optimize the special case where the source is a constant integer
8120
- // induction variable. Notice that we can only optimize the 'trunc' case
8121
- // because (a) FP conversions lose precision, (b) sext/zext may wrap, and
8122
- // (c) other casts depend on pointer size.
8123
-
8124
- // Determine whether \p K is a truncation based on an induction variable that
8125
- // can be optimized.
8126
- auto isOptimizableIVTruncate =
8127
- [&](Instruction *K) -> std::function<bool (ElementCount)> {
8128
- return [=](ElementCount VF) -> bool {
8129
- return CM.isOptimizableIVTruncate (K, VF);
8130
- };
8131
- };
8132
-
8133
- if (LoopVectorizationPlanner::getDecisionAndClampRange (
8134
- isOptimizableIVTruncate (I), Range)) {
8135
-
8136
- auto *Phi = cast<PHINode>(I->getOperand (0 ));
8137
- const InductionDescriptor &II = *Legal->getIntOrFpInductionDescriptor (Phi);
8138
- VPValue *Start = Plan.getVPValueOrAddLiveIn (II.getStartValue ());
8139
- return createWidenInductionRecipes (Phi, I, Start, II, Plan, *PSE.getSE (),
8140
- *OrigLoop, Range);
8141
- }
8142
- return nullptr ;
8143
- }
8144
-
8145
8117
VPBlendRecipe *VPRecipeBuilder::tryToBlend (PHINode *Phi,
8146
8118
ArrayRef<VPValue *> Operands,
8147
8119
VPlanPtr &Plan) {
@@ -8275,6 +8247,70 @@ bool VPRecipeBuilder::shouldWiden(Instruction *I, VFRange &Range) const {
8275
8247
Range);
8276
8248
}
8277
8249
8250
+ VPWidenCastRecipe *VPRecipeBuilder::createCast (VPValue *V, Type *From,
8251
+ Type *To) {
8252
+ if (From == To)
8253
+ return nullptr ;
8254
+ Instruction::CastOps CastOpcode;
8255
+ if (To->isIntegerTy () && From->isIntegerTy ())
8256
+ CastOpcode = To->getPrimitiveSizeInBits () < From->getPrimitiveSizeInBits ()
8257
+ ? Instruction::Trunc
8258
+ : Instruction::ZExt;
8259
+ else if (To->isIntegerTy ())
8260
+ CastOpcode = Instruction::FPToUI;
8261
+ else
8262
+ CastOpcode = Instruction::UIToFP;
8263
+
8264
+ return new VPWidenCastRecipe (CastOpcode, V, To);
8265
+ }
8266
+
8267
+ VPRecipeBase *
8268
+ VPRecipeBuilder::createWidenStep (VPWidenIntOrFpInductionRecipe &WIV,
8269
+ ScalarEvolution &SE, VPlan &Plan,
8270
+ DenseSet<VPRecipeBase *> *CreatedRecipes) {
8271
+ PHINode *PN = WIV.getPHINode ();
8272
+ const InductionDescriptor &IndDesc = WIV.getInductionDescriptor ();
8273
+ VPValue *ScalarStep =
8274
+ vputils::getOrCreateVPValueForSCEVExpr (Plan, IndDesc.getStep (), SE);
8275
+ Type *VFxUFTy = Plan.getVFxUF ().getElementType ();
8276
+ Type *StepTy = IndDesc.getStep ()->getType ();
8277
+ VPValue *WidenVFxUF = &Plan.getWidenVFxUF ();
8278
+ VPBasicBlock *LatchVPBB = Plan.getVectorLoopRegion ()->getExitingBasicBlock ();
8279
+ if (VPWidenCastRecipe *WidenVFxUFCast =
8280
+ createCast (&Plan.getWidenVFxUF (), VFxUFTy, StepTy)) {
8281
+ WidenVFxUFCast->insertBefore (LatchVPBB->getTerminator ());
8282
+ if (CreatedRecipes)
8283
+ CreatedRecipes->insert (WidenVFxUFCast);
8284
+ WidenVFxUF = WidenVFxUFCast->getVPSingleValue ();
8285
+ }
8286
+ const Instruction::BinaryOps UpdateOp =
8287
+ IndDesc.getInductionOpcode () != Instruction::BinaryOpsEnd
8288
+ ? IndDesc.getInductionOpcode ()
8289
+ : Instruction::Add;
8290
+ VPInstruction *Update;
8291
+ if (StepTy->isIntegerTy ()) {
8292
+ VPInstruction *Mul = new VPInstruction (
8293
+ Instruction::Mul, {WidenVFxUF, ScalarStep}, PN->getDebugLoc ());
8294
+ Mul->insertBefore (LatchVPBB->getTerminator ());
8295
+ if (CreatedRecipes)
8296
+ CreatedRecipes->insert (Mul);
8297
+ Update = new VPInstruction (UpdateOp, {&WIV, Mul}, PN->getDebugLoc ());
8298
+ Update->insertBefore (LatchVPBB->getTerminator ());
8299
+ } else {
8300
+ FastMathFlags FMF = IndDesc.getExactFPMathInst ()
8301
+ ? IndDesc.getExactFPMathInst ()->getFastMathFlags ()
8302
+ : FastMathFlags ();
8303
+ VPInstruction *Mul = new VPInstruction (
8304
+ Instruction::FMul, {WidenVFxUF, ScalarStep}, FMF, PN->getDebugLoc ());
8305
+ Mul->insertBefore (LatchVPBB->getTerminator ());
8306
+ Update = new VPInstruction (UpdateOp, {&WIV, Mul}, FMF, PN->getDebugLoc ());
8307
+ Update->insertBefore (LatchVPBB->getTerminator ());
8308
+ }
8309
+ if (CreatedRecipes)
8310
+ CreatedRecipes->insert (Update);
8311
+ return Update;
8312
+ }
8313
+
8278
8314
VPWidenRecipe *VPRecipeBuilder::tryToWiden (Instruction *I,
8279
8315
ArrayRef<VPValue *> Operands,
8280
8316
VPBasicBlock *VPBB, VPlanPtr &Plan) {
@@ -8324,10 +8360,15 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
8324
8360
};
8325
8361
}
8326
8362
8327
- void VPRecipeBuilder::fixHeaderPhis () {
8363
+ void VPRecipeBuilder::fixHeaderPhis (VPlan &Plan ) {
8328
8364
BasicBlock *OrigLatch = OrigLoop->getLoopLatch ();
8329
8365
for (VPHeaderPHIRecipe *R : PhisToFix) {
8330
- auto *PN = cast<PHINode>(R->getUnderlyingValue ());
8366
+ if (auto *VPWIFR = dyn_cast<VPWidenIntOrFpInductionRecipe>(R)) {
8367
+ VPWIFR->addOperand (
8368
+ createWidenStep (*VPWIFR, *PSE.getSE (), Plan)->getVPSingleValue ());
8369
+ continue ;
8370
+ }
8371
+ PHINode *PN = cast<PHINode>(R->getUnderlyingValue ());
8331
8372
VPRecipeBase *IncR =
8332
8373
getRecipe (cast<Instruction>(PN->getIncomingValueForBlock (OrigLatch)));
8333
8374
R->addOperand (IncR->getVPSingleValue ());
@@ -8405,8 +8446,12 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
8405
8446
// can have earlier phis as incoming values.
8406
8447
recordRecipeOf (Phi);
8407
8448
8408
- if ((Recipe = tryToOptimizeInductionPHI (Phi, Operands, *Plan, Range)))
8449
+ if ((Recipe = tryToOptimizeInductionPHI (Phi, Operands, *Plan, Range))) {
8450
+ if (isa<VPWidenPointerInductionRecipe>(Recipe))
8451
+ return Recipe;
8452
+ PhisToFix.push_back (cast<VPWidenIntOrFpInductionRecipe>(Recipe));
8409
8453
return Recipe;
8454
+ }
8410
8455
8411
8456
VPHeaderPHIRecipe *PhiRecipe = nullptr ;
8412
8457
assert ((Legal->isReductionVariable (Phi) ||
@@ -8441,10 +8486,17 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(
8441
8486
return PhiRecipe;
8442
8487
}
8443
8488
8444
- if (isa<TruncInst>(Instr) &&
8445
- (Recipe = tryToOptimizeInductionTruncate (cast<TruncInst>(Instr), Operands,
8446
- Range, *Plan)))
8447
- return Recipe;
8489
+ if (isa<TruncInst>(Instr)) {
8490
+ auto IsOptimizableIVTruncate =
8491
+ [&](Instruction *K) -> std::function<bool (ElementCount)> {
8492
+ return [=](ElementCount VF) -> bool {
8493
+ return CM.isOptimizableIVTruncate (K, VF);
8494
+ };
8495
+ };
8496
+
8497
+ LoopVectorizationPlanner::getDecisionAndClampRange (
8498
+ IsOptimizableIVTruncate (Instr), Range);
8499
+ }
8448
8500
8449
8501
// All widen recipes below deal only with VF > 1.
8450
8502
if (LoopVectorizationPlanner::getDecisionAndClampRange (
@@ -8707,7 +8759,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
8707
8759
!Plan->getVectorLoopRegion ()->getEntryBasicBlock ()->empty () &&
8708
8760
" entry block must be set to a VPRegionBlock having a non-empty entry "
8709
8761
" VPBasicBlock" );
8710
- RecipeBuilder.fixHeaderPhis ();
8762
+ RecipeBuilder.fixHeaderPhis (*Plan );
8711
8763
8712
8764
// ---------------------------------------------------------------------------
8713
8765
// Transform initial VPlan: Apply previously taken decisions, in order, to
0 commit comments