@@ -3792,6 +3792,8 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
3792
3792
State.setDebugLocFrom(I->getDebugLoc());
3793
3793
3794
3794
VPValue *LoopExitInstDef = PhiR->getBackedgeValue();
3795
+ // This is the vector-clone of the value that leaves the loop.
3796
+ Type *VecTy = State.get(LoopExitInstDef, 0)->getType();
3795
3797
3796
3798
// Before each round, move the insertion point right between
3797
3799
// the PHIs and the values we are going to write.
@@ -3803,6 +3805,10 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
3803
3805
State.setDebugLocFrom(LoopExitInst->getDebugLoc());
3804
3806
3805
3807
Type *PhiTy = OrigPhi->getType();
3808
+
3809
+ VPBasicBlock *LatchVPBB =
3810
+ PhiR->getParent()->getEnclosingLoopRegion()->getExitingBasicBlock();
3811
+ BasicBlock *VectorLoopLatch = State.CFG.VPBB2IRBB[LatchVPBB];
3806
3812
// If tail is folded by masking, the vector value to leave the loop should be
3807
3813
// a Select choosing between the vectorized LoopExitInst and vectorized Phi,
3808
3814
// instead of the former. For an inloop reduction the reduction will already
@@ -3828,12 +3834,23 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
3828
3834
// then extend the loop exit value to enable InstCombine to evaluate the
3829
3835
// entire expression in the smaller type.
3830
3836
if (VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
3831
- Builder.SetInsertPoint (LoopMiddleBlock,
3832
- LoopMiddleBlock->getFirstInsertionPt ());
3837
+ assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
3833
3838
Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
3839
+ Builder.SetInsertPoint(VectorLoopLatch->getTerminator());
3834
3840
for (unsigned Part = 0; Part < UF; ++Part) {
3835
- RdxParts[Part] = Builder.CreateTrunc (RdxParts[Part], RdxVecTy);
3841
+ Value *Trunc = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
3842
+ Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy)
3843
+ : Builder.CreateZExt(Trunc, VecTy);
3844
+ for (User *U : llvm::make_early_inc_range(RdxParts[Part]->users()))
3845
+ if (U != Trunc) {
3846
+ U->replaceUsesOfWith(RdxParts[Part], Extnd);
3847
+ RdxParts[Part] = Extnd;
3848
+ }
3836
3849
}
3850
+ Builder.SetInsertPoint(LoopMiddleBlock,
3851
+ LoopMiddleBlock->getFirstInsertionPt());
3852
+ for (unsigned Part = 0; Part < UF; ++Part)
3853
+ RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
3837
3854
}
3838
3855
3839
3856
// Reduce all of the unrolled parts into a single vector.
@@ -9138,55 +9155,35 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9138
9155
PreviousLink = RedRecipe;
9139
9156
}
9140
9157
}
9158
+
9159
+ // If tail is folded by masking, introduce selects between the phi
9160
+ // and the live-out instruction of each reduction, at the beginning of the
9161
+ // dedicated latch block.
9162
+ if (CM.foldTailByMasking()) {
9141
9163
Builder.setInsertPoint(&*LatchVPBB->begin());
9142
9164
for (VPRecipeBase &R :
9143
9165
Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
9144
- VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9145
- if (!PhiR || PhiR->isInLoop ())
9146
- continue ;
9147
-
9148
- const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
9149
- auto *Result = PhiR->getBackedgeValue ()->getDefiningRecipe ();
9150
- // If tail is folded by masking, introduce selects between the phi
9151
- // and the live-out instruction of each reduction, at the beginning of the
9152
- // dedicated latch block.
9153
- if (CM.foldTailByMasking ()) {
9166
+ VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9167
+ if (!PhiR || PhiR->isInLoop())
9168
+ continue;
9169
+ const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
9154
9170
VPValue *Cond =
9155
9171
RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), *Plan);
9156
9172
VPValue *Red = PhiR->getBackedgeValue();
9157
9173
assert(Red->getDefiningRecipe()->getParent() != LatchVPBB &&
9158
9174
"reduction recipe must be defined before latch");
9159
9175
FastMathFlags FMFs = RdxDesc.getFastMathFlags();
9160
9176
Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType();
9161
- Result =
9177
+ auto *Select =
9162
9178
PhiTy->isFloatingPointTy()
9163
9179
? new VPInstruction(Instruction::Select, {Cond, Red, PhiR}, FMFs)
9164
9180
: new VPInstruction(Instruction::Select, {Cond, Red, PhiR});
9165
- Result ->insertBefore (&*Builder.getInsertPoint ());
9181
+ Select ->insertBefore(&*Builder.getInsertPoint());
9166
9182
if (PreferPredicatedReductionSelect ||
9167
9183
TTI.preferPredicatedReductionSelect(
9168
9184
PhiR->getRecurrenceDescriptor().getOpcode(), PhiTy,
9169
9185
TargetTransformInfo::ReductionFlags()))
9170
- PhiR->setOperand (1 , Result->getVPSingleValue ());
9171
- }
9172
- // If the vector reduction can be performed in a smaller type, we truncate
9173
- // then extend the loop exit value to enable InstCombine to evaluate the
9174
- // entire expression in the smaller type.
9175
- Type *PhiTy = PhiR->getStartValue ()->getLiveInIRValue ()->getType ();
9176
- if (PhiTy != RdxDesc.getRecurrenceType ()) {
9177
- assert (!PhiR->isInLoop () && " Unexpected truncated inloop reduction!" );
9178
- Type *RdxTy = RdxDesc.getRecurrenceType ();
9179
- auto *Trunc = new VPWidenCastRecipe (Instruction::Trunc,
9180
- Result->getVPSingleValue (), RdxTy);
9181
- auto *Extnd =
9182
- RdxDesc.isSigned ()
9183
- ? new VPWidenCastRecipe (Instruction::SExt, Trunc, PhiTy)
9184
- : new VPWidenCastRecipe (Instruction::ZExt, Trunc, PhiTy);
9185
-
9186
- Trunc->insertAfter (Result);
9187
- Extnd->insertAfter (Trunc);
9188
- Result->getVPSingleValue ()->replaceAllUsesWith (Extnd);
9189
- Trunc->setOperand (0 , Result->getVPSingleValue ());
9186
+ PhiR->setOperand(1, Select);
9190
9187
}
9191
9188
}
9192
9189
0 commit comments