@@ -3792,8 +3792,6 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
3792
3792
State.setDebugLocFrom (I->getDebugLoc ());
3793
3793
3794
3794
VPValue *LoopExitInstDef = PhiR->getBackedgeValue ();
3795
- // This is the vector-clone of the value that leaves the loop.
3796
- Type *VecTy = State.get (LoopExitInstDef, 0 )->getType ();
3797
3795
3798
3796
// Before each round, move the insertion point right between
3799
3797
// the PHIs and the values we are going to write.
@@ -3805,10 +3803,6 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
3805
3803
State.setDebugLocFrom (LoopExitInst->getDebugLoc ());
3806
3804
3807
3805
Type *PhiTy = OrigPhi->getType ();
3808
-
3809
- VPBasicBlock *LatchVPBB =
3810
- PhiR->getParent ()->getEnclosingLoopRegion ()->getExitingBasicBlock ();
3811
- BasicBlock *VectorLoopLatch = State.CFG .VPBB2IRBB [LatchVPBB];
3812
3806
// If tail is folded by masking, the vector value to leave the loop should be
3813
3807
// a Select choosing between the vectorized LoopExitInst and vectorized Phi,
3814
3808
// instead of the former. For an inloop reduction the reduction will already
@@ -3834,23 +3828,12 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
3834
3828
// then extend the loop exit value to enable InstCombine to evaluate the
3835
3829
// entire expression in the smaller type.
3836
3830
if (VF.isVector () && PhiTy != RdxDesc.getRecurrenceType ()) {
3837
- assert (!PhiR->isInLoop () && " Unexpected truncated inloop reduction!" );
3838
- Type *RdxVecTy = VectorType::get (RdxDesc.getRecurrenceType (), VF);
3839
- Builder.SetInsertPoint (VectorLoopLatch->getTerminator ());
3840
- for (unsigned Part = 0 ; Part < UF; ++Part) {
3841
- Value *Trunc = Builder.CreateTrunc (RdxParts[Part], RdxVecTy);
3842
- Value *Extnd = RdxDesc.isSigned () ? Builder.CreateSExt (Trunc, VecTy)
3843
- : Builder.CreateZExt (Trunc, VecTy);
3844
- for (User *U : llvm::make_early_inc_range (RdxParts[Part]->users ()))
3845
- if (U != Trunc) {
3846
- U->replaceUsesOfWith (RdxParts[Part], Extnd);
3847
- RdxParts[Part] = Extnd;
3848
- }
3849
- }
3850
3831
Builder.SetInsertPoint (LoopMiddleBlock,
3851
3832
LoopMiddleBlock->getFirstInsertionPt ());
3852
- for (unsigned Part = 0 ; Part < UF; ++Part)
3833
+ Type *RdxVecTy = VectorType::get (RdxDesc.getRecurrenceType (), VF);
3834
+ for (unsigned Part = 0 ; Part < UF; ++Part) {
3853
3835
RdxParts[Part] = Builder.CreateTrunc (RdxParts[Part], RdxVecTy);
3836
+ }
3854
3837
}
3855
3838
3856
3839
// Reduce all of the unrolled parts into a single vector.
@@ -9155,35 +9138,55 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
9155
9138
PreviousLink = RedRecipe;
9156
9139
}
9157
9140
}
9158
-
9159
- // If tail is folded by masking, introduce selects between the phi
9160
- // and the live-out instruction of each reduction, at the beginning of the
9161
- // dedicated latch block.
9162
- if (CM.foldTailByMasking ()) {
9163
9141
Builder.setInsertPoint (&*LatchVPBB->begin ());
9164
9142
for (VPRecipeBase &R :
9165
9143
Plan->getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
9166
- VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9167
- if (!PhiR || PhiR->isInLoop ())
9168
- continue ;
9169
- const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
9144
+ VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9145
+ if (!PhiR || PhiR->isInLoop ())
9146
+ continue ;
9147
+
9148
+ const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor ();
9149
+ auto *Result = PhiR->getBackedgeValue ()->getDefiningRecipe ();
9150
+ // If tail is folded by masking, introduce selects between the phi
9151
+ // and the live-out instruction of each reduction, at the beginning of the
9152
+ // dedicated latch block.
9153
+ if (CM.foldTailByMasking ()) {
9170
9154
VPValue *Cond =
9171
9155
RecipeBuilder.createBlockInMask (OrigLoop->getHeader (), *Plan);
9172
9156
VPValue *Red = PhiR->getBackedgeValue ();
9173
9157
assert (Red->getDefiningRecipe ()->getParent () != LatchVPBB &&
9174
9158
" reduction recipe must be defined before latch" );
9175
9159
FastMathFlags FMFs = RdxDesc.getFastMathFlags ();
9176
9160
Type *PhiTy = PhiR->getOperand (0 )->getLiveInIRValue ()->getType ();
9177
- auto *Select =
9161
+ Result =
9178
9162
PhiTy->isFloatingPointTy ()
9179
9163
? new VPInstruction (Instruction::Select, {Cond, Red, PhiR}, FMFs)
9180
9164
: new VPInstruction (Instruction::Select, {Cond, Red, PhiR});
9181
- Select ->insertBefore (&*Builder.getInsertPoint ());
9165
+ Result ->insertBefore (&*Builder.getInsertPoint ());
9182
9166
if (PreferPredicatedReductionSelect ||
9183
9167
TTI.preferPredicatedReductionSelect (
9184
9168
PhiR->getRecurrenceDescriptor ().getOpcode (), PhiTy,
9185
9169
TargetTransformInfo::ReductionFlags ()))
9186
- PhiR->setOperand (1 , Select);
9170
+ PhiR->setOperand (1 , Result->getVPSingleValue ());
9171
+ }
9172
+ // If the vector reduction can be performed in a smaller type, we truncate
9173
+ // then extend the loop exit value to enable InstCombine to evaluate the
9174
+ // entire expression in the smaller type.
9175
+ Type *PhiTy = PhiR->getStartValue ()->getLiveInIRValue ()->getType ();
9176
+ if (PhiTy != RdxDesc.getRecurrenceType ()) {
9177
+ assert (!PhiR->isInLoop () && " Unexpected truncated inloop reduction!" );
9178
+ Type *RdxTy = RdxDesc.getRecurrenceType ();
9179
+ auto *Trunc = new VPWidenCastRecipe (Instruction::Trunc,
9180
+ Result->getVPSingleValue (), RdxTy);
9181
+ auto *Extnd =
9182
+ RdxDesc.isSigned ()
9183
+ ? new VPWidenCastRecipe (Instruction::SExt, Trunc, PhiTy)
9184
+ : new VPWidenCastRecipe (Instruction::ZExt, Trunc, PhiTy);
9185
+
9186
+ Trunc->insertAfter (Result);
9187
+ Extnd->insertAfter (Trunc);
9188
+ Result->getVPSingleValue ()->replaceAllUsesWith (Extnd);
9189
+ Trunc->setOperand (0 , Result->getVPSingleValue ());
9187
9190
}
9188
9191
}
9189
9192
0 commit comments