Skip to content

Commit e4ea099

Browse files
committed
Revert "[VPlan] Insert Trunc/Exts for reductions directly in VPlan."
This reverts commit fd31112. There are two different crash reports on fd31112.
1 parent 59d2dc2 commit e4ea099

File tree

4 files changed

+44
-47
lines changed

4 files changed

+44
-47
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 32 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -3792,6 +3792,8 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
37923792
State.setDebugLocFrom(I->getDebugLoc());
37933793

37943794
VPValue *LoopExitInstDef = PhiR->getBackedgeValue();
3795+
// This is the vector-clone of the value that leaves the loop.
3796+
Type *VecTy = State.get(LoopExitInstDef, 0)->getType();
37953797

37963798
// Before each round, move the insertion point right between
37973799
// the PHIs and the values we are going to write.
@@ -3803,6 +3805,10 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
38033805
State.setDebugLocFrom(LoopExitInst->getDebugLoc());
38043806

38053807
Type *PhiTy = OrigPhi->getType();
3808+
3809+
VPBasicBlock *LatchVPBB =
3810+
PhiR->getParent()->getEnclosingLoopRegion()->getExitingBasicBlock();
3811+
BasicBlock *VectorLoopLatch = State.CFG.VPBB2IRBB[LatchVPBB];
38063812
// If tail is folded by masking, the vector value to leave the loop should be
38073813
// a Select choosing between the vectorized LoopExitInst and vectorized Phi,
38083814
// instead of the former. For an inloop reduction the reduction will already
@@ -3828,12 +3834,23 @@ void InnerLoopVectorizer::fixReduction(VPReductionPHIRecipe *PhiR,
38283834
// then extend the loop exit value to enable InstCombine to evaluate the
38293835
// entire expression in the smaller type.
38303836
if (VF.isVector() && PhiTy != RdxDesc.getRecurrenceType()) {
3831-
Builder.SetInsertPoint(LoopMiddleBlock,
3832-
LoopMiddleBlock->getFirstInsertionPt());
3837+
assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
38333838
Type *RdxVecTy = VectorType::get(RdxDesc.getRecurrenceType(), VF);
3839+
Builder.SetInsertPoint(VectorLoopLatch->getTerminator());
38343840
for (unsigned Part = 0; Part < UF; ++Part) {
3835-
RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
3841+
Value *Trunc = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
3842+
Value *Extnd = RdxDesc.isSigned() ? Builder.CreateSExt(Trunc, VecTy)
3843+
: Builder.CreateZExt(Trunc, VecTy);
3844+
for (User *U : llvm::make_early_inc_range(RdxParts[Part]->users()))
3845+
if (U != Trunc) {
3846+
U->replaceUsesOfWith(RdxParts[Part], Extnd);
3847+
RdxParts[Part] = Extnd;
3848+
}
38363849
}
3850+
Builder.SetInsertPoint(LoopMiddleBlock,
3851+
LoopMiddleBlock->getFirstInsertionPt());
3852+
for (unsigned Part = 0; Part < UF; ++Part)
3853+
RdxParts[Part] = Builder.CreateTrunc(RdxParts[Part], RdxVecTy);
38373854
}
38383855

38393856
// Reduce all of the unrolled parts into a single vector.
@@ -9138,55 +9155,35 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
91389155
PreviousLink = RedRecipe;
91399156
}
91409157
}
9158+
9159+
// If tail is folded by masking, introduce selects between the phi
9160+
// and the live-out instruction of each reduction, at the beginning of the
9161+
// dedicated latch block.
9162+
if (CM.foldTailByMasking()) {
91419163
Builder.setInsertPoint(&*LatchVPBB->begin());
91429164
for (VPRecipeBase &R :
91439165
Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
9144-
VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9145-
if (!PhiR || PhiR->isInLoop())
9146-
continue;
9147-
9148-
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
9149-
auto *Result = PhiR->getBackedgeValue()->getDefiningRecipe();
9150-
// If tail is folded by masking, introduce selects between the phi
9151-
// and the live-out instruction of each reduction, at the beginning of the
9152-
// dedicated latch block.
9153-
if (CM.foldTailByMasking()) {
9166+
VPReductionPHIRecipe *PhiR = dyn_cast<VPReductionPHIRecipe>(&R);
9167+
if (!PhiR || PhiR->isInLoop())
9168+
continue;
9169+
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
91549170
VPValue *Cond =
91559171
RecipeBuilder.createBlockInMask(OrigLoop->getHeader(), *Plan);
91569172
VPValue *Red = PhiR->getBackedgeValue();
91579173
assert(Red->getDefiningRecipe()->getParent() != LatchVPBB &&
91589174
"reduction recipe must be defined before latch");
91599175
FastMathFlags FMFs = RdxDesc.getFastMathFlags();
91609176
Type *PhiTy = PhiR->getOperand(0)->getLiveInIRValue()->getType();
9161-
Result =
9177+
auto *Select =
91629178
PhiTy->isFloatingPointTy()
91639179
? new VPInstruction(Instruction::Select, {Cond, Red, PhiR}, FMFs)
91649180
: new VPInstruction(Instruction::Select, {Cond, Red, PhiR});
9165-
Result->insertBefore(&*Builder.getInsertPoint());
9181+
Select->insertBefore(&*Builder.getInsertPoint());
91669182
if (PreferPredicatedReductionSelect ||
91679183
TTI.preferPredicatedReductionSelect(
91689184
PhiR->getRecurrenceDescriptor().getOpcode(), PhiTy,
91699185
TargetTransformInfo::ReductionFlags()))
9170-
PhiR->setOperand(1, Result->getVPSingleValue());
9171-
}
9172-
// If the vector reduction can be performed in a smaller type, we truncate
9173-
// then extend the loop exit value to enable InstCombine to evaluate the
9174-
// entire expression in the smaller type.
9175-
Type *PhiTy = PhiR->getStartValue()->getLiveInIRValue()->getType();
9176-
if (PhiTy != RdxDesc.getRecurrenceType()) {
9177-
assert(!PhiR->isInLoop() && "Unexpected truncated inloop reduction!");
9178-
Type *RdxTy = RdxDesc.getRecurrenceType();
9179-
auto *Trunc = new VPWidenCastRecipe(Instruction::Trunc,
9180-
Result->getVPSingleValue(), RdxTy);
9181-
auto *Extnd =
9182-
RdxDesc.isSigned()
9183-
? new VPWidenCastRecipe(Instruction::SExt, Trunc, PhiTy)
9184-
: new VPWidenCastRecipe(Instruction::ZExt, Trunc, PhiTy);
9185-
9186-
Trunc->insertAfter(Result);
9187-
Extnd->insertAfter(Trunc);
9188-
Result->getVPSingleValue()->replaceAllUsesWith(Extnd);
9189-
Trunc->setOperand(0, Result->getVPSingleValue());
9186+
PhiR->setOperand(1, Select);
91909187
}
91919188
}
91929189

llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -207,10 +207,10 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) {
207207
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP3]], align 2
208208
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
209209
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[TMP1]], [[TMP4]]
210-
; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i16>
211-
; CHECK-NEXT: [[TMP8]] = zext <4 x i16> [[TMP7]] to <4 x i32>
212210
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
213211
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
212+
; CHECK-NEXT: [[TMP7:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i16>
213+
; CHECK-NEXT: [[TMP8]] = zext <4 x i16> [[TMP7]] to <4 x i32>
214214
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
215215
; CHECK: middle.block:
216216
; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i32> [[TMP8]] to <4 x i16>
@@ -234,10 +234,10 @@ define i16 @reduction_or_trunc(ptr noalias nocapture %ptr) {
234234
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i16>, ptr [[TMP16]], align 2
235235
; CHECK-NEXT: [[TMP17:%.*]] = zext <4 x i16> [[WIDE_LOAD4]] to <4 x i32>
236236
; CHECK-NEXT: [[TMP18:%.*]] = or <4 x i32> [[TMP14]], [[TMP17]]
237-
; CHECK-NEXT: [[TMP20:%.*]] = trunc <4 x i32> [[TMP18]] to <4 x i16>
238-
; CHECK-NEXT: [[TMP21]] = zext <4 x i16> [[TMP20]] to <4 x i32>
239237
; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i32 [[INDEX2]], 4
240238
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT5]], 256
239+
; CHECK-NEXT: [[TMP20:%.*]] = trunc <4 x i32> [[TMP18]] to <4 x i16>
240+
; CHECK-NEXT: [[TMP21]] = zext <4 x i16> [[TMP20]] to <4 x i32>
241241
; CHECK-NEXT: br i1 [[TMP19]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
242242
; CHECK: vec.epilog.middle.block:
243243
; CHECK-NEXT: [[TMP22:%.*]] = trunc <4 x i32> [[TMP21]] to <4 x i16>

llvm/test/Transforms/LoopVectorize/reduction-small-size.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -22,10 +22,10 @@ define i8 @PR34687(i1 %c, i32 %x, i32 %n) {
2222
; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
2323
; CHECK-NEXT: [[TMP1:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 255, i32 255, i32 255, i32 255>
2424
; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[TMP1]], [[BROADCAST_SPLAT2]]
25-
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i8>
26-
; CHECK-NEXT: [[TMP5]] = zext <4 x i8> [[TMP4]] to <4 x i32>
2725
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
2826
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
27+
; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i8>
28+
; CHECK-NEXT: [[TMP5]] = zext <4 x i8> [[TMP4]] to <4 x i32>
2929
; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
3030
; CHECK: middle.block:
3131
; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP5]] to <4 x i8>
@@ -99,10 +99,10 @@ define i32 @PR35734(i32 %x, i32 %y) {
9999
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP2]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
100100
; CHECK-NEXT: [[TMP3:%.*]] = and <4 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1>
101101
; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[TMP3]], <i32 -1, i32 -1, i32 -1, i32 -1>
102-
; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i1>
103-
; CHECK-NEXT: [[TMP7]] = sext <4 x i1> [[TMP6]] to <4 x i32>
104102
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
105103
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
104+
; CHECK-NEXT: [[TMP6:%.*]] = trunc <4 x i32> [[TMP4]] to <4 x i1>
105+
; CHECK-NEXT: [[TMP7]] = sext <4 x i1> [[TMP6]] to <4 x i32>
106106
; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
107107
; CHECK: middle.block:
108108
; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i1>

llvm/test/Transforms/LoopVectorize/scalable-reduction-inloop.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -17,14 +17,14 @@ define i8 @reduction_add_trunc(ptr noalias nocapture %A) {
1717
; CHECK-NEXT: [[TMP27:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD2]] to <vscale x 8 x i32>
1818
; CHECK-NEXT: [[TMP28:%.*]] = add <vscale x 8 x i32> [[TMP14]], [[TMP26]]
1919
; CHECK-NEXT: [[TMP29:%.*]] = add <vscale x 8 x i32> [[TMP15]], [[TMP27]]
20-
; CHECK-NEXT: [[TMP33:%.*]] = trunc <vscale x 8 x i32> [[TMP28]] to <vscale x 8 x i8>
21-
; CHECK-NEXT: [[TMP35:%.*]] = trunc <vscale x 8 x i32> [[TMP29]] to <vscale x 8 x i8>
22-
; CHECK-NEXT: [[TMP34]] = zext <vscale x 8 x i8> [[TMP33]] to <vscale x 8 x i32>
23-
; CHECK-NEXT: [[TMP36]] = zext <vscale x 8 x i8> [[TMP35]] to <vscale x 8 x i32>
2420
; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32()
2521
; CHECK-NEXT: [[TMP31:%.*]] = mul i32 [[TMP30]], 16
2622
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP31]]
2723
; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], {{%.*}}
24+
; CHECK-NEXT: [[TMP33:%.*]] = trunc <vscale x 8 x i32> [[TMP28]] to <vscale x 8 x i8>
25+
; CHECK-NEXT: [[TMP34]] = zext <vscale x 8 x i8> [[TMP33]] to <vscale x 8 x i32>
26+
; CHECK-NEXT: [[TMP35:%.*]] = trunc <vscale x 8 x i32> [[TMP29]] to <vscale x 8 x i8>
27+
; CHECK-NEXT: [[TMP36]] = zext <vscale x 8 x i8> [[TMP35]] to <vscale x 8 x i32>
2828
; CHECK: middle.block:
2929
; CHECK-NEXT: [[TMP37:%.*]] = trunc <vscale x 8 x i32> [[TMP34]] to <vscale x 8 x i8>
3030
; CHECK-NEXT: [[TMP38:%.*]] = trunc <vscale x 8 x i32> [[TMP36]] to <vscale x 8 x i8>

0 commit comments

Comments
 (0)