@@ -223,47 +223,47 @@ VPTransformState::VPTransformState(ElementCount VF, unsigned UF, LoopInfo *LI,
223
223
LVer(nullptr ),
224
224
TypeAnalysis(Plan->getCanonicalIV ()->getScalarType(), Ctx) {}
225
225
226
// Returns the IR value of \p Def for one specific lane. The removed lines
// show the earlier form keyed on a VPIteration (part + lane); the added lines
// key on a VPLane only.
// Lookup order, as written below:
//   1. live-in values return their IR value directly;
//   2. a scalar already recorded for this lane is returned from the cache;
//   3. otherwise Def must have a vector value, and either that value is
//      returned as-is (when it is not vector-typed) or the requested lane is
//      extracted from it with an extractelement.
- Value *VPTransformState::get (VPValue *Def, const VPIteration &Instance ) {
226
+ Value *VPTransformState::get (VPValue *Def, const VPLane &Lane ) {
227
227
if (Def->isLiveIn ())
228
228
return Def->getLiveInIRValue ();
229
229
230
- if (hasScalarValue (Def, Instance)) {
231
- return Data
232
- .PerPartScalars [Def][Instance.Part ][Instance.Lane .mapToCacheIndex (VF)];
230
+ if (hasScalarValue (Def, Lane)) {
231
+ return Data.Scalars [Def][Lane.mapToCacheIndex (VF)];
233
232
}
234
233
235
- assert (hasVectorValue (Def, Instance. Part ));
236
- auto *VecPart = Data.PerPartOutput [Def][Instance. Part ];
234
+ assert (hasVectorValue (Def));
235
+ auto *VecPart = Data.Output [Def];
237
236
// A value stored in the vector map that is not vector-typed can only be
// handed out for the first lane; the assert below enforces that.
if (!VecPart->getType ()->isVectorTy ()) {
238
- assert (Instance. Lane .isFirstLane () && " cannot get lane > 0 for scalar" );
237
+ assert (Lane.isFirstLane () && " cannot get lane > 0 for scalar" );
239
238
return VecPart;
240
239
}
241
240
// TODO: Cache created scalar values.
242
- Value *Lane = Instance. Lane .getAsRuntimeExpr (Builder, VF);
243
- auto *Extract = Builder.CreateExtractElement (VecPart, Lane );
244
- // set(Def, Extract, Instance );
// On the added side the lane index is held in LaneV because the parameter
// itself is now named Lane.
241
+ Value *LaneV = Lane.getAsRuntimeExpr (Builder, VF);
242
+ auto *Extract = Builder.CreateExtractElement (VecPart, LaneV );
243
+ // set(Def, Extract, Lane );
245
244
return Extract;
246
245
}
247
246
248
- Value *VPTransformState::get (VPValue *Def, unsigned Part, bool NeedsScalar) {
247
+ Value *VPTransformState::get (VPValue *Def, bool NeedsScalar) {
249
248
if (NeedsScalar) {
250
- assert ((VF. isScalar () || Def-> isLiveIn () || hasVectorValue (Def, Part) ||
251
- ( hasScalarValue ( Def, VPIteration (Part, 0 )) &&
252
- Data.PerPartScalars [Def][Part ].size () == 1 )) &&
253
- " Trying to access a single scalar per part but has multiple scalars "
254
- " per part." );
255
- return get (Def, VPIteration (Part, 0 ));
249
+ assert (
250
+ (VF. isScalar () || Def-> isLiveIn () || hasVectorValue ( Def) ||
251
+ ( hasScalarValue (Def, VPLane ( 0 )) && Data.Scalars [Def].size () == 1 )) &&
252
+ " Trying to access a single scalar per part but has multiple scalars "
253
+ " per part." );
254
+ return get (Def, VPLane ( 0 ));
256
255
}
257
256
258
257
// If Values have been set for this Def return the one relevant for \p Part.
259
- if (hasVectorValue (Def, Part ))
260
- return Data.PerPartOutput [Def][Part ];
258
+ if (hasVectorValue (Def))
259
+ return Data.Output [Def];
261
260
262
261
auto GetBroadcastInstrs = [this , Def](Value *V) {
263
262
bool SafeToHoist = Def->isDefinedOutsideVectorRegions ();
264
263
if (VF.isScalar ())
265
264
return V;
266
- // Place the code for broadcasting invariant variables in the new preheader.
265
+ // Place the code for broadcasting invariant variables in the new
266
+ // preheader.
267
267
IRBuilder<>::InsertPointGuard Guard (Builder);
268
268
if (SafeToHoist) {
269
269
BasicBlock *LoopVectorPreHeader = CFG.VPBB2IRBB [cast<VPBasicBlock>(
@@ -272,56 +272,55 @@ Value *VPTransformState::get(VPValue *Def, unsigned Part, bool NeedsScalar) {
272
272
Builder.SetInsertPoint (LoopVectorPreHeader->getTerminator ());
273
273
}
274
274
275
- // Place the code for broadcasting invariant variables in the new preheader.
276
- // Broadcast the scalar into all locations in the vector.
275
+ // Place the code for broadcasting invariant variables in the new
276
+ // preheader. Broadcast the scalar into all locations in the vector.
277
277
Value *Shuf = Builder.CreateVectorSplat (VF, V, " broadcast" );
278
278
279
279
return Shuf;
280
280
};
281
281
282
- if (!hasScalarValue (Def, {Part, 0 } )) {
282
+ if (!hasScalarValue (Def, VPLane ( 0 ) )) {
283
283
assert (Def->isLiveIn () && " expected a live-in" );
284
- if (Part != 0 )
285
- return get (Def, 0 );
286
284
Value *IRV = Def->getLiveInIRValue ();
287
285
Value *B = GetBroadcastInstrs (IRV);
288
- set (Def, B, Part );
286
+ set (Def, B);
289
287
return B;
290
288
}
291
289
292
- Value *ScalarValue = get (Def, {Part, 0 } );
290
+ Value *ScalarValue = get (Def, VPLane ( 0 ) );
293
291
// If we aren't vectorizing, we can just copy the scalar map values over
294
292
// to the vector map.
295
293
if (VF.isScalar ()) {
296
- set (Def, ScalarValue, Part );
294
+ set (Def, ScalarValue);
297
295
return ScalarValue;
298
296
}
299
297
300
298
bool IsUniform = vputils::isUniformAfterVectorization (Def);
301
299
302
- unsigned LastLane = IsUniform ? 0 : VF.getKnownMinValue () - 1 ;
300
+ VPLane LastLane = VPLane ( IsUniform ? 0 : VF.getKnownMinValue () - 1 ) ;
303
301
// Check if there is a scalar value for the selected lane.
304
- if (!hasScalarValue (Def, {Part, LastLane} )) {
305
- // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes and
306
- // VPExpandSCEVRecipes can also be uniform.
302
+ if (!hasScalarValue (Def, LastLane)) {
303
+ // At the moment, VPWidenIntOrFpInductionRecipes, VPScalarIVStepsRecipes
304
+ // and VPExpandSCEVRecipes can also be uniform.
307
305
assert ((isa<VPWidenIntOrFpInductionRecipe>(Def->getDefiningRecipe ()) ||
308
306
isa<VPScalarIVStepsRecipe>(Def->getDefiningRecipe ()) ||
309
307
isa<VPExpandSCEVRecipe>(Def->getDefiningRecipe ())) &&
310
308
" unexpected recipe found to be invariant" );
311
309
IsUniform = true ;
312
- LastLane = 0 ;
310
+ LastLane = VPLane ( 0 ) ;
313
311
}
314
312
315
- auto *LastInst = cast<Instruction>(get (Def, {Part, LastLane}));
316
- // Set the insert point after the last scalarized instruction or after the
317
- // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
318
- // will directly follow the scalar definitions.
319
313
auto OldIP = Builder.saveIP ();
320
- auto NewIP =
321
- isa<PHINode>(LastInst)
322
- ? BasicBlock::iterator (LastInst->getParent ()->getFirstNonPHI ())
323
- : std::next (BasicBlock::iterator (LastInst));
324
- Builder.SetInsertPoint (&*NewIP);
314
+ if (auto *LastInst = dyn_cast<Instruction>(get (Def, LastLane))) {
315
+ // Set the insert point after the last scalarized instruction or after the
316
+ // last PHI, if LastInst is a PHI. This ensures the insertelement sequence
317
+ // will directly follow the scalar definitions.
318
+ auto NewIP =
319
+ isa<PHINode>(LastInst)
320
+ ? BasicBlock::iterator (LastInst->getParent ()->getFirstNonPHI ())
321
+ : std::next (BasicBlock::iterator (LastInst));
322
+ Builder.SetInsertPoint (&*NewIP);
323
+ }
325
324
326
325
// However, if we are vectorizing, we need to construct the vector values.
327
326
// If the value is known to be uniform after vectorization, we can just
@@ -332,15 +331,16 @@ Value *VPTransformState::get(VPValue *Def, unsigned Part, bool NeedsScalar) {
332
331
Value *VectorValue = nullptr ;
333
332
if (IsUniform) {
334
333
VectorValue = GetBroadcastInstrs (ScalarValue);
335
- set (Def, VectorValue, Part );
334
+ set (Def, VectorValue);
336
335
} else {
337
336
// Initialize packing with insertelements to start from undef.
338
337
assert (!VF.isScalable () && " VF is assumed to be non scalable." );
339
- Value *Undef = PoisonValue::get (VectorType::get (LastInst->getType (), VF));
340
- set (Def, Undef, Part);
338
+ Value *Undef =
339
+ PoisonValue::get (VectorType::get (ScalarValue->getType (), VF));
340
+ set (Def, Undef);
341
341
for (unsigned Lane = 0 ; Lane < VF.getKnownMinValue (); ++Lane)
342
- packScalarIntoVectorValue (Def, {Part, Lane} );
343
- VectorValue = get (Def, Part );
342
+ packScalarIntoVectorValue (Def, Lane);
343
+ VectorValue = get (Def);
344
344
}
345
345
Builder.restoreIP (OldIP);
346
346
return VectorValue;
@@ -392,12 +392,12 @@ void VPTransformState::setDebugLocFrom(DebugLoc DL) {
392
392
}
393
393
394
394
// Inserts the scalar recorded for one lane of \p Def into Def's current
// vector value and stores the updated vector back with set().
// The removed lines fetch and store the vector per part (Instance.Part); the
// added lines use the single vector value returned by get(Def).
void VPTransformState::packScalarIntoVectorValue (VPValue *Def,
395
- const VPIteration &Instance ) {
396
- Value *ScalarInst = get (Def, Instance );
397
- Value *VectorValue = get (Def, Instance. Part );
398
- VectorValue = Builder.CreateInsertElement (
399
- VectorValue, ScalarInst, Instance. Lane .getAsRuntimeExpr (Builder, VF));
400
- set (Def, VectorValue, Instance. Part );
395
+ const VPLane &Lane ) {
396
+ Value *ScalarInst = get (Def, Lane );
397
+ Value *VectorValue = get (Def);
// The insertion index is the lane expressed as a runtime value.
398
+ VectorValue = Builder.CreateInsertElement (VectorValue, ScalarInst,
399
+ Lane.getAsRuntimeExpr (Builder, VF));
400
+ set (Def, VectorValue);
401
401
}
402
402
403
403
BasicBlock *
@@ -453,7 +453,7 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
453
453
}
454
454
455
455
void VPBasicBlock::execute (VPTransformState *State) {
456
- bool Replica = State->Instance && !State->Instance -> isFirstIteration ();
456
+ bool Replica = State->Lane && !State->Lane -> isFirstLane ();
457
457
VPBasicBlock *PrevVPBB = State->CFG .PrevVPBB ;
458
458
VPBlockBase *SingleHPred = nullptr ;
459
459
BasicBlock *NewBB = State->CFG .PrevBB ; // Reuse it if possible.
@@ -724,27 +724,24 @@ void VPRegionBlock::execute(VPTransformState *State) {
724
724
return ;
725
725
}
726
726
727
- assert (!State->Instance && " Replicating a Region with non-null instance." );
727
+ assert (!State->Lane && " Replicating a Region with non-null instance." );
728
728
729
729
// Enter replicating mode.
730
- State->Instance = VPIteration (0 , 0 );
731
-
732
- for (unsigned Part = 0 , UF = State->UF ; Part < UF; ++Part) {
733
- State->Instance ->Part = Part;
734
- assert (!State->VF .isScalable () && " VF is assumed to be non scalable." );
735
- for (unsigned Lane = 0 , VF = State->VF .getKnownMinValue (); Lane < VF;
736
- ++Lane) {
737
- State->Instance ->Lane = VPLane (Lane, VPLane::Kind::First);
738
- // Visit the VPBlocks connected to \p this, starting from it.
739
- for (VPBlockBase *Block : RPOT) {
740
- LLVM_DEBUG (dbgs () << " LV: VPBlock in RPO " << Block->getName () << ' \n ' );
741
- Block->execute (State);
742
- }
730
+ State->Lane = VPLane (0 );
731
+
732
+ assert (!State->VF .isScalable () && " VF is assumed to be non scalable." );
733
+ for (unsigned Lane = 0 , VF = State->VF .getKnownMinValue (); Lane < VF;
734
+ ++Lane) {
735
+ State->Lane = VPLane (Lane, VPLane::Kind::First);
736
+ // Visit the VPBlocks connected to \p this, starting from it.
737
+ for (VPBlockBase *Block : RPOT) {
738
+ LLVM_DEBUG (dbgs () << " LV: VPBlock in RPO " << Block->getName () << ' \n ' );
739
+ Block->execute (State);
743
740
}
744
741
}
745
742
746
743
// Exit replicating mode.
747
- State->Instance .reset ();
744
+ State->Lane .reset ();
748
745
}
749
746
750
747
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -816,10 +813,15 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
816
813
// FIXME: Model VF * UF computation completely in VPlan.
817
814
VFxUF.setUnderlyingValue (
818
815
createStepForVF (Builder, TripCountV->getType (), State.VF , State.UF ));
816
+ if (VF.getNumUsers () > 0 ) {
817
+ VF.setUnderlyingValue (
818
+ createStepForVF (Builder, TripCountV->getType (), State.VF , 1 ));
819
+ }
819
820
820
821
// When vectorizing the epilogue loop, the canonical induction start value
821
822
// needs to be changed from zero to the value after the main vector loop.
822
- // FIXME: Improve modeling for canonical IV start values in the epilogue loop.
823
+ // FIXME: Improve modeling for canonical IV start values in the epilogue
824
+ // loop.
823
825
if (CanonicalIVStartValue) {
824
826
VPValue *VPV = getOrAddLiveIn (CanonicalIVStartValue);
825
827
auto *IV = getCanonicalIV ();
@@ -871,12 +873,12 @@ void VPlan::execute(VPTransformState *State) {
871
873
isa<VPWidenIntOrFpInductionRecipe>(&R)) {
872
874
PHINode *Phi = nullptr ;
873
875
if (isa<VPWidenIntOrFpInductionRecipe>(&R)) {
874
- Phi = cast<PHINode>(State->get (R.getVPSingleValue (), 0 ));
876
+ Phi = cast<PHINode>(State->get (R.getVPSingleValue ()));
875
877
} else {
876
878
auto *WidenPhi = cast<VPWidenPointerInductionRecipe>(&R);
877
879
assert (!WidenPhi->onlyScalarsGenerated (State->VF .isScalable ()) &&
878
880
" recipe generating only scalars should have been replaced" );
879
- auto *GEP = cast<GetElementPtrInst>(State->get (WidenPhi, 0 ));
881
+ auto *GEP = cast<GetElementPtrInst>(State->get (WidenPhi));
880
882
Phi = cast<PHINode>(GEP->getPointerOperand ());
881
883
}
882
884
@@ -885,6 +887,9 @@ void VPlan::execute(VPTransformState *State) {
885
887
// Move the last step to the end of the latch block. This ensures
886
888
// consistent placement of all induction updates.
887
889
Instruction *Inc = cast<Instruction>(Phi->getIncomingValue (1 ));
890
+ if (isa<VPWidenIntOrFpInductionRecipe>(&R) && R.getNumOperands () == 4 )
891
+ Inc->setOperand (0 , State->get (R.getOperand (3 )));
892
+
888
893
Inc->moveBefore (VectorLatchBB->getTerminator ()->getPrevNode ());
889
894
continue ;
890
895
}
@@ -894,24 +899,13 @@ void VPlan::execute(VPTransformState *State) {
894
899
// only a single part is generated, which provides the last part from the
895
900
// previous iteration. For non-ordered reductions all UF parts are
896
901
// generated.
897
- bool SinglePartNeeded =
898
- isa<VPCanonicalIVPHIRecipe>(PhiR) ||
899
- isa<VPFirstOrderRecurrencePHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
900
- (isa<VPReductionPHIRecipe>(PhiR) &&
901
- cast<VPReductionPHIRecipe>(PhiR)->isOrdered ());
902
902
bool NeedsScalar =
903
903
isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe>(PhiR) ||
904
904
(isa<VPReductionPHIRecipe>(PhiR) &&
905
905
cast<VPReductionPHIRecipe>(PhiR)->isInLoop ());
906
- unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF ;
907
-
908
- for (unsigned Part = 0 ; Part < LastPartForNewPhi; ++Part) {
909
- Value *Phi = State->get (PhiR, Part, NeedsScalar);
910
- Value *Val =
911
- State->get (PhiR->getBackedgeValue (),
912
- SinglePartNeeded ? State->UF - 1 : Part, NeedsScalar);
913
- cast<PHINode>(Phi)->addIncoming (Val, VectorLatchBB);
914
- }
906
+ Value *Phi = State->get (PhiR, NeedsScalar);
907
+ Value *Val = State->get (PhiR->getBackedgeValue (), NeedsScalar);
908
+ cast<PHINode>(Phi)->addIncoming (Val, VectorLatchBB);
915
909
}
916
910
917
911
State->CFG .DTU .flush ();
@@ -1249,6 +1243,10 @@ void VPlanIngredient::print(raw_ostream &O) const {
1249
1243
1250
1244
template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
1251
1245
1246
// Returns true when this value has no defining recipe, or when the block
// holding its defining recipe has a null parent.
// NOTE(review): a null getParent()->getParent() presumably means the block is
// not nested inside any region -- confirm against the block hierarchy API.
+ bool VPValue::isDefinedOutsideVectorRegions () const {
1247
+ return !hasDefiningRecipe () || !getDefiningRecipe ()->getParent ()->getParent ();
1248
+ }
1249
+
1252
1250
// Replaces every use of this value with \p New by delegating to
// replaceUsesWithIf with a predicate that accepts all (user, operand-index)
// pairs.
void VPValue::replaceAllUsesWith (VPValue *New) {
1253
1251
replaceUsesWithIf (New, [](VPUser &, unsigned ) { return true ; });
1254
1252
}
0 commit comments