56
56
57
57
#include " llvm/Transforms/Vectorize/LoopVectorize.h"
58
58
#include " LoopVectorizationPlanner.h"
59
+ #include " VPlanHCFGBuilder.h"
59
60
#include " llvm/ADT/APInt.h"
60
61
#include " llvm/ADT/ArrayRef.h"
61
62
#include " llvm/ADT/DenseMap.h"
@@ -244,6 +245,17 @@ static cl::opt<bool> EnableVPlanNativePath(
244
245
cl::desc(" Enable VPlan-native vectorization path with "
245
246
" support for outer loop vectorization." ));
246
247
248
+ // This flag enables the stress testing of the VPlan H-CFG construction in the
249
+ // VPlan-native vectorization path. It must be used in conjunction with
250
+ // -enable-vplan-native-path. -vplan-verify-hcfg can also be used to enable the
251
+ // verification of the H-CFGs built.
252
+ static cl::opt<bool > VPlanBuildStressTest (
253
+ " vplan-build-stress-test" , cl::init(false ), cl::Hidden,
254
+ cl::desc(
255
+ " Build VPlan for every supported loop nest in the function and bail "
256
+ " out right after the build (stress test the VPlan H-CFG construction "
257
+ " in the VPlan-native vectorization path)." ));
258
+
247
259
// / A helper function for converting Scalar types to vector types.
248
260
// / If the incoming type is void, we return void. If the VF is 1, we return
249
261
// / the scalar type.
@@ -1653,8 +1665,11 @@ static void collectSupportedLoops(Loop &L, LoopInfo *LI,
1653
1665
OptimizationRemarkEmitter *ORE,
1654
1666
SmallVectorImpl<Loop *> &V) {
1655
1667
// Collect inner loops and outer loops without irreducible control flow. For
1656
- // now, only collect outer loops that have explicit vectorization hints.
1657
- if (L.empty () || (EnableVPlanNativePath && isExplicitVecOuterLoop (&L, ORE))) {
1668
+ // now, only collect outer loops that have explicit vectorization hints. If we
1669
+ // are stress testing the VPlan H-CFG construction, we collect the outermost
1670
+ // loop of every loop nest.
1671
+ if (L.empty () || VPlanBuildStressTest ||
1672
+ (EnableVPlanNativePath && isExplicitVecOuterLoop (&L, ORE))) {
1658
1673
LoopBlocksRPO RPOT (&L);
1659
1674
RPOT.perform (LI);
1660
1675
if (!containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI)) {
@@ -6254,20 +6269,30 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
6254
6269
VectorizationFactor
6255
6270
LoopVectorizationPlanner::planInVPlanNativePath (bool OptForSize,
6256
6271
unsigned UserVF) {
6257
- // Width 1 means no vectorize , cost 0 means uncomputed cost.
6272
+ // Width 1 means no vectorization , cost 0 means uncomputed cost.
6258
6273
const VectorizationFactor NoVectorization = {1U , 0U };
6259
6274
6260
6275
// Outer loop handling: They may require CFG and instruction level
6261
6276
// transformations before even evaluating whether vectorization is profitable.
6262
6277
// Since we cannot modify the incoming IR, we need to build VPlan upfront in
6263
6278
// the vectorization pipeline.
6264
6279
if (!OrigLoop->empty ()) {
6280
+ // TODO: If UserVF is not provided, we set UserVF to 4 for stress testing.
6281
+ // This won't be necessary when UserVF is not required in the VPlan-native
6282
+ // path.
6283
+ if (VPlanBuildStressTest && !UserVF)
6284
+ UserVF = 4 ;
6285
+
6265
6286
assert (EnableVPlanNativePath && " VPlan-native path is not enabled." );
6266
6287
assert (UserVF && " Expected UserVF for outer loop vectorization." );
6267
6288
assert (isPowerOf2_32 (UserVF) && " VF needs to be a power of two" );
6268
6289
LLVM_DEBUG (dbgs () << " LV: Using user VF " << UserVF << " .\n " );
6269
6290
buildVPlans (UserVF, UserVF);
6270
6291
6292
+ // For VPlan build stress testing, we bail out after VPlan construction.
6293
+ if (VPlanBuildStressTest)
6294
+ return NoVectorization;
6295
+
6271
6296
return {UserVF, 0 };
6272
6297
}
6273
6298
@@ -6280,7 +6305,7 @@ LoopVectorizationPlanner::planInVPlanNativePath(bool OptForSize,
6280
6305
VectorizationFactor
6281
6306
LoopVectorizationPlanner::plan (bool OptForSize, unsigned UserVF) {
6282
6307
assert (OrigLoop->empty () && " Inner loop expected." );
6283
- // Width 1 means no vectorize , cost 0 means uncomputed cost.
6308
+ // Width 1 means no vectorization , cost 0 means uncomputed cost.
6284
6309
const VectorizationFactor NoVectorization = {1U , 0U };
6285
6310
Optional<unsigned > MaybeMaxVF = CM.computeMaxVF (OptForSize);
6286
6311
if (!MaybeMaxVF.hasValue ()) // Cases considered too costly to vectorize.
@@ -6806,9 +6831,11 @@ VPBasicBlock *LoopVectorizationPlanner::handleReplication(
6806
6831
" VPBB has successors when handling predicated replication." );
6807
6832
// Record predicated instructions for above packing optimizations.
6808
6833
PredInst2Recipe[I] = Recipe;
6809
- VPBlockBase *Region =
6810
- VPBB->setOneSuccessor (createReplicateRegion (I, Recipe, Plan));
6811
- return cast<VPBasicBlock>(Region->setOneSuccessor (new VPBasicBlock ()));
6834
+ VPBlockBase *Region = createReplicateRegion (I, Recipe, Plan);
6835
+ VPBlockUtils::insertBlockAfter (Region, VPBB);
6836
+ auto *RegSucc = new VPBasicBlock ();
6837
+ VPBlockUtils::insertBlockAfter (RegSucc, Region);
6838
+ return RegSucc;
6812
6839
}
6813
6840
6814
6841
VPRegionBlock *
@@ -6834,8 +6861,8 @@ LoopVectorizationPlanner::createReplicateRegion(Instruction *Instr,
6834
6861
6835
6862
// Note: first set Entry as region entry and then connect successors starting
6836
6863
// from it in order, to propagate the "parent" of each VPBasicBlock.
6837
- Entry-> setTwoSuccessors (Pred, Exit);
6838
- Pred-> setOneSuccessor ( Exit);
6864
+ VPBlockUtils::insertTwoBlocksAfter (Pred, Exit, Entry );
6865
+ VPBlockUtils::connectBlocks (Pred, Exit);
6839
6866
6840
6867
return Region;
6841
6868
}
@@ -6852,6 +6879,11 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range,
6852
6879
6853
6880
// Create new empty VPlan
6854
6881
auto Plan = llvm::make_unique<VPlan>();
6882
+
6883
+ // Build hierarchical CFG
6884
+ VPlanHCFGBuilder HCFGBuilder (OrigLoop, LI);
6885
+ HCFGBuilder.buildHierarchicalCFG (*Plan.get ());
6886
+
6855
6887
return Plan;
6856
6888
}
6857
6889
@@ -6893,7 +6925,7 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range,
6893
6925
// ingredients and fill a new VPBasicBlock.
6894
6926
unsigned VPBBsForBB = 0 ;
6895
6927
auto *FirstVPBBForBB = new VPBasicBlock (BB->getName ());
6896
- VPBB-> setOneSuccessor (FirstVPBBForBB);
6928
+ VPBlockUtils::insertBlockAfter (FirstVPBBForBB, VPBB );
6897
6929
VPBB = FirstVPBBForBB;
6898
6930
Builder.setInsertPoint (VPBB);
6899
6931
@@ -6997,7 +7029,7 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range,
6997
7029
VPBasicBlock *PreEntry = cast<VPBasicBlock>(Plan->getEntry ());
6998
7030
assert (PreEntry->empty () && " Expecting empty pre-entry block." );
6999
7031
VPBlockBase *Entry = Plan->setEntry (PreEntry->getSingleSuccessor ());
7000
- PreEntry-> disconnectSuccessor ( Entry);
7032
+ VPBlockUtils::disconnectBlocks (PreEntry, Entry);
7001
7033
delete PreEntry;
7002
7034
7003
7035
std::string PlanName;
0 commit comments