Skip to content

Commit 1f4de6b

Browse files
committed
[LV][VPlan] Build plain CFG with simple VPInstructions for outer loops.
Patch #3 from VPlan Outer Loop Vectorization Patch Series #1 (RFC: http://lists.llvm.org/pipermail/llvm-dev/2017-December/119523.html).

Expected to be NFC for the current inner loop vectorization path. It introduces the basic algorithm to build the VPlan plain CFG (single-level CFG, no hierarchical CFG (H-CFG), yet) in the VPlan-native vectorization path using VPInstructions. It includes:
- VPlanHCFGBuilder: Main class to build the VPlan H-CFG (plain CFG without nested regions, for now).
- VPlanVerifier: Main class with utilities to check the consistency of a H-CFG.
- VPlanBlockUtils: Main class with utilities to manipulate VPBlockBases in VPlan.

Reviewers: rengolin, fhahn, mkuper, mssimpso, a.elovikov, hfinkel, aprantl.

Differential Revision: https://reviews.llvm.org/D44338

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@332654 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 56649b7 commit 1f4de6b

File tree

10 files changed

+889
-42
lines changed

10 files changed

+889
-42
lines changed

lib/Transforms/Vectorize/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ add_llvm_library(LLVMVectorize
55
SLPVectorizer.cpp
66
Vectorize.cpp
77
VPlan.cpp
8+
VPlanHCFGBuilder.cpp
9+
VPlanVerifier.cpp
810

911
ADDITIONAL_HEADER_DIRS
1012
${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms

lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 96 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,23 +39,94 @@ class VPBuilder {
3939
VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();
4040

4141
VPInstruction *createInstruction(unsigned Opcode,
42-
std::initializer_list<VPValue *> Operands) {
42+
ArrayRef<VPValue *> Operands) {
4343
VPInstruction *Instr = new VPInstruction(Opcode, Operands);
44-
BB->insert(Instr, InsertPt);
44+
if (BB)
45+
BB->insert(Instr, InsertPt);
4546
return Instr;
4647
}
4748

49+
VPInstruction *createInstruction(unsigned Opcode,
50+
std::initializer_list<VPValue *> Operands) {
51+
return createInstruction(Opcode, ArrayRef<VPValue *>(Operands));
52+
}
53+
4854
public:
4955
VPBuilder() {}
5056

51-
/// This specifies that created VPInstructions should be appended to
52-
/// the end of the specified block.
57+
/// Clear the insertion point: created instructions will not be inserted into
58+
/// a block.
59+
void clearInsertionPoint() {
60+
BB = nullptr;
61+
InsertPt = VPBasicBlock::iterator();
62+
}
63+
64+
VPBasicBlock *getInsertBlock() const { return BB; }
65+
VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
66+
67+
/// VPInsertPoint - A saved insertion point.
68+
class VPInsertPoint {
69+
VPBasicBlock *Block = nullptr;
70+
VPBasicBlock::iterator Point;
71+
72+
public:
73+
/// Creates a new insertion point which doesn't point to anything.
74+
VPInsertPoint() = default;
75+
76+
/// Creates a new insertion point at the given location.
77+
VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
78+
: Block(InsertBlock), Point(InsertPoint) {}
79+
80+
/// Returns true if this insert point is set.
81+
bool isSet() const { return Block != nullptr; }
82+
83+
VPBasicBlock *getBlock() const { return Block; }
84+
VPBasicBlock::iterator getPoint() const { return Point; }
85+
};
86+
87+
/// Sets the current insert point to a previously-saved location.
88+
void restoreIP(VPInsertPoint IP) {
89+
if (IP.isSet())
90+
setInsertPoint(IP.getBlock(), IP.getPoint());
91+
else
92+
clearInsertionPoint();
93+
}
94+
95+
/// This specifies that created VPInstructions should be appended to the end
96+
/// of the specified block.
5397
void setInsertPoint(VPBasicBlock *TheBB) {
5498
assert(TheBB && "Attempting to set a null insert point");
5599
BB = TheBB;
56100
InsertPt = BB->end();
57101
}
58102

103+
/// This specifies that created instructions should be inserted at the
104+
/// specified point.
105+
void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) {
106+
BB = TheBB;
107+
InsertPt = IP;
108+
}
109+
110+
/// Insert and return the specified instruction.
111+
VPInstruction *insert(VPInstruction *I) const {
112+
BB->insert(I, InsertPt);
113+
return I;
114+
}
115+
116+
/// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
117+
/// its underlying Instruction.
118+
VPValue *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
119+
Instruction *Inst = nullptr) {
120+
VPInstruction *NewVPInst = createInstruction(Opcode, Operands);
121+
NewVPInst->setUnderlyingValue(Inst);
122+
return NewVPInst;
123+
}
124+
VPValue *createNaryOp(unsigned Opcode,
125+
std::initializer_list<VPValue *> Operands,
126+
Instruction *Inst = nullptr) {
127+
return createNaryOp(Opcode, ArrayRef<VPValue *>(Operands), Inst);
128+
}
129+
59130
VPValue *createNot(VPValue *Operand) {
60131
return createInstruction(VPInstruction::Not, {Operand});
61132
}
@@ -67,8 +138,28 @@ class VPBuilder {
67138
VPValue *createOr(VPValue *LHS, VPValue *RHS) {
68139
return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS});
69140
}
70-
};
71141

142+
//===--------------------------------------------------------------------===//
143+
// RAII helpers.
144+
//===--------------------------------------------------------------------===//
145+
146+
/// RAII object that stores the current insertion point and restores it when
147+
/// the object is destroyed.
148+
class InsertPointGuard {
149+
VPBuilder &Builder;
150+
VPBasicBlock *Block;
151+
VPBasicBlock::iterator Point;
152+
153+
public:
154+
InsertPointGuard(VPBuilder &B)
155+
: Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
156+
157+
InsertPointGuard(const InsertPointGuard &) = delete;
158+
InsertPointGuard &operator=(const InsertPointGuard &) = delete;
159+
160+
~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); }
161+
};
162+
};
72163

73164
/// TODO: The following VectorizationFactor was pulled out of
74165
/// LoopVectorizationCostModel class. LV also deals with

lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 43 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@
5656

5757
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
5858
#include "LoopVectorizationPlanner.h"
59+
#include "VPlanHCFGBuilder.h"
5960
#include "llvm/ADT/APInt.h"
6061
#include "llvm/ADT/ArrayRef.h"
6162
#include "llvm/ADT/DenseMap.h"
@@ -244,6 +245,17 @@ static cl::opt<bool> EnableVPlanNativePath(
244245
cl::desc("Enable VPlan-native vectorization path with "
245246
"support for outer loop vectorization."));
246247

248+
// This flag enables the stress testing of the VPlan H-CFG construction in the
249+
// VPlan-native vectorization path. It must be used in conjunction with
250+
// -enable-vplan-native-path. -vplan-verify-hcfg can also be used to enable the
251+
// verification of the H-CFGs built.
252+
static cl::opt<bool> VPlanBuildStressTest(
253+
"vplan-build-stress-test", cl::init(false), cl::Hidden,
254+
cl::desc(
255+
"Build VPlan for every supported loop nest in the function and bail "
256+
"out right after the build (stress test the VPlan H-CFG construction "
257+
"in the VPlan-native vectorization path)."));
258+
247259
/// A helper function for converting Scalar types to vector types.
248260
/// If the incoming type is void, we return void. If the VF is 1, we return
249261
/// the scalar type.
@@ -1653,8 +1665,11 @@ static void collectSupportedLoops(Loop &L, LoopInfo *LI,
16531665
OptimizationRemarkEmitter *ORE,
16541666
SmallVectorImpl<Loop *> &V) {
16551667
// Collect inner loops and outer loops without irreducible control flow. For
1656-
// now, only collect outer loops that have explicit vectorization hints.
1657-
if (L.empty() || (EnableVPlanNativePath && isExplicitVecOuterLoop(&L, ORE))) {
1668+
// now, only collect outer loops that have explicit vectorization hints. If we
1669+
// are stress testing the VPlan H-CFG construction, we collect the outermost
1670+
// loop of every loop nest.
1671+
if (L.empty() || VPlanBuildStressTest ||
1672+
(EnableVPlanNativePath && isExplicitVecOuterLoop(&L, ORE))) {
16581673
LoopBlocksRPO RPOT(&L);
16591674
RPOT.perform(LI);
16601675
if (!containsIrreducibleCFG<const BasicBlock *>(RPOT, *LI)) {
@@ -6254,20 +6269,30 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
62546269
VectorizationFactor
62556270
LoopVectorizationPlanner::planInVPlanNativePath(bool OptForSize,
62566271
unsigned UserVF) {
6257-
// Width 1 means no vectorize, cost 0 means uncomputed cost.
6272+
// Width 1 means no vectorization, cost 0 means uncomputed cost.
62586273
const VectorizationFactor NoVectorization = {1U, 0U};
62596274

62606275
// Outer loop handling: They may require CFG and instruction level
62616276
// transformations before even evaluating whether vectorization is profitable.
62626277
// Since we cannot modify the incoming IR, we need to build VPlan upfront in
62636278
// the vectorization pipeline.
62646279
if (!OrigLoop->empty()) {
6280+
// TODO: If UserVF is not provided, we set UserVF to 4 for stress testing.
6281+
// This won't be necessary when UserVF is not required in the VPlan-native
6282+
// path.
6283+
if (VPlanBuildStressTest && !UserVF)
6284+
UserVF = 4;
6285+
62656286
assert(EnableVPlanNativePath && "VPlan-native path is not enabled.");
62666287
assert(UserVF && "Expected UserVF for outer loop vectorization.");
62676288
assert(isPowerOf2_32(UserVF) && "VF needs to be a power of two");
62686289
LLVM_DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
62696290
buildVPlans(UserVF, UserVF);
62706291

6292+
// For VPlan build stress testing, we bail out after VPlan construction.
6293+
if (VPlanBuildStressTest)
6294+
return NoVectorization;
6295+
62716296
return {UserVF, 0};
62726297
}
62736298

@@ -6280,7 +6305,7 @@ LoopVectorizationPlanner::planInVPlanNativePath(bool OptForSize,
62806305
VectorizationFactor
62816306
LoopVectorizationPlanner::plan(bool OptForSize, unsigned UserVF) {
62826307
assert(OrigLoop->empty() && "Inner loop expected.");
6283-
// Width 1 means no vectorize, cost 0 means uncomputed cost.
6308+
// Width 1 means no vectorization, cost 0 means uncomputed cost.
62846309
const VectorizationFactor NoVectorization = {1U, 0U};
62856310
Optional<unsigned> MaybeMaxVF = CM.computeMaxVF(OptForSize);
62866311
if (!MaybeMaxVF.hasValue()) // Cases considered too costly to vectorize.
@@ -6806,9 +6831,11 @@ VPBasicBlock *LoopVectorizationPlanner::handleReplication(
68066831
"VPBB has successors when handling predicated replication.");
68076832
// Record predicated instructions for above packing optimizations.
68086833
PredInst2Recipe[I] = Recipe;
6809-
VPBlockBase *Region =
6810-
VPBB->setOneSuccessor(createReplicateRegion(I, Recipe, Plan));
6811-
return cast<VPBasicBlock>(Region->setOneSuccessor(new VPBasicBlock()));
6834+
VPBlockBase *Region = createReplicateRegion(I, Recipe, Plan);
6835+
VPBlockUtils::insertBlockAfter(Region, VPBB);
6836+
auto *RegSucc = new VPBasicBlock();
6837+
VPBlockUtils::insertBlockAfter(RegSucc, Region);
6838+
return RegSucc;
68126839
}
68136840

68146841
VPRegionBlock *
@@ -6834,8 +6861,8 @@ LoopVectorizationPlanner::createReplicateRegion(Instruction *Instr,
68346861

68356862
// Note: first set Entry as region entry and then connect successors starting
68366863
// from it in order, to propagate the "parent" of each VPBasicBlock.
6837-
Entry->setTwoSuccessors(Pred, Exit);
6838-
Pred->setOneSuccessor(Exit);
6864+
VPBlockUtils::insertTwoBlocksAfter(Pred, Exit, Entry);
6865+
VPBlockUtils::connectBlocks(Pred, Exit);
68396866

68406867
return Region;
68416868
}
@@ -6852,6 +6879,11 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range,
68526879

68536880
// Create new empty VPlan
68546881
auto Plan = llvm::make_unique<VPlan>();
6882+
6883+
// Build hierarchical CFG
6884+
VPlanHCFGBuilder HCFGBuilder(OrigLoop, LI);
6885+
HCFGBuilder.buildHierarchicalCFG(*Plan.get());
6886+
68556887
return Plan;
68566888
}
68576889

@@ -6893,7 +6925,7 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range,
68936925
// ingredients and fill a new VPBasicBlock.
68946926
unsigned VPBBsForBB = 0;
68956927
auto *FirstVPBBForBB = new VPBasicBlock(BB->getName());
6896-
VPBB->setOneSuccessor(FirstVPBBForBB);
6928+
VPBlockUtils::insertBlockAfter(FirstVPBBForBB, VPBB);
68976929
VPBB = FirstVPBBForBB;
68986930
Builder.setInsertPoint(VPBB);
68996931

@@ -6997,7 +7029,7 @@ LoopVectorizationPlanner::buildVPlan(VFRange &Range,
69977029
VPBasicBlock *PreEntry = cast<VPBasicBlock>(Plan->getEntry());
69987030
assert(PreEntry->empty() && "Expecting empty pre-entry block.");
69997031
VPBlockBase *Entry = Plan->setEntry(PreEntry->getSingleSuccessor());
7000-
PreEntry->disconnectSuccessor(Entry);
7032+
VPBlockUtils::disconnectBlocks(PreEntry, Entry);
70017033
delete PreEntry;
70027034

70037035
std::string PlanName;

0 commit comments

Comments
 (0)