Skip to content

Commit a42f974

Browse files
[LV] Support binary and unary operations with EVL-vectorization
This patch adds `VPWidenEVLRecipe`, which represents a `VPWidenRecipe` with an additional EVL (explicit vector length) operand. In `tryAddExplicitVectorLength`, the new recipe replaces every `VPWidenRecipe` that wraps a binary or unary operation. Follow-up patches will extend support to the remaining cases, such as `FCmp` and `ICmp`.
1 parent ce5b371 commit a42f974

File tree

8 files changed

+2001
-50
lines changed

8 files changed

+2001
-50
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -855,6 +855,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
855855
case VPRecipeBase::VPWidenCastSC:
856856
case VPRecipeBase::VPWidenGEPSC:
857857
case VPRecipeBase::VPWidenSC:
858+
case VPRecipeBase::VPWidenEVLSC:
858859
case VPRecipeBase::VPWidenSelectSC:
859860
case VPRecipeBase::VPBlendSC:
860861
case VPRecipeBase::VPPredInstPHISC:
@@ -1039,6 +1040,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
10391040
static inline bool classof(const VPRecipeBase *R) {
10401041
return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
10411042
R->getVPDefID() == VPRecipeBase::VPWidenSC ||
1043+
R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
10421044
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
10431045
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
10441046
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
@@ -1333,13 +1335,18 @@ class VPInstruction : public VPRecipeWithIRFlags {
13331335
/// ingredient. This recipe covers most of the traditional vectorization cases
13341336
/// where each ingredient transforms into a vectorized version of itself.
13351337
class VPWidenRecipe : public VPRecipeWithIRFlags {
1338+
protected:
13361339
unsigned Opcode;
13371340

1341+
template <typename IterT>
1342+
VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
1343+
iterator_range<IterT> Operands)
1344+
: VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}
1345+
13381346
public:
13391347
template <typename IterT>
13401348
VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
1341-
: VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
1342-
Opcode(I.getOpcode()) {}
1349+
: VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
13431350

13441351
~VPWidenRecipe() override = default;
13451352

@@ -1363,6 +1370,49 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
13631370
#endif
13641371
};
13651372

1373+
class VPWidenEVLRecipe : public VPWidenRecipe {
1374+
private:
1375+
using VPRecipeWithIRFlags::transferFlags;
1376+
1377+
public:
1378+
template <typename IterT>
1379+
VPWidenEVLRecipe(Instruction &I, iterator_range<IterT> Operands, VPValue &EVL)
1380+
: VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
1381+
addOperand(&EVL);
1382+
}
1383+
1384+
~VPWidenEVLRecipe() override = default;
1385+
1386+
VPWidenRecipe *clone() override final {
1387+
SmallVector<VPValue *> Ops(operands());
1388+
VPValue *EVL = Ops.pop_back_val();
1389+
auto *R = new VPWidenEVLRecipe(*getUnderlyingInstr(),
1390+
make_range(Ops.begin(), Ops.end()), *EVL);
1391+
R->transferFlags(*this);
1392+
return R;
1393+
}
1394+
1395+
VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);
1396+
1397+
VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
1398+
const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
1399+
1400+
/// A helper function to create widen EVL recipe from regular widen recipe.
1401+
static VPWidenEVLRecipe *create(VPWidenRecipe *W, VPValue &EVL);
1402+
1403+
/// Produce widened copies of all Ingredients.
1404+
void execute(VPTransformState &State) override final;
1405+
1406+
/// Returns true if the recipe only uses the first lane of operand \p Op.
1407+
bool onlyFirstLaneUsed(const VPValue *Op) const override;
1408+
1409+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1410+
/// Print the recipe.
1411+
void print(raw_ostream &O, const Twine &Indent,
1412+
VPSlotTracker &SlotTracker) const override final;
1413+
#endif
1414+
};
1415+
13661416
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
13671417
class VPWidenCastRecipe : public VPRecipeWithIRFlags {
13681418
/// Cast instruction opcode.

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "llvm/IR/Instructions.h"
2424
#include "llvm/IR/Type.h"
2525
#include "llvm/IR/Value.h"
26+
#include "llvm/IR/VectorBuilder.h"
2627
#include "llvm/Support/Casting.h"
2728
#include "llvm/Support/CommandLine.h"
2829
#include "llvm/Support/Debug.h"
@@ -71,6 +72,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
7172
case VPWidenLoadSC:
7273
case VPWidenPHISC:
7374
case VPWidenSC:
75+
case VPWidenEVLSC:
7476
case VPWidenSelectSC: {
7577
const Instruction *I =
7678
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -110,6 +112,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
110112
case VPWidenIntOrFpInductionSC:
111113
case VPWidenPHISC:
112114
case VPWidenSC:
115+
case VPWidenEVLSC:
113116
case VPWidenSelectSC: {
114117
const Instruction *I =
115118
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -157,6 +160,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
157160
case VPWidenPHISC:
158161
case VPWidenPointerInductionSC:
159162
case VPWidenSC:
163+
case VPWidenEVLSC:
160164
case VPWidenSelectSC: {
161165
const Instruction *I =
162166
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -993,6 +997,64 @@ void VPWidenRecipe::execute(VPTransformState &State) {
993997
#endif
994998
}
995999

1000+
/// Build a VPWidenEVLRecipe from the regular widen recipe \p W. The new
/// recipe wraps the same underlying instruction and operands as \p W, gets
/// \p EVL as an extra operand, and takes over the flags of \p W. The caller
/// receives the newly allocated recipe; it is not inserted anywhere here.
VPWidenEVLRecipe *VPWidenEVLRecipe::create(VPWidenRecipe *W, VPValue &EVL) {
  Instruction &Ingredient = *W->getUnderlyingInstr();
  auto *EVLRecipe = new VPWidenEVLRecipe(Ingredient, W->operands(), EVL);
  EVLRecipe->transferFlags(*W);
  return EVLRecipe;
}
1005+
1006+
/// Generate the widened unary/binary operation for this recipe. The operation
/// is built through a VectorBuilder configured with an all-true mask and the
/// recipe's EVL. If the first operand is not a vector, code generation is
/// delegated to VPWidenRecipe::execute instead.
void VPWidenEVLRecipe::execute(VPTransformState &State) {
  assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
                          "explicit vector length.");

  // A non-vector first operand means the operation is scalar; let the plain
  // widen recipe handle it.
  Value *FirstOp = State.get(getOperand(0), 0);
  if (!FirstOp->getType()->isVectorTy()) {
    VPWidenRecipe::execute(State);
    return;
  }

  // The EVL is requested as a scalar value.
  Value *EVLArg = State.get(getEVL(), 0, /*NeedsScalar=*/true);
  const unsigned Opc = getOpcode();
  Instruction *UnderlyingI = getUnderlyingInstr();
  IRBuilderBase &IRB = State.Builder;
  VectorBuilder VPBuilder(IRB);
  Value *AllTrueMask = IRB.CreateVectorSplat(State.VF, IRB.getTrue());

  //===------------------- Binary and Unary Ops ---------------------===//
  if (!Instruction::isBinaryOp(Opc) && !Instruction::isUnaryOp(Opc))
    llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");

  // Gather the widened data operands; the last operand is the EVL and is
  // deliberately left out.
  SmallVector<Value *, 4> WideOps;
  for (unsigned Idx = 0, NumDataOps = getNumOperands() - 1; Idx < NumDataOps;
       ++Idx)
    WideOps.push_back(State.get(getOperand(Idx), 0));

  VPBuilder.setMask(AllTrueMask).setEVL(EVLArg);
  Value *VPInst = VPBuilder.createVectorInstruction(
      Opc, WideOps[0]->getType(), WideOps, "vp.op");

  // NOTE(review): IR flags are copied from the underlying instruction rather
  // than from the flags held by this recipe -- confirm this is intended.
  if (UnderlyingI) {
    if (auto *VecOp = dyn_cast<Instruction>(VPInst))
      VecOp->copyIRFlags(UnderlyingI);
  }

  State.set(this, VPInst, 0);
  State.addMetadata(VPInst, UnderlyingI);
}
1047+
1048+
bool VPWidenEVLRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
1049+
assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
1050+
// EVL in that recipe is always the last operand, thus any use before means
1051+
// the VPValue should be vectorized.
1052+
for (unsigned I = 0, E = getNumOperands() - 1; I != E; ++I)
1053+
if (getOperand(I) == Op)
1054+
return false;
1055+
return true;
1056+
}
1057+
9961058
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
9971059
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
9981060
VPSlotTracker &SlotTracker) const {
@@ -1002,6 +1064,15 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
10021064
printFlags(O);
10031065
printOperands(O, SlotTracker);
10041066
}
1067+
1068+
/// Print the recipe to \p O. Emits, in order: the indent, the "WIDEN vp " tag,
/// the recipe's result as an operand, " = ", the opcode name, the flags and
/// finally the operands.
void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent;
  O << "WIDEN vp ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  O << Instruction::getOpcodeName(Opcode);
  printFlags(O);
  printOperands(O, SlotTracker);
}
10051076
#endif
10061077

10071078
void VPWidenCastRecipe::execute(VPTransformState &State) {

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 56 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include "llvm/ADT/PostOrderIterator.h"
2121
#include "llvm/ADT/STLExtras.h"
2222
#include "llvm/ADT/SetVector.h"
23+
#include "llvm/ADT/TypeSwitch.h"
2324
#include "llvm/Analysis/IVDescriptors.h"
2425
#include "llvm/Analysis/VectorUtils.h"
2526
#include "llvm/IR/Intrinsics.h"
@@ -1219,7 +1220,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
12191220
/// WideCanonicalIV, backedge-taken-count) pattern.
12201221
/// TODO: Introduce explicit recipe for header-mask instead of searching
12211222
/// for the header-mask pattern manually.
1222-
static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
1223+
static DenseSet<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
12231224
SmallVector<VPValue *> WideCanonicalIVs;
12241225
auto *FoundWidenCanonicalIVUser =
12251226
find_if(Plan.getCanonicalIV()->users(),
@@ -1245,7 +1246,7 @@ static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
12451246

12461247
// Walk users of wide canonical IVs and collect to all compares of the form
12471248
// (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
1248-
SmallVector<VPValue *> HeaderMasks;
1249+
DenseSet<VPValue *> HeaderMasks;
12491250
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
12501251
for (auto *Wide : WideCanonicalIVs) {
12511252
for (VPUser *U : SmallVector<VPUser *>(Wide->users())) {
@@ -1257,7 +1258,7 @@ static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
12571258

12581259
assert(HeaderMask->getOperand(0) == Wide &&
12591260
"WidenCanonicalIV must be the first operand of the compare");
1260-
HeaderMasks.push_back(HeaderMask);
1261+
HeaderMasks.insert(HeaderMask);
12611262
}
12621263
}
12631264
return HeaderMasks;
@@ -1296,6 +1297,56 @@ void VPlanTransforms::addActiveLaneMask(
12961297
HeaderMask->replaceAllUsesWith(LaneMask);
12971298
}
12981299

1300+
/// Replace recipes with their EVL variants.
1301+
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
1302+
DenseSet<VPRecipeBase *> ToRemove;
1303+
1304+
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
1305+
Plan.getEntry());
1306+
DenseSet<VPValue *> HeaderMasks = collectAllHeaderMasks(Plan);
1307+
for (VPBasicBlock *VPBB :
1308+
reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) {
1309+
// The recipes in the block are processed in reverse order, to catch chains
1310+
// of dead recipes.
1311+
for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
1312+
TypeSwitch<VPRecipeBase *>(&R)
1313+
.Case<VPWidenLoadRecipe>([&](VPWidenLoadRecipe *L) {
1314+
VPValue *NewMask =
1315+
HeaderMasks.contains(L->getMask()) ? nullptr : L->getMask();
1316+
auto *N = new VPWidenLoadEVLRecipe(L, &EVL, NewMask);
1317+
N->insertBefore(L);
1318+
L->replaceAllUsesWith(N);
1319+
ToRemove.insert(L);
1320+
})
1321+
.Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) {
1322+
VPValue *NewMask =
1323+
HeaderMasks.contains(S->getMask()) ? nullptr : S->getMask();
1324+
auto *N = new VPWidenStoreEVLRecipe(S, &EVL, NewMask);
1325+
N->insertBefore(S);
1326+
ToRemove.insert(S);
1327+
})
1328+
.Case<VPWidenRecipe>([&](VPWidenRecipe *W) {
1329+
unsigned Opcode = W->getOpcode();
1330+
if (!Instruction::isBinaryOp(Opcode) &&
1331+
!Instruction::isUnaryOp(Opcode))
1332+
return;
1333+
auto *N = VPWidenEVLRecipe::create(W, EVL);
1334+
N->insertBefore(W);
1335+
W->replaceAllUsesWith(N);
1336+
ToRemove.insert(W);
1337+
});
1338+
}
1339+
}
1340+
1341+
for (VPRecipeBase *R : ToRemove)
1342+
R->eraseFromParent();
1343+
1344+
for (VPValue *HeaderMask : HeaderMasks)
1345+
recursivelyDeleteDeadRecipes(HeaderMask);
1346+
}
1347+
1348+
1349+
12991350
/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
13001351
/// replaces all uses except the canonical IV increment of
13011352
/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
@@ -1356,29 +1407,8 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
13561407
NextEVLIV->insertBefore(CanonicalIVIncrement);
13571408
EVLPhi->addOperand(NextEVLIV);
13581409

1359-
for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
1360-
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
1361-
auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U);
1362-
if (!MemR)
1363-
continue;
1364-
VPValue *OrigMask = MemR->getMask();
1365-
assert(OrigMask && "Unmasked widen memory recipe when folding tail");
1366-
VPValue *NewMask = HeaderMask == OrigMask ? nullptr : OrigMask;
1367-
if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR)) {
1368-
auto *N = new VPWidenLoadEVLRecipe(L, VPEVL, NewMask);
1369-
N->insertBefore(L);
1370-
L->replaceAllUsesWith(N);
1371-
L->eraseFromParent();
1372-
} else if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR)) {
1373-
auto *N = new VPWidenStoreEVLRecipe(S, VPEVL, NewMask);
1374-
N->insertBefore(S);
1375-
S->eraseFromParent();
1376-
} else {
1377-
llvm_unreachable("unsupported recipe");
1378-
}
1379-
}
1380-
recursivelyDeleteDeadRecipes(HeaderMask);
1381-
}
1410+
transformRecipestoEVLRecipes(Plan, *VPEVL);
1411+
13821412
// Replace all uses of VPCanonicalIVPHIRecipe by
13831413
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.
13841414
CanonicalIVPHI->replaceAllUsesWith(EVLPhi);

llvm/lib/Transforms/Vectorize/VPlanValue.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,7 @@ class VPDef {
356356
VPWidenStoreEVLSC,
357357
VPWidenStoreSC,
358358
VPWidenSC,
359+
VPWidenEVLSC,
359360
VPWidenSelectSC,
360361
VPBlendSC,
361362
// START: Phi-like recipes. Need to be kept together.

0 commit comments

Comments
 (0)