Skip to content

Commit 94fbe7e

Browse files
authored
[SLP] NFC. Replace TreeEntry::setOperandsInOrder with VLOperands. (#113880)
To reduce repeated code, TreeEntry::setOperandsInOrder is replaced by VLOperands. ArgSize is provided to make sure that the other operands are not reordered when VL[0] is an IntrinsicInst (because APO is a boolean value). In addition, BoUpSLP::reorderInputsAccordingToOpcode is removed, since it is simple.
1 parent 36c2940 commit 94fbe7e

File tree

1 file changed

+65
-142
lines changed

1 file changed

+65
-142
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 65 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -2017,6 +2017,9 @@ class BoUpSLP {
20172017

20182018
/// A vector of operand vectors.
20192019
SmallVector<OperandDataVec, 4> OpsVec;
2020+
/// When VL[0] is IntrinsicInst, ArgSize is CallBase::arg_size. When VL[0]
2021+
/// is not IntrinsicInst, ArgSize is User::getNumOperands.
2022+
unsigned ArgSize = 0;
20202023

20212024
const TargetLibraryInfo &TLI;
20222025
const DataLayout &DL;
@@ -2404,10 +2407,12 @@ class BoUpSLP {
24042407
assert(!VL.empty() && "Bad VL");
24052408
assert((empty() || VL.size() == getNumLanes()) &&
24062409
"Expected same number of lanes");
2410+
// IntrinsicInst::isCommutative returns true if swapping the first "two"
2411+
// arguments to the intrinsic produces the same result.
24072412
constexpr unsigned IntrinsicNumOperands = 2;
24082413
auto *VL0 = cast<Instruction>(*find_if(VL, IsaPred<Instruction>));
2409-
unsigned NumOperands = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands
2410-
: VL0->getNumOperands();
2414+
unsigned NumOperands = VL0->getNumOperands();
2415+
ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands;
24112416
OpsVec.resize(NumOperands);
24122417
unsigned NumLanes = VL.size();
24132418
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2440,7 +2445,7 @@ class BoUpSLP {
24402445
}
24412446

24422447
/// \returns the number of operands.
2443-
unsigned getNumOperands() const { return OpsVec.size(); }
2448+
unsigned getNumOperands() const { return ArgSize; }
24442449

24452450
/// \returns the number of lanes.
24462451
unsigned getNumLanes() const { return OpsVec[0].size(); }
@@ -2617,7 +2622,8 @@ class BoUpSLP {
26172622
ArrayRef<OperandData> Op0 = OpsVec.front();
26182623
for (const OperandData &Data : Op0)
26192624
UniqueValues.insert(Data.V);
2620-
for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
2625+
for (ArrayRef<OperandData> Op :
2626+
ArrayRef(OpsVec).slice(1, getNumOperands() - 1)) {
26212627
if (any_of(Op, [&UniqueValues](const OperandData &Data) {
26222628
return !UniqueValues.contains(Data.V);
26232629
}))
@@ -3138,13 +3144,6 @@ class BoUpSLP {
31383144
SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
31393145
8> &GatheredLoads);
31403146

3141-
/// Reorder commutative or alt operands to get better probability of
3142-
/// generating vectorized code.
3143-
static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
3144-
SmallVectorImpl<Value *> &Left,
3145-
SmallVectorImpl<Value *> &Right,
3146-
const BoUpSLP &R);
3147-
31483147
/// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
31493148
/// users of \p TE and collects the stores. It returns the map from the store
31503149
/// pointers to the collected stores.
@@ -3339,27 +3338,15 @@ class BoUpSLP {
33393338
copy(OpVL, Operands[OpIdx].begin());
33403339
}
33413340

3342-
/// Set the operands of this bundle in their original order.
3343-
void setOperandsInOrder() {
3344-
assert(Operands.empty() && "Already initialized?");
3345-
auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
3346-
Operands.resize(I0->getNumOperands());
3347-
unsigned NumLanes = Scalars.size();
3348-
for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
3349-
OpIdx != NumOperands; ++OpIdx) {
3350-
Operands[OpIdx].resize(NumLanes);
3351-
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3352-
if (isa<PoisonValue>(Scalars[Lane])) {
3353-
Operands[OpIdx][Lane] =
3354-
PoisonValue::get(I0->getOperand(OpIdx)->getType());
3355-
continue;
3356-
}
3357-
auto *I = cast<Instruction>(Scalars[Lane]);
3358-
assert(I->getNumOperands() == NumOperands &&
3359-
"Expected same number of operands");
3360-
Operands[OpIdx][Lane] = I->getOperand(OpIdx);
3361-
}
3362-
}
3341+
/// Set this bundle's operand from \p VL.
3342+
void setOperand(ArrayRef<Value *> VL, const BoUpSLP &R,
3343+
bool RequireReorder = false) {
3344+
VLOperands Ops(VL, R);
3345+
if (RequireReorder)
3346+
Ops.reorder();
3347+
for (unsigned I :
3348+
seq<unsigned>(cast<Instruction>(VL[0])->getNumOperands()))
3349+
setOperand(I, Ops.getVL(I));
33633350
}
33643351

33653352
/// Reorders operands of the node to the given mask \p Mask.
@@ -8459,7 +8446,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84598446
{}, CurrentOrder);
84608447
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
84618448

8462-
TE->setOperandsInOrder();
8449+
TE->setOperand(VL, *this);
84638450
buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
84648451
return;
84658452
}
@@ -8480,27 +8467,26 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84808467
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
84818468
else
84828469
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
8483-
TE->setOperandsInOrder();
84848470
break;
84858471
case TreeEntry::StridedVectorize:
84868472
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
84878473
TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
84888474
UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
8489-
TE->setOperandsInOrder();
84908475
LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
84918476
break;
84928477
case TreeEntry::ScatterVectorize:
84938478
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
84948479
TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
84958480
UserTreeIdx, ReuseShuffleIndices);
8496-
TE->setOperandsInOrder();
8497-
buildTree_rec(PointerOps, Depth + 1, {TE, 0});
84988481
LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
84998482
break;
85008483
case TreeEntry::CombinedVectorize:
85018484
case TreeEntry::NeedToGather:
85028485
llvm_unreachable("Unexpected loads state.");
85038486
}
8487+
TE->setOperand(VL, *this);
8488+
if (State == TreeEntry::ScatterVectorize)
8489+
buildTree_rec(PointerOps, Depth + 1, {TE, 0});
85048490
return;
85058491
}
85068492
case Instruction::ZExt:
@@ -8538,8 +8524,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
85388524
ReuseShuffleIndices);
85398525
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
85408526

8541-
TE->setOperandsInOrder();
8542-
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8527+
TE->setOperand(VL, *this);
8528+
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
85438529
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
85448530
if (ShuffleOrOp == Instruction::Trunc) {
85458531
ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
@@ -8566,12 +8552,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
85668552
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
85678553

85688554
ValueList Left, Right;
8555+
VLOperands Ops(VL, *this);
85698556
if (cast<CmpInst>(VL0)->isCommutative()) {
85708557
// Commutative predicate - collect + sort operands of the instructions
85718558
// so that each side is more likely to have the same opcode.
85728559
assert(P0 == CmpInst::getSwappedPredicate(P0) &&
85738560
"Commutative Predicate mismatch");
8574-
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8561+
Ops.reorder();
8562+
Left = Ops.getVL(0);
8563+
Right = Ops.getVL(1);
85758564
} else {
85768565
// Collect operands - commute if it uses the swapped predicate.
85778566
for (Value *V : VL) {
@@ -8632,20 +8621,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
86328621
ReuseShuffleIndices);
86338622
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
86348623

8635-
// Sort operands of the instructions so that each side is more likely to
8636-
// have the same opcode.
8637-
if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
8638-
ValueList Left, Right;
8639-
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8640-
TE->setOperand(0, Left);
8641-
TE->setOperand(1, Right);
8642-
buildTree_rec(Left, Depth + 1, {TE, 0});
8643-
buildTree_rec(Right, Depth + 1, {TE, 1});
8644-
return;
8645-
}
8646-
8647-
TE->setOperandsInOrder();
8648-
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8624+
TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
8625+
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
86498626
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
86508627
return;
86518628
}
@@ -8710,7 +8687,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87108687
fixupOrderingIndices(CurrentOrder);
87118688
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
87128689
ReuseShuffleIndices, CurrentOrder);
8713-
TE->setOperandsInOrder();
8690+
TE->setOperand(VL, *this);
87148691
buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
87158692
if (Consecutive)
87168693
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8726,46 +8703,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87268703

87278704
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
87288705
ReuseShuffleIndices);
8729-
// Sort operands of the instructions so that each side is more likely to
8730-
// have the same opcode.
8731-
if (isCommutative(VL0)) {
8732-
ValueList Left, Right;
8733-
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8734-
TE->setOperand(0, Left);
8735-
TE->setOperand(1, Right);
8736-
SmallVector<ValueList> Operands;
8737-
for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8738-
Operands.emplace_back();
8739-
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
8740-
continue;
8741-
for (Value *V : VL) {
8742-
auto *CI2 = cast<CallInst>(V);
8743-
Operands.back().push_back(CI2->getArgOperand(I));
8744-
}
8745-
TE->setOperand(I, Operands.back());
8746-
}
8747-
buildTree_rec(Left, Depth + 1, {TE, 0});
8748-
buildTree_rec(Right, Depth + 1, {TE, 1});
8749-
for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8750-
if (Operands[I - 2].empty())
8751-
continue;
8752-
buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
8753-
}
8754-
return;
8755-
}
8756-
TE->setOperandsInOrder();
8757-
for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
8706+
TE->setOperand(VL, *this, isCommutative(VL0));
8707+
for (unsigned I : seq<unsigned>(CI->arg_size())) {
87588708
// For scalar operands no need to create an entry since no need to
87598709
// vectorize it.
87608710
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
87618711
continue;
8762-
ValueList Operands;
8763-
// Prepare the operand vector.
8764-
for (Value *V : VL) {
8765-
auto *CI2 = cast<CallInst>(V);
8766-
Operands.push_back(CI2->getArgOperand(I));
8767-
}
8768-
buildTree_rec(Operands, Depth + 1, {TE, I});
8712+
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
87698713
}
87708714
return;
87718715
}
@@ -8776,43 +8720,37 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87768720

87778721
// Reorder operands if reordering would enable vectorization.
87788722
auto *CI = dyn_cast<CmpInst>(VL0);
8779-
if (isa<BinaryOperator>(VL0) || CI) {
8723+
if (CI && any_of(VL, [](Value *V) {
8724+
return !isa<PoisonValue>(V) && !cast<CmpInst>(V)->isCommutative();
8725+
})) {
8726+
auto *MainCI = cast<CmpInst>(S.MainOp);
8727+
auto *AltCI = cast<CmpInst>(S.AltOp);
8728+
CmpInst::Predicate MainP = MainCI->getPredicate();
8729+
CmpInst::Predicate AltP = AltCI->getPredicate();
8730+
assert(MainP != AltP &&
8731+
"Expected different main/alternate predicates.");
87808732
ValueList Left, Right;
8781-
if (!CI || all_of(VL, [](Value *V) {
8782-
return isa<PoisonValue>(V) || cast<CmpInst>(V)->isCommutative();
8783-
})) {
8784-
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8785-
} else {
8786-
auto *MainCI = cast<CmpInst>(S.MainOp);
8787-
auto *AltCI = cast<CmpInst>(S.AltOp);
8788-
CmpInst::Predicate MainP = MainCI->getPredicate();
8789-
CmpInst::Predicate AltP = AltCI->getPredicate();
8790-
assert(MainP != AltP &&
8791-
"Expected different main/alternate predicates.");
8792-
// Collect operands - commute if it uses the swapped predicate or
8793-
// alternate operation.
8794-
for (Value *V : VL) {
8795-
if (isa<PoisonValue>(V)) {
8796-
Left.push_back(
8797-
PoisonValue::get(MainCI->getOperand(0)->getType()));
8798-
Right.push_back(
8799-
PoisonValue::get(MainCI->getOperand(1)->getType()));
8800-
continue;
8801-
}
8802-
auto *Cmp = cast<CmpInst>(V);
8803-
Value *LHS = Cmp->getOperand(0);
8804-
Value *RHS = Cmp->getOperand(1);
8733+
// Collect operands - commute if it uses the swapped predicate or
8734+
// alternate operation.
8735+
for (Value *V : VL) {
8736+
if (isa<PoisonValue>(V)) {
8737+
Left.push_back(PoisonValue::get(MainCI->getOperand(0)->getType()));
8738+
Right.push_back(PoisonValue::get(MainCI->getOperand(1)->getType()));
8739+
continue;
8740+
}
8741+
auto *Cmp = cast<CmpInst>(V);
8742+
Value *LHS = Cmp->getOperand(0);
8743+
Value *RHS = Cmp->getOperand(1);
88058744

8806-
if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8807-
if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8808-
std::swap(LHS, RHS);
8809-
} else {
8810-
if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8811-
std::swap(LHS, RHS);
8812-
}
8813-
Left.push_back(LHS);
8814-
Right.push_back(RHS);
8745+
if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8746+
if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8747+
std::swap(LHS, RHS);
8748+
} else {
8749+
if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8750+
std::swap(LHS, RHS);
88158751
}
8752+
Left.push_back(LHS);
8753+
Right.push_back(RHS);
88168754
}
88178755
TE->setOperand(0, Left);
88188756
TE->setOperand(1, Right);
@@ -8821,8 +8759,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
88218759
return;
88228760
}
88238761

8824-
TE->setOperandsInOrder();
8825-
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8762+
TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) || CI);
8763+
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
88268764
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
88278765
return;
88288766
}
@@ -13526,21 +13464,6 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1352613464
return Cost;
1352713465
}
1352813466

13529-
// Perform operand reordering on the instructions in VL and return the reordered
13530-
// operands in Left and Right.
13531-
void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
13532-
SmallVectorImpl<Value *> &Left,
13533-
SmallVectorImpl<Value *> &Right,
13534-
const BoUpSLP &R) {
13535-
if (VL.empty())
13536-
return;
13537-
VLOperands Ops(VL, R);
13538-
// Reorder the operands in place.
13539-
Ops.reorder();
13540-
Left = Ops.getVL(0);
13541-
Right = Ops.getVL(1);
13542-
}
13543-
1354413467
Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
1354513468
auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
1354613469
if (Res)

0 commit comments

Comments
 (0)