Skip to content

Commit 6307e4b

Browse files
committed
Revert "[SLP] NFC. Replace TreeEntry::setOperandsInOrder with VLOperands. (llvm#113880)"
This reverts commit 94fbe7e. Causes a crash when linking mafft in ReleaseLTO-g config.
1 parent 89e919f commit 6307e4b

File tree

1 file changed

+142
-65
lines changed

1 file changed

+142
-65
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 142 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -2017,9 +2017,6 @@ class BoUpSLP {
20172017

20182018
/// A vector of operand vectors.
20192019
SmallVector<OperandDataVec, 4> OpsVec;
2020-
/// When VL[0] is IntrinsicInst, ArgSize is CallBase::arg_size. When VL[0]
2021-
/// is not IntrinsicInst, ArgSize is User::getNumOperands.
2022-
unsigned ArgSize = 0;
20232020

20242021
const TargetLibraryInfo &TLI;
20252022
const DataLayout &DL;
@@ -2407,12 +2404,10 @@ class BoUpSLP {
24072404
assert(!VL.empty() && "Bad VL");
24082405
assert((empty() || VL.size() == getNumLanes()) &&
24092406
"Expected same number of lanes");
2410-
// IntrinsicInst::isCommutative returns true if swapping the first "two"
2411-
// arguments to the intrinsic produces the same result.
24122407
constexpr unsigned IntrinsicNumOperands = 2;
24132408
auto *VL0 = cast<Instruction>(*find_if(VL, IsaPred<Instruction>));
2414-
unsigned NumOperands = VL0->getNumOperands();
2415-
ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands;
2409+
unsigned NumOperands = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands
2410+
: VL0->getNumOperands();
24162411
OpsVec.resize(NumOperands);
24172412
unsigned NumLanes = VL.size();
24182413
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2445,7 +2440,7 @@ class BoUpSLP {
24452440
}
24462441

24472442
/// \returns the number of operands.
2448-
unsigned getNumOperands() const { return ArgSize; }
2443+
unsigned getNumOperands() const { return OpsVec.size(); }
24492444

24502445
/// \returns the number of lanes.
24512446
unsigned getNumLanes() const { return OpsVec[0].size(); }
@@ -2622,8 +2617,7 @@ class BoUpSLP {
26222617
ArrayRef<OperandData> Op0 = OpsVec.front();
26232618
for (const OperandData &Data : Op0)
26242619
UniqueValues.insert(Data.V);
2625-
for (ArrayRef<OperandData> Op :
2626-
ArrayRef(OpsVec).slice(1, getNumOperands() - 1)) {
2620+
for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
26272621
if (any_of(Op, [&UniqueValues](const OperandData &Data) {
26282622
return !UniqueValues.contains(Data.V);
26292623
}))
@@ -3144,6 +3138,13 @@ class BoUpSLP {
31443138
SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
31453139
8> &GatheredLoads);
31463140

3141+
/// Reorder commutative or alt operands to get better probability of
3142+
/// generating vectorized code.
3143+
static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
3144+
SmallVectorImpl<Value *> &Left,
3145+
SmallVectorImpl<Value *> &Right,
3146+
const BoUpSLP &R);
3147+
31473148
/// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
31483149
/// users of \p TE and collects the stores. It returns the map from the store
31493150
/// pointers to the collected stores.
@@ -3338,15 +3339,27 @@ class BoUpSLP {
33383339
copy(OpVL, Operands[OpIdx].begin());
33393340
}
33403341

3341-
/// Set this bundle's operand from \p VL.
3342-
void setOperand(ArrayRef<Value *> VL, const BoUpSLP &R,
3343-
bool RequireReorder = false) {
3344-
VLOperands Ops(VL, R);
3345-
if (RequireReorder)
3346-
Ops.reorder();
3347-
for (unsigned I :
3348-
seq<unsigned>(cast<Instruction>(VL[0])->getNumOperands()))
3349-
setOperand(I, Ops.getVL(I));
3342+
/// Set the operands of this bundle in their original order.
3343+
void setOperandsInOrder() {
3344+
assert(Operands.empty() && "Already initialized?");
3345+
auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
3346+
Operands.resize(I0->getNumOperands());
3347+
unsigned NumLanes = Scalars.size();
3348+
for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
3349+
OpIdx != NumOperands; ++OpIdx) {
3350+
Operands[OpIdx].resize(NumLanes);
3351+
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3352+
if (isa<PoisonValue>(Scalars[Lane])) {
3353+
Operands[OpIdx][Lane] =
3354+
PoisonValue::get(I0->getOperand(OpIdx)->getType());
3355+
continue;
3356+
}
3357+
auto *I = cast<Instruction>(Scalars[Lane]);
3358+
assert(I->getNumOperands() == NumOperands &&
3359+
"Expected same number of operands");
3360+
Operands[OpIdx][Lane] = I->getOperand(OpIdx);
3361+
}
3362+
}
33503363
}
33513364

33523365
/// Reorders operands of the node to the given mask \p Mask.
@@ -8446,7 +8459,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84468459
{}, CurrentOrder);
84478460
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
84488461

8449-
TE->setOperand(VL, *this);
8462+
TE->setOperandsInOrder();
84508463
buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
84518464
return;
84528465
}
@@ -8467,26 +8480,27 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
84678480
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
84688481
else
84698482
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
8483+
TE->setOperandsInOrder();
84708484
break;
84718485
case TreeEntry::StridedVectorize:
84728486
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
84738487
TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
84748488
UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
8489+
TE->setOperandsInOrder();
84758490
LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
84768491
break;
84778492
case TreeEntry::ScatterVectorize:
84788493
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
84798494
TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
84808495
UserTreeIdx, ReuseShuffleIndices);
8496+
TE->setOperandsInOrder();
8497+
buildTree_rec(PointerOps, Depth + 1, {TE, 0});
84818498
LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
84828499
break;
84838500
case TreeEntry::CombinedVectorize:
84848501
case TreeEntry::NeedToGather:
84858502
llvm_unreachable("Unexpected loads state.");
84868503
}
8487-
TE->setOperand(VL, *this);
8488-
if (State == TreeEntry::ScatterVectorize)
8489-
buildTree_rec(PointerOps, Depth + 1, {TE, 0});
84908504
return;
84918505
}
84928506
case Instruction::ZExt:
@@ -8524,8 +8538,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
85248538
ReuseShuffleIndices);
85258539
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
85268540

8527-
TE->setOperand(VL, *this);
8528-
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
8541+
TE->setOperandsInOrder();
8542+
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
85298543
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
85308544
if (ShuffleOrOp == Instruction::Trunc) {
85318545
ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
@@ -8552,15 +8566,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
85528566
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
85538567

85548568
ValueList Left, Right;
8555-
VLOperands Ops(VL, *this);
85568569
if (cast<CmpInst>(VL0)->isCommutative()) {
85578570
// Commutative predicate - collect + sort operands of the instructions
85588571
// so that each side is more likely to have the same opcode.
85598572
assert(P0 == CmpInst::getSwappedPredicate(P0) &&
85608573
"Commutative Predicate mismatch");
8561-
Ops.reorder();
8562-
Left = Ops.getVL(0);
8563-
Right = Ops.getVL(1);
8574+
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
85648575
} else {
85658576
// Collect operands - commute if it uses the swapped predicate.
85668577
for (Value *V : VL) {
@@ -8621,8 +8632,20 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
86218632
ReuseShuffleIndices);
86228633
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
86238634

8624-
TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
8625-
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
8635+
// Sort operands of the instructions so that each side is more likely to
8636+
// have the same opcode.
8637+
if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
8638+
ValueList Left, Right;
8639+
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8640+
TE->setOperand(0, Left);
8641+
TE->setOperand(1, Right);
8642+
buildTree_rec(Left, Depth + 1, {TE, 0});
8643+
buildTree_rec(Right, Depth + 1, {TE, 1});
8644+
return;
8645+
}
8646+
8647+
TE->setOperandsInOrder();
8648+
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
86268649
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
86278650
return;
86288651
}
@@ -8687,7 +8710,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
86878710
fixupOrderingIndices(CurrentOrder);
86888711
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
86898712
ReuseShuffleIndices, CurrentOrder);
8690-
TE->setOperand(VL, *this);
8713+
TE->setOperandsInOrder();
86918714
buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
86928715
if (Consecutive)
86938716
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8703,13 +8726,46 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87038726

87048727
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
87058728
ReuseShuffleIndices);
8706-
TE->setOperand(VL, *this, isCommutative(VL0));
8707-
for (unsigned I : seq<unsigned>(CI->arg_size())) {
8729+
// Sort operands of the instructions so that each side is more likely to
8730+
// have the same opcode.
8731+
if (isCommutative(VL0)) {
8732+
ValueList Left, Right;
8733+
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8734+
TE->setOperand(0, Left);
8735+
TE->setOperand(1, Right);
8736+
SmallVector<ValueList> Operands;
8737+
for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8738+
Operands.emplace_back();
8739+
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
8740+
continue;
8741+
for (Value *V : VL) {
8742+
auto *CI2 = cast<CallInst>(V);
8743+
Operands.back().push_back(CI2->getArgOperand(I));
8744+
}
8745+
TE->setOperand(I, Operands.back());
8746+
}
8747+
buildTree_rec(Left, Depth + 1, {TE, 0});
8748+
buildTree_rec(Right, Depth + 1, {TE, 1});
8749+
for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8750+
if (Operands[I - 2].empty())
8751+
continue;
8752+
buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
8753+
}
8754+
return;
8755+
}
8756+
TE->setOperandsInOrder();
8757+
for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
87088758
// For scalar operands no need to create an entry since no need to
87098759
// vectorize it.
87108760
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
87118761
continue;
8712-
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
8762+
ValueList Operands;
8763+
// Prepare the operand vector.
8764+
for (Value *V : VL) {
8765+
auto *CI2 = cast<CallInst>(V);
8766+
Operands.push_back(CI2->getArgOperand(I));
8767+
}
8768+
buildTree_rec(Operands, Depth + 1, {TE, I});
87138769
}
87148770
return;
87158771
}
@@ -8720,37 +8776,43 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87208776

87218777
// Reorder operands if reordering would enable vectorization.
87228778
auto *CI = dyn_cast<CmpInst>(VL0);
8723-
if (CI && any_of(VL, [](Value *V) {
8724-
return !isa<PoisonValue>(V) && !cast<CmpInst>(V)->isCommutative();
8725-
})) {
8726-
auto *MainCI = cast<CmpInst>(S.MainOp);
8727-
auto *AltCI = cast<CmpInst>(S.AltOp);
8728-
CmpInst::Predicate MainP = MainCI->getPredicate();
8729-
CmpInst::Predicate AltP = AltCI->getPredicate();
8730-
assert(MainP != AltP &&
8731-
"Expected different main/alternate predicates.");
8779+
if (isa<BinaryOperator>(VL0) || CI) {
87328780
ValueList Left, Right;
8733-
// Collect operands - commute if it uses the swapped predicate or
8734-
// alternate operation.
8735-
for (Value *V : VL) {
8736-
if (isa<PoisonValue>(V)) {
8737-
Left.push_back(PoisonValue::get(MainCI->getOperand(0)->getType()));
8738-
Right.push_back(PoisonValue::get(MainCI->getOperand(1)->getType()));
8739-
continue;
8740-
}
8741-
auto *Cmp = cast<CmpInst>(V);
8742-
Value *LHS = Cmp->getOperand(0);
8743-
Value *RHS = Cmp->getOperand(1);
8781+
if (!CI || all_of(VL, [](Value *V) {
8782+
return isa<PoisonValue>(V) || cast<CmpInst>(V)->isCommutative();
8783+
})) {
8784+
reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8785+
} else {
8786+
auto *MainCI = cast<CmpInst>(S.MainOp);
8787+
auto *AltCI = cast<CmpInst>(S.AltOp);
8788+
CmpInst::Predicate MainP = MainCI->getPredicate();
8789+
CmpInst::Predicate AltP = AltCI->getPredicate();
8790+
assert(MainP != AltP &&
8791+
"Expected different main/alternate predicates.");
8792+
// Collect operands - commute if it uses the swapped predicate or
8793+
// alternate operation.
8794+
for (Value *V : VL) {
8795+
if (isa<PoisonValue>(V)) {
8796+
Left.push_back(
8797+
PoisonValue::get(MainCI->getOperand(0)->getType()));
8798+
Right.push_back(
8799+
PoisonValue::get(MainCI->getOperand(1)->getType()));
8800+
continue;
8801+
}
8802+
auto *Cmp = cast<CmpInst>(V);
8803+
Value *LHS = Cmp->getOperand(0);
8804+
Value *RHS = Cmp->getOperand(1);
87448805

8745-
if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8746-
if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8747-
std::swap(LHS, RHS);
8748-
} else {
8749-
if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8750-
std::swap(LHS, RHS);
8806+
if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8807+
if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8808+
std::swap(LHS, RHS);
8809+
} else {
8810+
if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8811+
std::swap(LHS, RHS);
8812+
}
8813+
Left.push_back(LHS);
8814+
Right.push_back(RHS);
87518815
}
8752-
Left.push_back(LHS);
8753-
Right.push_back(RHS);
87548816
}
87558817
TE->setOperand(0, Left);
87568818
TE->setOperand(1, Right);
@@ -8759,8 +8821,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
87598821
return;
87608822
}
87618823

8762-
TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) || CI);
8763-
for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
8824+
TE->setOperandsInOrder();
8825+
for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
87648826
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
87658827
return;
87668828
}
@@ -13465,6 +13527,21 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1346513527
return Cost;
1346613528
}
1346713529

13530+
// Perform operand reordering on the instructions in VL and return the reordered
13531+
// operands in Left and Right.
13532+
void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
13533+
SmallVectorImpl<Value *> &Left,
13534+
SmallVectorImpl<Value *> &Right,
13535+
const BoUpSLP &R) {
13536+
if (VL.empty())
13537+
return;
13538+
VLOperands Ops(VL, R);
13539+
// Reorder the operands in place.
13540+
Ops.reorder();
13541+
Left = Ops.getVL(0);
13542+
Right = Ops.getVL(1);
13543+
}
13544+
1346813545
Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
1346913546
auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
1347013547
if (Res)

0 commit comments

Comments
 (0)