@@ -2017,6 +2017,9 @@ class BoUpSLP {
2017
2017
2018
2018
/// A vector of operand vectors.
2019
2019
SmallVector<OperandDataVec, 4> OpsVec;
2020
+ /// Value returned by getNumOperands(). When the first instruction in VL is
2021
+ /// an IntrinsicInst, ArgSize is 2 (the two arguments that isCommutative allows swapping); otherwise it is User::getNumOperands.
2022
+ unsigned ArgSize = 0;
2020
2023
2021
2024
const TargetLibraryInfo &TLI;
2022
2025
const DataLayout &DL;
@@ -2404,10 +2407,12 @@ class BoUpSLP {
2404
2407
assert(!VL.empty() && "Bad VL");
2405
2408
assert((empty() || VL.size() == getNumLanes()) &&
2406
2409
"Expected same number of lanes");
2410
+ // IntrinsicInst::isCommutative returns true if swapping the first "two"
2411
+ // arguments to the intrinsic produces the same result.
2407
2412
constexpr unsigned IntrinsicNumOperands = 2;
2408
2413
auto *VL0 = cast<Instruction>(*find_if(VL, IsaPred<Instruction>));
2409
- unsigned NumOperands = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands
2410
- : VL0->getNumOperands() ;
2414
+ unsigned NumOperands = VL0->getNumOperands();
2415
+ ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands ;
2411
2416
OpsVec.resize(NumOperands);
2412
2417
unsigned NumLanes = VL.size();
2413
2418
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2440,7 +2445,7 @@ class BoUpSLP {
2440
2445
}
2441
2446
2442
2447
/// \returns the number of operands.
2443
- unsigned getNumOperands() const { return OpsVec.size() ; }
2448
+ unsigned getNumOperands() const { return ArgSize ; }
2444
2449
2445
2450
/// \returns the number of lanes.
2446
2451
unsigned getNumLanes() const { return OpsVec[0].size(); }
@@ -2617,7 +2622,8 @@ class BoUpSLP {
2617
2622
ArrayRef<OperandData> Op0 = OpsVec.front();
2618
2623
for (const OperandData &Data : Op0)
2619
2624
UniqueValues.insert(Data.V);
2620
- for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
2625
+ for (ArrayRef<OperandData> Op :
2626
+ ArrayRef(OpsVec).slice(1, getNumOperands() - 1)) {
2621
2627
if (any_of(Op, [&UniqueValues](const OperandData &Data) {
2622
2628
return !UniqueValues.contains(Data.V);
2623
2629
}))
@@ -3138,13 +3144,6 @@ class BoUpSLP {
3138
3144
SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
3139
3145
8> &GatheredLoads);
3140
3146
3141
- /// Reorder commutative or alt operands to get better probability of
3142
- /// generating vectorized code.
3143
- static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
3144
- SmallVectorImpl<Value *> &Left,
3145
- SmallVectorImpl<Value *> &Right,
3146
- const BoUpSLP &R);
3147
-
3148
3147
/// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
3149
3148
/// users of \p TE and collects the stores. It returns the map from the store
3150
3149
/// pointers to the collected stores.
@@ -3339,27 +3338,15 @@ class BoUpSLP {
3339
3338
copy(OpVL, Operands[OpIdx].begin());
3340
3339
}
3341
3340
3342
- /// Set the operands of this bundle in their original order.
3343
- void setOperandsInOrder() {
3344
- assert(Operands.empty() && "Already initialized?");
3345
- auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
3346
- Operands.resize(I0->getNumOperands());
3347
- unsigned NumLanes = Scalars.size();
3348
- for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
3349
- OpIdx != NumOperands; ++OpIdx) {
3350
- Operands[OpIdx].resize(NumLanes);
3351
- for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3352
- if (isa<PoisonValue>(Scalars[Lane])) {
3353
- Operands[OpIdx][Lane] =
3354
- PoisonValue::get(I0->getOperand(OpIdx)->getType());
3355
- continue;
3356
- }
3357
- auto *I = cast<Instruction>(Scalars[Lane]);
3358
- assert(I->getNumOperands() == NumOperands &&
3359
- "Expected same number of operands");
3360
- Operands[OpIdx][Lane] = I->getOperand(OpIdx);
3361
- }
3362
- }
3341
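+ /// Set this bundle's operands from \p VL, reordering them first when \p RequireReorder is true.
+ /// NOTE(review): VL[0] is cast to Instruction below, while VLOperands looks up the first Instruction in VL - confirm VL[0] is never poison.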
3342
+ void setOperand(ArrayRef<Value *> VL, const BoUpSLP &R,
3343
+ bool RequireReorder = false) {
3344
+ VLOperands Ops(VL, R);
3345
+ if (RequireReorder)
3346
+ Ops.reorder();
3347
+ for (unsigned I :
3348
+ seq<unsigned>(cast<Instruction>(VL[0])->getNumOperands()))
3349
+ setOperand(I, Ops.getVL(I));
3363
3350
}
3364
3351
3365
3352
/// Reorders operands of the node to the given mask \p Mask.
@@ -8459,7 +8446,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8459
8446
{}, CurrentOrder);
8460
8447
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
8461
8448
8462
- TE->setOperandsInOrder( );
8449
+ TE->setOperand(VL, *this );
8463
8450
buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
8464
8451
return;
8465
8452
}
@@ -8480,27 +8467,26 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8480
8467
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
8481
8468
else
8482
8469
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
8483
- TE->setOperandsInOrder();
8484
8470
break;
8485
8471
case TreeEntry::StridedVectorize:
8486
8472
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
8487
8473
TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
8488
8474
UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
8489
- TE->setOperandsInOrder();
8490
8475
LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
8491
8476
break;
8492
8477
case TreeEntry::ScatterVectorize:
8493
8478
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
8494
8479
TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
8495
8480
UserTreeIdx, ReuseShuffleIndices);
8496
- TE->setOperandsInOrder();
8497
- buildTree_rec(PointerOps, Depth + 1, {TE, 0});
8498
8481
LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
8499
8482
break;
8500
8483
case TreeEntry::CombinedVectorize:
8501
8484
case TreeEntry::NeedToGather:
8502
8485
llvm_unreachable("Unexpected loads state.");
8503
8486
}
8487
+ TE->setOperand(VL, *this);
8488
+ if (State == TreeEntry::ScatterVectorize)
8489
+ buildTree_rec(PointerOps, Depth + 1, {TE, 0});
8504
8490
return;
8505
8491
}
8506
8492
case Instruction::ZExt:
@@ -8538,8 +8524,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8538
8524
ReuseShuffleIndices);
8539
8525
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
8540
8526
8541
- TE->setOperandsInOrder( );
8542
- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8527
+ TE->setOperand(VL, *this );
8528
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
8543
8529
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
8544
8530
if (ShuffleOrOp == Instruction::Trunc) {
8545
8531
ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
@@ -8566,12 +8552,15 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8566
8552
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
8567
8553
8568
8554
ValueList Left, Right;
8555
+ VLOperands Ops(VL, *this);
8569
8556
if (cast<CmpInst>(VL0)->isCommutative()) {
8570
8557
// Commutative predicate - collect + sort operands of the instructions
8571
8558
// so that each side is more likely to have the same opcode.
8572
8559
assert(P0 == CmpInst::getSwappedPredicate(P0) &&
8573
8560
"Commutative Predicate mismatch");
8574
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8561
+ Ops.reorder();
8562
+ Left = Ops.getVL(0);
8563
+ Right = Ops.getVL(1);
8575
8564
} else {
8576
8565
// Collect operands - commute if it uses the swapped predicate.
8577
8566
for (Value *V : VL) {
@@ -8632,20 +8621,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8632
8621
ReuseShuffleIndices);
8633
8622
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
8634
8623
8635
- // Sort operands of the instructions so that each side is more likely to
8636
- // have the same opcode.
8637
- if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
8638
- ValueList Left, Right;
8639
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8640
- TE->setOperand(0, Left);
8641
- TE->setOperand(1, Right);
8642
- buildTree_rec(Left, Depth + 1, {TE, 0});
8643
- buildTree_rec(Right, Depth + 1, {TE, 1});
8644
- return;
8645
- }
8646
-
8647
- TE->setOperandsInOrder();
8648
- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8624
+ TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
8625
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
8649
8626
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
8650
8627
return;
8651
8628
}
@@ -8710,7 +8687,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8710
8687
fixupOrderingIndices(CurrentOrder);
8711
8688
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
8712
8689
ReuseShuffleIndices, CurrentOrder);
8713
- TE->setOperandsInOrder( );
8690
+ TE->setOperand(VL, *this );
8714
8691
buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
8715
8692
if (Consecutive)
8716
8693
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8726,46 +8703,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8726
8703
8727
8704
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
8728
8705
ReuseShuffleIndices);
8729
- // Sort operands of the instructions so that each side is more likely to
8730
- // have the same opcode.
8731
- if (isCommutative(VL0)) {
8732
- ValueList Left, Right;
8733
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8734
- TE->setOperand(0, Left);
8735
- TE->setOperand(1, Right);
8736
- SmallVector<ValueList> Operands;
8737
- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8738
- Operands.emplace_back();
8739
- if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
8740
- continue;
8741
- for (Value *V : VL) {
8742
- auto *CI2 = cast<CallInst>(V);
8743
- Operands.back().push_back(CI2->getArgOperand(I));
8744
- }
8745
- TE->setOperand(I, Operands.back());
8746
- }
8747
- buildTree_rec(Left, Depth + 1, {TE, 0});
8748
- buildTree_rec(Right, Depth + 1, {TE, 1});
8749
- for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8750
- if (Operands[I - 2].empty())
8751
- continue;
8752
- buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
8753
- }
8754
- return;
8755
- }
8756
- TE->setOperandsInOrder();
8757
- for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
8706
+ TE->setOperand(VL, *this, isCommutative(VL0));
8707
+ for (unsigned I : seq<unsigned>(CI->arg_size())) {
8758
8708
// For scalar operands no need to create an entry since no need to
8759
8709
// vectorize it.
8760
8710
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
8761
8711
continue;
8762
- ValueList Operands;
8763
- // Prepare the operand vector.
8764
- for (Value *V : VL) {
8765
- auto *CI2 = cast<CallInst>(V);
8766
- Operands.push_back(CI2->getArgOperand(I));
8767
- }
8768
- buildTree_rec(Operands, Depth + 1, {TE, I});
8712
+ buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
8769
8713
}
8770
8714
return;
8771
8715
}
@@ -8776,43 +8720,37 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8776
8720
8777
8721
// Reorder operands if reordering would enable vectorization.
8778
8722
auto *CI = dyn_cast<CmpInst>(VL0);
8779
- if (isa<BinaryOperator>(VL0) || CI) {
8723
+ if (CI && any_of(VL, [](Value *V) {
8724
+ return !isa<PoisonValue>(V) && !cast<CmpInst>(V)->isCommutative();
8725
+ })) {
8726
+ auto *MainCI = cast<CmpInst>(S.MainOp);
8727
+ auto *AltCI = cast<CmpInst>(S.AltOp);
8728
+ CmpInst::Predicate MainP = MainCI->getPredicate();
8729
+ CmpInst::Predicate AltP = AltCI->getPredicate();
8730
+ assert(MainP != AltP &&
8731
+ "Expected different main/alternate predicates.");
8780
8732
ValueList Left, Right;
8781
- if (!CI || all_of(VL, [](Value *V) {
8782
- return isa<PoisonValue>(V) || cast<CmpInst>(V)->isCommutative();
8783
- })) {
8784
- reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8785
- } else {
8786
- auto *MainCI = cast<CmpInst>(S.MainOp);
8787
- auto *AltCI = cast<CmpInst>(S.AltOp);
8788
- CmpInst::Predicate MainP = MainCI->getPredicate();
8789
- CmpInst::Predicate AltP = AltCI->getPredicate();
8790
- assert(MainP != AltP &&
8791
- "Expected different main/alternate predicates.");
8792
- // Collect operands - commute if it uses the swapped predicate or
8793
- // alternate operation.
8794
- for (Value *V : VL) {
8795
- if (isa<PoisonValue>(V)) {
8796
- Left.push_back(
8797
- PoisonValue::get(MainCI->getOperand(0)->getType()));
8798
- Right.push_back(
8799
- PoisonValue::get(MainCI->getOperand(1)->getType()));
8800
- continue;
8801
- }
8802
- auto *Cmp = cast<CmpInst>(V);
8803
- Value *LHS = Cmp->getOperand(0);
8804
- Value *RHS = Cmp->getOperand(1);
8733
+ // Collect operands - commute if it uses the swapped predicate or
8734
+ // alternate operation.
8735
+ for (Value *V : VL) {
8736
+ if (isa<PoisonValue>(V)) {
8737
+ Left.push_back(PoisonValue::get(MainCI->getOperand(0)->getType()));
8738
+ Right.push_back(PoisonValue::get(MainCI->getOperand(1)->getType()));
8739
+ continue;
8740
+ }
8741
+ auto *Cmp = cast<CmpInst>(V);
8742
+ Value *LHS = Cmp->getOperand(0);
8743
+ Value *RHS = Cmp->getOperand(1);
8805
8744
8806
- if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8807
- if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8808
- std::swap(LHS, RHS);
8809
- } else {
8810
- if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8811
- std::swap(LHS, RHS);
8812
- }
8813
- Left.push_back(LHS);
8814
- Right.push_back(RHS);
8745
+ if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8746
+ if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8747
+ std::swap(LHS, RHS);
8748
+ } else {
8749
+ if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8750
+ std::swap(LHS, RHS);
8815
8751
}
8752
+ Left.push_back(LHS);
8753
+ Right.push_back(RHS);
8816
8754
}
8817
8755
TE->setOperand(0, Left);
8818
8756
TE->setOperand(1, Right);
@@ -8821,8 +8759,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8821
8759
return;
8822
8760
}
8823
8761
8824
- TE->setOperandsInOrder( );
8825
- for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8762
+ TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) || CI );
8763
+ for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
8826
8764
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
8827
8765
return;
8828
8766
}
@@ -13526,21 +13464,6 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
13526
13464
return Cost;
13527
13465
}
13528
13466
13529
- // Perform operand reordering on the instructions in VL and return the reordered
13530
- // operands in Left and Right.
13531
- void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
13532
- SmallVectorImpl<Value *> &Left,
13533
- SmallVectorImpl<Value *> &Right,
13534
- const BoUpSLP &R) {
13535
- if (VL.empty())
13536
- return;
13537
- VLOperands Ops(VL, R);
13538
- // Reorder the operands in place.
13539
- Ops.reorder();
13540
- Left = Ops.getVL(0);
13541
- Right = Ops.getVL(1);
13542
- }
13543
-
13544
13467
Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
13545
13468
auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
13546
13469
if (Res)
0 commit comments