@@ -2017,9 +2017,6 @@ class BoUpSLP {
2017
2017
2018
2018
/// A vector of operand vectors.
2019
2019
SmallVector<OperandDataVec, 4> OpsVec;
2020
- /// When VL[0] is IntrinsicInst, ArgSize is CallBase::arg_size. When VL[0]
2021
- /// is not IntrinsicInst, ArgSize is User::getNumOperands.
2022
- unsigned ArgSize = 0;
2023
2020
2024
2021
const TargetLibraryInfo &TLI;
2025
2022
const DataLayout &DL;
@@ -2407,12 +2404,10 @@ class BoUpSLP {
2407
2404
assert(!VL.empty() && "Bad VL");
2408
2405
assert((empty() || VL.size() == getNumLanes()) &&
2409
2406
"Expected same number of lanes");
2410
- // IntrinsicInst::isCommutative returns true if swapping the first "two"
2411
- // arguments to the intrinsic produces the same result.
2412
2407
constexpr unsigned IntrinsicNumOperands = 2;
2413
2408
auto *VL0 = cast<Instruction>(*find_if(VL, IsaPred<Instruction>));
2414
- unsigned NumOperands = VL0->getNumOperands();
2415
- ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands ;
2409
+ unsigned NumOperands = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands
2410
+ : VL0->getNumOperands() ;
2416
2411
OpsVec.resize(NumOperands);
2417
2412
unsigned NumLanes = VL.size();
2418
2413
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2445,7 +2440,7 @@ class BoUpSLP {
2445
2440
}
2446
2441
2447
2442
/// \returns the number of operands, i.e. the number of operand vectors held in OpsVec.
2448
- unsigned getNumOperands() const { return ArgSize ; }
2443
+ unsigned getNumOperands() const { return OpsVec.size() ; } // OpsVec is resized to NumOperands when operands are appended (IntrinsicNumOperands == 2 for an IntrinsicInst)
2449
2444
2450
2445
/// \returns the number of lanes, read from the first operand vector; OpsVec must not be empty.
2451
2446
unsigned getNumLanes() const { return OpsVec[0].size(); }
@@ -2622,8 +2617,7 @@ class BoUpSLP {
2622
2617
ArrayRef<OperandData> Op0 = OpsVec.front();
2623
2618
for (const OperandData &Data : Op0)
2624
2619
UniqueValues.insert(Data.V);
2625
- for (ArrayRef<OperandData> Op :
2626
- ArrayRef(OpsVec).slice(1, getNumOperands() - 1)) {
2620
+ for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
2627
2621
if (any_of(Op, [&UniqueValues](const OperandData &Data) {
2628
2622
return !UniqueValues.contains(Data.V);
2629
2623
}))
@@ -3144,6 +3138,13 @@ class BoUpSLP {
3144
3138
SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
3145
3139
8> &GatheredLoads);
3146
3140
3141
+ /// Reorder commutative or alt operands to get better probability of
3142
+ /// generating vectorized code. Only operands 0 and 1 of the instructions in \p VL are returned.
3143
+ static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
3144
+ SmallVectorImpl<Value *> &Left, // out: operand 0 of each lane after reordering
3145
+ SmallVectorImpl<Value *> &Right, // out: operand 1 of each lane after reordering
3146
+ const BoUpSLP &R); // passed through to the VLOperands helper that performs the reordering
3147
+
3147
3148
/// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
3148
3149
/// users of \p TE and collects the stores. It returns the map from the store
3149
3150
/// pointers to the collected stores.
@@ -3338,15 +3339,27 @@ class BoUpSLP {
3338
3339
copy(OpVL, Operands[OpIdx].begin());
3339
3340
}
3340
3341
3341
- /// Set this bundle's operand from \p VL.
3342
- void setOperand(ArrayRef<Value *> VL, const BoUpSLP &R,
3343
- bool RequireReorder = false) {
3344
- VLOperands Ops(VL, R);
3345
- if (RequireReorder)
3346
- Ops.reorder();
3347
- for (unsigned I :
3348
- seq<unsigned>(cast<Instruction>(VL[0])->getNumOperands()))
3349
- setOperand(I, Ops.getVL(I));
3342
+ /// Set the operands of this bundle in their original order: Operands[OpIdx][Lane] becomes operand OpIdx of Scalars[Lane], with no commutative reordering.
3343
+ void setOperandsInOrder() {
3344
+ assert(Operands.empty() && "Already initialized?"); // must run before any operand of this entry has been set
3345
+ auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>)); // first scalar that is an Instruction; it fixes the operand count and operand types
3346
+ Operands.resize(I0->getNumOperands()); // one operand vector per operand of I0
3347
+ unsigned NumLanes = Scalars.size();
3348
+ for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
3349
+ OpIdx != NumOperands; ++OpIdx) {
3350
+ Operands[OpIdx].resize(NumLanes);
3351
+ for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3352
+ if (isa<PoisonValue>(Scalars[Lane])) { // a poison scalar has no operands of its own
3353
+ Operands[OpIdx][Lane] =
3354
+ PoisonValue::get(I0->getOperand(OpIdx)->getType()); // fill the lane with poison of I0's matching operand type
3355
+ continue;
3356
+ }
3357
+ auto *I = cast<Instruction>(Scalars[Lane]); // every non-poison lane is expected to be an Instruction
3358
+ assert(I->getNumOperands() == NumOperands &&
3359
+ "Expected same number of operands");
3360
+ Operands[OpIdx][Lane] = I->getOperand(OpIdx); // keep the operand at its original IR position
3361
+ }
3362
+ }
3350
3363
}
3351
3364
3352
3365
/// Reorders operands of the node to the given mask \p Mask.
@@ -8446,7 +8459,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8446
8459
{}, CurrentOrder);
8447
8460
LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
8448
8461
8449
- TE->setOperand(VL, *this );
8462
+ TE->setOperandsInOrder( );
8450
8463
buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
8451
8464
return;
8452
8465
}
@@ -8467,26 +8480,27 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8467
8480
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
8468
8481
else
8469
8482
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
8483
+ TE->setOperandsInOrder();
8470
8484
break;
8471
8485
case TreeEntry::StridedVectorize:
8472
8486
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
8473
8487
TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
8474
8488
UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
8489
+ TE->setOperandsInOrder();
8475
8490
LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
8476
8491
break;
8477
8492
case TreeEntry::ScatterVectorize:
8478
8493
// Vectorizing non-consecutive loads with `llvm.masked.gather`.
8479
8494
TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
8480
8495
UserTreeIdx, ReuseShuffleIndices);
8496
+ TE->setOperandsInOrder();
8497
+ buildTree_rec(PointerOps, Depth + 1, {TE, 0});
8481
8498
LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
8482
8499
break;
8483
8500
case TreeEntry::CombinedVectorize:
8484
8501
case TreeEntry::NeedToGather:
8485
8502
llvm_unreachable("Unexpected loads state.");
8486
8503
}
8487
- TE->setOperand(VL, *this);
8488
- if (State == TreeEntry::ScatterVectorize)
8489
- buildTree_rec(PointerOps, Depth + 1, {TE, 0});
8490
8504
return;
8491
8505
}
8492
8506
case Instruction::ZExt:
@@ -8524,8 +8538,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8524
8538
ReuseShuffleIndices);
8525
8539
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
8526
8540
8527
- TE->setOperand(VL, *this );
8528
- for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
8541
+ TE->setOperandsInOrder( );
8542
+ for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8529
8543
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
8530
8544
if (ShuffleOrOp == Instruction::Trunc) {
8531
8545
ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
@@ -8552,15 +8566,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8552
8566
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
8553
8567
8554
8568
ValueList Left, Right;
8555
- VLOperands Ops(VL, *this);
8556
8569
if (cast<CmpInst>(VL0)->isCommutative()) {
8557
8570
// Commutative predicate - collect + sort operands of the instructions
8558
8571
// so that each side is more likely to have the same opcode.
8559
8572
assert(P0 == CmpInst::getSwappedPredicate(P0) &&
8560
8573
"Commutative Predicate mismatch");
8561
- Ops.reorder();
8562
- Left = Ops.getVL(0);
8563
- Right = Ops.getVL(1);
8574
+ reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8564
8575
} else {
8565
8576
// Collect operands - commute if it uses the swapped predicate.
8566
8577
for (Value *V : VL) {
@@ -8621,8 +8632,20 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8621
8632
ReuseShuffleIndices);
8622
8633
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
8623
8634
8624
- TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
8625
- for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
8635
+ // Sort operands of the instructions so that each side is more likely to
8636
+ // have the same opcode.
8637
+ if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
8638
+ ValueList Left, Right;
8639
+ reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8640
+ TE->setOperand(0, Left);
8641
+ TE->setOperand(1, Right);
8642
+ buildTree_rec(Left, Depth + 1, {TE, 0});
8643
+ buildTree_rec(Right, Depth + 1, {TE, 1});
8644
+ return;
8645
+ }
8646
+
8647
+ TE->setOperandsInOrder();
8648
+ for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8626
8649
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
8627
8650
return;
8628
8651
}
@@ -8687,7 +8710,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8687
8710
fixupOrderingIndices(CurrentOrder);
8688
8711
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
8689
8712
ReuseShuffleIndices, CurrentOrder);
8690
- TE->setOperand(VL, *this );
8713
+ TE->setOperandsInOrder( );
8691
8714
buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
8692
8715
if (Consecutive)
8693
8716
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8703,13 +8726,46 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8703
8726
8704
8727
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
8705
8728
ReuseShuffleIndices);
8706
- TE->setOperand(VL, *this, isCommutative(VL0));
8707
- for (unsigned I : seq<unsigned>(CI->arg_size())) {
8729
+ // Sort operands of the instructions so that each side is more likely to
8730
+ // have the same opcode.
8731
+ if (isCommutative(VL0)) {
8732
+ ValueList Left, Right;
8733
+ reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8734
+ TE->setOperand(0, Left);
8735
+ TE->setOperand(1, Right);
8736
+ SmallVector<ValueList> Operands;
8737
+ for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8738
+ Operands.emplace_back();
8739
+ if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
8740
+ continue;
8741
+ for (Value *V : VL) {
8742
+ auto *CI2 = cast<CallInst>(V);
8743
+ Operands.back().push_back(CI2->getArgOperand(I));
8744
+ }
8745
+ TE->setOperand(I, Operands.back());
8746
+ }
8747
+ buildTree_rec(Left, Depth + 1, {TE, 0});
8748
+ buildTree_rec(Right, Depth + 1, {TE, 1});
8749
+ for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
8750
+ if (Operands[I - 2].empty())
8751
+ continue;
8752
+ buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
8753
+ }
8754
+ return;
8755
+ }
8756
+ TE->setOperandsInOrder();
8757
+ for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
8708
8758
// For scalar operands no need to create an entry since no need to
8709
8759
// vectorize it.
8710
8760
if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
8711
8761
continue;
8712
- buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
8762
+ ValueList Operands;
8763
+ // Prepare the operand vector.
8764
+ for (Value *V : VL) {
8765
+ auto *CI2 = cast<CallInst>(V);
8766
+ Operands.push_back(CI2->getArgOperand(I));
8767
+ }
8768
+ buildTree_rec(Operands, Depth + 1, {TE, I});
8713
8769
}
8714
8770
return;
8715
8771
}
@@ -8720,37 +8776,43 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8720
8776
8721
8777
// Reorder operands if reordering would enable vectorization.
8722
8778
auto *CI = dyn_cast<CmpInst>(VL0);
8723
- if (CI && any_of(VL, [](Value *V) {
8724
- return !isa<PoisonValue>(V) && !cast<CmpInst>(V)->isCommutative();
8725
- })) {
8726
- auto *MainCI = cast<CmpInst>(S.MainOp);
8727
- auto *AltCI = cast<CmpInst>(S.AltOp);
8728
- CmpInst::Predicate MainP = MainCI->getPredicate();
8729
- CmpInst::Predicate AltP = AltCI->getPredicate();
8730
- assert(MainP != AltP &&
8731
- "Expected different main/alternate predicates.");
8779
+ if (isa<BinaryOperator>(VL0) || CI) {
8732
8780
ValueList Left, Right;
8733
- // Collect operands - commute if it uses the swapped predicate or
8734
- // alternate operation.
8735
- for (Value *V : VL) {
8736
- if (isa<PoisonValue>(V)) {
8737
- Left.push_back(PoisonValue::get(MainCI->getOperand(0)->getType()));
8738
- Right.push_back(PoisonValue::get(MainCI->getOperand(1)->getType()));
8739
- continue;
8740
- }
8741
- auto *Cmp = cast<CmpInst>(V);
8742
- Value *LHS = Cmp->getOperand(0);
8743
- Value *RHS = Cmp->getOperand(1);
8781
+ if (!CI || all_of(VL, [](Value *V) {
8782
+ return isa<PoisonValue>(V) || cast<CmpInst>(V)->isCommutative();
8783
+ })) {
8784
+ reorderInputsAccordingToOpcode(VL, Left, Right, *this);
8785
+ } else {
8786
+ auto *MainCI = cast<CmpInst>(S.MainOp);
8787
+ auto *AltCI = cast<CmpInst>(S.AltOp);
8788
+ CmpInst::Predicate MainP = MainCI->getPredicate();
8789
+ CmpInst::Predicate AltP = AltCI->getPredicate();
8790
+ assert(MainP != AltP &&
8791
+ "Expected different main/alternate predicates.");
8792
+ // Collect operands - commute if it uses the swapped predicate or
8793
+ // alternate operation.
8794
+ for (Value *V : VL) {
8795
+ if (isa<PoisonValue>(V)) {
8796
+ Left.push_back(
8797
+ PoisonValue::get(MainCI->getOperand(0)->getType()));
8798
+ Right.push_back(
8799
+ PoisonValue::get(MainCI->getOperand(1)->getType()));
8800
+ continue;
8801
+ }
8802
+ auto *Cmp = cast<CmpInst>(V);
8803
+ Value *LHS = Cmp->getOperand(0);
8804
+ Value *RHS = Cmp->getOperand(1);
8744
8805
8745
- if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8746
- if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8747
- std::swap(LHS, RHS);
8748
- } else {
8749
- if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8750
- std::swap(LHS, RHS);
8806
+ if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
8807
+ if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8808
+ std::swap(LHS, RHS);
8809
+ } else {
8810
+ if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
8811
+ std::swap(LHS, RHS);
8812
+ }
8813
+ Left.push_back(LHS);
8814
+ Right.push_back(RHS);
8751
8815
}
8752
- Left.push_back(LHS);
8753
- Right.push_back(RHS);
8754
8816
}
8755
8817
TE->setOperand(0, Left);
8756
8818
TE->setOperand(1, Right);
@@ -8759,8 +8821,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
8759
8821
return;
8760
8822
}
8761
8823
8762
- TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) || CI );
8763
- for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
8824
+ TE->setOperandsInOrder( );
8825
+ for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
8764
8826
buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
8765
8827
return;
8766
8828
}
@@ -13465,6 +13527,21 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
13465
13527
return Cost;
13466
13528
}
13467
13529
13530
+ // Perform operand reordering on the instructions in VL and return the reordered
13531
+ // operands in Left and Right. Only operand vectors 0 and 1 are returned; an empty VL leaves Left and Right unchanged.
13532
+ void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
13533
+ SmallVectorImpl<Value *> &Left,
13534
+ SmallVectorImpl<Value *> &Right,
13535
+ const BoUpSLP &R) {
13536
+ if (VL.empty()) // nothing to reorder; outputs are not touched
13537
+ return;
13538
+ VLOperands Ops(VL, R); // collects the operands of every lane in VL
13539
+ // Reorder the operands in place.
13540
+ Ops.reorder();
13541
+ Left = Ops.getVL(0); // operand 0 of each lane after reordering
13542
+ Right = Ops.getVL(1); // operand 1 of each lane after reordering
13543
+ }
13544
+
13468
13545
Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
13469
13546
auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
13470
13547
if (Res)
0 commit comments