@@ -1107,7 +1107,7 @@ class BoUpSLP {
1107
1107
MinBWs.clear();
1108
1108
ReductionBitWidth = 0;
1109
1109
CastMaxMinBWSizes.reset();
1110
- TruncNodes .clear();
1110
+ ExtraBitWidthNodes .clear();
1111
1111
InstrElementSize.clear();
1112
1112
UserIgnoreList = nullptr;
1113
1113
PostponedGathers.clear();
@@ -3683,8 +3683,9 @@ class BoUpSLP {
3683
3683
/// type sizes, used in the tree.
3684
3684
std::optional<std::pair<unsigned, unsigned>> CastMaxMinBWSizes;
3685
3685
3686
- /// Indices of the vectorized trunc nodes.
3687
- DenseSet<unsigned> TruncNodes;
3686
+ /// Indices of the vectorized nodes, which are supposed to be the roots of the
3687
+ /// new bitwidth analysis attempt, like trunc, IToFP or ICmp.
3688
+ DenseSet<unsigned> ExtraBitWidthNodes;
3688
3689
};
3689
3690
3690
3691
} // end namespace slpvectorizer
@@ -6612,7 +6613,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
6612
6613
PrevMaxBW),
6613
6614
std::min<unsigned>(DL->getTypeSizeInBits(VL0->getType()),
6614
6615
PrevMinBW));
6615
- TruncNodes.insert(VectorizableTree.size());
6616
+ ExtraBitWidthNodes.insert(VectorizableTree.size() + 1);
6617
+ } else if (ShuffleOrOp == Instruction::SIToFP ||
6618
+ ShuffleOrOp == Instruction::UIToFP) {
6619
+ unsigned NumSignBits =
6620
+ ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT);
6621
+ if (auto *OpI = dyn_cast<Instruction>(VL0->getOperand(0))) {
6622
+ APInt Mask = DB->getDemandedBits(OpI);
6623
+ NumSignBits = std::max(NumSignBits, Mask.countl_zero());
6624
+ }
6625
+ if (NumSignBits * 2 >=
6626
+ DL->getTypeSizeInBits(VL0->getOperand(0)->getType()))
6627
+ ExtraBitWidthNodes.insert(VectorizableTree.size() + 1);
6616
6628
}
6617
6629
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
6618
6630
ReuseShuffleIndicies);
@@ -6660,6 +6672,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
6660
6672
TE->setOperand(1, Right);
6661
6673
buildTree_rec(Left, Depth + 1, {TE, 0});
6662
6674
buildTree_rec(Right, Depth + 1, {TE, 1});
6675
+ if (ShuffleOrOp == Instruction::ICmp) {
6676
+ unsigned NumSignBits0 =
6677
+ ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT);
6678
+ if (NumSignBits0 * 2 >=
6679
+ DL->getTypeSizeInBits(VL0->getOperand(0)->getType()))
6680
+ ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
6681
+ unsigned NumSignBits1 =
6682
+ ComputeNumSignBits(VL0->getOperand(1), *DL, 0, AC, nullptr, DT);
6683
+ if (NumSignBits1 * 2 >=
6684
+ DL->getTypeSizeInBits(VL0->getOperand(1)->getType()))
6685
+ ExtraBitWidthNodes.insert(getOperandEntry(TE, 1)->Idx);
6686
+ }
6663
6687
return;
6664
6688
}
6665
6689
case Instruction::Select:
@@ -14302,7 +14326,8 @@ void BoUpSLP::computeMinimumValueSizes() {
14302
14326
bool IsStoreOrInsertElt =
14303
14327
VectorizableTree.front()->getOpcode() == Instruction::Store ||
14304
14328
VectorizableTree.front()->getOpcode() == Instruction::InsertElement;
14305
- if ((IsStoreOrInsertElt || UserIgnoreList) && TruncNodes.size() <= 1 &&
14329
+ if ((IsStoreOrInsertElt || UserIgnoreList) &&
14330
+ ExtraBitWidthNodes.size() <= 1 &&
14306
14331
(!CastMaxMinBWSizes || CastMaxMinBWSizes->second == 0 ||
14307
14332
CastMaxMinBWSizes->first / CastMaxMinBWSizes->second <= 2))
14308
14333
return;
@@ -14506,16 +14531,21 @@ void BoUpSLP::computeMinimumValueSizes() {
14506
14531
IsTopRoot = false;
14507
14532
IsProfitableToDemoteRoot = true;
14508
14533
14509
- if (TruncNodes .empty()) {
14534
+ if (ExtraBitWidthNodes .empty()) {
14510
14535
NodeIdx = VectorizableTree.size();
14511
14536
} else {
14512
14537
unsigned NewIdx = 0;
14513
14538
do {
14514
- NewIdx = *TruncNodes .begin() + 1 ;
14515
- TruncNodes .erase(TruncNodes .begin());
14516
- } while (NewIdx <= NodeIdx && !TruncNodes .empty());
14539
+ NewIdx = *ExtraBitWidthNodes .begin();
14540
+ ExtraBitWidthNodes .erase(ExtraBitWidthNodes .begin());
14541
+ } while (NewIdx <= NodeIdx && !ExtraBitWidthNodes .empty());
14517
14542
NodeIdx = NewIdx;
14518
- IsTruncRoot = true;
14543
+ IsTruncRoot = any_of(
14544
+ VectorizableTree[NewIdx]->UserTreeIndices, [](const EdgeInfo &EI) {
14545
+ return EI.EdgeIdx == 0 &&
14546
+ EI.UserTE->getOpcode() == Instruction::ICmp &&
14547
+ !EI.UserTE->isAltShuffle();
14548
+ });
14519
14549
}
14520
14550
14521
14551
// If the maximum bit width we compute is less than the width of the roots'
0 commit comments