Skip to content

Commit fa2bbea

Browse files
committed
Revert "[SLP]Improve minbitwidth analysis for operands of IToFP and ICmp instructions."
This reverts commit 899855d to fix the issue reported in https://lab.llvm.org/buildbot/#/builders/165/builds/51659.
1 parent 899855d commit fa2bbea

File tree

3 files changed

+16
-48
lines changed

3 files changed

+16
-48
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 10 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -1107,7 +1107,7 @@ class BoUpSLP {
11071107
MinBWs.clear();
11081108
ReductionBitWidth = 0;
11091109
CastMaxMinBWSizes.reset();
1110-
ExtraBitWidthNodes.clear();
1110+
TruncNodes.clear();
11111111
InstrElementSize.clear();
11121112
UserIgnoreList = nullptr;
11131113
PostponedGathers.clear();
@@ -3683,9 +3683,8 @@ class BoUpSLP {
36833683
/// type sizes, used in the tree.
36843684
std::optional<std::pair<unsigned, unsigned>> CastMaxMinBWSizes;
36853685

3686-
/// Indices of the vectorized nodes, which are supposed to be the roots of the new
3687-
/// bitwidth analysis attempt, like trunc, IToFP or ICmp.
3688-
DenseSet<unsigned> ExtraBitWidthNodes;
3686+
/// Indices of the vectorized trunc nodes.
3687+
DenseSet<unsigned> TruncNodes;
36893688
};
36903689

36913690
} // end namespace slpvectorizer
@@ -6613,18 +6612,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
66136612
PrevMaxBW),
66146613
std::min<unsigned>(DL->getTypeSizeInBits(VL0->getType()),
66156614
PrevMinBW));
6616-
ExtraBitWidthNodes.insert(VectorizableTree.size() + 1);
6617-
} else if (ShuffleOrOp == Instruction::SIToFP ||
6618-
ShuffleOrOp == Instruction::UIToFP) {
6619-
unsigned NumSignBits =
6620-
ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT);
6621-
if (auto *OpI = dyn_cast<Instruction>(VL0->getOperand(0))) {
6622-
APInt Mask = DB->getDemandedBits(OpI);
6623-
NumSignBits = std::max(NumSignBits, Mask.countl_zero());
6624-
}
6625-
if (NumSignBits * 2 >=
6626-
DL->getTypeSizeInBits(VL0->getOperand(0)->getType()))
6627-
ExtraBitWidthNodes.insert(VectorizableTree.size() + 1);
6615+
TruncNodes.insert(VectorizableTree.size());
66286616
}
66296617
TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
66306618
ReuseShuffleIndicies);
@@ -6672,18 +6660,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
66726660
TE->setOperand(1, Right);
66736661
buildTree_rec(Left, Depth + 1, {TE, 0});
66746662
buildTree_rec(Right, Depth + 1, {TE, 1});
6675-
if (ShuffleOrOp == Instruction::ICmp) {
6676-
unsigned NumSignBits0 =
6677-
ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT);
6678-
if (NumSignBits0 * 2 >=
6679-
DL->getTypeSizeInBits(VL0->getOperand(0)->getType()))
6680-
ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
6681-
unsigned NumSignBits1 =
6682-
ComputeNumSignBits(VL0->getOperand(1), *DL, 0, AC, nullptr, DT);
6683-
if (NumSignBits1 * 2 >=
6684-
DL->getTypeSizeInBits(VL0->getOperand(1)->getType()))
6685-
ExtraBitWidthNodes.insert(getOperandEntry(TE, 1)->Idx);
6686-
}
66876663
return;
66886664
}
66896665
case Instruction::Select:
@@ -14326,8 +14302,7 @@ void BoUpSLP::computeMinimumValueSizes() {
1432614302
bool IsStoreOrInsertElt =
1432714303
VectorizableTree.front()->getOpcode() == Instruction::Store ||
1432814304
VectorizableTree.front()->getOpcode() == Instruction::InsertElement;
14329-
if ((IsStoreOrInsertElt || UserIgnoreList) &&
14330-
ExtraBitWidthNodes.size() <= 1 &&
14305+
if ((IsStoreOrInsertElt || UserIgnoreList) && TruncNodes.size() <= 1 &&
1433114306
(!CastMaxMinBWSizes || CastMaxMinBWSizes->second == 0 ||
1433214307
CastMaxMinBWSizes->first / CastMaxMinBWSizes->second <= 2))
1433314308
return;
@@ -14531,21 +14506,16 @@ void BoUpSLP::computeMinimumValueSizes() {
1453114506
IsTopRoot = false;
1453214507
IsProfitableToDemoteRoot = true;
1453314508

14534-
if (ExtraBitWidthNodes.empty()) {
14509+
if (TruncNodes.empty()) {
1453514510
NodeIdx = VectorizableTree.size();
1453614511
} else {
1453714512
unsigned NewIdx = 0;
1453814513
do {
14539-
NewIdx = *ExtraBitWidthNodes.begin();
14540-
ExtraBitWidthNodes.erase(ExtraBitWidthNodes.begin());
14541-
} while (NewIdx <= NodeIdx && !ExtraBitWidthNodes.empty());
14514+
NewIdx = *TruncNodes.begin() + 1;
14515+
TruncNodes.erase(TruncNodes.begin());
14516+
} while (NewIdx <= NodeIdx && !TruncNodes.empty());
1454214517
NodeIdx = NewIdx;
14543-
IsTruncRoot = any_of(
14544-
VectorizableTree[NewIdx]->UserTreeIndices, [](const EdgeInfo &EI) {
14545-
return EI.EdgeIdx == 0 &&
14546-
EI.UserTE->getOpcode() == Instruction::ICmp &&
14547-
!EI.UserTE->isAltShuffle();
14548-
});
14518+
IsTruncRoot = true;
1454914519
}
1455014520

1455114521
// If the maximum bit width we compute is less than the width of the roots'

llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ define i1 @test(ptr noalias %0, i64 %1, ptr noalias %p, ptr %p1) {
1919
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <2 x i24> [[TMP8]], <i24 24, i24 24>
2020
; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP9]], <2 x i24> <i24 23, i24 23>, <2 x i24> [[TMP8]]
2121
; CHECK-NEXT: [[TMP23:%.*]] = trunc <2 x i24> [[TMP10]] to <2 x i8>
22-
; CHECK-NEXT: [[TMP26:%.*]] = zext <2 x i8> [[TMP23]] to <2 x i32>
23-
; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i32> [[TMP26]], <i32 254, i32 254>
22+
; CHECK-NEXT: [[TMP11:%.*]] = zext <2 x i8> [[TMP23]] to <2 x i32>
23+
; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i32> [[TMP11]], <i32 254, i32 254>
2424
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <2 x i32> [[TMP12]], <i32 4, i32 4>
2525
; CHECK-NEXT: [[TMP25:%.*]] = select <2 x i1> [[TMP13]], <2 x i8> <i8 2, i8 2>, <2 x i8> [[TMP23]]
2626
; CHECK-NEXT: [[TMP14:%.*]] = zext <2 x i8> [[TMP25]] to <2 x i32>

llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,12 @@ define void @test() {
1010
; CHECK-NEXT: [[TMP3:%.*]] = select i1 false, i32 0, i32 0
1111
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8> <i8 poison, i8 0, i8 poison, i8 poison>, i8 [[TMP1]], i32 0
1212
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
13-
; CHECK-NEXT: [[TMP15:%.*]] = trunc <4 x i8> [[TMP5]] to <4 x i1>
13+
; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i8> [[TMP5]] to <4 x i32>
1414
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> zeroinitializer
1515
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i8> [[TMP7]], zeroinitializer
16-
; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i8> [[TMP8]] to <4 x i1>
17-
; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> zeroinitializer, [[TMP15]]
18-
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i1> [[TMP9]], [[TMP10]]
19-
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i1> [[TMP15]], <4 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
20-
; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i1> [[TMP16]] to <4 x i32>
16+
; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i8> [[TMP8]] to <4 x i32>
17+
; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i32> zeroinitializer, [[TMP6]]
18+
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i32> [[TMP9]], [[TMP10]]
2119
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
2220
; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> zeroinitializer
2321
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP13]])

0 commit comments

Comments
 (0)