Skip to content

Commit 556ec4a

Browse files
authored
[SLP] Pass operand info to getCmpSelInstrCost (#109998)
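Depending on the constant, selects with constant arms can have highly varying cost. This adjusts SLP to pass the actual operand info (computed with getOperandInfo) to getCmpSelInstrCost, instead of the generic {OK_AnyValue, OP_None} placeholders, using the new API introduced in d288574. Fixes #109466.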
1 parent 808c498 commit 556ec4a

File tree

2 files changed

+15
-11
lines changed

2 files changed

+15
-11
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10669,8 +10669,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
1066910669

1067010670
InstructionCost ScalarCost = TTI->getCmpSelInstrCost(
1067110671
E->getOpcode(), OrigScalarTy, Builder.getInt1Ty(), CurrentPred,
10672-
CostKind, {TTI::OK_AnyValue, TTI::OP_None},
10673-
{TTI::OK_AnyValue, TTI::OP_None}, VI);
10672+
CostKind, getOperandInfo(VI->getOperand(0)),
10673+
getOperandInfo(VI->getOperand(1)), VI);
1067410674
InstructionCost IntrinsicCost = GetMinMaxCost(OrigScalarTy, VI);
1067510675
if (IntrinsicCost.isValid())
1067610676
ScalarCost = IntrinsicCost;
@@ -10682,8 +10682,8 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
1068210682

1068310683
InstructionCost VecCost =
1068410684
TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, VecPred,
10685-
CostKind, {TTI::OK_AnyValue, TTI::OP_None},
10686-
{TTI::OK_AnyValue, TTI::OP_None}, VL0);
10685+
CostKind, getOperandInfo(E->getOperand(0)),
10686+
getOperandInfo(E->getOperand(1)), VL0);
1068710687
if (auto *SI = dyn_cast<SelectInst>(VL0)) {
1068810688
auto *CondType =
1068910689
getWidenedType(SI->getCondition()->getType(), VL.size());

llvm/test/Transforms/SLPVectorizer/RISCV/select-profitability.ll

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -31,13 +31,17 @@ define i32 @pow2_zero_constant_shift(i16 zeroext %a, i16 zeroext %b, i16 zeroext
3131
define i32 @pow2_zero_variable_shift(i16 zeroext %a, i16 zeroext %b, i16 zeroext %c, i16 zeroext %d) {
3232
; CHECK-LABEL: define i32 @pow2_zero_variable_shift(
3333
; CHECK-SAME: i16 zeroext [[A:%.*]], i16 zeroext [[B:%.*]], i16 zeroext [[C:%.*]], i16 zeroext [[D:%.*]]) #[[ATTR0]] {
34-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i16> poison, i16 [[A]], i32 0
35-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> [[TMP1]], i16 [[B]], i32 1
36-
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[C]], i32 2
37-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[D]], i32 3
38-
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq <4 x i16> [[TMP4]], <i16 1, i16 1, i16 1, i16 1>
39-
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> <i32 524288, i32 262144, i32 131072, i32 65536>, <4 x i32> zeroinitializer
40-
; CHECK-NEXT: [[OR_RDX2:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP6]])
34+
; CHECK-NEXT: [[T39_I0:%.*]] = icmp eq i16 [[A]], 1
35+
; CHECK-NEXT: [[T39_I1:%.*]] = icmp eq i16 [[B]], 1
36+
; CHECK-NEXT: [[T39_I2:%.*]] = icmp eq i16 [[C]], 1
37+
; CHECK-NEXT: [[T39_I3:%.*]] = icmp eq i16 [[D]], 1
38+
; CHECK-NEXT: [[T40_I0:%.*]] = select i1 [[T39_I0]], i32 524288, i32 0
39+
; CHECK-NEXT: [[T40_I1:%.*]] = select i1 [[T39_I1]], i32 262144, i32 0
40+
; CHECK-NEXT: [[T40_I2:%.*]] = select i1 [[T39_I2]], i32 131072, i32 0
41+
; CHECK-NEXT: [[T40_I3:%.*]] = select i1 [[T39_I3]], i32 65536, i32 0
42+
; CHECK-NEXT: [[OR_RDX0:%.*]] = or i32 [[T40_I0]], [[T40_I1]]
43+
; CHECK-NEXT: [[OR_RDX1:%.*]] = or i32 [[T40_I2]], [[T40_I3]]
44+
; CHECK-NEXT: [[OR_RDX2:%.*]] = or i32 [[OR_RDX0]], [[OR_RDX1]]
4145
; CHECK-NEXT: ret i32 [[OR_RDX2]]
4246
;
4347
%t39.i0 = icmp eq i16 %a, 1

0 commit comments

Comments
 (0)