Skip to content

Commit ffe640e

Browse files
committed
[SLP] Make getSameOpcode support different instructions if they have same semantics. (llvm#112181)
1 parent c50370c commit ffe640e

14 files changed

+323
-158
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 207 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -850,8 +850,123 @@ class InstructionsState {
850850
static InstructionsState invalid() { return {nullptr, nullptr}; }
851851
};
852852

853+
struct InterchangeableInstruction {
854+
unsigned Opcode;
855+
SmallVector<Value *> Ops;
856+
template <class... ArgTypes>
857+
InterchangeableInstruction(unsigned Opcode, ArgTypes &&...Args)
858+
: Opcode(Opcode), Ops{std::forward<decltype(Args)>(Args)...} {}
859+
};
860+
861+
bool operator<(const InterchangeableInstruction &LHS,
862+
const InterchangeableInstruction &RHS) {
863+
return LHS.Opcode < RHS.Opcode;
864+
}
865+
853866
} // end anonymous namespace
854867

868+
/// \returns a sorted list of interchangeable instructions by instruction opcode
869+
/// that \p I can be converted to.
870+
/// e.g.,
871+
/// x << y -> x * (2^y)
872+
/// x << 1 -> x * 2
873+
/// x << 0 -> x * 1 -> x - 0 -> x + 0 -> x & 11...1 -> x | 0
874+
/// x * 0 -> x & 0
875+
/// x * -1 -> 0 - x
876+
/// TODO: support more patterns
877+
static SmallVector<InterchangeableInstruction>
878+
getInterchangeableInstruction(Instruction *I) {
879+
// PII = Possible Interchangeable Instruction
880+
SmallVector<InterchangeableInstruction> PII;
881+
unsigned Opcode = I->getOpcode();
882+
PII.emplace_back(Opcode, I->operands());
883+
if (!is_contained({Instruction::Shl, Instruction::Mul, Instruction::Sub,
884+
Instruction::Add},
885+
Opcode))
886+
return PII;
887+
Constant *C;
888+
if (match(I, m_BinOp(m_Value(), m_Constant(C)))) {
889+
ConstantInt *V = nullptr;
890+
if (auto *CI = dyn_cast<ConstantInt>(C)) {
891+
V = CI;
892+
} else if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
893+
if (auto *CI = dyn_cast_if_present<ConstantInt>(CDV->getSplatValue()))
894+
V = CI;
895+
}
896+
if (!V)
897+
return PII;
898+
Value *Op0 = I->getOperand(0);
899+
Type *Op1Ty = I->getOperand(1)->getType();
900+
const APInt &Op1Int = V->getValue();
901+
Constant *Zero =
902+
ConstantInt::get(Op1Ty, APInt::getZero(Op1Int.getBitWidth()));
903+
Constant *UnsignedMax =
904+
ConstantInt::get(Op1Ty, APInt::getMaxValue(Op1Int.getBitWidth()));
905+
switch (Opcode) {
906+
case Instruction::Shl: {
907+
PII.emplace_back(Instruction::Mul, Op0,
908+
ConstantInt::get(Op1Ty, 1 << Op1Int.getZExtValue()));
909+
if (Op1Int.isZero()) {
910+
PII.emplace_back(Instruction::Sub, Op0, Zero);
911+
PII.emplace_back(Instruction::Add, Op0, Zero);
912+
PII.emplace_back(Instruction::And, Op0, UnsignedMax);
913+
PII.emplace_back(Instruction::Or, Op0, Zero);
914+
}
915+
break;
916+
}
917+
case Instruction::Mul: {
918+
if (Op1Int.isOne()) {
919+
PII.emplace_back(Instruction::Sub, Op0, Zero);
920+
PII.emplace_back(Instruction::Add, Op0, Zero);
921+
PII.emplace_back(Instruction::And, Op0, UnsignedMax);
922+
PII.emplace_back(Instruction::Or, Op0, Zero);
923+
} else if (Op1Int.isZero()) {
924+
PII.emplace_back(Instruction::And, Op0, Zero);
925+
} else if (Op1Int.isAllOnes()) {
926+
PII.emplace_back(Instruction::Sub, Zero, Op0);
927+
}
928+
break;
929+
}
930+
case Instruction::Sub:
931+
if (Op1Int.isZero()) {
932+
PII.emplace_back(Instruction::Add, Op0, Zero);
933+
PII.emplace_back(Instruction::And, Op0, UnsignedMax);
934+
PII.emplace_back(Instruction::Or, Op0, Zero);
935+
}
936+
break;
937+
case Instruction::Add:
938+
if (Op1Int.isZero()) {
939+
PII.emplace_back(Instruction::And, Op0, UnsignedMax);
940+
PII.emplace_back(Instruction::Or, Op0, Zero);
941+
}
942+
break;
943+
}
944+
}
945+
// std::set_intersection requires a sorted range.
946+
sort(PII);
947+
return PII;
948+
}
949+
950+
/// \returns the Op and operands which \p I convert to.
951+
static std::pair<Value *, SmallVector<Value *>>
952+
getInterchangeableInstruction(Instruction *I, Instruction *MainOp,
953+
Instruction *AltOp) {
954+
SmallVector<InterchangeableInstruction> IIList =
955+
getInterchangeableInstruction(I);
956+
const auto *Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
957+
return II.Opcode == MainOp->getOpcode();
958+
});
959+
if (Iter == IIList.end()) {
960+
Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
961+
return II.Opcode == AltOp->getOpcode();
962+
});
963+
assert(Iter != IIList.end() &&
964+
"Cannot find an interchangeable instruction.");
965+
return std::make_pair(AltOp, Iter->Ops);
966+
}
967+
return std::make_pair(MainOp, Iter->Ops);
968+
}
969+
855970
/// \returns true if \p Opcode is allowed as part of the main/alternate
856971
/// instruction for SLP vectorization.
857972
///
@@ -965,6 +1080,22 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
9651080
return InstructionsState::invalid();
9661081
}
9671082
bool AnyPoison = InstCnt != VL.size();
1083+
// Currently, this is only used for binary ops.
1084+
// TODO: support all instructions
1085+
SmallVector<InterchangeableInstruction> InterchangeableOpcode =
1086+
getInterchangeableInstruction(cast<Instruction>(V));
1087+
SmallVector<InterchangeableInstruction> AlternateInterchangeableOpcode;
1088+
auto UpdateInterchangeableOpcode =
1089+
[](SmallVector<InterchangeableInstruction> &LHS,
1090+
ArrayRef<InterchangeableInstruction> RHS) {
1091+
SmallVector<InterchangeableInstruction> NewInterchangeableOpcode;
1092+
std::set_intersection(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
1093+
std::back_inserter(NewInterchangeableOpcode));
1094+
if (NewInterchangeableOpcode.empty())
1095+
return false;
1096+
LHS.swap(NewInterchangeableOpcode);
1097+
return true;
1098+
};
9681099
for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
9691100
auto *I = dyn_cast<Instruction>(VL[Cnt]);
9701101
if (!I)
@@ -977,14 +1108,32 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
9771108
return InstructionsState::invalid();
9781109
unsigned InstOpcode = I->getOpcode();
9791110
if (IsBinOp && isa<BinaryOperator>(I)) {
980-
if (InstOpcode == Opcode || InstOpcode == AltOpcode)
1111+
SmallVector<InterchangeableInstruction> ThisInterchangeableOpcode(
1112+
getInterchangeableInstruction(I));
1113+
if (UpdateInterchangeableOpcode(InterchangeableOpcode,
1114+
ThisInterchangeableOpcode))
9811115
continue;
982-
if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
983-
isValidForAlternation(Opcode)) {
984-
AltOpcode = InstOpcode;
985-
AltIndex = Cnt;
1116+
if (AlternateInterchangeableOpcode.empty()) {
1117+
InterchangeableOpcode.erase(
1118+
remove_if(InterchangeableOpcode,
1119+
[](const InterchangeableInstruction &I) {
1120+
return !isValidForAlternation(I.Opcode);
1121+
}),
1122+
InterchangeableOpcode.end());
1123+
ThisInterchangeableOpcode.erase(
1124+
remove_if(ThisInterchangeableOpcode,
1125+
[](const InterchangeableInstruction &I) {
1126+
return !isValidForAlternation(I.Opcode);
1127+
}),
1128+
ThisInterchangeableOpcode.end());
1129+
if (InterchangeableOpcode.empty() || ThisInterchangeableOpcode.empty())
1130+
return InstructionsState::invalid();
1131+
AlternateInterchangeableOpcode.swap(ThisInterchangeableOpcode);
9861132
continue;
9871133
}
1134+
if (UpdateInterchangeableOpcode(AlternateInterchangeableOpcode,
1135+
ThisInterchangeableOpcode))
1136+
continue;
9881137
} else if (IsCastOp && isa<CastInst>(I)) {
9891138
Value *Op0 = IBase->getOperand(0);
9901139
Type *Ty0 = Op0->getType();
@@ -1085,6 +1234,24 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
10851234
return InstructionsState::invalid();
10861235
}
10871236

1237+
if (IsBinOp) {
1238+
auto FindOp = [&](ArrayRef<InterchangeableInstruction> CandidateOp) {
1239+
for (Value *V : VL) {
1240+
if (isa<PoisonValue>(V))
1241+
continue;
1242+
for (const InterchangeableInstruction &I : CandidateOp)
1243+
if (cast<Instruction>(V)->getOpcode() == I.Opcode)
1244+
return cast<Instruction>(V);
1245+
}
1246+
llvm_unreachable(
1247+
"Cannot find the candidate instruction for InstructionsState.");
1248+
};
1249+
Instruction *MainOp = FindOp(InterchangeableOpcode);
1250+
Instruction *AltOp = AlternateInterchangeableOpcode.empty()
1251+
? MainOp
1252+
: FindOp(AlternateInterchangeableOpcode);
1253+
return InstructionsState(MainOp, AltOp);
1254+
}
10881255
return InstructionsState(cast<Instruction>(V),
10891256
cast<Instruction>(VL[AltIndex]));
10901257
}
@@ -2416,42 +2583,46 @@ class BoUpSLP {
24162583
}
24172584

24182585
/// Go through the instructions in VL and append their operands.
2419-
void appendOperandsOfVL(ArrayRef<Value *> VL, Instruction *VL0) {
2586+
void appendOperandsOfVL(ArrayRef<Value *> VL, Instruction *MainOp,
2587+
Instruction *AltOp) {
24202588
assert(!VL.empty() && "Bad VL");
24212589
assert((empty() || VL.size() == getNumLanes()) &&
24222590
"Expected same number of lanes");
24232591
// IntrinsicInst::isCommutative returns true if swapping the first "two"
24242592
// arguments to the intrinsic produces the same result.
24252593
constexpr unsigned IntrinsicNumOperands = 2;
2426-
unsigned NumOperands = VL0->getNumOperands();
2427-
ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands;
2594+
unsigned NumOperands = MainOp->getNumOperands();
2595+
ArgSize = isa<IntrinsicInst>(MainOp) ? IntrinsicNumOperands : NumOperands;
24282596
OpsVec.resize(NumOperands);
24292597
unsigned NumLanes = VL.size();
2430-
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
2598+
for (unsigned OpIdx : seq<unsigned>(NumOperands))
24312599
OpsVec[OpIdx].resize(NumLanes);
2432-
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
2433-
assert((isa<Instruction>(VL[Lane]) || isa<PoisonValue>(VL[Lane])) &&
2434-
"Expected instruction or poison value");
2435-
// Our tree has just 3 nodes: the root and two operands.
2436-
// It is therefore trivial to get the APO. We only need to check the
2437-
// opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
2438-
// RHS operand. The LHS operand of both add and sub is never attached
2439-
// to an inversese operation in the linearized form, therefore its APO
2440-
// is false. The RHS is true only if VL[Lane] is an inverse operation.
2441-
2442-
// Since operand reordering is performed on groups of commutative
2443-
// operations or alternating sequences (e.g., +, -), we can safely
2444-
// tell the inverse operations by checking commutativity.
2445-
if (isa<PoisonValue>(VL[Lane])) {
2600+
for (auto [Lane, V] : enumerate(VL)) {
2601+
assert((isa<Instruction>(V) || isa<PoisonValue>(V)) &&
2602+
"Expected instruction or poison value");
2603+
if (isa<PoisonValue>(V)) {
2604+
for (unsigned OpIdx : seq<unsigned>(NumOperands))
24462605
OpsVec[OpIdx][Lane] = {
2447-
PoisonValue::get(VL0->getOperand(OpIdx)->getType()), true,
2606+
PoisonValue::get(MainOp->getOperand(OpIdx)->getType()), true,
24482607
false};
2449-
continue;
2450-
}
2451-
bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
2608+
continue;
2609+
}
2610+
auto [SelectedOp, Ops] =
2611+
getInterchangeableInstruction(cast<Instruction>(V), MainOp, AltOp);
2612+
// Our tree has just 3 nodes: the root and two operands.
2613+
// It is therefore trivial to get the APO. We only need to check the
2614+
// opcode of V and whether the operand at OpIdx is the LHS or RHS
2615+
// operand. The LHS operand of both add and sub is never attached to an
2616+
// inverse operation in the linearized form, therefore its APO is
2617+
// false. The RHS is true only if V is an inverse operation.
2618+
2619+
// Since operand reordering is performed on groups of commutative
2620+
// operations or alternating sequences (e.g., +, -), we can safely
2621+
// tell the inverse operations by checking commutativity.
2622+
bool IsInverseOperation = !isCommutative(cast<Instruction>(SelectedOp));
2623+
for (unsigned OpIdx : seq<unsigned>(NumOperands)) {
24522624
bool APO = (OpIdx == 0) ? false : IsInverseOperation;
2453-
OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
2454-
APO, false};
2625+
OpsVec[OpIdx][Lane] = {Ops[OpIdx], APO, false};
24552626
}
24562627
}
24572628
}
@@ -2557,11 +2728,12 @@ class BoUpSLP {
25572728

25582729
public:
25592730
/// Initialize with all the operands of the instruction vector \p RootVL.
2560-
VLOperands(ArrayRef<Value *> RootVL, Instruction *VL0, const BoUpSLP &R)
2731+
VLOperands(ArrayRef<Value *> RootVL, Instruction *MainOp,
2732+
Instruction *AltOp, const BoUpSLP &R)
25612733
: TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R),
2562-
L(R.LI->getLoopFor((VL0->getParent()))) {
2734+
L(R.LI->getLoopFor(MainOp->getParent())) {
25632735
// Append all the operands of RootVL.
2564-
appendOperandsOfVL(RootVL, VL0);
2736+
appendOperandsOfVL(RootVL, MainOp, AltOp);
25652737
}
25662738

25672739
/// \Returns a value vector with the operands across all lanes for the
@@ -3351,7 +3523,7 @@ class BoUpSLP {
33513523

33523524
/// Set this bundle's operand from Scalars.
33533525
void setOperand(const BoUpSLP &R, bool RequireReorder = false) {
3354-
VLOperands Ops(Scalars, MainOp, R);
3526+
VLOperands Ops(Scalars, MainOp, AltOp, R);
33553527
if (RequireReorder)
33563528
Ops.reorder();
33573529
for (unsigned I : seq<unsigned>(MainOp->getNumOperands()))
@@ -8592,7 +8764,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
85928764
TE->dump());
85938765

85948766
ValueList Left, Right;
8595-
VLOperands Ops(VL, VL0, *this);
8767+
VLOperands Ops(VL, VL0, S.getAltOp(), *this);
85968768
if (cast<CmpInst>(VL0)->isCommutative()) {
85978769
// Commutative predicate - collect + sort operands of the instructions
85988770
// so that each side is more likely to have the same opcode.
@@ -15797,7 +15969,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
1579715969
Value *V = Builder.CreateBinOp(
1579815970
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
1579915971
RHS);
15800-
propagateIRFlags(V, E->Scalars, VL0, It == MinBWs.end());
15972+
propagateIRFlags(V, E->Scalars, nullptr, It == MinBWs.end());
1580115973
if (auto *I = dyn_cast<Instruction>(V)) {
1580215974
V = ::propagateMetadata(I, E->Scalars);
1580315975
// Drop nuw flags for abs(sub(commutative), true).

llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -314,10 +314,10 @@ define void @store_try_reorder(ptr %dst) {
314314
;
315315
; POW2-ONLY-LABEL: @store_try_reorder(
316316
; POW2-ONLY-NEXT: entry:
317-
; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0
318-
; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
319-
; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
320-
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
317+
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
318+
; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
319+
; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2
320+
; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
321321
; POW2-ONLY-NEXT: ret void
322322
;
323323
entry:

llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,19 +7,18 @@ define void @test(ptr %a, i64 %0) {
77
; CHECK-NEXT: [[ENTRY:.*:]]
88
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
99
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
10+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i32 0
1011
; CHECK-NEXT: br label %[[BB:.*]]
1112
; CHECK: [[BB]]:
12-
; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1
13-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
14-
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 0, i32 1
13+
; CHECK-NEXT: [[TMP5:%.*]] = or disjoint <2 x i64> [[TMP3]], <i64 1, i64 0>
1514
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]]
16-
; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]]
17-
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison)
18-
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8
19-
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8
20-
; CHECK-NEXT: [[TMP10:%.*]] = fsub <2 x double> [[TMP8]], [[TMP9]]
15+
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP6]], i32 0
16+
; CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison)
17+
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, ptr [[A]], align 8
18+
; CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, ptr [[A]], align 8
2119
; CHECK-NEXT: [[TMP11:%.*]] = fsub <2 x double> [[TMP7]], [[TMP10]]
22-
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v2f64.p0.i64(<2 x double> [[TMP11]], ptr align 8 [[ARRAYIDX17_I28_1]], i64 -8, <2 x i1> splat (i1 true), i32 2)
20+
; CHECK-NEXT: [[TMP12:%.*]] = fsub <2 x double> [[TMP9]], [[TMP11]]
21+
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v2f64.p0.i64(<2 x double> [[TMP12]], ptr align 8 [[TMP8]], i64 -8, <2 x i1> splat (i1 true), i32 2)
2322
; CHECK-NEXT: br label %[[BB]]
2423
;
2524
entry:

llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -324,10 +324,10 @@ define void @store_try_reorder(ptr %dst) {
324324
;
325325
; POW2-ONLY-LABEL: @store_try_reorder(
326326
; POW2-ONLY-NEXT: entry:
327-
; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0
328-
; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
329-
; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
330-
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
327+
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
328+
; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
329+
; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2
330+
; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
331331
; POW2-ONLY-NEXT: ret void
332332
;
333333
entry:

llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,8 @@ define i32 @foo(ptr nocapture %A, i32 %n) {
1010
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @bar()
1111
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
1212
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
13-
; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
14-
; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
15-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
16-
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9)
13+
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>
14+
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 9)
1715
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
1816
; CHECK-NEXT: ret i32 undef
1917
;

0 commit comments

Comments
 (0)