Skip to content

Commit ad591ac

Browse files
committed
[SLP] Make getSameOpcode support different instructions if they have
same semantics.
1 parent 12bcea3 commit ad591ac

14 files changed

+313
-138
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 207 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -832,8 +832,107 @@ struct InstructionsState {
832832
: OpValue(OpValue), MainOp(MainOp), AltOp(AltOp) {}
833833
};
834834

835+
struct InterchangeableInstruction {
836+
unsigned Opcode;
837+
SmallVector<Value *> Ops;
838+
template <class... ArgTypes>
839+
InterchangeableInstruction(unsigned Opcode, ArgTypes &&...Args)
840+
: Opcode(Opcode), Ops{std::forward<decltype(Args)>(Args)...} {}
841+
};
842+
843+
bool operator<(const InterchangeableInstruction &LHS,
844+
const InterchangeableInstruction &RHS) {
845+
return LHS.Opcode < RHS.Opcode;
846+
}
847+
835848
} // end anonymous namespace
836849

850+
/// \returns a sorted list of interchangeable instructions by instruction opcode
851+
/// that \p I can be converted to.
852+
/// e.g.,
853+
/// x << y -> x * (2^y)
854+
/// x << 1 -> x * 2
855+
/// x << 0 -> x * 1 -> x - 0 -> x + 0 -> x & 11...1 -> x | 0
856+
/// x * 0 -> x & 0
857+
/// x * -1 -> 0 - x
858+
/// TODO: support more patterns
859+
static SmallVector<InterchangeableInstruction>
860+
getInterchangeableInstruction(Instruction *I) {
861+
// PII = Possible Interchangeable Instruction
862+
SmallVector<InterchangeableInstruction> PII;
863+
unsigned Opcode = I->getOpcode();
864+
PII.emplace_back(Opcode, I->operands());
865+
if (!is_contained({Instruction::Shl, Instruction::Mul, Instruction::Sub,
866+
Instruction::Add},
867+
Opcode))
868+
return PII;
869+
Constant *C;
870+
if (match(I, m_BinOp(m_Value(), m_Constant(C)))) {
871+
ConstantInt *V = nullptr;
872+
if (auto *CI = dyn_cast<ConstantInt>(C)) {
873+
V = CI;
874+
} else if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
875+
if (auto *CI = dyn_cast_if_present<ConstantInt>(CDV->getSplatValue()))
876+
V = CI;
877+
}
878+
if (!V)
879+
return PII;
880+
Value *Op0 = I->getOperand(0);
881+
Type *Op1Ty = I->getOperand(1)->getType();
882+
const APInt &Op1Int = V->getValue();
883+
Constant *Zero =
884+
ConstantInt::get(Op1Ty, APInt::getZero(Op1Int.getBitWidth()));
885+
Constant *UnsignedMax =
886+
ConstantInt::get(Op1Ty, APInt::getMaxValue(Op1Int.getBitWidth()));
887+
switch (Opcode) {
888+
case Instruction::Shl: {
889+
PII.emplace_back(Instruction::Mul, Op0,
890+
ConstantInt::get(Op1Ty, 1 << Op1Int.getZExtValue()));
891+
if (Op1Int.isZero()) {
892+
PII.emplace_back(Instruction::Sub, Op0, Zero);
893+
PII.emplace_back(Instruction::Add, Op0, Zero);
894+
PII.emplace_back(Instruction::And, Op0, UnsignedMax);
895+
PII.emplace_back(Instruction::Or, Op0, Zero);
896+
}
897+
break;
898+
}
899+
case Instruction::Mul: {
900+
switch (Op1Int.getSExtValue()) {
901+
case 1:
902+
PII.emplace_back(Instruction::Sub, Op0, Zero);
903+
PII.emplace_back(Instruction::Add, Op0, Zero);
904+
PII.emplace_back(Instruction::And, Op0, UnsignedMax);
905+
PII.emplace_back(Instruction::Or, Op0, Zero);
906+
break;
907+
case 0:
908+
PII.emplace_back(Instruction::And, Op0, Zero);
909+
break;
910+
case -1:
911+
PII.emplace_back(Instruction::Sub, Zero, Op0);
912+
break;
913+
}
914+
break;
915+
}
916+
case Instruction::Sub:
917+
if (Op1Int.isZero()) {
918+
PII.emplace_back(Instruction::Add, Op0, Zero);
919+
PII.emplace_back(Instruction::And, Op0, UnsignedMax);
920+
PII.emplace_back(Instruction::Or, Op0, Zero);
921+
}
922+
break;
923+
case Instruction::Add:
924+
if (Op1Int.isZero()) {
925+
PII.emplace_back(Instruction::And, Op0, UnsignedMax);
926+
PII.emplace_back(Instruction::Or, Op0, Zero);
927+
}
928+
break;
929+
}
930+
}
931+
// std::set_intersection requires a sorted range.
932+
sort(PII);
933+
return PII;
934+
}
935+
837936
/// \returns true if \p Opcode is allowed as part of the main/alternate
838937
/// instruction for SLP vectorization.
839938
///
@@ -938,18 +1037,54 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
9381037
if (!isTriviallyVectorizable(BaseID) && BaseMappings.empty())
9391038
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
9401039
}
1040+
// Currently, this is only used for binary ops.
1041+
// TODO: support all instructions
1042+
SmallVector<InterchangeableInstruction> InterchangeableOpcode =
1043+
getInterchangeableInstruction(cast<Instruction>(VL[BaseIndex]));
1044+
SmallVector<InterchangeableInstruction> AlternateInterchangeableOpcode;
1045+
auto UpdateInterchangeableOpcode =
1046+
[](SmallVector<InterchangeableInstruction> &LHS,
1047+
ArrayRef<InterchangeableInstruction> RHS) {
1048+
SmallVector<InterchangeableInstruction> NewInterchangeableOpcode;
1049+
std::set_intersection(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
1050+
std::back_inserter(NewInterchangeableOpcode));
1051+
if (NewInterchangeableOpcode.empty())
1052+
return false;
1053+
LHS = std::move(NewInterchangeableOpcode);
1054+
return true;
1055+
};
9411056
for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
9421057
auto *I = cast<Instruction>(VL[Cnt]);
9431058
unsigned InstOpcode = I->getOpcode();
9441059
if (IsBinOp && isa<BinaryOperator>(I)) {
945-
if (InstOpcode == Opcode || InstOpcode == AltOpcode)
1060+
SmallVector<InterchangeableInstruction> ThisInterchangeableOpcode(
1061+
getInterchangeableInstruction(I));
1062+
if (UpdateInterchangeableOpcode(InterchangeableOpcode,
1063+
ThisInterchangeableOpcode))
9461064
continue;
947-
if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
948-
isValidForAlternation(Opcode)) {
949-
AltOpcode = InstOpcode;
950-
AltIndex = Cnt;
1065+
if (AlternateInterchangeableOpcode.empty()) {
1066+
InterchangeableOpcode.erase(
1067+
std::remove_if(InterchangeableOpcode.begin(),
1068+
InterchangeableOpcode.end(),
1069+
[](const InterchangeableInstruction &I) {
1070+
return !isValidForAlternation(I.Opcode);
1071+
}),
1072+
InterchangeableOpcode.end());
1073+
ThisInterchangeableOpcode.erase(
1074+
std::remove_if(ThisInterchangeableOpcode.begin(),
1075+
ThisInterchangeableOpcode.end(),
1076+
[](const InterchangeableInstruction &I) {
1077+
return !isValidForAlternation(I.Opcode);
1078+
}),
1079+
ThisInterchangeableOpcode.end());
1080+
if (InterchangeableOpcode.empty() || ThisInterchangeableOpcode.empty())
1081+
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
1082+
AlternateInterchangeableOpcode = std::move(ThisInterchangeableOpcode);
9511083
continue;
9521084
}
1085+
if (UpdateInterchangeableOpcode(AlternateInterchangeableOpcode,
1086+
ThisInterchangeableOpcode))
1087+
continue;
9531088
} else if (IsCastOp && isa<CastInst>(I)) {
9541089
Value *Op0 = IBase->getOperand(0);
9551090
Type *Ty0 = Op0->getType();
@@ -1043,6 +1178,21 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
10431178
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
10441179
}
10451180

1181+
if (IsBinOp) {
1182+
auto FindOp = [&](ArrayRef<InterchangeableInstruction> CandidateOp) {
1183+
for (Value *V : VL)
1184+
for (const InterchangeableInstruction &I : CandidateOp)
1185+
if (cast<Instruction>(V)->getOpcode() == I.Opcode)
1186+
return cast<Instruction>(V);
1187+
llvm_unreachable(
1188+
"Cannot find the candidate instruction for InstructionsState.");
1189+
};
1190+
Instruction *MainOp = FindOp(InterchangeableOpcode);
1191+
Instruction *AltOp = AlternateInterchangeableOpcode.empty()
1192+
? MainOp
1193+
: FindOp(AlternateInterchangeableOpcode);
1194+
return InstructionsState(VL[BaseIndex], MainOp, AltOp);
1195+
}
10461196
return InstructionsState(VL[BaseIndex], cast<Instruction>(VL[BaseIndex]),
10471197
cast<Instruction>(VL[AltIndex]));
10481198
}
@@ -2335,24 +2485,41 @@ class BoUpSLP {
23352485
: cast<Instruction>(VL[0])->getNumOperands();
23362486
OpsVec.resize(NumOperands);
23372487
unsigned NumLanes = VL.size();
2338-
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
2488+
InstructionsState S = getSameOpcode(VL, TLI);
2489+
for (unsigned OpIdx : seq<unsigned>(NumOperands))
23392490
OpsVec[OpIdx].resize(NumLanes);
2340-
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
2341-
assert(isa<Instruction>(VL[Lane]) && "Expected instruction");
2342-
// Our tree has just 3 nodes: the root and two operands.
2343-
// It is therefore trivial to get the APO. We only need to check the
2344-
// opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
2345-
// RHS operand. The LHS operand of both add and sub is never attached
2346-
// to an inversese operation in the linearized form, therefore its APO
2347-
// is false. The RHS is true only if VL[Lane] is an inverse operation.
2348-
2349-
// Since operand reordering is performed on groups of commutative
2350-
// operations or alternating sequences (e.g., +, -), we can safely
2351-
// tell the inverse operations by checking commutativity.
2352-
bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
2491+
for (auto [I, V] : enumerate(VL)) {
2492+
assert(isa<Instruction>(V) && "Expected instruction");
2493+
SmallVector<InterchangeableInstruction> IIList =
2494+
getInterchangeableInstruction(cast<Instruction>(V));
2495+
Value *SelectedOp;
2496+
auto Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
2497+
return II.Opcode == S.MainOp->getOpcode();
2498+
});
2499+
if (Iter == IIList.end()) {
2500+
Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
2501+
return II.Opcode == S.AltOp->getOpcode();
2502+
});
2503+
SelectedOp = S.AltOp;
2504+
} else {
2505+
SelectedOp = S.MainOp;
2506+
}
2507+
assert(Iter != IIList.end() &&
2508+
"Cannot find an interchangeable instruction.");
2509+
// Our tree has just 3 nodes: the root and two operands.
2510+
// It is therefore trivial to get the APO. We only need to check the
2511+
// opcode of V and whether the operand at OpIdx is the LHS or RHS
2512+
// operand. The LHS operand of both add and sub is never attached to an
2513+
// inverse operation in the linearized form, therefore its APO is
2514+
// false. The RHS is true only if V is an inverse operation.
2515+
2516+
// Since operand reordering is performed on groups of commutative
2517+
// operations or alternating sequences (e.g., +, -), we can safely
2518+
// tell the inverse operations by checking commutativity.
2519+
bool IsInverseOperation = !isCommutative(cast<Instruction>(SelectedOp));
2520+
for (unsigned OpIdx : seq<unsigned>(NumOperands)) {
23532521
bool APO = (OpIdx == 0) ? false : IsInverseOperation;
2354-
OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
2355-
APO, false};
2522+
OpsVec[OpIdx][I] = {Iter->Ops[OpIdx], APO, false};
23562523
}
23572524
}
23582525
}
@@ -3252,15 +3419,25 @@ class BoUpSLP {
32523419
auto *I0 = cast<Instruction>(Scalars[0]);
32533420
Operands.resize(I0->getNumOperands());
32543421
unsigned NumLanes = Scalars.size();
3255-
for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
3256-
OpIdx != NumOperands; ++OpIdx) {
3422+
unsigned NumOperands = I0->getNumOperands();
3423+
for (unsigned OpIdx : seq<unsigned>(NumOperands))
32573424
Operands[OpIdx].resize(NumLanes);
3258-
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
3259-
auto *I = cast<Instruction>(Scalars[Lane]);
3260-
assert(I->getNumOperands() == NumOperands &&
3261-
"Expected same number of operands");
3262-
Operands[OpIdx][Lane] = I->getOperand(OpIdx);
3263-
}
3425+
for (auto [I, V] : enumerate(Scalars)) {
3426+
SmallVector<InterchangeableInstruction> IIList =
3427+
getInterchangeableInstruction(cast<Instruction>(V));
3428+
auto Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
3429+
return II.Opcode == MainOp->getOpcode();
3430+
});
3431+
if (Iter == IIList.end())
3432+
Iter = find_if(IIList, [&](const InterchangeableInstruction &II) {
3433+
return II.Opcode == AltOp->getOpcode();
3434+
});
3435+
assert(Iter != IIList.end() &&
3436+
"Cannot find an interchangeable instruction.");
3437+
assert(Iter->Ops.size() == NumOperands &&
3438+
"Expected same number of operands");
3439+
for (auto [J, Op] : enumerate(Iter->Ops))
3440+
Operands[J][I] = Op;
32643441
}
32653442
}
32663443

@@ -14935,7 +15112,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
1493515112
Value *V = Builder.CreateBinOp(
1493615113
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
1493715114
RHS);
14938-
propagateIRFlags(V, E->Scalars, VL0, It == MinBWs.end());
15115+
propagateIRFlags(V, E->Scalars, nullptr, It == MinBWs.end());
1493915116
if (auto *I = dyn_cast<Instruction>(V)) {
1494015117
V = propagateMetadata(I, E->Scalars);
1494115118
// Drop nuw flags for abs(sub(commutative), true).

llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -314,10 +314,10 @@ define void @store_try_reorder(ptr %dst) {
314314
;
315315
; POW2-ONLY-LABEL: @store_try_reorder(
316316
; POW2-ONLY-NEXT: entry:
317-
; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0
318-
; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
319-
; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
320-
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
317+
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
318+
; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
319+
; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2
320+
; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
321321
; POW2-ONLY-NEXT: ret void
322322
;
323323
entry:

llvm/test/Transforms/SLPVectorizer/RISCV/reversed-strided-node-with-external-ptr.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,12 @@ define void @test(ptr %a, i64 %0) {
77
; CHECK-NEXT: [[ENTRY:.*:]]
88
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
99
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
10+
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i32 0
1011
; CHECK-NEXT: br label %[[BB:.*]]
1112
; CHECK: [[BB]]:
12-
; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1
13-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
14-
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 0, i32 1
13+
; CHECK-NEXT: [[TMP5:%.*]] = or disjoint <2 x i64> [[TMP3]], <i64 1, i64 0>
1514
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]]
16-
; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]]
15+
; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = extractelement <2 x ptr> [[TMP6]], i32 0
1716
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x double> poison)
1817
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8
1918
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8

llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -324,10 +324,10 @@ define void @store_try_reorder(ptr %dst) {
324324
;
325325
; POW2-ONLY-LABEL: @store_try_reorder(
326326
; POW2-ONLY-NEXT: entry:
327-
; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0
328-
; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
329-
; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
330-
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
327+
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
328+
; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
329+
; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2
330+
; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
331331
; POW2-ONLY-NEXT: ret void
332332
;
333333
entry:

llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,7 @@ define i32 @foo(ptr nocapture %A, i32 %n) {
1010
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @bar()
1111
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
1212
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
13-
; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
14-
; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
15-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
13+
; CHECK-NEXT: [[TMP3:%.*]] = mul <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>
1614
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], <i32 9, i32 9, i32 9, i32 9>
1715
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
1816
; CHECK-NEXT: ret i32 undef

llvm/test/Transforms/SLPVectorizer/X86/bottom-to-top-reorder.ll

Lines changed: 11 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,17 @@
44
define void @test(ptr %0, ptr %1, ptr %2) {
55
; CHECK-LABEL: @test(
66
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP0:%.*]], i64 4
7-
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr [[TMP1:%.*]], align 4
8-
; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
9-
; CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
10-
; CHECK-NEXT: [[TMP11:%.*]] = sub <4 x i32> <i32 0, i32 0, i32 undef, i32 0>, [[TMP8]]
11-
; CHECK-NEXT: [[TMP12:%.*]] = sub <4 x i32> [[TMP11]], [[TMP10]]
12-
; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[TMP6]]
13-
; CHECK-NEXT: [[TMP14:%.*]] = add <4 x i32> [[TMP13]], <i32 0, i32 0, i32 1, i32 0>
14-
; CHECK-NEXT: [[TMP15:%.*]] = sub <4 x i32> [[TMP13]], <i32 0, i32 0, i32 1, i32 0>
15-
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> <i32 2, i32 0, i32 1, i32 7>
16-
; CHECK-NEXT: [[TMP17:%.*]] = add <4 x i32> [[TMP16]], zeroinitializer
17-
; CHECK-NEXT: [[TMP18:%.*]] = sub <4 x i32> [[TMP16]], zeroinitializer
18-
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <4 x i32> [[TMP17]], <4 x i32> [[TMP18]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
19-
; CHECK-NEXT: [[TMP20:%.*]] = add <4 x i32> [[TMP19]], zeroinitializer
20-
; CHECK-NEXT: [[TMP21:%.*]] = sub <4 x i32> [[TMP19]], zeroinitializer
21-
; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <4 x i32> [[TMP20]], <4 x i32> [[TMP21]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
22-
; CHECK-NEXT: store <4 x i32> [[TMP22]], ptr [[TMP2:%.*]], align 4
7+
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[TMP1:%.*]], align 4
8+
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr [[TMP0]], align 4
9+
; CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr [[TMP4]], align 4
10+
; CHECK-NEXT: [[TMP8:%.*]] = sub <4 x i32> <i32 0, i32 0, i32 undef, i32 0>, [[TMP6]]
11+
; CHECK-NEXT: [[TMP9:%.*]] = sub <4 x i32> [[TMP8]], [[TMP7]]
12+
; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP9]], [[TMP5]]
13+
; CHECK-NEXT: [[TMP11:%.*]] = add <4 x i32> <i32 0, i32 0, i32 1, i32 0>, [[TMP10]]
14+
; CHECK-NEXT: [[TMP12:%.*]] = add <4 x i32> [[TMP11]], zeroinitializer
15+
; CHECK-NEXT: [[TMP13:%.*]] = add <4 x i32> [[TMP12]], zeroinitializer
16+
; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <4 x i32> [[TMP13]], <4 x i32> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
17+
; CHECK-NEXT: store <4 x i32> [[TMP14]], ptr [[TMP2:%.*]], align 4
2318
; CHECK-NEXT: ret void
2419
;
2520
%4 = load i32, ptr %1, align 4

0 commit comments

Comments
 (0)