Skip to content

[SLP] Make getSameOpcode support different instructions if they have same semantics. #112181

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Dec 13, 2024
Merged
242 changes: 207 additions & 35 deletions llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -840,8 +840,123 @@ class InstructionsState {
static InstructionsState invalid() { return {nullptr, nullptr}; }
};

/// One candidate form of an instruction: the opcode it could be expressed
/// with, together with the operand list it would take under that opcode.
struct InterchangeableInstruction {
  /// Opcode of this candidate form.
  unsigned Opcode;
  /// Operands of this candidate form.
  SmallVector<Value *> Ops;

  /// Creates a candidate with opcode \p Opc. All remaining arguments are
  /// perfectly forwarded into the brace-initializer of \c Ops, so callers may
  /// pass either individual operand values or an operand range.
  template <class... OperandTs>
  InterchangeableInstruction(unsigned Opc, OperandTs &&...Operands)
      : Opcode(Opc), Ops{std::forward<OperandTs>(Operands)...} {}
};

/// Orders candidates by opcode alone; operands take no part in the comparison,
/// so two candidates with the same opcode are equivalent under this ordering.
bool operator<(const InterchangeableInstruction &LHS,
               const InterchangeableInstruction &RHS) {
  return RHS.Opcode > LHS.Opcode;
}

} // end anonymous namespace

/// \returns a list, sorted by instruction opcode, of the interchangeable
/// instructions that \p I can be converted to. The list always contains \p I
/// itself (its own opcode and operands).
/// e.g.,
/// x << y -> x * (2^y)
/// x << 1 -> x * 2
/// x << 0 -> x * 1 -> x - 0 -> x + 0 -> x & 11...1 -> x | 0
/// x * 0 -> x & 0
/// x * -1 -> 0 - x
/// Extra candidates are only produced for shl/mul/sub/add whose second operand
/// is a ConstantInt or a ConstantDataVector splat of a ConstantInt.
/// TODO: support more patterns
static SmallVector<InterchangeableInstruction>
getInterchangeableInstruction(Instruction *I) {
  // PII = Possible Interchangeable Instruction
  SmallVector<InterchangeableInstruction> PII;
  unsigned Opcode = I->getOpcode();
  // The instruction is always interchangeable with itself.
  PII.emplace_back(Opcode, I->operands());
  if (!is_contained({Instruction::Shl, Instruction::Mul, Instruction::Sub,
                     Instruction::Add},
                    Opcode))
    return PII;
  Constant *C;
  if (match(I, m_BinOp(m_Value(), m_Constant(C)))) {
    // Extract the scalar integer: either the constant itself or the splat
    // value of a constant data vector.
    ConstantInt *V = nullptr;
    if (auto *CI = dyn_cast<ConstantInt>(C)) {
      V = CI;
    } else if (auto *CDV = dyn_cast<ConstantDataVector>(C)) {
      if (auto *CI = dyn_cast_if_present<ConstantInt>(CDV->getSplatValue()))
        V = CI;
    }
    if (!V)
      return PII;
    Value *Op0 = I->getOperand(0);
    Type *Op1Ty = I->getOperand(1)->getType();
    const APInt &Op1Int = V->getValue();
    const unsigned BitWidth = Op1Int.getBitWidth();
    Constant *Zero = ConstantInt::get(Op1Ty, APInt::getZero(BitWidth));
    Constant *UnsignedMax =
        ConstantInt::get(Op1Ty, APInt::getMaxValue(BitWidth));
    switch (Opcode) {
    case Instruction::Shl: {
      // Compute 2^y as an APInt of the operand's own bit width. A plain
      // `1 << y` is evaluated in `int`, which overflows for y >= 31 and gives
      // a wrong multiplier for wider integer types. When y is not smaller
      // than the bit width, 2^y is not representable (and the shl result is
      // poison), so no mul candidate is offered.
      if (Op1Int.ult(BitWidth)) {
        const unsigned ShAmt = static_cast<unsigned>(Op1Int.getZExtValue());
        PII.emplace_back(
            Instruction::Mul, Op0,
            ConstantInt::get(Op1Ty, APInt::getOneBitSet(BitWidth, ShAmt)));
      }
      if (Op1Int.isZero()) {
        // x << 0 is x, which is also x - 0, x + 0, x & ~0 and x | 0.
        PII.emplace_back(Instruction::Sub, Op0, Zero);
        PII.emplace_back(Instruction::Add, Op0, Zero);
        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
        PII.emplace_back(Instruction::Or, Op0, Zero);
      }
      break;
    }
    case Instruction::Mul: {
      if (Op1Int.isOne()) {
        // x * 1 is x.
        PII.emplace_back(Instruction::Sub, Op0, Zero);
        PII.emplace_back(Instruction::Add, Op0, Zero);
        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
        PII.emplace_back(Instruction::Or, Op0, Zero);
      } else if (Op1Int.isZero()) {
        // x * 0 is x & 0.
        PII.emplace_back(Instruction::And, Op0, Zero);
      } else if (Op1Int.isAllOnes()) {
        // x * -1 is 0 - x; note the swapped operand order.
        PII.emplace_back(Instruction::Sub, Zero, Op0);
      }
      break;
    }
    case Instruction::Sub:
      if (Op1Int.isZero()) {
        PII.emplace_back(Instruction::Add, Op0, Zero);
        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
        PII.emplace_back(Instruction::Or, Op0, Zero);
      }
      break;
    case Instruction::Add:
      if (Op1Int.isZero()) {
        PII.emplace_back(Instruction::And, Op0, UnsignedMax);
        PII.emplace_back(Instruction::Or, Op0, Zero);
      }
      break;
    }
  }
  // std::set_intersection requires a sorted range.
  sort(PII);
  return PII;
}

/// Picks the form of \p I that matches one of the two given opcodes.
/// The opcode of \p MainOp is tried first; only if \p I has no candidate with
/// that opcode is the opcode of \p AltOp tried. It is asserted that one of
/// the two is found.
/// \returns the selected instruction (\p MainOp or \p AltOp) paired with the
/// operands \p I takes when expressed with that instruction's opcode.
static std::pair<Value *, SmallVector<Value *>>
getInterchangeableInstruction(Instruction *I, Instruction *MainOp,
                              Instruction *AltOp) {
  SmallVector<InterchangeableInstruction> Candidates =
      getInterchangeableInstruction(I);
  // Finds the first candidate whose opcode equals Opc, or end() if none.
  auto FindByOpcode = [&Candidates](unsigned Opc) {
    return find_if(Candidates, [Opc](const InterchangeableInstruction &Cand) {
      return Cand.Opcode == Opc;
    });
  };

  // Prefer the main opcode.
  if (const auto *MainIt = FindByOpcode(MainOp->getOpcode());
      MainIt != Candidates.end())
    return std::make_pair(MainOp, MainIt->Ops);

  // Otherwise the alternate opcode must be among the candidates.
  const auto *AltIt = FindByOpcode(AltOp->getOpcode());
  assert(AltIt != Candidates.end() &&
         "Cannot find an interchangeable instruction.");
  return std::make_pair(AltOp, AltIt->Ops);
}

/// \returns true if \p Opcode is allowed as part of the main/alternate
/// instruction for SLP vectorization.
///
Expand Down Expand Up @@ -955,6 +1070,22 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
return InstructionsState::invalid();
}
bool AnyPoison = InstCnt != VL.size();
// Currently, this is only used for binary ops.
// TODO: support all instructions
SmallVector<InterchangeableInstruction> InterchangeableOpcode =
getInterchangeableInstruction(cast<Instruction>(V));
SmallVector<InterchangeableInstruction> AlternateInterchangeableOpcode;
auto UpdateInterchangeableOpcode =
[](SmallVector<InterchangeableInstruction> &LHS,
ArrayRef<InterchangeableInstruction> RHS) {
SmallVector<InterchangeableInstruction> NewInterchangeableOpcode;
std::set_intersection(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
std::back_inserter(NewInterchangeableOpcode));
if (NewInterchangeableOpcode.empty())
return false;
LHS.swap(NewInterchangeableOpcode);
return true;
Comment on lines +1084 to +1087
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (NewInterchangeableOpcode.empty())
return false;
LHS.swap(NewInterchangeableOpcode);
return true;
LHS.swap(NewInterchangeableOpcode);
return !LHS.empty();

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do not want LHS to become empty when NewInterchangeableOpcode is empty.

};
for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
auto *I = dyn_cast<Instruction>(VL[Cnt]);
if (!I)
Expand All @@ -967,14 +1098,32 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
return InstructionsState::invalid();
unsigned InstOpcode = I->getOpcode();
if (IsBinOp && isa<BinaryOperator>(I)) {
if (InstOpcode == Opcode || InstOpcode == AltOpcode)
SmallVector<InterchangeableInstruction> ThisInterchangeableOpcode(
getInterchangeableInstruction(I));
if (UpdateInterchangeableOpcode(InterchangeableOpcode,
ThisInterchangeableOpcode))
continue;
if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
isValidForAlternation(Opcode)) {
AltOpcode = InstOpcode;
AltIndex = Cnt;
if (AlternateInterchangeableOpcode.empty()) {
InterchangeableOpcode.erase(
remove_if(InterchangeableOpcode,
[](const InterchangeableInstruction &I) {
return !isValidForAlternation(I.Opcode);
}),
InterchangeableOpcode.end());
ThisInterchangeableOpcode.erase(
remove_if(ThisInterchangeableOpcode,
[](const InterchangeableInstruction &I) {
return !isValidForAlternation(I.Opcode);
}),
ThisInterchangeableOpcode.end());
if (InterchangeableOpcode.empty() || ThisInterchangeableOpcode.empty())
return InstructionsState::invalid();
AlternateInterchangeableOpcode.swap(ThisInterchangeableOpcode);
continue;
}
if (UpdateInterchangeableOpcode(AlternateInterchangeableOpcode,
ThisInterchangeableOpcode))
continue;
} else if (IsCastOp && isa<CastInst>(I)) {
Value *Op0 = IBase->getOperand(0);
Type *Ty0 = Op0->getType();
Expand Down Expand Up @@ -1075,6 +1224,24 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
return InstructionsState::invalid();
}

if (IsBinOp) {
auto FindOp = [&](ArrayRef<InterchangeableInstruction> CandidateOp) {
for (Value *V : VL) {
if (isa<PoisonValue>(V))
continue;
for (const InterchangeableInstruction &I : CandidateOp)
if (cast<Instruction>(V)->getOpcode() == I.Opcode)
return cast<Instruction>(V);
}
llvm_unreachable(
"Cannot find the candidate instruction for InstructionsState.");
};
Instruction *MainOp = FindOp(InterchangeableOpcode);
Instruction *AltOp = AlternateInterchangeableOpcode.empty()
? MainOp
: FindOp(AlternateInterchangeableOpcode);
return InstructionsState(MainOp, AltOp);
}
return InstructionsState(cast<Instruction>(V),
cast<Instruction>(VL[AltIndex]));
}
Expand Down Expand Up @@ -2405,42 +2572,46 @@ class BoUpSLP {
}

/// Go through the instructions in VL and append their operands.
void appendOperandsOfVL(ArrayRef<Value *> VL, Instruction *VL0) {
void appendOperandsOfVL(ArrayRef<Value *> VL, Instruction *MainOp,
Instruction *AltOp) {
assert(!VL.empty() && "Bad VL");
assert((empty() || VL.size() == getNumLanes()) &&
"Expected same number of lanes");
// IntrinsicInst::isCommutative returns true if swapping the first "two"
// arguments to the intrinsic produces the same result.
constexpr unsigned IntrinsicNumOperands = 2;
unsigned NumOperands = VL0->getNumOperands();
ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands;
unsigned NumOperands = MainOp->getNumOperands();
ArgSize = isa<IntrinsicInst>(MainOp) ? IntrinsicNumOperands : NumOperands;
OpsVec.resize(NumOperands);
unsigned NumLanes = VL.size();
for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
for (unsigned OpIdx : seq<unsigned>(NumOperands))
OpsVec[OpIdx].resize(NumLanes);
for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
assert((isa<Instruction>(VL[Lane]) || isa<PoisonValue>(VL[Lane])) &&
"Expected instruction or poison value");
// Our tree has just 3 nodes: the root and two operands.
// It is therefore trivial to get the APO. We only need to check the
// opcode of VL[Lane] and whether the operand at OpIdx is the LHS or
// RHS operand. The LHS operand of both add and sub is never attached
// to an inversese operation in the linearized form, therefore its APO
// is false. The RHS is true only if VL[Lane] is an inverse operation.

// Since operand reordering is performed on groups of commutative
// operations or alternating sequences (e.g., +, -), we can safely
// tell the inverse operations by checking commutativity.
if (isa<PoisonValue>(VL[Lane])) {
for (auto [Lane, V] : enumerate(VL)) {
assert((isa<Instruction>(V) || isa<PoisonValue>(V)) &&
"Expected instruction or poison value");
if (isa<PoisonValue>(V)) {
for (unsigned OpIdx : seq<unsigned>(NumOperands))
OpsVec[OpIdx][Lane] = {
PoisonValue::get(VL0->getOperand(OpIdx)->getType()), true,
PoisonValue::get(MainOp->getOperand(OpIdx)->getType()), true,
false};
continue;
}
bool IsInverseOperation = !isCommutative(cast<Instruction>(VL[Lane]));
continue;
}
auto [SelectedOp, Ops] =
getInterchangeableInstruction(cast<Instruction>(V), MainOp, AltOp);
// Our tree has just 3 nodes: the root and two operands.
// It is therefore trivial to get the APO. We only need to check the
// opcode of V and whether the operand at OpIdx is the LHS or RHS
// operand. The LHS operand of both add and sub is never attached to an
// inversese operation in the linearized form, therefore its APO is
// false. The RHS is true only if V is an inverse operation.

// Since operand reordering is performed on groups of commutative
// operations or alternating sequences (e.g., +, -), we can safely
// tell the inverse operations by checking commutativity.
bool IsInverseOperation = !isCommutative(cast<Instruction>(SelectedOp));
for (unsigned OpIdx : seq<unsigned>(NumOperands)) {
bool APO = (OpIdx == 0) ? false : IsInverseOperation;
OpsVec[OpIdx][Lane] = {cast<Instruction>(VL[Lane])->getOperand(OpIdx),
APO, false};
OpsVec[OpIdx][Lane] = {Ops[OpIdx], APO, false};
}
}
}
Expand Down Expand Up @@ -2547,11 +2718,12 @@ class BoUpSLP {

public:
/// Initialize with all the operands of the instruction vector \p RootVL.
VLOperands(ArrayRef<Value *> RootVL, Instruction *VL0, const BoUpSLP &R)
VLOperands(ArrayRef<Value *> RootVL, Instruction *MainOp,
Instruction *AltOp, const BoUpSLP &R)
: TLI(*R.TLI), DL(*R.DL), SE(*R.SE), R(R),
L(R.LI->getLoopFor((VL0->getParent()))) {
L(R.LI->getLoopFor(MainOp->getParent())) {
// Append all the operands of RootVL.
appendOperandsOfVL(RootVL, VL0);
appendOperandsOfVL(RootVL, MainOp, AltOp);
}

/// \Returns a value vector with the operands across all lanes for the
Expand Down Expand Up @@ -3343,7 +3515,7 @@ class BoUpSLP {

/// Set this bundle's operand from Scalars.
void setOperand(const BoUpSLP &R, bool RequireReorder = false) {
VLOperands Ops(Scalars, MainOp, R);
VLOperands Ops(Scalars, MainOp, AltOp, R);
if (RequireReorder)
Ops.reorder();
for (unsigned I : seq<unsigned>(MainOp->getNumOperands()))
Expand Down Expand Up @@ -8559,7 +8731,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");

ValueList Left, Right;
VLOperands Ops(VL, VL0, *this);
VLOperands Ops(VL, VL0, S.getAltOp(), *this);
if (cast<CmpInst>(VL0)->isCommutative()) {
// Commutative predicate - collect + sort operands of the instructions
// so that each side is more likely to have the same opcode.
Expand Down Expand Up @@ -15617,7 +15789,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) {
Value *V = Builder.CreateBinOp(
static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
RHS);
propagateIRFlags(V, E->Scalars, VL0, It == MinBWs.end());
propagateIRFlags(V, E->Scalars, nullptr, It == MinBWs.end());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should it always be nullptr or are there cases where we can keep it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I don't know why we pass VL0 here. Only an alternate operation should pass a non-nullptr value.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should pass VL0 here, but we need to check whether all opcodes originally matched and, if not, pass the fourth argument as /*IncludeWrapFlags=*/false.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Before the PR, there is no difference between VL0 and nullptr. The opcode must be the same for all VL.
After the PR, VL0 cannot be used because opcode may be different.
I don't know why VL0 was used in the first place, since passing VL0 and passing nullptr give the same result for propagateIRFlags.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this part still requires extra work. If the opcode of the instruction does not match the opcode of intersection, its flags are ignored. This is not correct and must be fixed

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is used to fix

- ; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
- ; CHECK-NEXT:    [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
- ; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
+ ; CHECK-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>

VL0 is mul here. If we pass VL0, then eventually nsw will be passed.
However, shl does not contain nsw. We should pass nullptr here to get the correct result.

Copy link
Member

@alexey-bataev alexey-bataev Oct 22, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Say you have something like {mul nsw, shl, mul nsw, mul nsw}. In this case you will still emit mul nsw <4 x >, because shl will be ignored.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. But actually shl does not have nsw. We should emit mul <4 x >.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that is what is expected. But I assume that currently it will instead emit mul nsw <4 x >.

if (auto *I = dyn_cast<Instruction>(V)) {
V = ::propagateMetadata(I, E->Scalars);
// Drop nuw flags for abs(sub(commutative), true).
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/SLPVectorizer/AArch64/vec3-base.ll
Original file line number Diff line number Diff line change
Expand Up @@ -314,10 +314,10 @@ define void @store_try_reorder(ptr %dst) {
;
; POW2-ONLY-LABEL: @store_try_reorder(
; POW2-ONLY-NEXT: entry:
; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0
; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2
; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
; POW2-ONLY-NEXT: ret void
;
entry:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,18 @@ define void @test(ptr %a, i64 %0) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x ptr> poison, ptr [[A]], i32 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x ptr> [[TMP1]], <2 x ptr> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i64> <i64 poison, i64 0>, i64 [[TMP0]], i32 0
; CHECK-NEXT: br label %[[BB:.*]]
; CHECK: [[BB]]:
; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP0]], 1
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i64> [[TMP4]], i64 0, i32 1
; CHECK-NEXT: [[TMP5:%.*]] = or disjoint <2 x i64> [[TMP3]], <i64 1, i64 0>
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr double, <2 x ptr> [[TMP2]], <2 x i64> [[TMP5]]
; CHECK-NEXT: [[ARRAYIDX17_I28_1:%.*]] = getelementptr double, ptr [[A]], i64 [[TMP3]]
; CHECK-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison)
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x double>, ptr [[A]], align 8
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x double>, ptr [[A]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = fsub <2 x double> [[TMP8]], [[TMP9]]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x ptr> [[TMP6]], i32 0
; CHECK-NEXT: [[TMP9:%.*]] = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> splat (i1 true), <2 x double> poison)
; CHECK-NEXT: [[TMP7:%.*]] = load <2 x double>, ptr [[A]], align 8
; CHECK-NEXT: [[TMP10:%.*]] = load <2 x double>, ptr [[A]], align 8
; CHECK-NEXT: [[TMP11:%.*]] = fsub <2 x double> [[TMP7]], [[TMP10]]
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v2f64.p0.i64(<2 x double> [[TMP11]], ptr align 8 [[ARRAYIDX17_I28_1]], i64 -8, <2 x i1> splat (i1 true), i32 2)
; CHECK-NEXT: [[TMP12:%.*]] = fsub <2 x double> [[TMP9]], [[TMP11]]
; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v2f64.p0.i64(<2 x double> [[TMP12]], ptr align 8 [[TMP8]], i64 -8, <2 x i1> splat (i1 true), i32 2)
; CHECK-NEXT: br label %[[BB]]
;
entry:
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll
Original file line number Diff line number Diff line change
Expand Up @@ -324,10 +324,10 @@ define void @store_try_reorder(ptr %dst) {
;
; POW2-ONLY-LABEL: @store_try_reorder(
; POW2-ONLY-NEXT: entry:
; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0
; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT: [[ADD216:%.*]] = sub i32 0, 0
; POW2-ONLY-NEXT: [[ARRAYIDX_I1891:%.*]] = getelementptr i32, ptr [[DST]], i64 2
; POW2-ONLY-NEXT: store i32 [[ADD216]], ptr [[ARRAYIDX_I1891]], align 4
; POW2-ONLY-NEXT: ret void
;
entry:
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/Transforms/SLPVectorizer/X86/barriercall.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,8 @@ define i32 @foo(ptr nocapture %A, i32 %n) {
; CHECK-NEXT: [[CALL:%.*]] = tail call i32 (...) @bar()
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> poison, i32 [[N:%.*]], i32 0
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP1:%.*]] = mul nsw <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 3, i32 10>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP3]], splat (i32 9)
; CHECK-NEXT: [[TMP2:%.*]] = mul <4 x i32> [[SHUFFLE]], <i32 5, i32 9, i32 8, i32 10>
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[TMP2]], splat (i32 9)
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[A:%.*]], align 4
; CHECK-NEXT: ret i32 undef
;
Expand Down
Loading
Loading