Commit d7258d8

[SLP] Vectorize non-power-of-2 ops with padding.
This patch introduces a new VectorizeWithPadding node type for root and leaf nodes, to allow vectorizing loads/stores with a non-power-of-2 number of elements. VectorizeWithPadding load nodes pad the result to the next power of 2 with poison elements.

Non-leaf nodes operate on normal power-of-2 vectors. For those non-leaf nodes, we still track the number of padding elements needed to reach the next power of 2, to be used in various places such as cost computation. VectorizeWithPadding store nodes strip away the padding elements and store the non-power-of-2 number of data elements.

Note that reordering and shuffling are not yet implemented for nodes requiring padding, to keep the initial implementation simpler. The initial implementation also only tries to vectorize with padding if the original number of elements plus one is a power of 2, i.e. if only a single padding element is needed.

The feature is guarded by a new flag, off by default for now.
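To illustrate the idea, here is a minimal hand-written sketch (not IR produced by this patch; the function name and %src/%dst are hypothetical): a bundle of seven i8 elements needs a single poison padding lane to reach the next power of 2, the inner operation runs at the padded width, and the padding is stripped again before the store.

; Conceptual sketch only: pad a 7-element load to 8 lanes with poison,
; operate at the power-of-2 width, then drop the padding lane for the store.
define void @pad_sketch(ptr %src, ptr noalias %dst) {
entry:
  %wide = load <7 x i8>, ptr %src, align 1
  ; widen to the next power of 2; lane 7 is the poison padding element
  %padded = shufflevector <7 x i8> %wide, <7 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 poison>
  %mul = mul nsw <8 x i8> %padded, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  ; strip the padding lane and store only the 7 data elements
  %narrow = shufflevector <8 x i8> %mul, <8 x i8> poison, <7 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6>
  store <7 x i8> %narrow, ptr %dst, align 1
  ret void
}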
1 parent e4375bf commit d7258d8

7 files changed: +695 −353 lines

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 105 additions & 19 deletions
@@ -179,6 +179,10 @@ static cl::opt<bool>
     ViewSLPTree("view-slp-tree", cl::Hidden,
                 cl::desc("Display the SLP trees with Graphviz"));
 
+static cl::opt<bool> VectorizeWithPadding(
+    "slp-vectorize-with-padding", cl::init(false), cl::Hidden,
+    cl::desc("Try to vectorize non-power-of-2 operations using padding."));
+
 // Limit the number of alias checks. The limit is chosen so that
 // it has no negative effect on the llvm benchmarks.
 static const unsigned AliasedCheckLimit = 10;
@@ -2733,6 +2737,9 @@ class BoUpSLP {
                        SmallVectorImpl<Value *> *OpScalars = nullptr,
                        SmallVectorImpl<Value *> *AltScalars = nullptr) const;
 
+    /// Return the number of padding lanes (containing poison) for this node.
+    unsigned isNonPowOf2Vec() const { return !isPowerOf2_32(Scalars.size()); }
+
 #ifndef NDEBUG
     /// Debug printer.
     LLVM_DUMP_METHOD void dump() const {
@@ -2891,9 +2898,13 @@ class BoUpSLP {
         ValueToGatherNodes.try_emplace(V).first->getSecond().insert(Last);
     }
 
-    if (UserTreeIdx.UserTE)
+    if (UserTreeIdx.UserTE) {
       Last->UserTreeIndices.push_back(UserTreeIdx);
-
+      if (!isPowerOf2_32(Last->Scalars.size())) {
+        assert((Last->ReorderIndices.empty()) &&
+               "Reordering isn't implemented for nodes with padding yet");
+      }
+    }
     return Last;
   }
 

@@ -2921,7 +2932,8 @@ class BoUpSLP {
   /// and fills required data before actual scheduling of the instructions.
   TreeEntry::EntryState getScalarsVectorizationState(
       InstructionsState &S, ArrayRef<Value *> VL, bool IsScatterVectorizeUserTE,
-      OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps) const;
+      OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps,
+      bool HasPadding) const;
 
   /// Maps a specific scalar to its tree entry.
   SmallDenseMap<Value *, TreeEntry *> ScalarToTreeEntry;
@@ -3881,6 +3893,9 @@ static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
   Order.clear();
   // Check the order of pointer operands or that all pointers are the same.
   bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order);
+  if (!Order.empty() && !isPowerOf2_32(VL.size()))
+    return LoadsState::Gather;
+
   if (IsSorted || all_of(PointerOps, [&](Value *P) {
         return arePointersCompatible(P, PointerOps.front(), TLI);
       })) {
@@ -4570,6 +4585,10 @@ bool BoUpSLP::canReorderOperands(
     TreeEntry *UserTE, SmallVectorImpl<std::pair<unsigned, TreeEntry *>> &Edges,
     ArrayRef<TreeEntry *> ReorderableGathers,
     SmallVectorImpl<TreeEntry *> &GatherOps) {
+  // Reordering isn't implemented for nodes with padding yet.
+  if (UserTE->isNonPowOf2Vec())
+    return false;
+
   for (unsigned I = 0, E = UserTE->getNumOperands(); I < E; ++I) {
     if (any_of(Edges, [I](const std::pair<unsigned, TreeEntry *> &OpData) {
           return OpData.first == I &&
@@ -4748,6 +4767,9 @@ void BoUpSLP::reorderBottomToTop(bool IgnoreReorder) {
     auto Res = OrdersUses.insert(std::make_pair(OrdersType(), 0));
     const auto &&AllowsReordering = [IgnoreReorder, &GathersToOrders](
                                         const TreeEntry *TE) {
+      // Reordering for nodes with padding is not implemented yet.
+      if (TE->isNonPowOf2Vec())
+        return false;
       if (!TE->ReorderIndices.empty() || !TE->ReuseShuffleIndices.empty() ||
           (TE->State == TreeEntry::Vectorize && TE->isAltShuffle()) ||
           (IgnoreReorder && TE->Idx == 0))
@@ -5235,7 +5257,8 @@ static bool isAlternateInstruction(const Instruction *I,
 
 BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
     InstructionsState &S, ArrayRef<Value *> VL, bool IsScatterVectorizeUserTE,
-    OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps) const {
+    OrdersType &CurrentOrder, SmallVectorImpl<Value *> &PointerOps,
+    bool HasPadding) const {
   assert(S.MainOp && "Expected instructions with same/alternate opcodes only.");
 
   unsigned ShuffleOrOp =
@@ -5258,7 +5281,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
   }
   case Instruction::ExtractValue:
   case Instruction::ExtractElement: {
-    bool Reuse = canReuseExtract(VL, VL0, CurrentOrder);
+    bool Reuse = !HasPadding && canReuseExtract(VL, VL0, CurrentOrder);
     if (Reuse || !CurrentOrder.empty())
       return TreeEntry::Vectorize;
     LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
@@ -5355,6 +5378,15 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
     }
     return TreeEntry::Vectorize;
   }
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::URem:
+  case Instruction::SRem:
+    // The instruction may trigger immediate UB on the poison/undef padding
+    // elements, so force gather to avoid introducing new UB.
+    if (HasPadding)
+      return TreeEntry::NeedToGather;
+    [[fallthrough]];
   case Instruction::Select:
   case Instruction::FNeg:
   case Instruction::Add:
@@ -5363,11 +5395,7 @@ BoUpSLP::TreeEntry::EntryState BoUpSLP::getScalarsVectorizationState(
   case Instruction::FSub:
   case Instruction::Mul:
   case Instruction::FMul:
-  case Instruction::UDiv:
-  case Instruction::SDiv:
   case Instruction::FDiv:
-  case Instruction::URem:
-  case Instruction::SRem:
   case Instruction::FRem:
   case Instruction::Shl:
   case Instruction::LShr:
@@ -5550,6 +5578,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
                              bool DoNotFail = false) {
     // Check that every instruction appears once in this bundle.
     DenseMap<Value *, unsigned> UniquePositions(VL.size());
+    auto OriginalVL = VL;
     for (Value *V : VL) {
       if (isConstant(V)) {
         ReuseShuffleIndicies.emplace_back(
@@ -5562,6 +5591,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       if (Res.second)
         UniqueValues.emplace_back(V);
     }
+
     size_t NumUniqueScalarValues = UniqueValues.size();
     if (NumUniqueScalarValues == VL.size()) {
       ReuseShuffleIndicies.clear();
@@ -5589,6 +5619,14 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           NonUniqueValueVL.append(PWSz - UniqueValues.size(),
                                   UniqueValues.back());
           VL = NonUniqueValueVL;
+
+          if (UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec()) {
+            LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported "
+                                 "for nodes with padding.\n");
+            newTreeEntry(OriginalVL, std::nullopt /*not vectorized*/, S,
+                         UserTreeIdx);
+            return false;
+          }
         }
         return true;
       }
@@ -5597,6 +5635,13 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         return false;
       }
       VL = UniqueValues;
+      if (UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec()) {
+        LLVM_DEBUG(dbgs() << "SLP: Reshuffling scalars not yet supported for "
+                             "nodes with padding.\n");
+        newTreeEntry(OriginalVL, std::nullopt /*not vectorized*/, S,
+                     UserTreeIdx);
+        return false;
+      }
     }
     return true;
   };
@@ -5861,7 +5906,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
   OrdersType CurrentOrder;
   SmallVector<Value *> PointerOps;
   TreeEntry::EntryState State = getScalarsVectorizationState(
-      S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps);
+      S, VL, IsScatterVectorizeUserTE, CurrentOrder, PointerOps,
+      UserTreeIdx.UserTE && UserTreeIdx.UserTE->isNonPowOf2Vec() > 0);
   if (State == TreeEntry::NeedToGather) {
     newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
                  ReuseShuffleIndicies);
@@ -6957,7 +7003,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
     return Constant::getAllOnesValue(Ty);
   }
 
-  InstructionCost getBuildVectorCost(ArrayRef<Value *> VL, Value *Root) {
+  InstructionCost getBuildVectorCost(ArrayRef<Value *> VL, Value *Root,
+                                     bool WithPadding = false) {
     if ((!Root && allConstant(VL)) || all_of(VL, UndefValue::classof))
       return TTI::TCC_Free;
     auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size());
@@ -6968,7 +7015,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
     InstructionsState S = getSameOpcode(VL, *R.TLI);
     const unsigned Sz = R.DL->getTypeSizeInBits(VL.front()->getType());
     unsigned MinVF = R.getMinVF(2 * Sz);
-    if (VL.size() > 2 &&
+    if (!WithPadding && VL.size() > 2 &&
         ((S.getOpcode() == Instruction::Load && !S.isAltShuffle()) ||
          (InVectors.empty() &&
           any_of(seq<unsigned>(0, VL.size() / MinVF),
@@ -7079,7 +7126,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
         }
         GatherCost -= ScalarsCost;
       }
-    } else if (!Root && isSplat(VL)) {
+    } else if (!WithPadding && !Root && isSplat(VL)) {
      // Found the broadcasting of the single scalar, calculate the cost as
      // the broadcast.
      const auto *It =
@@ -7642,8 +7689,8 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
       CommonMask[Idx] = Mask[Idx] + VF;
   }
   Value *gather(ArrayRef<Value *> VL, unsigned MaskVF = 0,
-                Value *Root = nullptr) {
-    Cost += getBuildVectorCost(VL, Root);
+                Value *Root = nullptr, bool WithPadding = false) {
+    Cost += getBuildVectorCost(VL, Root, WithPadding);
     if (!Root) {
       // FIXME: Need to find a way to avoid use of getNullValue here.
       SmallVector<Constant *> Vals;
@@ -9712,6 +9759,9 @@ BoUpSLP::isGatherShuffledEntry(
   // No need to check for the topmost gather node.
   if (TE == VectorizableTree.front().get())
     return {};
+  // Gathering for nodes with padding is not implemented yet.
+  if (TE->isNonPowOf2Vec())
+    return {};
   Mask.assign(VL.size(), PoisonMaskElem);
   assert(TE->UserTreeIndices.size() == 1 &&
          "Expected only single user of the gather node.");
@@ -10433,7 +10483,7 @@ class BoUpSLP::ShuffleInstructionBuilder final : public BaseShuffleAnalysis {
     add(V1, NewMask);
   }
   Value *gather(ArrayRef<Value *> VL, unsigned MaskVF = 0,
-                Value *Root = nullptr) {
+                Value *Root = nullptr, bool WithPadding = false) {
     return R.gather(VL, Root);
   }
   Value *createFreeze(Value *V) { return Builder.CreateFreeze(V); }
@@ -10502,7 +10552,6 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx,
     SmallVector<int> Mask(E->ReorderIndices.begin(), E->ReorderIndices.end());
     reorderScalars(VL, Mask);
   }
-  const unsigned VF = VL.size();
   InstructionsState S = getSameOpcode(VL, *TLI);
   // Special processing for GEPs bundle, which may include non-gep values.
   if (!S.getOpcode() && VL.front()->getType()->isPointerTy()) {
@@ -10544,6 +10593,7 @@ Value *BoUpSLP::vectorizeOperand(TreeEntry *E, unsigned NodeIdx,
       ShuffleBuilder.add(V, Mask);
       return ShuffleBuilder.finalize(std::nullopt);
     };
+    const unsigned VF = VL.size();
     Value *V = vectorizeTree(VE, PostponedPHIs);
     if (VF != cast<FixedVectorType>(V->getType())->getNumElements()) {
       if (!VE->ReuseShuffleIndices.empty()) {
@@ -10670,6 +10720,14 @@ ResTy BoUpSLP::processBuildVector(const TreeEntry *E, Args &...Params) {
     return true;
   };
   BVTy ShuffleBuilder(Params...);
+  if (E->isNonPowOf2Vec()) {
+    Value *BV = ShuffleBuilder.gather(E->Scalars, 0, nullptr, true);
+    SmallVector<int> Mask(VF, PoisonMaskElem);
+    std::iota(Mask.begin(), Mask.begin() + E->Scalars.size(), 0);
+    ShuffleBuilder.add(BV, Mask);
+    return ShuffleBuilder.finalize(E->ReuseShuffleIndices);
+  }
+
   ResTy Res = ResTy();
   SmallVector<int> Mask;
   SmallVector<int> ExtractMask(GatheredScalars.size(), PoisonMaskElem);
@@ -13434,7 +13492,8 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
   unsigned VF = Chain.size();
 
   if (!isPowerOf2_32(Sz) || !isPowerOf2_32(VF) || VF < 2 || VF < MinVF)
-    return false;
+    if (!VectorizeWithPadding || (VF < MinVF && VF + 1 != MinVF))
+      return false;
 
   LLVM_DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << Idx
                     << "\n");
@@ -13530,9 +13589,36 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
                         << "MinVF (" << MinVF << ")\n");
     }
 
+    unsigned StartIdx = 0;
+    if (VectorizeWithPadding) {
+      unsigned CandVF = Operands.size() + 1;
+      if (isPowerOf2_32(CandVF) && CandVF <= MaxVF) {
+        assert(
+            all_of(
+                Operands,
+                [&](Value *V) {
+                  return cast<StoreInst>(V)->getValueOperand()->getType() ==
+                         cast<StoreInst>(Operands.front())
+                             ->getValueOperand()
+                             ->getType();
+                }) &&
+            "Expected all operands of same type.");
+        if (!VectorizedStores.count(Operands.front()) &&
+            !VectorizedStores.count(Operands.back()) &&
+            TriedSequences
+                .insert(std::make_pair(Operands.front(), Operands.back()))
+                .second &&
+            vectorizeStoreChain(Operands, R, Operands.size(), MinVF)) {
+          // Mark the vectorized stores so that we don't vectorize them again.
+          VectorizedStores.insert(Operands.begin(), Operands.end());
+          Changed = true;
+          StartIdx += Operands.size();
+        }
+      }
+    }
+
     // FIXME: Is division-by-2 the correct step? Should we assert that the
     // register size is a power-of-2?
-    unsigned StartIdx = 0;
     for (unsigned Size = MaxVF; Size >= MinVF; Size /= 2) {
       for (unsigned Cnt = StartIdx, E = Operands.size(); Cnt + Size <= E;) {
         ArrayRef<Value *> Slice = ArrayRef(Operands).slice(Cnt, Size);

llvm/test/Transforms/SLPVectorizer/AArch64/vec15-base.ll

Lines changed: 39 additions & 31 deletions
@@ -1,35 +1,45 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
-; RUN: opt -passes=slp-vectorizer -mtriple=arm64-apple-ios -S %s | FileCheck %s
+; RUN: opt -passes=slp-vectorizer -slp-vectorize-with-padding -mtriple=arm64-apple-ios -S %s | FileCheck --check-prefixes=PADDING %s
+; RUN: opt -passes=slp-vectorizer -slp-vectorize-with-padding=false -mtriple=arm64-apple-ios -S %s | FileCheck --check-prefixes=NO-PADDING %s
 
 define void @v15_load_i8_mul_by_constant_store(ptr %src, ptr noalias %dst) {
-; CHECK-LABEL: define void @v15_load_i8_mul_by_constant_store(
-; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0
-; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[GEP_SRC_0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
-; CHECK-NEXT:    store <8 x i8> [[TMP1]], ptr [[DST]], align 1
-; CHECK-NEXT:    [[GEP_SRC_8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 8
-; CHECK-NEXT:    [[DST_8:%.*]] = getelementptr i8, ptr [[DST]], i8 8
-; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_SRC_8]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], <i8 10, i8 10, i8 10, i8 10>
-; CHECK-NEXT:    store <4 x i8> [[TMP3]], ptr [[DST_8]], align 1
-; CHECK-NEXT:    [[GEP_SRC_12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 12
-; CHECK-NEXT:    [[L_SRC_12:%.*]] = load i8, ptr [[GEP_SRC_12]], align 4
-; CHECK-NEXT:    [[MUL_12:%.*]] = mul nsw i8 [[L_SRC_12]], 10
-; CHECK-NEXT:    [[DST_12:%.*]] = getelementptr i8, ptr [[DST]], i8 12
-; CHECK-NEXT:    store i8 [[MUL_12]], ptr [[DST_12]], align 1
-; CHECK-NEXT:    [[GEP_SRC_13:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 13
-; CHECK-NEXT:    [[L_SRC_13:%.*]] = load i8, ptr [[GEP_SRC_13]], align 4
-; CHECK-NEXT:    [[MUL_13:%.*]] = mul nsw i8 [[L_SRC_13]], 10
-; CHECK-NEXT:    [[DST_13:%.*]] = getelementptr i8, ptr [[DST]], i8 13
-; CHECK-NEXT:    store i8 [[MUL_13]], ptr [[DST_13]], align 1
-; CHECK-NEXT:    [[GEP_SRC_14:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 14
-; CHECK-NEXT:    [[L_SRC_14:%.*]] = load i8, ptr [[GEP_SRC_14]], align 4
-; CHECK-NEXT:    [[MUL_14:%.*]] = mul nsw i8 [[L_SRC_14]], 10
-; CHECK-NEXT:    [[DST_14:%.*]] = getelementptr i8, ptr [[DST]], i8 14
-; CHECK-NEXT:    store i8 [[MUL_14]], ptr [[DST_14]], align 1
-; CHECK-NEXT:    ret void
+; PADDING-LABEL: define void @v15_load_i8_mul_by_constant_store(
+; PADDING-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
+; PADDING-NEXT:  entry:
+; PADDING-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0
+; PADDING-NEXT:    [[TMP0:%.*]] = load <15 x i8>, ptr [[GEP_SRC_0]], align 4
+; PADDING-NEXT:    [[TMP1:%.*]] = mul nsw <15 x i8> [[TMP0]], <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
+; PADDING-NEXT:    store <15 x i8> [[TMP1]], ptr [[DST]], align 1
+; PADDING-NEXT:    ret void
+;
+; NO-PADDING-LABEL: define void @v15_load_i8_mul_by_constant_store(
+; NO-PADDING-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
+; NO-PADDING-NEXT:  entry:
+; NO-PADDING-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 0
+; NO-PADDING-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[GEP_SRC_0]], align 4
+; NO-PADDING-NEXT:    [[TMP1:%.*]] = mul nsw <8 x i8> [[TMP0]], <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
+; NO-PADDING-NEXT:    store <8 x i8> [[TMP1]], ptr [[DST]], align 1
+; NO-PADDING-NEXT:    [[GEP_SRC_8:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 8
+; NO-PADDING-NEXT:    [[DST_8:%.*]] = getelementptr i8, ptr [[DST]], i8 8
+; NO-PADDING-NEXT:    [[TMP2:%.*]] = load <4 x i8>, ptr [[GEP_SRC_8]], align 4
+; NO-PADDING-NEXT:    [[TMP3:%.*]] = mul nsw <4 x i8> [[TMP2]], <i8 10, i8 10, i8 10, i8 10>
+; NO-PADDING-NEXT:    store <4 x i8> [[TMP3]], ptr [[DST_8]], align 1
+; NO-PADDING-NEXT:    [[GEP_SRC_12:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 12
+; NO-PADDING-NEXT:    [[L_SRC_12:%.*]] = load i8, ptr [[GEP_SRC_12]], align 4
+; NO-PADDING-NEXT:    [[MUL_12:%.*]] = mul nsw i8 [[L_SRC_12]], 10
+; NO-PADDING-NEXT:    [[DST_12:%.*]] = getelementptr i8, ptr [[DST]], i8 12
+; NO-PADDING-NEXT:    store i8 [[MUL_12]], ptr [[DST_12]], align 1
+; NO-PADDING-NEXT:    [[GEP_SRC_13:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 13
+; NO-PADDING-NEXT:    [[L_SRC_13:%.*]] = load i8, ptr [[GEP_SRC_13]], align 4
+; NO-PADDING-NEXT:    [[MUL_13:%.*]] = mul nsw i8 [[L_SRC_13]], 10
+; NO-PADDING-NEXT:    [[DST_13:%.*]] = getelementptr i8, ptr [[DST]], i8 13
+; NO-PADDING-NEXT:    store i8 [[MUL_13]], ptr [[DST_13]], align 1
+; NO-PADDING-NEXT:    [[GEP_SRC_14:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i8 14
+; NO-PADDING-NEXT:    [[L_SRC_14:%.*]] = load i8, ptr [[GEP_SRC_14]], align 4
+; NO-PADDING-NEXT:    [[MUL_14:%.*]] = mul nsw i8 [[L_SRC_14]], 10
+; NO-PADDING-NEXT:    [[DST_14:%.*]] = getelementptr i8, ptr [[DST]], i8 14
+; NO-PADDING-NEXT:    store i8 [[MUL_14]], ptr [[DST_14]], align 1
+; NO-PADDING-NEXT:    ret void
 ;
 entry:
   %gep.src.0 = getelementptr inbounds i8, ptr %src, i8 0
@@ -123,5 +133,3 @@ entry:
 
   ret void
 }
-
-