Skip to content

Commit 8030481

Browse files
committed
Revert "[SLP]Add detection of shuffled/perfect matching of tree entries."
This reverts commit d6fde91 to fix compiler crashes.
1 parent e28435c commit 8030481

File tree

3 files changed: 115 additions (+115) and 138 deletions (-138).

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 2 additions & 97 deletions
Original file line numberDiff line numberDiff line change
@@ -1542,14 +1542,6 @@ class BoUpSLP {
15421542
getGatherCost(FixedVectorType *Ty,
15431543
const DenseSet<unsigned> &ShuffledIndices) const;
15441544

1545-
/// Checks if the gathered \p VL can be represented as shuffle(s) of previous
1546-
/// tree entries.
1547-
/// \returns ShuffleKind, if gathered values can be represented as shuffles of
1548-
/// previous tree entries. \p Mask is filled with the shuffle mask.
1549-
Optional<TargetTransformInfo::ShuffleKind>
1550-
isGatherShuffledEntry(const TreeEntry *TE, SmallVectorImpl<int> &Mask,
1551-
SmallVectorImpl<const TreeEntry *> &Entries);
1552-
15531545
/// \returns the scalarization cost for this list of values. Assuming that
15541546
/// this subtree gets vectorized, we may need to extract the values from the
15551547
/// roots. This method calculates the cost of extracting the values.
@@ -3568,27 +3560,7 @@ InstructionCost BoUpSLP::getEntryCost(TreeEntry *E) {
35683560
return ReuseShuffleCost + Cost;
35693561
}
35703562
}
3571-
InstructionCost GatherCost = 0;
3572-
SmallVector<int> Mask;
3573-
SmallVector<const TreeEntry *> Entries;
3574-
Optional<TargetTransformInfo::ShuffleKind> Shuffle =
3575-
isGatherShuffledEntry(E, Mask, Entries);
3576-
if (Shuffle.hasValue()) {
3577-
if (ShuffleVectorInst::isIdentityMask(Mask)) {
3578-
LLVM_DEBUG(
3579-
dbgs()
3580-
<< "SLP: perfect diamond match for gather bundle that starts with "
3581-
<< *VL.front() << ".\n");
3582-
} else {
3583-
LLVM_DEBUG(dbgs() << "SLP: shuffled " << Entries.size()
3584-
<< " entries for bundle that starts with "
3585-
<< *VL.front() << ".\n");
3586-
GatherCost = TTI->getShuffleCost(*Shuffle, VecTy, Mask);
3587-
}
3588-
} else {
3589-
GatherCost = getGatherCost(VL);
3590-
}
3591-
return ReuseShuffleCost + GatherCost;
3563+
return ReuseShuffleCost + getGatherCost(VL);
35923564
}
35933565
assert((E->State == TreeEntry::Vectorize ||
35943566
E->State == TreeEntry::ScatterVectorize) &&
@@ -4244,61 +4216,6 @@ InstructionCost BoUpSLP::getTreeCost() {
42444216
return Cost;
42454217
}
42464218

4247-
Optional<TargetTransformInfo::ShuffleKind>
4248-
BoUpSLP::isGatherShuffledEntry(const TreeEntry *TE, SmallVectorImpl<int> &Mask,
4249-
SmallVectorImpl<const TreeEntry *> &Entries) {
4250-
auto *VLIt = find_if(VectorizableTree,
4251-
[TE](const std::unique_ptr<TreeEntry> &EntryPtr) {
4252-
return EntryPtr.get() == TE;
4253-
});
4254-
assert(VLIt != VectorizableTree.end() &&
4255-
"Gathered values should be in the tree.");
4256-
Mask.clear();
4257-
Entries.clear();
4258-
DenseMap<const TreeEntry *, int> Used;
4259-
int NumShuffles = 0;
4260-
for (int I = 0, E = TE->Scalars.size(); I < E; ++I) {
4261-
Value *V = TE->Scalars[I];
4262-
const TreeEntry *VTE = getTreeEntry(V);
4263-
if (!VTE) {
4264-
// Check if it is used in one of the gathered entries.
4265-
const auto *It =
4266-
find_if(make_range(VectorizableTree.begin(), VLIt),
4267-
[V](const std::unique_ptr<TreeEntry> &EntryPtr) {
4268-
return EntryPtr->State == TreeEntry::NeedToGather &&
4269-
is_contained(EntryPtr->Scalars, V);
4270-
});
4271-
if (It != VLIt)
4272-
VTE = It->get();
4273-
}
4274-
if (VTE) {
4275-
auto Res = Used.try_emplace(VTE, NumShuffles);
4276-
if (Res.second) {
4277-
Entries.push_back(VTE);
4278-
++NumShuffles;
4279-
}
4280-
Mask.push_back(
4281-
Res.first->second * E +
4282-
std::distance(VTE->Scalars.begin(), find(VTE->Scalars, V)));
4283-
continue;
4284-
}
4285-
return None;
4286-
}
4287-
if (NumShuffles == 1) {
4288-
if (ShuffleVectorInst::isReverseMask(Mask))
4289-
return TargetTransformInfo::SK_Reverse;
4290-
return TargetTransformInfo::SK_PermuteSingleSrc;
4291-
}
4292-
if (NumShuffles == 2) {
4293-
if (ShuffleVectorInst::isSelectMask(Mask))
4294-
return TargetTransformInfo::SK_Select;
4295-
if (ShuffleVectorInst::isTransposeMask(Mask))
4296-
return TargetTransformInfo::SK_Transpose;
4297-
return TargetTransformInfo::SK_PermuteTwoSrc;
4298-
}
4299-
return None;
4300-
}
4301-
43024219
InstructionCost
43034220
BoUpSLP::getGatherCost(FixedVectorType *Ty,
43044221
const DenseSet<unsigned> &ShuffledIndices) const {
@@ -4582,19 +4499,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
45824499
bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
45834500
if (E->State == TreeEntry::NeedToGather) {
45844501
setInsertPointAfterBundle(E);
4585-
Value *Vec;
4586-
SmallVector<int> Mask;
4587-
SmallVector<const TreeEntry *> Entries;
4588-
Optional<TargetTransformInfo::ShuffleKind> Shuffle =
4589-
isGatherShuffledEntry(E, Mask, Entries);
4590-
if (Shuffle.hasValue()) {
4591-
assert((Entries.size() == 1 || Entries.size() == 2) &&
4592-
"Expected shuffle of 1 or 2 entries.");
4593-
Vec = Builder.CreateShuffleVector(Entries.front()->VectorizedValue,
4594-
Entries.back()->VectorizedValue, Mask);
4595-
} else {
4596-
Vec = gather(E->Scalars);
4597-
}
4502+
Value *Vec = gather(E->Scalars);
45984503
if (NeedToShuffleReuses) {
45994504
ShuffleBuilder.addMask(E->ReuseShuffleIndices);
46004505
Vec = ShuffleBuilder.finalize(Vec);

llvm/test/Transforms/SLPVectorizer/AArch64/gather-cost.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ target triple = "aarch64--linux-gnu"
1010
; REMARK-LABEL: Function: gather_multiple_use
1111
; REMARK: Args:
1212
; REMARK-NEXT: - String: 'Vectorized horizontal reduction with cost '
13-
; REMARK-NEXT: - Cost: '-16'
13+
; REMARK-NEXT: - Cost: '-7'
1414
;
1515
; REMARK-NOT: Function: gather_load
1616

llvm/test/Transforms/SLPVectorizer/X86/matched-shuffled-entries.ll

Lines changed: 112 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -4,52 +4,124 @@
44
define i32 @bar() local_unnamed_addr {
55
; CHECK-LABEL: @bar(
66
; CHECK-NEXT: entry:
7+
; CHECK-NEXT: [[ADD103:%.*]] = add nsw i32 undef, undef
8+
; CHECK-NEXT: [[SUB104:%.*]] = sub nsw i32 undef, undef
9+
; CHECK-NEXT: [[ADD105:%.*]] = add nsw i32 undef, undef
10+
; CHECK-NEXT: [[SUB106:%.*]] = sub nsw i32 undef, undef
11+
; CHECK-NEXT: [[SHR_I:%.*]] = lshr i32 [[ADD103]], 15
12+
; CHECK-NEXT: [[AND_I:%.*]] = and i32 [[SHR_I]], 65537
13+
; CHECK-NEXT: [[MUL_I:%.*]] = mul nuw i32 [[AND_I]], 65535
14+
; CHECK-NEXT: [[ADD_I:%.*]] = add i32 [[MUL_I]], [[ADD103]]
15+
; CHECK-NEXT: [[XOR_I:%.*]] = xor i32 [[ADD_I]], [[MUL_I]]
16+
; CHECK-NEXT: [[SHR_I64:%.*]] = lshr i32 [[ADD105]], 15
17+
; CHECK-NEXT: [[AND_I65:%.*]] = and i32 [[SHR_I64]], 65537
18+
; CHECK-NEXT: [[MUL_I66:%.*]] = mul nuw i32 [[AND_I65]], 65535
19+
; CHECK-NEXT: [[ADD_I67:%.*]] = add i32 [[MUL_I66]], [[ADD105]]
20+
; CHECK-NEXT: [[XOR_I68:%.*]] = xor i32 [[ADD_I67]], [[MUL_I66]]
21+
; CHECK-NEXT: [[SHR_I69:%.*]] = lshr i32 [[SUB104]], 15
22+
; CHECK-NEXT: [[AND_I70:%.*]] = and i32 [[SHR_I69]], 65537
23+
; CHECK-NEXT: [[MUL_I71:%.*]] = mul nuw i32 [[AND_I70]], 65535
24+
; CHECK-NEXT: [[ADD_I72:%.*]] = add i32 [[MUL_I71]], [[SUB104]]
25+
; CHECK-NEXT: [[XOR_I73:%.*]] = xor i32 [[ADD_I72]], [[MUL_I71]]
26+
; CHECK-NEXT: [[SHR_I74:%.*]] = lshr i32 [[SUB106]], 15
27+
; CHECK-NEXT: [[AND_I75:%.*]] = and i32 [[SHR_I74]], 65537
28+
; CHECK-NEXT: [[MUL_I76:%.*]] = mul nuw i32 [[AND_I75]], 65535
29+
; CHECK-NEXT: [[ADD_I77:%.*]] = add i32 [[MUL_I76]], [[SUB106]]
30+
; CHECK-NEXT: [[XOR_I78:%.*]] = xor i32 [[ADD_I77]], [[MUL_I76]]
31+
; CHECK-NEXT: [[ADD110:%.*]] = add i32 [[XOR_I68]], [[XOR_I]]
32+
; CHECK-NEXT: [[ADD112:%.*]] = add i32 [[ADD110]], [[XOR_I73]]
33+
; CHECK-NEXT: [[ADD113:%.*]] = add i32 [[ADD112]], [[XOR_I78]]
734
; CHECK-NEXT: [[ADD78_1:%.*]] = add nsw i32 undef, undef
835
; CHECK-NEXT: [[SUB86_1:%.*]] = sub nsw i32 undef, undef
936
; CHECK-NEXT: [[ADD94_1:%.*]] = add nsw i32 undef, undef
1037
; CHECK-NEXT: [[SUB102_1:%.*]] = sub nsw i32 undef, undef
38+
; CHECK-NEXT: [[ADD103_1:%.*]] = add nsw i32 [[ADD94_1]], [[ADD78_1]]
39+
; CHECK-NEXT: [[SUB104_1:%.*]] = sub nsw i32 [[ADD78_1]], [[ADD94_1]]
40+
; CHECK-NEXT: [[ADD105_1:%.*]] = add nsw i32 [[SUB102_1]], [[SUB86_1]]
41+
; CHECK-NEXT: [[SUB106_1:%.*]] = sub nsw i32 [[SUB86_1]], [[SUB102_1]]
42+
; CHECK-NEXT: [[SHR_I_1:%.*]] = lshr i32 [[ADD103_1]], 15
43+
; CHECK-NEXT: [[AND_I_1:%.*]] = and i32 [[SHR_I_1]], 65537
44+
; CHECK-NEXT: [[MUL_I_1:%.*]] = mul nuw i32 [[AND_I_1]], 65535
45+
; CHECK-NEXT: [[ADD_I_1:%.*]] = add i32 [[MUL_I_1]], [[ADD103_1]]
46+
; CHECK-NEXT: [[XOR_I_1:%.*]] = xor i32 [[ADD_I_1]], [[MUL_I_1]]
47+
; CHECK-NEXT: [[SHR_I64_1:%.*]] = lshr i32 [[ADD105_1]], 15
48+
; CHECK-NEXT: [[AND_I65_1:%.*]] = and i32 [[SHR_I64_1]], 65537
49+
; CHECK-NEXT: [[MUL_I66_1:%.*]] = mul nuw i32 [[AND_I65_1]], 65535
50+
; CHECK-NEXT: [[ADD_I67_1:%.*]] = add i32 [[MUL_I66_1]], [[ADD105_1]]
51+
; CHECK-NEXT: [[XOR_I68_1:%.*]] = xor i32 [[ADD_I67_1]], [[MUL_I66_1]]
52+
; CHECK-NEXT: [[SHR_I69_1:%.*]] = lshr i32 [[SUB104_1]], 15
53+
; CHECK-NEXT: [[AND_I70_1:%.*]] = and i32 [[SHR_I69_1]], 65537
54+
; CHECK-NEXT: [[MUL_I71_1:%.*]] = mul nuw i32 [[AND_I70_1]], 65535
55+
; CHECK-NEXT: [[ADD_I72_1:%.*]] = add i32 [[MUL_I71_1]], [[SUB104_1]]
56+
; CHECK-NEXT: [[XOR_I73_1:%.*]] = xor i32 [[ADD_I72_1]], [[MUL_I71_1]]
57+
; CHECK-NEXT: [[SHR_I74_1:%.*]] = lshr i32 [[SUB106_1]], 15
58+
; CHECK-NEXT: [[AND_I75_1:%.*]] = and i32 [[SHR_I74_1]], 65537
59+
; CHECK-NEXT: [[MUL_I76_1:%.*]] = mul nuw i32 [[AND_I75_1]], 65535
60+
; CHECK-NEXT: [[ADD_I77_1:%.*]] = add i32 [[MUL_I76_1]], [[SUB106_1]]
61+
; CHECK-NEXT: [[XOR_I78_1:%.*]] = xor i32 [[ADD_I77_1]], [[MUL_I76_1]]
62+
; CHECK-NEXT: [[ADD108_1:%.*]] = add i32 [[XOR_I68_1]], [[ADD113]]
63+
; CHECK-NEXT: [[ADD110_1:%.*]] = add i32 [[ADD108_1]], [[XOR_I_1]]
64+
; CHECK-NEXT: [[ADD112_1:%.*]] = add i32 [[ADD110_1]], [[XOR_I73_1]]
65+
; CHECK-NEXT: [[ADD113_1:%.*]] = add i32 [[ADD112_1]], [[XOR_I78_1]]
1166
; CHECK-NEXT: [[ADD78_2:%.*]] = add nsw i32 undef, undef
67+
; CHECK-NEXT: [[ADD103_2:%.*]] = add nsw i32 undef, [[ADD78_2]]
68+
; CHECK-NEXT: [[SUB104_2:%.*]] = sub nsw i32 [[ADD78_2]], undef
69+
; CHECK-NEXT: [[ADD105_2:%.*]] = add nsw i32 undef, undef
70+
; CHECK-NEXT: [[SUB106_2:%.*]] = sub nsw i32 undef, undef
71+
; CHECK-NEXT: [[SHR_I_2:%.*]] = lshr i32 [[ADD103_2]], 15
72+
; CHECK-NEXT: [[AND_I_2:%.*]] = and i32 [[SHR_I_2]], 65537
73+
; CHECK-NEXT: [[MUL_I_2:%.*]] = mul nuw i32 [[AND_I_2]], 65535
74+
; CHECK-NEXT: [[ADD_I_2:%.*]] = add i32 [[MUL_I_2]], [[ADD103_2]]
75+
; CHECK-NEXT: [[XOR_I_2:%.*]] = xor i32 [[ADD_I_2]], [[MUL_I_2]]
76+
; CHECK-NEXT: [[SHR_I64_2:%.*]] = lshr i32 [[ADD105_2]], 15
77+
; CHECK-NEXT: [[AND_I65_2:%.*]] = and i32 [[SHR_I64_2]], 65537
78+
; CHECK-NEXT: [[MUL_I66_2:%.*]] = mul nuw i32 [[AND_I65_2]], 65535
79+
; CHECK-NEXT: [[ADD_I67_2:%.*]] = add i32 [[MUL_I66_2]], [[ADD105_2]]
80+
; CHECK-NEXT: [[XOR_I68_2:%.*]] = xor i32 [[ADD_I67_2]], [[MUL_I66_2]]
81+
; CHECK-NEXT: [[SHR_I69_2:%.*]] = lshr i32 [[SUB104_2]], 15
82+
; CHECK-NEXT: [[AND_I70_2:%.*]] = and i32 [[SHR_I69_2]], 65537
83+
; CHECK-NEXT: [[MUL_I71_2:%.*]] = mul nuw i32 [[AND_I70_2]], 65535
84+
; CHECK-NEXT: [[ADD_I72_2:%.*]] = add i32 [[MUL_I71_2]], [[SUB104_2]]
85+
; CHECK-NEXT: [[XOR_I73_2:%.*]] = xor i32 [[ADD_I72_2]], [[MUL_I71_2]]
86+
; CHECK-NEXT: [[SHR_I74_2:%.*]] = lshr i32 [[SUB106_2]], 15
87+
; CHECK-NEXT: [[AND_I75_2:%.*]] = and i32 [[SHR_I74_2]], 65537
88+
; CHECK-NEXT: [[MUL_I76_2:%.*]] = mul nuw i32 [[AND_I75_2]], 65535
89+
; CHECK-NEXT: [[ADD_I77_2:%.*]] = add i32 [[MUL_I76_2]], [[SUB106_2]]
90+
; CHECK-NEXT: [[XOR_I78_2:%.*]] = xor i32 [[ADD_I77_2]], [[MUL_I76_2]]
91+
; CHECK-NEXT: [[ADD108_2:%.*]] = add i32 [[XOR_I68_2]], [[ADD113_1]]
92+
; CHECK-NEXT: [[ADD110_2:%.*]] = add i32 [[ADD108_2]], [[XOR_I_2]]
93+
; CHECK-NEXT: [[ADD112_2:%.*]] = add i32 [[ADD110_2]], [[XOR_I73_2]]
94+
; CHECK-NEXT: [[ADD113_2:%.*]] = add i32 [[ADD112_2]], [[XOR_I78_2]]
1295
; CHECK-NEXT: [[SUB102_3:%.*]] = sub nsw i32 undef, undef
13-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <16 x i32> poison, i32 [[SUB102_3]], i32 0
14-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <16 x i32> [[TMP0]], i32 undef, i32 1
15-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <16 x i32> [[TMP1]], i32 [[SUB102_1]], i32 2
16-
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <16 x i32> [[TMP2]], i32 undef, i32 3
17-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <16 x i32> [[TMP3]], i32 undef, i32 4
18-
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <16 x i32> [[TMP4]], i32 undef, i32 5
19-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <16 x i32> [[TMP5]], i32 undef, i32 6
20-
; CHECK-NEXT: [[TMP7:%.*]] = insertelement <16 x i32> [[TMP6]], i32 [[ADD94_1]], i32 7
21-
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <16 x i32> [[TMP7]], i32 [[ADD78_1]], i32 8
22-
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <16 x i32> [[TMP8]], i32 [[SUB86_1]], i32 9
23-
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <16 x i32> [[TMP9]], i32 undef, i32 10
24-
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <16 x i32> [[TMP10]], i32 [[ADD78_2]], i32 11
25-
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <16 x i32> [[TMP11]], i32 undef, i32 12
26-
; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x i32> [[TMP12]], i32 undef, i32 13
27-
; CHECK-NEXT: [[TMP14:%.*]] = insertelement <16 x i32> [[TMP13]], i32 undef, i32 14
28-
; CHECK-NEXT: [[TMP15:%.*]] = insertelement <16 x i32> [[TMP14]], i32 undef, i32 15
29-
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x i32> <i32 undef, i32 undef, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>, i32 [[SUB86_1]], i32 2
30-
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <16 x i32> [[TMP16]], i32 undef, i32 3
31-
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <16 x i32> [[TMP17]], i32 undef, i32 4
32-
; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x i32> [[TMP18]], i32 undef, i32 5
33-
; CHECK-NEXT: [[TMP20:%.*]] = insertelement <16 x i32> [[TMP19]], i32 undef, i32 6
34-
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <16 x i32> [[TMP20]], i32 [[ADD78_1]], i32 7
35-
; CHECK-NEXT: [[TMP22:%.*]] = insertelement <16 x i32> [[TMP21]], i32 [[ADD94_1]], i32 8
36-
; CHECK-NEXT: [[TMP23:%.*]] = insertelement <16 x i32> [[TMP22]], i32 [[SUB102_1]], i32 9
37-
; CHECK-NEXT: [[TMP24:%.*]] = insertelement <16 x i32> [[TMP23]], i32 [[ADD78_2]], i32 10
38-
; CHECK-NEXT: [[TMP25:%.*]] = insertelement <16 x i32> [[TMP24]], i32 undef, i32 11
39-
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <16 x i32> [[TMP25]], i32 undef, i32 12
40-
; CHECK-NEXT: [[TMP27:%.*]] = insertelement <16 x i32> [[TMP26]], i32 undef, i32 13
41-
; CHECK-NEXT: [[TMP28:%.*]] = insertelement <16 x i32> [[TMP27]], i32 undef, i32 14
42-
; CHECK-NEXT: [[TMP29:%.*]] = insertelement <16 x i32> [[TMP28]], i32 [[SUB102_3]], i32 15
43-
; CHECK-NEXT: [[TMP30:%.*]] = add nsw <16 x i32> [[TMP15]], [[TMP29]]
44-
; CHECK-NEXT: [[TMP31:%.*]] = sub nsw <16 x i32> [[TMP15]], [[TMP29]]
45-
; CHECK-NEXT: [[TMP32:%.*]] = shufflevector <16 x i32> [[TMP30]], <16 x i32> [[TMP31]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 21, i32 22, i32 7, i32 24, i32 25, i32 10, i32 27, i32 28, i32 13, i32 30, i32 31>
46-
; CHECK-NEXT: [[TMP33:%.*]] = lshr <16 x i32> [[TMP32]], <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
47-
; CHECK-NEXT: [[TMP34:%.*]] = and <16 x i32> [[TMP33]], <i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537, i32 65537>
48-
; CHECK-NEXT: [[TMP35:%.*]] = mul nuw <16 x i32> [[TMP34]], <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
49-
; CHECK-NEXT: [[TMP36:%.*]] = add <16 x i32> [[TMP35]], [[TMP32]]
50-
; CHECK-NEXT: [[TMP37:%.*]] = xor <16 x i32> [[TMP36]], [[TMP35]]
51-
; CHECK-NEXT: [[TMP38:%.*]] = call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> [[TMP37]])
52-
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[TMP38]], 16
96+
; CHECK-NEXT: [[ADD103_3:%.*]] = add nsw i32 undef, undef
97+
; CHECK-NEXT: [[SUB104_3:%.*]] = sub nsw i32 undef, undef
98+
; CHECK-NEXT: [[ADD105_3:%.*]] = add nsw i32 [[SUB102_3]], undef
99+
; CHECK-NEXT: [[SUB106_3:%.*]] = sub nsw i32 undef, [[SUB102_3]]
100+
; CHECK-NEXT: [[SHR_I_3:%.*]] = lshr i32 [[ADD103_3]], 15
101+
; CHECK-NEXT: [[AND_I_3:%.*]] = and i32 [[SHR_I_3]], 65537
102+
; CHECK-NEXT: [[MUL_I_3:%.*]] = mul nuw i32 [[AND_I_3]], 65535
103+
; CHECK-NEXT: [[ADD_I_3:%.*]] = add i32 [[MUL_I_3]], [[ADD103_3]]
104+
; CHECK-NEXT: [[XOR_I_3:%.*]] = xor i32 [[ADD_I_3]], [[MUL_I_3]]
105+
; CHECK-NEXT: [[SHR_I64_3:%.*]] = lshr i32 [[ADD105_3]], 15
106+
; CHECK-NEXT: [[AND_I65_3:%.*]] = and i32 [[SHR_I64_3]], 65537
107+
; CHECK-NEXT: [[MUL_I66_3:%.*]] = mul nuw i32 [[AND_I65_3]], 65535
108+
; CHECK-NEXT: [[ADD_I67_3:%.*]] = add i32 [[MUL_I66_3]], [[ADD105_3]]
109+
; CHECK-NEXT: [[XOR_I68_3:%.*]] = xor i32 [[ADD_I67_3]], [[MUL_I66_3]]
110+
; CHECK-NEXT: [[SHR_I69_3:%.*]] = lshr i32 [[SUB104_3]], 15
111+
; CHECK-NEXT: [[AND_I70_3:%.*]] = and i32 [[SHR_I69_3]], 65537
112+
; CHECK-NEXT: [[MUL_I71_3:%.*]] = mul nuw i32 [[AND_I70_3]], 65535
113+
; CHECK-NEXT: [[ADD_I72_3:%.*]] = add i32 [[MUL_I71_3]], [[SUB104_3]]
114+
; CHECK-NEXT: [[XOR_I73_3:%.*]] = xor i32 [[ADD_I72_3]], [[MUL_I71_3]]
115+
; CHECK-NEXT: [[SHR_I74_3:%.*]] = lshr i32 [[SUB106_3]], 15
116+
; CHECK-NEXT: [[AND_I75_3:%.*]] = and i32 [[SHR_I74_3]], 65537
117+
; CHECK-NEXT: [[MUL_I76_3:%.*]] = mul nuw i32 [[AND_I75_3]], 65535
118+
; CHECK-NEXT: [[ADD_I77_3:%.*]] = add i32 [[MUL_I76_3]], [[SUB106_3]]
119+
; CHECK-NEXT: [[XOR_I78_3:%.*]] = xor i32 [[ADD_I77_3]], [[MUL_I76_3]]
120+
; CHECK-NEXT: [[ADD108_3:%.*]] = add i32 [[XOR_I68_3]], [[ADD113_2]]
121+
; CHECK-NEXT: [[ADD110_3:%.*]] = add i32 [[ADD108_3]], [[XOR_I_3]]
122+
; CHECK-NEXT: [[ADD112_3:%.*]] = add i32 [[ADD110_3]], [[XOR_I73_3]]
123+
; CHECK-NEXT: [[ADD113_3:%.*]] = add i32 [[ADD112_3]], [[XOR_I78_3]]
124+
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 [[ADD113_3]], 16
53125
; CHECK-NEXT: [[ADD119:%.*]] = add nuw nsw i32 undef, [[SHR]]
54126
; CHECK-NEXT: [[SHR120:%.*]] = lshr i32 [[ADD119]], 1
55127
; CHECK-NEXT: ret i32 [[SHR120]]

0 commit comments

Comments
 (0)