Skip to content

Commit 9ecbe95

Browse files
RKSimonNoumanAmir657
authored and committed
[VectorCombine] Fold "shuffle (binop (shuffle, shuffle)), undef" --> "binop (shuffle), (shuffle)" (llvm#114101)
Add foldPermuteOfBinops to fold a permute (single-source shuffle) through a binary op that is being fed by other shuffles. Fixes llvm#94546. Fixes llvm#49736.
1 parent ebcd4e4 commit 9ecbe95

File tree

7 files changed

+133
-40
lines changed

7 files changed

+133
-40
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ class VectorCombine {
112112
bool foldExtractedCmps(Instruction &I);
113113
bool foldSingleElementStore(Instruction &I);
114114
bool scalarizeLoadExtract(Instruction &I);
115+
bool foldPermuteOfBinops(Instruction &I);
115116
bool foldShuffleOfBinops(Instruction &I);
116117
bool foldShuffleOfCastops(Instruction &I);
117118
bool foldShuffleOfShuffles(Instruction &I);
@@ -1400,6 +1401,100 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
14001401
return true;
14011402
}
14021403

1404+
/// Try to convert "shuffle (binop (shuffle, shuffle)), undef"
1405+
/// --> "binop (shuffle), (shuffle)".
1406+
bool VectorCombine::foldPermuteOfBinops(Instruction &I) {
1407+
BinaryOperator *BinOp;
1408+
ArrayRef<int> OuterMask;
1409+
if (!match(&I,
1410+
m_Shuffle(m_OneUse(m_BinOp(BinOp)), m_Undef(), m_Mask(OuterMask))))
1411+
return false;
1412+
1413+
// Don't introduce poison into div/rem.
1414+
if (BinOp->isIntDivRem() && llvm::is_contained(OuterMask, PoisonMaskElem))
1415+
return false;
1416+
1417+
Value *Op00, *Op01;
1418+
ArrayRef<int> Mask0;
1419+
if (!match(BinOp->getOperand(0),
1420+
m_OneUse(m_Shuffle(m_Value(Op00), m_Value(Op01), m_Mask(Mask0)))))
1421+
return false;
1422+
1423+
Value *Op10, *Op11;
1424+
ArrayRef<int> Mask1;
1425+
if (!match(BinOp->getOperand(1),
1426+
m_OneUse(m_Shuffle(m_Value(Op10), m_Value(Op11), m_Mask(Mask1)))))
1427+
return false;
1428+
1429+
Instruction::BinaryOps Opcode = BinOp->getOpcode();
1430+
auto *ShuffleDstTy = dyn_cast<FixedVectorType>(I.getType());
1431+
auto *BinOpTy = dyn_cast<FixedVectorType>(BinOp->getType());
1432+
auto *Op0Ty = dyn_cast<FixedVectorType>(Op00->getType());
1433+
auto *Op1Ty = dyn_cast<FixedVectorType>(Op10->getType());
1434+
if (!ShuffleDstTy || !BinOpTy || !Op0Ty || !Op1Ty)
1435+
return false;
1436+
1437+
unsigned NumSrcElts = BinOpTy->getNumElements();
1438+
1439+
// Don't accept shuffles that reference the second operand in
1440+
// div/rem or if its an undef arg.
1441+
if ((BinOp->isIntDivRem() || !isa<PoisonValue>(I.getOperand(1))) &&
1442+
any_of(OuterMask, [NumSrcElts](int M) { return M >= (int)NumSrcElts; }))
1443+
return false;
1444+
1445+
// Merge outer / inner shuffles.
1446+
SmallVector<int> NewMask0, NewMask1;
1447+
for (int M : OuterMask) {
1448+
if (M < 0 || M >= (int)NumSrcElts) {
1449+
NewMask0.push_back(PoisonMaskElem);
1450+
NewMask1.push_back(PoisonMaskElem);
1451+
} else {
1452+
NewMask0.push_back(Mask0[M]);
1453+
NewMask1.push_back(Mask1[M]);
1454+
}
1455+
}
1456+
1457+
// Try to merge shuffles across the binop if the new shuffles are not costly.
1458+
TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1459+
1460+
InstructionCost OldCost =
1461+
TTI.getArithmeticInstrCost(Opcode, BinOpTy, CostKind) +
1462+
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, BinOpTy,
1463+
OuterMask, CostKind, 0, nullptr, {BinOp}, &I) +
1464+
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, Op0Ty, Mask0,
1465+
CostKind, 0, nullptr, {Op00, Op01},
1466+
cast<Instruction>(BinOp->getOperand(0))) +
1467+
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, Op1Ty, Mask1,
1468+
CostKind, 0, nullptr, {Op10, Op11},
1469+
cast<Instruction>(BinOp->getOperand(1)));
1470+
1471+
InstructionCost NewCost =
1472+
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, Op0Ty, NewMask0,
1473+
CostKind, 0, nullptr, {Op00, Op01}) +
1474+
TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, Op1Ty, NewMask1,
1475+
CostKind, 0, nullptr, {Op10, Op11}) +
1476+
TTI.getArithmeticInstrCost(Opcode, ShuffleDstTy, CostKind);
1477+
1478+
LLVM_DEBUG(dbgs() << "Found a shuffle feeding a shuffled binop: " << I
1479+
<< "\n OldCost: " << OldCost << " vs NewCost: " << NewCost
1480+
<< "\n");
1481+
if (NewCost >= OldCost)
1482+
return false;
1483+
1484+
Value *Shuf0 = Builder.CreateShuffleVector(Op00, Op01, NewMask0);
1485+
Value *Shuf1 = Builder.CreateShuffleVector(Op10, Op11, NewMask1);
1486+
Value *NewBO = Builder.CreateBinOp(Opcode, Shuf0, Shuf1);
1487+
1488+
// Intersect flags from the old binops.
1489+
if (auto *NewInst = dyn_cast<Instruction>(NewBO))
1490+
NewInst->copyIRFlags(BinOp);
1491+
1492+
Worklist.pushValue(Shuf0);
1493+
Worklist.pushValue(Shuf1);
1494+
replaceValue(I, *NewBO);
1495+
return true;
1496+
}
1497+
14031498
/// Try to convert "shuffle (binop), (binop)" into "binop (shuffle), (shuffle)".
14041499
bool VectorCombine::foldShuffleOfBinops(Instruction &I) {
14051500
BinaryOperator *B0, *B1;
@@ -2736,6 +2831,7 @@ bool VectorCombine::run() {
27362831
MadeChange |= foldInsExtFNeg(I);
27372832
break;
27382833
case Instruction::ShuffleVector:
2834+
MadeChange |= foldPermuteOfBinops(I);
27392835
MadeChange |= foldShuffleOfBinops(I);
27402836
MadeChange |= foldShuffleOfCastops(I);
27412837
MadeChange |= foldShuffleOfShuffles(I);

llvm/test/Transforms/PhaseOrdering/X86/horiz-math-inseltpoison.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,10 @@ define <8 x float> @hadd_reverse_v8f32(<8 x float> %a, <8 x float> %b) #0 {
108108

109109
define <8 x float> @reverse_hadd_v8f32(<8 x float> %a, <8 x float> %b) #0 {
110110
; CHECK-LABEL: @reverse_hadd_v8f32(
111-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
112-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
111+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 14, i32 12, i32 6, i32 4, i32 10, i32 8, i32 2, i32 0>
112+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 15, i32 13, i32 7, i32 5, i32 11, i32 9, i32 3, i32 1>
113113
; CHECK-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
114-
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
115-
; CHECK-NEXT: ret <8 x float> [[SHUFFLE]]
114+
; CHECK-NEXT: ret <8 x float> [[TMP3]]
116115
;
117116
%vecext = extractelement <8 x float> %a, i32 0
118117
%vecext1 = extractelement <8 x float> %a, i32 1

llvm/test/Transforms/PhaseOrdering/X86/horiz-math.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -108,11 +108,10 @@ define <8 x float> @hadd_reverse_v8f32(<8 x float> %a, <8 x float> %b) #0 {
108108

109109
define <8 x float> @reverse_hadd_v8f32(<8 x float> %a, <8 x float> %b) #0 {
110110
; CHECK-LABEL: @reverse_hadd_v8f32(
111-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 0, i32 2, i32 8, i32 10, i32 4, i32 6, i32 12, i32 14>
112-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 1, i32 3, i32 9, i32 11, i32 5, i32 7, i32 13, i32 15>
111+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A:%.*]], <8 x float> [[B:%.*]], <8 x i32> <i32 14, i32 12, i32 6, i32 4, i32 10, i32 8, i32 2, i32 0>
112+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[A]], <8 x float> [[B]], <8 x i32> <i32 15, i32 13, i32 7, i32 5, i32 11, i32 9, i32 3, i32 1>
113113
; CHECK-NEXT: [[TMP3:%.*]] = fadd <8 x float> [[TMP1]], [[TMP2]]
114-
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x float> [[TMP3]], <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
115-
; CHECK-NEXT: ret <8 x float> [[SHUFFLE]]
114+
; CHECK-NEXT: ret <8 x float> [[TMP3]]
116115
;
117116
%vecext = extractelement <8 x float> %a, i32 0
118117
%vecext1 = extractelement <8 x float> %a, i32 1

llvm/test/Transforms/PhaseOrdering/X86/pr50392.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,9 @@ define <4 x double> @PR50392(<4 x double> %a, <4 x double> %b) {
3232
; AVX1-NEXT: ret <4 x double> [[SHUFFLE]]
3333
;
3434
; AVX2-LABEL: @PR50392(
35-
; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 4>
36-
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 5>
37-
; AVX2-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
38-
; AVX2-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 1, i32 poison>
35+
; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 4, i32 poison>
36+
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 5, i32 poison>
37+
; AVX2-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
3938
; AVX2-NEXT: [[SHIFT:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
4039
; AVX2-NEXT: [[TMP5:%.*]] = fadd <4 x double> [[B]], [[SHIFT]]
4140
; AVX2-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP5]], <4 x i32> <i32 0, i32 poison, i32 2, i32 6>

llvm/test/Transforms/PhaseOrdering/X86/pr94546.ll

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,18 @@ define <4 x double> @PR94546(<4 x double> %a, <4 x double> %b) {
1616
; SSE-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 1>
1717
; SSE-NEXT: ret <4 x double> [[TMP4]]
1818
;
19-
; AVX-LABEL: @PR94546(
20-
; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 6>
21-
; AVX-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 7>
22-
; AVX-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
23-
; AVX-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 1>
24-
; AVX-NEXT: ret <4 x double> [[TMP4]]
19+
; AVX1-LABEL: @PR94546(
20+
; AVX1-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <2 x i32> <i32 0, i32 6>
21+
; AVX1-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <2 x i32> <i32 1, i32 7>
22+
; AVX1-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP1]], [[TMP2]]
23+
; AVX1-NEXT: [[TMP4:%.*]] = shufflevector <2 x double> [[TMP3]], <2 x double> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 1>
24+
; AVX1-NEXT: ret <4 x double> [[TMP4]]
25+
;
26+
; AVX2-LABEL: @PR94546(
27+
; AVX2-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A:%.*]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 poison, i32 poison, i32 6>
28+
; AVX2-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[A]], <4 x double> [[B]], <4 x i32> <i32 1, i32 poison, i32 poison, i32 7>
29+
; AVX2-NEXT: [[TMP3:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
30+
; AVX2-NEXT: ret <4 x double> [[TMP3]]
2531
;
2632
%vecext = extractelement <4 x double> %a, i32 0
2733
%vecext1 = extractelement <4 x double> %a, i32 1
@@ -43,5 +49,4 @@ define <4 x double> @PR94546(<4 x double> %a, <4 x double> %b) {
4349
ret <4 x double> %shuffle
4450
}
4551
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
46-
; AVX1: {{.*}}
47-
; AVX2: {{.*}}
52+
; AVX: {{.*}}

llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -937,10 +937,9 @@ define <4 x i64> @cast_mismatched_types(<4 x i32> %x) {
937937

938938
define <4 x float> @fadd_mismatched_types(<4 x float> %x, <4 x float> %y) {
939939
; CHECK-LABEL: @fadd_mismatched_types(
940-
; CHECK-NEXT: [[SHUF_X:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <2 x i32> <i32 0, i32 2>
941-
; CHECK-NEXT: [[SHUF_Y:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> poison, <2 x i32> <i32 1, i32 3>
942-
; CHECK-NEXT: [[FADD:%.*]] = fadd fast <2 x float> [[SHUF_X]], [[SHUF_Y]]
943-
; CHECK-NEXT: [[EXTSHUF:%.*]] = shufflevector <2 x float> [[FADD]], <2 x float> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
940+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 2, i32 poison>
941+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 3, i32 poison>
942+
; CHECK-NEXT: [[EXTSHUF:%.*]] = fadd fast <4 x float> [[TMP1]], [[TMP2]]
944943
; CHECK-NEXT: ret <4 x float> [[EXTSHUF]]
945944
;
946945
%shuf.x = shufflevector <4 x float> %x, <4 x float> poison, <2 x i32> <i32 0, i32 2>

llvm/test/Transforms/VectorCombine/X86/permute-of-binops.ll

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,10 @@ declare void @use_v4f64(<4 x double>)
99
define <4 x double> @fadd_v4f64(<4 x double> %a, <4 x double> %b) {
1010
; CHECK-LABEL: define <4 x double> @fadd_v4f64(
1111
; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0:[0-9]+]] {
12-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
13-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
12+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
13+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
1414
; CHECK-NEXT: [[POST:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
15-
; CHECK-NEXT: [[POST1:%.*]] = shufflevector <4 x double> [[POST]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
16-
; CHECK-NEXT: ret <4 x double> [[POST1]]
15+
; CHECK-NEXT: ret <4 x double> [[POST]]
1716
;
1817
%a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1918
%b1 = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
@@ -25,11 +24,10 @@ define <4 x double> @fadd_v4f64(<4 x double> %a, <4 x double> %b) {
2524
define <4 x double> @fadd_v4f64_poison_idx(<4 x double> %a, <4 x double> %b) {
2625
; CHECK-LABEL: define <4 x double> @fadd_v4f64_poison_idx(
2726
; CHECK-SAME: <4 x double> [[A:%.*]], <4 x double> [[B:%.*]]) #[[ATTR0]] {
28-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
29-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
27+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 poison>
28+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x double> [[B]], <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 poison>
3029
; CHECK-NEXT: [[POST:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
31-
; CHECK-NEXT: [[POST1:%.*]] = shufflevector <4 x double> [[POST]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 4>
32-
; CHECK-NEXT: ret <4 x double> [[POST1]]
30+
; CHECK-NEXT: ret <4 x double> [[POST]]
3331
;
3432
%a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
3533
%b1 = shufflevector <4 x double> %b, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
@@ -41,11 +39,10 @@ define <4 x double> @fadd_v4f64_poison_idx(<4 x double> %a, <4 x double> %b) {
4139
define <4 x double> @fadd_mixed_types(<4 x double> %a, <2 x double> %b) {
4240
; CHECK-LABEL: define <4 x double> @fadd_mixed_types(
4341
; CHECK-SAME: <4 x double> [[A:%.*]], <2 x double> [[B:%.*]]) #[[ATTR0]] {
44-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
45-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
42+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A]], <4 x double> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
43+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x double> [[B]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
4644
; CHECK-NEXT: [[POST:%.*]] = fadd <4 x double> [[TMP1]], [[TMP2]]
47-
; CHECK-NEXT: [[POST1:%.*]] = shufflevector <4 x double> [[POST]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
48-
; CHECK-NEXT: ret <4 x double> [[POST1]]
45+
; CHECK-NEXT: ret <4 x double> [[POST]]
4946
;
5047
%a1 = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
5148
%b1 = shufflevector <2 x double> %b, <2 x double> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
@@ -95,11 +92,10 @@ define <4 x double> @fadd_v4f64_multiuse_shuffle(<4 x double> %a, <4 x double> %
9592
define <4 x i32> @sdiv_v4i32(<4 x i32> %a, <4 x i32> %b) {
9693
; CHECK-LABEL: define <4 x i32> @sdiv_v4i32(
9794
; CHECK-SAME: <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR0]] {
98-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
99-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
95+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 0, i32 3>
96+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
10097
; CHECK-NEXT: [[POST:%.*]] = sdiv <4 x i32> [[TMP1]], [[TMP2]]
101-
; CHECK-NEXT: [[POST1:%.*]] = shufflevector <4 x i32> [[POST]], <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 0>
102-
; CHECK-NEXT: ret <4 x i32> [[POST1]]
98+
; CHECK-NEXT: ret <4 x i32> [[POST]]
10399
;
104100
%a1 = shufflevector <4 x i32> %a, <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
105101
%b1 = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> <i32 1, i32 0, i32 1, i32 0>

0 commit comments

Comments
 (0)