Skip to content

Commit 8f54861

Browse files
committed
Revert "[SLP]Use source vector type as the original vector type instead of"
This reverts commit 9a99944 due to test suite failures on all our SVE buildbots, e.g. https://lab.llvm.org/buildbot/#/builders/184/builds/7375, which fail with the following assertion:
clang: ../llvm/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp:3565: InstructionCost llvm::AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind, VectorType *, ArrayRef<int>, TTI::TargetCostKind, int, VectorType *, ArrayRef<const Value *>): Assertion `Mask.size() == TpNumElts && "Expected Mask and Tp size to match!"' failed.
1 parent 619c501 commit 8f54861

13 files changed

+293
-657
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7073,10 +7073,7 @@ class BoUpSLP::ShuffleCostEstimator : public BaseShuffleAnalysis {
70737073
/// extracted values from \p VL.
70747074
InstructionCost computeExtractCost(ArrayRef<Value *> VL, ArrayRef<int> Mask,
70757075
TTI::ShuffleKind ShuffleKind) {
7076-
auto *VecTy = cast<FixedVectorType>(
7077-
cast<ExtractElementInst>(*find_if(VL, [](Value *V) {
7078-
return isa<ExtractElementInst>(V);
7079-
}))->getVectorOperandType());
7076+
auto *VecTy = FixedVectorType::get(VL.front()->getType(), VL.size());
70807077
unsigned NumOfParts = TTI.getNumberOfParts(VecTy);
70817078

70827079
if (ShuffleKind != TargetTransformInfo::SK_PermuteSingleSrc ||

llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat-inseltpoison.ll

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -291,19 +291,14 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
291291
;
292292
; GFX8-LABEL: @uadd_sat_v4i16(
293293
; GFX8-NEXT: bb:
294-
; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 2
295-
; GFX8-NEXT: [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3
296-
; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 2
297-
; GFX8-NEXT: [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3
298-
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
299-
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
294+
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
295+
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
300296
; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
301-
; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
302-
; GFX8-NEXT: [[ADD_3:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_3]], i16 [[ARG1_3]])
303-
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
304-
; GFX8-NEXT: [[INS_2:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
305-
; GFX8-NEXT: [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3
306-
; GFX8-NEXT: ret <4 x i16> [[INS_3]]
297+
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
298+
; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
299+
; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
300+
; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
301+
; GFX8-NEXT: ret <4 x i16> [[INS_31]]
307302
;
308303
bb:
309304
%arg0.0 = extractelement <4 x i16> %arg0, i64 0

llvm/test/Transforms/SLPVectorizer/AMDGPU/add_sub_sat.ll

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -291,19 +291,14 @@ define <4 x i16> @uadd_sat_v4i16(<4 x i16> %arg0, <4 x i16> %arg1) {
291291
;
292292
; GFX8-LABEL: @uadd_sat_v4i16(
293293
; GFX8-NEXT: bb:
294-
; GFX8-NEXT: [[ARG0_2:%.*]] = extractelement <4 x i16> [[ARG0:%.*]], i64 2
295-
; GFX8-NEXT: [[ARG0_3:%.*]] = extractelement <4 x i16> [[ARG0]], i64 3
296-
; GFX8-NEXT: [[ARG1_2:%.*]] = extractelement <4 x i16> [[ARG1:%.*]], i64 2
297-
; GFX8-NEXT: [[ARG1_3:%.*]] = extractelement <4 x i16> [[ARG1]], i64 3
298-
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
299-
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
294+
; GFX8-NEXT: [[TMP0:%.*]] = shufflevector <4 x i16> [[ARG0:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
295+
; GFX8-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[ARG1:%.*]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
300296
; GFX8-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
301-
; GFX8-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
302-
; GFX8-NEXT: [[ADD_3:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_3]], i16 [[ARG1_3]])
303-
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
304-
; GFX8-NEXT: [[INS_2:%.*]] = insertelement <4 x i16> [[TMP3]], i16 [[ADD_2]], i64 2
305-
; GFX8-NEXT: [[INS_3:%.*]] = insertelement <4 x i16> [[INS_2]], i16 [[ADD_3]], i64 3
306-
; GFX8-NEXT: ret <4 x i16> [[INS_3]]
297+
; GFX8-NEXT: [[TMP3:%.*]] = shufflevector <4 x i16> [[ARG0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
298+
; GFX8-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[ARG1]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
299+
; GFX8-NEXT: [[TMP5:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP3]], <2 x i16> [[TMP4]])
300+
; GFX8-NEXT: [[INS_31:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
301+
; GFX8-NEXT: ret <4 x i16> [[INS_31]]
307302
;
308303
bb:
309304
%arg0.0 = extractelement <4 x i16> %arg0, i64 0

llvm/test/Transforms/SLPVectorizer/AMDGPU/crash_extract_subvector_cost.ll

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,10 @@
44
define <2 x i16> @uadd_sat_v9i16_combine_vi16(<9 x i16> %arg0, <9 x i16> %arg1) {
55
; CHECK-LABEL: @uadd_sat_v9i16_combine_vi16(
66
; CHECK-NEXT: bb:
7-
; CHECK-NEXT: [[ARG0_1:%.*]] = extractelement <9 x i16> undef, i64 7
8-
; CHECK-NEXT: [[ARG0_2:%.*]] = extractelement <9 x i16> [[ARG0:%.*]], i64 8
9-
; CHECK-NEXT: [[ARG1_1:%.*]] = extractelement <9 x i16> [[ARG1:%.*]], i64 7
10-
; CHECK-NEXT: [[ARG1_2:%.*]] = extractelement <9 x i16> [[ARG1]], i64 8
11-
; CHECK-NEXT: [[ADD_1:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_1]], i16 [[ARG1_1]])
12-
; CHECK-NEXT: [[ADD_2:%.*]] = call i16 @llvm.uadd.sat.i16(i16 [[ARG0_2]], i16 [[ARG1_2]])
13-
; CHECK-NEXT: [[INS_1:%.*]] = insertelement <2 x i16> undef, i16 [[ADD_1]], i64 0
14-
; CHECK-NEXT: [[INS_2:%.*]] = insertelement <2 x i16> [[INS_1]], i16 [[ADD_2]], i64 1
15-
; CHECK-NEXT: ret <2 x i16> [[INS_2]]
7+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <9 x i16> [[ARG0:%.*]], <9 x i16> poison, <2 x i32> <i32 poison, i32 8>
8+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <9 x i16> [[ARG1:%.*]], <9 x i16> poison, <2 x i32> <i32 7, i32 8>
9+
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
10+
; CHECK-NEXT: ret <2 x i16> [[TMP2]]
1611
;
1712
bb:
1813
%arg0.1 = extractelement <9 x i16> undef, i64 7

llvm/test/Transforms/SLPVectorizer/AMDGPU/phi-result-use-order.ll

Lines changed: 20 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,20 @@
44
define <4 x half> @phis(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
55
; CHECK-LABEL: @phis(
66
; CHECK-NEXT: entry:
7-
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1:%.*]], i64 2
8-
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3
9-
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
7+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
8+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
109
; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]]
1110
; CHECK: bb0:
12-
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2:%.*]], i64 2
13-
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3
14-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
11+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
12+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
1513
; CHECK-NEXT: br label [[BB1]]
1614
; CHECK: bb1:
17-
; CHECK-NEXT: [[C2:%.*]] = phi half [ [[A2]], [[ENTRY:%.*]] ], [ [[B2]], [[BB0]] ]
18-
; CHECK-NEXT: [[C3:%.*]] = phi half [ [[A3]], [[ENTRY]] ], [ [[B3]], [[BB0]] ]
19-
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[BB0]] ]
20-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x half> [[TMP2]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
21-
; CHECK-NEXT: [[O2:%.*]] = insertelement <4 x half> [[TMP3]], half [[C2]], i64 2
22-
; CHECK-NEXT: [[O3:%.*]] = insertelement <4 x half> [[O2]], half [[C3]], i64 3
23-
; CHECK-NEXT: ret <4 x half> [[O3]]
15+
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ]
16+
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ]
17+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
18+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
19+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> [[TMP5]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
20+
; CHECK-NEXT: ret <4 x half> [[TMP8]]
2421
;
2522
entry:
2623
%a0 = extractelement <4 x half> %in1, i64 0
@@ -52,23 +49,20 @@ bb1:
5249
define <4 x half> @phis_reverse(i1 %cmp1, <4 x half> %in1, <4 x half> %in2) {
5350
; CHECK-LABEL: @phis_reverse(
5451
; CHECK-NEXT: entry:
55-
; CHECK-NEXT: [[A2:%.*]] = extractelement <4 x half> [[IN1:%.*]], i64 2
56-
; CHECK-NEXT: [[A3:%.*]] = extractelement <4 x half> [[IN1]], i64 3
57-
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
52+
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[IN1:%.*]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
53+
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN1]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
5854
; CHECK-NEXT: br i1 [[CMP1:%.*]], label [[BB1:%.*]], label [[BB0:%.*]]
5955
; CHECK: bb0:
60-
; CHECK-NEXT: [[B2:%.*]] = extractelement <4 x half> [[IN2:%.*]], i64 2
61-
; CHECK-NEXT: [[B3:%.*]] = extractelement <4 x half> [[IN2]], i64 3
62-
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
56+
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x half> [[IN2:%.*]], <4 x half> poison, <2 x i32> <i32 2, i32 3>
57+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x half> [[IN2]], <4 x half> poison, <2 x i32> <i32 0, i32 1>
6358
; CHECK-NEXT: br label [[BB1]]
6459
; CHECK: bb1:
65-
; CHECK-NEXT: [[C3:%.*]] = phi half [ [[A3]], [[ENTRY:%.*]] ], [ [[B3]], [[BB0]] ]
66-
; CHECK-NEXT: [[C2:%.*]] = phi half [ [[A2]], [[ENTRY]] ], [ [[B2]], [[BB0]] ]
67-
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY]] ], [ [[TMP1]], [[BB0]] ]
68-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x half> [[TMP2]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
69-
; CHECK-NEXT: [[O2:%.*]] = insertelement <4 x half> [[TMP3]], half [[C2]], i64 2
70-
; CHECK-NEXT: [[O3:%.*]] = insertelement <4 x half> [[O2]], half [[C3]], i64 3
71-
; CHECK-NEXT: ret <4 x half> [[O3]]
60+
; CHECK-NEXT: [[TMP4:%.*]] = phi <2 x half> [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2]], [[BB0]] ]
61+
; CHECK-NEXT: [[TMP5:%.*]] = phi <2 x half> [ [[TMP1]], [[ENTRY]] ], [ [[TMP3]], [[BB0]] ]
62+
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x half> [[TMP5]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
63+
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x half> [[TMP4]], <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
64+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x half> [[TMP6]], <4 x half> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
65+
; CHECK-NEXT: ret <4 x half> [[TMP8]]
7266
;
7367
entry:
7468
%a0 = extractelement <4 x half> %in1, i64 0

0 commit comments

Comments
 (0)