Skip to content

Commit 263a00f

Browse files
committed
[COST][AARCH64] Fix crash in cost calculation for shuffles.
The number of elements must be taken from the shuffle mask size, not from the element count of the original fixed vector type. The two can differ, and using the vector's element count may otherwise crash the compiler.
1 parent 35f5c8d commit 263a00f

File tree

2 files changed

+67
-5
lines changed

2 files changed

+67
-5
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3577,11 +3577,8 @@ InstructionCost AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
35773577
// into smaller vectors and sum the cost of each shuffle.
35783578
if (!Mask.empty() && isa<FixedVectorType>(Tp) && LT.second.isVector() &&
35793579
Tp->getScalarSizeInBits() == LT.second.getScalarSizeInBits() &&
3580-
cast<FixedVectorType>(Tp)->getNumElements() >
3581-
LT.second.getVectorNumElements() &&
3582-
!Index && !SubTp) {
3583-
unsigned TpNumElts = cast<FixedVectorType>(Tp)->getNumElements();
3584-
assert(Mask.size() == TpNumElts && "Expected Mask and Tp size to match!");
3580+
Mask.size() > LT.second.getVectorNumElements() && !Index && !SubTp) {
3581+
unsigned TpNumElts = Mask.size();
35853582
unsigned LTNumElts = LT.second.getVectorNumElements();
35863583
unsigned NumVecs = (TpNumElts + LTNumElts - 1) / LTNumElts;
35873584
VectorType *NTp =
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2+
; RUN: opt -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define void @p(double %0) {
5+
; CHECK-LABEL: define void @p(
6+
; CHECK-SAME: double [[TMP0:%.*]]) {
7+
; CHECK-NEXT: entry:
8+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double poison>, double [[TMP0]], i32 3
9+
; CHECK-NEXT: [[TMP2:%.*]] = fmul <4 x double> [[TMP1]], zeroinitializer
10+
; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x double> zeroinitializer, [[TMP2]]
11+
; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], zeroinitializer
12+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP3]], <2 x i32> <i32 1, i32 7>
13+
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> zeroinitializer, [[TMP5]]
14+
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], zeroinitializer
15+
; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x double> [[TMP4]], zeroinitializer
16+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
17+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison>, <4 x double> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
18+
; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x double> [[TMP8]], [[TMP10]]
19+
; CHECK-NEXT: [[TMP12:%.*]] = fadd <4 x double> [[TMP11]], zeroinitializer
20+
; CHECK-NEXT: [[TMP13:%.*]] = fptosi <4 x double> [[TMP12]] to <4 x i32>
21+
; CHECK-NEXT: store <4 x i32> [[TMP13]], ptr null, align 4
22+
; CHECK-NEXT: ret void
23+
;
24+
entry:
25+
%mul16.1.i = fmul double %0, 0.000000e+00
26+
%add21.1.i = fadd double %mul16.1.i, 0.000000e+00
27+
%add21.2.i = fadd double %add21.1.i, 0.000000e+00
28+
%mul16.150.i = fmul double 0.000000e+00, 0.000000e+00
29+
%add21.1.1.i = fadd double 0.000000e+00, %mul16.150.i
30+
%add21.2.1.i = fadd double %add21.1.1.i, 0.000000e+00
31+
%mul16.252.i = fmul double 0.000000e+00, 0.000000e+00
32+
%add21.1.2.i = fadd double 0.000000e+00, %mul16.252.i
33+
%add21.2.2.i = fadd double %add21.1.2.i, 0.000000e+00
34+
%add21.2.165.i = fadd double %add21.1.i, 0.000000e+00
35+
%mul16.150.1.i = fmul double 0.000000e+00, 0.000000e+00
36+
%add21.1.1.1.i = fadd double %mul16.150.1.i, 0.000000e+00
37+
%add21.2.1.1.i = fadd double %add21.1.1.1.i, 0.000000e+00
38+
%add21.2.2.1.i = fadd double 0.000000e+00, %mul16.150.1.i
39+
%mul16.1.1.i36 = fmul double %add21.2.1.1.i, 0.000000e+00
40+
%add21.1.1.i37 = fadd double 0.000000e+00, %mul16.1.1.i36
41+
%add21.2.1.i40 = fadd double %add21.1.1.i37, 0.000000e+00
42+
%mul16.252.i43 = fmul double %add21.2.2.i, 0.000000e+00
43+
%mul16.1.2.i45 = fmul double %add21.2.2.1.i, 0.000000e+00
44+
%add21.1.2.i46 = fadd double %mul16.252.i43, %mul16.1.2.i45
45+
%add21.2.2.i49 = fadd double %add21.1.2.i46, 0.000000e+00
46+
%mul16.157.i51 = fmul double %add21.2.i, 0.000000e+00
47+
%mul16.1.160.i52 = fmul double %add21.2.165.i, 0.000000e+00
48+
%add21.1.161.i53 = fadd double %mul16.157.i51, %mul16.1.160.i52
49+
%add21.2.165.i56 = fadd double %add21.1.161.i53, 0.000000e+00
50+
%mul16.150.1.i58 = fmul double %add21.2.1.i, 0.000000e+00
51+
%add21.1.1.1.i60 = fadd double %mul16.150.1.i58, 0.000000e+00
52+
%add21.2.1.1.i62 = fadd double %add21.1.1.1.i60, 0.000000e+00
53+
%conv14.1 = fptosi double %add21.2.1.i40 to i32
54+
%arrayidx16.1 = getelementptr i32, ptr null, i64 1
55+
store i32 %conv14.1, ptr %arrayidx16.1, align 4
56+
%conv14.2 = fptosi double %add21.2.2.i49 to i32
57+
%arrayidx16.2 = getelementptr i32, ptr null, i64 2
58+
store i32 %conv14.2, ptr %arrayidx16.2, align 4
59+
%conv14.3 = fptosi double %add21.2.165.i56 to i32
60+
%arrayidx16.3 = getelementptr i32, ptr null, i64 3
61+
store i32 %conv14.3, ptr %arrayidx16.3, align 4
62+
%conv14.4 = fptosi double %add21.2.1.1.i62 to i32
63+
store i32 %conv14.4, ptr null, align 4
64+
ret void
65+
}

0 commit comments

Comments
 (0)