Skip to content

Commit 052225d

Browse files
committed
[AArch64] Use a lower Costsize cost in getScalarizationOverhead.
This is a follow on to #130946 to use the same codesize cost override in getScalarizationOverhead for vector instructions.
1 parent fedd79b commit 052225d

File tree

7 files changed

+72
-71
lines changed

7 files changed

+72
-71
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3832,8 +3832,9 @@ InstructionCost AArch64TTIImpl::getScalarizationOverhead(
38323832
if (Ty->getElementType()->isFloatingPointTy())
38333833
return BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, Extract,
38343834
CostKind);
3835-
return DemandedElts.popcount() * (Insert + Extract) *
3836-
ST->getVectorInsertExtractBaseCost();
3835+
unsigned VecInstCost =
3836+
CostKind == TTI::TCK_CodeSize ? 1 : ST->getVectorInsertExtractBaseCost();
3837+
return DemandedElts.popcount() * (Insert + Extract) * VecInstCost;
38373838
}
38383839

38393840
InstructionCost AArch64TTIImpl::getArithmeticInstrCost(

llvm/test/Analysis/CostModel/AArch64/bswap.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ define void @neon() {
4444
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v2i64 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> undef)
4545
; CHECK-NEXT: Cost Model: Found costs of 2 for: %v4i64 = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> undef)
4646
; CHECK-NEXT: Cost Model: Found costs of 1 for: %v3i32 = call <3 x i32> @llvm.bswap.v3i32(<3 x i32> undef)
47-
; CHECK-NEXT: Cost Model: Found costs of 12 for: %v4i48 = call <4 x i48> @llvm.bswap.v4i48(<4 x i48> undef)
47+
; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:8 Lat:12 SizeLat:12 for: %v4i48 = call <4 x i48> @llvm.bswap.v4i48(<4 x i48> undef)
4848
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
4949
;
5050
%v4i16 = call <4 x i16> @llvm.bswap.v4i16(<4 x i16> undef)

llvm/test/Analysis/CostModel/AArch64/ctlz.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ declare i8 @llvm.ctlz.i8(i8)
5555
define <2 x i64> @test_ctlz_v2i64(<2 x i64> %a) {
5656
;
5757
; CHECK-LABEL: 'test_ctlz_v2i64'
58-
; CHECK-NEXT: Cost Model: Found costs of 10 for: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
58+
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
5959
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %ctlz
6060
;
6161
%ctlz = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
@@ -154,7 +154,7 @@ define <16 x i8> @test_ctlz_v16i8(<16 x i8> %a) {
154154

155155
define <4 x i64> @test_ctlz_v4i64(<4 x i64> %a) {
156156
; CHECK-LABEL: 'test_ctlz_v4i64'
157-
; CHECK-NEXT: Cost Model: Found costs of 20 for: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false)
157+
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false)
158158
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %ctlz
159159
;
160160
%ctlz = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false)

llvm/test/Analysis/CostModel/AArch64/cttz.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ declare i8 @llvm.cttz.i8(i8)
5555
define <2 x i64> @test_cttz_v2i64(<2 x i64> %a) {
5656
;
5757
; CHECK-LABEL: 'test_cttz_v2i64'
58-
; CHECK-NEXT: Cost Model: Found costs of 10 for: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
58+
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
5959
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i64> %cttz
6060
;
6161
%cttz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
@@ -65,7 +65,7 @@ define <2 x i64> @test_cttz_v2i64(<2 x i64> %a) {
6565
define <2 x i32> @test_cttz_v2i32(<2 x i32> %a) {
6666
;
6767
; CHECK-LABEL: 'test_cttz_v2i32'
68-
; CHECK-NEXT: Cost Model: Found costs of 10 for: %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 true)
68+
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 true)
6969
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i32> %cttz
7070
;
7171
%cttz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 true)
@@ -75,7 +75,7 @@ define <2 x i32> @test_cttz_v2i32(<2 x i32> %a) {
7575
define <4 x i32> @test_cttz_v4i32(<4 x i32> %a) {
7676
;
7777
; CHECK-LABEL: 'test_cttz_v4i32'
78-
; CHECK-NEXT: Cost Model: Found costs of 20 for: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
78+
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
7979
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i32> %cttz
8080
;
8181
%cttz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
@@ -85,7 +85,7 @@ define <4 x i32> @test_cttz_v4i32(<4 x i32> %a) {
8585
define <2 x i16> @test_cttz_v2i16(<2 x i16> %a) {
8686
;
8787
; CHECK-LABEL: 'test_cttz_v2i16'
88-
; CHECK-NEXT: Cost Model: Found costs of 10 for: %cttz = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 true)
88+
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %cttz = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 true)
8989
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i16> %cttz
9090
;
9191
%cttz = call <2 x i16> @llvm.cttz.v2i16(<2 x i16> %a, i1 true)
@@ -95,7 +95,7 @@ define <2 x i16> @test_cttz_v2i16(<2 x i16> %a) {
9595
define <4 x i16> @test_cttz_v4i16(<4 x i16> %a) {
9696
;
9797
; CHECK-LABEL: 'test_cttz_v4i16'
98-
; CHECK-NEXT: Cost Model: Found costs of 20 for: %cttz = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 true)
98+
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %cttz = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 true)
9999
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i16> %cttz
100100
;
101101
%cttz = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 true)
@@ -105,7 +105,7 @@ define <4 x i16> @test_cttz_v4i16(<4 x i16> %a) {
105105
define <8 x i16> @test_cttz_v8i16(<8 x i16> %a) {
106106
;
107107
; CHECK-LABEL: 'test_cttz_v8i16'
108-
; CHECK-NEXT: Cost Model: Found costs of 40 for: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
108+
; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
109109
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i16> %cttz
110110
;
111111
%cttz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
@@ -115,7 +115,7 @@ define <8 x i16> @test_cttz_v8i16(<8 x i16> %a) {
115115
define <2 x i8> @test_cttz_v2i8(<2 x i8> %a) {
116116
;
117117
; CHECK-LABEL: 'test_cttz_v2i8'
118-
; CHECK-NEXT: Cost Model: Found costs of 10 for: %cttz = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 true)
118+
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:6 Lat:10 SizeLat:10 for: %cttz = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 true)
119119
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <2 x i8> %cttz
120120
;
121121
%cttz = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %a, i1 true)
@@ -125,7 +125,7 @@ define <2 x i8> @test_cttz_v2i8(<2 x i8> %a) {
125125
define <4 x i8> @test_cttz_v4i8(<4 x i8> %a) {
126126
;
127127
; CHECK-LABEL: 'test_cttz_v4i8'
128-
; CHECK-NEXT: Cost Model: Found costs of 20 for: %cttz = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 true)
128+
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %cttz = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 true)
129129
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i8> %cttz
130130
;
131131
%cttz = call <4 x i8> @llvm.cttz.v4i8(<4 x i8> %a, i1 true)
@@ -135,7 +135,7 @@ define <4 x i8> @test_cttz_v4i8(<4 x i8> %a) {
135135
define <8 x i8> @test_cttz_v8i8(<8 x i8> %a) {
136136
;
137137
; CHECK-LABEL: 'test_cttz_v8i8'
138-
; CHECK-NEXT: Cost Model: Found costs of 40 for: %cttz = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 true)
138+
; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %cttz = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 true)
139139
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i8> %cttz
140140
;
141141
%cttz = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 true)
@@ -145,7 +145,7 @@ define <8 x i8> @test_cttz_v8i8(<8 x i8> %a) {
145145
define <16 x i8> @test_cttz_v16i8(<16 x i8> %a) {
146146
;
147147
; CHECK-LABEL: 'test_cttz_v16i8'
148-
; CHECK-NEXT: Cost Model: Found costs of 80 for: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
148+
; CHECK-NEXT: Cost Model: Found costs of RThru:80 CodeSize:48 Lat:80 SizeLat:80 for: %cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
149149
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i8> %cttz
150150
;
151151
%cttz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
@@ -155,7 +155,7 @@ define <16 x i8> @test_cttz_v16i8(<16 x i8> %a) {
155155
define <4 x i64> @test_cttz_v4i64(<4 x i64> %a) {
156156
;
157157
; CHECK-LABEL: 'test_cttz_v4i64'
158-
; CHECK-NEXT: Cost Model: Found costs of 20 for: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
158+
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:12 Lat:20 SizeLat:20 for: %cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
159159
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <4 x i64> %cttz
160160
;
161161
%cttz = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> %a, i1 true)
@@ -165,7 +165,7 @@ define <4 x i64> @test_cttz_v4i64(<4 x i64> %a) {
165165
define <8 x i32> @test_cttz_v8i32(<8 x i32> %a) {
166166
;
167167
; CHECK-LABEL: 'test_cttz_v8i32'
168-
; CHECK-NEXT: Cost Model: Found costs of 40 for: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
168+
; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:24 Lat:40 SizeLat:40 for: %cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
169169
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <8 x i32> %cttz
170170
;
171171
%cttz = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> %a, i1 true)
@@ -175,7 +175,7 @@ define <8 x i32> @test_cttz_v8i32(<8 x i32> %a) {
175175
define <16 x i16> @test_cttz_v16i16(<16 x i16> %a) {
176176
;
177177
; CHECK-LABEL: 'test_cttz_v16i16'
178-
; CHECK-NEXT: Cost Model: Found costs of 80 for: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
178+
; CHECK-NEXT: Cost Model: Found costs of RThru:80 CodeSize:48 Lat:80 SizeLat:80 for: %cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
179179
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <16 x i16> %cttz
180180
;
181181
%cttz = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> %a, i1 true)
@@ -185,7 +185,7 @@ define <16 x i16> @test_cttz_v16i16(<16 x i16> %a) {
185185
define <32 x i8> @test_cttz_v32i8(<32 x i8> %a) {
186186
;
187187
; CHECK-LABEL: 'test_cttz_v32i8'
188-
; CHECK-NEXT: Cost Model: Found costs of 160 for: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
188+
; CHECK-NEXT: Cost Model: Found costs of RThru:160 CodeSize:96 Lat:160 SizeLat:160 for: %cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)
189189
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret <32 x i8> %cttz
190190
;
191191
%cttz = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> %a, i1 true)

llvm/test/Analysis/CostModel/AArch64/masked_ldst.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,24 +5,24 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
55

66
define void @fixed() {
77
; CHECK-LABEL: 'fixed'
8-
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i8> undef)
9-
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i8> undef)
10-
; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:48 Lat:48 SizeLat:48 for: %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i8> undef)
11-
; CHECK-NEXT: Cost Model: Found costs of RThru:80 CodeSize:96 Lat:96 SizeLat:96 for: %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 8, <16 x i1> undef, <16 x i8> undef)
12-
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i16> undef)
13-
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i16> undef)
14-
; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:48 Lat:48 SizeLat:48 for: %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i16> undef)
15-
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i32> undef)
16-
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i32> undef)
17-
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:12 Lat:12 SizeLat:12 for: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i64> undef)
18-
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:9 Lat:10 SizeLat:10 for: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef)
19-
; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef)
20-
; CHECK-NEXT: Cost Model: Found costs of RThru:38 CodeSize:39 Lat:46 SizeLat:46 for: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef)
21-
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:9 Lat:10 SizeLat:10 for: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef)
22-
; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:19 Lat:22 SizeLat:22 for: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef)
23-
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:9 Lat:10 SizeLat:10 for: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef)
24-
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:24 Lat:24 SizeLat:24 for: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef)
25-
; CHECK-NEXT: Cost Model: Found costs of RThru:152 CodeSize:156 Lat:184 SizeLat:184 for: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef)
8+
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:12 for: %v2i8 = call <2 x i8> @llvm.masked.load.v2i8.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i8> undef)
9+
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:16 Lat:24 SizeLat:24 for: %v4i8 = call <4 x i8> @llvm.masked.load.v4i8.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i8> undef)
10+
; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:32 Lat:48 SizeLat:48 for: %v8i8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i8> undef)
11+
; CHECK-NEXT: Cost Model: Found costs of RThru:80 CodeSize:64 Lat:96 SizeLat:96 for: %v16i8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 8, <16 x i1> undef, <16 x i8> undef)
12+
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:12 for: %v2i16 = call <2 x i16> @llvm.masked.load.v2i16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i16> undef)
13+
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:16 Lat:24 SizeLat:24 for: %v4i16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i16> undef)
14+
; CHECK-NEXT: Cost Model: Found costs of RThru:40 CodeSize:32 Lat:48 SizeLat:48 for: %v8i16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x i16> undef)
15+
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:12 for: %v2i32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i32> undef)
16+
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:16 Lat:24 SizeLat:24 for: %v4i32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i32> undef)
17+
; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:8 Lat:12 SizeLat:12 for: %v2i64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x i64> undef)
18+
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %v2f16 = call <2 x half> @llvm.masked.load.v2f16.p0(ptr undef, i32 8, <2 x i1> undef, <2 x half> undef)
19+
; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:15 Lat:22 SizeLat:22 for: %v4f16 = call <4 x half> @llvm.masked.load.v4f16.p0(ptr undef, i32 8, <4 x i1> undef, <4 x half> undef)
20+
; CHECK-NEXT: Cost Model: Found costs of RThru:38 CodeSize:31 Lat:46 SizeLat:46 for: %v8f16 = call <8 x half> @llvm.masked.load.v8f16.p0(ptr undef, i32 8, <8 x i1> undef, <8 x half> undef)
21+
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %v2f32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 8, <2 x i1> undef, <2 x float> undef)
22+
; CHECK-NEXT: Cost Model: Found costs of RThru:18 CodeSize:15 Lat:22 SizeLat:22 for: %v4f32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 8, <4 x i1> undef, <4 x float> undef)
23+
; CHECK-NEXT: Cost Model: Found costs of RThru:8 CodeSize:7 Lat:10 SizeLat:10 for: %v2f64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 8, <2 x i1> undef, <2 x double> undef)
24+
; CHECK-NEXT: Cost Model: Found costs of RThru:20 CodeSize:16 Lat:24 SizeLat:24 for: %v4i64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 8, <4 x i1> undef, <4 x i64> undef)
25+
; CHECK-NEXT: Cost Model: Found costs of RThru:152 CodeSize:124 Lat:184 SizeLat:184 for: %v32f16 = call <32 x half> @llvm.masked.load.v32f16.p0(ptr undef, i32 8, <32 x i1> undef, <32 x half> undef)
2626
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
2727
;
2828
entry:

0 commit comments

Comments
 (0)