@@ -8,9 +8,9 @@ target triple = "x86_64-unknown-linux-gnu"
8
8
; This should promote
9
9
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 (ptr %arg , ptr readonly %arg1 ) #0 {
10
10
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
11
- ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL :%.*]])
11
+ ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL :%.*]]) #[[ATTR0:[0-9]+]] {
12
12
; CHECK-NEXT: bb:
13
- ; CHECK-NEXT: store <8 x i64> [[ARG1_VAL ]], ptr [[ARG]]
13
+ ; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL ]], ptr [[ARG]], align 64
14
14
; CHECK-NEXT: ret void
15
15
;
16
16
bb:
21
21
22
22
define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512 (ptr %arg ) #0 {
23
23
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer512
24
- ; CHECK-SAME: (ptr [[ARG:%.*]])
24
+ ; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
25
25
; CHECK-NEXT: bb:
26
26
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
27
27
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
28
28
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
29
- ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
29
+ ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
30
30
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
31
31
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
32
32
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
45
45
; This should promote
46
46
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 (ptr %arg , ptr readonly %arg1 ) #1 {
47
47
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
48
- ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL :%.*]])
48
+ ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL :%.*]]) #[[ATTR1:[0-9]+]] {
49
49
; CHECK-NEXT: bb:
50
- ; CHECK-NEXT: store <8 x i64> [[ARG1_VAL ]], ptr [[ARG]]
50
+ ; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL ]], ptr [[ARG]], align 64
51
51
; CHECK-NEXT: ret void
52
52
;
53
53
bb:
58
58
59
59
define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256 (ptr %arg ) #1 {
60
60
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer256
61
- ; CHECK-SAME: (ptr [[ARG:%.*]])
61
+ ; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
62
62
; CHECK-NEXT: bb:
63
63
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
64
64
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
65
65
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
66
- ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
66
+ ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
67
67
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
68
68
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
69
69
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
82
82
; This should promote
83
83
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 (ptr %arg , ptr readonly %arg1 ) #1 {
84
84
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
85
- ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL :%.*]])
85
+ ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL :%.*]]) #[[ATTR1]] {
86
86
; CHECK-NEXT: bb:
87
- ; CHECK-NEXT: store <8 x i64> [[ARG1_VAL ]], ptr [[ARG]]
87
+ ; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL ]], ptr [[ARG]], align 64
88
88
; CHECK-NEXT: ret void
89
89
;
90
90
bb:
95
95
96
96
define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256 (ptr %arg ) #0 {
97
97
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer512_call_avx512_legal512_prefer256
98
- ; CHECK-SAME: (ptr [[ARG:%.*]])
98
+ ; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR0]] {
99
99
; CHECK-NEXT: bb:
100
100
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
101
101
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
102
102
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
103
- ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
103
+ ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
104
104
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
105
105
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
106
106
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
119
119
; This should promote
120
120
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 (ptr %arg , ptr readonly %arg1 ) #0 {
121
121
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
122
- ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL :%.*]])
122
+ ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL :%.*]]) #[[ATTR0]] {
123
123
; CHECK-NEXT: bb:
124
- ; CHECK-NEXT: store <8 x i64> [[ARG1_VAL ]], ptr [[ARG]]
124
+ ; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL ]], ptr [[ARG]], align 64
125
125
; CHECK-NEXT: ret void
126
126
;
127
127
bb:
@@ -132,12 +132,12 @@ bb:
132
132
133
133
define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512 (ptr %arg ) #1 {
134
134
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal512_prefer512
135
- ; CHECK-SAME: (ptr [[ARG:%.*]])
135
+ ; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
136
136
; CHECK-NEXT: bb:
137
137
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
138
138
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
139
139
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
140
- ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
140
+ ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
141
141
; CHECK-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
142
142
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
143
143
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -156,10 +156,10 @@ bb:
156
156
; This should not promote
157
157
define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 (ptr %arg , ptr readonly %arg1 ) #1 {
158
158
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
159
- ; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
159
+ ; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR1]] {
160
160
; CHECK-NEXT: bb:
161
- ; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]]
162
- ; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]]
161
+ ; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
162
+ ; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
163
163
; CHECK-NEXT: ret void
164
164
;
165
165
bb:
170
170
171
171
define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256 (ptr %arg ) #2 {
172
172
; CHECK-LABEL: define {{[^@]+}}@avx512_legal256_prefer256_call_avx512_legal512_prefer256
173
- ; CHECK-SAME: (ptr [[ARG:%.*]])
173
+ ; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR2:[0-9]+]] {
174
174
; CHECK-NEXT: bb:
175
175
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
176
176
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
@@ -193,10 +193,10 @@ bb:
193
193
; This should not promote
194
194
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 (ptr %arg , ptr readonly %arg1 ) #2 {
195
195
; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
196
- ; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]])
196
+ ; CHECK-SAME: (ptr [[ARG:%.*]], ptr readonly [[ARG1:%.*]]) #[[ATTR2]] {
197
197
; CHECK-NEXT: bb:
198
- ; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]]
199
- ; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]]
198
+ ; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
199
+ ; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
200
200
; CHECK-NEXT: ret void
201
201
;
202
202
bb:
207
207
208
208
define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256 (ptr %arg ) #1 {
209
209
; CHECK-LABEL: define {{[^@]+}}@avx512_legal512_prefer256_call_avx512_legal256_prefer256
210
- ; CHECK-SAME: (ptr [[ARG:%.*]])
210
+ ; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR1]] {
211
211
; CHECK-NEXT: bb:
212
212
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
213
213
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
230
230
; This should promote
231
231
define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 (ptr %arg , ptr readonly %arg1 ) #3 {
232
232
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
233
- ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL :%.*]])
233
+ ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL :%.*]]) #[[ATTR3:[0-9]+]] {
234
234
; CHECK-NEXT: bb:
235
- ; CHECK-NEXT: store <8 x i64> [[ARG1_VAL ]], ptr [[ARG]]
235
+ ; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL ]], ptr [[ARG]], align 64
236
236
; CHECK-NEXT: ret void
237
237
;
238
238
bb:
@@ -243,12 +243,12 @@ bb:
243
243
244
244
define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256 (ptr %arg ) #4 {
245
245
; CHECK-LABEL: define {{[^@]+}}@avx2_legal256_prefer256_call_avx2_legal512_prefer256
246
- ; CHECK-SAME: (ptr [[ARG:%.*]])
246
+ ; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR3]] {
247
247
; CHECK-NEXT: bb:
248
248
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
249
249
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
250
250
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
251
- ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
251
+ ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
252
252
; CHECK-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
253
253
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
254
254
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
267
267
; This should promote
268
268
define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 (ptr %arg , ptr readonly %arg1 ) #4 {
269
269
; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
270
- ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_VAL :%.*]])
270
+ ; CHECK-SAME: (ptr [[ARG:%.*]], <8 x i64> [[ARG1_0_VAL :%.*]]) #[[ATTR3]] {
271
271
; CHECK-NEXT: bb:
272
- ; CHECK-NEXT: store <8 x i64> [[ARG1_VAL ]], ptr [[ARG]]
272
+ ; CHECK-NEXT: store <8 x i64> [[ARG1_0_VAL ]], ptr [[ARG]], align 64
273
273
; CHECK-NEXT: ret void
274
274
;
275
275
bb:
@@ -280,12 +280,12 @@ bb:
280
280
281
281
define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256 (ptr %arg ) #3 {
282
282
; CHECK-LABEL: define {{[^@]+}}@avx2_legal512_prefer256_call_avx2_legal256_prefer256
283
- ; CHECK-SAME: (ptr [[ARG:%.*]])
283
+ ; CHECK-SAME: (ptr [[ARG:%.*]]) #[[ATTR3]] {
284
284
; CHECK-NEXT: bb:
285
285
; CHECK-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
286
286
; CHECK-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
287
287
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
288
- ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]]
288
+ ; CHECK-NEXT: [[TMP_VAL:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
289
289
; CHECK-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr [[TMP2]], <8 x i64> [[TMP_VAL]])
290
290
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 32
291
291
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
304
304
; If the arguments are scalar, it's OK to promote.
305
305
define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 (ptr %X , ptr %Y ) #2 {
306
306
; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
307
- ; CHECK-SAME: (i32 [[X_VAL :%.*]], i32 [[Y_VAL :%.*]])
308
- ; CHECK-NEXT: [[C:%.*]] = add i32 [[X_VAL ]], [[Y_VAL ]]
307
+ ; CHECK-SAME: (i32 [[X_0_VAL :%.*]], i32 [[Y_0_VAL :%.*]]) #[[ATTR2]] {
308
+ ; CHECK-NEXT: [[C:%.*]] = add i32 [[X_0_VAL ]], [[Y_0_VAL ]]
309
309
; CHECK-NEXT: ret i32 [[C]]
310
310
;
311
311
%A = load i32 , ptr %X
@@ -316,11 +316,11 @@ define internal i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal51
316
316
317
317
define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256 (ptr %B ) #2 {
318
318
; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256
319
- ; CHECK-SAME: (ptr [[B:%.*]])
320
- ; CHECK-NEXT: [[A:%.*]] = alloca i32
321
- ; CHECK-NEXT: store i32 1, ptr [[A]]
322
- ; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]]
323
- ; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]]
319
+ ; CHECK-SAME: (ptr [[B:%.*]]) #[[ATTR2]] {
320
+ ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
321
+ ; CHECK-NEXT: store i32 1, ptr [[A]], align 4
322
+ ; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]], align 4
323
+ ; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]], align 4
324
324
; CHECK-NEXT: [[C:%.*]] = call i32 @scalar_callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
325
325
; CHECK-NEXT: ret i32 [[C]]
326
326
;
@@ -333,8 +333,8 @@ define i32 @scalar_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr
333
333
; If the arguments are scalar, it's OK to promote.
334
334
define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 (ptr %X , ptr %Y ) #2 {
335
335
; CHECK-LABEL: define {{[^@]+}}@scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
336
- ; CHECK-SAME: (i32 [[X_VAL :%.*]], i32 [[Y_VAL :%.*]])
337
- ; CHECK-NEXT: [[C:%.*]] = add i32 [[X_VAL ]], [[Y_VAL ]]
336
+ ; CHECK-SAME: (i32 [[X_0_VAL :%.*]], i32 [[Y_0_VAL :%.*]]) #[[ATTR2]] {
337
+ ; CHECK-NEXT: [[C:%.*]] = add i32 [[X_0_VAL ]], [[Y_0_VAL ]]
338
338
; CHECK-NEXT: ret i32 [[C]]
339
339
;
340
340
%A = load i32 , ptr %X
@@ -345,11 +345,11 @@ define internal i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal25
345
345
346
346
define i32 @scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256 (ptr %B ) #2 {
347
347
; CHECK-LABEL: define {{[^@]+}}@scalar_avx512_legal512_prefer256_call_avx512_legal256_prefer256
348
- ; CHECK-SAME: (ptr [[B:%.*]])
349
- ; CHECK-NEXT: [[A:%.*]] = alloca i32
350
- ; CHECK-NEXT: store i32 1, ptr [[A]]
351
- ; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]]
352
- ; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]]
348
+ ; CHECK-SAME: (ptr [[B:%.*]]) #[[ATTR2]] {
349
+ ; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
350
+ ; CHECK-NEXT: store i32 1, ptr [[A]], align 4
351
+ ; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A]], align 4
352
+ ; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B]], align 4
353
353
; CHECK-NEXT: [[C:%.*]] = call i32 @scalar_callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(i32 [[A_VAL]], i32 [[B_VAL]])
354
354
; CHECK-NEXT: ret i32 [[C]]
355
355
;
0 commit comments