@@ -223,28 +223,116 @@ define void @merge_i32_2i16_float_4i8(ptr addrspace(1) %ptr1, ptr addrspace(2) %
223
223
ret void
224
224
}
225
225
226
; Test: loads a float through a `float` GEP at index 0 and a <2 x half> through
; a `<2 x half>` GEP at index 1 of %ptr1 (both align 4), then stores the same
; two values to %ptr2 at the matching indices.
; The CHECK lines for both prefixes expect one <2 x float> load whose second
; lane is bitcast back to <2 x half>, and one <2 x i32> store assembled from
; i32 bitcasts of the two values.
- define void @merge_fp_type (ptr addrspace (1 ) %ptr1 , ptr addrspace (2 ) %ptr2 ) {
227
- ; CHECK-OOB-RELAXED-LABEL: define void @merge_fp_type (
226
+ define void @merge_fp_v2half_type (ptr addrspace (1 ) %ptr1 , ptr addrspace (2 ) %ptr2 ) {
227
+ ; CHECK-OOB-RELAXED-LABEL: define void @merge_fp_v2half_type (
228
228
; CHECK-OOB-RELAXED-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) #[[ATTR1]] {
229
229
; CHECK-OOB-RELAXED-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[PTR1]], i64 0
230
230
; CHECK-OOB-RELAXED-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[GEP1]], align 4
231
231
; CHECK-OOB-RELAXED-NEXT: [[LOAD11:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
232
232
; CHECK-OOB-RELAXED-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
233
233
; CHECK-OOB-RELAXED-NEXT: [[DOTCAST:%.*]] = bitcast float [[TMP2]] to <2 x half>
234
+ ; CHECK-OOB-RELAXED-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
235
+ ; CHECK-OOB-RELAXED-NEXT: [[DOTCAST_CAST:%.*]] = bitcast <2 x half> [[DOTCAST]] to i32
236
+ ; CHECK-OOB-RELAXED-NEXT: [[TMP3:%.*]] = bitcast float [[LOAD11]] to i32
237
+ ; CHECK-OOB-RELAXED-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i32 0
238
+ ; CHECK-OOB-RELAXED-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[DOTCAST_CAST]], i32 1
239
+ ; CHECK-OOB-RELAXED-NEXT: store <2 x i32> [[TMP5]], ptr addrspace(2) [[STORE_GEP1]], align 4
234
240
; CHECK-OOB-RELAXED-NEXT: ret void
235
241
;
236
- ; CHECK-OOB-STRICT-LABEL: define void @merge_fp_type (
242
+ ; CHECK-OOB-STRICT-LABEL: define void @merge_fp_v2half_type (
237
243
; CHECK-OOB-STRICT-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) {
238
244
; CHECK-OOB-STRICT-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[PTR1]], i64 0
239
245
; CHECK-OOB-STRICT-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[GEP1]], align 4
240
246
; CHECK-OOB-STRICT-NEXT: [[LOAD11:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
241
247
; CHECK-OOB-STRICT-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
242
248
; CHECK-OOB-STRICT-NEXT: [[DOTCAST:%.*]] = bitcast float [[TMP2]] to <2 x half>
249
+ ; CHECK-OOB-STRICT-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
250
+ ; CHECK-OOB-STRICT-NEXT: [[DOTCAST_CAST:%.*]] = bitcast <2 x half> [[DOTCAST]] to i32
251
+ ; CHECK-OOB-STRICT-NEXT: [[TMP3:%.*]] = bitcast float [[LOAD11]] to i32
252
+ ; CHECK-OOB-STRICT-NEXT: [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i32 0
253
+ ; CHECK-OOB-STRICT-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[DOTCAST_CAST]], i32 1
254
+ ; CHECK-OOB-STRICT-NEXT: store <2 x i32> [[TMP5]], ptr addrspace(2) [[STORE_GEP1]], align 4
243
255
; CHECK-OOB-STRICT-NEXT: ret void
244
256
;
245
257
%gep1 = getelementptr inbounds float , ptr addrspace (1 ) %ptr1 , i64 0
246
258
%load1 = load float , ptr addrspace (1 ) %gep1 , align 4
247
259
%gep2 = getelementptr inbounds <2 x half >, ptr addrspace (1 ) %ptr1 , i64 1
248
260
%load2 = load <2 x half >, ptr addrspace (1 ) %gep2 , align 4
261
+ %store.gep1 = getelementptr inbounds i32 , ptr addrspace (2 ) %ptr2 , i64 0
262
+ store float %load1 , ptr addrspace (2 ) %store.gep1 , align 4
263
+ %store.gep2 = getelementptr inbounds <2 x half >, ptr addrspace (2 ) %ptr2 , i64 1
264
+ store <2 x half > %load2 , ptr addrspace (2 ) %store.gep2 , align 4
265
+ ret void
266
+ }
267
+
268
; Test: loads a bfloat through a `bfloat` GEP at index 0 and a <2 x half>
; through a `<2 x half>` GEP at index 1 of %ptr1, then stores the same two
; values to %ptr2.
; The CHECK lines for both prefixes expect the two loads and the two stores to
; be emitted unchanged; no vector load or store is formed.
; NOTE(review): presumably left unmerged because the bfloat access does not
; abut the <2 x half> access -- confirm this is the intended reason.
+ define void @merge_v2half_bfloat_type (ptr addrspace (1 ) %ptr1 , ptr addrspace (2 ) %ptr2 ) {
269
+ ; CHECK-OOB-RELAXED-LABEL: define void @merge_v2half_bfloat_type(
270
+ ; CHECK-OOB-RELAXED-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) #[[ATTR1]] {
271
+ ; CHECK-OOB-RELAXED-NEXT: [[GEP1:%.*]] = getelementptr inbounds bfloat, ptr addrspace(1) [[PTR1]], i64 0
272
+ ; CHECK-OOB-RELAXED-NEXT: [[LOAD1:%.*]] = load bfloat, ptr addrspace(1) [[GEP1]], align 4
273
+ ; CHECK-OOB-RELAXED-NEXT: [[GEP2:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(1) [[PTR1]], i64 1
274
+ ; CHECK-OOB-RELAXED-NEXT: [[LOAD2:%.*]] = load <2 x half>, ptr addrspace(1) [[GEP2]], align 4
275
+ ; CHECK-OOB-RELAXED-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
276
+ ; CHECK-OOB-RELAXED-NEXT: store bfloat [[LOAD1]], ptr addrspace(2) [[STORE_GEP1]], align 4
277
+ ; CHECK-OOB-RELAXED-NEXT: [[STORE_GEP2:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(2) [[PTR2]], i64 1
278
+ ; CHECK-OOB-RELAXED-NEXT: store <2 x half> [[LOAD2]], ptr addrspace(2) [[STORE_GEP2]], align 4
279
+ ; CHECK-OOB-RELAXED-NEXT: ret void
280
+ ;
281
+ ; CHECK-OOB-STRICT-LABEL: define void @merge_v2half_bfloat_type(
282
+ ; CHECK-OOB-STRICT-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) {
283
+ ; CHECK-OOB-STRICT-NEXT: [[GEP1:%.*]] = getelementptr inbounds bfloat, ptr addrspace(1) [[PTR1]], i64 0
284
+ ; CHECK-OOB-STRICT-NEXT: [[LOAD1:%.*]] = load bfloat, ptr addrspace(1) [[GEP1]], align 4
285
+ ; CHECK-OOB-STRICT-NEXT: [[GEP2:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(1) [[PTR1]], i64 1
286
+ ; CHECK-OOB-STRICT-NEXT: [[LOAD2:%.*]] = load <2 x half>, ptr addrspace(1) [[GEP2]], align 4
287
+ ; CHECK-OOB-STRICT-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
288
+ ; CHECK-OOB-STRICT-NEXT: store bfloat [[LOAD1]], ptr addrspace(2) [[STORE_GEP1]], align 4
289
+ ; CHECK-OOB-STRICT-NEXT: [[STORE_GEP2:%.*]] = getelementptr inbounds <2 x half>, ptr addrspace(2) [[PTR2]], i64 1
290
+ ; CHECK-OOB-STRICT-NEXT: store <2 x half> [[LOAD2]], ptr addrspace(2) [[STORE_GEP2]], align 4
291
+ ; CHECK-OOB-STRICT-NEXT: ret void
292
+ ;
293
+ %gep1 = getelementptr inbounds bfloat, ptr addrspace (1 ) %ptr1 , i64 0
294
+ %load1 = load bfloat, ptr addrspace (1 ) %gep1 , align 4
295
+ %gep2 = getelementptr inbounds <2 x half >, ptr addrspace (1 ) %ptr1 , i64 1
296
+ %load2 = load <2 x half >, ptr addrspace (1 ) %gep2 , align 4
297
+ %store.gep1 = getelementptr inbounds i32 , ptr addrspace (2 ) %ptr2 , i64 0
298
+ store bfloat %load1 , ptr addrspace (2 ) %store.gep1 , align 4
299
+ %store.gep2 = getelementptr inbounds <2 x half >, ptr addrspace (2 ) %ptr2 , i64 1
300
+ store <2 x half > %load2 , ptr addrspace (2 ) %store.gep2 , align 4
301
+ ret void
302
+ }
303
+
304
; Negative test: loads a `ptr addrspace(1)` value at index 0 and a
; `ptr addrspace(2)` value at index 1 of %ptr1, then stores both values to
; %ptr2.
; The CHECK lines for both prefixes expect the two loads and the two stores to
; be emitted unchanged; no vector load or store is formed.
; The LABEL directives must spell the full function name, because FileCheck
; matches them as a literal substring of the emitted `define` line.
+ define void @no_merge_mixed_ptr_addrspaces (ptr addrspace (1 ) %ptr1 , ptr addrspace (2 ) %ptr2 ) {
305
+ ; CHECK-OOB-RELAXED-LABEL: define void @no_merge_mixed_ptr_addrspaces(
306
+ ; CHECK-OOB-RELAXED-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) #[[ATTR1]] {
307
+ ; CHECK-OOB-RELAXED-NEXT: [[GEP1:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[PTR1]], i64 0
308
+ ; CHECK-OOB-RELAXED-NEXT: [[LOAD1:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[GEP1]], align 4
309
+ ; CHECK-OOB-RELAXED-NEXT: [[GEP2:%.*]] = getelementptr inbounds ptr addrspace(2), ptr addrspace(1) [[PTR1]], i64 1
310
+ ; CHECK-OOB-RELAXED-NEXT: [[LOAD2:%.*]] = load ptr addrspace(2), ptr addrspace(1) [[GEP2]], align 4
311
+ ; CHECK-OOB-RELAXED-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
312
+ ; CHECK-OOB-RELAXED-NEXT: store ptr addrspace(1) [[LOAD1]], ptr addrspace(2) [[STORE_GEP1]], align 4
313
+ ; CHECK-OOB-RELAXED-NEXT: [[STORE_GEP2:%.*]] = getelementptr inbounds ptr addrspace(2), ptr addrspace(2) [[PTR2]], i64 1
314
+ ; CHECK-OOB-RELAXED-NEXT: store ptr addrspace(2) [[LOAD2]], ptr addrspace(2) [[STORE_GEP2]], align 4
315
+ ; CHECK-OOB-RELAXED-NEXT: ret void
316
+ ;
317
+ ; CHECK-OOB-STRICT-LABEL: define void @no_merge_mixed_ptr_addrspaces(
318
+ ; CHECK-OOB-STRICT-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) {
319
+ ; CHECK-OOB-STRICT-NEXT: [[GEP1:%.*]] = getelementptr inbounds ptr addrspace(1), ptr addrspace(1) [[PTR1]], i64 0
320
+ ; CHECK-OOB-STRICT-NEXT: [[LOAD1:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[GEP1]], align 4
321
+ ; CHECK-OOB-STRICT-NEXT: [[GEP2:%.*]] = getelementptr inbounds ptr addrspace(2), ptr addrspace(1) [[PTR1]], i64 1
322
+ ; CHECK-OOB-STRICT-NEXT: [[LOAD2:%.*]] = load ptr addrspace(2), ptr addrspace(1) [[GEP2]], align 4
323
+ ; CHECK-OOB-STRICT-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
324
+ ; CHECK-OOB-STRICT-NEXT: store ptr addrspace(1) [[LOAD1]], ptr addrspace(2) [[STORE_GEP1]], align 4
325
+ ; CHECK-OOB-STRICT-NEXT: [[STORE_GEP2:%.*]] = getelementptr inbounds ptr addrspace(2), ptr addrspace(2) [[PTR2]], i64 1
326
+ ; CHECK-OOB-STRICT-NEXT: store ptr addrspace(2) [[LOAD2]], ptr addrspace(2) [[STORE_GEP2]], align 4
327
+ ; CHECK-OOB-STRICT-NEXT: ret void
328
+ ;
329
+ %gep1 = getelementptr inbounds ptr addrspace (1 ), ptr addrspace (1 ) %ptr1 , i64 0
330
+ %load1 = load ptr addrspace (1 ), ptr addrspace (1 ) %gep1 , align 4
331
+ %gep2 = getelementptr inbounds ptr addrspace (2 ), ptr addrspace (1 ) %ptr1 , i64 1
332
+ %load2 = load ptr addrspace (2 ), ptr addrspace (1 ) %gep2 , align 4
333
+ %store.gep1 = getelementptr inbounds i32 , ptr addrspace (2 ) %ptr2 , i64 0
334
+ store ptr addrspace (1 ) %load1 , ptr addrspace (2 ) %store.gep1 , align 4
335
+ %store.gep2 = getelementptr inbounds ptr addrspace (2 ), ptr addrspace (2 ) %ptr2 , i64 1
336
+ store ptr addrspace (2 ) %load2 , ptr addrspace (2 ) %store.gep2 , align 4
249
337
ret void
250
338
}
0 commit comments