Skip to content

Commit 2c93413

Browse files
committed
Autogenerate checks for merge-vectors.ll
1 parent 6a55425 commit 2c93413

File tree

1 file changed: +135 −33 lines changed

llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll

Lines changed: 135 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,19 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
12
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -mattr=+relaxed-buffer-oob-mode -S -o - %s | FileCheck --check-prefixes=CHECK,CHECK-OOB-RELAXED %s
23
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck --check-prefixes=CHECK,CHECK-OOB-STRICT %s
34

45
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
56

6-
; CHECK-LABEL: @merge_v2i32_v2i32(
7-
; CHECK: load <4 x i32>
8-
; CHECK: store <4 x i32> zeroinitializer
97
define amdgpu_kernel void @merge_v2i32_v2i32(ptr addrspace(1) nocapture %a, ptr addrspace(1) nocapture readonly %b) #0 {
8+
; CHECK-LABEL: define amdgpu_kernel void @merge_v2i32_v2i32(
9+
; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]], ptr addrspace(1) readonly captures(none) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
10+
; CHECK-NEXT: [[ENTRY:.*:]]
11+
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr addrspace(1) [[B]], align 4
12+
; CHECK-NEXT: [[LD_C1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
13+
; CHECK-NEXT: [[LD_C_IDX_12:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
14+
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr addrspace(1) [[A]], align 4
15+
; CHECK-NEXT: ret void
16+
;
1017
entry:
1118
%a.1 = getelementptr inbounds <2 x i32>, ptr addrspace(1) %a, i64 1
1219
%b.1 = getelementptr inbounds <2 x i32>, ptr addrspace(1) %b, i64 1
@@ -20,10 +27,16 @@ entry:
2027
ret void
2128
}
2229

23-
; CHECK-LABEL: @merge_v1i32_v1i32(
24-
; CHECK: load <2 x i32>
25-
; CHECK: store <2 x i32> zeroinitializer
2630
define amdgpu_kernel void @merge_v1i32_v1i32(ptr addrspace(1) nocapture %a, ptr addrspace(1) nocapture readonly %b) #0 {
31+
; CHECK-LABEL: define amdgpu_kernel void @merge_v1i32_v1i32(
32+
; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]], ptr addrspace(1) readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
33+
; CHECK-NEXT: [[ENTRY:.*:]]
34+
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[B]], align 4
35+
; CHECK-NEXT: [[LD_C1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <1 x i32> zeroinitializer
36+
; CHECK-NEXT: [[LD_C_IDX_12:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <1 x i32> <i32 1>
37+
; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(1) [[A]], align 4
38+
; CHECK-NEXT: ret void
39+
;
2740
entry:
2841
%a.1 = getelementptr inbounds <1 x i32>, ptr addrspace(1) %a, i64 1
2942
%b.1 = getelementptr inbounds <1 x i32>, ptr addrspace(1) %b, i64 1
@@ -37,12 +50,18 @@ entry:
3750
ret void
3851
}
3952

40-
; CHECK-LABEL: @no_merge_v3i32_v3i32(
41-
; CHECK: load <3 x i32>
42-
; CHECK: load <3 x i32>
43-
; CHECK: store <3 x i32> zeroinitializer
44-
; CHECK: store <3 x i32> zeroinitializer
4553
define amdgpu_kernel void @no_merge_v3i32_v3i32(ptr addrspace(1) nocapture %a, ptr addrspace(1) nocapture readonly %b) #0 {
54+
; CHECK-LABEL: define amdgpu_kernel void @no_merge_v3i32_v3i32(
55+
; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]], ptr addrspace(1) readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
56+
; CHECK-NEXT: [[ENTRY:.*:]]
57+
; CHECK-NEXT: [[A_1:%.*]] = getelementptr inbounds <3 x i32>, ptr addrspace(1) [[A]], i64 1
58+
; CHECK-NEXT: [[B_1:%.*]] = getelementptr inbounds <3 x i32>, ptr addrspace(1) [[B]], i64 1
59+
; CHECK-NEXT: [[LD_C:%.*]] = load <3 x i32>, ptr addrspace(1) [[B]], align 4
60+
; CHECK-NEXT: [[LD_C_IDX_1:%.*]] = load <3 x i32>, ptr addrspace(1) [[B_1]], align 4
61+
; CHECK-NEXT: store <3 x i32> zeroinitializer, ptr addrspace(1) [[A]], align 4
62+
; CHECK-NEXT: store <3 x i32> zeroinitializer, ptr addrspace(1) [[A_1]], align 4
63+
; CHECK-NEXT: ret void
64+
;
4665
entry:
4766
%a.1 = getelementptr inbounds <3 x i32>, ptr addrspace(1) %a, i64 1
4867
%b.1 = getelementptr inbounds <3 x i32>, ptr addrspace(1) %b, i64 1
@@ -56,10 +75,16 @@ entry:
5675
ret void
5776
}
5877

59-
; CHECK-LABEL: @merge_v2i16_v2i16(
60-
; CHECK: load <4 x i16>
61-
; CHECK: store <4 x i16> zeroinitializer
6278
define amdgpu_kernel void @merge_v2i16_v2i16(ptr addrspace(1) nocapture %a, ptr addrspace(1) nocapture readonly %b) #0 {
79+
; CHECK-LABEL: define amdgpu_kernel void @merge_v2i16_v2i16(
80+
; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]], ptr addrspace(1) readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
81+
; CHECK-NEXT: [[ENTRY:.*:]]
82+
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr addrspace(1) [[B]], align 4
83+
; CHECK-NEXT: [[LD_C1:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
84+
; CHECK-NEXT: [[LD_C_IDX_12:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
85+
; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr addrspace(1) [[A]], align 4
86+
; CHECK-NEXT: ret void
87+
;
6388
entry:
6489
%a.1 = getelementptr inbounds <2 x i16>, ptr addrspace(1) %a, i64 1
6590
%b.1 = getelementptr inbounds <2 x i16>, ptr addrspace(1) %b, i64 1
@@ -73,15 +98,27 @@ entry:
7398
ret void
7499
}
75100

76-
; CHECK-OOB-RELAXED-LABEL: @merge_fat_ptrs(
77-
; CHECK-OOB-RELAXED: load <4 x i16>
78-
; CHECK-OOB-RELAXED: store <4 x i16> zeroinitializer
79-
; CHECK-OOB-STRICT-LABEL: @merge_fat_ptrs(
80-
; CHECK-OOB-STRICT: load <2 x i16>
81-
; CHECK-OOB-STRICT: load <2 x i16>
82-
; CHECK-OOB-STRICT: store <2 x i16> zeroinitializer
83-
; CHECK-OOB-STRICT: store <2 x i16> zeroinitializer
84101
define amdgpu_kernel void @merge_fat_ptrs(ptr addrspace(7) nocapture %a, ptr addrspace(7) nocapture readonly %b) #0 {
102+
; CHECK-OOB-RELAXED-LABEL: define amdgpu_kernel void @merge_fat_ptrs(
103+
; CHECK-OOB-RELAXED-SAME: ptr addrspace(7) captures(none) [[A:%.*]], ptr addrspace(7) readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
104+
; CHECK-OOB-RELAXED-NEXT: [[ENTRY:.*:]]
105+
; CHECK-OOB-RELAXED-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr addrspace(7) [[B]], align 4
106+
; CHECK-OOB-RELAXED-NEXT: [[LD_C1:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
107+
; CHECK-OOB-RELAXED-NEXT: [[LD_C_IDX_12:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
108+
; CHECK-OOB-RELAXED-NEXT: store <4 x i16> zeroinitializer, ptr addrspace(7) [[A]], align 4
109+
; CHECK-OOB-RELAXED-NEXT: ret void
110+
;
111+
; CHECK-OOB-STRICT-LABEL: define amdgpu_kernel void @merge_fat_ptrs(
112+
; CHECK-OOB-STRICT-SAME: ptr addrspace(7) captures(none) [[A:%.*]], ptr addrspace(7) readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
113+
; CHECK-OOB-STRICT-NEXT: [[ENTRY:.*:]]
114+
; CHECK-OOB-STRICT-NEXT: [[A_1:%.*]] = getelementptr inbounds <2 x i16>, ptr addrspace(7) [[A]], i32 1
115+
; CHECK-OOB-STRICT-NEXT: [[B_1:%.*]] = getelementptr inbounds <2 x i16>, ptr addrspace(7) [[B]], i32 1
116+
; CHECK-OOB-STRICT-NEXT: [[LD_C:%.*]] = load <2 x i16>, ptr addrspace(7) [[B]], align 4
117+
; CHECK-OOB-STRICT-NEXT: [[LD_C_IDX_1:%.*]] = load <2 x i16>, ptr addrspace(7) [[B_1]], align 4
118+
; CHECK-OOB-STRICT-NEXT: store <2 x i16> zeroinitializer, ptr addrspace(7) [[A]], align 4
119+
; CHECK-OOB-STRICT-NEXT: store <2 x i16> zeroinitializer, ptr addrspace(7) [[A_1]], align 4
120+
; CHECK-OOB-STRICT-NEXT: ret void
121+
;
85122
entry:
86123
%a.1 = getelementptr inbounds <2 x i16>, ptr addrspace(7) %a, i32 1
87124
%b.1 = getelementptr inbounds <2 x i16>, ptr addrspace(7) %b, i32 1
@@ -95,11 +132,16 @@ entry:
95132
ret void
96133
}
97134

98-
; CHECK-LABEL: @merge_load_i32_v2i16(
99-
; CHECK: load <2 x i32>
100-
; CHECK: extractelement <2 x i32> %0, i32 0
101-
; CHECK: extractelement <2 x i32> %0, i32 1
102135
define amdgpu_kernel void @merge_load_i32_v2i16(ptr addrspace(1) nocapture %a) #0 {
136+
; CHECK-LABEL: define amdgpu_kernel void @merge_load_i32_v2i16(
137+
; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]]) #[[ATTR0]] {
138+
; CHECK-NEXT: [[ENTRY:.*:]]
139+
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[A]], align 4
140+
; CHECK-NEXT: [[LD_01:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
141+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
142+
; CHECK-NEXT: [[DOTCAST:%.*]] = bitcast i32 [[TMP1]] to <2 x i16>
143+
; CHECK-NEXT: ret void
144+
;
103145
entry:
104146
%a.1 = getelementptr inbounds i32, ptr addrspace(1) %a, i32 1
105147

@@ -112,11 +154,56 @@ entry:
112154
attributes #0 = { nounwind }
113155
attributes #1 = { nounwind readnone }
114156

115-
; CHECK-LABEL: @merge_i32_2i16_float_4i8(
116-
; CHECK: load <4 x i32>
117-
; CHECK: store <2 x i32>
118-
; CHECK: store <2 x i32>
157+
119158
define void @merge_i32_2i16_float_4i8(ptr addrspace(1) %ptr1, ptr addrspace(2) %ptr2) {
159+
; CHECK-OOB-RELAXED-LABEL: define void @merge_i32_2i16_float_4i8(
160+
; CHECK-OOB-RELAXED-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] {
161+
; CHECK-OOB-RELAXED-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[PTR1]], i64 0
162+
; CHECK-OOB-RELAXED-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(1) [[GEP1]], align 4
163+
; CHECK-OOB-RELAXED-NEXT: [[LOAD12:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
164+
; CHECK-OOB-RELAXED-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
165+
; CHECK-OOB-RELAXED-NEXT: [[LOAD33:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
166+
; CHECK-OOB-RELAXED-NEXT: [[TMP3:%.*]] = bitcast i32 [[LOAD33]] to float
167+
; CHECK-OOB-RELAXED-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
168+
; CHECK-OOB-RELAXED-NEXT: [[DOTCAST:%.*]] = bitcast i32 [[TMP2]] to <2 x i16>
169+
; CHECK-OOB-RELAXED-NEXT: [[DOTCAST1:%.*]] = bitcast i32 [[TMP4]] to <4 x i8>
170+
; CHECK-OOB-RELAXED-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
171+
; CHECK-OOB-RELAXED-NEXT: [[DOTCAST_CAST:%.*]] = bitcast <2 x i16> [[DOTCAST]] to i32
172+
; CHECK-OOB-RELAXED-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[LOAD12]], i32 0
173+
; CHECK-OOB-RELAXED-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[DOTCAST_CAST]], i32 1
174+
; CHECK-OOB-RELAXED-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(2) [[STORE_GEP1]], align 4
175+
; CHECK-OOB-RELAXED-NEXT: [[STORE_GEP3:%.*]] = getelementptr inbounds float, ptr addrspace(2) [[PTR2]], i64 2
176+
; CHECK-OOB-RELAXED-NEXT: [[DOTCAST1_CAST:%.*]] = bitcast <4 x i8> [[DOTCAST1]] to i32
177+
; CHECK-OOB-RELAXED-NEXT: [[TMP7:%.*]] = bitcast float [[TMP3]] to i32
178+
; CHECK-OOB-RELAXED-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
179+
; CHECK-OOB-RELAXED-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[DOTCAST1_CAST]], i32 1
180+
; CHECK-OOB-RELAXED-NEXT: store <2 x i32> [[TMP9]], ptr addrspace(2) [[STORE_GEP3]], align 4
181+
; CHECK-OOB-RELAXED-NEXT: ret void
182+
;
183+
; CHECK-OOB-STRICT-LABEL: define void @merge_i32_2i16_float_4i8(
184+
; CHECK-OOB-STRICT-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) {
185+
; CHECK-OOB-STRICT-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[PTR1]], i64 0
186+
; CHECK-OOB-STRICT-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(1) [[GEP1]], align 4
187+
; CHECK-OOB-STRICT-NEXT: [[LOAD12:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
188+
; CHECK-OOB-STRICT-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
189+
; CHECK-OOB-STRICT-NEXT: [[LOAD33:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
190+
; CHECK-OOB-STRICT-NEXT: [[TMP3:%.*]] = bitcast i32 [[LOAD33]] to float
191+
; CHECK-OOB-STRICT-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
192+
; CHECK-OOB-STRICT-NEXT: [[DOTCAST:%.*]] = bitcast i32 [[TMP2]] to <2 x i16>
193+
; CHECK-OOB-STRICT-NEXT: [[DOTCAST1:%.*]] = bitcast i32 [[TMP4]] to <4 x i8>
194+
; CHECK-OOB-STRICT-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
195+
; CHECK-OOB-STRICT-NEXT: [[DOTCAST_CAST:%.*]] = bitcast <2 x i16> [[DOTCAST]] to i32
196+
; CHECK-OOB-STRICT-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[LOAD12]], i32 0
197+
; CHECK-OOB-STRICT-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[DOTCAST_CAST]], i32 1
198+
; CHECK-OOB-STRICT-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(2) [[STORE_GEP1]], align 4
199+
; CHECK-OOB-STRICT-NEXT: [[STORE_GEP3:%.*]] = getelementptr inbounds float, ptr addrspace(2) [[PTR2]], i64 2
200+
; CHECK-OOB-STRICT-NEXT: [[DOTCAST1_CAST:%.*]] = bitcast <4 x i8> [[DOTCAST1]] to i32
201+
; CHECK-OOB-STRICT-NEXT: [[TMP7:%.*]] = bitcast float [[TMP3]] to i32
202+
; CHECK-OOB-STRICT-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
203+
; CHECK-OOB-STRICT-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[DOTCAST1_CAST]], i32 1
204+
; CHECK-OOB-STRICT-NEXT: store <2 x i32> [[TMP9]], ptr addrspace(2) [[STORE_GEP3]], align 4
205+
; CHECK-OOB-STRICT-NEXT: ret void
206+
;
120207
%gep1 = getelementptr inbounds i32, ptr addrspace(1) %ptr1, i64 0
121208
%load1 = load i32, ptr addrspace(1) %gep1, align 4
122209
%gep2 = getelementptr inbounds <2 x i16>, ptr addrspace(1) %ptr1, i64 1
@@ -136,10 +223,25 @@ define void @merge_i32_2i16_float_4i8(ptr addrspace(1) %ptr1, ptr addrspace(2) %
136223
ret void
137224
}
138225

139-
; CHECK-LABEL: @merge_fp_type(
140-
; CHECK: load <2 x float>
141-
; CHECK: bitcast float {{.*}} to <2 x half>
142226
define void @merge_fp_type(ptr addrspace(1) %ptr1, ptr addrspace(2) %ptr2) {
227+
; CHECK-OOB-RELAXED-LABEL: define void @merge_fp_type(
228+
; CHECK-OOB-RELAXED-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) #[[ATTR1]] {
229+
; CHECK-OOB-RELAXED-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[PTR1]], i64 0
230+
; CHECK-OOB-RELAXED-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[GEP1]], align 4
231+
; CHECK-OOB-RELAXED-NEXT: [[LOAD11:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
232+
; CHECK-OOB-RELAXED-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
233+
; CHECK-OOB-RELAXED-NEXT: [[DOTCAST:%.*]] = bitcast float [[TMP2]] to <2 x half>
234+
; CHECK-OOB-RELAXED-NEXT: ret void
235+
;
236+
; CHECK-OOB-STRICT-LABEL: define void @merge_fp_type(
237+
; CHECK-OOB-STRICT-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) {
238+
; CHECK-OOB-STRICT-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[PTR1]], i64 0
239+
; CHECK-OOB-STRICT-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[GEP1]], align 4
240+
; CHECK-OOB-STRICT-NEXT: [[LOAD11:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
241+
; CHECK-OOB-STRICT-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
242+
; CHECK-OOB-STRICT-NEXT: [[DOTCAST:%.*]] = bitcast float [[TMP2]] to <2 x half>
243+
; CHECK-OOB-STRICT-NEXT: ret void
244+
;
143245
%gep1 = getelementptr inbounds float, ptr addrspace(1) %ptr1, i64 0
144246
%load1 = load float, ptr addrspace(1) %gep1, align 4
145247
%gep2 = getelementptr inbounds <2 x half>, ptr addrspace(1) %ptr1, i64 1

0 commit comments

Comments
 (0)