1
+ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
1
2
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -mattr=+relaxed-buffer-oob-mode -S -o - %s | FileCheck --check-prefixes=CHECK,CHECK-OOB-RELAXED %s
2
3
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck --check-prefixes=CHECK,CHECK-OOB-STRICT %s
3
4
4
5
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
5
6
6
- ; CHECK-LABEL: @merge_v2i32_v2i32(
7
- ; CHECK: load <4 x i32>
8
- ; CHECK: store <4 x i32> zeroinitializer
9
7
define amdgpu_kernel void @merge_v2i32_v2i32 (ptr addrspace (1 ) nocapture %a , ptr addrspace (1 ) nocapture readonly %b ) #0 {
8
+ ; CHECK-LABEL: define amdgpu_kernel void @merge_v2i32_v2i32(
9
+ ; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]], ptr addrspace(1) readonly captures(none) [[B:%.*]]) #[[ATTR0:[0-9]+]] {
10
+ ; CHECK-NEXT: [[ENTRY:.*:]]
11
+ ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr addrspace(1) [[B]], align 4
12
+ ; CHECK-NEXT: [[LD_C1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 0, i32 1>
13
+ ; CHECK-NEXT: [[LD_C_IDX_12:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> poison, <2 x i32> <i32 2, i32 3>
14
+ ; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr addrspace(1) [[A]], align 4
15
+ ; CHECK-NEXT: ret void
16
+ ;
10
17
entry:
11
18
%a.1 = getelementptr inbounds <2 x i32 >, ptr addrspace (1 ) %a , i64 1
12
19
%b.1 = getelementptr inbounds <2 x i32 >, ptr addrspace (1 ) %b , i64 1
@@ -20,10 +27,16 @@ entry:
20
27
ret void
21
28
}
22
29
23
- ; CHECK-LABEL: @merge_v1i32_v1i32(
24
- ; CHECK: load <2 x i32>
25
- ; CHECK: store <2 x i32> zeroinitializer
26
30
define amdgpu_kernel void @merge_v1i32_v1i32 (ptr addrspace (1 ) nocapture %a , ptr addrspace (1 ) nocapture readonly %b ) #0 {
31
+ ; CHECK-LABEL: define amdgpu_kernel void @merge_v1i32_v1i32(
32
+ ; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]], ptr addrspace(1) readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
33
+ ; CHECK-NEXT: [[ENTRY:.*:]]
34
+ ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[B]], align 4
35
+ ; CHECK-NEXT: [[LD_C1:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <1 x i32> zeroinitializer
36
+ ; CHECK-NEXT: [[LD_C_IDX_12:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <1 x i32> <i32 1>
37
+ ; CHECK-NEXT: store <2 x i32> zeroinitializer, ptr addrspace(1) [[A]], align 4
38
+ ; CHECK-NEXT: ret void
39
+ ;
27
40
entry:
28
41
%a.1 = getelementptr inbounds <1 x i32 >, ptr addrspace (1 ) %a , i64 1
29
42
%b.1 = getelementptr inbounds <1 x i32 >, ptr addrspace (1 ) %b , i64 1
@@ -37,12 +50,18 @@ entry:
37
50
ret void
38
51
}
39
52
40
- ; CHECK-LABEL: @no_merge_v3i32_v3i32(
41
- ; CHECK: load <3 x i32>
42
- ; CHECK: load <3 x i32>
43
- ; CHECK: store <3 x i32> zeroinitializer
44
- ; CHECK: store <3 x i32> zeroinitializer
45
53
define amdgpu_kernel void @no_merge_v3i32_v3i32 (ptr addrspace (1 ) nocapture %a , ptr addrspace (1 ) nocapture readonly %b ) #0 {
54
+ ; CHECK-LABEL: define amdgpu_kernel void @no_merge_v3i32_v3i32(
55
+ ; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]], ptr addrspace(1) readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
56
+ ; CHECK-NEXT: [[ENTRY:.*:]]
57
+ ; CHECK-NEXT: [[A_1:%.*]] = getelementptr inbounds <3 x i32>, ptr addrspace(1) [[A]], i64 1
58
+ ; CHECK-NEXT: [[B_1:%.*]] = getelementptr inbounds <3 x i32>, ptr addrspace(1) [[B]], i64 1
59
+ ; CHECK-NEXT: [[LD_C:%.*]] = load <3 x i32>, ptr addrspace(1) [[B]], align 4
60
+ ; CHECK-NEXT: [[LD_C_IDX_1:%.*]] = load <3 x i32>, ptr addrspace(1) [[B_1]], align 4
61
+ ; CHECK-NEXT: store <3 x i32> zeroinitializer, ptr addrspace(1) [[A]], align 4
62
+ ; CHECK-NEXT: store <3 x i32> zeroinitializer, ptr addrspace(1) [[A_1]], align 4
63
+ ; CHECK-NEXT: ret void
64
+ ;
46
65
entry:
47
66
%a.1 = getelementptr inbounds <3 x i32 >, ptr addrspace (1 ) %a , i64 1
48
67
%b.1 = getelementptr inbounds <3 x i32 >, ptr addrspace (1 ) %b , i64 1
@@ -56,10 +75,16 @@ entry:
56
75
ret void
57
76
}
58
77
59
- ; CHECK-LABEL: @merge_v2i16_v2i16(
60
- ; CHECK: load <4 x i16>
61
- ; CHECK: store <4 x i16> zeroinitializer
62
78
define amdgpu_kernel void @merge_v2i16_v2i16 (ptr addrspace (1 ) nocapture %a , ptr addrspace (1 ) nocapture readonly %b ) #0 {
79
+ ; CHECK-LABEL: define amdgpu_kernel void @merge_v2i16_v2i16(
80
+ ; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]], ptr addrspace(1) readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
81
+ ; CHECK-NEXT: [[ENTRY:.*:]]
82
+ ; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr addrspace(1) [[B]], align 4
83
+ ; CHECK-NEXT: [[LD_C1:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
84
+ ; CHECK-NEXT: [[LD_C_IDX_12:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
85
+ ; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr addrspace(1) [[A]], align 4
86
+ ; CHECK-NEXT: ret void
87
+ ;
63
88
entry:
64
89
%a.1 = getelementptr inbounds <2 x i16 >, ptr addrspace (1 ) %a , i64 1
65
90
%b.1 = getelementptr inbounds <2 x i16 >, ptr addrspace (1 ) %b , i64 1
@@ -73,15 +98,27 @@ entry:
73
98
ret void
74
99
}
75
100
76
- ; CHECK-OOB-RELAXED-LABEL: @merge_fat_ptrs(
77
- ; CHECK-OOB-RELAXED: load <4 x i16>
78
- ; CHECK-OOB-RELAXED: store <4 x i16> zeroinitializer
79
- ; CHECK-OOB-STRICT-LABEL: @merge_fat_ptrs(
80
- ; CHECK-OOB-STRICT: load <2 x i16>
81
- ; CHECK-OOB-STRICT: load <2 x i16>
82
- ; CHECK-OOB-STRICT: store <2 x i16> zeroinitializer
83
- ; CHECK-OOB-STRICT: store <2 x i16> zeroinitializer
84
101
define amdgpu_kernel void @merge_fat_ptrs (ptr addrspace (7 ) nocapture %a , ptr addrspace (7 ) nocapture readonly %b ) #0 {
102
+ ; CHECK-OOB-RELAXED-LABEL: define amdgpu_kernel void @merge_fat_ptrs(
103
+ ; CHECK-OOB-RELAXED-SAME: ptr addrspace(7) captures(none) [[A:%.*]], ptr addrspace(7) readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
104
+ ; CHECK-OOB-RELAXED-NEXT: [[ENTRY:.*:]]
105
+ ; CHECK-OOB-RELAXED-NEXT: [[TMP0:%.*]] = load <4 x i16>, ptr addrspace(7) [[B]], align 4
106
+ ; CHECK-OOB-RELAXED-NEXT: [[LD_C1:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 0, i32 1>
107
+ ; CHECK-OOB-RELAXED-NEXT: [[LD_C_IDX_12:%.*]] = shufflevector <4 x i16> [[TMP0]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
108
+ ; CHECK-OOB-RELAXED-NEXT: store <4 x i16> zeroinitializer, ptr addrspace(7) [[A]], align 4
109
+ ; CHECK-OOB-RELAXED-NEXT: ret void
110
+ ;
111
+ ; CHECK-OOB-STRICT-LABEL: define amdgpu_kernel void @merge_fat_ptrs(
112
+ ; CHECK-OOB-STRICT-SAME: ptr addrspace(7) captures(none) [[A:%.*]], ptr addrspace(7) readonly captures(none) [[B:%.*]]) #[[ATTR0]] {
113
+ ; CHECK-OOB-STRICT-NEXT: [[ENTRY:.*:]]
114
+ ; CHECK-OOB-STRICT-NEXT: [[A_1:%.*]] = getelementptr inbounds <2 x i16>, ptr addrspace(7) [[A]], i32 1
115
+ ; CHECK-OOB-STRICT-NEXT: [[B_1:%.*]] = getelementptr inbounds <2 x i16>, ptr addrspace(7) [[B]], i32 1
116
+ ; CHECK-OOB-STRICT-NEXT: [[LD_C:%.*]] = load <2 x i16>, ptr addrspace(7) [[B]], align 4
117
+ ; CHECK-OOB-STRICT-NEXT: [[LD_C_IDX_1:%.*]] = load <2 x i16>, ptr addrspace(7) [[B_1]], align 4
118
+ ; CHECK-OOB-STRICT-NEXT: store <2 x i16> zeroinitializer, ptr addrspace(7) [[A]], align 4
119
+ ; CHECK-OOB-STRICT-NEXT: store <2 x i16> zeroinitializer, ptr addrspace(7) [[A_1]], align 4
120
+ ; CHECK-OOB-STRICT-NEXT: ret void
121
+ ;
85
122
entry:
86
123
%a.1 = getelementptr inbounds <2 x i16 >, ptr addrspace (7 ) %a , i32 1
87
124
%b.1 = getelementptr inbounds <2 x i16 >, ptr addrspace (7 ) %b , i32 1
@@ -95,11 +132,16 @@ entry:
95
132
ret void
96
133
}
97
134
98
- ; CHECK-LABEL: @merge_load_i32_v2i16(
99
- ; CHECK: load <2 x i32>
100
- ; CHECK: extractelement <2 x i32> %0, i32 0
101
- ; CHECK: extractelement <2 x i32> %0, i32 1
102
135
define amdgpu_kernel void @merge_load_i32_v2i16 (ptr addrspace (1 ) nocapture %a ) #0 {
136
+ ; CHECK-LABEL: define amdgpu_kernel void @merge_load_i32_v2i16(
137
+ ; CHECK-SAME: ptr addrspace(1) captures(none) [[A:%.*]]) #[[ATTR0]] {
138
+ ; CHECK-NEXT: [[ENTRY:.*:]]
139
+ ; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr addrspace(1) [[A]], align 4
140
+ ; CHECK-NEXT: [[LD_01:%.*]] = extractelement <2 x i32> [[TMP0]], i32 0
141
+ ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i32 1
142
+ ; CHECK-NEXT: [[DOTCAST:%.*]] = bitcast i32 [[TMP1]] to <2 x i16>
143
+ ; CHECK-NEXT: ret void
144
+ ;
103
145
entry:
104
146
%a.1 = getelementptr inbounds i32 , ptr addrspace (1 ) %a , i32 1
105
147
@@ -112,11 +154,56 @@ entry:
112
154
attributes #0 = { nounwind }
113
155
attributes #1 = { nounwind readnone }
114
156
115
- ; CHECK-LABEL: @merge_i32_2i16_float_4i8(
116
- ; CHECK: load <4 x i32>
117
- ; CHECK: store <2 x i32>
118
- ; CHECK: store <2 x i32>
157
+
119
158
define void @merge_i32_2i16_float_4i8 (ptr addrspace (1 ) %ptr1 , ptr addrspace (2 ) %ptr2 ) {
159
+ ; CHECK-OOB-RELAXED-LABEL: define void @merge_i32_2i16_float_4i8(
160
+ ; CHECK-OOB-RELAXED-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) #[[ATTR1:[0-9]+]] {
161
+ ; CHECK-OOB-RELAXED-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[PTR1]], i64 0
162
+ ; CHECK-OOB-RELAXED-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(1) [[GEP1]], align 4
163
+ ; CHECK-OOB-RELAXED-NEXT: [[LOAD12:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
164
+ ; CHECK-OOB-RELAXED-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
165
+ ; CHECK-OOB-RELAXED-NEXT: [[LOAD33:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
166
+ ; CHECK-OOB-RELAXED-NEXT: [[TMP3:%.*]] = bitcast i32 [[LOAD33]] to float
167
+ ; CHECK-OOB-RELAXED-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
168
+ ; CHECK-OOB-RELAXED-NEXT: [[DOTCAST:%.*]] = bitcast i32 [[TMP2]] to <2 x i16>
169
+ ; CHECK-OOB-RELAXED-NEXT: [[DOTCAST1:%.*]] = bitcast i32 [[TMP4]] to <4 x i8>
170
+ ; CHECK-OOB-RELAXED-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
171
+ ; CHECK-OOB-RELAXED-NEXT: [[DOTCAST_CAST:%.*]] = bitcast <2 x i16> [[DOTCAST]] to i32
172
+ ; CHECK-OOB-RELAXED-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[LOAD12]], i32 0
173
+ ; CHECK-OOB-RELAXED-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[DOTCAST_CAST]], i32 1
174
+ ; CHECK-OOB-RELAXED-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(2) [[STORE_GEP1]], align 4
175
+ ; CHECK-OOB-RELAXED-NEXT: [[STORE_GEP3:%.*]] = getelementptr inbounds float, ptr addrspace(2) [[PTR2]], i64 2
176
+ ; CHECK-OOB-RELAXED-NEXT: [[DOTCAST1_CAST:%.*]] = bitcast <4 x i8> [[DOTCAST1]] to i32
177
+ ; CHECK-OOB-RELAXED-NEXT: [[TMP7:%.*]] = bitcast float [[TMP3]] to i32
178
+ ; CHECK-OOB-RELAXED-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
179
+ ; CHECK-OOB-RELAXED-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[DOTCAST1_CAST]], i32 1
180
+ ; CHECK-OOB-RELAXED-NEXT: store <2 x i32> [[TMP9]], ptr addrspace(2) [[STORE_GEP3]], align 4
181
+ ; CHECK-OOB-RELAXED-NEXT: ret void
182
+ ;
183
+ ; CHECK-OOB-STRICT-LABEL: define void @merge_i32_2i16_float_4i8(
184
+ ; CHECK-OOB-STRICT-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) {
185
+ ; CHECK-OOB-STRICT-NEXT: [[GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[PTR1]], i64 0
186
+ ; CHECK-OOB-STRICT-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(1) [[GEP1]], align 4
187
+ ; CHECK-OOB-STRICT-NEXT: [[LOAD12:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
188
+ ; CHECK-OOB-STRICT-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
189
+ ; CHECK-OOB-STRICT-NEXT: [[LOAD33:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
190
+ ; CHECK-OOB-STRICT-NEXT: [[TMP3:%.*]] = bitcast i32 [[LOAD33]] to float
191
+ ; CHECK-OOB-STRICT-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
192
+ ; CHECK-OOB-STRICT-NEXT: [[DOTCAST:%.*]] = bitcast i32 [[TMP2]] to <2 x i16>
193
+ ; CHECK-OOB-STRICT-NEXT: [[DOTCAST1:%.*]] = bitcast i32 [[TMP4]] to <4 x i8>
194
+ ; CHECK-OOB-STRICT-NEXT: [[STORE_GEP1:%.*]] = getelementptr inbounds i32, ptr addrspace(2) [[PTR2]], i64 0
195
+ ; CHECK-OOB-STRICT-NEXT: [[DOTCAST_CAST:%.*]] = bitcast <2 x i16> [[DOTCAST]] to i32
196
+ ; CHECK-OOB-STRICT-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[LOAD12]], i32 0
197
+ ; CHECK-OOB-STRICT-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[DOTCAST_CAST]], i32 1
198
+ ; CHECK-OOB-STRICT-NEXT: store <2 x i32> [[TMP6]], ptr addrspace(2) [[STORE_GEP1]], align 4
199
+ ; CHECK-OOB-STRICT-NEXT: [[STORE_GEP3:%.*]] = getelementptr inbounds float, ptr addrspace(2) [[PTR2]], i64 2
200
+ ; CHECK-OOB-STRICT-NEXT: [[DOTCAST1_CAST:%.*]] = bitcast <4 x i8> [[DOTCAST1]] to i32
201
+ ; CHECK-OOB-STRICT-NEXT: [[TMP7:%.*]] = bitcast float [[TMP3]] to i32
202
+ ; CHECK-OOB-STRICT-NEXT: [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i32 0
203
+ ; CHECK-OOB-STRICT-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[DOTCAST1_CAST]], i32 1
204
+ ; CHECK-OOB-STRICT-NEXT: store <2 x i32> [[TMP9]], ptr addrspace(2) [[STORE_GEP3]], align 4
205
+ ; CHECK-OOB-STRICT-NEXT: ret void
206
+ ;
120
207
%gep1 = getelementptr inbounds i32 , ptr addrspace (1 ) %ptr1 , i64 0
121
208
%load1 = load i32 , ptr addrspace (1 ) %gep1 , align 4
122
209
%gep2 = getelementptr inbounds <2 x i16 >, ptr addrspace (1 ) %ptr1 , i64 1
@@ -136,10 +223,25 @@ define void @merge_i32_2i16_float_4i8(ptr addrspace(1) %ptr1, ptr addrspace(2) %
136
223
ret void
137
224
}
138
225
139
- ; CHECK-LABEL: @merge_fp_type(
140
- ; CHECK: load <2 x float>
141
- ; CHECK: bitcast float {{.*}} to <2 x half>
142
226
define void @merge_fp_type (ptr addrspace (1 ) %ptr1 , ptr addrspace (2 ) %ptr2 ) {
227
+ ; CHECK-OOB-RELAXED-LABEL: define void @merge_fp_type(
228
+ ; CHECK-OOB-RELAXED-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) #[[ATTR1]] {
229
+ ; CHECK-OOB-RELAXED-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[PTR1]], i64 0
230
+ ; CHECK-OOB-RELAXED-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[GEP1]], align 4
231
+ ; CHECK-OOB-RELAXED-NEXT: [[LOAD11:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
232
+ ; CHECK-OOB-RELAXED-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
233
+ ; CHECK-OOB-RELAXED-NEXT: [[DOTCAST:%.*]] = bitcast float [[TMP2]] to <2 x half>
234
+ ; CHECK-OOB-RELAXED-NEXT: ret void
235
+ ;
236
+ ; CHECK-OOB-STRICT-LABEL: define void @merge_fp_type(
237
+ ; CHECK-OOB-STRICT-SAME: ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(2) [[PTR2:%.*]]) {
238
+ ; CHECK-OOB-STRICT-NEXT: [[GEP1:%.*]] = getelementptr inbounds float, ptr addrspace(1) [[PTR1]], i64 0
239
+ ; CHECK-OOB-STRICT-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr addrspace(1) [[GEP1]], align 4
240
+ ; CHECK-OOB-STRICT-NEXT: [[LOAD11:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
241
+ ; CHECK-OOB-STRICT-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
242
+ ; CHECK-OOB-STRICT-NEXT: [[DOTCAST:%.*]] = bitcast float [[TMP2]] to <2 x half>
243
+ ; CHECK-OOB-STRICT-NEXT: ret void
244
+ ;
143
245
%gep1 = getelementptr inbounds float , ptr addrspace (1 ) %ptr1 , i64 0
144
246
%load1 = load float , ptr addrspace (1 ) %gep1 , align 4
145
247
%gep2 = getelementptr inbounds <2 x half >, ptr addrspace (1 ) %ptr1 , i64 1
0 commit comments