@@ -7,17 +7,19 @@ define amdgpu_kernel void @test_overwrite(i64 %val, i1 %cond) {
7
7
; CHECK-LABEL: define amdgpu_kernel void @test_overwrite
8
8
; CHECK-SAME: (i64 [[VAL:%.*]], i1 [[COND:%.*]]) {
9
9
; CHECK-NEXT: entry:
10
+ ; CHECK-NEXT: [[STACK:%.*]] = freeze <3 x i64> poison
11
+ ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <3 x i64> [[STACK]], i64 43, i32 0
10
12
; CHECK-NEXT: br i1 [[COND]], label [[LOOP:%.*]], label [[END:%.*]]
11
13
; CHECK: loop:
12
- ; CHECK-NEXT: [[PROMOTEALLOCA1:%.*]] = phi <3 x i64> [ [[TMP2:%.*]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef>, [[ENTRY:%.*]] ]
13
- ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA1]], i32 0
14
- ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <3 x i64> [[PROMOTEALLOCA1]], i64 68, i32 0
15
- ; CHECK-NEXT: [[TMP2]] = insertelement <3 x i64> [[TMP1]], i64 32, i32 0
16
- ; CHECK-NEXT: [[LOOP_CC:%.*]] = icmp ne i64 [[TMP0]], 68
14
+ ; CHECK-NEXT: [[PROMOTEALLOCA1:%.*]] = phi <3 x i64> [ [[TMP3:%.*]], [[LOOP]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
15
+ ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA1]], i32 0
16
+ ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <3 x i64> [[PROMOTEALLOCA1]], i64 68, i32 0
17
+ ; CHECK-NEXT: [[TMP3]] = insertelement <3 x i64> [[TMP2]], i64 32, i32 0
18
+ ; CHECK-NEXT: [[LOOP_CC:%.*]] = icmp ne i64 [[TMP1]], 68
17
19
; CHECK-NEXT: br i1 [[LOOP_CC]], label [[LOOP]], label [[END]]
18
20
; CHECK: end:
19
- ; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <3 x i64> [ [[TMP2]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef>, [[ENTRY]] ]
20
- ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 0
21
+ ; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <3 x i64> [ [[TMP3]], [[LOOP]] ], [ [[TMP0]], [[ENTRY]] ]
22
+ ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 0
21
23
; CHECK-NEXT: ret void
22
24
;
23
25
entry:
@@ -42,8 +44,9 @@ define <4 x i64> @test_fullvec_out_of_bounds(<4 x i64> %arg) {
42
44
; CHECK-LABEL: define <4 x i64> @test_fullvec_out_of_bounds
43
45
; CHECK-SAME: (<4 x i64> [[ARG:%.*]]) {
44
46
; CHECK-NEXT: entry:
47
+ ; CHECK-NEXT: [[STACK:%.*]] = freeze <4 x i64> poison
45
48
; CHECK-NEXT: [[TMP0:%.*]] = extractelement <4 x i64> [[ARG]], i64 0
46
- ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> undef, i64 [[TMP0]], i32 3
49
+ ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i64> [[STACK]], i64 [[TMP0]], i32 3
47
50
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i64> [[ARG]], i64 1
48
51
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[ARG]], i64 2
49
52
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[ARG]], i64 3
@@ -62,17 +65,19 @@ define amdgpu_kernel void @test_no_overwrite(i64 %val, i1 %cond) {
62
65
; CHECK-LABEL: define amdgpu_kernel void @test_no_overwrite
63
66
; CHECK-SAME: (i64 [[VAL:%.*]], i1 [[COND:%.*]]) {
64
67
; CHECK-NEXT: entry:
68
+ ; CHECK-NEXT: [[STACK:%.*]] = freeze <3 x i64> poison
69
+ ; CHECK-NEXT: [[TMP0:%.*]] = insertelement <3 x i64> [[STACK]], i64 43, i32 0
65
70
; CHECK-NEXT: br i1 [[COND]], label [[LOOP:%.*]], label [[END:%.*]]
66
71
; CHECK: loop:
67
- ; CHECK-NEXT: [[PROMOTEALLOCA1:%.*]] = phi <3 x i64> [ [[TMP1:%.*]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef>, [[ENTRY:%.*]] ]
68
- ; CHECK-NEXT: [[TMP0:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA1]], i32 0
69
- ; CHECK-NEXT: [[TMP1]] = insertelement <3 x i64> [[PROMOTEALLOCA1]], i64 32, i32 1
70
- ; CHECK-NEXT: [[LOOP_CC:%.*]] = icmp ne i64 [[TMP0]], 32
72
+ ; CHECK-NEXT: [[PROMOTEALLOCA1:%.*]] = phi <3 x i64> [ [[TMP2:%.*]], [[LOOP]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
73
+ ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA1]], i32 0
74
+ ; CHECK-NEXT: [[TMP2]] = insertelement <3 x i64> [[PROMOTEALLOCA1]], i64 32, i32 1
75
+ ; CHECK-NEXT: [[LOOP_CC:%.*]] = icmp ne i64 [[TMP1]], 32
71
76
; CHECK-NEXT: br i1 [[LOOP_CC]], label [[LOOP]], label [[END]]
72
77
; CHECK: end:
73
- ; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <3 x i64> [ [[TMP1]], [[LOOP]] ], [ <i64 43, i64 undef, i64 undef>, [[ENTRY]] ]
74
- ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 0
75
- ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 1
78
+ ; CHECK-NEXT: [[PROMOTEALLOCA:%.*]] = phi <3 x i64> [ [[TMP2]], [[LOOP]] ], [ [[TMP0]], [[ENTRY]] ]
79
+ ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 0
80
+ ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <3 x i64> [[PROMOTEALLOCA]], i32 1
76
81
; CHECK-NEXT: ret void
77
82
;
78
83
entry:
@@ -97,6 +102,7 @@ define ptr @alloca_load_store_ptr64_full_ivec(ptr %arg) {
97
102
; CHECK-LABEL: define ptr @alloca_load_store_ptr64_full_ivec
98
103
; CHECK-SAME: (ptr [[ARG:%.*]]) {
99
104
; CHECK-NEXT: entry:
105
+ ; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <8 x i8> poison
100
106
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr [[ARG]] to i64
101
107
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[TMP0]] to <8 x i8>
102
108
; CHECK-NEXT: ret ptr [[ARG]]
@@ -112,6 +118,7 @@ define ptr addrspace(3) @alloca_load_store_ptr32_full_ivec(ptr addrspace(3) %arg
112
118
; CHECK-LABEL: define ptr addrspace(3) @alloca_load_store_ptr32_full_ivec
113
119
; CHECK-SAME: (ptr addrspace(3) [[ARG:%.*]]) {
114
120
; CHECK-NEXT: entry:
121
+ ; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <4 x i8> poison
115
122
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint ptr addrspace(3) [[ARG]] to i32
116
123
; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[TMP0]] to <4 x i8>
117
124
; CHECK-NEXT: ret ptr addrspace(3) [[ARG]]
@@ -127,6 +134,7 @@ define <4 x ptr addrspace(3)> @alloca_load_store_ptr_mixed_full_ptrvec(<2 x ptr>
127
134
; CHECK-LABEL: define <4 x ptr addrspace(3)> @alloca_load_store_ptr_mixed_full_ptrvec
128
135
; CHECK-SAME: (<2 x ptr> [[ARG:%.*]]) {
129
136
; CHECK-NEXT: entry:
137
+ ; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <4 x i32> poison
130
138
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint <2 x ptr> [[ARG]] to <2 x i64>
131
139
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
132
140
; CHECK-NEXT: [[TMP2:%.*]] = inttoptr <4 x i32> [[TMP1]] to <4 x ptr addrspace(3)>
@@ -143,6 +151,7 @@ define <8 x i16> @ptralloca_load_store_ints_full(<2 x i64> %arg) {
143
151
; CHECK-LABEL: define <8 x i16> @ptralloca_load_store_ints_full
144
152
; CHECK-SAME: (<2 x i64> [[ARG:%.*]]) {
145
153
; CHECK-NEXT: entry:
154
+ ; CHECK-NEXT: [[STACK:%.*]] = freeze <4 x ptr addrspace(5)> poison
146
155
; CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i64> [[ARG]] to <4 x i32>
147
156
; CHECK-NEXT: [[TMP1:%.*]] = inttoptr <4 x i32> [[TMP0]] to <4 x ptr addrspace(5)>
148
157
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <8 x i16>
@@ -159,19 +168,22 @@ define void @alloca_load_store_ptr_mixed_ptrvec(<2 x ptr addrspace(3)> %arg) {
159
168
; CHECK-LABEL: define void @alloca_load_store_ptr_mixed_ptrvec
160
169
; CHECK-SAME: (<2 x ptr addrspace(3)> [[ARG:%.*]]) {
161
170
; CHECK-NEXT: entry:
171
+ ; CHECK-NEXT: [[ALLOCA:%.*]] = freeze <8 x i32> poison
162
172
; CHECK-NEXT: [[TMP0:%.*]] = ptrtoint <2 x ptr addrspace(3)> [[ARG]] to <2 x i32>
163
173
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[TMP0]], i64 0
164
- ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> undef, i32 [[TMP1]], i32 0
174
+ ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> [[ALLOCA]], i32 [[TMP1]], i32 0
165
175
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[TMP0]], i64 1
166
176
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <8 x i32> [[TMP2]], i32 [[TMP3]], i32 1
167
177
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i64 0
168
178
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP3]], i64 1
169
179
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr <2 x i32> [[TMP6]] to <2 x ptr addrspace(3)>
170
180
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0
171
181
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP3]], i64 1
172
- ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x i32> [[TMP9]], i32 undef, i64 2
173
- ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP10]], i32 undef, i64 3
174
- ; CHECK-NEXT: [[TMP12:%.*]] = inttoptr <4 x i32> [[TMP11]] to <4 x ptr addrspace(3)>
182
+ ; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP4]], i32 2
183
+ ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP10]], i64 2
184
+ ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP4]], i32 3
185
+ ; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP12]], i64 3
186
+ ; CHECK-NEXT: [[TMP14:%.*]] = inttoptr <4 x i32> [[TMP13]] to <4 x ptr addrspace(3)>
175
187
; CHECK-NEXT: ret void
176
188
;
177
189
entry:
0 commit comments