Skip to content

Commit 29c5e42

Browse files
authored
AMDGPU: Add baseline tests for bitcast + readlane intrinsics (#128493)
1 parent 3f64899 commit 29c5e42

File tree

2 files changed

+341
-2
lines changed

2 files changed

+341
-2
lines changed
Lines changed: 315 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,315 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=instcombine -S < %s | FileCheck %s
3+
4+
; Baseline: float %val is bitcast to i32 and passed to
; llvm.amdgcn.readfirstlane.i32. The autogenerated assertions expect both the
; bitcast and the i32 intrinsic call to remain after instcombine.
define i32 @test_bitcast_f32_to_i32_readfirstlane(float %val) {
5+
; CHECK-LABEL: define i32 @test_bitcast_f32_to_i32_readfirstlane(
6+
; CHECK-SAME: float [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast float [[VAL]] to i32
8+
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[BITCAST]])
9+
; CHECK-NEXT: ret i32 [[RESULT]]
10+
;
11+
%bitcast = bitcast float %val to i32
12+
%result = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %bitcast)
13+
ret i32 %result
14+
}
15+
16+
; Multi-use variant: the i32 bitcast of %val is stored through %use.ptr and also
; passed to llvm.amdgcn.readfirstlane.i32. The autogenerated assertions expect
; the store to become a store of the original float (align 4), while the
; intrinsic call still consumes the i32 bitcast.
define i32 @test_bitcast_f32_to_i32_readfirstlane_multi_use_store(float %val, ptr %use.ptr) {
17+
; CHECK-LABEL: define i32 @test_bitcast_f32_to_i32_readfirstlane_multi_use_store(
18+
; CHECK-SAME: float [[VAL:%.*]], ptr [[USE_PTR:%.*]]) #[[ATTR0]] {
19+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast float [[VAL]] to i32
20+
; CHECK-NEXT: store float [[VAL]], ptr [[USE_PTR]], align 4
21+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[BITCAST]])
22+
; CHECK-NEXT: ret i32 [[TMP1]]
23+
;
24+
%bitcast = bitcast float %val to i32
25+
store i32 %bitcast, ptr %use.ptr
26+
%result = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %bitcast)
27+
ret i32 %result
28+
}
29+
30+
; Function declared without a body; it takes a single i32 and is called by
; test_bitcast_f32_to_i32_readfirstlane_multi_use_call so that the bitcast
; result has a second user.
declare void @use.i32(i32)
31+
32+
; Multi-use variant: the i32 bitcast of %val is passed both to @use.i32 and to
; llvm.amdgcn.readfirstlane.i32. The autogenerated assertions expect the
; bitcast, the @use.i32 call and the intrinsic call to all remain.
define i32 @test_bitcast_f32_to_i32_readfirstlane_multi_use_call(float %val) {
33+
; CHECK-LABEL: define i32 @test_bitcast_f32_to_i32_readfirstlane_multi_use_call(
34+
; CHECK-SAME: float [[VAL:%.*]]) #[[ATTR0]] {
35+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast float [[VAL]] to i32
36+
; CHECK-NEXT: call void @use.i32(i32 [[BITCAST]])
37+
; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[BITCAST]])
38+
; CHECK-NEXT: ret i32 [[TMP1]]
39+
;
40+
%bitcast = bitcast float %val to i32
41+
call void @use.i32(i32 %bitcast)
42+
%result = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %bitcast)
43+
ret i32 %result
44+
}
45+
46+
; Round trip: float %val is bitcast to i32, passed to
; llvm.amdgcn.readfirstlane.i32, and the i32 result is bitcast back to float.
; The autogenerated assertions expect all three instructions to remain.
define float @test_bitcast_f32_to_i32_readfirstlane_bitcast(float %val) {
47+
; CHECK-LABEL: define float @test_bitcast_f32_to_i32_readfirstlane_bitcast(
48+
; CHECK-SAME: float [[VAL:%.*]]) #[[ATTR0]] {
49+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast float [[VAL]] to i32
50+
; CHECK-NEXT: [[CALL:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[BITCAST]])
51+
; CHECK-NEXT: [[RESULT:%.*]] = bitcast i32 [[CALL]] to float
52+
; CHECK-NEXT: ret float [[RESULT]]
53+
;
54+
%bitcast = bitcast float %val to i32
55+
%call = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %bitcast)
56+
%result = bitcast i32 %call to float
57+
ret float %result
58+
}
59+
60+
; Vector-to-scalar source: <2 x half> %val is bitcast to i32 and passed to
; llvm.amdgcn.readfirstlane.i32. The autogenerated assertions expect the
; bitcast and the i32 call to remain.
define i32 @test_bitcast_v2f16_to_i32_readfirstlane(<2 x half> %val) {
61+
; CHECK-LABEL: define i32 @test_bitcast_v2f16_to_i32_readfirstlane(
62+
; CHECK-SAME: <2 x half> [[VAL:%.*]]) #[[ATTR0]] {
63+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast <2 x half> [[VAL]] to i32
64+
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[BITCAST]])
65+
; CHECK-NEXT: ret i32 [[RESULT]]
66+
;
67+
%bitcast = bitcast <2 x half> %val to i32
68+
%result = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %bitcast)
69+
ret i32 %result
70+
}
71+
72+
; Vector-to-scalar source: <2 x bfloat> %val is bitcast to i32 and passed to
; llvm.amdgcn.readfirstlane.i32. The autogenerated assertions expect the
; bitcast and the i32 call to remain.
define i32 @test_bitcast_v2bf16_to_i32_readfirstlane(<2 x bfloat> %val) {
73+
; CHECK-LABEL: define i32 @test_bitcast_v2bf16_to_i32_readfirstlane(
74+
; CHECK-SAME: <2 x bfloat> [[VAL:%.*]]) #[[ATTR0]] {
75+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast <2 x bfloat> [[VAL]] to i32
76+
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[BITCAST]])
77+
; CHECK-NEXT: ret i32 [[RESULT]]
78+
;
79+
%bitcast = bitcast <2 x bfloat> %val to i32
80+
%result = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %bitcast)
81+
ret i32 %result
82+
}
83+
84+
; 64-bit scalar case: double %val is bitcast to i64 and passed to
; llvm.amdgcn.readfirstlane.i64. The autogenerated assertions expect the
; bitcast and the i64 call to remain.
define i64 @test_bitcast_f64_to_i64_readfirstlane(double %val) {
85+
; CHECK-LABEL: define i64 @test_bitcast_f64_to_i64_readfirstlane(
86+
; CHECK-SAME: double [[VAL:%.*]]) #[[ATTR0]] {
87+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast double [[VAL]] to i64
88+
; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.amdgcn.readfirstlane.i64(i64 [[BITCAST]])
89+
; CHECK-NEXT: ret i64 [[RESULT]]
90+
;
91+
%bitcast = bitcast double %val to i64
92+
%result = call i64 @llvm.amdgcn.readfirstlane.i64(i64 %bitcast)
93+
ret i64 %result
94+
}
95+
96+
; Scalar-to-vector case: double %val is bitcast to <2 x i32> and passed to
; llvm.amdgcn.readfirstlane.v2i32. The autogenerated assertions expect the
; bitcast and the v2i32 call to remain.
define <2 x i32> @test_bitcast_f64_to_v2i32_readfirstlane(double %val) {
97+
; CHECK-LABEL: define <2 x i32> @test_bitcast_f64_to_v2i32_readfirstlane(
98+
; CHECK-SAME: double [[VAL:%.*]]) #[[ATTR0]] {
99+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast double [[VAL]] to <2 x i32>
100+
; CHECK-NEXT: [[RESULT:%.*]] = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> [[BITCAST]])
101+
; CHECK-NEXT: ret <2 x i32> [[RESULT]]
102+
;
103+
%bitcast = bitcast double %val to <2 x i32>
104+
%result = call <2 x i32> @llvm.amdgcn.readfirstlane.v2i32(<2 x i32> %bitcast)
105+
ret <2 x i32> %result
106+
}
107+
108+
; Integer-vector source: <4 x i16> %val is bitcast to i64 and passed to
; llvm.amdgcn.readfirstlane.i64. The autogenerated assertions expect the
; bitcast and the i64 call to remain.
define i64 @test_bitcast_v4i16_to_i64_readfirstlane(<4 x i16> %val) {
109+
; CHECK-LABEL: define i64 @test_bitcast_v4i16_to_i64_readfirstlane(
110+
; CHECK-SAME: <4 x i16> [[VAL:%.*]]) #[[ATTR0]] {
111+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast <4 x i16> [[VAL]] to i64
112+
; CHECK-NEXT: [[RESULT:%.*]] = call i64 @llvm.amdgcn.readfirstlane.i64(i64 [[BITCAST]])
113+
; CHECK-NEXT: ret i64 [[RESULT]]
114+
;
115+
%bitcast = bitcast <4 x i16> %val to i64
116+
%result = call i64 @llvm.amdgcn.readfirstlane.i64(i64 %bitcast)
117+
ret i64 %result
118+
}
119+
120+
121+
; Integer-vector source: <4 x i8> %val is bitcast to i32 and passed to
; llvm.amdgcn.readfirstlane.i32. The autogenerated assertions expect the
; bitcast and the i32 call to remain.
define i32 @test_bitcast_v4i8_to_i32_readfirstlane(<4 x i8> %val) {
122+
; CHECK-LABEL: define i32 @test_bitcast_v4i8_to_i32_readfirstlane(
123+
; CHECK-SAME: <4 x i8> [[VAL:%.*]]) #[[ATTR0]] {
124+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast <4 x i8> [[VAL]] to i32
125+
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[BITCAST]])
126+
; CHECK-NEXT: ret i32 [[RESULT]]
127+
;
128+
%bitcast = bitcast <4 x i8> %val to i32
129+
%result = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %bitcast)
130+
ret i32 %result
131+
}
132+
133+
; Sub-byte element source: <8 x i4> %val is bitcast to i32 and passed to
; llvm.amdgcn.readfirstlane.i32. The autogenerated assertions expect the
; bitcast and the i32 call to remain.
define i32 @test_bitcast_v8i4_to_i32_readfirstlane(<8 x i4> %val) {
134+
; CHECK-LABEL: define i32 @test_bitcast_v8i4_to_i32_readfirstlane(
135+
; CHECK-SAME: <8 x i4> [[VAL:%.*]]) #[[ATTR0]] {
136+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast <8 x i4> [[VAL]] to i32
137+
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[BITCAST]])
138+
; CHECK-NEXT: ret i32 [[RESULT]]
139+
;
140+
%bitcast = bitcast <8 x i4> %val to i32
141+
%result = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %bitcast)
142+
ret i32 %result
143+
}
144+
145+
; Reverse direction: i32 %val is bitcast to float and passed to
; llvm.amdgcn.readfirstlane.f32. The autogenerated assertions expect the
; bitcast and the f32 call to remain.
define float @test_bitcast_i32_to_f32_readfirstlane(i32 %val) {
146+
; CHECK-LABEL: define float @test_bitcast_i32_to_f32_readfirstlane(
147+
; CHECK-SAME: i32 [[VAL:%.*]]) #[[ATTR0]] {
148+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast i32 [[VAL]] to float
149+
; CHECK-NEXT: [[RESULT:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[BITCAST]])
150+
; CHECK-NEXT: ret float [[RESULT]]
151+
;
152+
%bitcast = bitcast i32 %val to float
153+
%result = call float @llvm.amdgcn.readfirstlane.f32(float %bitcast)
154+
ret float %result
155+
}
156+
157+
; 16-bit scalar case: half %val is bitcast to i16 and passed to
; llvm.amdgcn.readfirstlane.i16. The autogenerated assertions expect the
; bitcast and the i16 call to remain.
define i16 @test_bitcast_f16_to_i16_readfirstlane(half %val) {
158+
; CHECK-LABEL: define i16 @test_bitcast_f16_to_i16_readfirstlane(
159+
; CHECK-SAME: half [[VAL:%.*]]) #[[ATTR0]] {
160+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast half [[VAL]] to i16
161+
; CHECK-NEXT: [[RESULT:%.*]] = call i16 @llvm.amdgcn.readfirstlane.i16(i16 [[BITCAST]])
162+
; CHECK-NEXT: ret i16 [[RESULT]]
163+
;
164+
%bitcast = bitcast half %val to i16
165+
%result = call i16 @llvm.amdgcn.readfirstlane.i16(i16 %bitcast)
166+
ret i16 %result
167+
}
168+
169+
; 16-bit vector source: <2 x i8> %val is bitcast to i16 and passed to
; llvm.amdgcn.readfirstlane.i16. The autogenerated assertions expect the
; bitcast and the i16 call to remain.
define i16 @test_bitcast_v2i8_to_i16_readfirstlane(<2 x i8> %val) {
170+
; CHECK-LABEL: define i16 @test_bitcast_v2i8_to_i16_readfirstlane(
171+
; CHECK-SAME: <2 x i8> [[VAL:%.*]]) #[[ATTR0]] {
172+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast <2 x i8> [[VAL]] to i16
173+
; CHECK-NEXT: [[RESULT:%.*]] = call i16 @llvm.amdgcn.readfirstlane.i16(i16 [[BITCAST]])
174+
; CHECK-NEXT: ret i16 [[RESULT]]
175+
;
176+
%bitcast = bitcast <2 x i8> %val to i16
177+
%result = call i16 @llvm.amdgcn.readfirstlane.i16(i16 %bitcast)
178+
ret i16 %result
179+
}
180+
181+
; Wide vector, same element count: <16 x float> %val is bitcast to <16 x i32>
; and passed to llvm.amdgcn.readfirstlane.v16i32. The autogenerated assertions
; expect the bitcast and the v16i32 call to remain.
define <16 x i32> @test_bitcast_v16f32_to_v16i32_readfirstlane(<16 x float> %val) {
182+
; CHECK-LABEL: define <16 x i32> @test_bitcast_v16f32_to_v16i32_readfirstlane(
183+
; CHECK-SAME: <16 x float> [[VAL:%.*]]) #[[ATTR0]] {
184+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast <16 x float> [[VAL]] to <16 x i32>
185+
; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.amdgcn.readfirstlane.v16i32(<16 x i32> [[BITCAST]])
186+
; CHECK-NEXT: ret <16 x i32> [[TMP1]]
187+
;
188+
%bitcast = bitcast <16 x float> %val to <16 x i32>
189+
%result = call <16 x i32> @llvm.amdgcn.readfirstlane.v16i32(<16 x i32> %bitcast)
190+
ret <16 x i32> %result
191+
}
192+
193+
; Wide vector, different element count: <16 x float> %val is bitcast to
; <8 x i64> and passed to llvm.amdgcn.readfirstlane.v8i64. The autogenerated
; assertions expect the bitcast and the v8i64 call to remain.
define <8 x i64> @test_bitcast_v16f32_to_v8i64_readfirstlane(<16 x float> %val) {
194+
; CHECK-LABEL: define <8 x i64> @test_bitcast_v16f32_to_v8i64_readfirstlane(
195+
; CHECK-SAME: <16 x float> [[VAL:%.*]]) #[[ATTR0]] {
196+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast <16 x float> [[VAL]] to <8 x i64>
197+
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.amdgcn.readfirstlane.v8i64(<8 x i64> [[BITCAST]])
198+
; CHECK-NEXT: ret <8 x i64> [[TMP1]]
199+
;
200+
%bitcast = bitcast <16 x float> %val to <8 x i64>
201+
%result = call <8 x i64> @llvm.amdgcn.readfirstlane.v8i64(<8 x i64> %bitcast)
202+
ret <8 x i64> %result
203+
}
204+
205+
; readlane variant: float %val is bitcast to i32 and passed as the first
; operand of llvm.amdgcn.readlane.i32, with the inreg argument %lane.index as
; the second operand. The autogenerated assertions expect the bitcast and the
; call to remain.
define i32 @test_bitcast_f32_to_i32_readlane(float %val, i32 inreg %lane.index) {
206+
; CHECK-LABEL: define i32 @test_bitcast_f32_to_i32_readlane(
207+
; CHECK-SAME: float [[VAL:%.*]], i32 inreg [[LANE_INDEX:%.*]]) #[[ATTR0]] {
208+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast float [[VAL]] to i32
209+
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[BITCAST]], i32 [[LANE_INDEX]])
210+
; CHECK-NEXT: ret i32 [[RESULT]]
211+
;
212+
%bitcast = bitcast float %val to i32
213+
%result = call i32 @llvm.amdgcn.readlane.i32(i32 %bitcast, i32 %lane.index)
214+
ret i32 %result
215+
}
216+
217+
; writelane variant where both bitcast operands come from the same source type:
; %val0 and %val1 are floats bitcast to i32 and passed as the first and third
; operands of llvm.amdgcn.writelane.i32, with %lane.index as the second. The
; autogenerated assertions expect both bitcasts and the call to remain.
define i32 @test_bitcast_f32_to_i32_writelane_samesourcetype(float %val0, i32 inreg %lane.index, float %val1) {
218+
; CHECK-LABEL: define i32 @test_bitcast_f32_to_i32_writelane_samesourcetype(
219+
; CHECK-SAME: float [[VAL0:%.*]], i32 inreg [[LANE_INDEX:%.*]], float [[VAL1:%.*]]) #[[ATTR0]] {
220+
; CHECK-NEXT: [[BITCAST0:%.*]] = bitcast float [[VAL0]] to i32
221+
; CHECK-NEXT: [[BITCAST1:%.*]] = bitcast float [[VAL1]] to i32
222+
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[BITCAST0]], i32 [[LANE_INDEX]], i32 [[BITCAST1]])
223+
; CHECK-NEXT: ret i32 [[RESULT]]
224+
;
225+
%bitcast0 = bitcast float %val0 to i32
226+
%bitcast1 = bitcast float %val1 to i32
227+
%result = call i32 @llvm.amdgcn.writelane.i32(i32 %bitcast0, i32 %lane.index, i32 %bitcast1)
228+
ret i32 %result
229+
}
230+
231+
; writelane variant where the bitcast operands come from different source
; types: %val0 is a float and %val1 is a <2 x half>, each bitcast to i32 and
; passed as the first and third operands of llvm.amdgcn.writelane.i32. The
; autogenerated assertions expect both bitcasts and the call to remain.
define i32 @test_bitcast_f32_to_i32_writelane_diffsourcetype(float %val0, i32 inreg %lane.index, <2 x half> %val1) {
232+
; CHECK-LABEL: define i32 @test_bitcast_f32_to_i32_writelane_diffsourcetype(
233+
; CHECK-SAME: float [[VAL0:%.*]], i32 inreg [[LANE_INDEX:%.*]], <2 x half> [[VAL1:%.*]]) #[[ATTR0]] {
234+
; CHECK-NEXT: [[BITCAST0:%.*]] = bitcast float [[VAL0]] to i32
235+
; CHECK-NEXT: [[BITCAST1:%.*]] = bitcast <2 x half> [[VAL1]] to i32
236+
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.writelane.i32(i32 [[BITCAST0]], i32 [[LANE_INDEX]], i32 [[BITCAST1]])
237+
; CHECK-NEXT: ret i32 [[RESULT]]
238+
;
239+
%bitcast0 = bitcast float %val0 to i32
240+
%bitcast1 = bitcast <2 x half> %val1 to i32
241+
%result = call i32 @llvm.amdgcn.writelane.i32(i32 %bitcast0, i32 %lane.index, i32 %bitcast1)
242+
ret i32 %result
243+
}
244+
245+
; update.dpp variant: floats %val0 and %val1 are bitcast to i32 and passed as
; the first two operands of llvm.amdgcn.update.dpp.i32; the remaining operands
; are the constants i32 1, i32 1, i32 1, i1 false. The autogenerated assertions
; expect both bitcasts and the call to remain.
define i32 @test_bitcast_update_dpp_f32_to_i32(float %val0, float %val1) {
246+
; CHECK-LABEL: define i32 @test_bitcast_update_dpp_f32_to_i32(
247+
; CHECK-SAME: float [[VAL0:%.*]], float [[VAL1:%.*]]) #[[ATTR0]] {
248+
; CHECK-NEXT: [[BITCAST0:%.*]] = bitcast float [[VAL0]] to i32
249+
; CHECK-NEXT: [[BITCAST1:%.*]] = bitcast float [[VAL1]] to i32
250+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 [[BITCAST0]], i32 [[BITCAST1]], i32 1, i32 1, i32 1, i1 false)
251+
; CHECK-NEXT: ret i32 [[TMP0]]
252+
;
253+
%bitcast0 = bitcast float %val0 to i32
254+
%bitcast1 = bitcast float %val1 to i32
255+
%dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 %bitcast0, i32 %bitcast1, i32 1, i32 1, i32 1, i1 false)
256+
ret i32 %dpp
257+
}
258+
259+
; update.dpp variant inside a function marked convergent: the intrinsic call
; carries a "convergencectrl" operand bundle whose token comes from
; llvm.experimental.convergence.entry(). The autogenerated assertions expect
; the token call, both bitcasts, and the bundled update.dpp call to remain.
define i32 @test_bitcast_update_dpp_f32_to_i32_convergencetoken(float %val0, float %val1) convergent {
260+
; CHECK-LABEL: define i32 @test_bitcast_update_dpp_f32_to_i32_convergencetoken(
261+
; CHECK-SAME: float [[VAL0:%.*]], float [[VAL1:%.*]]) #[[ATTR1:[0-9]+]] {
262+
; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry()
263+
; CHECK-NEXT: [[BITCAST0:%.*]] = bitcast float [[VAL0]] to i32
264+
; CHECK-NEXT: [[BITCAST1:%.*]] = bitcast float [[VAL1]] to i32
265+
; CHECK-NEXT: [[DPP:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 [[BITCAST0]], i32 [[BITCAST1]], i32 1, i32 1, i32 1, i1 false) [ "convergencectrl"(token [[T]]) ]
266+
; CHECK-NEXT: ret i32 [[DPP]]
267+
;
268+
%t = call token @llvm.experimental.convergence.entry()
269+
%bitcast0 = bitcast float %val0 to i32
270+
%bitcast1 = bitcast float %val1 to i32
271+
%dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 %bitcast0, i32 %bitcast1, i32 1, i32 1, i32 1, i1 false) [ "convergencectrl"(token %t) ]
272+
ret i32 %dpp
273+
}
274+
275+
; update.dpp variant where the two bitcast operands come from different source
; types: %val0 is a float and %val1 is a <2 x half>, each bitcast to i32 before
; the llvm.amdgcn.update.dpp.i32 call. The autogenerated assertions expect both
; bitcasts and the call to remain.
define i32 @test_bitcast_update_dpp_sources_different_type(float %val0, <2 x half> %val1) {
276+
; CHECK-LABEL: define i32 @test_bitcast_update_dpp_sources_different_type(
277+
; CHECK-SAME: float [[VAL0:%.*]], <2 x half> [[VAL1:%.*]]) #[[ATTR0]] {
278+
; CHECK-NEXT: [[BITCAST0:%.*]] = bitcast float [[VAL0]] to i32
279+
; CHECK-NEXT: [[BITCAST1:%.*]] = bitcast <2 x half> [[VAL1]] to i32
280+
; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 [[BITCAST0]], i32 [[BITCAST1]], i32 1, i32 1, i32 1, i1 false)
281+
; CHECK-NEXT: ret i32 [[TMP0]]
282+
;
283+
%bitcast0 = bitcast float %val0 to i32
284+
%bitcast1 = bitcast <2 x half> %val1 to i32
285+
%dpp = call i32 @llvm.amdgcn.update.dpp.i32(i32 %bitcast0, i32 %bitcast1, i32 1, i32 1, i32 1, i1 false)
286+
ret i32 %dpp
287+
}
288+
289+
; readfirstlane variant inside a function marked convergent: the intrinsic call
; carries a "convergencectrl" operand bundle whose token comes from
; llvm.experimental.convergence.entry(). The autogenerated assertions expect
; the token call, the bitcast, and the bundled readfirstlane call to remain.
define i32 @test_bitcast_f32_to_i32_readfirstlane_convergencetoken(float %val) convergent {
290+
; CHECK-LABEL: define i32 @test_bitcast_f32_to_i32_readfirstlane_convergencetoken(
291+
; CHECK-SAME: float [[VAL:%.*]]) #[[ATTR1]] {
292+
; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry()
293+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast float [[VAL]] to i32
294+
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.readfirstlane.i32(i32 [[BITCAST]]) [ "convergencectrl"(token [[T]]) ]
295+
; CHECK-NEXT: ret i32 [[RESULT]]
296+
;
297+
%t = call token @llvm.experimental.convergence.entry()
298+
%bitcast = bitcast float %val to i32
299+
%result = call i32 @llvm.amdgcn.readfirstlane.i32(i32 %bitcast) [ "convergencectrl"(token %t) ]
300+
ret i32 %result
301+
}
302+
303+
; readlane variant inside a function marked convergent: the intrinsic call
; takes the i32 bitcast and %lane.index and carries a "convergencectrl" operand
; bundle whose token comes from llvm.experimental.convergence.entry(). The
; autogenerated assertions expect the token call, the bitcast, and the bundled
; readlane call to remain.
define i32 @test_bitcast_f32_to_i32_readlane_convergencetoken(float %val, i32 inreg %lane.index) convergent {
304+
; CHECK-LABEL: define i32 @test_bitcast_f32_to_i32_readlane_convergencetoken(
305+
; CHECK-SAME: float [[VAL:%.*]], i32 inreg [[LANE_INDEX:%.*]]) #[[ATTR1]] {
306+
; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry()
307+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast float [[VAL]] to i32
308+
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[BITCAST]], i32 [[LANE_INDEX]]) [ "convergencectrl"(token [[T]]) ]
309+
; CHECK-NEXT: ret i32 [[RESULT]]
310+
;
311+
%t = call token @llvm.experimental.convergence.entry()
312+
%bitcast = bitcast float %val to i32
313+
%result = call i32 @llvm.amdgcn.readlane.i32(i32 %bitcast, i32 %lane.index) [ "convergencectrl"(token %t) ]
314+
ret i32 %result
315+
}

llvm/test/Transforms/InstCombine/AMDGPU/permlane64.ll

Lines changed: 26 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,32 @@ define i32 @test_constant() {
66
; CHECK-LABEL: @test_constant(
77
; CHECK-NEXT: ret i32 99
88
;
9-
%call = call i32 @llvm.amdgcn.permlane64(i32 99)
9+
%call = call i32 @llvm.amdgcn.permlane64.i32(i32 99)
1010
ret i32 %call
1111
}
1212

13-
declare i32 @llvm.amdgcn.permlane64(i32)
13+
; permlane64 variant: float %val is bitcast to i32 and passed to
; llvm.amdgcn.permlane64.i32. The assertions expect the bitcast and the i32
; call to remain.
define i32 @test_bitcast_f32_to_i32_permlane64(float %val) {
14+
; CHECK-LABEL: @test_bitcast_f32_to_i32_permlane64(
15+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast float [[VAL:%.*]] to i32
16+
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.permlane64.i32(i32 [[BITCAST]])
17+
; CHECK-NEXT: ret i32 [[RESULT]]
18+
;
19+
%bitcast = bitcast float %val to i32
20+
%result = call i32 @llvm.amdgcn.permlane64.i32(i32 %bitcast)
21+
ret i32 %result
22+
}
23+
24+
; permlane64 variant inside a function marked convergent: the intrinsic call
; carries a "convergencectrl" operand bundle whose token comes from
; llvm.experimental.convergence.entry(). The assertions expect the token call,
; the bitcast, and the bundled permlane64 call to remain.
; NOTE(review): the function name ends in "convergencetokenn" (double n) -
; presumably a typo for "convergencetoken"; confirm before renaming, since the
; label assertion below must change with it.
define i32 @test_bitcast_f32_to_i32_permlane64_convergencetokenn(float %val) convergent {
25+
; CHECK-LABEL: @test_bitcast_f32_to_i32_permlane64_convergencetokenn(
26+
; CHECK-NEXT: [[T:%.*]] = call token @llvm.experimental.convergence.entry()
27+
; CHECK-NEXT: [[BITCAST:%.*]] = bitcast float [[VAL:%.*]] to i32
28+
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.permlane64.i32(i32 [[BITCAST]]) [ "convergencectrl"(token [[T]]) ]
29+
; CHECK-NEXT: ret i32 [[RESULT]]
30+
;
31+
%t = call token @llvm.experimental.convergence.entry()
32+
%bitcast = bitcast float %val to i32
33+
%result = call i32 @llvm.amdgcn.permlane64.i32(i32 %bitcast) [ "convergencectrl"(token %t) ]
34+
ret i32 %result
35+
}
36+
37+
; Declaration of the permlane64 intrinsic under its type-suffixed (.i32) name,
; which is the name the calls in this file use.
declare i32 @llvm.amdgcn.permlane64.i32(i32)

0 commit comments

Comments
 (0)