Skip to content

Commit 6ebbf18

Browse files
committed
Fix VGPRImm pattern for true16 (t16)
1 parent 9d7e1d9 commit 6ebbf18

File tree

3 files changed

+148
-66
lines changed

3 files changed

+148
-66
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,11 @@ def cond_as_i32imm: SDNodeXForm<cond, [{
846846
}]>;
847847

848848
// Copied from the AArch64 backend:
849+
def bitcast_fpimm_to_i16 : SDNodeXForm<fpimm, [{
850+
return CurDAG->getTargetConstant(
851+
N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i16);
852+
}]>;
853+
849854
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
850855
return CurDAG->getTargetConstant(
851856
N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2283,7 +2283,7 @@ let True16Predicate = UseRealTrue16Insts in {
22832283
foreach vt = [f16, bf16] in {
22842284
def : GCNPat <
22852285
(VGPRImm<(vt fpimm)>:$imm),
2286-
(V_MOV_B16_t16_e64 0, $imm, 0)
2286+
(V_MOV_B16_t16_e64 0, (vt (bitcast_fpimm_to_i16 $imm)), 0)
22872287
>;
22882288
}
22892289
}

llvm/test/CodeGen/AMDGPU/br_cc.f16.ll

Lines changed: 142 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
22
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
33
; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
4-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s
4+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,+real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-TRUE16 %s
5+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global,-real-true16 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11,GFX11-FAKE16 %s
56

67
define amdgpu_kernel void @br_cc_f16(
78
; SI-LABEL: br_cc_f16:
@@ -60,32 +61,62 @@ define amdgpu_kernel void @br_cc_f16(
6061
; VI-NEXT: buffer_store_short v1, off, s[0:3], 0
6162
; VI-NEXT: s_endpgm
6263
;
63-
; GFX11-LABEL: br_cc_f16:
64-
; GFX11: ; %bb.0: ; %entry
65-
; GFX11-NEXT: s_clause 0x1
66-
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
67-
; GFX11-NEXT: s_load_b64 s[8:9], s[4:5], 0x34
68-
; GFX11-NEXT: s_mov_b32 s6, -1
69-
; GFX11-NEXT: s_mov_b32 s7, 0x31016000
70-
; GFX11-NEXT: s_mov_b32 s10, s6
71-
; GFX11-NEXT: s_mov_b32 s11, s7
72-
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
73-
; GFX11-NEXT: s_mov_b32 s4, s2
74-
; GFX11-NEXT: s_mov_b32 s5, s3
75-
; GFX11-NEXT: buffer_load_u16 v0, off, s[4:7], 0 glc dlc
76-
; GFX11-NEXT: s_waitcnt vmcnt(0)
77-
; GFX11-NEXT: buffer_load_u16 v1, off, s[8:11], 0 glc dlc
78-
; GFX11-NEXT: s_waitcnt vmcnt(0)
79-
; GFX11-NEXT: s_mov_b32 s2, s6
80-
; GFX11-NEXT: s_mov_b32 s3, s7
81-
; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
82-
; GFX11-NEXT: s_cbranch_vccnz .LBB0_2
83-
; GFX11-NEXT: ; %bb.1: ; %one
84-
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
85-
; GFX11-NEXT: s_endpgm
86-
; GFX11-NEXT: .LBB0_2: ; %two
87-
; GFX11-NEXT: buffer_store_b16 v1, off, s[0:3], 0
88-
; GFX11-NEXT: s_endpgm
64+
; GFX11-TRUE16-LABEL: br_cc_f16:
65+
; GFX11-TRUE16: ; %bb.0: ; %entry
66+
; GFX11-TRUE16-NEXT: s_clause 0x1
67+
; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
68+
; GFX11-TRUE16-NEXT: s_load_b64 s[8:9], s[4:5], 0x34
69+
; GFX11-TRUE16-NEXT: s_mov_b32 s6, -1
70+
; GFX11-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
71+
; GFX11-TRUE16-NEXT: s_mov_b32 s10, s6
72+
; GFX11-TRUE16-NEXT: s_mov_b32 s11, s7
73+
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
74+
; GFX11-TRUE16-NEXT: s_mov_b32 s4, s2
75+
; GFX11-TRUE16-NEXT: s_mov_b32 s5, s3
76+
; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0 glc dlc
77+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
78+
; GFX11-TRUE16-NEXT: buffer_load_u16 v1, off, s[8:11], 0 glc dlc
79+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
80+
; GFX11-TRUE16-NEXT: s_mov_b32 s2, s6
81+
; GFX11-TRUE16-NEXT: s_mov_b32 s3, s7
82+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.l
83+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
84+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
85+
; GFX11-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v2.l, v2.h
86+
; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB0_2
87+
; GFX11-TRUE16-NEXT: ; %bb.1: ; %one
88+
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
89+
; GFX11-TRUE16-NEXT: s_endpgm
90+
; GFX11-TRUE16-NEXT: .LBB0_2: ; %two
91+
; GFX11-TRUE16-NEXT: buffer_store_b16 v1, off, s[0:3], 0
92+
; GFX11-TRUE16-NEXT: s_endpgm
93+
;
94+
; GFX11-FAKE16-LABEL: br_cc_f16:
95+
; GFX11-FAKE16: ; %bb.0: ; %entry
96+
; GFX11-FAKE16-NEXT: s_clause 0x1
97+
; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
98+
; GFX11-FAKE16-NEXT: s_load_b64 s[8:9], s[4:5], 0x34
99+
; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1
100+
; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
101+
; GFX11-FAKE16-NEXT: s_mov_b32 s10, s6
102+
; GFX11-FAKE16-NEXT: s_mov_b32 s11, s7
103+
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
104+
; GFX11-FAKE16-NEXT: s_mov_b32 s4, s2
105+
; GFX11-FAKE16-NEXT: s_mov_b32 s5, s3
106+
; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0 glc dlc
107+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
108+
; GFX11-FAKE16-NEXT: buffer_load_u16 v1, off, s[8:11], 0 glc dlc
109+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
110+
; GFX11-FAKE16-NEXT: s_mov_b32 s2, s6
111+
; GFX11-FAKE16-NEXT: s_mov_b32 s3, s7
112+
; GFX11-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, v0, v1
113+
; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB0_2
114+
; GFX11-FAKE16-NEXT: ; %bb.1: ; %one
115+
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
116+
; GFX11-FAKE16-NEXT: s_endpgm
117+
; GFX11-FAKE16-NEXT: .LBB0_2: ; %two
118+
; GFX11-FAKE16-NEXT: buffer_store_b16 v1, off, s[0:3], 0
119+
; GFX11-FAKE16-NEXT: s_endpgm
89120
ptr addrspace(1) %r,
90121
ptr addrspace(1) %a,
91122
ptr addrspace(1) %b) {
@@ -151,25 +182,47 @@ define amdgpu_kernel void @br_cc_f16_imm_a(
151182
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
152183
; VI-NEXT: s_endpgm
153184
;
154-
; GFX11-LABEL: br_cc_f16_imm_a:
155-
; GFX11: ; %bb.0: ; %entry
156-
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
157-
; GFX11-NEXT: s_mov_b32 s7, 0x31016000
158-
; GFX11-NEXT: s_mov_b32 s6, -1
159-
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
160-
; GFX11-NEXT: s_mov_b32 s4, s2
161-
; GFX11-NEXT: s_mov_b32 s5, s3
162-
; GFX11-NEXT: buffer_load_u16 v0, off, s[4:7], 0
163-
; GFX11-NEXT: s_waitcnt vmcnt(0)
164-
; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v0
165-
; GFX11-NEXT: s_cbranch_vccnz .LBB1_2
166-
; GFX11-NEXT: ; %bb.1: ; %one
167-
; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800
168-
; GFX11-NEXT: .LBB1_2: ; %two
169-
; GFX11-NEXT: s_mov_b32 s2, s6
170-
; GFX11-NEXT: s_mov_b32 s3, s7
171-
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
172-
; GFX11-NEXT: s_endpgm
185+
; GFX11-TRUE16-LABEL: br_cc_f16_imm_a:
186+
; GFX11-TRUE16: ; %bb.0: ; %entry
187+
; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
188+
; GFX11-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
189+
; GFX11-TRUE16-NEXT: s_mov_b32 s6, -1
190+
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
191+
; GFX11-TRUE16-NEXT: s_mov_b32 s4, s2
192+
; GFX11-TRUE16-NEXT: s_mov_b32 s5, s3
193+
; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0
194+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
195+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
196+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
197+
; GFX11-TRUE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v1.l
198+
; GFX11-TRUE16-NEXT: s_cbranch_vccnz .LBB1_2
199+
; GFX11-TRUE16-NEXT: ; %bb.1: ; %one
200+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0x3800
201+
; GFX11-TRUE16-NEXT: .LBB1_2: ; %two
202+
; GFX11-TRUE16-NEXT: s_mov_b32 s2, s6
203+
; GFX11-TRUE16-NEXT: s_mov_b32 s3, s7
204+
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
205+
; GFX11-TRUE16-NEXT: s_endpgm
206+
;
207+
; GFX11-FAKE16-LABEL: br_cc_f16_imm_a:
208+
; GFX11-FAKE16: ; %bb.0: ; %entry
209+
; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
210+
; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
211+
; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1
212+
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
213+
; GFX11-FAKE16-NEXT: s_mov_b32 s4, s2
214+
; GFX11-FAKE16-NEXT: s_mov_b32 s5, s3
215+
; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0
216+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
217+
; GFX11-FAKE16-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v0
218+
; GFX11-FAKE16-NEXT: s_cbranch_vccnz .LBB1_2
219+
; GFX11-FAKE16-NEXT: ; %bb.1: ; %one
220+
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x3800
221+
; GFX11-FAKE16-NEXT: .LBB1_2: ; %two
222+
; GFX11-FAKE16-NEXT: s_mov_b32 s2, s6
223+
; GFX11-FAKE16-NEXT: s_mov_b32 s3, s7
224+
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
225+
; GFX11-FAKE16-NEXT: s_endpgm
173226
ptr addrspace(1) %r,
174227
ptr addrspace(1) %b) {
175228
entry:
@@ -235,25 +288,47 @@ define amdgpu_kernel void @br_cc_f16_imm_b(
235288
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
236289
; VI-NEXT: s_endpgm
237290
;
238-
; GFX11-LABEL: br_cc_f16_imm_b:
239-
; GFX11: ; %bb.0: ; %entry
240-
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
241-
; GFX11-NEXT: s_mov_b32 s7, 0x31016000
242-
; GFX11-NEXT: s_mov_b32 s6, -1
243-
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
244-
; GFX11-NEXT: s_mov_b32 s4, s2
245-
; GFX11-NEXT: s_mov_b32 s5, s3
246-
; GFX11-NEXT: buffer_load_u16 v0, off, s[4:7], 0
247-
; GFX11-NEXT: s_waitcnt vmcnt(0)
248-
; GFX11-NEXT: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v0
249-
; GFX11-NEXT: s_cbranch_vccz .LBB2_2
250-
; GFX11-NEXT: ; %bb.1: ; %two
251-
; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800
252-
; GFX11-NEXT: .LBB2_2: ; %one
253-
; GFX11-NEXT: s_mov_b32 s2, s6
254-
; GFX11-NEXT: s_mov_b32 s3, s7
255-
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
256-
; GFX11-NEXT: s_endpgm
291+
; GFX11-TRUE16-LABEL: br_cc_f16_imm_b:
292+
; GFX11-TRUE16: ; %bb.0: ; %entry
293+
; GFX11-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
294+
; GFX11-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
295+
; GFX11-TRUE16-NEXT: s_mov_b32 s6, -1
296+
; GFX11-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
297+
; GFX11-TRUE16-NEXT: s_mov_b32 s4, s2
298+
; GFX11-TRUE16-NEXT: s_mov_b32 s5, s3
299+
; GFX11-TRUE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0
300+
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0)
301+
; GFX11-TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
302+
; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
303+
; GFX11-TRUE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v1.l
304+
; GFX11-TRUE16-NEXT: s_cbranch_vccz .LBB2_2
305+
; GFX11-TRUE16-NEXT: ; %bb.1: ; %two
306+
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, 0x3800
307+
; GFX11-TRUE16-NEXT: .LBB2_2: ; %one
308+
; GFX11-TRUE16-NEXT: s_mov_b32 s2, s6
309+
; GFX11-TRUE16-NEXT: s_mov_b32 s3, s7
310+
; GFX11-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
311+
; GFX11-TRUE16-NEXT: s_endpgm
312+
;
313+
; GFX11-FAKE16-LABEL: br_cc_f16_imm_b:
314+
; GFX11-FAKE16: ; %bb.0: ; %entry
315+
; GFX11-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
316+
; GFX11-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
317+
; GFX11-FAKE16-NEXT: s_mov_b32 s6, -1
318+
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
319+
; GFX11-FAKE16-NEXT: s_mov_b32 s4, s2
320+
; GFX11-FAKE16-NEXT: s_mov_b32 s5, s3
321+
; GFX11-FAKE16-NEXT: buffer_load_u16 v0, off, s[4:7], 0
322+
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0)
323+
; GFX11-FAKE16-NEXT: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v0
324+
; GFX11-FAKE16-NEXT: s_cbranch_vccz .LBB2_2
325+
; GFX11-FAKE16-NEXT: ; %bb.1: ; %two
326+
; GFX11-FAKE16-NEXT: v_mov_b32_e32 v0, 0x3800
327+
; GFX11-FAKE16-NEXT: .LBB2_2: ; %one
328+
; GFX11-FAKE16-NEXT: s_mov_b32 s2, s6
329+
; GFX11-FAKE16-NEXT: s_mov_b32 s3, s7
330+
; GFX11-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
331+
; GFX11-FAKE16-NEXT: s_endpgm
257332
ptr addrspace(1) %r,
258333
ptr addrspace(1) %a) {
259334
entry:
@@ -269,3 +344,5 @@ two:
269344
store half 0xH3800, ptr addrspace(1) %r
270345
ret void
271346
}
347+
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
348+
; GFX11: {{.*}}

0 commit comments

Comments
 (0)