Skip to content

Commit b2f84d2

Browse files
jayfoad authored and AlexisPerry committed
[AMDGPU] Add some gfx1200 test coverage
1 parent 7f448a1 commit b2f84d2

File tree

3 files changed

+1658
-11
lines changed

3 files changed

+1658
-11
lines changed

llvm/test/CodeGen/AMDGPU/code-size-estimate.ll

Lines changed: 152 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX9 %s
2-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX10 %s
3-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX11,GFX1100 %s
4-
; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX11,GFX1150 %s
1+
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX9,NOT-GFX12 %s
2+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10,NOT-GFX12 %s
3+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX11,GFX1100,NOT-GFX12 %s
4+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX11,GFX1150,NOT-GFX12 %s
5+
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX1200 %s
56

67
declare float @llvm.fabs.f32(float)
78
declare float @llvm.fma.f32(float, float, float)
@@ -24,10 +25,21 @@ define float @v_mul_f32_vop2(float %x, float %y) {
2425
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
2526
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10]
2627
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
28+
;
29+
; GFX1200-LABEL: v_mul_f32_vop2:
30+
; GFX1200: ; %bb.0:
31+
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
32+
; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
33+
; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
34+
; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
35+
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
36+
; GFX1200-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10]
37+
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
2738
%mul = fmul float %x, %y
2839
ret float %mul
2940
}
30-
; CHECK: codeLenInByte = 12
41+
; NOT-GFX12: codeLenInByte = 12
42+
; GFX1200: codeLenInByte = 28
3143

3244
define float @v_mul_f32_vop2_inline_imm(float %x) {
3345
; GFX9-LABEL: v_mul_f32_vop2_inline_imm:
@@ -47,10 +59,21 @@ define float @v_mul_f32_vop2_inline_imm(float %x) {
4759
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
4860
; GFX11-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10]
4961
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
62+
;
63+
; GFX1200-LABEL: v_mul_f32_vop2_inline_imm:
64+
; GFX1200: ; %bb.0:
65+
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
66+
; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
67+
; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
68+
; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
69+
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
70+
; GFX1200-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10]
71+
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
5072
%mul = fmul float %x, 4.0
5173
ret float %mul
5274
}
53-
; CHECK: codeLenInByte = 12
75+
; NOT-GFX12: codeLenInByte = 12
76+
; GFX1200: codeLenInByte = 28
5477

5578
define float @v_mul_f32_vop2_literal(float %x) {
5679
; GFX9-LABEL: v_mul_f32_vop2_literal:
@@ -70,10 +93,21 @@ define float @v_mul_f32_vop2_literal(float %x) {
7093
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
7194
; GFX11-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42]
7295
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
96+
;
97+
; GFX1200-LABEL: v_mul_f32_vop2_literal:
98+
; GFX1200: ; %bb.0:
99+
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
100+
; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
101+
; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
102+
; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
103+
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
104+
; GFX1200-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42]
105+
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
73106
%mul = fmul float %x, 123.0
74107
ret float %mul
75108
}
76-
; CHECK: codeLenInByte = 16
109+
; NOT-GFX12: codeLenInByte = 16
110+
; GFX1200: codeLenInByte = 32
77111

78112
define float @v_mul_f32_vop3_src_mods(float %x, float %y) {
79113
; GFX9-LABEL: v_mul_f32_vop3_src_mods:
@@ -93,11 +127,22 @@ define float @v_mul_f32_vop3_src_mods(float %x, float %y) {
93127
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
94128
; GFX11-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00]
95129
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
130+
;
131+
; GFX1200-LABEL: v_mul_f32_vop3_src_mods:
132+
; GFX1200: ; %bb.0:
133+
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
134+
; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
135+
; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
136+
; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
137+
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
138+
; GFX1200-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00]
139+
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
96140
%fabs.x = call float @llvm.fabs.f32(float %x)
97141
%mul = fmul float %fabs.x, %y
98142
ret float %mul
99143
}
100-
; CHECK: codeLenInByte = 16
144+
; NOT-GFX12: codeLenInByte = 16
145+
; GFX1200: codeLenInByte = 32
101146

102147
define float @v_mul_f32_vop3_src_mods_inline_imm(float %x, float %y) {
103148
; GFX9-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
@@ -117,12 +162,23 @@ define float @v_mul_f32_vop3_src_mods_inline_imm(float %x, float %y) {
117162
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
118163
; GFX11-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00]
119164
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
165+
;
166+
; GFX1200-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
167+
; GFX1200: ; %bb.0:
168+
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
169+
; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
170+
; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
171+
; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
172+
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
173+
; GFX1200-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00]
174+
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
120175
%fabs.x = call float @llvm.fabs.f32(float %x)
121176
%mul = fmul float %fabs.x, 4.0
122177
ret float %mul
123178
}
124179

125-
; CHECK: codeLenInByte = 16
180+
; NOT-GFX12: codeLenInByte = 16
181+
; GFX1200: codeLenInByte = 32
126182

127183
define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) {
128184
; GFX9-LABEL: v_mul_f32_vop3_src_mods_literal:
@@ -143,6 +199,16 @@ define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) {
143199
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
144200
; GFX11-NEXT: v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42]
145201
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
202+
;
203+
; GFX1200-LABEL: v_mul_f32_vop3_src_mods_literal:
204+
; GFX1200: ; %bb.0:
205+
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
206+
; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
207+
; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
208+
; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
209+
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
210+
; GFX1200-NEXT: v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42]
211+
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
146212
%fabs.x = call float @llvm.fabs.f32(float %x)
147213
%mul = fmul float %fabs.x, 123.0
148214
ret float %mul
@@ -151,6 +217,7 @@ define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) {
151217
; GFX9: codeLenInByte = 24
152218
; GFX10: codeLenInByte = 20
153219
; GFX11: codeLenInByte = 20
220+
; GFX1200: codeLenInByte = 36
154221

155222
define float @v_mul_f32_vop2_frame_index(float %x) {
156223
; GFX9-LABEL: v_mul_f32_vop2_frame_index:
@@ -172,6 +239,16 @@ define float @v_mul_f32_vop2_frame_index(float %x) {
172239
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
173240
; GFX11-NEXT: v_mul_f32_e32 v0, s32, v0 ; encoding: [0x20,0x00,0x00,0x10]
174241
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
242+
;
243+
; GFX1200-LABEL: v_mul_f32_vop2_frame_index:
244+
; GFX1200: ; %bb.0:
245+
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
246+
; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
247+
; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
248+
; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
249+
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
250+
; GFX1200-NEXT: v_mul_f32_e32 v0, s32, v0 ; encoding: [0x20,0x00,0x00,0x10]
251+
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
175252
%alloca = alloca i32, addrspace(5)
176253
%ptrtoint = ptrtoint ptr addrspace(5) %alloca to i32
177254
%cast = bitcast i32 %ptrtoint to float
@@ -182,6 +259,7 @@ define float @v_mul_f32_vop2_frame_index(float %x) {
182259
; GFX9: codeLenInByte = 20
183260
; GFX10: codeLenInByte = 20
184261
; GFX11: codeLenInByte = 12
262+
; GFX1200: codeLenInByte = 28
185263

186264
define float @v_fma_f32(float %x, float %y, float %z) {
187265
; GFX9-LABEL: v_fma_f32:
@@ -201,11 +279,22 @@ define float @v_fma_f32(float %x, float %y, float %z) {
201279
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
202280
; GFX11-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x13,0xd6,0x00,0x03,0x0a,0x04]
203281
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
282+
;
283+
; GFX1200-LABEL: v_fma_f32:
284+
; GFX1200: ; %bb.0:
285+
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
286+
; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
287+
; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
288+
; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
289+
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
290+
; GFX1200-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x13,0xd6,0x00,0x03,0x0a,0x04]
291+
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
204292
%fma = call float @llvm.fma.f32(float %x, float %y, float %z)
205293
ret float %fma
206294
}
207295

208-
; CHECK: codeLenInByte = 16
296+
; NOT-GFX12: codeLenInByte = 16
297+
; GFX1200: codeLenInByte = 32
209298

210299
define float @v_fma_f32_src_mods(float %x, float %y, float %z) {
211300
; GFX9-LABEL: v_fma_f32_src_mods:
@@ -225,12 +314,23 @@ define float @v_fma_f32_src_mods(float %x, float %y, float %z) {
225314
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
226315
; GFX11-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0x0a,0x04]
227316
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
317+
;
318+
; GFX1200-LABEL: v_fma_f32_src_mods:
319+
; GFX1200: ; %bb.0:
320+
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
321+
; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
322+
; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
323+
; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
324+
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
325+
; GFX1200-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0x0a,0x04]
326+
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
228327
%fabs.x = call float @llvm.fabs.f32(float %x)
229328
%fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z)
230329
ret float %fma
231330
}
232331

233-
; CHECK: codeLenInByte = 16
332+
; NOT-GFX12: codeLenInByte = 16
333+
; GFX1200: codeLenInByte = 32
234334

235335
define float @v_fmac_f32(float %x, float %y) {
236336
; GFX9-LABEL: v_fmac_f32:
@@ -250,13 +350,24 @@ define float @v_fmac_f32(float %x, float %y) {
250350
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
251351
; GFX11-NEXT: v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56]
252352
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
353+
;
354+
; GFX1200-LABEL: v_fmac_f32:
355+
; GFX1200: ; %bb.0:
356+
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
357+
; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
358+
; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
359+
; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
360+
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
361+
; GFX1200-NEXT: v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56]
362+
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
253363
%fma = call float @llvm.fma.f32(float %x, float %y, float %x)
254364
ret float %fma
255365
}
256366

257367
; GFX9: codeLenInByte = 16
258368
; GFX10: codeLenInByte = 12
259369
; GFX11: codeLenInByte = 12
370+
; GFX1200: codeLenInByte = 28
260371

261372
define float @v_fmaak_f32(float %x, float %y) {
262373
; GFX9-LABEL: v_fmaak_f32:
@@ -277,13 +388,24 @@ define float @v_fmaak_f32(float %x, float %y) {
277388
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
278389
; GFX11-NEXT: v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43]
279390
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
391+
;
392+
; GFX1200-LABEL: v_fmaak_f32:
393+
; GFX1200: ; %bb.0:
394+
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
395+
; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
396+
; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
397+
; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
398+
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
399+
; GFX1200-NEXT: v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43]
400+
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
280401
%fma = call float @llvm.fma.f32(float %x, float %y, float 256.0)
281402
ret float %fma
282403
}
283404

284405
; GFX9: codeLenInByte = 24
285406
; GFX10: codeLenInByte = 16
286407
; GFX11: codeLenInByte = 16
408+
; GFX1200: codeLenInByte = 32
287409

288410
define float @v_fma_k_f32_src_mods(float %x, float %y) {
289411
; GFX9-LABEL: v_fma_k_f32_src_mods:
@@ -304,6 +426,16 @@ define float @v_fma_k_f32_src_mods(float %x, float %y) {
304426
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
305427
; GFX11-NEXT: v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43]
306428
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
429+
;
430+
; GFX1200-LABEL: v_fma_k_f32_src_mods:
431+
; GFX1200: ; %bb.0:
432+
; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
433+
; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
434+
; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
435+
; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
436+
; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
437+
; GFX1200-NEXT: v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43]
438+
; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
307439
%fabs.x = call float @llvm.fabs.f32(float %x)
308440
%fma = call float @llvm.fma.f32(float %fabs.x, float %y, float 256.0)
309441
ret float %fma
@@ -312,6 +444,7 @@ define float @v_fma_k_f32_src_mods(float %x, float %y) {
312444
; GFX9: codeLenInByte = 24
313445
; GFX10: codeLenInByte = 20
314446
; GFX11: codeLenInByte = 20
447+
; GFX1200: codeLenInByte = 36
315448

316449
define amdgpu_ps float @s_fmaak_f32(float inreg %x, float inreg %y) {
317450
; GFX9-LABEL: s_fmaak_f32:
@@ -340,6 +473,13 @@ define amdgpu_ps float @s_fmaak_f32(float inreg %x, float inreg %y) {
340473
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; encoding: [0x0b,0x00,0x87,0xbf]
341474
; GFX1150-NEXT: v_mov_b32_e32 v0, s0 ; encoding: [0x00,0x02,0x00,0x7e]
342475
; GFX1150-NEXT: ; return to shader part epilog
476+
;
477+
; GFX1200-LABEL: s_fmaak_f32:
478+
; GFX1200: ; %bb.0:
479+
; GFX1200-NEXT: s_fmaak_f32 s0, s0, s1, 0x43800000 ; encoding: [0x00,0x01,0x80,0xa2,0x00,0x00,0x80,0x43]
480+
; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; encoding: [0x0b,0x00,0x87,0xbf]
481+
; GFX1200-NEXT: v_mov_b32_e32 v0, s0 ; encoding: [0x00,0x02,0x00,0x7e]
482+
; GFX1200-NEXT: ; return to shader part epilog
343483
%fma = call float @llvm.fma.f32(float %x, float %y, float 256.0)
344484
ret float %fma
345485
}
@@ -348,3 +488,4 @@ define amdgpu_ps float @s_fmaak_f32(float inreg %x, float inreg %y) {
348488
; GFX10: codeLenInByte = 12
349489
; GFX1100: codeLenInByte = 16
350490
; GFX1150: codeLenInByte = 16
491+
; GFX1200: codeLenInByte = 16

llvm/test/CodeGen/AMDGPU/extra-lds-size.ll

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10-MESA %s
33
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11-PAL %s
44
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11-MESA %s
5+
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200-PAL %s
6+
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX1200-MESA %s
57

68
; Check EXTRA_LDS_SIZE in SPI_SHADER_PGM_RSRC2_PS.
79

@@ -15,6 +17,11 @@
1517
; GFX11-MESA: .long 45100
1618
; GFX11-MESA-NEXT: .long 1024
1719

20+
; GFX1200-PAL: '0x2c0b (SPI_SHADER_PGM_RSRC2_PS)': 0x400
21+
22+
; GFX1200-MESA: .long 45100
23+
; GFX1200-MESA-NEXT: .long 1024
24+
1825
@lds = internal addrspace(3) global [4096 x i8] undef
1926

2027
define amdgpu_ps void @global_store_saddr_uniform_ptr_in_vgprs(i32 %voffset) {

0 commit comments

Comments
 (0)