1
- ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX9 %s
2
- ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX10 %s
3
- ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX11,GFX1100 %s
4
- ; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX11,GFX1150 %s
1
+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX9,NOT-GFX12 %s
2
+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10,NOT-GFX12 %s
3
+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX11,GFX1100,NOT-GFX12 %s
4
+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX11,GFX1150,NOT-GFX12 %s
5
+ ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX1200 %s
5
6
6
7
declare float @llvm.fabs.f32 (float )
7
8
declare float @llvm.fma.f32 (float , float , float )
@@ -24,10 +25,21 @@ define float @v_mul_f32_vop2(float %x, float %y) {
24
25
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
25
26
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10]
26
27
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
28
+ ;
29
+ ; GFX1200-LABEL: v_mul_f32_vop2:
30
+ ; GFX1200: ; %bb.0:
31
+ ; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
32
+ ; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
33
+ ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
34
+ ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
35
+ ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
36
+ ; GFX1200-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10]
37
+ ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
27
38
%mul = fmul float %x , %y
28
39
ret float %mul
29
40
}
30
- ; CHECK: codeLenInByte = 12
41
+ ; NOT-GFX12: codeLenInByte = 12
42
+ ; GFX1200: codeLenInByte = 28
31
43
32
44
define float @v_mul_f32_vop2_inline_imm (float %x ) {
33
45
; GFX9-LABEL: v_mul_f32_vop2_inline_imm:
@@ -47,10 +59,21 @@ define float @v_mul_f32_vop2_inline_imm(float %x) {
47
59
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
48
60
; GFX11-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10]
49
61
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
62
+ ;
63
+ ; GFX1200-LABEL: v_mul_f32_vop2_inline_imm:
64
+ ; GFX1200: ; %bb.0:
65
+ ; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
66
+ ; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
67
+ ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
68
+ ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
69
+ ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
70
+ ; GFX1200-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10]
71
+ ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
50
72
%mul = fmul float %x, 4.0
51
73
ret float %mul
52
74
}
53
- ; CHECK: codeLenInByte = 12
75
+ ; NOT-GFX12: codeLenInByte = 12
76
+ ; GFX1200: codeLenInByte = 28
54
77
55
78
define float @v_mul_f32_vop2_literal (float %x ) {
56
79
; GFX9-LABEL: v_mul_f32_vop2_literal:
@@ -70,10 +93,21 @@ define float @v_mul_f32_vop2_literal(float %x) {
70
93
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
71
94
; GFX11-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42]
72
95
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
96
+ ;
97
+ ; GFX1200-LABEL: v_mul_f32_vop2_literal:
98
+ ; GFX1200: ; %bb.0:
99
+ ; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
100
+ ; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
101
+ ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
102
+ ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
103
+ ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
104
+ ; GFX1200-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42]
105
+ ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
73
106
%mul = fmul float %x, 123.0
74
107
ret float %mul
75
108
}
76
- ; CHECK: codeLenInByte = 16
109
+ ; NOT-GFX12: codeLenInByte = 16
110
+ ; GFX1200: codeLenInByte = 32
77
111
78
112
define float @v_mul_f32_vop3_src_mods (float %x , float %y ) {
79
113
; GFX9-LABEL: v_mul_f32_vop3_src_mods:
@@ -93,11 +127,22 @@ define float @v_mul_f32_vop3_src_mods(float %x, float %y) {
93
127
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
94
128
; GFX11-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00]
95
129
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
130
+ ;
131
+ ; GFX1200-LABEL: v_mul_f32_vop3_src_mods:
132
+ ; GFX1200: ; %bb.0:
133
+ ; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
134
+ ; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
135
+ ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
136
+ ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
137
+ ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
138
+ ; GFX1200-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00]
139
+ ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
96
140
%fabs.x = call float @llvm.fabs.f32 (float %x )
97
141
%mul = fmul float %fabs.x , %y
98
142
ret float %mul
99
143
}
100
- ; CHECK: codeLenInByte = 16
144
+ ; NOT-GFX12: codeLenInByte = 16
145
+ ; GFX1200: codeLenInByte = 32
101
146
102
147
define float @v_mul_f32_vop3_src_mods_inline_imm (float %x , float %y ) {
103
148
; GFX9-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
@@ -117,12 +162,23 @@ define float @v_mul_f32_vop3_src_mods_inline_imm(float %x, float %y) {
117
162
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
118
163
; GFX11-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00]
119
164
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
165
+ ;
166
+ ; GFX1200-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
167
+ ; GFX1200: ; %bb.0:
168
+ ; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
169
+ ; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
170
+ ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
171
+ ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
172
+ ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
173
+ ; GFX1200-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00]
174
+ ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
120
175
%fabs.x = call float @llvm.fabs.f32 (float %x )
121
176
%mul = fmul float %fabs.x, 4.0
122
177
ret float %mul
123
178
}
124
179
125
- ; CHECK: codeLenInByte = 16
180
+ ; NOT-GFX12: codeLenInByte = 16
181
+ ; GFX1200: codeLenInByte = 32
126
182
127
183
define float @v_mul_f32_vop3_src_mods_literal (float %x , float %y ) {
128
184
; GFX9-LABEL: v_mul_f32_vop3_src_mods_literal:
@@ -143,6 +199,16 @@ define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) {
143
199
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
144
200
; GFX11-NEXT: v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42]
145
201
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
202
+ ;
203
+ ; GFX1200-LABEL: v_mul_f32_vop3_src_mods_literal:
204
+ ; GFX1200: ; %bb.0:
205
+ ; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
206
+ ; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
207
+ ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
208
+ ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
209
+ ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
210
+ ; GFX1200-NEXT: v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42]
211
+ ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
146
212
%fabs.x = call float @llvm.fabs.f32 (float %x )
147
213
%mul = fmul float %fabs.x, 123.0
148
214
ret float %mul
@@ -151,6 +217,7 @@ define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) {
151
217
; GFX9: codeLenInByte = 24
152
218
; GFX10: codeLenInByte = 20
153
219
; GFX11: codeLenInByte = 20
220
+ ; GFX1200: codeLenInByte = 36
154
221
155
222
define float @v_mul_f32_vop2_frame_index (float %x ) {
156
223
; GFX9-LABEL: v_mul_f32_vop2_frame_index:
@@ -172,6 +239,16 @@ define float @v_mul_f32_vop2_frame_index(float %x) {
172
239
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
173
240
; GFX11-NEXT: v_mul_f32_e32 v0, s32, v0 ; encoding: [0x20,0x00,0x00,0x10]
174
241
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
242
+ ;
243
+ ; GFX1200-LABEL: v_mul_f32_vop2_frame_index:
244
+ ; GFX1200: ; %bb.0:
245
+ ; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
246
+ ; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
247
+ ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
248
+ ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
249
+ ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
250
+ ; GFX1200-NEXT: v_mul_f32_e32 v0, s32, v0 ; encoding: [0x20,0x00,0x00,0x10]
251
+ ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
175
252
%alloca = alloca i32 , addrspace (5 )
176
253
%ptrtoint = ptrtoint ptr addrspace (5 ) %alloca to i32
177
254
%cast = bitcast i32 %ptrtoint to float
@@ -182,6 +259,7 @@ define float @v_mul_f32_vop2_frame_index(float %x) {
182
259
; GFX9: codeLenInByte = 20
183
260
; GFX10: codeLenInByte = 20
184
261
; GFX11: codeLenInByte = 12
262
+ ; GFX1200: codeLenInByte = 28
185
263
186
264
define float @v_fma_f32 (float %x , float %y , float %z ) {
187
265
; GFX9-LABEL: v_fma_f32:
@@ -201,11 +279,22 @@ define float @v_fma_f32(float %x, float %y, float %z) {
201
279
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
202
280
; GFX11-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x13,0xd6,0x00,0x03,0x0a,0x04]
203
281
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
282
+ ;
283
+ ; GFX1200-LABEL: v_fma_f32:
284
+ ; GFX1200: ; %bb.0:
285
+ ; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
286
+ ; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
287
+ ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
288
+ ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
289
+ ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
290
+ ; GFX1200-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x13,0xd6,0x00,0x03,0x0a,0x04]
291
+ ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
204
292
%fma = call float @llvm.fma.f32 (float %x , float %y , float %z )
205
293
ret float %fma
206
294
}
207
295
208
- ; CHECK: codeLenInByte = 16
296
+ ; NOT-GFX12: codeLenInByte = 16
297
+ ; GFX1200: codeLenInByte = 32
209
298
210
299
define float @v_fma_f32_src_mods (float %x , float %y , float %z ) {
211
300
; GFX9-LABEL: v_fma_f32_src_mods:
@@ -225,12 +314,23 @@ define float @v_fma_f32_src_mods(float %x, float %y, float %z) {
225
314
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
226
315
; GFX11-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0x0a,0x04]
227
316
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
317
+ ;
318
+ ; GFX1200-LABEL: v_fma_f32_src_mods:
319
+ ; GFX1200: ; %bb.0:
320
+ ; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
321
+ ; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
322
+ ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
323
+ ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
324
+ ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
325
+ ; GFX1200-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0x0a,0x04]
326
+ ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
228
327
%fabs.x = call float @llvm.fabs.f32 (float %x )
229
328
%fma = call float @llvm.fma.f32 (float %fabs.x , float %y , float %z )
230
329
ret float %fma
231
330
}
232
331
233
- ; CHECK: codeLenInByte = 16
332
+ ; NOT-GFX12: codeLenInByte = 16
333
+ ; GFX1200: codeLenInByte = 32
234
334
235
335
define float @v_fmac_f32 (float %x , float %y ) {
236
336
; GFX9-LABEL: v_fmac_f32:
@@ -250,13 +350,24 @@ define float @v_fmac_f32(float %x, float %y) {
250
350
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
251
351
; GFX11-NEXT: v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56]
252
352
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
353
+ ;
354
+ ; GFX1200-LABEL: v_fmac_f32:
355
+ ; GFX1200: ; %bb.0:
356
+ ; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
357
+ ; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
358
+ ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
359
+ ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
360
+ ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
361
+ ; GFX1200-NEXT: v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56]
362
+ ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
253
363
%fma = call float @llvm.fma.f32 (float %x , float %y , float %x )
254
364
ret float %fma
255
365
}
256
366
257
367
; GFX9: codeLenInByte = 16
258
368
; GFX10: codeLenInByte = 12
259
369
; GFX11: codeLenInByte = 12
370
+ ; GFX1200: codeLenInByte = 28
260
371
261
372
define float @v_fmaak_f32 (float %x , float %y ) {
262
373
; GFX9-LABEL: v_fmaak_f32:
@@ -277,13 +388,24 @@ define float @v_fmaak_f32(float %x, float %y) {
277
388
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
278
389
; GFX11-NEXT: v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43]
279
390
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
391
+ ;
392
+ ; GFX1200-LABEL: v_fmaak_f32:
393
+ ; GFX1200: ; %bb.0:
394
+ ; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
395
+ ; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
396
+ ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
397
+ ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
398
+ ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
399
+ ; GFX1200-NEXT: v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43]
400
+ ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
280
401
%fma = call float @llvm.fma.f32(float %x, float %y, float 256.0)
281
402
ret float %fma
282
403
}
283
404
284
405
; GFX9: codeLenInByte = 24
285
406
; GFX10: codeLenInByte = 16
286
407
; GFX11: codeLenInByte = 16
408
+ ; GFX1200: codeLenInByte = 32
287
409
288
410
define float @v_fma_k_f32_src_mods (float %x , float %y ) {
289
411
; GFX9-LABEL: v_fma_k_f32_src_mods:
@@ -304,6 +426,16 @@ define float @v_fma_k_f32_src_mods(float %x, float %y) {
304
426
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
305
427
; GFX11-NEXT: v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43]
306
428
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
429
+ ;
430
+ ; GFX1200-LABEL: v_fma_k_f32_src_mods:
431
+ ; GFX1200: ; %bb.0:
432
+ ; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf]
433
+ ; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf]
434
+ ; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf]
435
+ ; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf]
436
+ ; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf]
437
+ ; GFX1200-NEXT: v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43]
438
+ ; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
307
439
%fabs.x = call float @llvm.fabs.f32 (float %x )
308
440
%fma = call float @llvm.fma.f32(float %fabs.x, float %y, float 256.0)
309
441
ret float %fma
@@ -312,6 +444,7 @@ define float @v_fma_k_f32_src_mods(float %x, float %y) {
312
444
; GFX9: codeLenInByte = 24
313
445
; GFX10: codeLenInByte = 20
314
446
; GFX11: codeLenInByte = 20
447
+ ; GFX1200: codeLenInByte = 36
315
448
316
449
define amdgpu_ps float @s_fmaak_f32 (float inreg %x , float inreg %y ) {
317
450
; GFX9-LABEL: s_fmaak_f32:
@@ -340,6 +473,13 @@ define amdgpu_ps float @s_fmaak_f32(float inreg %x, float inreg %y) {
340
473
; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; encoding: [0x0b,0x00,0x87,0xbf]
341
474
; GFX1150-NEXT: v_mov_b32_e32 v0, s0 ; encoding: [0x00,0x02,0x00,0x7e]
342
475
; GFX1150-NEXT: ; return to shader part epilog
476
+ ;
477
+ ; GFX1200-LABEL: s_fmaak_f32:
478
+ ; GFX1200: ; %bb.0:
479
+ ; GFX1200-NEXT: s_fmaak_f32 s0, s0, s1, 0x43800000 ; encoding: [0x00,0x01,0x80,0xa2,0x00,0x00,0x80,0x43]
480
+ ; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; encoding: [0x0b,0x00,0x87,0xbf]
481
+ ; GFX1200-NEXT: v_mov_b32_e32 v0, s0 ; encoding: [0x00,0x02,0x00,0x7e]
482
+ ; GFX1200-NEXT: ; return to shader part epilog
343
483
%fma = call float @llvm.fma.f32(float %x, float %y, float 256.0)
344
484
ret float %fma
345
485
}
@@ -348,3 +488,4 @@ define amdgpu_ps float @s_fmaak_f32(float inreg %x, float inreg %y) {
348
488
; GFX10: codeLenInByte = 12
349
489
; GFX1100: codeLenInByte = 16
350
490
; GFX1150: codeLenInByte = 16
491
+ ; GFX1200: codeLenInByte = 16
0 commit comments