Skip to content

Commit 4b1702e

Browse files
committed
AMDGPU: Fix counting source modifiers as literal constants
This fixes overestimating code size. This was broken by 79f52af. https://reviews.llvm.org/D157103
1 parent df3800f commit 4b1702e

File tree

3 files changed

+329
-9
lines changed

3 files changed

+329
-9
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3832,9 +3832,7 @@ bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
38323832
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
38333833
uint8_t OperandType) const {
38343834
assert(!MO.isReg() && "isInlineConstant called on register operand!");
3835-
if (!MO.isImm() ||
3836-
OperandType < AMDGPU::OPERAND_SRC_FIRST ||
3837-
OperandType > AMDGPU::OPERAND_SRC_LAST)
3835+
if (!MO.isImm())
38383836
return false;
38393837

38403838
// MachineOperand provides no way to tell the true operand size, since it only
@@ -3908,9 +3906,23 @@ bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
39083906
}
39093907
case AMDGPU::OPERAND_KIMM32:
39103908
case AMDGPU::OPERAND_KIMM16:
3911-
return false;
3909+
case AMDGPU::OPERAND_INPUT_MODS:
3910+
case MCOI::OPERAND_IMMEDIATE:
3911+
// Always embedded in the instruction for free.
3912+
return true;
3913+
case MCOI::OPERAND_UNKNOWN:
3914+
case MCOI::OPERAND_REGISTER:
3915+
case MCOI::OPERAND_PCREL:
3916+
case MCOI::OPERAND_GENERIC_0:
3917+
case MCOI::OPERAND_GENERIC_1:
3918+
case MCOI::OPERAND_GENERIC_2:
3919+
case MCOI::OPERAND_GENERIC_3:
3920+
case MCOI::OPERAND_GENERIC_4:
3921+
case MCOI::OPERAND_GENERIC_5:
3922+
// Just ignore anything else.
3923+
return true;
39123924
default:
3913-
llvm_unreachable("invalid bitwidth");
3925+
llvm_unreachable("invalid operand type");
39143926
}
39153927
}
39163928

Lines changed: 312 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,312 @@
1+
; RUN: llc -march=amdgcn -mcpu=gfx900 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX9 %s
2+
; RUN: llc -march=amdgcn -mcpu=gfx1030 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX10 %s
3+
; RUN: llc -march=amdgcn -mcpu=gfx1100 -show-mc-encoding < %s | FileCheck -check-prefixes=CHECK,GFX11 %s
4+
5+
declare float @llvm.fabs.f32(float)
6+
declare float @llvm.fma.f32(float, float, float)
7+
8+
define float @v_mul_f32_vop2(float %x, float %y) {
9+
; GFX9-LABEL: v_mul_f32_vop2:
10+
; GFX9: ; %bb.0:
11+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
12+
; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x0a]
13+
; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
14+
;
15+
; GFX10-LABEL: v_mul_f32_vop2:
16+
; GFX10: ; %bb.0:
17+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
18+
; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10]
19+
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
20+
;
21+
; GFX11-LABEL: v_mul_f32_vop2:
22+
; GFX11: ; %bb.0:
23+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
24+
; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10]
25+
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
26+
%mul = fmul float %x, %y
27+
ret float %mul
28+
}
29+
; CHECK: codeLenInByte = 12
30+
31+
define float @v_mul_f32_vop2_inline_imm(float %x) {
32+
; GFX9-LABEL: v_mul_f32_vop2_inline_imm:
33+
; GFX9: ; %bb.0:
34+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
35+
; GFX9-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x0a]
36+
; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
37+
;
38+
; GFX10-LABEL: v_mul_f32_vop2_inline_imm:
39+
; GFX10: ; %bb.0:
40+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
41+
; GFX10-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10]
42+
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
43+
;
44+
; GFX11-LABEL: v_mul_f32_vop2_inline_imm:
45+
; GFX11: ; %bb.0:
46+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
47+
; GFX11-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10]
48+
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
49+
%mul = fmul float %x, 4.0
50+
ret float %mul
51+
}
52+
; CHECK: codeLenInByte = 12
53+
54+
define float @v_mul_f32_vop2_literal(float %x) {
55+
; GFX9-LABEL: v_mul_f32_vop2_literal:
56+
; GFX9: ; %bb.0:
57+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
58+
; GFX9-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x0a,0x00,0x00,0xf6,0x42]
59+
; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
60+
;
61+
; GFX10-LABEL: v_mul_f32_vop2_literal:
62+
; GFX10: ; %bb.0:
63+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
64+
; GFX10-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42]
65+
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
66+
;
67+
; GFX11-LABEL: v_mul_f32_vop2_literal:
68+
; GFX11: ; %bb.0:
69+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
70+
; GFX11-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42]
71+
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
72+
%mul = fmul float %x, 123.0
73+
ret float %mul
74+
}
75+
; CHECK: codeLenInByte = 16
76+
77+
define float @v_mul_f32_vop3_src_mods(float %x, float %y) {
78+
; GFX9-LABEL: v_mul_f32_vop3_src_mods:
79+
; GFX9: ; %bb.0:
80+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
81+
; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x05,0xd1,0x00,0x03,0x02,0x00]
82+
; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
83+
;
84+
; GFX10-LABEL: v_mul_f32_vop3_src_mods:
85+
; GFX10: ; %bb.0:
86+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
87+
; GFX10-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00]
88+
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
89+
;
90+
; GFX11-LABEL: v_mul_f32_vop3_src_mods:
91+
; GFX11: ; %bb.0:
92+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
93+
; GFX11-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00]
94+
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
95+
%fabs.x = call float @llvm.fabs.f32(float %x)
96+
%mul = fmul float %fabs.x, %y
97+
ret float %mul
98+
}
99+
; CHECK: codeLenInByte = 16
100+
101+
define float @v_mul_f32_vop3_src_mods_inline_imm(float %x, float %y) {
102+
; GFX9-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
103+
; GFX9: ; %bb.0:
104+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
105+
; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x05,0xd1,0x00,0xed,0x01,0x00]
106+
; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
107+
;
108+
; GFX10-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
109+
; GFX10: ; %bb.0:
110+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
111+
; GFX10-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00]
112+
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
113+
;
114+
; GFX11-LABEL: v_mul_f32_vop3_src_mods_inline_imm:
115+
; GFX11: ; %bb.0:
116+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
117+
; GFX11-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00]
118+
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
119+
%fabs.x = call float @llvm.fabs.f32(float %x)
120+
%mul = fmul float %fabs.x, 4.0
121+
ret float %mul
122+
}
123+
124+
; CHECK: codeLenInByte = 16
125+
126+
define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) {
127+
; GFX9-LABEL: v_mul_f32_vop3_src_mods_literal:
128+
; GFX9: ; %bb.0:
129+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
130+
; GFX9-NEXT: s_mov_b32 s4, 0x42f60000 ; encoding: [0xff,0x00,0x84,0xbe,0x00,0x00,0xf6,0x42]
131+
; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, s4 ; encoding: [0x00,0x01,0x05,0xd1,0x00,0x09,0x00,0x00]
132+
; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
133+
;
134+
; GFX10-LABEL: v_mul_f32_vop3_src_mods_literal:
135+
; GFX10: ; %bb.0:
136+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
137+
; GFX10-NEXT: v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42]
138+
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
139+
;
140+
; GFX11-LABEL: v_mul_f32_vop3_src_mods_literal:
141+
; GFX11: ; %bb.0:
142+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
143+
; GFX11-NEXT: v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42]
144+
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
145+
%fabs.x = call float @llvm.fabs.f32(float %x)
146+
%mul = fmul float %fabs.x, 123.0
147+
ret float %mul
148+
}
149+
150+
; GFX9: codeLenInByte = 24
151+
; GFX10: codeLenInByte = 20
152+
153+
define float @v_mul_f32_vop2_frame_index(float %x) {
154+
; GFX9-LABEL: v_mul_f32_vop2_frame_index:
155+
; GFX9: ; %bb.0:
156+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
157+
; GFX9-NEXT: v_lshrrev_b32_e64 v1, 6, s32 ; encoding: [0x01,0x00,0x10,0xd1,0x86,0x40,0x00,0x00]
158+
; GFX9-NEXT: v_mul_f32_e32 v0, v1, v0 ; encoding: [0x01,0x01,0x00,0x0a]
159+
; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
160+
;
161+
; GFX10-LABEL: v_mul_f32_vop2_frame_index:
162+
; GFX10: ; %bb.0:
163+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
164+
; GFX10-NEXT: v_lshrrev_b32_e64 v1, 5, s32 ; encoding: [0x01,0x00,0x16,0xd5,0x85,0x40,0x00,0x00]
165+
; GFX10-NEXT: v_mul_f32_e32 v0, v1, v0 ; encoding: [0x01,0x01,0x00,0x10]
166+
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
167+
;
168+
; GFX11-LABEL: v_mul_f32_vop2_frame_index:
169+
; GFX11: ; %bb.0:
170+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
171+
; GFX11-NEXT: v_mul_f32_e32 v0, s32, v0 ; encoding: [0x20,0x00,0x00,0x10]
172+
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
173+
%alloca = alloca i32, addrspace(5)
174+
%ptrtoint = ptrtoint ptr addrspace(5) %alloca to i32
175+
%cast = bitcast i32 %ptrtoint to float
176+
%mul = fmul float %x, %cast
177+
ret float %mul
178+
}
179+
180+
; GFX9: codeLenInByte = 20
181+
; GFX10: codeLenInByte = 20
182+
; GFX11: codeLenInByte = 12
183+
184+
define float @v_fma_f32(float %x, float %y, float %z) {
185+
; GFX9-LABEL: v_fma_f32:
186+
; GFX9: ; %bb.0:
187+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
188+
; GFX9-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0xcb,0xd1,0x00,0x03,0x0a,0x04]
189+
; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
190+
;
191+
; GFX10-LABEL: v_fma_f32:
192+
; GFX10: ; %bb.0:
193+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
194+
; GFX10-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x4b,0xd5,0x00,0x03,0x0a,0x04]
195+
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
196+
;
197+
; GFX11-LABEL: v_fma_f32:
198+
; GFX11: ; %bb.0:
199+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
200+
; GFX11-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x13,0xd6,0x00,0x03,0x0a,0x04]
201+
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
202+
%fma = call float @llvm.fma.f32(float %x, float %y, float %z)
203+
ret float %fma
204+
}
205+
206+
; CHECK: codeLenInByte = 16
207+
208+
define float @v_fma_f32_src_mods(float %x, float %y, float %z) {
209+
; GFX9-LABEL: v_fma_f32_src_mods:
210+
; GFX9: ; %bb.0:
211+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
212+
; GFX9-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0xcb,0xd1,0x00,0x03,0x0a,0x04]
213+
; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
214+
;
215+
; GFX10-LABEL: v_fma_f32_src_mods:
216+
; GFX10: ; %bb.0:
217+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
218+
; GFX10-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x4b,0xd5,0x00,0x03,0x0a,0x04]
219+
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
220+
;
221+
; GFX11-LABEL: v_fma_f32_src_mods:
222+
; GFX11: ; %bb.0:
223+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
224+
; GFX11-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0x0a,0x04]
225+
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
226+
%fabs.x = call float @llvm.fabs.f32(float %x)
227+
%fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z)
228+
ret float %fma
229+
}
230+
231+
; CHECK: codeLenInByte = 16
232+
233+
define float @v_fmac_f32(float %x, float %y) {
234+
; GFX9-LABEL: v_fmac_f32:
235+
; GFX9: ; %bb.0:
236+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
237+
; GFX9-NEXT: v_fma_f32 v0, v0, v1, v0 ; encoding: [0x00,0x00,0xcb,0xd1,0x00,0x03,0x02,0x04]
238+
; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
239+
;
240+
; GFX10-LABEL: v_fmac_f32:
241+
; GFX10: ; %bb.0:
242+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
243+
; GFX10-NEXT: v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56]
244+
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
245+
;
246+
; GFX11-LABEL: v_fmac_f32:
247+
; GFX11: ; %bb.0:
248+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
249+
; GFX11-NEXT: v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56]
250+
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
251+
%fma = call float @llvm.fma.f32(float %x, float %y, float %x)
252+
ret float %fma
253+
}
254+
255+
; GFX9: codeLenInByte = 16
256+
; GFX10: codeLenInByte = 12
257+
; GFX11: codeLenInByte = 12
258+
259+
define float @v_fmaak_f32(float %x, float %y) {
260+
; GFX9-LABEL: v_fmaak_f32:
261+
; GFX9: ; %bb.0:
262+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
263+
; GFX9-NEXT: s_mov_b32 s4, 0x43800000 ; encoding: [0xff,0x00,0x84,0xbe,0x00,0x00,0x80,0x43]
264+
; GFX9-NEXT: v_fma_f32 v0, v0, v1, s4 ; encoding: [0x00,0x00,0xcb,0xd1,0x00,0x03,0x12,0x00]
265+
; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
266+
;
267+
; GFX10-LABEL: v_fmaak_f32:
268+
; GFX10: ; %bb.0:
269+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
270+
; GFX10-NEXT: v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43]
271+
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
272+
;
273+
; GFX11-LABEL: v_fmaak_f32:
274+
; GFX11: ; %bb.0:
275+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
276+
; GFX11-NEXT: v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43]
277+
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
278+
%fma = call float @llvm.fma.f32(float %x, float %y, float 256.0)
279+
ret float %fma
280+
}
281+
282+
; GFX9: codeLenInByte = 24
283+
; GFX10: codeLenInByte = 16
284+
; GFX11: codeLenInByte = 16
285+
286+
define float @v_fma_k_f32_src_mods(float %x, float %y) {
287+
; GFX9-LABEL: v_fma_k_f32_src_mods:
288+
; GFX9: ; %bb.0:
289+
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
290+
; GFX9-NEXT: s_mov_b32 s4, 0x43800000 ; encoding: [0xff,0x00,0x84,0xbe,0x00,0x00,0x80,0x43]
291+
; GFX9-NEXT: v_fma_f32 v0, |v0|, v1, s4 ; encoding: [0x00,0x01,0xcb,0xd1,0x00,0x03,0x12,0x00]
292+
; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe]
293+
;
294+
; GFX10-LABEL: v_fma_k_f32_src_mods:
295+
; GFX10: ; %bb.0:
296+
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf]
297+
; GFX10-NEXT: v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x4b,0xd5,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43]
298+
; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe]
299+
;
300+
; GFX11-LABEL: v_fma_k_f32_src_mods:
301+
; GFX11: ; %bb.0:
302+
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf]
303+
; GFX11-NEXT: v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43]
304+
; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe]
305+
%fabs.x = call float @llvm.fabs.f32(float %x)
306+
%fma = call float @llvm.fma.f32(float %fabs.x, float %y, float 256.0)
307+
ret float %fma
308+
}
309+
310+
; GFX9: codeLenInByte = 24
311+
; GFX10: codeLenInByte = 20
312+
; GFX11: codeLenInByte = 20

llvm/test/CodeGen/AMDGPU/idiv-licm.ll

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -848,7 +848,6 @@ define amdgpu_kernel void @urem16_invariant_denom(ptr addrspace(1) nocapture %ar
848848
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
849849
; GFX11-NEXT: v_cvt_f32_u32_e32 v2, s1
850850
; GFX11-NEXT: v_rcp_iflag_f32_e32 v3, v2
851-
; GFX11-NEXT: s_set_inst_prefetch_distance 0x1
852851
; GFX11-NEXT: .p2align 6
853852
; GFX11-NEXT: .LBB5_1: ; %bb3
854853
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -875,7 +874,6 @@ define amdgpu_kernel void @urem16_invariant_denom(ptr addrspace(1) nocapture %ar
875874
; GFX11-NEXT: global_store_b16 v[5:6], v0, off
876875
; GFX11-NEXT: s_cbranch_vccz .LBB5_1
877876
; GFX11-NEXT: ; %bb.2: ; %bb2
878-
; GFX11-NEXT: s_set_inst_prefetch_distance 0x2
879877
; GFX11-NEXT: s_nop 0
880878
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
881879
; GFX11-NEXT: s_endpgm
@@ -1135,7 +1133,6 @@ define amdgpu_kernel void @srem16_invariant_denom(ptr addrspace(1) nocapture %ar
11351133
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
11361134
; GFX11-NEXT: v_cvt_f32_i32_e32 v0, s4
11371135
; GFX11-NEXT: v_rcp_iflag_f32_e32 v1, v0
1138-
; GFX11-NEXT: s_set_inst_prefetch_distance 0x1
11391136
; GFX11-NEXT: .p2align 6
11401137
; GFX11-NEXT: .LBB7_1: ; %bb3
11411138
; GFX11-NEXT: ; =>This Inner Loop Header: Depth=1
@@ -1170,7 +1167,6 @@ define amdgpu_kernel void @srem16_invariant_denom(ptr addrspace(1) nocapture %ar
11701167
; GFX11-NEXT: global_store_b16 v2, v3, s[6:7]
11711168
; GFX11-NEXT: s_cbranch_vccz .LBB7_1
11721169
; GFX11-NEXT: ; %bb.2: ; %bb2
1173-
; GFX11-NEXT: s_set_inst_prefetch_distance 0x2
11741170
; GFX11-NEXT: s_nop 0
11751171
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
11761172
; GFX11-NEXT: s_endpgm

0 commit comments

Comments
 (0)