Skip to content

Commit 22d65d8

Browse files
authored
AMDGPU: Teach isOperandLegal about SALU literal restrictions (#127626)
isOperandLegal mostly implemented the VALU operand rules and largely ignored the SALU restrictions. In theory, this change avoids folding a literal into a SALU instruction that already has a different literal operand. That problem is currently masked by a bug in SIFoldOperands; this change will allow the raw operand legality rules to be used. It breaks the formation of s_fmaak_f32 in SIFoldOperands, but that instruction probably should not have been formed there in the first place. TwoAddressInsts or RA should generally handle that, and the previous behavior only worked by accident.
1 parent aed9f11 commit 22d65d8

File tree

3 files changed

+226
-10
lines changed

3 files changed

+226
-10
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5931,11 +5931,15 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
59315931
if (!MO)
59325932
MO = &MI.getOperand(OpIdx);
59335933

5934-
const MachineOperand *UsedLiteral = nullptr;
5934+
const bool IsInlineConst = !MO->isReg() && isInlineConstant(*MO, OpInfo);
59355935

5936-
int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
5937-
int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;
5938-
if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
5936+
if (isVALU(MI) && !IsInlineConst && usesConstantBus(MRI, *MO, OpInfo)) {
5937+
const MachineOperand *UsedLiteral = nullptr;
5938+
5939+
int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
5940+
int LiteralLimit = !isVOP3(MI) || ST.hasVOP3Literal() ? 1 : 0;
5941+
5942+
// TODO: Be more permissive with frame indexes.
59395943
if (!MO->isReg() && !isInlineConstant(*MO, OpInfo)) {
59405944
if (!LiteralLimit--)
59415945
return false;
@@ -5974,9 +5978,19 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
59745978
return false;
59755979
}
59765980
}
5977-
} else if (ST.hasNoF16PseudoScalarTransInlineConstants() && !MO->isReg() &&
5978-
isF16PseudoScalarTrans(MI.getOpcode()) &&
5979-
isInlineConstant(*MO, OpInfo)) {
5981+
} else if (!IsInlineConst && !MO->isReg() && isSALU(MI)) {
5982+
// There can be at most one literal operand, but it can be repeated.
5983+
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
5984+
if (i == OpIdx)
5985+
continue;
5986+
const MachineOperand &Op = MI.getOperand(i);
5987+
if (!Op.isReg() && !Op.isFI() &&
5988+
!isInlineConstant(Op, InstDesc.operands()[i]) &&
5989+
!Op.isIdenticalTo(*MO))
5990+
return false;
5991+
}
5992+
} else if (IsInlineConst && ST.hasNoF16PseudoScalarTransInlineConstants() &&
5993+
isF16PseudoScalarTrans(MI.getOpcode())) {
59805994
return false;
59815995
}
59825996

llvm/test/CodeGen/AMDGPU/fold-operands-scalar-fmac.mir

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,8 @@ body: |
133133
; CHECK: liveins: $sgpr0
134134
; CHECK-NEXT: {{ $}}
135135
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
136-
; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1234567890, 1056964608, implicit $mode
136+
; CHECK-NEXT: %noninlinable:sreg_32 = S_MOV_B32 1234567890
137+
; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAAK_F32 %noninlinable, [[COPY]], 1056964608, implicit $mode
137138
; CHECK-NEXT: $sgpr0 = COPY %fma
138139
%0:sreg_32 = COPY $sgpr0
139140
%inlinable:sreg_32 = S_MOV_B32 1056964608
@@ -152,7 +153,8 @@ body: |
152153
; CHECK: liveins: $sgpr0
153154
; CHECK-NEXT: {{ $}}
154155
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
155-
; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1234567890, 1056964608, implicit $mode
156+
; CHECK-NEXT: %noninlinable:sreg_32 = S_MOV_B32 1234567890
157+
; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAAK_F32 [[COPY]], %noninlinable, 1056964608, implicit $mode
156158
; CHECK-NEXT: $sgpr0 = COPY %fma
157159
%0:sreg_32 = COPY $sgpr0
158160
%inlinable:sreg_32 = S_MOV_B32 1056964608
@@ -210,7 +212,8 @@ body: |
210212
; CHECK: liveins: $sgpr0
211213
; CHECK-NEXT: {{ $}}
212214
; CHECK-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
213-
; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAAK_F32 [[COPY]], 1056964608, 1234567890, implicit $mode
215+
; CHECK-NEXT: %noninlinable:sreg_32 = S_MOV_B32 1234567890
216+
; CHECK-NEXT: %fma:sreg_32 = nofpexcept S_FMAMK_F32 [[COPY]], 1056964608, %noninlinable, implicit $mode
214217
; CHECK-NEXT: $sgpr0 = COPY %fma
215218
%0:sreg_32 = COPY $sgpr0
216219
%inlinable:sreg_32 = S_MOV_B32 1056964608

llvm/test/CodeGen/AMDGPU/fold-sgpr-multi-imm.mir

Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,202 @@ body: |
6969
%0:sreg_32 = S_MOV_B32 63
7070
%1:sreg_32 = S_ADD_I32 %stack.0, %0, implicit-def $scc
7171
...
72+
73+
# GCN-LABEL: name: test_no_fold_literal_already_inline_lhs{{$}}
74+
# GCN: %0:sreg_32 = S_MOV_B32 80
75+
# GCN-NEXT: %1:sreg_32 = S_ADD_I32 70, %0
76+
---
77+
name: test_no_fold_literal_already_inline_lhs
78+
tracksRegLiveness: true
79+
body: |
80+
bb.0:
81+
%0:sreg_32 = S_MOV_B32 80
82+
%1:sreg_32 = S_ADD_I32 70, %0, implicit-def $scc
83+
...
84+
85+
# GCN-LABEL: name: test_no_fold_literal_already_inline_rhs{{$}}
86+
# GCN: %0:sreg_32 = S_MOV_B32 80
87+
# GCN-NEXT: %1:sreg_32 = S_ADD_I32 %0, 70
88+
---
89+
name: test_no_fold_literal_already_inline_rhs
90+
tracksRegLiveness: true
91+
body: |
92+
bb.0:
93+
%0:sreg_32 = S_MOV_B32 80
94+
%1:sreg_32 = S_ADD_I32 %0, 70, implicit-def $scc
95+
...
96+
97+
# GCN-LABEL: name: test_fold_literal_inlineimm_lhs{{$}}
98+
# GCN: %1:sreg_32 = S_ADD_I32 64, 80
99+
---
100+
name: test_fold_literal_inlineimm_lhs
101+
tracksRegLiveness: true
102+
body: |
103+
bb.0:
104+
%0:sreg_32 = S_MOV_B32 80
105+
%1:sreg_32 = S_ADD_I32 64, %0, implicit-def $scc
106+
...
107+
108+
# GCN-LABEL: name: test_fold_literal_inlineimm_rhs{{$}}
109+
# GCN: %1:sreg_32 = S_ADD_I32 80, 64
110+
---
111+
name: test_fold_literal_inlineimm_rhs
112+
tracksRegLiveness: true
113+
body: |
114+
bb.0:
115+
%0:sreg_32 = S_MOV_B32 80
116+
%1:sreg_32 = S_ADD_I32 %0, 64, implicit-def $scc
117+
...
118+
119+
# GCN-LABEL: name: test_fold_same_literal_2x{{$}}
120+
# GCN: %2:sreg_32 = S_ADD_I32 70, %1
121+
---
122+
name: test_fold_same_literal_2x
123+
tracksRegLiveness: true
124+
body: |
125+
bb.0:
126+
%0:sreg_32 = S_MOV_B32 70
127+
%1:sreg_32 = S_MOV_B32 70
128+
%2:sreg_32 = S_ADD_I32 %0, %1, implicit-def $scc
129+
...
130+
131+
# GCN-LABEL: name: test_fold_same_literal_lhs{{$}}
132+
# GCN: %1:sreg_32 = S_ADD_I32 70, %0
133+
---
134+
name: test_fold_same_literal_lhs
135+
tracksRegLiveness: true
136+
body: |
137+
bb.0:
138+
%0:sreg_32 = S_MOV_B32 70
139+
%1:sreg_32 = S_ADD_I32 70, %0, implicit-def $scc
140+
...
141+
142+
# GCN-LABEL: name: test_fold_same_literal_rhs{{$}}
143+
# GCN: %1:sreg_32 = S_ADD_I32 %0, 70
144+
---
145+
name: test_fold_same_literal_rhs
146+
tracksRegLiveness: true
147+
body: |
148+
bb.0:
149+
%0:sreg_32 = S_MOV_B32 70
150+
%1:sreg_32 = S_ADD_I32 %0, 70, implicit-def $scc
151+
...
152+
153+
154+
# GCN-LABEL: name: test_s_cselect_b32_2x_literal_fold{{$}}
155+
# GCN: %2:sreg_32 = S_CSELECT_B32 70, %1, implicit $scc
156+
---
157+
name: test_s_cselect_b32_2x_literal_fold
158+
tracksRegLiveness: true
159+
body: |
160+
bb.0:
161+
%0:sreg_32 = S_MOV_B32 70
162+
%1:sreg_32 = S_MOV_B32 80
163+
$scc = IMPLICIT_DEF
164+
%2:sreg_32 = S_CSELECT_B32 %0, %1, implicit $scc
165+
...
166+
167+
# GCN-LABEL: name: test_s_cselect_b32_fold_literal_literal_lhs{{$}}
168+
# GCN: %1:sreg_32 = S_CSELECT_B32 70, %0, implicit $scc
169+
---
170+
name: test_s_cselect_b32_fold_literal_literal_lhs
171+
tracksRegLiveness: true
172+
body: |
173+
bb.0:
174+
%0:sreg_32 = S_MOV_B32 80
175+
$scc = IMPLICIT_DEF
176+
%1:sreg_32 = S_CSELECT_B32 70, %0, implicit $scc
177+
...
178+
179+
# GCN-LABEL: name: test_s_cselect_b32_fold_literal_literal_rhs{{$}}
180+
# GCN: %1:sreg_32 = S_CSELECT_B32 %0, 70, implicit $scc
181+
---
182+
name: test_s_cselect_b32_fold_literal_literal_rhs
183+
tracksRegLiveness: true
184+
body: |
185+
bb.0:
186+
%0:sreg_32 = S_MOV_B32 80
187+
$scc = IMPLICIT_DEF
188+
%1:sreg_32 = S_CSELECT_B32 %0, 70, implicit $scc
189+
...
190+
191+
# GCN-LABEL: name: test_s_cselect_b32_fold_literal_inlineimm_lhs{{$}}
192+
# GCN: %1:sreg_32 = S_CSELECT_B32 64, 80, implicit $scc
193+
---
194+
name: test_s_cselect_b32_fold_literal_inlineimm_lhs
195+
tracksRegLiveness: true
196+
body: |
197+
bb.0:
198+
%0:sreg_32 = S_MOV_B32 80
199+
$scc = IMPLICIT_DEF
200+
%1:sreg_32 = S_CSELECT_B32 64, %0, implicit $scc
201+
...
202+
203+
# GCN-LABEL: name: test_s_cselect_b32_fold_literal_inlineimm_rhs{{$}}
204+
# GCN: %1:sreg_32 = S_CSELECT_B32 80, 64, implicit $scc
205+
---
206+
name: test_s_cselect_b32_fold_literal_inlineimm_rhs
207+
tracksRegLiveness: true
208+
body: |
209+
bb.0:
210+
%0:sreg_32 = S_MOV_B32 80
211+
$scc = IMPLICIT_DEF
212+
%1:sreg_32 = S_CSELECT_B32 %0, 64, implicit $scc
213+
...
214+
215+
# GCN-LABEL: name: test_s_cmp_b32_2x_literal_fold{{$}}
216+
# GCN: S_CMP_EQ_U32 70, %1, implicit-def $scc
217+
---
218+
name: test_s_cmp_b32_2x_literal_fold
219+
tracksRegLiveness: true
220+
body: |
221+
bb.0:
222+
%0:sreg_32 = S_MOV_B32 70
223+
%1:sreg_32 = S_MOV_B32 80
224+
$scc = IMPLICIT_DEF
225+
S_CMP_EQ_U32 %0, %1, implicit-def $scc
226+
...
227+
228+
# GCN-LABEL: name: test_s_cmp_b32_literal_literal_lhs{{$}}
229+
# GCN: S_CMP_EQ_U32 70, %0, implicit-def $scc
230+
---
231+
name: test_s_cmp_b32_literal_literal_lhs
232+
tracksRegLiveness: true
233+
body: |
234+
bb.0:
235+
%0:sreg_32 = S_MOV_B32 80
236+
S_CMP_EQ_U32 70, %0, implicit-def $scc
237+
...
238+
239+
# GCN-LABEL: name: test_s_cmp_b32_literal_literal_rhs{{$}}
240+
# GCN: S_CMP_EQ_U32 %0, 70, implicit-def $scc
241+
---
242+
name: test_s_cmp_b32_literal_literal_rhs
243+
tracksRegLiveness: true
244+
body: |
245+
bb.0:
246+
%0:sreg_32 = S_MOV_B32 80
247+
S_CMP_EQ_U32 %0, 70, implicit-def $scc
248+
...
249+
250+
# GCN-LABEL: name: test_s_cmp_b32_literal_inlineimm_lhs{{$}}
251+
# GCN: S_CMP_EQ_U32 64, 80, implicit-def $scc
252+
---
253+
name: test_s_cmp_b32_literal_inlineimm_lhs
254+
tracksRegLiveness: true
255+
body: |
256+
bb.0:
257+
%0:sreg_32 = S_MOV_B32 80
258+
S_CMP_EQ_U32 64, %0, implicit-def $scc
259+
...
260+
261+
# GCN-LABEL: name: test_s_cmp_b32_literal_inlineimm_rhs{{$}}
262+
# GCN: S_CMP_EQ_U32 80, 64, implicit-def $scc
263+
---
264+
name: test_s_cmp_b32_literal_inlineimm_rhs
265+
tracksRegLiveness: true
266+
body: |
267+
bb.0:
268+
%0:sreg_32 = S_MOV_B32 80
269+
S_CMP_EQ_U32 %0, 64, implicit-def $scc
270+
...

0 commit comments

Comments
 (0)