-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AMDGPU] Add commute for some VOP3 inst #121326
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
f0877bb
ae5f1e8
df6903c
ada83d6
c599af0
3a83ae5
5c9d065
d3f00dc
0e60298
707474f
79219d1
288ead2
4becc58
53b370a
0b746a8
785921f
6955a86
5e15e72
004a82d
dc2739f
378c02c
49ae569
161a2b9
4d569ae
1689c1e
328e566
9faf423
19b8ad4
cc3a125
0a89dc9
d8e6cb7
3c7bd89
bf1da57
789092d
6c35280
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2749,6 +2749,20 @@ static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI, | |
return &MI; | ||
} | ||
|
||
static MachineInstr *swapInlineConstOperands(MachineInstr &MI, | ||
MachineOperand &NonRegOp1, | ||
MachineOperand &NonRegOp2) { | ||
|
||
auto TargetFlags = NonRegOp1.getTargetFlags(); | ||
auto NonRegVal = NonRegOp1.getImm(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No auto |
||
|
||
NonRegOp1.setImm(NonRegOp2.getImm()); | ||
NonRegOp2.setImm(NonRegVal); | ||
NonRegOp1.setTargetFlags(NonRegOp2.getTargetFlags()); | ||
NonRegOp2.setTargetFlags(TargetFlags); | ||
return &MI; | ||
} | ||
|
||
MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, | ||
unsigned Src0Idx, | ||
unsigned Src1Idx) const { | ||
|
@@ -2785,6 +2799,9 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI, | |
} else if (!Src0.isReg() && Src1.isReg()) { | ||
if (isOperandLegal(MI, Src1Idx, &Src0)) | ||
CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0); | ||
} else if (isInlineConstant(Src0) && isInlineConstant(Src1)) { | ||
if (isOperandLegal(MI, Src1Idx, &Src0)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At least one of these isInlineConstant checks is redundant with the isOperandLegal check There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi @arsenm , sorry for replying late, it took me long time to investigate the By testing, one of the operand is inline constant and It is true for VOP that if Src1 is inline constant, then Src0 is also has to be an inline constant. But the reason for that is if I think this may be not consistent with ISA on section "6.12.1 Instruction Inputs" saying:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. BTW, the isOperandLegal check may also be necessary, because some of the operand type cannot use inline constant, e.g. second operand of |
||
CommutedMI = swapInlineConstOperands(MI, Src0, Src1); | ||
} else { | ||
// FIXME: Found two non registers to commute. This does happen. | ||
return nullptr; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -335,6 +335,7 @@ let isCommutable = 1, SchedRW = [WriteIntMul, WriteSALU] in { | |
let FPDPRounding = 1 in { | ||
let Predicates = [Has16BitInsts, isGFX8Only] in { | ||
defm V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup>; | ||
let isCommutable = 1 in | ||
defm V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, any_fma>; | ||
} // End Predicates = [Has16BitInsts, isGFX8Only] | ||
|
||
|
@@ -639,8 +640,10 @@ let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in { | |
defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>; | ||
defm V_SUB_I16 : VOP3Inst_t16 <"v_sub_i16", VOP_I16_I16_I16>; | ||
|
||
defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>; | ||
defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>; | ||
let isCommutable = 1 in { | ||
defm V_MAD_U32_U16 : VOP3Inst <"v_mad_u32_u16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>; | ||
defm V_MAD_I32_I16 : VOP3Inst <"v_mad_i32_i16", VOP3_Profile<VOP_I32_I16_I16_I32, VOP3_OPSEL>>; | ||
} // End isCommutable = 1 | ||
|
||
defm V_CVT_PKNORM_I16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_i16_f16", VOP_B32_F16_F16>; | ||
defm V_CVT_PKNORM_U16_F16 : VOP3Inst_t16 <"v_cvt_pknorm_u16_f16", VOP_B32_F16_F16>; | ||
|
@@ -1254,7 +1257,7 @@ let SubtargetPredicate = isGFX10Plus in { | |
def : PermlanePat<int_amdgcn_permlane16, V_PERMLANE16_B32_e64, vt>; | ||
def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32_e64, vt>; | ||
} | ||
|
||
let isCommutable = 1 in | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add braces |
||
defm V_ADD_NC_U16 : VOP3Inst_t16 <"v_add_nc_u16", VOP_I16_I16_I16, add>; | ||
defm V_SUB_NC_U16 : VOP3Inst_t16 <"v_sub_nc_u16", VOP_I16_I16_I16, sub>; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,11 @@ | ||
# RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -run-pass=machine-cse -verify-machineinstrs %s -o - 2>&1 | FileCheck --check-prefix=GCN %s | ||
|
||
# GCN-LABEL: name: test_machine_cse_op_sel | ||
# GCN: %2:vgpr_32 = V_ADD_NC_U16_e64 0, %0, 0, %1, 1, 0, implicit $mode, implicit $exec | ||
# GCN: %3:vgpr_32 = V_ADD_NC_U16_e64 0, %1, 0, %0, 1, 0, implicit $mode, implicit $exec | ||
# GCN: DS_WRITE2_B32_gfx9 undef %4:vgpr_32, %2, %3, 0, 1, 0, implicit $exec | ||
--- | ||
name: test_machine_cse_op_sel | ||
body: | | ||
; GCN-LABEL: name: test_machine_cse_op_sel | ||
; GCN: %2:vgpr_32 = V_ADD_NC_U16_e64 0, %0, 0, %1, 1, 0, implicit $mode, implicit $exec | ||
; GCN-NEXT: DS_WRITE2_B32_gfx9 undef %4:vgpr_32, %2, %2, 0, 1, 0, implicit $exec | ||
bb.0: | ||
%0:vgpr_32 = IMPLICIT_DEF | ||
%1:vgpr_32 = IMPLICIT_DEF | ||
|
@@ -15,3 +14,14 @@ body: | | |
DS_WRITE2_B32_gfx9 undef %4:vgpr_32, %2, %3, 0, 1, 0, implicit $exec | ||
... | ||
|
||
--- | ||
name: test_machine_cse_op_inline_const | ||
body: | | ||
; GCN-LABEL: name: test_machine_cse_op_inline_const | ||
; GCN: %0:vgpr_32 = V_ADD_NC_U16_e64 0, 64, 0, -3, 1, 0, implicit $mode, implicit $exec | ||
; GCN-NEXT: DS_WRITE2_B32_gfx9 undef %2:vgpr_32, %0, %0, 0, 1, 0, implicit $exec | ||
bb.0: | ||
%1:vgpr_32 = V_ADD_NC_U16_e64 0, 64, 0, -3, 1, 0, implicit $mode, implicit $exec | ||
%2:vgpr_32 = V_ADD_NC_U16_e64 0, -3, 0, 64, 1, 0, implicit $mode, implicit $exec | ||
DS_WRITE2_B32_gfx9 undef %4:vgpr_32, %1, %2, 0, 1, 0, implicit $exec | ||
... | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There should be a dedicated commute test for every changed opcode like this. What about the _e32 forms? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hi, for |
Uh oh!
There was an error while loading. Please reload this page.