Skip to content

Commit bb62af7

Browse files
authored
[AMDGPU][True16][CodeGen] true16 codegen for valu op (#124797)
Add true16 instruction selection for VALU ops, enable the `real-true16` attribute, and update the codegen tests.
1 parent 30b021f commit bb62af7

File tree

9 files changed

+767
-335
lines changed

9 files changed

+767
-335
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 36 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -815,7 +815,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
815815
if (Fix16BitCopies) {
816816
if (((Size == 16) != (SrcSize == 16))) {
817817
// Non-VGPR Src and Dst will later be expanded back to 32 bits.
818-
assert(ST.hasTrue16BitInsts());
818+
assert(ST.useRealTrue16Insts());
819819
Register &RegToFix = (Size == 32) ? DestReg : SrcReg;
820820
MCRegister SubReg = RI.getSubReg(RegToFix, AMDGPU::lo16);
821821
RegToFix = SubReg;
@@ -989,7 +989,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
989989
return;
990990
}
991991

992-
if (ST.hasTrue16BitInsts()) {
992+
if (ST.useRealTrue16Insts()) {
993993
if (IsSGPRSrc) {
994994
assert(SrcLow);
995995
SrcReg = NewSrcReg;
@@ -5581,27 +5581,39 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
55815581
return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
55825582
: AMDGPU::V_FLOOR_F16_fake16_e64;
55835583
case AMDGPU::S_TRUNC_F16:
5584-
return AMDGPU::V_TRUNC_F16_fake16_e64;
5584+
return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
5585+
: AMDGPU::V_TRUNC_F16_fake16_e64;
55855586
case AMDGPU::S_RNDNE_F16:
5586-
return AMDGPU::V_RNDNE_F16_fake16_e64;
5587+
return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
5588+
: AMDGPU::V_RNDNE_F16_fake16_e64;
55875589
case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
55885590
case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
55895591
case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
55905592
case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64;
55915593
case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64;
55925594
case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64;
55935595
case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64;
5594-
case AMDGPU::S_ADD_F16: return AMDGPU::V_ADD_F16_fake16_e64;
5595-
case AMDGPU::S_SUB_F16: return AMDGPU::V_SUB_F16_fake16_e64;
5596-
case AMDGPU::S_MIN_F16: return AMDGPU::V_MIN_F16_fake16_e64;
5597-
case AMDGPU::S_MAX_F16: return AMDGPU::V_MAX_F16_fake16_e64;
5596+
case AMDGPU::S_ADD_F16:
5597+
return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
5598+
: AMDGPU::V_ADD_F16_fake16_e64;
5599+
case AMDGPU::S_SUB_F16:
5600+
return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
5601+
: AMDGPU::V_SUB_F16_fake16_e64;
5602+
case AMDGPU::S_MIN_F16:
5603+
return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
5604+
: AMDGPU::V_MIN_F16_fake16_e64;
5605+
case AMDGPU::S_MAX_F16:
5606+
return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
5607+
: AMDGPU::V_MAX_F16_fake16_e64;
55985608
case AMDGPU::S_MINIMUM_F16:
55995609
return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
56005610
: AMDGPU::V_MINIMUM_F16_fake16_e64;
56015611
case AMDGPU::S_MAXIMUM_F16:
56025612
return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
56035613
: AMDGPU::V_MAXIMUM_F16_fake16_e64;
5604-
case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
5614+
case AMDGPU::S_MUL_F16:
5615+
return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
5616+
: AMDGPU::V_MUL_F16_fake16_e64;
56055617
case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
56065618
case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
56075619
case AMDGPU::S_FMAC_F16:
@@ -5666,15 +5678,25 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
56665678
return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
56675679
: AMDGPU::V_CMP_NLT_F16_fake16_e64;
56685680
case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
5669-
case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64;
5681+
case AMDGPU::V_S_EXP_F16_e64:
5682+
return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
5683+
: AMDGPU::V_EXP_F16_fake16_e64;
56705684
case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
5671-
case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_fake16_e64;
5685+
case AMDGPU::V_S_LOG_F16_e64:
5686+
return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
5687+
: AMDGPU::V_LOG_F16_fake16_e64;
56725688
case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
5673-
case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_fake16_e64;
5689+
case AMDGPU::V_S_RCP_F16_e64:
5690+
return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
5691+
: AMDGPU::V_RCP_F16_fake16_e64;
56745692
case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
5675-
case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_fake16_e64;
5693+
case AMDGPU::V_S_RSQ_F16_e64:
5694+
return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
5695+
: AMDGPU::V_RSQ_F16_fake16_e64;
56765696
case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
5677-
case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_fake16_e64;
5697+
case AMDGPU::V_S_SQRT_F16_e64:
5698+
return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
5699+
: AMDGPU::V_SQRT_F16_fake16_e64;
56785700
}
56795701
llvm_unreachable(
56805702
"Unexpected scalar opcode without corresponding vector one!");

0 commit comments

Comments
 (0)