Skip to content

Commit e657206

Browse files
committed
true16 selection for valu op
1 parent 7d172f9 commit e657206

11 files changed

+1739
-750
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 39 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -814,7 +814,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
814814
if (Fix16BitCopies) {
815815
if (((Size == 16) != (SrcSize == 16))) {
816816
// Non-VGPR Src and Dst will later be expanded back to 32 bits.
817-
assert(ST.hasTrue16BitInsts());
817+
assert(ST.useRealTrue16Insts());
818818
MCRegister &RegToFix = (Size == 32) ? DestReg : SrcReg;
819819
MCRegister SubReg = RI.getSubReg(RegToFix, AMDGPU::lo16);
820820
RegToFix = SubReg;
@@ -988,7 +988,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
988988
return;
989989
}
990990

991-
if (ST.hasTrue16BitInsts()) {
991+
if (ST.useRealTrue16Insts()) {
992992
if (IsSGPRSrc) {
993993
assert(SrcLow);
994994
SrcReg = NewSrcReg;
@@ -5559,30 +5559,44 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
55595559
return ST.useRealTrue16Insts() ? AMDGPU::V_FLOOR_F16_t16_e64
55605560
: AMDGPU::V_FLOOR_F16_fake16_e64;
55615561
case AMDGPU::S_TRUNC_F16:
5562-
return AMDGPU::V_TRUNC_F16_fake16_e64;
5562+
return ST.useRealTrue16Insts() ? AMDGPU::V_TRUNC_F16_t16_e64
5563+
: AMDGPU::V_TRUNC_F16_fake16_e64;
55635564
case AMDGPU::S_RNDNE_F16:
5564-
return AMDGPU::V_RNDNE_F16_fake16_e64;
5565+
return ST.useRealTrue16Insts() ? AMDGPU::V_RNDNE_F16_t16_e64
5566+
: AMDGPU::V_RNDNE_F16_fake16_e64;
55655567
case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
55665568
case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
55675569
case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
55685570
case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64;
55695571
case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64;
55705572
case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64;
55715573
case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64;
5572-
case AMDGPU::S_ADD_F16: return AMDGPU::V_ADD_F16_fake16_e64;
5573-
case AMDGPU::S_SUB_F16: return AMDGPU::V_SUB_F16_fake16_e64;
5574-
case AMDGPU::S_MIN_F16: return AMDGPU::V_MIN_F16_fake16_e64;
5575-
case AMDGPU::S_MAX_F16: return AMDGPU::V_MAX_F16_fake16_e64;
5574+
case AMDGPU::S_ADD_F16:
5575+
return ST.useRealTrue16Insts() ? AMDGPU::V_ADD_F16_t16_e64
5576+
: AMDGPU::V_ADD_F16_fake16_e64;
5577+
case AMDGPU::S_SUB_F16:
5578+
return ST.useRealTrue16Insts() ? AMDGPU::V_SUB_F16_t16_e64
5579+
: AMDGPU::V_SUB_F16_fake16_e64;
5580+
case AMDGPU::S_MIN_F16:
5581+
return ST.useRealTrue16Insts() ? AMDGPU::V_MIN_F16_t16_e64
5582+
: AMDGPU::V_MIN_F16_fake16_e64;
5583+
case AMDGPU::S_MAX_F16:
5584+
return ST.useRealTrue16Insts() ? AMDGPU::V_MAX_F16_t16_e64
5585+
: AMDGPU::V_MAX_F16_fake16_e64;
55765586
case AMDGPU::S_MINIMUM_F16:
55775587
return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
55785588
: AMDGPU::V_MINIMUM_F16_fake16_e64;
55795589
case AMDGPU::S_MAXIMUM_F16:
55805590
return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
55815591
: AMDGPU::V_MAXIMUM_F16_fake16_e64;
5582-
case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
5592+
case AMDGPU::S_MUL_F16:
5593+
return ST.useRealTrue16Insts() ? AMDGPU::V_MUL_F16_t16_e64
5594+
: AMDGPU::V_MUL_F16_fake16_e64;
55835595
case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
55845596
case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
5585-
case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_fake16_e64;
5597+
case AMDGPU::S_FMAC_F16:
5598+
return ST.useRealTrue16Insts() ? AMDGPU::V_FMAC_F16_t16_e64
5599+
: AMDGPU::V_FMAC_F16_fake16_e64;
55865600
case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
55875601
case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
55885602
case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
@@ -5642,15 +5656,25 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
56425656
return ST.useRealTrue16Insts() ? AMDGPU::V_CMP_NLT_F16_t16_e64
56435657
: AMDGPU::V_CMP_NLT_F16_fake16_e64;
56445658
case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
5645-
case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64;
5659+
case AMDGPU::V_S_EXP_F16_e64:
5660+
return ST.useRealTrue16Insts() ? AMDGPU::V_EXP_F16_t16_e64
5661+
: AMDGPU::V_EXP_F16_fake16_e64;
56465662
case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
5647-
case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_fake16_e64;
5663+
case AMDGPU::V_S_LOG_F16_e64:
5664+
return ST.useRealTrue16Insts() ? AMDGPU::V_LOG_F16_t16_e64
5665+
: AMDGPU::V_LOG_F16_fake16_e64;
56485666
case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
5649-
case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_fake16_e64;
5667+
case AMDGPU::V_S_RCP_F16_e64:
5668+
return ST.useRealTrue16Insts() ? AMDGPU::V_RCP_F16_t16_e64
5669+
: AMDGPU::V_RCP_F16_fake16_e64;
56505670
case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
5651-
case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_fake16_e64;
5671+
case AMDGPU::V_S_RSQ_F16_e64:
5672+
return ST.useRealTrue16Insts() ? AMDGPU::V_RSQ_F16_t16_e64
5673+
: AMDGPU::V_RSQ_F16_fake16_e64;
56525674
case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
5653-
case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_fake16_e64;
5675+
case AMDGPU::V_S_SQRT_F16_e64:
5676+
return ST.useRealTrue16Insts() ? AMDGPU::V_SQRT_F16_t16_e64
5677+
: AMDGPU::V_SQRT_F16_fake16_e64;
56545678
}
56555679
llvm_unreachable(
56565680
"Unexpected scalar opcode without corresponding vector one!");

0 commit comments

Comments
 (0)