@@ -814,7 +814,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
814
814
if (Fix16BitCopies) {
815
815
if (((Size == 16 ) != (SrcSize == 16 ))) {
816
816
// Non-VGPR Src and Dst will later be expanded back to 32 bits.
817
- assert (ST.hasTrue16BitInsts ());
817
+ assert (ST.useRealTrue16Insts ());
818
818
MCRegister &RegToFix = (Size == 32 ) ? DestReg : SrcReg;
819
819
MCRegister SubReg = RI.getSubReg (RegToFix, AMDGPU::lo16);
820
820
RegToFix = SubReg;
@@ -988,7 +988,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
988
988
return ;
989
989
}
990
990
991
- if (ST.hasTrue16BitInsts ()) {
991
+ if (ST.useRealTrue16Insts ()) {
992
992
if (IsSGPRSrc) {
993
993
assert (SrcLow);
994
994
SrcReg = NewSrcReg;
@@ -5559,30 +5559,44 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
5559
5559
return ST.useRealTrue16Insts () ? AMDGPU::V_FLOOR_F16_t16_e64
5560
5560
: AMDGPU::V_FLOOR_F16_fake16_e64;
5561
5561
case AMDGPU::S_TRUNC_F16:
5562
- return AMDGPU::V_TRUNC_F16_fake16_e64;
5562
+ return ST.useRealTrue16Insts () ? AMDGPU::V_TRUNC_F16_t16_e64
5563
+ : AMDGPU::V_TRUNC_F16_fake16_e64;
5563
5564
case AMDGPU::S_RNDNE_F16:
5564
- return AMDGPU::V_RNDNE_F16_fake16_e64;
5565
+ return ST.useRealTrue16Insts () ? AMDGPU::V_RNDNE_F16_t16_e64
5566
+ : AMDGPU::V_RNDNE_F16_fake16_e64;
5565
5567
case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
5566
5568
case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
5567
5569
case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
5568
5570
case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64;
5569
5571
case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64;
5570
5572
case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64;
5571
5573
case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64;
5572
- case AMDGPU::S_ADD_F16: return AMDGPU::V_ADD_F16_fake16_e64;
5573
- case AMDGPU::S_SUB_F16: return AMDGPU::V_SUB_F16_fake16_e64;
5574
- case AMDGPU::S_MIN_F16: return AMDGPU::V_MIN_F16_fake16_e64;
5575
- case AMDGPU::S_MAX_F16: return AMDGPU::V_MAX_F16_fake16_e64;
5574
+ case AMDGPU::S_ADD_F16:
5575
+ return ST.useRealTrue16Insts () ? AMDGPU::V_ADD_F16_t16_e64
5576
+ : AMDGPU::V_ADD_F16_fake16_e64;
5577
+ case AMDGPU::S_SUB_F16:
5578
+ return ST.useRealTrue16Insts () ? AMDGPU::V_SUB_F16_t16_e64
5579
+ : AMDGPU::V_SUB_F16_fake16_e64;
5580
+ case AMDGPU::S_MIN_F16:
5581
+ return ST.useRealTrue16Insts () ? AMDGPU::V_MIN_F16_t16_e64
5582
+ : AMDGPU::V_MIN_F16_fake16_e64;
5583
+ case AMDGPU::S_MAX_F16:
5584
+ return ST.useRealTrue16Insts () ? AMDGPU::V_MAX_F16_t16_e64
5585
+ : AMDGPU::V_MAX_F16_fake16_e64;
5576
5586
case AMDGPU::S_MINIMUM_F16:
5577
5587
return ST.useRealTrue16Insts () ? AMDGPU::V_MINIMUM_F16_t16_e64
5578
5588
: AMDGPU::V_MINIMUM_F16_fake16_e64;
5579
5589
case AMDGPU::S_MAXIMUM_F16:
5580
5590
return ST.useRealTrue16Insts () ? AMDGPU::V_MAXIMUM_F16_t16_e64
5581
5591
: AMDGPU::V_MAXIMUM_F16_fake16_e64;
5582
- case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
5592
+ case AMDGPU::S_MUL_F16:
5593
+ return ST.useRealTrue16Insts () ? AMDGPU::V_MUL_F16_t16_e64
5594
+ : AMDGPU::V_MUL_F16_fake16_e64;
5583
5595
case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
5584
5596
case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
5585
- case AMDGPU::S_FMAC_F16: return AMDGPU::V_FMAC_F16_fake16_e64;
5597
+ case AMDGPU::S_FMAC_F16:
5598
+ return ST.useRealTrue16Insts () ? AMDGPU::V_FMAC_F16_t16_e64
5599
+ : AMDGPU::V_FMAC_F16_fake16_e64;
5586
5600
case AMDGPU::S_FMAMK_F32: return AMDGPU::V_FMAMK_F32;
5587
5601
case AMDGPU::S_FMAAK_F32: return AMDGPU::V_FMAAK_F32;
5588
5602
case AMDGPU::S_CMP_LT_F32: return AMDGPU::V_CMP_LT_F32_e64;
@@ -5642,15 +5656,25 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
5642
5656
return ST.useRealTrue16Insts () ? AMDGPU::V_CMP_NLT_F16_t16_e64
5643
5657
: AMDGPU::V_CMP_NLT_F16_fake16_e64;
5644
5658
case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
5645
- case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64;
5659
+ case AMDGPU::V_S_EXP_F16_e64:
5660
+ return ST.useRealTrue16Insts () ? AMDGPU::V_EXP_F16_t16_e64
5661
+ : AMDGPU::V_EXP_F16_fake16_e64;
5646
5662
case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
5647
- case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_fake16_e64;
5663
+ case AMDGPU::V_S_LOG_F16_e64:
5664
+ return ST.useRealTrue16Insts () ? AMDGPU::V_LOG_F16_t16_e64
5665
+ : AMDGPU::V_LOG_F16_fake16_e64;
5648
5666
case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
5649
- case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_fake16_e64;
5667
+ case AMDGPU::V_S_RCP_F16_e64:
5668
+ return ST.useRealTrue16Insts () ? AMDGPU::V_RCP_F16_t16_e64
5669
+ : AMDGPU::V_RCP_F16_fake16_e64;
5650
5670
case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
5651
- case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_fake16_e64;
5671
+ case AMDGPU::V_S_RSQ_F16_e64:
5672
+ return ST.useRealTrue16Insts () ? AMDGPU::V_RSQ_F16_t16_e64
5673
+ : AMDGPU::V_RSQ_F16_fake16_e64;
5652
5674
case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
5653
- case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_fake16_e64;
5675
+ case AMDGPU::V_S_SQRT_F16_e64:
5676
+ return ST.useRealTrue16Insts () ? AMDGPU::V_SQRT_F16_t16_e64
5677
+ : AMDGPU::V_SQRT_F16_fake16_e64;
5654
5678
}
5655
5679
llvm_unreachable (
5656
5680
" Unexpected scalar opcode without corresponding vector one!" );
0 commit comments