@@ -815,7 +815,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
815
815
if (Fix16BitCopies) {
816
816
if (((Size == 16 ) != (SrcSize == 16 ))) {
817
817
// Non-VGPR Src and Dst will later be expanded back to 32 bits.
818
- assert (ST.hasTrue16BitInsts ());
818
+ assert (ST.useRealTrue16Insts ());
819
819
Register &RegToFix = (Size == 32 ) ? DestReg : SrcReg;
820
820
MCRegister SubReg = RI.getSubReg (RegToFix, AMDGPU::lo16);
821
821
RegToFix = SubReg;
@@ -989,7 +989,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
989
989
return ;
990
990
}
991
991
992
- if (ST.hasTrue16BitInsts ()) {
992
+ if (ST.useRealTrue16Insts ()) {
993
993
if (IsSGPRSrc) {
994
994
assert (SrcLow);
995
995
SrcReg = NewSrcReg;
@@ -5579,27 +5579,39 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
5579
5579
return ST.useRealTrue16Insts () ? AMDGPU::V_FLOOR_F16_t16_e64
5580
5580
: AMDGPU::V_FLOOR_F16_fake16_e64;
5581
5581
case AMDGPU::S_TRUNC_F16:
5582
- return AMDGPU::V_TRUNC_F16_fake16_e64;
5582
+ return ST.useRealTrue16Insts () ? AMDGPU::V_TRUNC_F16_t16_e64
5583
+ : AMDGPU::V_TRUNC_F16_fake16_e64;
5583
5584
case AMDGPU::S_RNDNE_F16:
5584
- return AMDGPU::V_RNDNE_F16_fake16_e64;
5585
+ return ST.useRealTrue16Insts () ? AMDGPU::V_RNDNE_F16_t16_e64
5586
+ : AMDGPU::V_RNDNE_F16_fake16_e64;
5585
5587
case AMDGPU::S_ADD_F32: return AMDGPU::V_ADD_F32_e64;
5586
5588
case AMDGPU::S_SUB_F32: return AMDGPU::V_SUB_F32_e64;
5587
5589
case AMDGPU::S_MIN_F32: return AMDGPU::V_MIN_F32_e64;
5588
5590
case AMDGPU::S_MAX_F32: return AMDGPU::V_MAX_F32_e64;
5589
5591
case AMDGPU::S_MINIMUM_F32: return AMDGPU::V_MINIMUM_F32_e64;
5590
5592
case AMDGPU::S_MAXIMUM_F32: return AMDGPU::V_MAXIMUM_F32_e64;
5591
5593
case AMDGPU::S_MUL_F32: return AMDGPU::V_MUL_F32_e64;
5592
- case AMDGPU::S_ADD_F16: return AMDGPU::V_ADD_F16_fake16_e64;
5593
- case AMDGPU::S_SUB_F16: return AMDGPU::V_SUB_F16_fake16_e64;
5594
- case AMDGPU::S_MIN_F16: return AMDGPU::V_MIN_F16_fake16_e64;
5595
- case AMDGPU::S_MAX_F16: return AMDGPU::V_MAX_F16_fake16_e64;
5594
+ case AMDGPU::S_ADD_F16:
5595
+ return ST.useRealTrue16Insts () ? AMDGPU::V_ADD_F16_t16_e64
5596
+ : AMDGPU::V_ADD_F16_fake16_e64;
5597
+ case AMDGPU::S_SUB_F16:
5598
+ return ST.useRealTrue16Insts () ? AMDGPU::V_SUB_F16_t16_e64
5599
+ : AMDGPU::V_SUB_F16_fake16_e64;
5600
+ case AMDGPU::S_MIN_F16:
5601
+ return ST.useRealTrue16Insts () ? AMDGPU::V_MIN_F16_t16_e64
5602
+ : AMDGPU::V_MIN_F16_fake16_e64;
5603
+ case AMDGPU::S_MAX_F16:
5604
+ return ST.useRealTrue16Insts () ? AMDGPU::V_MAX_F16_t16_e64
5605
+ : AMDGPU::V_MAX_F16_fake16_e64;
5596
5606
case AMDGPU::S_MINIMUM_F16:
5597
5607
return ST.useRealTrue16Insts () ? AMDGPU::V_MINIMUM_F16_t16_e64
5598
5608
: AMDGPU::V_MINIMUM_F16_fake16_e64;
5599
5609
case AMDGPU::S_MAXIMUM_F16:
5600
5610
return ST.useRealTrue16Insts () ? AMDGPU::V_MAXIMUM_F16_t16_e64
5601
5611
: AMDGPU::V_MAXIMUM_F16_fake16_e64;
5602
- case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
5612
+ case AMDGPU::S_MUL_F16:
5613
+ return ST.useRealTrue16Insts () ? AMDGPU::V_MUL_F16_t16_e64
5614
+ : AMDGPU::V_MUL_F16_fake16_e64;
5603
5615
case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
5604
5616
case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
5605
5617
case AMDGPU::S_FMAC_F16:
@@ -5664,15 +5676,25 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
5664
5676
return ST.useRealTrue16Insts () ? AMDGPU::V_CMP_NLT_F16_t16_e64
5665
5677
: AMDGPU::V_CMP_NLT_F16_fake16_e64;
5666
5678
case AMDGPU::V_S_EXP_F32_e64: return AMDGPU::V_EXP_F32_e64;
5667
- case AMDGPU::V_S_EXP_F16_e64: return AMDGPU::V_EXP_F16_fake16_e64;
5679
+ case AMDGPU::V_S_EXP_F16_e64:
5680
+ return ST.useRealTrue16Insts () ? AMDGPU::V_EXP_F16_t16_e64
5681
+ : AMDGPU::V_EXP_F16_fake16_e64;
5668
5682
case AMDGPU::V_S_LOG_F32_e64: return AMDGPU::V_LOG_F32_e64;
5669
- case AMDGPU::V_S_LOG_F16_e64: return AMDGPU::V_LOG_F16_fake16_e64;
5683
+ case AMDGPU::V_S_LOG_F16_e64:
5684
+ return ST.useRealTrue16Insts () ? AMDGPU::V_LOG_F16_t16_e64
5685
+ : AMDGPU::V_LOG_F16_fake16_e64;
5670
5686
case AMDGPU::V_S_RCP_F32_e64: return AMDGPU::V_RCP_F32_e64;
5671
- case AMDGPU::V_S_RCP_F16_e64: return AMDGPU::V_RCP_F16_fake16_e64;
5687
+ case AMDGPU::V_S_RCP_F16_e64:
5688
+ return ST.useRealTrue16Insts () ? AMDGPU::V_RCP_F16_t16_e64
5689
+ : AMDGPU::V_RCP_F16_fake16_e64;
5672
5690
case AMDGPU::V_S_RSQ_F32_e64: return AMDGPU::V_RSQ_F32_e64;
5673
- case AMDGPU::V_S_RSQ_F16_e64: return AMDGPU::V_RSQ_F16_fake16_e64;
5691
+ case AMDGPU::V_S_RSQ_F16_e64:
5692
+ return ST.useRealTrue16Insts () ? AMDGPU::V_RSQ_F16_t16_e64
5693
+ : AMDGPU::V_RSQ_F16_fake16_e64;
5674
5694
case AMDGPU::V_S_SQRT_F32_e64: return AMDGPU::V_SQRT_F32_e64;
5675
- case AMDGPU::V_S_SQRT_F16_e64: return AMDGPU::V_SQRT_F16_fake16_e64;
5695
+ case AMDGPU::V_S_SQRT_F16_e64:
5696
+ return ST.useRealTrue16Insts () ? AMDGPU::V_SQRT_F16_t16_e64
5697
+ : AMDGPU::V_SQRT_F16_fake16_e64;
5676
5698
}
5677
5699
llvm_unreachable (
5678
5700
" Unexpected scalar opcode without corresponding vector one!" );
0 commit comments