@@ -494,6 +494,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
494
494
return false ;
495
495
496
496
const unsigned Size = Ty.getSizeInBits ();
497
+ if (Ty.isPointerVector ())
498
+ return true ;
497
499
if (Size <= 64 )
498
500
return false ;
499
501
// Address space 8 pointers get their own workaround.
@@ -502,9 +504,6 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
502
504
if (!Ty.isVector ())
503
505
return true ;
504
506
505
- if (Ty.isPointerVector ())
506
- return true ;
507
-
508
507
unsigned EltSize = Ty.getScalarSizeInBits ();
509
508
return EltSize != 32 && EltSize != 64 ;
510
509
}
@@ -5820,8 +5819,9 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
5820
5819
return Reg;
5821
5820
}
5822
5821
5823
- Register AMDGPULegalizerInfo::fixStoreSourceType (
5824
- MachineIRBuilder &B, Register VData, bool IsFormat) const {
5822
+ Register AMDGPULegalizerInfo::fixStoreSourceType (MachineIRBuilder &B,
5823
+ Register VData, LLT MemTy,
5824
+ bool IsFormat) const {
5825
5825
MachineRegisterInfo *MRI = B.getMRI ();
5826
5826
LLT Ty = MRI->getType (VData);
5827
5827
@@ -5831,6 +5831,10 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
5831
5831
if (hasBufferRsrcWorkaround (Ty))
5832
5832
return castBufferRsrcToV4I32 (VData, B);
5833
5833
5834
+ if (shouldBitcastLoadStoreType (ST, Ty, MemTy)) {
5835
+ Ty = getBitcastRegisterType (Ty);
5836
+ VData = B.buildBitcast (Ty, VData).getReg (0 );
5837
+ }
5834
5838
// Fixup illegal register types for i8 stores.
5835
5839
if (Ty == LLT::scalar (8 ) || Ty == S16) {
5836
5840
Register AnyExt = B.buildAnyExt (LLT::scalar (32 ), VData).getReg (0 );
@@ -5848,22 +5852,26 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
5848
5852
}
5849
5853
5850
5854
bool AMDGPULegalizerInfo::legalizeBufferStore (MachineInstr &MI,
5851
- MachineRegisterInfo &MRI,
5852
- MachineIRBuilder &B,
5855
+ LegalizerHelper &Helper,
5853
5856
bool IsTyped,
5854
5857
bool IsFormat) const {
5858
+ MachineIRBuilder &B = Helper.MIRBuilder ;
5859
+ MachineRegisterInfo &MRI = *B.getMRI ();
5860
+
5855
5861
Register VData = MI.getOperand (1 ).getReg ();
5856
5862
LLT Ty = MRI.getType (VData);
5857
5863
LLT EltTy = Ty.getScalarType ();
5858
5864
const bool IsD16 = IsFormat && (EltTy.getSizeInBits () == 16 );
5859
5865
const LLT S32 = LLT::scalar (32 );
5860
5866
5861
- VData = fixStoreSourceType (B, VData, IsFormat);
5862
- castBufferRsrcArgToV4I32 (MI, B, 2 );
5863
- Register RSrc = MI.getOperand (2 ).getReg ();
5864
-
5865
5867
MachineMemOperand *MMO = *MI.memoperands_begin ();
5866
5868
const int MemSize = MMO->getSize ().getValue ();
5869
+ LLT MemTy = MMO->getMemoryType ();
5870
+
5871
+ VData = fixStoreSourceType (B, VData, MemTy, IsFormat);
5872
+
5873
+ castBufferRsrcArgToV4I32 (MI, B, 2 );
5874
+ Register RSrc = MI.getOperand (2 ).getReg ();
5867
5875
5868
5876
unsigned ImmOffset;
5869
5877
@@ -5956,10 +5964,13 @@ static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
5956
5964
}
5957
5965
5958
5966
bool AMDGPULegalizerInfo::legalizeBufferLoad (MachineInstr &MI,
5959
- MachineRegisterInfo &MRI,
5960
- MachineIRBuilder &B,
5967
+ LegalizerHelper &Helper,
5961
5968
bool IsFormat,
5962
5969
bool IsTyped) const {
5970
+ MachineIRBuilder &B = Helper.MIRBuilder ;
5971
+ MachineRegisterInfo &MRI = *B.getMRI ();
5972
+ GISelChangeObserver &Observer = Helper.Observer ;
5973
+
5963
5974
// FIXME: Verifier should enforce 1 MMO for these intrinsics.
5964
5975
MachineMemOperand *MMO = *MI.memoperands_begin ();
5965
5976
const LLT MemTy = MMO->getMemoryType ();
@@ -6008,9 +6019,21 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
6008
6019
// Make addrspace 8 pointer loads into 4xs32 loads here, so the rest of the
6009
6020
// logic doesn't have to handle that case.
6010
6021
if (hasBufferRsrcWorkaround (Ty)) {
6022
+ Observer.changingInstr (MI);
6011
6023
Ty = castBufferRsrcFromV4I32 (MI, B, MRI, 0 );
6024
+ Observer.changedInstr (MI);
6012
6025
Dst = MI.getOperand (0 ).getReg ();
6026
+ B.setInsertPt (B.getMBB (), MI);
6013
6027
}
6028
+ if (shouldBitcastLoadStoreType (ST, Ty, MemTy)) {
6029
+ Ty = getBitcastRegisterType (Ty);
6030
+ Observer.changingInstr (MI);
6031
+ Helper.bitcastDst (MI, Ty, 0 );
6032
+ Observer.changedInstr (MI);
6033
+ Dst = MI.getOperand (0 ).getReg ();
6034
+ B.setInsertPt (B.getMBB (), MI);
6035
+ }
6036
+
6014
6037
LLT EltTy = Ty.getScalarType ();
6015
6038
const bool IsD16 = IsFormat && (EltTy.getSizeInBits () == 16 );
6016
6039
const bool Unpacked = ST.hasUnpackedD16VMem ();
@@ -7390,17 +7413,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
7390
7413
case Intrinsic::amdgcn_raw_ptr_buffer_store:
7391
7414
case Intrinsic::amdgcn_struct_buffer_store:
7392
7415
case Intrinsic::amdgcn_struct_ptr_buffer_store:
7393
- return legalizeBufferStore (MI, MRI, B , false , false );
7416
+ return legalizeBufferStore (MI, Helper , false , false );
7394
7417
case Intrinsic::amdgcn_raw_buffer_store_format:
7395
7418
case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
7396
7419
case Intrinsic::amdgcn_struct_buffer_store_format:
7397
7420
case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
7398
- return legalizeBufferStore (MI, MRI, B , false , true );
7421
+ return legalizeBufferStore (MI, Helper , false , true );
7399
7422
case Intrinsic::amdgcn_raw_tbuffer_store:
7400
7423
case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
7401
7424
case Intrinsic::amdgcn_struct_tbuffer_store:
7402
7425
case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
7403
- return legalizeBufferStore (MI, MRI, B , true , true );
7426
+ return legalizeBufferStore (MI, Helper , true , true );
7404
7427
case Intrinsic::amdgcn_raw_buffer_load:
7405
7428
case Intrinsic::amdgcn_raw_ptr_buffer_load:
7406
7429
case Intrinsic::amdgcn_raw_atomic_buffer_load:
@@ -7409,17 +7432,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
7409
7432
case Intrinsic::amdgcn_struct_ptr_buffer_load:
7410
7433
case Intrinsic::amdgcn_struct_atomic_buffer_load:
7411
7434
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
7412
- return legalizeBufferLoad (MI, MRI, B , false , false );
7435
+ return legalizeBufferLoad (MI, Helper , false , false );
7413
7436
case Intrinsic::amdgcn_raw_buffer_load_format:
7414
7437
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
7415
7438
case Intrinsic::amdgcn_struct_buffer_load_format:
7416
7439
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
7417
- return legalizeBufferLoad (MI, MRI, B , true , false );
7440
+ return legalizeBufferLoad (MI, Helper , true , false );
7418
7441
case Intrinsic::amdgcn_raw_tbuffer_load:
7419
7442
case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
7420
7443
case Intrinsic::amdgcn_struct_tbuffer_load:
7421
7444
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
7422
- return legalizeBufferLoad (MI, MRI, B , true , true );
7445
+ return legalizeBufferLoad (MI, Helper , true , true );
7423
7446
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
7424
7447
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
7425
7448
case Intrinsic::amdgcn_struct_buffer_atomic_swap:
0 commit comments