@@ -494,6 +494,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
494
494
return false ;
495
495
496
496
const unsigned Size = Ty.getSizeInBits ();
497
+ if (Ty.isPointerVector ())
498
+ return true ;
497
499
if (Size <= 64 )
498
500
return false ;
499
501
// Address space 8 pointers get their own workaround.
@@ -502,9 +504,6 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
502
504
if (!Ty.isVector ())
503
505
return true ;
504
506
505
- if (Ty.isPointerVector ())
506
- return true ;
507
-
508
507
unsigned EltSize = Ty.getScalarSizeInBits ();
509
508
return EltSize != 32 && EltSize != 64 ;
510
509
}
@@ -5818,8 +5817,9 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
5818
5817
return Reg;
5819
5818
}
5820
5819
5821
- Register AMDGPULegalizerInfo::fixStoreSourceType (
5822
- MachineIRBuilder &B, Register VData, bool IsFormat) const {
5820
+ Register AMDGPULegalizerInfo::fixStoreSourceType (MachineIRBuilder &B,
5821
+ Register VData, LLT MemTy,
5822
+ bool IsFormat) const {
5823
5823
MachineRegisterInfo *MRI = B.getMRI ();
5824
5824
LLT Ty = MRI->getType (VData);
5825
5825
@@ -5829,6 +5829,10 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
5829
5829
if (hasBufferRsrcWorkaround (Ty))
5830
5830
return castBufferRsrcToV4I32 (VData, B);
5831
5831
5832
+ if (shouldBitcastLoadStoreType (ST, Ty, MemTy)) {
5833
+ Ty = getBitcastRegisterType (Ty);
5834
+ VData = B.buildBitcast (Ty, VData).getReg (0 );
5835
+ }
5832
5836
// Fixup illegal register types for i8 stores.
5833
5837
if (Ty == LLT::scalar (8 ) || Ty == S16) {
5834
5838
Register AnyExt = B.buildAnyExt (LLT::scalar (32 ), VData).getReg (0 );
@@ -5846,22 +5850,26 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
5846
5850
}
5847
5851
5848
5852
bool AMDGPULegalizerInfo::legalizeBufferStore (MachineInstr &MI,
5849
- MachineRegisterInfo &MRI,
5850
- MachineIRBuilder &B,
5853
+ LegalizerHelper &Helper,
5851
5854
bool IsTyped,
5852
5855
bool IsFormat) const {
5856
+ MachineIRBuilder &B = Helper.MIRBuilder ;
5857
+ MachineRegisterInfo &MRI = *B.getMRI ();
5858
+
5853
5859
Register VData = MI.getOperand (1 ).getReg ();
5854
5860
LLT Ty = MRI.getType (VData);
5855
5861
LLT EltTy = Ty.getScalarType ();
5856
5862
const bool IsD16 = IsFormat && (EltTy.getSizeInBits () == 16 );
5857
5863
const LLT S32 = LLT::scalar (32 );
5858
5864
5859
- VData = fixStoreSourceType (B, VData, IsFormat);
5860
- castBufferRsrcArgToV4I32 (MI, B, 2 );
5861
- Register RSrc = MI.getOperand (2 ).getReg ();
5862
-
5863
5865
MachineMemOperand *MMO = *MI.memoperands_begin ();
5864
5866
const int MemSize = MMO->getSize ().getValue ();
5867
+ LLT MemTy = MMO->getMemoryType ();
5868
+
5869
+ VData = fixStoreSourceType (B, VData, MemTy, IsFormat);
5870
+
5871
+ castBufferRsrcArgToV4I32 (MI, B, 2 );
5872
+ Register RSrc = MI.getOperand (2 ).getReg ();
5865
5873
5866
5874
unsigned ImmOffset;
5867
5875
@@ -5954,10 +5962,13 @@ static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
5954
5962
}
5955
5963
5956
5964
bool AMDGPULegalizerInfo::legalizeBufferLoad (MachineInstr &MI,
5957
- MachineRegisterInfo &MRI,
5958
- MachineIRBuilder &B,
5965
+ LegalizerHelper &Helper,
5959
5966
bool IsFormat,
5960
5967
bool IsTyped) const {
5968
+ MachineIRBuilder &B = Helper.MIRBuilder ;
5969
+ MachineRegisterInfo &MRI = *B.getMRI ();
5970
+ GISelChangeObserver &Observer = Helper.Observer ;
5971
+
5961
5972
// FIXME: Verifier should enforce 1 MMO for these intrinsics.
5962
5973
MachineMemOperand *MMO = *MI.memoperands_begin ();
5963
5974
const LLT MemTy = MMO->getMemoryType ();
@@ -6006,9 +6017,21 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
6006
6017
// Make addrspace 8 pointer loads into 4xs32 loads here, so the rest of the
6007
6018
// logic doesn't have to handle that case.
6008
6019
if (hasBufferRsrcWorkaround (Ty)) {
6020
+ Observer.changingInstr (MI);
6009
6021
Ty = castBufferRsrcFromV4I32 (MI, B, MRI, 0 );
6022
+ Observer.changedInstr (MI);
6010
6023
Dst = MI.getOperand (0 ).getReg ();
6024
+ B.setInsertPt (B.getMBB (), MI);
6011
6025
}
6026
+ if (shouldBitcastLoadStoreType (ST, Ty, MemTy)) {
6027
+ Ty = getBitcastRegisterType (Ty);
6028
+ Observer.changingInstr (MI);
6029
+ Helper.bitcastDst (MI, Ty, 0 );
6030
+ Observer.changedInstr (MI);
6031
+ Dst = MI.getOperand (0 ).getReg ();
6032
+ B.setInsertPt (B.getMBB (), MI);
6033
+ }
6034
+
6012
6035
LLT EltTy = Ty.getScalarType ();
6013
6036
const bool IsD16 = IsFormat && (EltTy.getSizeInBits () == 16 );
6014
6037
const bool Unpacked = ST.hasUnpackedD16VMem ();
@@ -7388,17 +7411,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
7388
7411
case Intrinsic::amdgcn_raw_ptr_buffer_store:
7389
7412
case Intrinsic::amdgcn_struct_buffer_store:
7390
7413
case Intrinsic::amdgcn_struct_ptr_buffer_store:
7391
- return legalizeBufferStore (MI, MRI, B , false , false );
7414
+ return legalizeBufferStore (MI, Helper , false , false );
7392
7415
case Intrinsic::amdgcn_raw_buffer_store_format:
7393
7416
case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
7394
7417
case Intrinsic::amdgcn_struct_buffer_store_format:
7395
7418
case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
7396
- return legalizeBufferStore (MI, MRI, B , false , true );
7419
+ return legalizeBufferStore (MI, Helper , false , true );
7397
7420
case Intrinsic::amdgcn_raw_tbuffer_store:
7398
7421
case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
7399
7422
case Intrinsic::amdgcn_struct_tbuffer_store:
7400
7423
case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
7401
- return legalizeBufferStore (MI, MRI, B , true , true );
7424
+ return legalizeBufferStore (MI, Helper , true , true );
7402
7425
case Intrinsic::amdgcn_raw_buffer_load:
7403
7426
case Intrinsic::amdgcn_raw_ptr_buffer_load:
7404
7427
case Intrinsic::amdgcn_raw_atomic_buffer_load:
@@ -7407,17 +7430,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
7407
7430
case Intrinsic::amdgcn_struct_ptr_buffer_load:
7408
7431
case Intrinsic::amdgcn_struct_atomic_buffer_load:
7409
7432
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
7410
- return legalizeBufferLoad (MI, MRI, B , false , false );
7433
+ return legalizeBufferLoad (MI, Helper , false , false );
7411
7434
case Intrinsic::amdgcn_raw_buffer_load_format:
7412
7435
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
7413
7436
case Intrinsic::amdgcn_struct_buffer_load_format:
7414
7437
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
7415
- return legalizeBufferLoad (MI, MRI, B , true , false );
7438
+ return legalizeBufferLoad (MI, Helper , true , false );
7416
7439
case Intrinsic::amdgcn_raw_tbuffer_load:
7417
7440
case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
7418
7441
case Intrinsic::amdgcn_struct_tbuffer_load:
7419
7442
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
7420
- return legalizeBufferLoad (MI, MRI, B , true , true );
7443
+ return legalizeBufferLoad (MI, Helper , true , true );
7421
7444
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
7422
7445
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
7423
7446
case Intrinsic::amdgcn_struct_buffer_atomic_swap:
0 commit comments