@@ -494,6 +494,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
494
494
return false ;
495
495
496
496
const unsigned Size = Ty.getSizeInBits ();
497
+ if (Ty.isPointerVector ())
498
+ return true ;
497
499
if (Size <= 64 )
498
500
return false ;
499
501
// Address space 8 pointers get their own workaround.
@@ -502,9 +504,6 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
502
504
if (!Ty.isVector ())
503
505
return true ;
504
506
505
- if (Ty.isPointerVector ())
506
- return true ;
507
-
508
507
unsigned EltSize = Ty.getScalarSizeInBits ();
509
508
return EltSize != 32 && EltSize != 64 ;
510
509
}
@@ -5794,8 +5793,9 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
5794
5793
return Reg;
5795
5794
}
5796
5795
5797
- Register AMDGPULegalizerInfo::fixStoreSourceType (
5798
- MachineIRBuilder &B, Register VData, bool IsFormat) const {
5796
+ Register AMDGPULegalizerInfo::fixStoreSourceType (MachineIRBuilder &B,
5797
+ Register VData, LLT MemTy,
5798
+ bool IsFormat) const {
5799
5799
MachineRegisterInfo *MRI = B.getMRI ();
5800
5800
LLT Ty = MRI->getType (VData);
5801
5801
@@ -5805,6 +5805,10 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
5805
5805
if (hasBufferRsrcWorkaround (Ty))
5806
5806
return castBufferRsrcToV4I32 (VData, B);
5807
5807
5808
+ if (shouldBitcastLoadStoreType (ST, Ty, MemTy)) {
5809
+ Ty = getBitcastRegisterType (Ty);
5810
+ VData = B.buildBitcast (Ty, VData).getReg (0 );
5811
+ }
5808
5812
// Fixup illegal register types for i8 stores.
5809
5813
if (Ty == LLT::scalar (8 ) || Ty == S16) {
5810
5814
Register AnyExt = B.buildAnyExt (LLT::scalar (32 ), VData).getReg (0 );
@@ -5822,22 +5826,26 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
5822
5826
}
5823
5827
5824
5828
bool AMDGPULegalizerInfo::legalizeBufferStore (MachineInstr &MI,
5825
- MachineRegisterInfo &MRI,
5826
- MachineIRBuilder &B,
5829
+ LegalizerHelper &Helper,
5827
5830
bool IsTyped,
5828
5831
bool IsFormat) const {
5832
+ MachineIRBuilder &B = Helper.MIRBuilder ;
5833
+ MachineRegisterInfo &MRI = *B.getMRI ();
5834
+
5829
5835
Register VData = MI.getOperand (1 ).getReg ();
5830
5836
LLT Ty = MRI.getType (VData);
5831
5837
LLT EltTy = Ty.getScalarType ();
5832
5838
const bool IsD16 = IsFormat && (EltTy.getSizeInBits () == 16 );
5833
5839
const LLT S32 = LLT::scalar (32 );
5834
5840
5835
- VData = fixStoreSourceType (B, VData, IsFormat);
5836
- castBufferRsrcArgToV4I32 (MI, B, 2 );
5837
- Register RSrc = MI.getOperand (2 ).getReg ();
5838
-
5839
5841
MachineMemOperand *MMO = *MI.memoperands_begin ();
5840
5842
const int MemSize = MMO->getSize ().getValue ();
5843
+ LLT MemTy = MMO->getMemoryType ();
5844
+
5845
+ VData = fixStoreSourceType (B, VData, MemTy, IsFormat);
5846
+
5847
+ castBufferRsrcArgToV4I32 (MI, B, 2 );
5848
+ Register RSrc = MI.getOperand (2 ).getReg ();
5841
5849
5842
5850
unsigned ImmOffset;
5843
5851
@@ -5930,10 +5938,13 @@ static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
5930
5938
}
5931
5939
5932
5940
bool AMDGPULegalizerInfo::legalizeBufferLoad (MachineInstr &MI,
5933
- MachineRegisterInfo &MRI,
5934
- MachineIRBuilder &B,
5941
+ LegalizerHelper &Helper,
5935
5942
bool IsFormat,
5936
5943
bool IsTyped) const {
5944
+ MachineIRBuilder &B = Helper.MIRBuilder ;
5945
+ MachineRegisterInfo &MRI = *B.getMRI ();
5946
+ GISelChangeObserver &Observer = Helper.Observer ;
5947
+
5937
5948
// FIXME: Verifier should enforce 1 MMO for these intrinsics.
5938
5949
MachineMemOperand *MMO = *MI.memoperands_begin ();
5939
5950
const LLT MemTy = MMO->getMemoryType ();
@@ -5982,9 +5993,21 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
5982
5993
// Make addrspace 8 pointer loads into 4xs32 loads here, so the rest of the
5983
5994
// logic doesn't have to handle that case.
5984
5995
if (hasBufferRsrcWorkaround (Ty)) {
5996
+ Observer.changingInstr (MI);
5985
5997
Ty = castBufferRsrcFromV4I32 (MI, B, MRI, 0 );
5998
+ Observer.changedInstr (MI);
5986
5999
Dst = MI.getOperand (0 ).getReg ();
6000
+ B.setInsertPt (B.getMBB (), MI);
5987
6001
}
6002
+ if (shouldBitcastLoadStoreType (ST, Ty, MemTy)) {
6003
+ Ty = getBitcastRegisterType (Ty);
6004
+ Observer.changingInstr (MI);
6005
+ Helper.bitcastDst (MI, Ty, 0 );
6006
+ Observer.changedInstr (MI);
6007
+ Dst = MI.getOperand (0 ).getReg ();
6008
+ B.setInsertPt (B.getMBB (), MI);
6009
+ }
6010
+
5988
6011
LLT EltTy = Ty.getScalarType ();
5989
6012
const bool IsD16 = IsFormat && (EltTy.getSizeInBits () == 16 );
5990
6013
const bool Unpacked = ST.hasUnpackedD16VMem ();
@@ -7364,17 +7387,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
7364
7387
case Intrinsic::amdgcn_raw_ptr_buffer_store:
7365
7388
case Intrinsic::amdgcn_struct_buffer_store:
7366
7389
case Intrinsic::amdgcn_struct_ptr_buffer_store:
7367
- return legalizeBufferStore (MI, MRI, B , false , false );
7390
+ return legalizeBufferStore (MI, Helper , false , false );
7368
7391
case Intrinsic::amdgcn_raw_buffer_store_format:
7369
7392
case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
7370
7393
case Intrinsic::amdgcn_struct_buffer_store_format:
7371
7394
case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
7372
- return legalizeBufferStore (MI, MRI, B , false , true );
7395
+ return legalizeBufferStore (MI, Helper , false , true );
7373
7396
case Intrinsic::amdgcn_raw_tbuffer_store:
7374
7397
case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
7375
7398
case Intrinsic::amdgcn_struct_tbuffer_store:
7376
7399
case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
7377
- return legalizeBufferStore (MI, MRI, B , true , true );
7400
+ return legalizeBufferStore (MI, Helper , true , true );
7378
7401
case Intrinsic::amdgcn_raw_buffer_load:
7379
7402
case Intrinsic::amdgcn_raw_ptr_buffer_load:
7380
7403
case Intrinsic::amdgcn_raw_atomic_buffer_load:
@@ -7383,17 +7406,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
7383
7406
case Intrinsic::amdgcn_struct_ptr_buffer_load:
7384
7407
case Intrinsic::amdgcn_struct_atomic_buffer_load:
7385
7408
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
7386
- return legalizeBufferLoad (MI, MRI, B , false , false );
7409
+ return legalizeBufferLoad (MI, Helper , false , false );
7387
7410
case Intrinsic::amdgcn_raw_buffer_load_format:
7388
7411
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
7389
7412
case Intrinsic::amdgcn_struct_buffer_load_format:
7390
7413
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
7391
- return legalizeBufferLoad (MI, MRI, B , true , false );
7414
+ return legalizeBufferLoad (MI, Helper , true , false );
7392
7415
case Intrinsic::amdgcn_raw_tbuffer_load:
7393
7416
case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
7394
7417
case Intrinsic::amdgcn_struct_tbuffer_load:
7395
7418
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
7396
- return legalizeBufferLoad (MI, MRI, B , true , true );
7419
+ return legalizeBufferLoad (MI, Helper , true , true );
7397
7420
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
7398
7421
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
7399
7422
case Intrinsic::amdgcn_struct_buffer_atomic_swap:
0 commit comments