Skip to content

Commit 19261ad

Browse files
krzysz00easyonaadit
authored andcommitted
[AMDGPU][GlobalISel] Fix load/store of pointer vectors, buffer.*.pN (llvm#110714)
Certain pointer address spaces were not being correctly handled by the GlobalISel lowering for buffer_load and buffer_store. 1. ptr addrspace(1) and addrspace(4) did not have rewrite patterns defined for them, while p0 did, since those pointer types weren't in the list of types that was iterated to form the patterns. 2. Vectors of pointers need to be bitcast to vectors of the corresponding scalars, since there doesn't seem to be a good way to define the rewrite patterns for buffer_load/store of those types The need to bitcast vectors of pointers was also revealed to affect ordinary `G_LOAD` and `G_STORE` in some cases, so `shouldBitcastLoadStore()` has been fixed to handle it properly.
1 parent 393c380 commit 19261ad

12 files changed

+3980
-257
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 42 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
492492
return false;
493493

494494
const unsigned Size = Ty.getSizeInBits();
495+
if (Ty.isPointerVector())
496+
return true;
495497
if (Size <= 64)
496498
return false;
497499
// Address space 8 pointers get their own workaround.
@@ -500,9 +502,6 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
500502
if (!Ty.isVector())
501503
return true;
502504

503-
if (Ty.isPointerVector())
504-
return true;
505-
506505
unsigned EltSize = Ty.getScalarSizeInBits();
507506
return EltSize != 32 && EltSize != 64;
508507
}
@@ -5809,8 +5808,9 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
58095808
return Reg;
58105809
}
58115810

5812-
Register AMDGPULegalizerInfo::fixStoreSourceType(
5813-
MachineIRBuilder &B, Register VData, bool IsFormat) const {
5811+
Register AMDGPULegalizerInfo::fixStoreSourceType(MachineIRBuilder &B,
5812+
Register VData, LLT MemTy,
5813+
bool IsFormat) const {
58145814
MachineRegisterInfo *MRI = B.getMRI();
58155815
LLT Ty = MRI->getType(VData);
58165816

@@ -5820,6 +5820,10 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58205820
if (hasBufferRsrcWorkaround(Ty))
58215821
return castBufferRsrcToV4I32(VData, B);
58225822

5823+
if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
5824+
Ty = getBitcastRegisterType(Ty);
5825+
VData = B.buildBitcast(Ty, VData).getReg(0);
5826+
}
58235827
// Fixup illegal register types for i8 stores.
58245828
if (Ty == LLT::scalar(8) || Ty == S16) {
58255829
Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0);
@@ -5837,22 +5841,26 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58375841
}
58385842

58395843
bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI,
5840-
MachineRegisterInfo &MRI,
5841-
MachineIRBuilder &B,
5844+
LegalizerHelper &Helper,
58425845
bool IsTyped,
58435846
bool IsFormat) const {
5847+
MachineIRBuilder &B = Helper.MIRBuilder;
5848+
MachineRegisterInfo &MRI = *B.getMRI();
5849+
58445850
Register VData = MI.getOperand(1).getReg();
58455851
LLT Ty = MRI.getType(VData);
58465852
LLT EltTy = Ty.getScalarType();
58475853
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
58485854
const LLT S32 = LLT::scalar(32);
58495855

5850-
VData = fixStoreSourceType(B, VData, IsFormat);
5851-
castBufferRsrcArgToV4I32(MI, B, 2);
5852-
Register RSrc = MI.getOperand(2).getReg();
5853-
58545856
MachineMemOperand *MMO = *MI.memoperands_begin();
58555857
const int MemSize = MMO->getSize().getValue();
5858+
LLT MemTy = MMO->getMemoryType();
5859+
5860+
VData = fixStoreSourceType(B, VData, MemTy, IsFormat);
5861+
5862+
castBufferRsrcArgToV4I32(MI, B, 2);
5863+
Register RSrc = MI.getOperand(2).getReg();
58565864

58575865
unsigned ImmOffset;
58585866

@@ -5945,10 +5953,13 @@ static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
59455953
}
59465954

59475955
bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
5948-
MachineRegisterInfo &MRI,
5949-
MachineIRBuilder &B,
5956+
LegalizerHelper &Helper,
59505957
bool IsFormat,
59515958
bool IsTyped) const {
5959+
MachineIRBuilder &B = Helper.MIRBuilder;
5960+
MachineRegisterInfo &MRI = *B.getMRI();
5961+
GISelChangeObserver &Observer = Helper.Observer;
5962+
59525963
// FIXME: Verifier should enforce 1 MMO for these intrinsics.
59535964
MachineMemOperand *MMO = *MI.memoperands_begin();
59545965
const LLT MemTy = MMO->getMemoryType();
@@ -5997,9 +6008,21 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
59976008
// Make addrspace 8 pointers loads into 4xs32 loads here, so the rest of the
59986009
// logic doesn't have to handle that case.
59996010
if (hasBufferRsrcWorkaround(Ty)) {
6011+
Observer.changingInstr(MI);
60006012
Ty = castBufferRsrcFromV4I32(MI, B, MRI, 0);
6013+
Observer.changedInstr(MI);
60016014
Dst = MI.getOperand(0).getReg();
6015+
B.setInsertPt(B.getMBB(), MI);
60026016
}
6017+
if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
6018+
Ty = getBitcastRegisterType(Ty);
6019+
Observer.changingInstr(MI);
6020+
Helper.bitcastDst(MI, Ty, 0);
6021+
Observer.changedInstr(MI);
6022+
Dst = MI.getOperand(0).getReg();
6023+
B.setInsertPt(B.getMBB(), MI);
6024+
}
6025+
60036026
LLT EltTy = Ty.getScalarType();
60046027
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
60056028
const bool Unpacked = ST.hasUnpackedD16VMem();
@@ -7367,17 +7390,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
73677390
case Intrinsic::amdgcn_raw_ptr_buffer_store:
73687391
case Intrinsic::amdgcn_struct_buffer_store:
73697392
case Intrinsic::amdgcn_struct_ptr_buffer_store:
7370-
return legalizeBufferStore(MI, MRI, B, false, false);
7393+
return legalizeBufferStore(MI, Helper, false, false);
73717394
case Intrinsic::amdgcn_raw_buffer_store_format:
73727395
case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
73737396
case Intrinsic::amdgcn_struct_buffer_store_format:
73747397
case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
7375-
return legalizeBufferStore(MI, MRI, B, false, true);
7398+
return legalizeBufferStore(MI, Helper, false, true);
73767399
case Intrinsic::amdgcn_raw_tbuffer_store:
73777400
case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
73787401
case Intrinsic::amdgcn_struct_tbuffer_store:
73797402
case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
7380-
return legalizeBufferStore(MI, MRI, B, true, true);
7403+
return legalizeBufferStore(MI, Helper, true, true);
73817404
case Intrinsic::amdgcn_raw_buffer_load:
73827405
case Intrinsic::amdgcn_raw_ptr_buffer_load:
73837406
case Intrinsic::amdgcn_raw_atomic_buffer_load:
@@ -7386,17 +7409,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
73867409
case Intrinsic::amdgcn_struct_ptr_buffer_load:
73877410
case Intrinsic::amdgcn_struct_atomic_buffer_load:
73887411
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
7389-
return legalizeBufferLoad(MI, MRI, B, false, false);
7412+
return legalizeBufferLoad(MI, Helper, false, false);
73907413
case Intrinsic::amdgcn_raw_buffer_load_format:
73917414
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
73927415
case Intrinsic::amdgcn_struct_buffer_load_format:
73937416
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
7394-
return legalizeBufferLoad(MI, MRI, B, true, false);
7417+
return legalizeBufferLoad(MI, Helper, true, false);
73957418
case Intrinsic::amdgcn_raw_tbuffer_load:
73967419
case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
73977420
case Intrinsic::amdgcn_struct_tbuffer_load:
73987421
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
7399-
return legalizeBufferLoad(MI, MRI, B, true, true);
7422+
return legalizeBufferLoad(MI, Helper, true, true);
74007423
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
74017424
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
74027425
case Intrinsic::amdgcn_struct_buffer_atomic_swap:

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -195,15 +195,13 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
195195

196196
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
197197
Register Reg, bool ImageStore = false) const;
198-
Register fixStoreSourceType(MachineIRBuilder &B, Register VData,
198+
Register fixStoreSourceType(MachineIRBuilder &B, Register VData, LLT MemTy,
199199
bool IsFormat) const;
200200

201-
bool legalizeBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
202-
MachineIRBuilder &B, bool IsTyped,
203-
bool IsFormat) const;
204-
bool legalizeBufferLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
205-
MachineIRBuilder &B, bool IsFormat,
206-
bool IsTyped) const;
201+
bool legalizeBufferStore(MachineInstr &MI, LegalizerHelper &Helper,
202+
bool IsTyped, bool IsFormat) const;
203+
bool legalizeBufferLoad(MachineInstr &MI, LegalizerHelper &Helper,
204+
bool IsFormat, bool IsTyped) const;
207205
bool legalizeBufferAtomic(MachineInstr &MI, MachineIRBuilder &B,
208206
Intrinsic::ID IID) const;
209207

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -592,7 +592,7 @@ class RegisterTypes<list<ValueType> reg_types> {
592592

593593
def Reg16Types : RegisterTypes<[i16, f16, bf16]>;
594594
def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>;
595-
def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, v4i16, v4f16, v4bf16]>;
595+
def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, p1, p4, v4i16, v4f16, v4bf16]>;
596596
def Reg96Types : RegisterTypes<[v3i32, v3f32]>;
597597
def Reg128Types : RegisterTypes<[v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16]>;
598598

0 commit comments

Comments
 (0)