Skip to content

Commit e86563f

Browse files
krzysz00Sterling-Augustine
authored andcommitted
[AMDGPU][GlobalISel] Fix load/store of pointer vectors, buffer.*.pN (llvm#110714)
Certain pointer address spaces were not being correctly handled by the GlobalISel lowering for buffer_load and buffer_store. 1. ptr addrspace(1) and addrspace(4) did not have rewrite patterns defined for them, while p0 did, since those pointer types weren't in the list of types that was iterated to form the patterns. 2. Vectors of pointers need to be bitcast to vectors of the corresponding scalars, since there doesn't seem to be a good way to define the rewrite patterns for buffer_load/store of those types The need to bitcast vectors of pointers was also revealed to affect ordinary `G_LOAD` and `G_STORE` in some cases, so `shouldBitcastLoadStore()` has been fixed to handle it properly.
1 parent 54d2c08 commit e86563f

12 files changed

+3980
-263
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

+42-19
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
494494
return false;
495495

496496
const unsigned Size = Ty.getSizeInBits();
497+
if (Ty.isPointerVector())
498+
return true;
497499
if (Size <= 64)
498500
return false;
499501
// Address space 8 pointers get their own workaround.
@@ -502,9 +504,6 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
502504
if (!Ty.isVector())
503505
return true;
504506

505-
if (Ty.isPointerVector())
506-
return true;
507-
508507
unsigned EltSize = Ty.getScalarSizeInBits();
509508
return EltSize != 32 && EltSize != 64;
510509
}
@@ -5794,8 +5793,9 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
57945793
return Reg;
57955794
}
57965795

5797-
Register AMDGPULegalizerInfo::fixStoreSourceType(
5798-
MachineIRBuilder &B, Register VData, bool IsFormat) const {
5796+
Register AMDGPULegalizerInfo::fixStoreSourceType(MachineIRBuilder &B,
5797+
Register VData, LLT MemTy,
5798+
bool IsFormat) const {
57995799
MachineRegisterInfo *MRI = B.getMRI();
58005800
LLT Ty = MRI->getType(VData);
58015801

@@ -5805,6 +5805,10 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58055805
if (hasBufferRsrcWorkaround(Ty))
58065806
return castBufferRsrcToV4I32(VData, B);
58075807

5808+
if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
5809+
Ty = getBitcastRegisterType(Ty);
5810+
VData = B.buildBitcast(Ty, VData).getReg(0);
5811+
}
58085812
// Fixup illegal register types for i8 stores.
58095813
if (Ty == LLT::scalar(8) || Ty == S16) {
58105814
Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0);
@@ -5822,22 +5826,26 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58225826
}
58235827

58245828
bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI,
5825-
MachineRegisterInfo &MRI,
5826-
MachineIRBuilder &B,
5829+
LegalizerHelper &Helper,
58275830
bool IsTyped,
58285831
bool IsFormat) const {
5832+
MachineIRBuilder &B = Helper.MIRBuilder;
5833+
MachineRegisterInfo &MRI = *B.getMRI();
5834+
58295835
Register VData = MI.getOperand(1).getReg();
58305836
LLT Ty = MRI.getType(VData);
58315837
LLT EltTy = Ty.getScalarType();
58325838
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
58335839
const LLT S32 = LLT::scalar(32);
58345840

5835-
VData = fixStoreSourceType(B, VData, IsFormat);
5836-
castBufferRsrcArgToV4I32(MI, B, 2);
5837-
Register RSrc = MI.getOperand(2).getReg();
5838-
58395841
MachineMemOperand *MMO = *MI.memoperands_begin();
58405842
const int MemSize = MMO->getSize().getValue();
5843+
LLT MemTy = MMO->getMemoryType();
5844+
5845+
VData = fixStoreSourceType(B, VData, MemTy, IsFormat);
5846+
5847+
castBufferRsrcArgToV4I32(MI, B, 2);
5848+
Register RSrc = MI.getOperand(2).getReg();
58415849

58425850
unsigned ImmOffset;
58435851

@@ -5930,10 +5938,13 @@ static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
59305938
}
59315939

59325940
bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
5933-
MachineRegisterInfo &MRI,
5934-
MachineIRBuilder &B,
5941+
LegalizerHelper &Helper,
59355942
bool IsFormat,
59365943
bool IsTyped) const {
5944+
MachineIRBuilder &B = Helper.MIRBuilder;
5945+
MachineRegisterInfo &MRI = *B.getMRI();
5946+
GISelChangeObserver &Observer = Helper.Observer;
5947+
59375948
// FIXME: Verifier should enforce 1 MMO for these intrinsics.
59385949
MachineMemOperand *MMO = *MI.memoperands_begin();
59395950
const LLT MemTy = MMO->getMemoryType();
@@ -5982,9 +5993,21 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
59825993
// Make addrspace 8 pointers loads into 4xs32 loads here, so the rest of the
59835994
// logic doesn't have to handle that case.
59845995
if (hasBufferRsrcWorkaround(Ty)) {
5996+
Observer.changingInstr(MI);
59855997
Ty = castBufferRsrcFromV4I32(MI, B, MRI, 0);
5998+
Observer.changedInstr(MI);
59865999
Dst = MI.getOperand(0).getReg();
6000+
B.setInsertPt(B.getMBB(), MI);
59876001
}
6002+
if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
6003+
Ty = getBitcastRegisterType(Ty);
6004+
Observer.changingInstr(MI);
6005+
Helper.bitcastDst(MI, Ty, 0);
6006+
Observer.changedInstr(MI);
6007+
Dst = MI.getOperand(0).getReg();
6008+
B.setInsertPt(B.getMBB(), MI);
6009+
}
6010+
59886011
LLT EltTy = Ty.getScalarType();
59896012
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
59906013
const bool Unpacked = ST.hasUnpackedD16VMem();
@@ -7364,17 +7387,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
73647387
case Intrinsic::amdgcn_raw_ptr_buffer_store:
73657388
case Intrinsic::amdgcn_struct_buffer_store:
73667389
case Intrinsic::amdgcn_struct_ptr_buffer_store:
7367-
return legalizeBufferStore(MI, MRI, B, false, false);
7390+
return legalizeBufferStore(MI, Helper, false, false);
73687391
case Intrinsic::amdgcn_raw_buffer_store_format:
73697392
case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
73707393
case Intrinsic::amdgcn_struct_buffer_store_format:
73717394
case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
7372-
return legalizeBufferStore(MI, MRI, B, false, true);
7395+
return legalizeBufferStore(MI, Helper, false, true);
73737396
case Intrinsic::amdgcn_raw_tbuffer_store:
73747397
case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
73757398
case Intrinsic::amdgcn_struct_tbuffer_store:
73767399
case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
7377-
return legalizeBufferStore(MI, MRI, B, true, true);
7400+
return legalizeBufferStore(MI, Helper, true, true);
73787401
case Intrinsic::amdgcn_raw_buffer_load:
73797402
case Intrinsic::amdgcn_raw_ptr_buffer_load:
73807403
case Intrinsic::amdgcn_raw_atomic_buffer_load:
@@ -7383,17 +7406,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
73837406
case Intrinsic::amdgcn_struct_ptr_buffer_load:
73847407
case Intrinsic::amdgcn_struct_atomic_buffer_load:
73857408
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
7386-
return legalizeBufferLoad(MI, MRI, B, false, false);
7409+
return legalizeBufferLoad(MI, Helper, false, false);
73877410
case Intrinsic::amdgcn_raw_buffer_load_format:
73887411
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
73897412
case Intrinsic::amdgcn_struct_buffer_load_format:
73907413
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
7391-
return legalizeBufferLoad(MI, MRI, B, true, false);
7414+
return legalizeBufferLoad(MI, Helper, true, false);
73927415
case Intrinsic::amdgcn_raw_tbuffer_load:
73937416
case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
73947417
case Intrinsic::amdgcn_struct_tbuffer_load:
73957418
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
7396-
return legalizeBufferLoad(MI, MRI, B, true, true);
7419+
return legalizeBufferLoad(MI, Helper, true, true);
73977420
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
73987421
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
73997422
case Intrinsic::amdgcn_struct_buffer_atomic_swap:

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

+5-7
Original file line numberDiff line numberDiff line change
@@ -195,15 +195,13 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
195195

196196
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
197197
Register Reg, bool ImageStore = false) const;
198-
Register fixStoreSourceType(MachineIRBuilder &B, Register VData,
198+
Register fixStoreSourceType(MachineIRBuilder &B, Register VData, LLT MemTy,
199199
bool IsFormat) const;
200200

201-
bool legalizeBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
202-
MachineIRBuilder &B, bool IsTyped,
203-
bool IsFormat) const;
204-
bool legalizeBufferLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
205-
MachineIRBuilder &B, bool IsFormat,
206-
bool IsTyped) const;
201+
bool legalizeBufferStore(MachineInstr &MI, LegalizerHelper &Helper,
202+
bool IsTyped, bool IsFormat) const;
203+
bool legalizeBufferLoad(MachineInstr &MI, LegalizerHelper &Helper,
204+
bool IsFormat, bool IsTyped) const;
207205
bool legalizeBufferAtomic(MachineInstr &MI, MachineIRBuilder &B,
208206
Intrinsic::ID IID) const;
209207

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

+1-1
Original file line numberDiff line numberDiff line change
@@ -590,7 +590,7 @@ class RegisterTypes<list<ValueType> reg_types> {
590590

591591
def Reg16Types : RegisterTypes<[i16, f16, bf16]>;
592592
def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>;
593-
def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, v4i16, v4f16, v4bf16]>;
593+
def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, p1, p4, v4i16, v4f16, v4bf16]>;
594594
def Reg96Types : RegisterTypes<[v3i32, v3f32]>;
595595
def Reg128Types : RegisterTypes<[v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16]>;
596596

0 commit comments

Comments
 (0)