Skip to content

Commit 98a15c7

Browse files
authored
Reapply "[AMDGPU][GlobalISel] Fix load/store of pointer vectors, buffer.*.pN (llvm#110714)" (llvm#111059)
This reverts commit 650c41a. The test failures appear to be from conflicts with other PRs that landed around this time.
1 parent 643df05 commit 98a15c7

12 files changed

+3980
-263
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 42 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
494494
return false;
495495

496496
const unsigned Size = Ty.getSizeInBits();
497+
if (Ty.isPointerVector())
498+
return true;
497499
if (Size <= 64)
498500
return false;
499501
// Address space 8 pointers get their own workaround.
@@ -502,9 +504,6 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
502504
if (!Ty.isVector())
503505
return true;
504506

505-
if (Ty.isPointerVector())
506-
return true;
507-
508507
unsigned EltSize = Ty.getScalarSizeInBits();
509508
return EltSize != 32 && EltSize != 64;
510509
}
@@ -5794,8 +5793,9 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
57945793
return Reg;
57955794
}
57965795

5797-
Register AMDGPULegalizerInfo::fixStoreSourceType(
5798-
MachineIRBuilder &B, Register VData, bool IsFormat) const {
5796+
Register AMDGPULegalizerInfo::fixStoreSourceType(MachineIRBuilder &B,
5797+
Register VData, LLT MemTy,
5798+
bool IsFormat) const {
57995799
MachineRegisterInfo *MRI = B.getMRI();
58005800
LLT Ty = MRI->getType(VData);
58015801

@@ -5805,6 +5805,10 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58055805
if (hasBufferRsrcWorkaround(Ty))
58065806
return castBufferRsrcToV4I32(VData, B);
58075807

5808+
if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
5809+
Ty = getBitcastRegisterType(Ty);
5810+
VData = B.buildBitcast(Ty, VData).getReg(0);
5811+
}
58085812
// Fixup illegal register types for i8 stores.
58095813
if (Ty == LLT::scalar(8) || Ty == S16) {
58105814
Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0);
@@ -5822,22 +5826,26 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58225826
}
58235827

58245828
bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI,
5825-
MachineRegisterInfo &MRI,
5826-
MachineIRBuilder &B,
5829+
LegalizerHelper &Helper,
58275830
bool IsTyped,
58285831
bool IsFormat) const {
5832+
MachineIRBuilder &B = Helper.MIRBuilder;
5833+
MachineRegisterInfo &MRI = *B.getMRI();
5834+
58295835
Register VData = MI.getOperand(1).getReg();
58305836
LLT Ty = MRI.getType(VData);
58315837
LLT EltTy = Ty.getScalarType();
58325838
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
58335839
const LLT S32 = LLT::scalar(32);
58345840

5835-
VData = fixStoreSourceType(B, VData, IsFormat);
5836-
castBufferRsrcArgToV4I32(MI, B, 2);
5837-
Register RSrc = MI.getOperand(2).getReg();
5838-
58395841
MachineMemOperand *MMO = *MI.memoperands_begin();
58405842
const int MemSize = MMO->getSize().getValue();
5843+
LLT MemTy = MMO->getMemoryType();
5844+
5845+
VData = fixStoreSourceType(B, VData, MemTy, IsFormat);
5846+
5847+
castBufferRsrcArgToV4I32(MI, B, 2);
5848+
Register RSrc = MI.getOperand(2).getReg();
58415849

58425850
unsigned ImmOffset;
58435851

@@ -5930,10 +5938,13 @@ static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
59305938
}
59315939

59325940
bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
5933-
MachineRegisterInfo &MRI,
5934-
MachineIRBuilder &B,
5941+
LegalizerHelper &Helper,
59355942
bool IsFormat,
59365943
bool IsTyped) const {
5944+
MachineIRBuilder &B = Helper.MIRBuilder;
5945+
MachineRegisterInfo &MRI = *B.getMRI();
5946+
GISelChangeObserver &Observer = Helper.Observer;
5947+
59375948
// FIXME: Verifier should enforce 1 MMO for these intrinsics.
59385949
MachineMemOperand *MMO = *MI.memoperands_begin();
59395950
const LLT MemTy = MMO->getMemoryType();
@@ -5982,9 +5993,21 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
59825993
// Make addrspace 8 pointers loads into 4xs32 loads here, so the rest of the
59835994
// logic doesn't have to handle that case.
59845995
if (hasBufferRsrcWorkaround(Ty)) {
5996+
Observer.changingInstr(MI);
59855997
Ty = castBufferRsrcFromV4I32(MI, B, MRI, 0);
5998+
Observer.changedInstr(MI);
59865999
Dst = MI.getOperand(0).getReg();
6000+
B.setInsertPt(B.getMBB(), MI);
59876001
}
6002+
if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
6003+
Ty = getBitcastRegisterType(Ty);
6004+
Observer.changingInstr(MI);
6005+
Helper.bitcastDst(MI, Ty, 0);
6006+
Observer.changedInstr(MI);
6007+
Dst = MI.getOperand(0).getReg();
6008+
B.setInsertPt(B.getMBB(), MI);
6009+
}
6010+
59886011
LLT EltTy = Ty.getScalarType();
59896012
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
59906013
const bool Unpacked = ST.hasUnpackedD16VMem();
@@ -7364,17 +7387,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
73647387
case Intrinsic::amdgcn_raw_ptr_buffer_store:
73657388
case Intrinsic::amdgcn_struct_buffer_store:
73667389
case Intrinsic::amdgcn_struct_ptr_buffer_store:
7367-
return legalizeBufferStore(MI, MRI, B, false, false);
7390+
return legalizeBufferStore(MI, Helper, false, false);
73687391
case Intrinsic::amdgcn_raw_buffer_store_format:
73697392
case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
73707393
case Intrinsic::amdgcn_struct_buffer_store_format:
73717394
case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
7372-
return legalizeBufferStore(MI, MRI, B, false, true);
7395+
return legalizeBufferStore(MI, Helper, false, true);
73737396
case Intrinsic::amdgcn_raw_tbuffer_store:
73747397
case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
73757398
case Intrinsic::amdgcn_struct_tbuffer_store:
73767399
case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
7377-
return legalizeBufferStore(MI, MRI, B, true, true);
7400+
return legalizeBufferStore(MI, Helper, true, true);
73787401
case Intrinsic::amdgcn_raw_buffer_load:
73797402
case Intrinsic::amdgcn_raw_ptr_buffer_load:
73807403
case Intrinsic::amdgcn_raw_atomic_buffer_load:
@@ -7383,17 +7406,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
73837406
case Intrinsic::amdgcn_struct_ptr_buffer_load:
73847407
case Intrinsic::amdgcn_struct_atomic_buffer_load:
73857408
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
7386-
return legalizeBufferLoad(MI, MRI, B, false, false);
7409+
return legalizeBufferLoad(MI, Helper, false, false);
73877410
case Intrinsic::amdgcn_raw_buffer_load_format:
73887411
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
73897412
case Intrinsic::amdgcn_struct_buffer_load_format:
73907413
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
7391-
return legalizeBufferLoad(MI, MRI, B, true, false);
7414+
return legalizeBufferLoad(MI, Helper, true, false);
73927415
case Intrinsic::amdgcn_raw_tbuffer_load:
73937416
case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
73947417
case Intrinsic::amdgcn_struct_tbuffer_load:
73957418
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
7396-
return legalizeBufferLoad(MI, MRI, B, true, true);
7419+
return legalizeBufferLoad(MI, Helper, true, true);
73977420
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
73987421
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
73997422
case Intrinsic::amdgcn_struct_buffer_atomic_swap:

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -195,15 +195,13 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
195195

196196
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
197197
Register Reg, bool ImageStore = false) const;
198-
Register fixStoreSourceType(MachineIRBuilder &B, Register VData,
198+
Register fixStoreSourceType(MachineIRBuilder &B, Register VData, LLT MemTy,
199199
bool IsFormat) const;
200200

201-
bool legalizeBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
202-
MachineIRBuilder &B, bool IsTyped,
203-
bool IsFormat) const;
204-
bool legalizeBufferLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
205-
MachineIRBuilder &B, bool IsFormat,
206-
bool IsTyped) const;
201+
bool legalizeBufferStore(MachineInstr &MI, LegalizerHelper &Helper,
202+
bool IsTyped, bool IsFormat) const;
203+
bool legalizeBufferLoad(MachineInstr &MI, LegalizerHelper &Helper,
204+
bool IsFormat, bool IsTyped) const;
207205
bool legalizeBufferAtomic(MachineInstr &MI, MachineIRBuilder &B,
208206
Intrinsic::ID IID) const;
209207

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -590,7 +590,7 @@ class RegisterTypes<list<ValueType> reg_types> {
590590

591591
def Reg16Types : RegisterTypes<[i16, f16, bf16]>;
592592
def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>;
593-
def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, v4i16, v4f16, v4bf16]>;
593+
def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, p1, p4, v4i16, v4f16, v4bf16]>;
594594
def Reg96Types : RegisterTypes<[v3i32, v3f32]>;
595595
def Reg128Types : RegisterTypes<[v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16]>;
596596

0 commit comments

Comments
 (0)