Skip to content

Commit ea33af6

Browse files
authored
Reapply "[AMDGPU][GlobalISel] Fix load/store of pointer vectors, buffer.*.pN (#110714)" v3 (#114443)
This reverts commit 8a849a2. It seems I missed a spot when trying to ensure that the code in the instruction selection tests was actually legalized MIR.
1 parent 69edef1 commit ea33af6

12 files changed

+4038
-269
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

+42-19
Original file line number | Diff line number | Diff line change
@@ -494,6 +494,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
494494
return false;
495495

496496
const unsigned Size = Ty.getSizeInBits();
497+
if (Ty.isPointerVector())
498+
return true;
497499
if (Size <= 64)
498500
return false;
499501
// Address space 8 pointers get their own workaround.
@@ -502,9 +504,6 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
502504
if (!Ty.isVector())
503505
return true;
504506

505-
if (Ty.isPointerVector())
506-
return true;
507-
508507
unsigned EltSize = Ty.getScalarSizeInBits();
509508
return EltSize != 32 && EltSize != 64;
510509
}
@@ -5820,8 +5819,9 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
58205819
return Reg;
58215820
}
58225821

5823-
Register AMDGPULegalizerInfo::fixStoreSourceType(
5824-
MachineIRBuilder &B, Register VData, bool IsFormat) const {
5822+
Register AMDGPULegalizerInfo::fixStoreSourceType(MachineIRBuilder &B,
5823+
Register VData, LLT MemTy,
5824+
bool IsFormat) const {
58255825
MachineRegisterInfo *MRI = B.getMRI();
58265826
LLT Ty = MRI->getType(VData);
58275827

@@ -5831,6 +5831,10 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58315831
if (hasBufferRsrcWorkaround(Ty))
58325832
return castBufferRsrcToV4I32(VData, B);
58335833

5834+
if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
5835+
Ty = getBitcastRegisterType(Ty);
5836+
VData = B.buildBitcast(Ty, VData).getReg(0);
5837+
}
58345838
// Fixup illegal register types for i8 stores.
58355839
if (Ty == LLT::scalar(8) || Ty == S16) {
58365840
Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0);
@@ -5848,22 +5852,26 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58485852
}
58495853

58505854
bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI,
5851-
MachineRegisterInfo &MRI,
5852-
MachineIRBuilder &B,
5855+
LegalizerHelper &Helper,
58535856
bool IsTyped,
58545857
bool IsFormat) const {
5858+
MachineIRBuilder &B = Helper.MIRBuilder;
5859+
MachineRegisterInfo &MRI = *B.getMRI();
5860+
58555861
Register VData = MI.getOperand(1).getReg();
58565862
LLT Ty = MRI.getType(VData);
58575863
LLT EltTy = Ty.getScalarType();
58585864
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
58595865
const LLT S32 = LLT::scalar(32);
58605866

5861-
VData = fixStoreSourceType(B, VData, IsFormat);
5862-
castBufferRsrcArgToV4I32(MI, B, 2);
5863-
Register RSrc = MI.getOperand(2).getReg();
5864-
58655867
MachineMemOperand *MMO = *MI.memoperands_begin();
58665868
const int MemSize = MMO->getSize().getValue();
5869+
LLT MemTy = MMO->getMemoryType();
5870+
5871+
VData = fixStoreSourceType(B, VData, MemTy, IsFormat);
5872+
5873+
castBufferRsrcArgToV4I32(MI, B, 2);
5874+
Register RSrc = MI.getOperand(2).getReg();
58675875

58685876
unsigned ImmOffset;
58695877

@@ -5956,10 +5964,13 @@ static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
59565964
}
59575965

59585966
bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
5959-
MachineRegisterInfo &MRI,
5960-
MachineIRBuilder &B,
5967+
LegalizerHelper &Helper,
59615968
bool IsFormat,
59625969
bool IsTyped) const {
5970+
MachineIRBuilder &B = Helper.MIRBuilder;
5971+
MachineRegisterInfo &MRI = *B.getMRI();
5972+
GISelChangeObserver &Observer = Helper.Observer;
5973+
59635974
// FIXME: Verifier should enforce 1 MMO for these intrinsics.
59645975
MachineMemOperand *MMO = *MI.memoperands_begin();
59655976
const LLT MemTy = MMO->getMemoryType();
@@ -6008,9 +6019,21 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
60086019
// Make addrspace 8 pointers loads into 4xs32 loads here, so the rest of the
60096020
// logic doesn't have to handle that case.
60106021
if (hasBufferRsrcWorkaround(Ty)) {
6022+
Observer.changingInstr(MI);
60116023
Ty = castBufferRsrcFromV4I32(MI, B, MRI, 0);
6024+
Observer.changedInstr(MI);
60126025
Dst = MI.getOperand(0).getReg();
6026+
B.setInsertPt(B.getMBB(), MI);
60136027
}
6028+
if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
6029+
Ty = getBitcastRegisterType(Ty);
6030+
Observer.changingInstr(MI);
6031+
Helper.bitcastDst(MI, Ty, 0);
6032+
Observer.changedInstr(MI);
6033+
Dst = MI.getOperand(0).getReg();
6034+
B.setInsertPt(B.getMBB(), MI);
6035+
}
6036+
60146037
LLT EltTy = Ty.getScalarType();
60156038
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
60166039
const bool Unpacked = ST.hasUnpackedD16VMem();
@@ -7390,17 +7413,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
73907413
case Intrinsic::amdgcn_raw_ptr_buffer_store:
73917414
case Intrinsic::amdgcn_struct_buffer_store:
73927415
case Intrinsic::amdgcn_struct_ptr_buffer_store:
7393-
return legalizeBufferStore(MI, MRI, B, false, false);
7416+
return legalizeBufferStore(MI, Helper, false, false);
73947417
case Intrinsic::amdgcn_raw_buffer_store_format:
73957418
case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
73967419
case Intrinsic::amdgcn_struct_buffer_store_format:
73977420
case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
7398-
return legalizeBufferStore(MI, MRI, B, false, true);
7421+
return legalizeBufferStore(MI, Helper, false, true);
73997422
case Intrinsic::amdgcn_raw_tbuffer_store:
74007423
case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
74017424
case Intrinsic::amdgcn_struct_tbuffer_store:
74027425
case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
7403-
return legalizeBufferStore(MI, MRI, B, true, true);
7426+
return legalizeBufferStore(MI, Helper, true, true);
74047427
case Intrinsic::amdgcn_raw_buffer_load:
74057428
case Intrinsic::amdgcn_raw_ptr_buffer_load:
74067429
case Intrinsic::amdgcn_raw_atomic_buffer_load:
@@ -7409,17 +7432,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
74097432
case Intrinsic::amdgcn_struct_ptr_buffer_load:
74107433
case Intrinsic::amdgcn_struct_atomic_buffer_load:
74117434
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
7412-
return legalizeBufferLoad(MI, MRI, B, false, false);
7435+
return legalizeBufferLoad(MI, Helper, false, false);
74137436
case Intrinsic::amdgcn_raw_buffer_load_format:
74147437
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
74157438
case Intrinsic::amdgcn_struct_buffer_load_format:
74167439
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
7417-
return legalizeBufferLoad(MI, MRI, B, true, false);
7440+
return legalizeBufferLoad(MI, Helper, true, false);
74187441
case Intrinsic::amdgcn_raw_tbuffer_load:
74197442
case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
74207443
case Intrinsic::amdgcn_struct_tbuffer_load:
74217444
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
7422-
return legalizeBufferLoad(MI, MRI, B, true, true);
7445+
return legalizeBufferLoad(MI, Helper, true, true);
74237446
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
74247447
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
74257448
case Intrinsic::amdgcn_struct_buffer_atomic_swap:

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

+5-7
Original file line number | Diff line number | Diff line change
@@ -195,15 +195,13 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
195195

196196
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
197197
Register Reg, bool ImageStore = false) const;
198-
Register fixStoreSourceType(MachineIRBuilder &B, Register VData,
198+
Register fixStoreSourceType(MachineIRBuilder &B, Register VData, LLT MemTy,
199199
bool IsFormat) const;
200200

201-
bool legalizeBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
202-
MachineIRBuilder &B, bool IsTyped,
203-
bool IsFormat) const;
204-
bool legalizeBufferLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
205-
MachineIRBuilder &B, bool IsFormat,
206-
bool IsTyped) const;
201+
bool legalizeBufferStore(MachineInstr &MI, LegalizerHelper &Helper,
202+
bool IsTyped, bool IsFormat) const;
203+
bool legalizeBufferLoad(MachineInstr &MI, LegalizerHelper &Helper,
204+
bool IsFormat, bool IsTyped) const;
207205
bool legalizeBufferAtomic(MachineInstr &MI, MachineIRBuilder &B,
208206
Intrinsic::ID IID) const;
209207

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

+1-1
Original file line number | Diff line number | Diff line change
@@ -585,7 +585,7 @@ class RegisterTypes<list<ValueType> reg_types> {
585585

586586
def Reg16Types : RegisterTypes<[i16, f16, bf16]>;
587587
def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>;
588-
def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, v4i16, v4f16, v4bf16]>;
588+
def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, p1, p4, v4i16, v4f16, v4bf16]>;
589589
def Reg96Types : RegisterTypes<[v3i32, v3f32]>;
590590
def Reg128Types : RegisterTypes<[v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16]>;
591591

0 commit comments

Comments
 (0)