Skip to content

Commit 4b4a0d4

Browse files
authored
Reapply "[AMDGPU][GlobalISel] Fix load/store of pointer vectors, buffer.*.pN (#110714)" v2 (#111708)
This adds `-disable-gisel-legality-check` to some gfx6 and gfx7 test lines to prevent behavior mismatches between debug and release builds The first attempted reapply was #111059 This reverts commit e075dcf.
1 parent 1dff330 commit 4b4a0d4

12 files changed

+4015
-275
lines changed

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

+42-19
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,8 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
494494
return false;
495495

496496
const unsigned Size = Ty.getSizeInBits();
497+
if (Ty.isPointerVector())
498+
return true;
497499
if (Size <= 64)
498500
return false;
499501
// Address space 8 pointers get their own workaround.
@@ -502,9 +504,6 @@ static bool loadStoreBitcastWorkaround(const LLT Ty) {
502504
if (!Ty.isVector())
503505
return true;
504506

505-
if (Ty.isPointerVector())
506-
return true;
507-
508507
unsigned EltSize = Ty.getScalarSizeInBits();
509508
return EltSize != 32 && EltSize != 64;
510509
}
@@ -5818,8 +5817,9 @@ Register AMDGPULegalizerInfo::handleD16VData(MachineIRBuilder &B,
58185817
return Reg;
58195818
}
58205819

5821-
Register AMDGPULegalizerInfo::fixStoreSourceType(
5822-
MachineIRBuilder &B, Register VData, bool IsFormat) const {
5820+
Register AMDGPULegalizerInfo::fixStoreSourceType(MachineIRBuilder &B,
5821+
Register VData, LLT MemTy,
5822+
bool IsFormat) const {
58235823
MachineRegisterInfo *MRI = B.getMRI();
58245824
LLT Ty = MRI->getType(VData);
58255825

@@ -5829,6 +5829,10 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58295829
if (hasBufferRsrcWorkaround(Ty))
58305830
return castBufferRsrcToV4I32(VData, B);
58315831

5832+
if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
5833+
Ty = getBitcastRegisterType(Ty);
5834+
VData = B.buildBitcast(Ty, VData).getReg(0);
5835+
}
58325836
// Fixup illegal register types for i8 stores.
58335837
if (Ty == LLT::scalar(8) || Ty == S16) {
58345838
Register AnyExt = B.buildAnyExt(LLT::scalar(32), VData).getReg(0);
@@ -5846,22 +5850,26 @@ Register AMDGPULegalizerInfo::fixStoreSourceType(
58465850
}
58475851

58485852
bool AMDGPULegalizerInfo::legalizeBufferStore(MachineInstr &MI,
5849-
MachineRegisterInfo &MRI,
5850-
MachineIRBuilder &B,
5853+
LegalizerHelper &Helper,
58515854
bool IsTyped,
58525855
bool IsFormat) const {
5856+
MachineIRBuilder &B = Helper.MIRBuilder;
5857+
MachineRegisterInfo &MRI = *B.getMRI();
5858+
58535859
Register VData = MI.getOperand(1).getReg();
58545860
LLT Ty = MRI.getType(VData);
58555861
LLT EltTy = Ty.getScalarType();
58565862
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
58575863
const LLT S32 = LLT::scalar(32);
58585864

5859-
VData = fixStoreSourceType(B, VData, IsFormat);
5860-
castBufferRsrcArgToV4I32(MI, B, 2);
5861-
Register RSrc = MI.getOperand(2).getReg();
5862-
58635865
MachineMemOperand *MMO = *MI.memoperands_begin();
58645866
const int MemSize = MMO->getSize().getValue();
5867+
LLT MemTy = MMO->getMemoryType();
5868+
5869+
VData = fixStoreSourceType(B, VData, MemTy, IsFormat);
5870+
5871+
castBufferRsrcArgToV4I32(MI, B, 2);
5872+
Register RSrc = MI.getOperand(2).getReg();
58655873

58665874
unsigned ImmOffset;
58675875

@@ -5954,10 +5962,13 @@ static void buildBufferLoad(unsigned Opc, Register LoadDstReg, Register RSrc,
59545962
}
59555963

59565964
bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
5957-
MachineRegisterInfo &MRI,
5958-
MachineIRBuilder &B,
5965+
LegalizerHelper &Helper,
59595966
bool IsFormat,
59605967
bool IsTyped) const {
5968+
MachineIRBuilder &B = Helper.MIRBuilder;
5969+
MachineRegisterInfo &MRI = *B.getMRI();
5970+
GISelChangeObserver &Observer = Helper.Observer;
5971+
59615972
// FIXME: Verifier should enforce 1 MMO for these intrinsics.
59625973
MachineMemOperand *MMO = *MI.memoperands_begin();
59635974
const LLT MemTy = MMO->getMemoryType();
@@ -6006,9 +6017,21 @@ bool AMDGPULegalizerInfo::legalizeBufferLoad(MachineInstr &MI,
60066017
// Make addrspace 8 pointers loads into 4xs32 loads here, so the rest of the
60076018
// logic doesn't have to handle that case.
60086019
if (hasBufferRsrcWorkaround(Ty)) {
6020+
Observer.changingInstr(MI);
60096021
Ty = castBufferRsrcFromV4I32(MI, B, MRI, 0);
6022+
Observer.changedInstr(MI);
60106023
Dst = MI.getOperand(0).getReg();
6024+
B.setInsertPt(B.getMBB(), MI);
60116025
}
6026+
if (shouldBitcastLoadStoreType(ST, Ty, MemTy)) {
6027+
Ty = getBitcastRegisterType(Ty);
6028+
Observer.changingInstr(MI);
6029+
Helper.bitcastDst(MI, Ty, 0);
6030+
Observer.changedInstr(MI);
6031+
Dst = MI.getOperand(0).getReg();
6032+
B.setInsertPt(B.getMBB(), MI);
6033+
}
6034+
60126035
LLT EltTy = Ty.getScalarType();
60136036
const bool IsD16 = IsFormat && (EltTy.getSizeInBits() == 16);
60146037
const bool Unpacked = ST.hasUnpackedD16VMem();
@@ -7388,17 +7411,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
73887411
case Intrinsic::amdgcn_raw_ptr_buffer_store:
73897412
case Intrinsic::amdgcn_struct_buffer_store:
73907413
case Intrinsic::amdgcn_struct_ptr_buffer_store:
7391-
return legalizeBufferStore(MI, MRI, B, false, false);
7414+
return legalizeBufferStore(MI, Helper, false, false);
73927415
case Intrinsic::amdgcn_raw_buffer_store_format:
73937416
case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
73947417
case Intrinsic::amdgcn_struct_buffer_store_format:
73957418
case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
7396-
return legalizeBufferStore(MI, MRI, B, false, true);
7419+
return legalizeBufferStore(MI, Helper, false, true);
73977420
case Intrinsic::amdgcn_raw_tbuffer_store:
73987421
case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
73997422
case Intrinsic::amdgcn_struct_tbuffer_store:
74007423
case Intrinsic::amdgcn_struct_ptr_tbuffer_store:
7401-
return legalizeBufferStore(MI, MRI, B, true, true);
7424+
return legalizeBufferStore(MI, Helper, true, true);
74027425
case Intrinsic::amdgcn_raw_buffer_load:
74037426
case Intrinsic::amdgcn_raw_ptr_buffer_load:
74047427
case Intrinsic::amdgcn_raw_atomic_buffer_load:
@@ -7407,17 +7430,17 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
74077430
case Intrinsic::amdgcn_struct_ptr_buffer_load:
74087431
case Intrinsic::amdgcn_struct_atomic_buffer_load:
74097432
case Intrinsic::amdgcn_struct_ptr_atomic_buffer_load:
7410-
return legalizeBufferLoad(MI, MRI, B, false, false);
7433+
return legalizeBufferLoad(MI, Helper, false, false);
74117434
case Intrinsic::amdgcn_raw_buffer_load_format:
74127435
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
74137436
case Intrinsic::amdgcn_struct_buffer_load_format:
74147437
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
7415-
return legalizeBufferLoad(MI, MRI, B, true, false);
7438+
return legalizeBufferLoad(MI, Helper, true, false);
74167439
case Intrinsic::amdgcn_raw_tbuffer_load:
74177440
case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
74187441
case Intrinsic::amdgcn_struct_tbuffer_load:
74197442
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
7420-
return legalizeBufferLoad(MI, MRI, B, true, true);
7443+
return legalizeBufferLoad(MI, Helper, true, true);
74217444
case Intrinsic::amdgcn_raw_buffer_atomic_swap:
74227445
case Intrinsic::amdgcn_raw_ptr_buffer_atomic_swap:
74237446
case Intrinsic::amdgcn_struct_buffer_atomic_swap:

llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h

+5-7
Original file line numberDiff line numberDiff line change
@@ -195,15 +195,13 @@ class AMDGPULegalizerInfo final : public LegalizerInfo {
195195

196196
Register handleD16VData(MachineIRBuilder &B, MachineRegisterInfo &MRI,
197197
Register Reg, bool ImageStore = false) const;
198-
Register fixStoreSourceType(MachineIRBuilder &B, Register VData,
198+
Register fixStoreSourceType(MachineIRBuilder &B, Register VData, LLT MemTy,
199199
bool IsFormat) const;
200200

201-
bool legalizeBufferStore(MachineInstr &MI, MachineRegisterInfo &MRI,
202-
MachineIRBuilder &B, bool IsTyped,
203-
bool IsFormat) const;
204-
bool legalizeBufferLoad(MachineInstr &MI, MachineRegisterInfo &MRI,
205-
MachineIRBuilder &B, bool IsFormat,
206-
bool IsTyped) const;
201+
bool legalizeBufferStore(MachineInstr &MI, LegalizerHelper &Helper,
202+
bool IsTyped, bool IsFormat) const;
203+
bool legalizeBufferLoad(MachineInstr &MI, LegalizerHelper &Helper,
204+
bool IsFormat, bool IsTyped) const;
207205
bool legalizeBufferAtomic(MachineInstr &MI, MachineIRBuilder &B,
208206
Intrinsic::ID IID) const;
209207

llvm/lib/Target/AMDGPU/SIRegisterInfo.td

+1-1
Original file line numberDiff line numberDiff line change
@@ -590,7 +590,7 @@ class RegisterTypes<list<ValueType> reg_types> {
590590

591591
def Reg16Types : RegisterTypes<[i16, f16, bf16]>;
592592
def Reg32Types : RegisterTypes<[i32, f32, v2i16, v2f16, v2bf16, p2, p3, p5, p6]>;
593-
def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, v4i16, v4f16, v4bf16]>;
593+
def Reg64Types : RegisterTypes<[i64, f64, v2i32, v2f32, p0, p1, p4, v4i16, v4f16, v4bf16]>;
594594
def Reg96Types : RegisterTypes<[v3i32, v3f32]>;
595595
def Reg128Types : RegisterTypes<[v4i32, v4f32, v2i64, v2f64, v8i16, v8f16, v8bf16]>;
596596

0 commit comments

Comments
 (0)