Skip to content

Commit a9aa4ec

Browse files
committed
[AMDGPU] Remove -amdgpu-spill-sgpr-to-smem.
Summary: The implementation was never completed and never used except in tests.
Reviewers: arsenm, mareko
Subscribers: qcolombet, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D69163
llvm-svn: 375293
1 parent 64b7d95 commit a9aa4ec

9 files changed

+10
-447
lines changed

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

+1-151
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,6 @@ void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
4848
}
4949
}
5050

51-
static cl::opt<bool> EnableSpillSGPRToSMEM(
52-
"amdgpu-spill-sgpr-to-smem",
53-
cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
54-
cl::init(false));
55-
5651
static cl::opt<bool> EnableSpillSGPRToVGPR(
5752
"amdgpu-spill-sgpr-to-vgpr",
5853
cl::desc("Enable spilling VGPRs to SGPRs"),
@@ -65,14 +60,8 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
6560
SGPRPressureSets(getNumRegPressureSets()),
6661
VGPRPressureSets(getNumRegPressureSets()),
6762
AGPRPressureSets(getNumRegPressureSets()),
68-
SpillSGPRToVGPR(false),
69-
SpillSGPRToSMEM(false),
63+
SpillSGPRToVGPR(EnableSpillSGPRToVGPR),
7064
isWave32(ST.isWave32()) {
71-
if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
72-
SpillSGPRToSMEM = true;
73-
else if (EnableSpillSGPRToVGPR)
74-
SpillSGPRToVGPR = true;
75-
7665
unsigned NumRegPressureSets = getNumRegPressureSets();
7766

7867
SGPRSetID = NumRegPressureSets;
@@ -759,22 +748,6 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
759748
}
760749
}
761750

762-
static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
763-
bool Store) {
764-
if (SuperRegSize % 16 == 0) {
765-
return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
766-
AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
767-
}
768-
769-
if (SuperRegSize % 8 == 0) {
770-
return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
771-
AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
772-
}
773-
774-
return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
775-
AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
776-
}
777-
778751
bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
779752
int Index,
780753
RegScavenger *RS,
@@ -799,38 +772,16 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
799772

800773
MachineFrameInfo &FrameInfo = MF->getFrameInfo();
801774

802-
bool SpillToSMEM = spillSGPRToSMEM();
803-
if (SpillToSMEM && OnlyToVGPR)
804-
return false;
805-
806-
Register FrameReg = getFrameRegister(*MF);
807-
808775
assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
809776
SuperReg != MFI->getFrameOffsetReg() &&
810777
SuperReg != MFI->getScratchWaveOffsetReg()));
811778

812779
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
813780

814-
unsigned OffsetReg = AMDGPU::M0;
815781
unsigned M0CopyReg = AMDGPU::NoRegister;
816782

817-
if (SpillToSMEM) {
818-
if (RS->isRegUsed(AMDGPU::M0)) {
819-
M0CopyReg = RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
820-
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
821-
.addReg(AMDGPU::M0);
822-
}
823-
}
824-
825-
unsigned ScalarStoreOp;
826783
unsigned EltSize = 4;
827784
const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
828-
if (SpillToSMEM && isSGPRClass(RC)) {
829-
// XXX - if private_element_size is larger than 4 it might be useful to be
830-
// able to spill wider vmem spills.
831-
std::tie(EltSize, ScalarStoreOp) =
832-
getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
833-
}
834785

835786
ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
836787
unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
@@ -845,47 +796,6 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
845796
Register SubReg =
846797
NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
847798

848-
if (SpillToSMEM) {
849-
int64_t FrOffset = FrameInfo.getObjectOffset(Index);
850-
851-
// The allocated memory size is really the wavefront size * the frame
852-
// index size. The widest register class is 64 bytes, so a 4-byte scratch
853-
// allocation is enough to spill this in a single stack object.
854-
//
855-
// FIXME: Frame size/offsets are computed earlier than this, so the extra
856-
// space is still unnecessarily allocated.
857-
858-
unsigned Align = FrameInfo.getObjectAlignment(Index);
859-
MachinePointerInfo PtrInfo
860-
= MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
861-
MachineMemOperand *MMO
862-
= MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
863-
EltSize, MinAlign(Align, EltSize * i));
864-
865-
// SMEM instructions only support a single offset, so increment the wave
866-
// offset.
867-
868-
int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
869-
if (Offset != 0) {
870-
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
871-
.addReg(FrameReg)
872-
.addImm(Offset);
873-
} else {
874-
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
875-
.addReg(FrameReg);
876-
}
877-
878-
BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
879-
.addReg(SubReg, getKillRegState(IsKill)) // sdata
880-
.addReg(MFI->getScratchRSrcReg()) // sbase
881-
.addReg(OffsetReg, RegState::Kill) // soff
882-
.addImm(0) // glc
883-
.addImm(0) // dlc
884-
.addMemOperand(MMO);
885-
886-
continue;
887-
}
888-
889799
if (SpillToVGPR) {
890800
SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
891801

@@ -914,10 +824,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
914824
return false;
915825

916826
// Spill SGPR to a frame index.
917-
// TODO: Should VI try to spill to VGPR and then spill to SMEM?
918827
if (!TmpVGPR.isValid())
919828
TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
920-
// TODO: Should VI try to spill to VGPR and then spill to SMEM?
921829

922830
MachineInstrBuilder Mov
923831
= BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
@@ -979,82 +887,24 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
979887
const DebugLoc &DL = MI->getDebugLoc();
980888

981889
Register SuperReg = MI->getOperand(0).getReg();
982-
bool SpillToSMEM = spillSGPRToSMEM();
983-
if (SpillToSMEM && OnlyToVGPR)
984-
return false;
985890

986891
assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
987892

988-
unsigned OffsetReg = AMDGPU::M0;
989893
unsigned M0CopyReg = AMDGPU::NoRegister;
990894

991-
if (SpillToSMEM) {
992-
if (RS->isRegUsed(AMDGPU::M0)) {
993-
M0CopyReg = RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
994-
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
995-
.addReg(AMDGPU::M0);
996-
}
997-
}
998-
999895
unsigned EltSize = 4;
1000-
unsigned ScalarLoadOp;
1001-
1002-
Register FrameReg = getFrameRegister(*MF);
1003896

1004897
const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
1005-
if (SpillToSMEM && isSGPRClass(RC)) {
1006-
// XXX - if private_element_size is larger than 4 it might be useful to be
1007-
// able to spill wider vmem spills.
1008-
std::tie(EltSize, ScalarLoadOp) =
1009-
getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
1010-
}
1011898

1012899
ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
1013900
unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
1014901

1015-
// SubReg carries the "Kill" flag when SubReg == SuperReg.
1016-
int64_t FrOffset = FrameInfo.getObjectOffset(Index);
1017-
1018902
Register TmpVGPR;
1019903

1020904
for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
1021905
Register SubReg =
1022906
NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
1023907

1024-
if (SpillToSMEM) {
1025-
// FIXME: Size may be > 4 but extra bytes wasted.
1026-
unsigned Align = FrameInfo.getObjectAlignment(Index);
1027-
MachinePointerInfo PtrInfo
1028-
= MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
1029-
MachineMemOperand *MMO
1030-
= MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
1031-
EltSize, MinAlign(Align, EltSize * i));
1032-
1033-
// Add i * 4 offset
1034-
int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
1035-
if (Offset != 0) {
1036-
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
1037-
.addReg(FrameReg)
1038-
.addImm(Offset);
1039-
} else {
1040-
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
1041-
.addReg(FrameReg);
1042-
}
1043-
1044-
auto MIB =
1045-
BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
1046-
.addReg(MFI->getScratchRSrcReg()) // sbase
1047-
.addReg(OffsetReg, RegState::Kill) // soff
1048-
.addImm(0) // glc
1049-
.addImm(0) // dlc
1050-
.addMemOperand(MMO);
1051-
1052-
if (NumSubRegs > 1 && i == 0)
1053-
MIB.addReg(SuperReg, RegState::ImplicitDefine);
1054-
1055-
continue;
1056-
}
1057-
1058908
if (SpillToVGPR) {
1059909
SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
1060910
auto MIB =

llvm/lib/Target/AMDGPU/SIRegisterInfo.h

-5
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ class SIRegisterInfo final : public AMDGPURegisterInfo {
3535
BitVector VGPRPressureSets;
3636
BitVector AGPRPressureSets;
3737
bool SpillSGPRToVGPR;
38-
bool SpillSGPRToSMEM;
3938
bool isWave32;
4039

4140
void classifyPressureSet(unsigned PSetID, unsigned Reg,
@@ -47,10 +46,6 @@ class SIRegisterInfo final : public AMDGPURegisterInfo {
4746
return SpillSGPRToVGPR;
4847
}
4948

50-
bool spillSGPRToSMEM() const {
51-
return SpillSGPRToSMEM;
52-
}
53-
5449
/// Return the end register initially reserved for the scratch buffer in case
5550
/// spilling is needed.
5651
unsigned reservedPrivateSegmentBufferReg(const MachineFunction &MF) const;

llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr-spill-to-smem.ll

-33
This file was deleted.

llvm/test/CodeGen/AMDGPU/attr-amdgpu-num-sgpr.ll

+1-16
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,6 @@
1-
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=TOSGPR -check-prefix=ALL %s
2-
3-
; If spilling to smem, additional registers are used for the resource
4-
; descriptor.
1+
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=TOSGPR -check-prefix=ALL %s
52

63
; FIXME: Vectorization can increase required SGPR count beyond limit.
7-
; FIXME: SGPR-to-SMEM requires an additional SGPR always to scavenge m0
84

95
; ALL-LABEL: {{^}}max_9_sgprs:
106

@@ -55,13 +51,6 @@ define amdgpu_kernel void @max_9_sgprs() #0 {
5551
; XTOSGPR: SGPRBlocks: 1
5652
; XTOSGPR: NumSGPRsForWavesPerEU: 16
5753

58-
; XTOSMEM: s_mov_b64 s[10:11], s[2:3]
59-
; XTOSMEM: s_mov_b64 s[8:9], s[0:1]
60-
; XTOSMEM: s_mov_b32 s7, s13
61-
62-
; XTOSMEM: SGPRBlocks: 1
63-
; XTOSMEM: NumSGPRsForWavesPerEU: 16
64-
;
6554
; This test case is disabled: When calculating the spillslot addresses AMDGPU
6655
; creates an extra vreg to save/restore m0 which in a point of maximum register
6756
; pressure would trigger an endless loop; the compiler aborts earlier with
@@ -101,10 +90,6 @@ define amdgpu_kernel void @max_9_sgprs() #0 {
10190
; ; swapping the order the registers are copied from what normally
10291
; ; happens.
10392

104-
; XTOSMEM: s_mov_b32 s5, s11
105-
; XTOSMEM: s_add_u32 m0, s5,
106-
; XTOSMEM: s_buffer_store_dword vcc_lo, s[0:3], m0
107-
10893
; XALL: SGPRBlocks: 2
10994
; XALL: NumSGPRsForWavesPerEU: 18
11095
;define amdgpu_kernel void @max_12_sgprs_12_input_sgprs(i32 addrspace(1)* %out1,

llvm/test/CodeGen/AMDGPU/basic-branch.ll

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
; RUN: llc -O0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNNOOPT -check-prefix=GCN %s
2-
; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNNOOPT -check-prefix=GCN %s
3-
; RUN: llc -O0 -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global,-WavefrontSize32,+WavefrontSize64 -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNNOOPT -check-prefix=GCN %s
2+
; RUN: llc -O0 -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNNOOPT -check-prefix=GCN %s
3+
; RUN: llc -O0 -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global,-WavefrontSize32,+WavefrontSize64 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNNOOPT -check-prefix=GCN %s
44
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNOPT -check-prefix=GCN %s
55
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCNOPT -check-prefix=GCN %s
66

llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll

+1-11
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-spill-sgpr-to-smem=0 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SGPR %s
2-
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-spill-sgpr-to-smem=1 -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SMEM %s
1+
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=SGPR %s
32

43
; Make sure this doesn't crash.
54
; ALL-LABEL: {{^}}test:
@@ -14,15 +13,6 @@
1413
; SGPR-NEXT: s_nop 4
1514
; SGPR-NEXT: buffer_store_dword v0, off, s[0:[[HI]]{{\]}}, 0
1615

17-
; Make sure scratch wave offset register is correctly incremented and
18-
; then restored.
19-
; SMEM: s_add_u32 m0, s[[OFF]], 0x100{{$}}
20-
; SMEM: s_buffer_store_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[LO]]:[[HI]]], m0 ; 16-byte Folded Spill
21-
22-
; SMEM: s_add_u32 m0, s[[OFF]], 0x100{{$}}
23-
; SMEM: s_buffer_load_dwordx4 s{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[LO]]:[[HI]]], m0 ; 16-byte Folded Reload
24-
25-
; SMEM: s_dcache_wb
2616
; ALL: s_endpgm
2717
define amdgpu_kernel void @test(i32 addrspace(1)* %out, i32 %in) {
2818
call void asm sideeffect "", "~{s[0:7]}" ()

0 commit comments

Comments
 (0)