@@ -48,11 +48,6 @@ void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
   }
 }
 
-static cl::opt<bool> EnableSpillSGPRToSMEM(
-  "amdgpu-spill-sgpr-to-smem",
-  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
-  cl::init(false));
-
 static cl::opt<bool> EnableSpillSGPRToVGPR(
   "amdgpu-spill-sgpr-to-vgpr",
   cl::desc("Enable spilling VGPRs to SGPRs"),
@@ -65,14 +60,8 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
   SGPRPressureSets(getNumRegPressureSets()),
   VGPRPressureSets(getNumRegPressureSets()),
   AGPRPressureSets(getNumRegPressureSets()),
-  SpillSGPRToVGPR(false),
-  SpillSGPRToSMEM(false),
+  SpillSGPRToVGPR(EnableSpillSGPRToVGPR),
   isWave32(ST.isWave32()) {
-  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
-    SpillSGPRToSMEM = true;
-  else if (EnableSpillSGPRToVGPR)
-    SpillSGPRToVGPR = true;
-
   unsigned NumRegPressureSets = getNumRegPressureSets();
 
   SGPRSetID = NumRegPressureSets;
@@ -759,22 +748,6 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
   }
 }
 
-static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
-                                                     bool Store) {
-  if (SuperRegSize % 16 == 0) {
-    return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
-                         AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
-  }
-
-  if (SuperRegSize % 8 == 0) {
-    return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
-                        AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
-  }
-
-  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
-                      AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
-}
-
 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                                int Index,
                                RegScavenger *RS,
@@ -799,38 +772,16 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
 
   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
 
-  bool SpillToSMEM = spillSGPRToSMEM();
-  if (SpillToSMEM && OnlyToVGPR)
-    return false;
-
-  Register FrameReg = getFrameRegister(*MF);
-
   assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
                          SuperReg != MFI->getFrameOffsetReg() &&
                          SuperReg != MFI->getScratchWaveOffsetReg()));
 
   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
 
-  unsigned OffsetReg = AMDGPU::M0;
   unsigned M0CopyReg = AMDGPU::NoRegister;
 
-  if (SpillToSMEM) {
-    if (RS->isRegUsed(AMDGPU::M0)) {
-      M0CopyReg = RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
-      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
-        .addReg(AMDGPU::M0);
-    }
-  }
-
-  unsigned ScalarStoreOp;
   unsigned EltSize = 4;
   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
-  if (SpillToSMEM && isSGPRClass(RC)) {
-    // XXX - if private_element_size is larger than 4 it might be useful to be
-    // able to spill wider vmem spills.
-    std::tie(EltSize, ScalarStoreOp) =
-      getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
-  }
 
   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
@@ -845,47 +796,6 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
     Register SubReg =
         NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
 
-    if (SpillToSMEM) {
-      int64_t FrOffset = FrameInfo.getObjectOffset(Index);
-
-      // The allocated memory size is really the wavefront size * the frame
-      // index size. The widest register class is 64 bytes, so a 4-byte scratch
-      // allocation is enough to spill this in a single stack object.
-      //
-      // FIXME: Frame size/offsets are computed earlier than this, so the extra
-      // space is still unnecessarily allocated.
-
-      unsigned Align = FrameInfo.getObjectAlignment(Index);
-      MachinePointerInfo PtrInfo
-        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
-      MachineMemOperand *MMO
-        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
-                                   EltSize, MinAlign(Align, EltSize * i));
-
-      // SMEM instructions only support a single offset, so increment the wave
-      // offset.
-
-      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
-      if (Offset != 0) {
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
-          .addReg(FrameReg)
-          .addImm(Offset);
-      } else {
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
-          .addReg(FrameReg);
-      }
-
-      BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
-        .addReg(SubReg, getKillRegState(IsKill)) // sdata
-        .addReg(MFI->getScratchRSrcReg())        // sbase
-        .addReg(OffsetReg, RegState::Kill)       // soff
-        .addImm(0)                               // glc
-        .addImm(0)                               // dlc
-        .addMemOperand(MMO);
-
-      continue;
-    }
-
     if (SpillToVGPR) {
       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
 
@@ -914,10 +824,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
     return false;
 
   // Spill SGPR to a frame index.
-  // TODO: Should VI try to spill to VGPR and then spill to SMEM?
   if (!TmpVGPR.isValid())
     TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
-  // TODO: Should VI try to spill to VGPR and then spill to SMEM?
 
   MachineInstrBuilder Mov
     = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
@@ -979,82 +887,24 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
   const DebugLoc &DL = MI->getDebugLoc();
 
   Register SuperReg = MI->getOperand(0).getReg();
-  bool SpillToSMEM = spillSGPRToSMEM();
-  if (SpillToSMEM && OnlyToVGPR)
-    return false;
 
   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
 
-  unsigned OffsetReg = AMDGPU::M0;
   unsigned M0CopyReg = AMDGPU::NoRegister;
 
-  if (SpillToSMEM) {
-    if (RS->isRegUsed(AMDGPU::M0)) {
-      M0CopyReg = RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
-      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
-        .addReg(AMDGPU::M0);
-    }
-  }
-
   unsigned EltSize = 4;
-  unsigned ScalarLoadOp;
-
-  Register FrameReg = getFrameRegister(*MF);
 
   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
-  if (SpillToSMEM && isSGPRClass(RC)) {
-    // XXX - if private_element_size is larger than 4 it might be useful to be
-    // able to spill wider vmem spills.
-    std::tie(EltSize, ScalarLoadOp) =
-      getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
-  }
 
   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
 
-  // SubReg carries the "Kill" flag when SubReg == SuperReg.
-  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
-
   Register TmpVGPR;
 
   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
     Register SubReg =
         NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
 
-    if (SpillToSMEM) {
-      // FIXME: Size may be > 4 but extra bytes wasted.
-      unsigned Align = FrameInfo.getObjectAlignment(Index);
-      MachinePointerInfo PtrInfo
-        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
-      MachineMemOperand *MMO
-        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
-                                   EltSize, MinAlign(Align, EltSize * i));
-
-      // Add i * 4 offset
-      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
-      if (Offset != 0) {
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
-          .addReg(FrameReg)
-          .addImm(Offset);
-      } else {
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
-          .addReg(FrameReg);
-      }
-
-      auto MIB =
-        BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
-        .addReg(MFI->getScratchRSrcReg())  // sbase
-        .addReg(OffsetReg, RegState::Kill) // soff
-        .addImm(0)                         // glc
-        .addImm(0)                         // dlc
-        .addMemOperand(MMO);
-
-      if (NumSubRegs > 1 && i == 0)
-        MIB.addReg(SuperReg, RegState::ImplicitDefine);
-
-      continue;
-    }
-
     if (SpillToVGPR) {
       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
       auto MIB =