@@ -152,6 +152,13 @@ class SIFixSGPRCopies : public MachineFunctionPass {
152
152
153
153
void processPHINode (MachineInstr &MI);
154
154
155
+ // Check if MO is an immediate materialized into a VGPR, and if so replace it
156
+ // with an SGPR immediate. The VGPR immediate is also deleted if it does not
157
+ // have any other uses.
158
+ bool tryMoveVGPRConstToSGPR (MachineOperand &MO, Register NewDst,
159
+ MachineBasicBlock *BlockToInsertTo,
160
+ MachineBasicBlock::iterator PointToInsertTo);
161
+
155
162
StringRef getPassName () const override { return " SI Fix SGPR copies" ; }
156
163
157
164
void getAnalysisUsage (AnalysisUsage &AU) const override {
@@ -662,13 +669,17 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
662
669
: MBB;
663
670
MachineBasicBlock::iterator PointToInsertCopy =
664
671
MI.isPHI () ? BlockToInsertCopy->getFirstInstrTerminator () : I;
665
- MachineInstr *NewCopy =
666
- BuildMI (*BlockToInsertCopy, PointToInsertCopy,
667
- PointToInsertCopy->getDebugLoc (),
668
- TII->get (AMDGPU::COPY), NewDst)
669
- .addReg (MO.getReg ());
670
- MO.setReg (NewDst);
671
- analyzeVGPRToSGPRCopy (NewCopy);
672
+
673
+ if (!tryMoveVGPRConstToSGPR (MO, NewDst, BlockToInsertCopy,
674
+ PointToInsertCopy)) {
675
+ MachineInstr *NewCopy =
676
+ BuildMI (*BlockToInsertCopy, PointToInsertCopy,
677
+ PointToInsertCopy->getDebugLoc (),
678
+ TII->get (AMDGPU::COPY), NewDst)
679
+ .addReg (MO.getReg ());
680
+ MO.setReg (NewDst);
681
+ analyzeVGPRToSGPRCopy (NewCopy);
682
+ }
672
683
}
673
684
}
674
685
}
@@ -829,6 +840,32 @@ void SIFixSGPRCopies::processPHINode(MachineInstr &MI) {
829
840
}
830
841
}
831
842
843
+ bool SIFixSGPRCopies::tryMoveVGPRConstToSGPR (
844
+ MachineOperand &MaybeVGPRConstMO, Register DstReg,
845
+ MachineBasicBlock *BlockToInsertTo,
846
+ MachineBasicBlock::iterator PointToInsertTo) {
847
+
848
+ MachineInstr *DefMI = MRI->getVRegDef (MaybeVGPRConstMO.getReg ());
849
+ if (!DefMI || !DefMI->isMoveImmediate ())
850
+ return false ;
851
+
852
+ MachineOperand *SrcConst = TII->getNamedOperand (*DefMI, AMDGPU::OpName::src0);
853
+ if (SrcConst->isReg ())
854
+ return false ;
855
+
856
+ const TargetRegisterClass *SrcRC =
857
+ MRI->getRegClass (MaybeVGPRConstMO.getReg ());
858
+ unsigned MoveSize = TRI->getRegSizeInBits (*SrcRC);
859
+ unsigned MoveOp = MoveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
860
+ BuildMI (*BlockToInsertTo, PointToInsertTo, PointToInsertTo->getDebugLoc (),
861
+ TII->get (MoveOp), DstReg)
862
+ .add (*SrcConst);
863
+ if (MRI->hasOneUse (MaybeVGPRConstMO.getReg ()))
864
+ DefMI->eraseFromParent ();
865
+ MaybeVGPRConstMO.setReg (DstReg);
866
+ return true ;
867
+ }
868
+
832
869
bool SIFixSGPRCopies::lowerSpecialCase (MachineInstr &MI,
833
870
MachineBasicBlock::iterator &I) {
834
871
Register DstReg = MI.getOperand (0 ).getReg ();
@@ -846,25 +883,10 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
846
883
TII->get (AMDGPU::V_READFIRSTLANE_B32), TmpReg)
847
884
.add (MI.getOperand (1 ));
848
885
MI.getOperand (1 ).setReg (TmpReg);
849
- } else {
850
- MachineInstr *DefMI = MRI->getVRegDef (SrcReg);
851
- if (DefMI && DefMI->isMoveImmediate ()) {
852
- MachineOperand SrcConst = DefMI->getOperand (AMDGPU::getNamedOperandIdx (
853
- DefMI->getOpcode (), AMDGPU::OpName::src0));
854
- if (!SrcConst.isReg ()) {
855
- const TargetRegisterClass *SrcRC = MRI->getRegClass (SrcReg);
856
- unsigned MoveSize = TRI->getRegSizeInBits (*SrcRC);
857
- unsigned MoveOp =
858
- MoveSize == 64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
859
- BuildMI (*MI.getParent (), MI, MI.getDebugLoc (), TII->get (MoveOp),
860
- DstReg)
861
- .add (SrcConst);
862
- I = std::next (I);
863
- if (MRI->hasOneUse (SrcReg))
864
- DefMI->eraseFromParent ();
865
- MI.eraseFromParent ();
866
- }
867
- }
886
+ } else if (tryMoveVGPRConstToSGPR (MI.getOperand (1 ), DstReg, MI.getParent (),
887
+ MI)) {
888
+ I = std::next (I);
889
+ MI.eraseFromParent ();
868
890
}
869
891
return true ;
870
892
}
0 commit comments