@@ -2749,6 +2749,63 @@ static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
2749
2749
return &MI;
2750
2750
}
2751
2751
2752
+ static MachineInstr *swapImmOperands(MachineInstr &MI,
2753
+ MachineOperand &NonRegOp1,
2754
+ MachineOperand &NonRegOp2) {
2755
+ unsigned TargetFlags = NonRegOp1.getTargetFlags();
2756
+ int64_t NonRegVal = NonRegOp1.getImm();
2757
+
2758
+ NonRegOp1.setImm(NonRegOp2.getImm());
2759
+ NonRegOp2.setImm(NonRegVal);
2760
+ NonRegOp1.setTargetFlags(NonRegOp2.getTargetFlags());
2761
+ NonRegOp2.setTargetFlags(TargetFlags);
2762
+ return &MI;
2763
+ }
2764
+
2765
+ bool SIInstrInfo::isLegalToSwap(const MachineInstr &MI, unsigned OpIdx0,
2766
+ const MachineOperand *MO0, unsigned OpIdx1,
2767
+ const MachineOperand *MO1) const {
2768
+ const MCInstrDesc &InstDesc = MI.getDesc();
2769
+ const MCOperandInfo &OpInfo0 = InstDesc.operands()[OpIdx0];
2770
+ const MCOperandInfo &OpInfo1 = InstDesc.operands()[OpIdx1];
2771
+ const TargetRegisterClass *DefinedRC1 =
2772
+ OpInfo1.RegClass != -1 ? RI.getRegClass(OpInfo1.RegClass) : nullptr;
2773
+ const TargetRegisterClass *DefinedRC0 =
2774
+ OpInfo1.RegClass != -1 ? RI.getRegClass(OpInfo0.RegClass) : nullptr;
2775
+
2776
+ unsigned Opc = MI.getOpcode();
2777
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
2778
+
2779
+ // Swap doesn't breach constant bus or literal limits
2780
+ // It may move literal to position other than src0, this is not allowed
2781
+ // pre-gfx10 However, most test cases need literals in Src0 for VOP
2782
+ // FIXME: After gfx9, literal can be in place other than Src0
2783
+ if (isVALU(MI)) {
2784
+ if ((int)OpIdx0 == Src0Idx && !MO0->isReg() &&
2785
+ !isInlineConstant(*MO0, OpInfo1))
2786
+ return false;
2787
+ if ((int)OpIdx1 == Src0Idx && !MO1->isReg() &&
2788
+ !isInlineConstant(*MO1, OpInfo0))
2789
+ return false;
2790
+ }
2791
+
2792
+ if (OpIdx1 != Src0Idx && MO0->isReg()) {
2793
+ if (!DefinedRC1)
2794
+ return OpInfo1.OperandType == MCOI::OPERAND_UNKNOWN;
2795
+ return isLegalRegOperand(MI, OpIdx1, *MO0);
2796
+ }
2797
+ if (OpIdx0 != Src0Idx && MO1->isReg()) {
2798
+ if (!DefinedRC0)
2799
+ return OpInfo0.OperandType == MCOI::OPERAND_UNKNOWN;
2800
+ return isLegalRegOperand(MI, OpIdx0, *MO1);
2801
+ }
2802
+
2803
+ // No need to check 64-bit literals since swapping does not bring new
2804
+ // 64-bit literals into current instruction to fold to 32-bit
2805
+
2806
+ return isImmOperandLegal(MI, OpIdx1, *MO0);
2807
+ }
2808
+
2752
2809
MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
2753
2810
unsigned Src0Idx,
2754
2811
unsigned Src1Idx) const {
@@ -2770,21 +2827,20 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
2770
2827
2771
2828
MachineOperand &Src0 = MI.getOperand(Src0Idx);
2772
2829
MachineOperand &Src1 = MI.getOperand(Src1Idx);
2773
-
2830
+ if (!isLegalToSwap(MI, Src0Idx, &Src0, Src1Idx, &Src1)) {
2831
+ return nullptr;
2832
+ }
2774
2833
MachineInstr *CommutedMI = nullptr;
2775
2834
if (Src0.isReg() && Src1.isReg()) {
2776
- if (isOperandLegal(MI, Src1Idx, &Src0)) {
2777
- // Be sure to copy the source modifiers to the right place.
2778
- CommutedMI
2779
- = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
2780
- }
2781
-
2835
+ // Be sure to copy the source modifiers to the right place.
2836
+ CommutedMI =
2837
+ TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
2782
2838
} else if (Src0.isReg() && !Src1.isReg()) {
2783
- if (isOperandLegal(MI, Src1Idx, &Src0))
2784
- CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
2839
+ CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
2785
2840
} else if (!Src0.isReg() && Src1.isReg()) {
2786
- if (isOperandLegal(MI, Src1Idx, &Src0))
2787
- CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
2841
+ CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
2842
+ } else if (Src0.isImm() && Src1.isImm()) {
2843
+ CommutedMI = swapImmOperands(MI, Src0, Src1);
2788
2844
} else {
2789
2845
// FIXME: Found two non registers to commute. This does happen.
2790
2846
return nullptr;
@@ -5817,6 +5873,49 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
5817
5873
return RC->hasSuperClassEq(DRC);
5818
5874
}
5819
5875
5876
+ bool SIInstrInfo::isLegalRegOperand(const MachineInstr &MI, unsigned OpIdx,
5877
+ const MachineOperand &MO) const {
5878
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
5879
+ const MCOperandInfo OpInfo = MI.getDesc().operands()[OpIdx];
5880
+ unsigned Opc = MI.getOpcode();
5881
+
5882
+ if (!isLegalRegOperand(MRI, OpInfo, MO))
5883
+ return false;
5884
+
5885
+ // check Accumulate GPR operand
5886
+ bool IsAGPR = RI.isAGPR(MRI, MO.getReg());
5887
+ if (IsAGPR && !ST.hasMAIInsts())
5888
+ return false;
5889
+ if (IsAGPR && (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&
5890
+ (MI.mayLoad() || MI.mayStore() || isDS(Opc) || isMIMG(Opc)))
5891
+ return false;
5892
+ // Atomics should have both vdst and vdata either vgpr or agpr.
5893
+ const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
5894
+ const int DataIdx = AMDGPU::getNamedOperandIdx(
5895
+ Opc, isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
5896
+ if ((int)OpIdx == VDstIdx && DataIdx != -1 &&
5897
+ MI.getOperand(DataIdx).isReg() &&
5898
+ RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR)
5899
+ return false;
5900
+ if ((int)OpIdx == DataIdx) {
5901
+ if (VDstIdx != -1 &&
5902
+ RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR)
5903
+ return false;
5904
+ // DS instructions with 2 src operands also must have tied RC.
5905
+ const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::data1);
5906
+ if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() &&
5907
+ RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR)
5908
+ return false;
5909
+ }
5910
+
5911
+ // Check V_ACCVGPR_WRITE_B32_e64
5912
+ if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
5913
+ (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) &&
5914
+ RI.isSGPRReg(MRI, MO.getReg()))
5915
+ return false;
5916
+ return true;
5917
+ }
5918
+
5820
5919
bool SIInstrInfo::isLegalVSrcOperand(const MachineRegisterInfo &MRI,
5821
5920
const MCOperandInfo &OpInfo,
5822
5921
const MachineOperand &MO) const {
@@ -5879,40 +5978,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
5879
5978
if (MO->isReg()) {
5880
5979
if (!DefinedRC)
5881
5980
return OpInfo.OperandType == MCOI::OPERAND_UNKNOWN;
5882
- if (!isLegalRegOperand(MRI, OpInfo, *MO))
5883
- return false;
5884
- bool IsAGPR = RI.isAGPR(MRI, MO->getReg());
5885
- if (IsAGPR && !ST.hasMAIInsts())
5886
- return false;
5887
- unsigned Opc = MI.getOpcode();
5888
- if (IsAGPR &&
5889
- (!ST.hasGFX90AInsts() || !MRI.reservedRegsFrozen()) &&
5890
- (MI.mayLoad() || MI.mayStore() || isDS(Opc) || isMIMG(Opc)))
5891
- return false;
5892
- // Atomics should have both vdst and vdata either vgpr or agpr.
5893
- const int VDstIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst);
5894
- const int DataIdx = AMDGPU::getNamedOperandIdx(Opc,
5895
- isDS(Opc) ? AMDGPU::OpName::data0 : AMDGPU::OpName::vdata);
5896
- if ((int)OpIdx == VDstIdx && DataIdx != -1 &&
5897
- MI.getOperand(DataIdx).isReg() &&
5898
- RI.isAGPR(MRI, MI.getOperand(DataIdx).getReg()) != IsAGPR)
5899
- return false;
5900
- if ((int)OpIdx == DataIdx) {
5901
- if (VDstIdx != -1 &&
5902
- RI.isAGPR(MRI, MI.getOperand(VDstIdx).getReg()) != IsAGPR)
5903
- return false;
5904
- // DS instructions with 2 src operands also must have tied RC.
5905
- const int Data1Idx = AMDGPU::getNamedOperandIdx(Opc,
5906
- AMDGPU::OpName::data1);
5907
- if (Data1Idx != -1 && MI.getOperand(Data1Idx).isReg() &&
5908
- RI.isAGPR(MRI, MI.getOperand(Data1Idx).getReg()) != IsAGPR)
5909
- return false;
5910
- }
5911
- if (Opc == AMDGPU::V_ACCVGPR_WRITE_B32_e64 && !ST.hasGFX90AInsts() &&
5912
- (int)OpIdx == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) &&
5913
- RI.isSGPRReg(MRI, MO->getReg()))
5914
- return false;
5915
- return true;
5981
+ return isLegalRegOperand(MI, OpIdx, *MO);
5916
5982
}
5917
5983
5918
5984
if (MO->isImm()) {
0 commit comments