Skip to content

Commit 5ae0d57

Browse files
committed
Hackily use shrinkToUses
1 parent b69a9f8 commit 5ae0d57

File tree

2 files changed

+45
-40
lines changed

2 files changed

+45
-40
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 23 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -3935,42 +3935,37 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
39353935
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
39363936
MachineInstr *DefMI;
39373937
const auto killDef = [&](SlotIndex OldUseIdx) -> void {
3938-
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3938+
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
39393939
// The only user is the instruction which will be killed.
39403940
Register DefReg = DefMI->getOperand(0).getReg();
39413941

3942+
if (MRI.hasOneNonDBGUse(DefReg)) {
3943+
// We cannot just remove the DefMI here, calling pass will crash.
3944+
DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
3945+
DefMI->getOperand(0).setIsDead(true);
3946+
for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
3947+
DefMI->removeOperand(I);
3948+
if (LV)
3949+
LV->getVarInfo(DefReg).AliveBlocks.clear();
3950+
}
3951+
39423952
if (LIS) {
39433953
LiveInterval &DefLI = LIS->getInterval(DefReg);
3944-
LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(OldUseIdx);
3945-
assert(OldSeg && "segment not found for instruction in LiveInterval");
3946-
3947-
if (OldSeg->end == OldUseIdx.getRegSlot()) {
3948-
// We only want to leave the dead def.
3949-
DefLI.removeSegment(OldSeg->start.getDeadSlot(), OldUseIdx.getRegSlot(),
3950-
true);
3951-
3952-
for (auto &SR : DefLI.subranges()) {
3953-
LiveRange::Segment *OldSegSR = SR.getSegmentContaining(OldUseIdx);
3954-
if (OldSegSR->end == OldUseIdx.getRegSlot()) {
3955-
// We only want to leave the dead def.
3956-
SR.removeSegment(OldSegSR->start.getDeadSlot(),
3957-
OldUseIdx.getRegSlot(), true);
3958-
}
3959-
}
39603954

3961-
DefLI.removeEmptySubRanges();
3955+
// We cannot delete the original instruction here, so hack out the use
3956+
// in the original instruction with a dummy register so we can use
3957+
// shrinkToUses to deal with any multi-use edge cases. Other targets do
3958+
// not have the complexity of deleting a use to consider here.
3959+
Register DummyReg = MRI.cloneVirtualRegister(DefReg);
3960+
for (MachineOperand &MIOp : MI.uses()) {
3961+
if (MIOp.isReg() && MIOp.getReg() == DefReg) {
3962+
MIOp.setIsUndef(true);
3963+
MIOp.setReg(DummyReg);
3964+
}
39623965
}
3963-
}
39643966

3965-
if (!MRI.hasOneNonDBGUse(DefReg))
3966-
return;
3967-
// We cannot just remove the DefMI here, calling pass will crash.
3968-
DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
3969-
DefMI->getOperand(0).setIsDead(true);
3970-
for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
3971-
DefMI->removeOperand(I);
3972-
if (LV)
3973-
LV->getVarInfo(DefReg).AliveBlocks.clear();
3967+
LIS->shrinkToUses(&DefLI);
3968+
}
39743969
};
39753970

39763971
int64_t Imm;

llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir

Lines changed: 22 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,13 @@
1-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s
2-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=two-address-instruction -verify-each -o - %s | FileCheck --check-prefixes=GFX10 %s
3-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=livevars,liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s
1+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10,GFX10-NOLIS %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=two-address-instruction -verify-each -o - %s | FileCheck --check-prefixes=GFX10,GFX10-NOLIS %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10,GFX10-LIS %s
4+
45

56
# GFX10-LABEL: name: test_fmamk_reg_imm_f16
67
# GFX10: dead %2:vgpr_32 = IMPLICIT_DEF
78
# GFX10-NOT: V_MOV_B32
8-
# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
9+
# GFX10-NOLIS: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
10+
# GFX10-LIS: V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
911
---
1012
name: test_fmamk_reg_imm_f16
1113
tracksRegLiveness: true
@@ -23,7 +25,8 @@ body: |
2325
# GFX10: %0:vreg_64 = IMPLICIT_DEF
2426
# GFX10: %1:vgpr_32 = COPY %0.sub1
2527
# GFX10: dead undef %2.sub0:vreg_64 = IMPLICIT_DEF
26-
# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
28+
# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
29+
# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
2730
---
2831
name: test_fmamk_reg_imm_f16__imm_is_subreg
2932
tracksRegLiveness: true
@@ -42,7 +45,8 @@ body: |
4245
# GFX10: %1:vgpr_32 = COPY %0.sub1
4346
# GFX10: undef %2.sub1:vreg_64 = V_MOV_B32_e32 9999, implicit $exec
4447
# GFX10: %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec
45-
# GFX10: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed %0.sub0, 0, %2.sub0, 0, killed %1, 0, 0, 0, implicit $mode, implicit $e
48+
# GFX10-NOLIS: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed %0.sub0, 0, %2.sub0, 0, killed %1, 0, 0, 0, implicit $mode, implicit $e
49+
# GFX10-LIS: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, %0.sub0, 0, %2.sub0, 0, %1, 0, 0, 0, implicit $mode, implicit $e
4650
---
4751
name: test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined
4852
tracksRegLiveness: true
@@ -61,7 +65,8 @@ body: |
6165
# GFX10: %1:vgpr_32 = COPY %0.sub1
6266
# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
6367
# GFX10: S_NOP 0, implicit %2
64-
# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
68+
# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
69+
# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
6570
---
6671
name: test_fmamk_reg_imm_f16__use_imm_before_mac
6772
tracksRegLiveness: true
@@ -80,7 +85,8 @@ body: |
8085
# GFX10: %0:vreg_64 = IMPLICIT_DEF
8186
# GFX10: %1:vgpr_32 = COPY %0.sub1
8287
# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
83-
# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
88+
# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
89+
# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
8490
---
8591
name: test_fmamk_reg_imm_f16__use_imm_after_mac
8692
tracksRegLiveness: true
@@ -100,7 +106,8 @@ body: |
100106
# GFX10: %1:vgpr_32 = COPY %0.sub1
101107
# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
102108
# GFX10: S_NOP 0, implicit %2
103-
# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
109+
# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
110+
# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
104111
# GFX10: S_NOP 0, implicit %2
105112

106113
---
@@ -121,7 +128,8 @@ body: |
121128
# GFX10-LABEL: name: test_fmamk_imm_reg_f16
122129
# GFX10: dead %2:vgpr_32 = IMPLICIT_DEF
123130
# GFX10-NOT: V_MOV_B32
124-
# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
131+
# GFX10-NOLIS: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
132+
# GFX10-LIS: V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
125133
---
126134
name: test_fmamk_imm_reg_f16
127135
tracksRegLiveness: true
@@ -143,7 +151,8 @@ body: |
143151
# GFX10-LABEL: name: test_fmaak_f16
144152
# GFX10: %1:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
145153
# GFX10-NOT: V_MOV_B32
146-
# GFX10: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
154+
# GFX10-NOLIS: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
155+
# GFX10-LIS: V_FMAAK_F16 %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
147156
---
148157
name: test_fmaak_f16
149158
tracksRegLiveness: true
@@ -163,7 +172,8 @@ body: |
163172
# GFX10-LABEL: name: test_fmaak_inline_literal_f16
164173
# GFX10: %1:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec
165174
# GFX10-NOT: V_MOV_B32
166-
# GFX10: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec
175+
# GFX10-NOLIS: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec
176+
# GFX10-LIS: %2:vgpr_32 = V_FMAAK_F16 16384, %0, 49664, implicit $mode, implicit $exec
167177

168178
---
169179
name: test_fmaak_inline_literal_f16

0 commit comments

Comments
 (0)